From 01fdea5a975dcdd6632db89a1a18b0383c7adb59 Mon Sep 17 00:00:00 2001 From: Liyun Xiu Date: Fri, 23 Aug 2024 13:41:39 +0800 Subject: [PATCH] Remove batch_size of bulk API from tests & refactor BWC version check (#852) * Remove batch_size of bulk API from tests & refactor BWC version check Signed-off-by: Liyun Xiu * Update changelog Signed-off-by: Liyun Xiu * Address some comments Signed-off-by: Liyun Xiu * Update Changelog Signed-off-by: Liyun Xiu --------- Signed-off-by: Liyun Xiu (cherry picked from commit e1c3878cd6ce4fdfbc00f69d8b9ba143e9aa0a25) Signed-off-by: Martin Gaievski --- CHANGELOG.md | 1 + qa/restart-upgrade/build.gradle | 59 +++++----- .../AbstractRestartUpgradeRestTestCase.java | 15 ++- .../neuralsearch/bwc/BatchIngestionIT.java | 6 +- qa/rolling-upgrade/build.gradle | 109 +++++++++--------- .../bwc/AbstractRollingUpgradeTestCase.java | 22 +++- .../neuralsearch/bwc/BatchIngestionIT.java | 8 +- ...rSparseEncodingProcessorConfiguration.json | 1 + .../processor/TextChunkingProcessorIT.java | 2 +- .../processor/TextEmbeddingProcessorIT.java | 15 ++- .../processor/PipelineConfiguration.json | 1 + .../SparseEncodingPipelineConfiguration.json | 1 + .../neuralsearch/BaseNeuralSearchIT.java | 34 +++--- 13 files changed, 152 insertions(+), 122 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 63aca0c29..f0dcaa9e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Bug Fixes - Address inconsistent scoring in hybrid query results ([#998](https://github.com/opensearch-project/neural-search/pull/998)) ### Infrastructure +- Update batch related tests to use batch_size in processor & refactor BWC version check ([#852](https://github.com/opensearch-project/neural-search/pull/852)) ### Documentation ### Maintenance ### Refactoring diff --git a/qa/restart-upgrade/build.gradle b/qa/restart-upgrade/build.gradle index fe3db254c..0163c36c1 100644 --- 
a/qa/restart-upgrade/build.gradle +++ b/qa/restart-upgrade/build.gradle @@ -54,6 +54,13 @@ testClusters { } } +def versionsBelow2_11 = ["2.9", "2.10"] +def versionsBelow2_12 = versionsBelow2_11 + "2.11" +def versionsBelow2_13 = versionsBelow2_12 + "2.12" +def versionsBelow2_14 = versionsBelow2_13 + "2.13" +def versionsBelow2_15 = versionsBelow2_14 + "2.14" +def versionsBelow2_16 = versionsBelow2_15 + "2.15" + // Task to run BWC tests against the old cluster task testAgainstOldCluster(type: StandaloneRestIntegTestTask) { if(!ext.bwcBundleTest){ @@ -67,7 +74,7 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) { // Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10 // because these features were released in 2.11 version. - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){ + if (versionsBelow2_11.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*" excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*" @@ -76,35 +83,35 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) { } } - // Excluding the test because we introduce this feature in 2.13 - if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){ + // Excluding these tests because we introduce them in 2.13 + if (versionsBelow2_13.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow" - } - } - - // Excluding the text chunking processor test because we introduce this feature in 2.13 - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || 
ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){ - filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*" } } - // Excluding the k-NN radial search tests and batch ingestion tests because we introduce these features in 2.14 - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13")){ + // Excluding the k-NN radial search tests because we introduce this feature in 2.14 + if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*" - excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*" } } // Excluding the NeuralSparseQuery two-phase search pipeline tests because we introduce this feature in 2.15 - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){ + if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*" } } + // Excluding the batching processor tests because we introduce this feature in 2.16 + if (versionsBelow2_16.any { ext.neural_search_bwc_version.startsWith(it) }){ + filter { + excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*" + } + } + nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}") nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}") 
systemProperty 'tests.security.manager', 'false' @@ -131,7 +138,7 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) { // Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10 // because these features were released in 2.11 version. - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){ + if (versionsBelow2_11.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*" excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*" @@ -140,35 +147,35 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) { } } - // Excluding the test because we introduce this feature in 2.13 - if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){ + // Excluding these tests because we introduce them in 2.13 + if (versionsBelow2_13.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow" - } - } - - // Excluding the text chunking processor test because we introduce this feature in 2.13 - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){ - filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*" } } - // Excluding the k-NN radial search tests and batch ingestion tests because we introduce these features in 2.14 - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || 
ext.neural_search_bwc_version.startsWith("2.13")){ + // Excluding the k-NN radial search tests because we introduce this feature in 2.14 + if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*" - excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*" } } // Excluding the NeuralSparseQuery two-phase search pipeline tests because we introduce this feature in 2.15 - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){ + if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*" } } + // Excluding the batch processor tests because we introduce this feature in 2.16 + if (versionsBelow2_16.any { ext.neural_search_bwc_version.startsWith(it) }){ + filter { + excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*" + } + } + nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}") nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}") systemProperty 'tests.security.manager', 'false' diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRestartUpgradeRestTestCase.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRestartUpgradeRestTestCase.java index bdbba92e8..7028888ca 100644 --- a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRestartUpgradeRestTestCase.java +++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRestartUpgradeRestTestCase.java @@ 
-76,7 +76,7 @@ protected String registerModelGroupAndGetModelId(final String requestBody) throw protected void createPipelineProcessor(final String modelId, final String pipelineName) throws Exception { String requestBody = Files.readString(Path.of(classLoader.getResource("processor/PipelineConfiguration.json").toURI())); - createPipelineProcessor(requestBody, pipelineName, modelId); + createPipelineProcessor(requestBody, pipelineName, modelId, null); } protected String uploadSparseEncodingModel() throws Exception { @@ -90,20 +90,25 @@ protected void createPipelineForTextImageProcessor(final String modelId, final S String requestBody = Files.readString( Path.of(classLoader.getResource("processor/PipelineForTextImageProcessorConfiguration.json").toURI()) ); - createPipelineProcessor(requestBody, pipelineName, modelId); + createPipelineProcessor(requestBody, pipelineName, modelId, null); } - protected void createPipelineForSparseEncodingProcessor(final String modelId, final String pipelineName) throws Exception { + protected void createPipelineForSparseEncodingProcessor(final String modelId, final String pipelineName, final Integer batchSize) + throws Exception { String requestBody = Files.readString( Path.of(classLoader.getResource("processor/PipelineForSparseEncodingProcessorConfiguration.json").toURI()) ); - createPipelineProcessor(requestBody, pipelineName, modelId); + createPipelineProcessor(requestBody, pipelineName, modelId, batchSize); + } + + protected void createPipelineForSparseEncodingProcessor(final String modelId, final String pipelineName) throws Exception { + createPipelineForSparseEncodingProcessor(modelId, pipelineName, null); } protected void createPipelineForTextChunkingProcessor(String pipelineName) throws Exception { String requestBody = Files.readString( Path.of(classLoader.getResource("processor/PipelineForTextChunkingProcessorConfiguration.json").toURI()) ); - createPipelineProcessor(requestBody, pipelineName, ""); + 
createPipelineProcessor(requestBody, pipelineName, "", null); } } diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/BatchIngestionIT.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/BatchIngestionIT.java index 0e490e2e4..f9cd11251 100644 --- a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/BatchIngestionIT.java +++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/BatchIngestionIT.java @@ -27,14 +27,14 @@ public void testBatchIngestionWithNeuralSparseProcessor_E2EFlow() throws Excepti if (isRunningAgainstOldCluster()) { String modelId = uploadSparseEncodingModel(); loadModel(modelId); - createPipelineForSparseEncodingProcessor(modelId, PIPELINE_NAME); + createPipelineForSparseEncodingProcessor(modelId, PIPELINE_NAME, batchSize); createIndexWithConfiguration( indexName, Files.readString(Path.of(classLoader.getResource("processor/SparseIndexMappings.json").toURI())), PIPELINE_NAME ); List> docs = prepareDataForBulkIngestion(0, 5); - bulkAddDocuments(indexName, TEXT_FIELD_NAME, PIPELINE_NAME, docs, batchSize); + bulkAddDocuments(indexName, TEXT_FIELD_NAME, PIPELINE_NAME, docs); validateDocCountAndInfo(indexName, 5, () -> getDocById(indexName, "4"), EMBEDDING_FIELD_NAME, Map.class); } else { String modelId = null; @@ -42,7 +42,7 @@ public void testBatchIngestionWithNeuralSparseProcessor_E2EFlow() throws Excepti loadModel(modelId); try { List> docs = prepareDataForBulkIngestion(5, 5); - bulkAddDocuments(indexName, TEXT_FIELD_NAME, PIPELINE_NAME, docs, batchSize); + bulkAddDocuments(indexName, TEXT_FIELD_NAME, PIPELINE_NAME, docs); validateDocCountAndInfo(indexName, 10, () -> getDocById(indexName, "9"), EMBEDDING_FIELD_NAME, Map.class); } finally { wipeOfTestResources(indexName, PIPELINE_NAME, modelId, null); diff --git a/qa/rolling-upgrade/build.gradle b/qa/rolling-upgrade/build.gradle index 345dbda89..ab063ab49 100644 --- a/qa/rolling-upgrade/build.gradle +++ 
b/qa/rolling-upgrade/build.gradle @@ -54,6 +54,12 @@ testClusters { } } +def versionsBelow2_11 = ["2.9", "2.10"] +def versionsBelow2_12 = versionsBelow2_11 + "2.11" +def versionsBelow2_13 = versionsBelow2_12 + "2.12" +def versionsBelow2_14 = versionsBelow2_13 + "2.13" +def versionsBelow2_15 = versionsBelow2_14 + "2.14" +def versionsBelow2_16 = versionsBelow2_15 + "2.15" // Task to run BWC tests against the old cluster task testAgainstOldCluster(type: StandaloneRestIntegTestTask) { @@ -67,7 +73,7 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) { //Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10 // because these features were released in 2.11 version. - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){ + if (versionsBelow2_11.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*" excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*" @@ -76,46 +82,42 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) { } } + // Excluding the test because hybrid query with rescore is not compatible with 2.14 and lower - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") - || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") - || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")) { + if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchWithRescoreIT.*" } } - // Excluding the test because we introduce this feature in 2.13 - if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){ + // Excluding the tests 
because we introduce these features in 2.13 + if (versionsBelow2_13.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow" - } - } - - // Excluding the text chunking processor test because we introduce this feature in 2.13 - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){ - filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*" } } // Excluding the k-NN radial search and batch ingestion tests because we introduce these features in 2.14 - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13")){ + if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*" - excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*" } } // Excluding the neural sparse two phase processor test because we introduce this feature in 2.15 - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") - || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") - || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){ + if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*" } } + // Excluding the batching processor tests because we introduce this feature in 2.16 + if 
(versionsBelow2_16.any { ext.neural_search_bwc_version.startsWith(it) }){ + filter { + excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*" + } + } nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}") nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}") @@ -144,7 +146,7 @@ task testAgainstOneThirdUpgradedCluster(type: StandaloneRestIntegTestTask) { //Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10 // because these features were released in 2.11 version. - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){ + if (versionsBelow2_11.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*" excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*" @@ -162,22 +164,16 @@ task testAgainstOneThirdUpgradedCluster(type: StandaloneRestIntegTestTask) { } } - // Excluding the test because we introduce this feature in 2.13 - if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){ + // Excluding the tests because we introduce these features in 2.13 + if (versionsBelow2_13.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow" - } - } - - // Excluding the text chunking processor test because we introduce this feature in 2.13 - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){ - filter { excludeTestsMatching 
"org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*" } } // Excluding the k-NN radial search and batch ingestion tests because we introduce these features in 2.14 - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13")){ + if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*" excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*" @@ -185,14 +181,18 @@ task testAgainstOneThirdUpgradedCluster(type: StandaloneRestIntegTestTask) { } // Excluding the neural sparse two phase processor test because we introduce this feature in 2.15 - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") - || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") - || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){ + if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*" } } + // Excluding the batching processor tests because we introduce this feature in 2.16 + if (versionsBelow2_16.any { ext.neural_search_bwc_version.startsWith(it) }){ + filter { + excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*" + } + } nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}") nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}") @@ -220,7 +220,7 @@ task testAgainstTwoThirdsUpgradedCluster(type: StandaloneRestIntegTestTask) { // Excluding MultiModalSearchIT, HybridSearchIT, 
NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10 // because these features were released in 2.11 version. - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){ + if (versionsBelow2_11.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*" excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*" @@ -238,22 +238,16 @@ task testAgainstTwoThirdsUpgradedCluster(type: StandaloneRestIntegTestTask) { } } - // Excluding the test because we introduce this feature in 2.13 - if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){ + // Excluding the tests because we introduce these features in 2.13 + if (versionsBelow2_13.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow" - } - } - - // Excluding the text chunking processor test because we introduce this feature in 2.13 - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){ - filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*" } } // Excluding the k-NN radial search and batch ingestion tests because we introduce these features in 2.14 - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13")){ + if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching 
"org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*" excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*" @@ -261,14 +255,18 @@ task testAgainstTwoThirdsUpgradedCluster(type: StandaloneRestIntegTestTask) { } // Excluding the neural sparse two phase processor test because we introduce this feature in 2.15 - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") - || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") - || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){ + if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*" } } + // Excluding the batching processor tests because we introduce this feature in 2.16 + if (versionsBelow2_16.any { ext.neural_search_bwc_version.startsWith(it) }){ + filter { + excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*" + } + } nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}") nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}") @@ -296,7 +294,7 @@ task testRollingUpgrade(type: StandaloneRestIntegTestTask) { //Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10 // because these features were released in 2.11 version. 
- if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){ + if (versionsBelow2_11.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*" excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*" @@ -314,22 +312,16 @@ task testRollingUpgrade(type: StandaloneRestIntegTestTask) { } } - // Excluding the test because we introduce this feature in 2.13 - if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){ + // Excluding the tests because we introduce these features in 2.13 + if (versionsBelow2_13.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow" - } - } - - // Excluding the text chunking processor test because we introduce this feature in 2.13 - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){ - filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*" } } // Excluding the k-NN radial search and batch ingestion tests because we introduce these features in 2.14 - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13")){ + if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*" excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*" @@ -337,14 +329,19 @@ task testRollingUpgrade(type: 
StandaloneRestIntegTestTask) { } // Excluding the neural sparse two phase processor test because we introduce this feature in 2.15 - if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") - || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") - || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){ + if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){ filter { excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*" } } + // Excluding the batching processor tests because we introduce this feature in 2.16 + if (versionsBelow2_16.any { ext.neural_search_bwc_version.startsWith(it) }){ + filter { + excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*" + } + } + nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}") nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}") systemProperty 'tests.security.manager', 'false' diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRollingUpgradeTestCase.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRollingUpgradeTestCase.java index 00fd9ade4..c3fd8228b 100644 --- a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRollingUpgradeTestCase.java +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/AbstractRollingUpgradeTestCase.java @@ -102,7 +102,7 @@ protected String registerModelGroupAndGetModelId(String requestBody) throws Exce protected void createPipelineProcessor(String modelId, String pipelineName) throws Exception { String requestBody = Files.readString(Path.of(classLoader.getResource("processor/PipelineConfiguration.json").toURI())); - createPipelineProcessor(requestBody, pipelineName, 
modelId); + createPipelineProcessor(requestBody, pipelineName, modelId, null); } protected String uploadTextImageEmbeddingModel() throws Exception { @@ -114,7 +114,7 @@ protected void createPipelineForTextImageProcessor(String modelId, String pipeli String requestBody = Files.readString( Path.of(classLoader.getResource("processor/PipelineForTextImageProcessorConfiguration.json").toURI()) ); - createPipelineProcessor(requestBody, pipelineName, modelId); + createPipelineProcessor(requestBody, pipelineName, modelId, null); } protected String uploadSparseEncodingModel() throws Exception { @@ -124,11 +124,23 @@ protected String uploadSparseEncodingModel() throws Exception { return registerModelGroupAndGetModelId(requestBody); } - protected void createPipelineForSparseEncodingProcessor(String modelId, String pipelineName) throws Exception { + protected void createPipelineForSparseEncodingProcessor(String modelId, String pipelineName, Integer batchSize) throws Exception { String requestBody = Files.readString( Path.of(classLoader.getResource("processor/PipelineForSparseEncodingProcessorConfiguration.json").toURI()) ); - createPipelineProcessor(requestBody, pipelineName, modelId); + final String batchSizeTag = "{{batch_size}}"; + if (requestBody.contains(batchSizeTag)) { + if (batchSize != null) { + requestBody = requestBody.replace(batchSizeTag, String.format(LOCALE, "\n\"batch_size\": %d,\n", batchSize)); + } else { + requestBody = requestBody.replace(batchSizeTag, ""); + } + } + createPipelineProcessor(requestBody, pipelineName, modelId, batchSize); + } + + protected void createPipelineForSparseEncodingProcessor(String modelId, String pipelineName) throws Exception { + createPipelineForSparseEncodingProcessor(modelId, pipelineName, null); } @Override @@ -143,6 +155,6 @@ protected void createPipelineForTextChunkingProcessor(String pipelineName) throw String requestBody = Files.readString( 
Path.of(classLoader.getResource("processor/PipelineForTextChunkingProcessorConfiguration.json").toURI()) ); - createPipelineProcessor(requestBody, pipelineName, ""); + createPipelineProcessor(requestBody, pipelineName, "", null); } } diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/BatchIngestionIT.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/BatchIngestionIT.java index 3052b48cd..e57802816 100644 --- a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/BatchIngestionIT.java +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/BatchIngestionIT.java @@ -28,21 +28,21 @@ public void testBatchIngestion_SparseEncodingProcessor_E2EFlow() throws Exceptio case OLD: sparseModelId = uploadSparseEncodingModel(); loadModel(sparseModelId); - createPipelineForSparseEncodingProcessor(sparseModelId, SPARSE_PIPELINE); + createPipelineForSparseEncodingProcessor(sparseModelId, SPARSE_PIPELINE, 2); createIndexWithConfiguration( indexName, Files.readString(Path.of(classLoader.getResource("processor/SparseIndexMappings.json").toURI())), SPARSE_PIPELINE ); List> docs = prepareDataForBulkIngestion(0, 5); - bulkAddDocuments(indexName, TEXT_FIELD_NAME, SPARSE_PIPELINE, docs, 2); + bulkAddDocuments(indexName, TEXT_FIELD_NAME, SPARSE_PIPELINE, docs); validateDocCountAndInfo(indexName, 5, () -> getDocById(indexName, "4"), EMBEDDING_FIELD_NAME, Map.class); break; case MIXED: sparseModelId = TestUtils.getModelId(getIngestionPipeline(SPARSE_PIPELINE), SPARSE_ENCODING_PROCESSOR); loadModel(sparseModelId); List> docsForMixed = prepareDataForBulkIngestion(5, 5); - bulkAddDocuments(indexName, TEXT_FIELD_NAME, SPARSE_PIPELINE, docsForMixed, 3); + bulkAddDocuments(indexName, TEXT_FIELD_NAME, SPARSE_PIPELINE, docsForMixed); validateDocCountAndInfo(indexName, 10, () -> getDocById(indexName, "9"), EMBEDDING_FIELD_NAME, Map.class); break; case UPGRADED: @@ -50,7 +50,7 @@ public void 
testBatchIngestion_SparseEncodingProcessor_E2EFlow() throws Exceptio sparseModelId = TestUtils.getModelId(getIngestionPipeline(SPARSE_PIPELINE), SPARSE_ENCODING_PROCESSOR); loadModel(sparseModelId); List> docsForUpgraded = prepareDataForBulkIngestion(10, 5); - bulkAddDocuments(indexName, TEXT_FIELD_NAME, SPARSE_PIPELINE, docsForUpgraded, 2); + bulkAddDocuments(indexName, TEXT_FIELD_NAME, SPARSE_PIPELINE, docsForUpgraded); validateDocCountAndInfo(indexName, 15, () -> getDocById(indexName, "14"), EMBEDDING_FIELD_NAME, Map.class); } finally { wipeOfTestResources(indexName, SPARSE_PIPELINE, sparseModelId, null); diff --git a/qa/rolling-upgrade/src/test/resources/processor/PipelineForSparseEncodingProcessorConfiguration.json b/qa/rolling-upgrade/src/test/resources/processor/PipelineForSparseEncodingProcessorConfiguration.json index fe885a0a2..a597c2939 100644 --- a/qa/rolling-upgrade/src/test/resources/processor/PipelineForSparseEncodingProcessorConfiguration.json +++ b/qa/rolling-upgrade/src/test/resources/processor/PipelineForSparseEncodingProcessorConfiguration.json @@ -4,6 +4,7 @@ { "sparse_encoding": { "model_id": "%s", + "batch_size": "%d", "field_map": { "passage_text": "passage_embedding" } diff --git a/src/test/java/org/opensearch/neuralsearch/processor/TextChunkingProcessorIT.java b/src/test/java/org/opensearch/neuralsearch/processor/TextChunkingProcessorIT.java index 0293c1398..b41bd961d 100644 --- a/src/test/java/org/opensearch/neuralsearch/processor/TextChunkingProcessorIT.java +++ b/src/test/java/org/opensearch/neuralsearch/processor/TextChunkingProcessorIT.java @@ -196,7 +196,7 @@ private void createPipelineProcessor(String pipelineName) throws Exception { URL pipelineURLPath = classLoader.getResource(PIPELINE_CONFIGS_BY_NAME.get(pipelineName)); Objects.requireNonNull(pipelineURLPath); String requestBody = Files.readString(Path.of(pipelineURLPath.toURI())); - createPipelineProcessor(requestBody, pipelineName, ""); + createPipelineProcessor(requestBody, 
pipelineName, "", null); } private void createTextChunkingIndex(String indexName, String pipelineName) throws Exception { diff --git a/src/test/java/org/opensearch/neuralsearch/processor/TextEmbeddingProcessorIT.java b/src/test/java/org/opensearch/neuralsearch/processor/TextEmbeddingProcessorIT.java index 0ef9dda40..c00423b1f 100644 --- a/src/test/java/org/opensearch/neuralsearch/processor/TextEmbeddingProcessorIT.java +++ b/src/test/java/org/opensearch/neuralsearch/processor/TextEmbeddingProcessorIT.java @@ -82,9 +82,9 @@ public void testTextEmbeddingProcessor_batch() throws Exception { try { modelId = uploadTextEmbeddingModel(); loadModel(modelId); - createPipelineProcessor(modelId, PIPELINE_NAME, ProcessorType.TEXT_EMBEDDING); + createPipelineProcessor(modelId, PIPELINE_NAME, ProcessorType.TEXT_EMBEDDING, 2); createTextEmbeddingIndex(); - ingestBatchDocumentWithBulk("batch_", 2, 2, Collections.emptySet(), Collections.emptySet()); + ingestBatchDocumentWithBulk("batch_", 2, Collections.emptySet(), Collections.emptySet()); assertEquals(2, getDocCount(INDEX_NAME)); ingestDocument(String.format(LOCALE, INGEST_DOC1, "success"), "1"); @@ -183,10 +183,10 @@ public void testTextEmbeddingProcessor_withBatchSizeInProcessor() throws Excepti URL pipelineURLPath = classLoader.getResource("processor/PipelineConfigurationWithBatchSize.json"); Objects.requireNonNull(pipelineURLPath); String requestBody = Files.readString(Path.of(pipelineURLPath.toURI())); - createPipelineProcessor(requestBody, PIPELINE_NAME, modelId); + createPipelineProcessor(requestBody, PIPELINE_NAME, modelId, null); createTextEmbeddingIndex(); int docCount = 5; - ingestBatchDocumentWithBulk("batch_", docCount, docCount, Collections.emptySet(), Collections.emptySet()); + ingestBatchDocumentWithBulk("batch_", docCount, Collections.emptySet(), Collections.emptySet()); assertEquals(5, getDocCount(INDEX_NAME)); for (int i = 0; i < docCount; ++i) { @@ -215,10 +215,10 @@ public void 
testTextEmbeddingProcessor_withFailureAndSkip() throws Exception { URL pipelineURLPath = classLoader.getResource("processor/PipelineConfigurationWithBatchSize.json"); Objects.requireNonNull(pipelineURLPath); String requestBody = Files.readString(Path.of(pipelineURLPath.toURI())); - createPipelineProcessor(requestBody, PIPELINE_NAME, modelId); + createPipelineProcessor(requestBody, PIPELINE_NAME, modelId, null); createTextEmbeddingIndex(); int docCount = 5; - ingestBatchDocumentWithBulk("batch_", docCount, docCount, Set.of(0), Set.of(1)); + ingestBatchDocumentWithBulk("batch_", docCount, Set.of(0), Set.of(1)); assertEquals(3, getDocCount(INDEX_NAME)); for (int i = 2; i < docCount; ++i) { @@ -275,7 +275,7 @@ private void ingestDocument(String doc, String id) throws Exception { assertEquals("created", map.get("result")); } - private void ingestBatchDocumentWithBulk(String idPrefix, int docCount, int batchSize, Set failedIds, Set droppedIds) + private void ingestBatchDocumentWithBulk(String idPrefix, int docCount, Set failedIds, Set droppedIds) throws Exception { StringBuilder payloadBuilder = new StringBuilder(); for (int i = 0; i < docCount; ++i) { @@ -295,7 +295,6 @@ private void ingestBatchDocumentWithBulk(String idPrefix, int docCount, int batc final String payload = payloadBuilder.toString(); Map params = new HashMap<>(); params.put("refresh", "true"); - params.put("batch_size", String.valueOf(batchSize)); Response response = makeRequest( client(), "POST", diff --git a/src/test/resources/processor/PipelineConfiguration.json b/src/test/resources/processor/PipelineConfiguration.json index d833576a0..65dce44a2 100644 --- a/src/test/resources/processor/PipelineConfiguration.json +++ b/src/test/resources/processor/PipelineConfiguration.json @@ -4,6 +4,7 @@ { "text_embedding": { "model_id": "%s", + "batch_size": "%d", "field_map": { "title": "title_knn", "favor_list": "favor_list_knn", diff --git a/src/test/resources/processor/SparseEncodingPipelineConfiguration.json 
b/src/test/resources/processor/SparseEncodingPipelineConfiguration.json index 04a4baf80..4166e2082 100644 --- a/src/test/resources/processor/SparseEncodingPipelineConfiguration.json +++ b/src/test/resources/processor/SparseEncodingPipelineConfiguration.json @@ -4,6 +4,7 @@ { "sparse_encoding": { "model_id": "%s", + "batch_size": "%d", "field_map": { "title": "title_sparse", "favor_list": "favor_list_sparse", diff --git a/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java b/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java index 5d8e79e72..323f9be9d 100644 --- a/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java +++ b/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java @@ -296,17 +296,31 @@ protected void createIndexWithConfiguration(final String indexName, String index protected void createPipelineProcessor(final String modelId, final String pipelineName, final ProcessorType processorType) throws Exception { + createPipelineProcessor(modelId, pipelineName, processorType, null); + } + + protected void createPipelineProcessor( + final String modelId, + final String pipelineName, + final ProcessorType processorType, + final Integer batchSize + ) throws Exception { String requestBody = Files.readString(Path.of(classLoader.getResource(PIPELINE_CONFIGS_BY_TYPE.get(processorType)).toURI())); - createPipelineProcessor(requestBody, pipelineName, modelId); + createPipelineProcessor(requestBody, pipelineName, modelId, batchSize); } - protected void createPipelineProcessor(final String requestBody, final String pipelineName, final String modelId) throws Exception { + protected void createPipelineProcessor( + final String requestBody, + final String pipelineName, + final String modelId, + final Integer batchSize + ) throws Exception { Response pipelineCreateResponse = makeRequest( client(), "PUT", "/_ingest/pipeline/" + pipelineName, null, - toHttpEntity(String.format(LOCALE, 
requestBody, modelId)), + toHttpEntity(String.format(LOCALE, requestBody, modelId, batchSize == null ? 1 : batchSize)), ImmutableList.of(new BasicHeader(HttpHeaders.USER_AGENT, DEFAULT_USER_AGENT)) ); Map node = XContentHelper.convertToMap( @@ -750,13 +764,8 @@ protected void addSparseEncodingDoc( assertEquals(request.getEndpoint() + ": failed", RestStatus.CREATED, RestStatus.fromCode(response.getStatusLine().getStatusCode())); } - protected void bulkAddDocuments( - final String index, - final String textField, - final String pipeline, - final List> docs, - final int batchSize - ) throws IOException, ParseException { + protected void bulkAddDocuments(final String index, final String textField, final String pipeline, final List> docs) + throws IOException { StringBuilder builder = new StringBuilder(); for (int i = 0; i < docs.size(); ++i) { String doc = String.format( @@ -770,10 +779,7 @@ protected void bulkAddDocuments( builder.append(doc); builder.append("\n"); } - Request request = new Request( - "POST", - String.format(Locale.ROOT, "/_bulk?refresh=true&pipeline=%s&batch_size=%d", pipeline, batchSize) - ); + Request request = new Request("POST", String.format(Locale.ROOT, "/_bulk?refresh=true&pipeline=%s", pipeline)); request.setJsonEntity(builder.toString()); Response response = client().performRequest(request);