diff --git a/CHANGELOG.md b/CHANGELOG.md index 164b42253..5f6eca41c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix for missing HybridQuery results when concurrent segment search is enabled ([#800](https://github.com/opensearch-project/neural-search/pull/800)) ### Infrastructure - Add BWC for batch ingestion ([#769](https://github.com/opensearch-project/neural-search/pull/769)) +- Add backward compatibility (BWC) test cases for the neural sparse two-phase processor ([#777](https://github.com/opensearch-project/neural-search/pull/777)) ### Documentation ### Maintenance ### Refactoring diff --git a/qa/restart-upgrade/build.gradle b/qa/restart-upgrade/build.gradle index dc6e4504e..c5badd248 100644 --- a/qa/restart-upgrade/build.gradle +++ b/qa/restart-upgrade/build.gradle @@ -98,6 +98,13 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) { } } + // Excluding the NeuralSparseQuery two-phase search pipeline tests because this feature was introduced in 2.15 + if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){ + filter { + excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*" + } + } + nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}") nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}") systemProperty 'tests.security.manager', 'false' @@ -155,6 +162,13 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) { } } + // Excluding the NeuralSparseQuery two-phase search pipeline tests because this feature was introduced in 2.15 + if 
(ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){ + filter { + excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*" + } + } + nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}") nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}") systemProperty 'tests.security.manager', 'false' diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseTwoPhaseProcessorIT.java b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseTwoPhaseProcessorIT.java new file mode 100644 index 000000000..4b00a7916 --- /dev/null +++ b/qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseTwoPhaseProcessorIT.java @@ -0,0 +1,65 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ +package org.opensearch.neuralsearch.bwc; + +import org.opensearch.common.settings.Settings; +import org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder; +import org.opensearch.neuralsearch.util.TestUtils; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +import static org.opensearch.neuralsearch.util.TestUtils.NODES_BWC_CLUSTER; +import static org.opensearch.neuralsearch.util.TestUtils.SPARSE_ENCODING_PROCESSOR; + +public class NeuralSparseTwoPhaseProcessorIT extends AbstractRestartUpgradeRestTestCase { + + private static final String NEURAL_SPARSE_INGEST_PIPELINE_NAME = "nstp-nlp-ingest-pipeline-dense"; + private static final String NEURAL_SPARSE_TWO_PHASE_SEARCH_PIPELINE_NAME = "nstp-nlp-two-phase-search-pipeline-sparse"; + private static final String 
TEST_ENCODING_FIELD = "passage_embedding"; + private static final String TEST_TEXT_FIELD = "passage_text"; + private static final String TEXT_1 = "Hello world a b"; + + public void testNeuralSparseQueryTwoPhaseProcessor_NeuralSearch_E2EFlow() throws Exception { + waitForClusterHealthGreen(NODES_BWC_CLUSTER); + NeuralSparseQueryBuilder neuralSparseQueryBuilder = new NeuralSparseQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1); + if (isRunningAgainstOldCluster()) { + String modelId = uploadSparseEncodingModel(); + loadModel(modelId); + neuralSparseQueryBuilder.modelId(modelId); + createPipelineForSparseEncodingProcessor(modelId, NEURAL_SPARSE_INGEST_PIPELINE_NAME); + createIndexWithConfiguration( + getIndexNameForTest(), + Files.readString(Path.of(classLoader.getResource("processor/SparseIndexMappings.json").toURI())), + NEURAL_SPARSE_INGEST_PIPELINE_NAME + ); + addSparseEncodingDoc(getIndexNameForTest(), "0", List.of(), List.of(), List.of(TEST_TEXT_FIELD), List.of(TEXT_1)); + createNeuralSparseTwoPhaseSearchProcessor(NEURAL_SPARSE_TWO_PHASE_SEARCH_PIPELINE_NAME); + updateIndexSettings( + getIndexNameForTest(), + Settings.builder().put("index.search.default_pipeline", NEURAL_SPARSE_TWO_PHASE_SEARCH_PIPELINE_NAME) + ); + Object resultWith2PhasePipeline = search(getIndexNameForTest(), neuralSparseQueryBuilder, 1).get("hits"); + assertNotNull(resultWith2PhasePipeline); + } else { + String modelId = null; + try { + modelId = TestUtils.getModelId(getIngestionPipeline(NEURAL_SPARSE_INGEST_PIPELINE_NAME), SPARSE_ENCODING_PROCESSOR); + loadModel(modelId); + neuralSparseQueryBuilder.modelId(modelId); + Object resultWith2PhasePipeline = search(getIndexNameForTest(), neuralSparseQueryBuilder, 1).get("hits"); + assertNotNull(resultWith2PhasePipeline); + } finally { + wipeOfTestResources( + getIndexNameForTest(), + NEURAL_SPARSE_INGEST_PIPELINE_NAME, + modelId, + NEURAL_SPARSE_TWO_PHASE_SEARCH_PIPELINE_NAME + ); + } + } + } +} diff --git 
a/qa/restart-upgrade/src/test/resources/processor/NeuralSparseTwoPhaseProcessorConfiguration.json b/qa/restart-upgrade/src/test/resources/processor/NeuralSparseTwoPhaseProcessorConfiguration.json new file mode 100644 index 000000000..45e435268 --- /dev/null +++ b/qa/restart-upgrade/src/test/resources/processor/NeuralSparseTwoPhaseProcessorConfiguration.json @@ -0,0 +1,16 @@ +{ + "request_processors": [ + { + "neural_sparse_two_phase_processor": { + "tag": "neural-sparse", + "description": "This processor is making two-phase rescorer.", + "enabled": true, + "two_phase_parameter": { + "prune_ratio": %f, + "expansion_rate": %f, + "max_window_size": %d + } + } + } + ] +} diff --git a/qa/rolling-upgrade/build.gradle b/qa/rolling-upgrade/build.gradle index 1146bac19..285e65093 100644 --- a/qa/rolling-upgrade/build.gradle +++ b/qa/rolling-upgrade/build.gradle @@ -98,6 +98,16 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) { } } + // Excluding the neural sparse two phase processor test because this feature was introduced in 2.15 + if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") + || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") + || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){ + filter { + excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*" + } + } + + nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}") nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}") systemProperty 'tests.security.manager', 'false' @@ -156,6 +166,16 @@ task testAgainstOneThirdUpgradedCluster(type: StandaloneRestIntegTestTask) { } } + // Excluding the neural sparse two phase processor test because this feature was introduced in 2.15 + if 
(ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") + || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") + || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){ + filter { + excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*" + } + } + + nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}") nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}") systemProperty 'tests.security.manager', 'false' @@ -213,6 +233,16 @@ task testAgainstTwoThirdsUpgradedCluster(type: StandaloneRestIntegTestTask) { } } + // Excluding the neural sparse two phase processor test because this feature was introduced in 2.15 + if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") + || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") + || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){ + filter { + excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*" + } + } + + nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}") nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}") systemProperty 'tests.security.manager', 'false' @@ -270,6 +300,15 @@ task testRollingUpgrade(type: StandaloneRestIntegTestTask) { } } + // Excluding the neural sparse two phase processor test because this feature was introduced in 2.15 + if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") + || ext.neural_search_bwc_version.startsWith("2.11") || 
ext.neural_search_bwc_version.startsWith("2.12") + || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){ + filter { + excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*" + } + } + nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}") nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}") systemProperty 'tests.security.manager', 'false' diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseTwoPhaseProcessorIT.java b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseTwoPhaseProcessorIT.java new file mode 100644 index 000000000..c95ee93e0 --- /dev/null +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/NeuralSparseTwoPhaseProcessorIT.java @@ -0,0 +1,78 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ +package org.opensearch.neuralsearch.bwc; + +import org.opensearch.common.settings.Settings; +import org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder; +import org.opensearch.neuralsearch.util.TestUtils; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +import static org.opensearch.neuralsearch.util.TestUtils.NODES_BWC_CLUSTER; +import static org.opensearch.neuralsearch.util.TestUtils.SPARSE_ENCODING_PROCESSOR; + +public class NeuralSparseTwoPhaseProcessorIT extends AbstractRollingUpgradeTestCase { + // add a prefix to avoid conflicts with other IT classes, since resources are not wiped after the first round + private static final String SPARSE_INGEST_PIPELINE_NAME = "nstp-nlp-ingest-pipeline-sparse"; + private static final String SPARSE_SEARCH_TWO_PHASE_PIPELINE_NAME = "nstp-nlp-two-phase-search-pipeline-sparse"; + private static final String TEST_ENCODING_FIELD = "passage_embedding"; + private static final String 
TEST_TEXT_FIELD = "passage_text"; + private static final String TEXT_1 = "Hello world a b"; + private String sparseModelId = ""; + + // test that NeuralSparseTwoPhaseProcessor supports the neural_sparse query's two-phase speed-up + // the feature was introduced in 2.15 + public void testNeuralSparseTwoPhaseProcessorIT_NeuralSparseSearch_E2EFlow() throws Exception { + waitForClusterHealthGreen(NODES_BWC_CLUSTER); + // will set the model_id after we obtain the id + NeuralSparseQueryBuilder neuralSparseQueryBuilder = new NeuralSparseQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1); + + switch (getClusterType()) { + case OLD: + sparseModelId = uploadSparseEncodingModel(); + loadModel(sparseModelId); + neuralSparseQueryBuilder.modelId(sparseModelId); + createPipelineForSparseEncodingProcessor(sparseModelId, SPARSE_INGEST_PIPELINE_NAME); + createIndexWithConfiguration( + getIndexNameForTest(), + Files.readString(Path.of(classLoader.getResource("processor/SparseIndexMappings.json").toURI())), + SPARSE_INGEST_PIPELINE_NAME + ); + addSparseEncodingDoc(getIndexNameForTest(), "0", List.of(), List.of(), List.of(TEST_TEXT_FIELD), List.of(TEXT_1)); + createNeuralSparseTwoPhaseSearchProcessor(SPARSE_SEARCH_TWO_PHASE_PIPELINE_NAME); + updateIndexSettings( + getIndexNameForTest(), + Settings.builder().put("index.search.default_pipeline", SPARSE_SEARCH_TWO_PHASE_PIPELINE_NAME) + ); + assertNotNull(search(getIndexNameForTest(), neuralSparseQueryBuilder, 1).get("hits")); + break; + case MIXED: + sparseModelId = TestUtils.getModelId(getIngestionPipeline(SPARSE_INGEST_PIPELINE_NAME), SPARSE_ENCODING_PROCESSOR); + loadModel(sparseModelId); + neuralSparseQueryBuilder.modelId(sparseModelId); + assertNotNull(search(getIndexNameForTest(), neuralSparseQueryBuilder, 1).get("hits")); + break; + case UPGRADED: + try { + sparseModelId = TestUtils.getModelId(getIngestionPipeline(SPARSE_INGEST_PIPELINE_NAME), SPARSE_ENCODING_PROCESSOR); + loadModel(sparseModelId); + 
neuralSparseQueryBuilder.modelId(sparseModelId); + assertNotNull(search(getIndexNameForTest(), neuralSparseQueryBuilder, 1).get("hits")); + } finally { + wipeOfTestResources( + getIndexNameForTest(), + SPARSE_INGEST_PIPELINE_NAME, + sparseModelId, + SPARSE_SEARCH_TWO_PHASE_PIPELINE_NAME + ); + } + break; + default: + throw new IllegalStateException("Unexpected value: " + getClusterType()); + } + } +} diff --git a/qa/rolling-upgrade/src/test/resources/processor/NeuralSparseTwoPhaseProcessorConfiguration.json b/qa/rolling-upgrade/src/test/resources/processor/NeuralSparseTwoPhaseProcessorConfiguration.json new file mode 100644 index 000000000..45e435268 --- /dev/null +++ b/qa/rolling-upgrade/src/test/resources/processor/NeuralSparseTwoPhaseProcessorConfiguration.json @@ -0,0 +1,16 @@ +{ + "request_processors": [ + { + "neural_sparse_two_phase_processor": { + "tag": "neural-sparse", + "description": "This processor is making two-phase rescorer.", + "enabled": true, + "two_phase_parameter": { + "prune_ratio": %f, + "expansion_rate": %f, + "max_window_size": %d + } + } + } + ] +} diff --git a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryTests.java b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryTests.java index b74bd010c..afb9ecb44 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/HybridQueryTests.java +++ b/src/test/java/org/opensearch/neuralsearch/query/HybridQueryTests.java @@ -39,6 +39,7 @@ import org.opensearch.index.query.QueryBuilders; import org.opensearch.index.query.QueryShardContext; import org.opensearch.knn.index.SpaceType; +import org.opensearch.knn.index.VectorDataType; import org.opensearch.knn.index.mapper.KNNVectorFieldMapper; import org.opensearch.knn.index.query.KNNQueryBuilder; @@ -119,6 +120,7 @@ public void testRewrite_whenRewriteQuery_thenSuccessful() { when(mockKNNVectorField.getDimension()).thenReturn(4); when(mockQueryShardContext.fieldMapper(eq(VECTOR_FIELD_NAME))).thenReturn(mockKNNVectorField); 
when(mockKNNVectorField.getSpaceType()).thenReturn(SpaceType.L2); + when(mockKNNVectorField.getVectorDataType()).thenReturn(VectorDataType.FLOAT); KNNQueryBuilder knnQueryBuilder = new KNNQueryBuilder(VECTOR_FIELD_NAME, VECTOR_QUERY, K); Query knnQuery = knnQueryBuilder.toQuery(mockQueryShardContext); diff --git a/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java b/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java index 7e2baef85..dd1dac432 100644 --- a/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java +++ b/src/testFixtures/java/org/opensearch/neuralsearch/BaseNeuralSearchIT.java @@ -867,7 +867,7 @@ protected float computeExpectedScore( final String queryText ) { float[] queryVector = runInference(modelId, queryText); - return spaceType.getVectorSimilarityFunction().compare(queryVector, indexVector); + return spaceType.getKnnVectorSimilarityFunction().compare(queryVector, indexVector); } protected Map getTaskQueryResponse(final String taskId) throws Exception {