Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 2.x] Support default model id in neural_sparse query #637

Merged
merged 5 commits into from
Mar 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),

## [Unreleased 2.x](https://github.com/opensearch-project/neural-search/compare/2.12...2.x)
### Features
- Enabled support for applying default modelId in neural sparse query ([#614](https://github.com/opensearch-project/neural-search/pull/614)
### Enhancements
- Adding aggregations in hybrid query ([#630](https://github.com/opensearch-project/neural-search/pull/630))
- Support for post filter in hybrid query ([#633](https://github.com/opensearch-project/neural-search/pull/633))
Expand Down
22 changes: 20 additions & 2 deletions qa/restart-upgrade/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,21 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
systemProperty 'tests.skip_delete_model_index', 'true'
systemProperty 'tests.plugin_bwc_version', ext.neural_search_bwc_version

//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT tests from neural search version 2.9 and 2.10 because these features were released in 2.11 version.
//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
// because these features were released in 2.11 version.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.*"
}
}

// Excluding the test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
}
}

Expand Down Expand Up @@ -98,12 +107,21 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) {
systemProperty 'tests.is_old_cluster', 'false'
systemProperty 'tests.plugin_bwc_version', ext.neural_search_bwc_version

//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT tests from neural search version 2.9 and 2.10 because these features were released in 2.11 version.
//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
// because these features were released in 2.11 version.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.*"
}
}

// Excluding the test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;

import static org.opensearch.neuralsearch.TestUtils.NODES_BWC_CLUSTER;
import static org.opensearch.neuralsearch.TestUtils.SPARSE_ENCODING_PROCESSOR;
import static org.opensearch.neuralsearch.TestUtils.TEXT_EMBEDDING_PROCESSOR;

import org.opensearch.common.settings.Settings;
import org.opensearch.neuralsearch.TestUtils;
import org.opensearch.neuralsearch.query.NeuralQueryBuilder;
import org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder;

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;

public class NeuralQueryEnricherProcessorIT extends AbstractRestartUpgradeRestTestCase {
// add prefix to avoid conflicts with other IT class, since we don't wipe resources after first round
private static final String SPARSE_INGEST_PIPELINE_NAME = "nqep-nlp-ingest-pipeline-sparse";
private static final String DENSE_INGEST_PIPELINE_NAME = "nqep-nlp-ingest-pipeline-dense";
private static final String SPARSE_SEARCH_PIPELINE_NAME = "nqep-nlp-search-pipeline-sparse";
private static final String DENSE_SEARCH_PIPELINE_NAME = "nqep-nlp-search-pipeline-dense";
private static final String TEST_ENCODING_FIELD = "passage_embedding";
private static final String TEST_TEXT_FIELD = "passage_text";
private static final String TEXT_1 = "Hello world a b";

// Test restart-upgrade neural_query_enricher in restart-upgrade scenario
public void testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow() throws Exception {
waitForClusterHealthGreen(NODES_BWC_CLUSTER);
NeuralSparseQueryBuilder sparseEncodingQueryBuilderWithoutModelId = new NeuralSparseQueryBuilder().fieldName(TEST_ENCODING_FIELD)
.queryText(TEXT_1);
// will set the model_id after we obtain the id
NeuralSparseQueryBuilder sparseEncodingQueryBuilderWithModelId = new NeuralSparseQueryBuilder().fieldName(TEST_ENCODING_FIELD)
.queryText(TEXT_1);

if (isRunningAgainstOldCluster()) {
String modelId = uploadSparseEncodingModel();
loadModel(modelId);
sparseEncodingQueryBuilderWithModelId.modelId(modelId);
createPipelineForSparseEncodingProcessor(modelId, SPARSE_INGEST_PIPELINE_NAME);
createIndexWithConfiguration(
getIndexNameForTest(),
Files.readString(Path.of(classLoader.getResource("processor/SparseIndexMappings.json").toURI())),
SPARSE_INGEST_PIPELINE_NAME
);

addSparseEncodingDoc(getIndexNameForTest(), "0", List.of(), List.of(), List.of(TEST_TEXT_FIELD), List.of(TEXT_1));

createSearchRequestProcessor(modelId, SPARSE_SEARCH_PIPELINE_NAME);
updateIndexSettings(
getIndexNameForTest(),
Settings.builder().put("index.search.default_pipeline", SPARSE_SEARCH_PIPELINE_NAME)
);
} else {
String modelId = null;
try {
modelId = TestUtils.getModelId(getIngestionPipeline(SPARSE_INGEST_PIPELINE_NAME), SPARSE_ENCODING_PROCESSOR);
loadModel(modelId);
sparseEncodingQueryBuilderWithModelId.modelId(modelId);
assertEquals(
search(getIndexNameForTest(), sparseEncodingQueryBuilderWithoutModelId, 1).get("hits"),
search(getIndexNameForTest(), sparseEncodingQueryBuilderWithModelId, 1).get("hits")
);
} finally {
wipeOfTestResources(getIndexNameForTest(), SPARSE_INGEST_PIPELINE_NAME, modelId, SPARSE_SEARCH_PIPELINE_NAME);
}
}
}

public void testNeuralQueryEnricherProcessor_NeuralSearch_E2EFlow() throws Exception {
waitForClusterHealthGreen(NODES_BWC_CLUSTER);
NeuralQueryBuilder neuralQueryBuilderWithoutModelId = new NeuralQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1);
NeuralQueryBuilder neuralQueryBuilderWithModelId = new NeuralQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1);

if (isRunningAgainstOldCluster()) {
String modelId = uploadTextEmbeddingModel();
loadModel(modelId);
neuralQueryBuilderWithModelId.modelId(modelId);
createPipelineProcessor(modelId, DENSE_INGEST_PIPELINE_NAME);
createIndexWithConfiguration(
getIndexNameForTest(),
Files.readString(Path.of(classLoader.getResource("processor/IndexMappingMultipleShard.json").toURI())),
DENSE_INGEST_PIPELINE_NAME
);

addDocument(getIndexNameForTest(), "0", TEST_TEXT_FIELD, TEXT_1, null, null);

createSearchRequestProcessor(modelId, DENSE_SEARCH_PIPELINE_NAME);
updateIndexSettings(getIndexNameForTest(), Settings.builder().put("index.search.default_pipeline", DENSE_SEARCH_PIPELINE_NAME));
assertEquals(
search(getIndexNameForTest(), neuralQueryBuilderWithoutModelId, 1).get("hits"),
search(getIndexNameForTest(), neuralQueryBuilderWithModelId, 1).get("hits")
);
} else {
String modelId = null;
try {
modelId = TestUtils.getModelId(getIngestionPipeline(DENSE_INGEST_PIPELINE_NAME), TEXT_EMBEDDING_PROCESSOR);
loadModel(modelId);
neuralQueryBuilderWithModelId.modelId(modelId);

assertEquals(
search(getIndexNameForTest(), neuralQueryBuilderWithoutModelId, 1).get("hits"),
search(getIndexNameForTest(), neuralQueryBuilderWithModelId, 1).get("hits")
);
} finally {
wipeOfTestResources(getIndexNameForTest(), DENSE_INGEST_PIPELINE_NAME, modelId, DENSE_SEARCH_PIPELINE_NAME);
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"request_processors": [
{
"neural_query_enricher": {
"tag": "tag1",
"description": "This processor is going to restrict to publicly visible documents",
"default_model_id": "%s"
}
}
]
}
44 changes: 40 additions & 4 deletions qa/rolling-upgrade/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,21 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
systemProperty 'tests.plugin_bwc_version', ext.neural_search_bwc_version
systemProperty 'tests.skip_delete_model_index', 'true'

//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT tests from neural search version 2.9 and 2.10 because these features were released in 2.11 version.
//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
// because these features were released in 2.11 version.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.*"
}
}

// Excluding the test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
}
}

Expand Down Expand Up @@ -99,12 +108,21 @@ task testAgainstOneThirdUpgradedCluster(type: StandaloneRestIntegTestTask) {
systemProperty 'tests.skip_delete_model_index', 'true'
systemProperty 'tests.plugin_bwc_version', ext.neural_search_bwc_version

//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT tests from neural search version 2.9 and 2.10 because these features were released in 2.11 version.
//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
// because these features were released in 2.11 version.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.*"
}
}

// Excluding the test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
}
}

Expand Down Expand Up @@ -132,12 +150,21 @@ task testAgainstTwoThirdsUpgradedCluster(type: StandaloneRestIntegTestTask) {
systemProperty 'tests.skip_delete_model_index', 'true'
systemProperty 'tests.plugin_bwc_version', ext.neural_search_bwc_version

//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT tests from neural search version 2.9 and 2.10 because these features were released in 2.11 version.
//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
// because these features were released in 2.11 version.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.*"
}
}

// Excluding the test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
}
}

Expand Down Expand Up @@ -165,12 +192,21 @@ task testRollingUpgrade(type: StandaloneRestIntegTestTask) {
systemProperty 'tests.skip_delete_model_index', 'true'
systemProperty 'tests.plugin_bwc_version', ext.neural_search_bwc_version

//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT tests from neural search version 2.9 and 2.10 because these features were released in 2.11 version.
//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
// because these features were released in 2.11 version.
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.*"
}
}

// Excluding the test because we introduce this feature in 2.13
if (ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
}
}

Expand Down
Loading
Loading