Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add hybrid search with rescore IT #1066

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions qa/restart-upgrade/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchWithRescoreIT.*"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this check is not needed in main, latest 2.x which is 2.18 already supports rescore.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer to keep it so that the code looks same between main and 2.x

}
}

Expand Down Expand Up @@ -166,6 +167,7 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) {
if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchWithRescoreIT.*"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same as above

}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Objects;

import static org.opensearch.neuralsearch.util.TestUtils.DEFAULT_COMBINATION_METHOD;
import static org.opensearch.neuralsearch.util.TestUtils.DEFAULT_NORMALIZATION_METHOD;
import static org.opensearch.neuralsearch.util.TestUtils.NODES_BWC_CLUSTER;
import static org.opensearch.neuralsearch.util.TestUtils.PARAM_NAME_WEIGHTS;
import static org.opensearch.neuralsearch.util.TestUtils.TEXT_EMBEDDING_PROCESSOR;
import static org.opensearch.neuralsearch.util.TestUtils.getModelId;

import org.opensearch.index.query.MatchQueryBuilder;
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.index.query.QueryBuilders;
import org.opensearch.knn.index.query.rescore.RescoreContext;
import org.opensearch.neuralsearch.query.HybridQueryBuilder;
import org.opensearch.neuralsearch.query.NeuralQueryBuilder;

public class HybridSearchWithRescoreIT extends AbstractRestartUpgradeRestTestCase {
private static final String PIPELINE_NAME = "nlp-hybrid-with-rescore-pipeline";
private static final String SEARCH_PIPELINE_NAME = "nlp-search-with_rescore-pipeline";
private static final String TEST_FIELD = "passage_text";
private static final String TEXT = "Hello world";
private static final String TEXT_UPGRADED = "Hi earth";
private static final String QUERY = "Hi world";
private static final int NUM_DOCS_PER_ROUND = 1;
private static final String VECTOR_EMBEDDING_FIELD = "passage_embedding";
protected static final String RESCORE_QUERY = "hi";

/**
* Test normalization with hybrid query and rescore. This test is required as rescore will not be compatible with version lower than 2.15
*/
public void testHybridQueryWithRescore_whenIndexWithMultipleShards_E2EFlow() throws Exception {
waitForClusterHealthGreen(NODES_BWC_CLUSTER);

if (isRunningAgainstOldCluster()) {
String modelId = uploadTextEmbeddingModel();
loadModel(modelId);
createPipelineProcessor(modelId, PIPELINE_NAME);
createIndexWithConfiguration(
getIndexNameForTest(),
Files.readString(Path.of(classLoader.getResource("processor/IndexMappingMultipleShard.json").toURI())),
PIPELINE_NAME
);
addDocument(getIndexNameForTest(), "0", TEST_FIELD, TEXT, null, null);
createSearchPipeline(
SEARCH_PIPELINE_NAME,
DEFAULT_NORMALIZATION_METHOD,
DEFAULT_COMBINATION_METHOD,
Map.of(PARAM_NAME_WEIGHTS, Arrays.toString(new float[] { 0.3f, 0.7f }))
);
} else {
String modelId = null;
try {
modelId = getModelId(getIngestionPipeline(PIPELINE_NAME), TEXT_EMBEDDING_PROCESSOR);
loadModel(modelId);
addDocument(getIndexNameForTest(), "1", TEST_FIELD, TEXT_UPGRADED, null, null);
HybridQueryBuilder hybridQueryBuilder = getQueryBuilder(modelId, null, null);
QueryBuilder rescorer = QueryBuilders.matchQuery(TEST_FIELD, RESCORE_QUERY).boost(0.3f);
validateTestIndex(getIndexNameForTest(), hybridQueryBuilder, rescorer);
hybridQueryBuilder = getQueryBuilder(modelId, Map.of("ef_search", 100), RescoreContext.getDefault());
validateTestIndex(getIndexNameForTest(), hybridQueryBuilder, rescorer);
} finally {
wipeOfTestResources(getIndexNameForTest(), PIPELINE_NAME, modelId, null);
}
}
}

private void validateTestIndex(final String index, HybridQueryBuilder queryBuilder, QueryBuilder rescorer) {
int docCount = getDocCount(index);
assertEquals(2, docCount);
Map<String, Object> searchResponseAsMap = search(index, queryBuilder, rescorer, 1, Map.of("search_pipeline", SEARCH_PIPELINE_NAME));
assertNotNull(searchResponseAsMap);
int hits = getHitCount(searchResponseAsMap);
assertEquals(1, hits);
List<Double> scoresList = getNormalizationScoreList(searchResponseAsMap);
for (Double score : scoresList) {
assertTrue(0 <= score && score <= 2);
}
}

private HybridQueryBuilder getQueryBuilder(
final String modelId,
final Map<String, ?> methodParameters,
final RescoreContext rescoreContextForNeuralQuery
) {
NeuralQueryBuilder neuralQueryBuilder = NeuralQueryBuilder.builder()
.fieldName(VECTOR_EMBEDDING_FIELD)
.modelId(modelId)
.queryText(QUERY)
.k(5)
.build();
if (methodParameters != null) {
neuralQueryBuilder.methodParameters(methodParameters);
}
if (Objects.nonNull(rescoreContextForNeuralQuery)) {
neuralQueryBuilder.rescoreContext(rescoreContextForNeuralQuery);
}

MatchQueryBuilder matchQueryBuilder = new MatchQueryBuilder("text", QUERY);

HybridQueryBuilder hybridQueryBuilder = new HybridQueryBuilder();
hybridQueryBuilder.add(matchQueryBuilder);
hybridQueryBuilder.add(neuralQueryBuilder);

return hybridQueryBuilder;
}
}
163 changes: 0 additions & 163 deletions qa/rolling-upgrade/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -71,46 +71,6 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
systemProperty 'tests.plugin_bwc_version', ext.neural_search_bwc_version
systemProperty 'tests.skip_delete_model_index', 'true'

//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
// because these features were released in 2.11 version.
if (versionsBelow2_11.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.*"
}
}

// Excluding the tests because we introduce these features in 2.13
if (versionsBelow2_13.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*"
}
}

// Excluding the k-NN radial search and batch ingestion tests because we introduce these features in 2.14
if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*"
}
}

// Excluding the neural sparse two phase processor test because we introduce this feature in 2.15
if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
}
}

// Excluding the batching processor tests because we introduce this feature in 2.16
if (versionsBelow2_16.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
}
}

nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
Expand All @@ -136,47 +96,6 @@ task testAgainstOneThirdUpgradedCluster(type: StandaloneRestIntegTestTask) {
systemProperty 'tests.skip_delete_model_index', 'true'
systemProperty 'tests.plugin_bwc_version', ext.neural_search_bwc_version

//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
// because these features were released in 2.11 version.
if (versionsBelow2_11.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.*"
}
}

// Excluding the tests because we introduce these features in 2.13
if (versionsBelow2_13.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*"
}
}

// Excluding the k-NN radial search and batch ingestion tests because we introduce these features in 2.14
if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
}
}

// Excluding the neural sparse two phase processor test because we introduce this feature in 2.15
if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
}
}

// Excluding the batching processor tests because we introduce this feature in 2.16
if (versionsBelow2_16.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
}
}

nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
Expand All @@ -201,47 +120,6 @@ task testAgainstTwoThirdsUpgradedCluster(type: StandaloneRestIntegTestTask) {
systemProperty 'tests.skip_delete_model_index', 'true'
systemProperty 'tests.plugin_bwc_version', ext.neural_search_bwc_version

// Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
// because these features were released in 2.11 version.
if (versionsBelow2_11.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.*"
}
}

// Excluding the tests because we introduce these features in 2.13
if (versionsBelow2_13.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*"
}
}

// Excluding the k-NN radial search and batch ingestion tests because we introduce these features in 2.14
if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
}
}

// Excluding the neural sparse two phase processor test because we introduce this feature in 2.15
if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
}
}

// Excluding the batching processor tests because we introduce this feature in 2.16
if (versionsBelow2_16.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
}
}

nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
Expand All @@ -266,47 +144,6 @@ task testRollingUpgrade(type: StandaloneRestIntegTestTask) {
systemProperty 'tests.skip_delete_model_index', 'true'
systemProperty 'tests.plugin_bwc_version', ext.neural_search_bwc_version

//Excluding MultiModalSearchIT, HybridSearchIT, NeuralSparseSearchIT, NeuralQueryEnricherProcessorIT tests from neural search version 2.9 and 2.10
// because these features were released in 2.11 version.
if (versionsBelow2_11.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.*"
}
}

// Excluding the tests because we introduce these features in 2.13
if (versionsBelow2_13.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralQueryEnricherProcessorIT.testNeuralQueryEnricherProcessor_NeuralSparseSearch_E2EFlow"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.TextChunkingProcessorIT.*"
}
}

// Excluding the k-NN radial search and batch ingestion tests because we introduce these features in 2.14
if (versionsBelow2_14.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
}
}

// Excluding the neural sparse two phase processor test because we introduce this feature in 2.15
if (versionsBelow2_15.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
}
}

// Excluding the batching processor tests because we introduce this feature in 2.16
if (versionsBelow2_16.any { ext.neural_search_bwc_version.startsWith(it) }){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
}
}

nonInputProperties.systemProperty('tests.rest.cluster', "${-> testClusters."${baseName}".allHttpSocketURI.join(",")}")
nonInputProperties.systemProperty('tests.clustername', "${-> testClusters."${baseName}".getName()}")
systemProperty 'tests.security.manager', 'false'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,7 @@ public void testNormalizationProcessor_whenIndexWithMultipleShards_E2EFlow() thr
if (isFirstMixedRound()) {
totalDocsCountMixed = NUM_DOCS_PER_ROUND;
HybridQueryBuilder hybridQueryBuilder = getQueryBuilder(modelId, null, null, null);
QueryBuilder rescorer = QueryBuilders.matchQuery(TEST_FIELD, RESCORE_QUERY).boost(0.3f);
validateTestIndexOnUpgrade(totalDocsCountMixed, modelId, hybridQueryBuilder, rescorer);
validateTestIndexOnUpgrade(totalDocsCountMixed, modelId, hybridQueryBuilder, null);
addDocument(getIndexNameForTest(), "1", TEST_FIELD, TEXT_MIXED, null, null);
} else {
totalDocsCountMixed = 2 * NUM_DOCS_PER_ROUND;
Expand All @@ -84,10 +83,9 @@ public void testNormalizationProcessor_whenIndexWithMultipleShards_E2EFlow() thr
loadModel(modelId);
addDocument(getIndexNameForTest(), "2", TEST_FIELD, TEXT_UPGRADED, null, null);
HybridQueryBuilder hybridQueryBuilder = getQueryBuilder(modelId, null, null, null);
QueryBuilder rescorer = QueryBuilders.matchQuery(TEST_FIELD, RESCORE_QUERY).boost(0.3f);
validateTestIndexOnUpgrade(totalDocsCountUpgraded, modelId, hybridQueryBuilder, rescorer);
validateTestIndexOnUpgrade(totalDocsCountUpgraded, modelId, hybridQueryBuilder, null);
hybridQueryBuilder = getQueryBuilder(modelId, Boolean.FALSE, Map.of("ef_search", 100), RescoreContext.getDefault());
validateTestIndexOnUpgrade(totalDocsCountUpgraded, modelId, hybridQueryBuilder, rescorer);
validateTestIndexOnUpgrade(totalDocsCountUpgraded, modelId, hybridQueryBuilder, null);
} finally {
wipeOfTestResources(getIndexNameForTest(), PIPELINE_NAME, modelId, SEARCH_PIPELINE_NAME);
}
Expand Down
Loading
Loading