-
Notifications
You must be signed in to change notification settings - Fork 72
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
BWC tests for Multimodal Search, Hybrid Search and Neural Sparse Search (#560)
Signed-off-by: Varun Jain <[email protected]>
- Loading branch information
1 parent
26699ea
commit 50a24b3
Showing
34 changed files
with
1,263 additions
and
303 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
129 changes: 129 additions & 0 deletions
129
qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/HybridSearchIT.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
package org.opensearch.neuralsearch.bwc; | ||
|
||
import java.io.IOException; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.Map; | ||
import org.opensearch.index.query.MatchQueryBuilder; | ||
import static org.opensearch.neuralsearch.TestUtils.getModelId; | ||
import static org.opensearch.neuralsearch.TestUtils.NODES_BWC_CLUSTER; | ||
import static org.opensearch.neuralsearch.TestUtils.PARAM_NAME_WEIGHTS; | ||
import static org.opensearch.neuralsearch.TestUtils.TEXT_EMBEDDING_PROCESSOR; | ||
import static org.opensearch.neuralsearch.TestUtils.DEFAULT_NORMALIZATION_METHOD; | ||
import static org.opensearch.neuralsearch.TestUtils.DEFAULT_COMBINATION_METHOD; | ||
import org.opensearch.neuralsearch.query.HybridQueryBuilder; | ||
import org.opensearch.neuralsearch.query.NeuralQueryBuilder; | ||
|
||
public class HybridSearchIT extends AbstractRestartUpgradeRestTestCase { | ||
private static final String PIPELINE_NAME = "nlp-hybrid-pipeline"; | ||
private static final String PIPELINE1_NAME = "nlp-hybrid-1-pipeline"; | ||
private static final String SEARCH_PIPELINE_NAME = "nlp-search-pipeline"; | ||
private static final String SEARCH_PIPELINE1_NAME = "nlp-search-1-pipeline"; | ||
private static final String TEST_FIELD = "passage_text"; | ||
private static final String TEXT_1 = "Hello world"; | ||
private static final String TEXT_2 = "Hi planet"; | ||
private static final String TEXT_3 = "Hi earth"; | ||
private static final String TEXT_4 = "Hi amazon"; | ||
private static final String TEXT_5 = "Hi mars"; | ||
private static final String TEXT_6 = "Hi opensearch"; | ||
private static final String QUERY = "Hi world"; | ||
|
||
// Test restart-upgrade normalization processor when index with multiple shards | ||
// Create Text Embedding Processor, Ingestion Pipeline, add document and search pipeline with normalization processor | ||
// Validate process , pipeline and document count in restart-upgrade scenario | ||
public void testNormalizationProcessor_whenIndexWithMultipleShards_E2EFlow() throws Exception { | ||
validateNormalizationProcessor("processor/IndexMappingMultipleShard.json", PIPELINE_NAME, SEARCH_PIPELINE_NAME); | ||
} | ||
|
||
// Test restart-upgrade normalization processor when index with single shard | ||
// Create Text Embedding Processor, Ingestion Pipeline, add document and search pipeline with normalization processor | ||
// Validate process , pipeline and document count in restart-upgrade scenario | ||
public void testNormalizationProcessor_whenIndexWithSingleShard_E2EFlow() throws Exception { | ||
validateNormalizationProcessor("processor/IndexMappingSingleShard.json", PIPELINE1_NAME, SEARCH_PIPELINE1_NAME); | ||
} | ||
|
||
private void validateNormalizationProcessor(final String fileName, final String pipelineName, final String searchPipelineName) | ||
throws Exception { | ||
waitForClusterHealthGreen(NODES_BWC_CLUSTER); | ||
if (isRunningAgainstOldCluster()) { | ||
String modelId = uploadTextEmbeddingModel(); | ||
loadModel(modelId); | ||
createPipelineProcessor(modelId, pipelineName); | ||
createIndexWithConfiguration( | ||
getIndexNameForTest(), | ||
Files.readString(Path.of(classLoader.getResource(fileName).toURI())), | ||
pipelineName | ||
); | ||
addDocuments(getIndexNameForTest(), true); | ||
createSearchPipeline(searchPipelineName); | ||
} else { | ||
String modelId = null; | ||
try { | ||
modelId = getModelId(getIngestionPipeline(pipelineName), TEXT_EMBEDDING_PROCESSOR); | ||
loadModel(modelId); | ||
addDocuments(getIndexNameForTest(), false); | ||
validateTestIndex(modelId, getIndexNameForTest(), searchPipelineName); | ||
} finally { | ||
wipeOfTestResources(getIndexNameForTest(), pipelineName, modelId, searchPipelineName); | ||
} | ||
} | ||
} | ||
|
||
private void addDocuments(final String indexName, boolean isRunningAgainstOldCluster) throws IOException { | ||
if (isRunningAgainstOldCluster) { | ||
addDocument(indexName, "0", TEST_FIELD, TEXT_1, null, null); | ||
addDocument(indexName, "1", TEST_FIELD, TEXT_2, null, null); | ||
addDocument(indexName, "2", TEST_FIELD, TEXT_3, null, null); | ||
addDocument(indexName, "3", TEST_FIELD, TEXT_4, null, null); | ||
addDocument(indexName, "4", TEST_FIELD, TEXT_5, null, null); | ||
} else { | ||
addDocument(indexName, "5", TEST_FIELD, TEXT_6, null, null); | ||
} | ||
} | ||
|
||
private void createSearchPipeline(final String pipelineName) { | ||
createSearchPipeline( | ||
pipelineName, | ||
DEFAULT_NORMALIZATION_METHOD, | ||
DEFAULT_COMBINATION_METHOD, | ||
Map.of(PARAM_NAME_WEIGHTS, Arrays.toString(new float[] { 0.3f, 0.7f })) | ||
); | ||
} | ||
|
||
private void validateTestIndex(final String modelId, final String index, final String searchPipeline) throws Exception { | ||
int docCount = getDocCount(index); | ||
assertEquals(6, docCount); | ||
HybridQueryBuilder hybridQueryBuilder = getQueryBuilder(modelId); | ||
Map<String, Object> searchResponseAsMap = search(index, hybridQueryBuilder, null, 1, Map.of("search_pipeline", searchPipeline)); | ||
assertNotNull(searchResponseAsMap); | ||
int hits = getHitCount(searchResponseAsMap); | ||
assertEquals(1, hits); | ||
List<Double> scoresList = getNormalizationScoreList(searchResponseAsMap); | ||
for (Double score : scoresList) { | ||
assertTrue(0 <= score && score <= 2); | ||
} | ||
} | ||
|
||
private HybridQueryBuilder getQueryBuilder(final String modelId) { | ||
NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder(); | ||
neuralQueryBuilder.fieldName("passage_embedding"); | ||
neuralQueryBuilder.modelId(modelId); | ||
neuralQueryBuilder.queryText(QUERY); | ||
neuralQueryBuilder.k(5); | ||
|
||
MatchQueryBuilder matchQueryBuilder = new MatchQueryBuilder("text", QUERY); | ||
|
||
HybridQueryBuilder hybridQueryBuilder = new HybridQueryBuilder(); | ||
hybridQueryBuilder.add(matchQueryBuilder); | ||
hybridQueryBuilder.add(neuralQueryBuilder); | ||
|
||
return hybridQueryBuilder; | ||
} | ||
|
||
} |
61 changes: 61 additions & 0 deletions
61
qa/restart-upgrade/src/test/java/org/opensearch/neuralsearch/bwc/MultiModalSearchIT.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
package org.opensearch.neuralsearch.bwc; | ||
|
||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import java.util.Map; | ||
import static org.opensearch.neuralsearch.TestUtils.NODES_BWC_CLUSTER; | ||
import static org.opensearch.neuralsearch.TestUtils.TEXT_IMAGE_EMBEDDING_PROCESSOR; | ||
import static org.opensearch.neuralsearch.TestUtils.getModelId; | ||
import org.opensearch.neuralsearch.query.NeuralQueryBuilder; | ||
|
||
// Restart-upgrade BWC test for the text image embedding processor. | ||
public class MultiModalSearchIT extends AbstractRestartUpgradeRestTestCase { | ||
    private static final String PIPELINE_NAME = "nlp-ingest-pipeline"; | ||
    private static final String TEST_FIELD = "passage_text"; | ||
    private static final String TEST_IMAGE_FIELD = "passage_image"; | ||
    private static final String TEXT = "Hello world"; | ||
    private static final String TEXT_1 = "Hello world a"; | ||
    private static final String TEST_IMAGE_TEXT = "/9j/4AAQSkZJRgABAQAASABIAAD"; | ||
    private static final String TEST_IMAGE_TEXT_1 = "/9j/4AAQSkZJRgbdwoeicfhoid"; | ||
|
||
    // Test restart-upgrade text image embedding processor | ||
    // Old cluster: create the model, the ingestion pipeline for the text image processor, the index and document "0" | ||
    // Upgraded cluster: add document "1", validate document count and a neural query, then wipe the test resources | ||
    public void testTextImageEmbeddingProcessor_E2EFlow() throws Exception { | ||
        waitForClusterHealthGreen(NODES_BWC_CLUSTER); | ||
|
||
        if (isRunningAgainstOldCluster()) { | ||
            prepareOldCluster(); | ||
            return; | ||
        } | ||
|
||
        // Remains null when the pipeline lookup throws; the cleanup in finally then receives null. | ||
        String upgradedModelId = null; | ||
        try { | ||
            upgradedModelId = getModelId(getIngestionPipeline(PIPELINE_NAME), TEXT_IMAGE_EMBEDDING_PROCESSOR); | ||
            loadModel(upgradedModelId); | ||
            addDocument(getIndexNameForTest(), "1", TEST_FIELD, TEXT_1, TEST_IMAGE_FIELD, TEST_IMAGE_TEXT_1); | ||
            validateTestIndex(upgradedModelId); | ||
        } finally { | ||
            wipeOfTestResources(getIndexNameForTest(), PIPELINE_NAME, upgradedModelId, null); | ||
        } | ||
    } | ||
|
||
    // Old-cluster phase: creates everything the upgraded phase expects to find. Nothing is cleaned up here. | ||
    private void prepareOldCluster() throws Exception { | ||
        final String uploadedModelId = uploadTextEmbeddingModel(); | ||
        loadModel(uploadedModelId); | ||
        createPipelineForTextImageProcessor(uploadedModelId, PIPELINE_NAME); | ||
        createIndexWithConfiguration(getIndexNameForTest(), readMultipleShardMapping(), PIPELINE_NAME); | ||
        addDocument(getIndexNameForTest(), "0", TEST_FIELD, TEXT, TEST_IMAGE_FIELD, TEST_IMAGE_TEXT); | ||
    } | ||
|
||
    // Reads the multiple-shard index mapping from the classpath resource and returns its content. | ||
    private String readMultipleShardMapping() throws Exception { | ||
        final Path mappingPath = Path.of(classLoader.getResource("processor/IndexMappingMultipleShard.json").toURI()); | ||
        return Files.readString(mappingPath); | ||
    } | ||
|
||
    // Expects two documents in the test index and a non-null response for a size-1 neural query built from | ||
    // the first document's text and image. | ||
    private void validateTestIndex(final String modelId) throws Exception { | ||
        assertEquals(2, getDocCount(getIndexNameForTest())); | ||
        final NeuralQueryBuilder textImageQuery = new NeuralQueryBuilder( | ||
            "passage_embedding", | ||
            TEXT, | ||
            TEST_IMAGE_TEXT, | ||
            modelId, | ||
            1, | ||
            null, | ||
            null | ||
        ); | ||
        final Map<String, Object> searchResponse = search(getIndexNameForTest(), textImageQuery, 1); | ||
        assertNotNull(searchResponse); | ||
    } | ||
|
||
} |
Oops, something went wrong.