Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rebasing with main #826

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ on:
branches:
- "*"
- "feature/**"
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true

jobs:
Get-CI-Image-Tag:
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/test_aggregations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ on:
branches:
- "*"
- "feature/**"
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true

jobs:
Get-CI-Image-Tag:
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/test_security.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ on:
branches:
- "*"
- "feature/**"
env:
ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true

jobs:
Get-CI-Image-Tag:
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
## [Unreleased 2.x](https://github.com/opensearch-project/neural-search/compare/2.15...2.x)
### Features
### Enhancements
- Add dynamic k-NN query parameters ef_search and nprobes ([#814](https://github.com/opensearch-project/neural-search/pull/814/))
- Enable '.' for nested field in text embedding processor ([#811](https://github.com/opensearch-project/neural-search/pull/811))
### Bug Fixes
### Infrastructure
- Add BWC for batch ingestion ([#769](https://github.com/opensearch-project/neural-search/pull/769))
- Add backward test cases for neural sparse two phase processor ([#777](https://github.com/opensearch-project/neural-search/pull/777))
### Documentation
### Maintenance
### Refactoring
20 changes: 18 additions & 2 deletions qa/restart-upgrade/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,18 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
}
}

// Excluding the k-NN radial search tests because we introduce this feature in 2.14
// Excluding the k-NN radial search tests and batch ingestion tests because we introduce these features in 2.14
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
}
}

// Excluding the NeuralSparseQuery two-phase search pipeline tests because we introduce this feature in 2.15
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
}
}

Expand Down Expand Up @@ -146,10 +154,18 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) {
}
}

// Excluding the k-NN radial search tests because we introduce this feature in 2.14
// Excluding the k-NN radial search tests and batch ingestion tests because we introduce these features in 2.14
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.KnnRadialSearchIT.*"
excludeTestsMatching "org.opensearch.neuralsearch.bwc.BatchIngestionIT.*"
}
}

// Excluding the NeuralSparseQuery two-phase search pipeline tests because we introduce this feature in 2.15
if (ext.neural_search_bwc_version.startsWith("2.9") || ext.neural_search_bwc_version.startsWith("2.10") || ext.neural_search_bwc_version.startsWith("2.11") || ext.neural_search_bwc_version.startsWith("2.12") || ext.neural_search_bwc_version.startsWith("2.13") || ext.neural_search_bwc_version.startsWith("2.14")){
filter {
excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseTwoPhaseProcessorIT.*"
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;

import org.opensearch.neuralsearch.util.TestUtils;

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Map;

import static org.opensearch.neuralsearch.util.BatchIngestionUtils.prepareDataForBulkIngestion;
import static org.opensearch.neuralsearch.util.TestUtils.NODES_BWC_CLUSTER;
import static org.opensearch.neuralsearch.util.TestUtils.SPARSE_ENCODING_PROCESSOR;

/**
 * Restart-upgrade backward-compatibility test for bulk ingestion with a batch size through a
 * sparse encoding ingest pipeline.
 *
 * <p>The single test method runs twice: once against the old cluster, where it creates the model,
 * pipeline and index and ingests the first batch of documents, and once against the upgraded
 * cluster, where it ingests a second batch into the same index and then removes the test resources.
 */
public class BatchIngestionIT extends AbstractRestartUpgradeRestTestCase {
    private static final String PIPELINE_NAME = "pipeline-BatchIngestionIT";
    private static final String TEXT_FIELD_NAME = "passage_text";
    private static final String EMBEDDING_FIELD_NAME = "passage_embedding";
    private static final int BATCH_SIZE = 3;

    /**
     * Ingests documents in bulk before and after the cluster restart and validates the document
     * count and the embedding field of the last ingested document in each phase.
     *
     * @throws Exception if any cluster interaction or resource lookup fails
     */
    public void testBatchIngestionWithNeuralSparseProcessor_E2EFlow() throws Exception {
        waitForClusterHealthGreen(NODES_BWC_CLUSTER);
        String indexName = getIndexNameForTest();
        if (isRunningAgainstOldCluster()) {
            // Old cluster: set everything up and ingest the first batch. Nothing is wiped here
            // because the upgraded-cluster phase reuses the pipeline, the index and the model.
            String modelId = uploadSparseEncodingModel();
            loadModel(modelId);
            createPipelineForSparseEncodingProcessor(modelId, PIPELINE_NAME);
            createIndexWithConfiguration(
                indexName,
                Files.readString(Path.of(classLoader.getResource("processor/SparseIndexMappings.json").toURI())),
                PIPELINE_NAME
            );
            // NOTE(review): presumably (0, 5) yields five documents with ids "0".."4" - confirm in BatchIngestionUtils.
            List<Map<String, String>> docs = prepareDataForBulkIngestion(0, 5);
            bulkAddDocuments(indexName, TEXT_FIELD_NAME, PIPELINE_NAME, docs, BATCH_SIZE);
            validateDocCountAndInfo(indexName, 5, () -> getDocById(indexName, "4"), EMBEDDING_FIELD_NAME, Map.class);
        } else {
            String modelId = null;
            try {
                // Model lookup and loading happen inside the try block so that the index and the
                // pipeline are still wiped when either step fails, matching the other BWC tests.
                modelId = TestUtils.getModelId(getIngestionPipeline(PIPELINE_NAME), SPARSE_ENCODING_PROCESSOR);
                loadModel(modelId);
                // NOTE(review): presumably (5, 5) yields five more documents with ids "5".."9" - confirm in BatchIngestionUtils.
                List<Map<String, String>> docs = prepareDataForBulkIngestion(5, 5);
                bulkAddDocuments(indexName, TEXT_FIELD_NAME, PIPELINE_NAME, docs, BATCH_SIZE);
                validateDocCountAndInfo(indexName, 10, () -> getDocById(indexName, "9"), EMBEDDING_FIELD_NAME, Map.class);
            } finally {
                wipeOfTestResources(indexName, PIPELINE_NAME, modelId, null);
            }
        }
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import java.util.Arrays;
import java.util.List;
import java.util.Map;

import org.opensearch.index.query.MatchQueryBuilder;
import static org.opensearch.neuralsearch.util.TestUtils.getModelId;
import static org.opensearch.neuralsearch.util.TestUtils.NODES_BWC_CLUSTER;
Expand Down Expand Up @@ -69,6 +70,7 @@ private void validateNormalizationProcessor(final String fileName, final String
loadModel(modelId);
addDocuments(getIndexNameForTest(), false);
validateTestIndex(modelId, getIndexNameForTest(), searchPipelineName);
validateTestIndex(modelId, getIndexNameForTest(), searchPipelineName, Map.of("ef_search", 100));
} finally {
wipeOfTestResources(getIndexNameForTest(), pipelineName, modelId, searchPipelineName);
}
Expand Down Expand Up @@ -96,10 +98,14 @@ private void createSearchPipeline(final String pipelineName) {
);
}

private void validateTestIndex(final String modelId, final String index, final String searchPipeline) throws Exception {
private void validateTestIndex(final String modelId, final String index, final String searchPipeline) {
validateTestIndex(modelId, index, searchPipeline, null);
}

private void validateTestIndex(final String modelId, final String index, final String searchPipeline, Map<String, ?> methodParameters) {
int docCount = getDocCount(index);
assertEquals(6, docCount);
HybridQueryBuilder hybridQueryBuilder = getQueryBuilder(modelId);
HybridQueryBuilder hybridQueryBuilder = getQueryBuilder(modelId, methodParameters);
Map<String, Object> searchResponseAsMap = search(index, hybridQueryBuilder, null, 1, Map.of("search_pipeline", searchPipeline));
assertNotNull(searchResponseAsMap);
int hits = getHitCount(searchResponseAsMap);
Expand All @@ -110,12 +116,15 @@ private void validateTestIndex(final String modelId, final String index, final S
}
}

private HybridQueryBuilder getQueryBuilder(final String modelId) {
private HybridQueryBuilder getQueryBuilder(final String modelId, Map<String, ?> methodParameters) {
NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder();
neuralQueryBuilder.fieldName("passage_embedding");
neuralQueryBuilder.modelId(modelId);
neuralQueryBuilder.queryText(QUERY);
neuralQueryBuilder.k(5);
if (methodParameters != null) {
neuralQueryBuilder.methodParameters(methodParameters);
}

MatchQueryBuilder matchQueryBuilder = new MatchQueryBuilder("text", QUERY);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ private void validateIndexQuery(final String modelId) {
null,
0.01f,
null,
null,
null
);
Map<String, Object> responseWithMinScoreQuery = search(getIndexNameForTest(), neuralQueryBuilderWithMinScoreQuery, 1);
Expand All @@ -74,6 +75,7 @@ private void validateIndexQuery(final String modelId) {
100000f,
null,
null,
null,
null
);
Map<String, Object> responseWithMaxDistanceQuery = search(getIndexNameForTest(), neuralQueryBuilderWithMaxDistanceQuery, 1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ private void validateTestIndex(final String modelId) throws Exception {
null,
null,
null,
null,
null
);
Map<String, Object> response = search(getIndexNameForTest(), neuralQueryBuilder, 1);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.bwc;

import org.opensearch.common.settings.Settings;
import org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder;
import org.opensearch.neuralsearch.util.TestUtils;

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;

import static org.opensearch.neuralsearch.util.TestUtils.NODES_BWC_CLUSTER;
import static org.opensearch.neuralsearch.util.TestUtils.SPARSE_ENCODING_PROCESSOR;

/**
 * Restart-upgrade backward-compatibility test for the neural sparse two-phase search pipeline.
 *
 * <p>The test method runs once against the old cluster, where it creates the model, ingest
 * pipeline, index, document and search pipeline, and once against the upgraded cluster, where it
 * repeats the search and then removes the test resources.
 */
public class NeuralSparseTwoPhaseProcessorIT extends AbstractRestartUpgradeRestTestCase {

    private static final String NEURAL_SPARSE_INGEST_PIPELINE_NAME = "nstp-nlp-ingest-pipeline-dense";
    private static final String NEURAL_SPARSE_TWO_PHASE_SEARCH_PIPELINE_NAME = "nstp-nlp-two-phase-search-pipeline-sparse";
    private static final String TEST_ENCODING_FIELD = "passage_embedding";
    private static final String TEST_TEXT_FIELD = "passage_text";
    private static final String TEXT_1 = "Hello world a b";

    /**
     * Entry point for both phases; dispatches on which cluster the test is running against.
     *
     * @throws Exception if any cluster interaction or resource lookup fails
     */
    public void testNeuralSparseQueryTwoPhaseProcessor_NeuralSearch_E2EFlow() throws Exception {
        waitForClusterHealthGreen(NODES_BWC_CLUSTER);
        NeuralSparseQueryBuilder sparseQuery = new NeuralSparseQueryBuilder().fieldName(TEST_ENCODING_FIELD).queryText(TEXT_1);
        if (!isRunningAgainstOldCluster()) {
            searchOnUpgradedClusterAndCleanUp(sparseQuery);
            return;
        }
        setUpAndSearchOnOldCluster(sparseQuery);
    }

    /**
     * Old-cluster phase: creates every resource the test needs, makes the two-phase search
     * pipeline the index default, and checks that a search returns a "hits" entry. Nothing is
     * wiped here; the upgraded-cluster phase searches the same index afterwards.
     */
    private void setUpAndSearchOnOldCluster(NeuralSparseQueryBuilder sparseQuery) throws Exception {
        final String modelId = uploadSparseEncodingModel();
        loadModel(modelId);
        sparseQuery.modelId(modelId);
        createPipelineForSparseEncodingProcessor(modelId, NEURAL_SPARSE_INGEST_PIPELINE_NAME);

        final String targetIndex = getIndexNameForTest();
        final String indexMappings = Files.readString(Path.of(classLoader.getResource("processor/SparseIndexMappings.json").toURI()));
        createIndexWithConfiguration(targetIndex, indexMappings, NEURAL_SPARSE_INGEST_PIPELINE_NAME);

        addSparseEncodingDoc(getIndexNameForTest(), "0", List.of(), List.of(), List.of(TEST_TEXT_FIELD), List.of(TEXT_1));
        createNeuralSparseTwoPhaseSearchProcessor(NEURAL_SPARSE_TWO_PHASE_SEARCH_PIPELINE_NAME);
        updateIndexSettings(
            getIndexNameForTest(),
            Settings.builder().put("index.search.default_pipeline", NEURAL_SPARSE_TWO_PHASE_SEARCH_PIPELINE_NAME)
        );
        assertSearchResponseHasHits(sparseQuery);
    }

    /**
     * Upgraded-cluster phase: resolves the model id from the ingest pipeline, reloads the model,
     * repeats the search, and always wipes the test resources afterwards.
     */
    private void searchOnUpgradedClusterAndCleanUp(NeuralSparseQueryBuilder sparseQuery) throws Exception {
        String modelId = null;
        try {
            modelId = TestUtils.getModelId(getIngestionPipeline(NEURAL_SPARSE_INGEST_PIPELINE_NAME), SPARSE_ENCODING_PROCESSOR);
            loadModel(modelId);
            sparseQuery.modelId(modelId);
            assertSearchResponseHasHits(sparseQuery);
        } finally {
            wipeOfTestResources(
                getIndexNameForTest(),
                NEURAL_SPARSE_INGEST_PIPELINE_NAME,
                modelId,
                NEURAL_SPARSE_TWO_PHASE_SEARCH_PIPELINE_NAME
            );
        }
    }

    /** Runs the query against the test index with size 1 and asserts the response carries a "hits" entry. */
    private void assertSearchResponseHasHits(NeuralSparseQueryBuilder sparseQuery) throws Exception {
        Object hitsSection = search(getIndexNameForTest(), sparseQuery, 1).get("hits");
        assertNotNull(hitsSection);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -56,20 +56,21 @@ private void createChunkingIndex(String indexName) throws Exception {
createIndexWithConfiguration(indexName, indexSetting, PIPELINE_NAME);
}

private void validateTestIndex(String indexName, String fieldName, int documentCount, Object expected) {
int docCount = getDocCount(indexName);
assertEquals(documentCount, docCount);
private Map<String, Object> getFirstDocumentInQuery(String indexName, int resultSize) {
MatchAllQueryBuilder query = new MatchAllQueryBuilder();
Map<String, Object> searchResults = search(indexName, query, 10);
Map<String, Object> searchResults = search(indexName, query, resultSize);
assertNotNull(searchResults);
Map<String, Object> document = getFirstInnerHit(searchResults);
assertNotNull(document);
Object documentSource = document.get("_source");
assert (documentSource instanceof Map);
@SuppressWarnings("unchecked")
Map<String, Object> documentSourceMap = (Map<String, Object>) documentSource;
assert (documentSourceMap).containsKey(fieldName);
Object ingestOutputs = documentSourceMap.get(fieldName);
assertEquals(expected, ingestOutputs);
return getFirstInnerHit(searchResults);
}

/**
 * Validates the test index through {@code validateDocCountAndInfo}, using the first document
 * returned by a match-all style lookup of up to 10 results, and asserts that the value it
 * returns equals {@code expected}.
 *
 * @param indexName     index to validate
 * @param fieldName     field name handed to {@code validateDocCountAndInfo}
 * @param documentCount document count handed to {@code validateDocCountAndInfo}
 * @param expected      value the returned ingest output must equal
 */
private void validateTestIndex(String indexName, String fieldName, int documentCount, Object expected) {
    final int resultSize = 10;
    Object ingestOutputs = validateDocCountAndInfo(
        indexName,
        documentCount,
        () -> getFirstDocumentInQuery(indexName, resultSize),
        fieldName,
        List.class
    );
    assertEquals(expected, ingestOutputs);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"request_processors": [
{
"neural_sparse_two_phase_processor": {
"tag": "neural-sparse",
"description": "This processor is making two-phase rescorer.",
"enabled": true,
"two_phase_parameter": {
"prune_ratio": %f,
"expansion_rate": %f,
"max_window_size": %d
}
}
}
]
}
Loading
Loading