diff --git a/CHANGELOG.md b/CHANGELOG.md index 7fa453fd8..27d4acf47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,7 +20,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased 2.x](https://github.com/opensearch-project/neural-search/compare/2.13...2.x) ### Features - Support k-NN radial search parameters in neural search([#697](https://github.com/opensearch-project/neural-search/pull/697)) -- Enhance neural_sparse query's latency performance with two-phase rescore query([#695](https://github.com/opensearch-project/neural-search/pull/695/files)). +- Enhance neural_sparse query's latency performance with two-phase rescore query([#695](https://github.com/opensearch-project/neural-search/pull/695)). ### Enhancements - BWC tests for text chunking processor ([#661](https://github.com/opensearch-project/neural-search/pull/661)) - Allowing execution of hybrid query on index alias with filters ([#670](https://github.com/opensearch-project/neural-search/pull/670)) diff --git a/src/main/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilder.java b/src/main/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilder.java index 9ec60bc4d..2bd3a1d2f 100644 --- a/src/main/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilder.java +++ b/src/main/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilder.java @@ -340,25 +340,23 @@ protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws protected Query doToQuery(QueryShardContext context) throws IOException { final MappedFieldType ft = context.fieldMapper(fieldName); validateFieldType(ft); + Map allTokens = getAllTokens(); + Query allTokenQuery = buildFeatureFieldQueryFromTokens(allTokens, fieldName); if (!NeuralSparseTwoPhaseParameters.isEnabled(neuralSparseTwoPhaseParameters)) { - return buildFeatureFieldQueryFromTokens(getAllTokens(), fieldName); + return allTokenQuery; } // in the last step we make sure 
neuralSparseTwoPhaseParameters is not null float ratio = neuralSparseTwoPhaseParameters.pruning_ratio(); - Map highScoreTokens = getHighScoreTokens(ratio); - Map lowScoreTokens = getLowScoreTokens(ratio); - Map allTokens = getAllTokens(); - Query allTokenQuery = buildFeatureFieldQueryFromTokens(allTokens, fieldName); + Map highScoreTokens = getHighScoreTokens(allTokens, ratio); + Map lowScoreTokens = getLowScoreTokens(allTokens, ratio); // if all token are valid score that we don't need the two-phase optimize, return allTokenQuery. if (lowScoreTokens.isEmpty()) { return allTokenQuery; } - Query highScoreTokenQuery = buildFeatureFieldQueryFromTokens(highScoreTokens, fieldName); - Query lowScoreTokenQuery = buildFeatureFieldQueryFromTokens(lowScoreTokens, fieldName); return new NeuralSparseQuery( allTokenQuery, - highScoreTokenQuery, - lowScoreTokenQuery, + buildFeatureFieldQueryFromTokens(highScoreTokens, fieldName), + buildFeatureFieldQueryFromTokens(lowScoreTokens, fieldName), neuralSparseTwoPhaseParameters.window_size_expansion() ); } @@ -439,17 +437,15 @@ private Map getAllTokens() { return queryTokens; } - private Map getHighScoreTokens(float ratio) { - return getFilteredScoreTokens(true, ratio); + private Map getHighScoreTokens(Map queryTokens, float ratio) { + return getFilteredScoreTokens(queryTokens, true, ratio); } - private Map getLowScoreTokens(float ratio) { - return getFilteredScoreTokens(false, ratio); + private Map getLowScoreTokens(Map queryTokens, float ratio) { + return getFilteredScoreTokens(queryTokens, false, ratio); } - private Map getFilteredScoreTokens(boolean aboveThreshold, float ratio) { - Map queryTokens = queryTokensSupplier.get(); - validateQueryTokens(queryTokens); + private Map getFilteredScoreTokens(Map queryTokens, boolean aboveThreshold, float ratio) { float max = queryTokens.values().stream().max(Float::compare).orElse(0f); float threshold = ratio * max; if (max == 0) { diff --git 
a/src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilderTests.java b/src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilderTests.java index fdd691c94..12dfd2d9e 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilderTests.java +++ b/src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilderTests.java @@ -70,8 +70,6 @@ public class NeuralSparseQueryBuilderTests extends OpenSearchTestCase { private static final String FIELD_NAME = "testField"; private static final String QUERY_TEXT = "Hello world!"; - private static final String QUERY_TEXT_LONG_VERSION = - "The ID of the sparse encoding model or tokenizer model that will be used to generate vector embeddings from the query text. The model must be deployed in OpenSearch before it can be used in sparse neural search. For more information, see Using custom models within OpenSearch and Neural sparse search."; private static final String MODEL_ID = "mfgfgdsfgfdgsde"; private static final float BOOST = 1.8f; private static final String QUERY_NAME = "queryName"; @@ -505,9 +503,9 @@ public void testFromXContent_whenBuiltWithEmptyTwoPhaseParams_thenThrowException "query_text": "string", "model_id": "string", "two_phase_settings":{ - "window_size_expansion": 5, - "pruning_ratio": 0.4, - "enabled": false + "window_size_expansion": null, + "pruning_ratio": null, + "enabled": null } } } @@ -996,30 +994,26 @@ public void testTokenDividedByScores_whenDefaultSettings() { @SneakyThrows public void testDoToQuery_whenTwoPhaseParaDisabled_thenDegradeSuccess() { NeuralSparseQueryBuilder sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder().fieldName(FIELD_NAME) - .maxTokenScore(MAX_TOKEN_SCORE) .queryText(QUERY_TEXT) .modelId(MODEL_ID) .queryTokensSupplier(QUERY_TOKENS_SUPPLIER) .neuralSparseTwoPhaseParameters( - new NeuralSparseTwoPhaseParameters().enabled(false).pruning_ratio(0.4f).window_size_expansion(6.0f) + new 
NeuralSparseTwoPhaseParameters().enabled(false).pruning_ratio(0.7f).window_size_expansion(6.0f) ); Query query = sparseEncodingQueryBuilder.doToQuery(mockQueryShardContext); assertTrue(query instanceof BooleanQuery); List booleanClauseList = ((BooleanQuery) query).clauses(); assertEquals(2, ((BooleanQuery) query).clauses().size()); - BooleanClause firstClause = booleanClauseList.get(0); - BooleanClause secondClause = booleanClauseList.get(1); - - Query firstFeatureQuery = firstClause.getQuery(); - assertEquals(firstFeatureQuery, FeatureField.newLinearQuery(FIELD_NAME, "world", 2.f)); - Query secondFeatureQuery = secondClause.getQuery(); - assertEquals(secondFeatureQuery, FeatureField.newLinearQuery(FIELD_NAME, "hello", 1.f)); + List actualQueries = booleanClauseList.stream().map(BooleanClause::getQuery).collect(Collectors.toList()); + Query expectedQuery1 = FeatureField.newLinearQuery(FIELD_NAME, "world", 2.f); + Query expectedQuery2 = FeatureField.newLinearQuery(FIELD_NAME, "hello", 1.f); + assertTrue("Expected query for 'world' not found", actualQueries.contains(expectedQuery1)); + assertTrue("Expected query for 'hello' not found", actualQueries.contains(expectedQuery2)); } @SneakyThrows public void testDoToQuery_whenTwoPhaseParaEmpty_thenDegradeSuccess() { NeuralSparseQueryBuilder sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder().fieldName(FIELD_NAME) - .maxTokenScore(MAX_TOKEN_SCORE) .queryText(QUERY_TEXT) .modelId(MODEL_ID) .queryTokensSupplier(QUERY_TOKENS_SUPPLIER); @@ -1036,6 +1030,29 @@ public void testDoToQuery_whenTwoPhaseParaEmpty_thenDegradeSuccess() { assertEquals(secondFeatureQuery, FeatureField.newLinearQuery(FIELD_NAME, "hello", 1.f)); } + @SneakyThrows + public void testDoToQuery_whenTwoPhaseEnabled_thenBuildCorrectQuery() { + Map map = new HashMap<>(); + for (int i = 1; i < 3; i++) { + map.put(String.valueOf(i), (float) i); + } + final Supplier> tokenSupplier = () -> map; + // token with score [1.0,2.0] will build degrade to 
allTokenQuery + NeuralSparseQueryBuilder sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder().fieldName(FIELD_NAME) + .queryText(QUERY_TEXT) + .modelId(MODEL_ID) + .queryTokensSupplier(tokenSupplier) + .neuralSparseTwoPhaseParameters(NeuralSparseTwoPhaseParameters.getDefaultSettings()); + Query allTokenQuery = sparseEncodingQueryBuilder.doToQuery(mockQueryShardContext); + assertTrue(allTokenQuery instanceof BooleanQuery); + assertEquals(((BooleanQuery) allTokenQuery).clauses().size(), 2); + map.put("Temp", 9.f); + // token with score [1.0,2.0,9.0] will build a NeuralSparseQuery whose lowTokenQuery including [1.0,2.0] + Query query = sparseEncodingQueryBuilder.doToQuery(mockQueryShardContext); + assertTrue(query instanceof NeuralSparseQuery); + assertEquals(((NeuralSparseQuery) query).getLowScoreTokenQuery(), allTokenQuery); + } + @SneakyThrows public void testDoToQuery_successfulDoToQuery() { NeuralSparseQueryBuilder sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder().fieldName(FIELD_NAME) diff --git a/src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryIT.java b/src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryIT.java index fd8beaf50..01b10bf9a 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryIT.java +++ b/src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryIT.java @@ -7,9 +7,11 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.function.Supplier; import org.junit.Before; import org.opensearch.client.ResponseException; +import org.opensearch.common.settings.Settings; import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.ConstantScoreQueryBuilder; import org.opensearch.index.query.DisMaxQueryBuilder; @@ -19,6 +21,7 @@ import org.opensearch.index.query.QueryBuilder; import org.opensearch.neuralsearch.BaseNeuralSearchIT; +import org.opensearch.neuralsearch.settings.NeuralSearchSettings; import 
org.opensearch.neuralsearch.util.TestUtils; import static org.opensearch.neuralsearch.util.TestUtils.createRandomTokenWeightMap; @@ -34,16 +37,15 @@ public class NeuralSparseQueryIT extends BaseNeuralSearchIT { private static final String TWO_PHASE_PRUNE_RATIO_SETTING_KEY = "plugins.neural_search.neural_sparse.two_phase.default_pruning_ratio"; private static final String TWO_PHASE_MAX_WINDOW_SIZE_SETTING_KEY = "plugins.neural_search.neural_sparse.two_phase.max_window_size"; private static final String TEST_BASIC_INDEX_NAME = "test-sparse-basic-index"; + private static final String TEST_TWO_PHASE_BASIC_INDEX_NAME = "test-sparse-basic-index-two-phase"; private static final String TEST_MULTI_NEURAL_SPARSE_FIELD_INDEX_NAME = "test-sparse-multi-field-index"; private static final String TEST_TEXT_AND_NEURAL_SPARSE_FIELD_INDEX_NAME = "test-sparse-text-and-field-index"; private static final String TEST_NESTED_INDEX_NAME = "test-sparse-nested-index"; private static final String TEST_QUERY_TEXT = "Hello world a b"; - private static final String TEST_QUERY_TEXT_V2 = "Hello world a b cat cache dog war pop nice nick neck question"; private static final String TEST_NEURAL_SPARSE_FIELD_NAME_1 = "test-sparse-encoding-1"; private static final String TEST_NEURAL_SPARSE_FIELD_NAME_2 = "test-sparse-encoding-2"; private static final String TEST_TEXT_FIELD_NAME_1 = "test-text-field"; private static final String TEST_NEURAL_SPARSE_FIELD_NAME_NESTED = "nested.neural_sparse.field"; - private static final List TEST_TOKENS = List.of("hello", "world", "a", "b", "c"); private static final Float DELTA = 1e-5f; @@ -53,6 +55,7 @@ public class NeuralSparseQueryIT extends BaseNeuralSearchIT { public void setUp() throws Exception { super.setUp(); updateClusterSettings(); + updateTwoPhaseClusterSettings(true, 5.0f, 0.4f, 10000); } @SneakyThrows @@ -61,10 +64,15 @@ private void updateTwoPhaseClusterSettings(boolean enabled, float windowSizeExpa 
updateClusterSettings(TWO_PHASE_WINDOW_SIZE_EXPANSION_SETTING_KEY, windowSizeExpansion); updateClusterSettings(TWO_PHASE_PRUNE_RATIO_SETTING_KEY, ratio); updateClusterSettings(TWO_PHASE_MAX_WINDOW_SIZE_SETTING_KEY, maxWindowSize); + clusterService.getClusterSettings().registerSetting(NeuralSearchSettings.NEURAL_SPARSE_TWO_PHASE_DEFAULT_ENABLED); + clusterService.getClusterSettings().registerSetting(NeuralSearchSettings.NEURAL_SPARSE_TWO_PHASE_DEFAULT_WINDOW_SIZE_EXPANSION); + clusterService.getClusterSettings().registerSetting(NeuralSearchSettings.NEURAL_SPARSE_TWO_PHASE_MAX_WINDOW_SIZE); + clusterService.getClusterSettings().registerSetting(NeuralSearchSettings.NEURAL_SPARSE_TWO_PHASE_DEFAULT_PRUNING_RATIO); + NeuralSparseTwoPhaseParameters.initialize(clusterService, Settings.EMPTY); } @SneakyThrows - private NeuralSparseTwoPhaseParameters getDefaultTwoPhaseParameter(boolean enabled, float windowSizeExpansion, float ratio) { + private NeuralSparseTwoPhaseParameters getCustomTwoPhaseParameter(boolean enabled, float windowSizeExpansion, float ratio) { return new NeuralSparseTwoPhaseParameters().enabled(enabled).window_size_expansion(windowSizeExpansion).pruning_ratio(ratio); } @@ -304,11 +312,11 @@ public void testBooleanQuery_withMultipleSparseEncodingQueries_whenTwoPhaseEnabl NeuralSparseQueryBuilder sparseEncodingQueryBuilder1 = new NeuralSparseQueryBuilder().fieldName(TEST_NEURAL_SPARSE_FIELD_NAME_1) .queryText(TEST_QUERY_TEXT) .modelId(modelId) - .neuralSparseTwoPhaseParameters(getDefaultTwoPhaseParameter(true, 2.0f, 0.4f)); + .neuralSparseTwoPhaseParameters(getCustomTwoPhaseParameter(true, 2.0f, 0.4f)); NeuralSparseQueryBuilder sparseEncodingQueryBuilder2 = new NeuralSparseQueryBuilder().fieldName(TEST_NEURAL_SPARSE_FIELD_NAME_2) .queryText(TEST_QUERY_TEXT) .modelId(modelId) - .neuralSparseTwoPhaseParameters(getDefaultTwoPhaseParameter(true, 2.0f, 0.4f)); + .neuralSparseTwoPhaseParameters(getCustomTwoPhaseParameter(true, 2.0f, 0.4f)); 
boolQueryBuilder.should(sparseEncodingQueryBuilder1).should(sparseEncodingQueryBuilder2); @@ -424,6 +432,35 @@ protected void initializeIndexIfNotExist(String indexName) { addSparseEncodingDoc(indexName, "1", List.of(TEST_NEURAL_SPARSE_FIELD_NAME_NESTED), List.of(testRankFeaturesDoc)); assertEquals(1, getDocCount(TEST_NESTED_INDEX_NAME)); } + + if (TEST_TWO_PHASE_BASIC_INDEX_NAME.equals(indexName) && !indexExists(indexName)) { + Map twoPhaseRandFeatures = new HashMap<>(); + Map normalRandFeatures = new HashMap<>(); + prepareSparseEncodingIndex(indexName, List.of(TEST_NEURAL_SPARSE_FIELD_NAME_1)); + // put [(5,5.0), (6,6.0)] into twoPhaseRandFeatures + for (int i = 5; i < 7; i++) { + twoPhaseRandFeatures.put(String.valueOf(i), (float) i); + } + + // put 10 token [(1,1.0),(11,1.0),....(5,5.0),(55,5.0)] into normalRandFeatures + for (int i = 1; i < 6; i++) { + normalRandFeatures.put(String.valueOf(i), (float) i); + normalRandFeatures.put(String.valueOf(10 + i), (float) i); + + } + + for (int i = 0; i < 10; i++) { + addSparseEncodingDoc(indexName, String.valueOf(i), List.of(TEST_NEURAL_SPARSE_FIELD_NAME_1), List.of(normalRandFeatures)); + addSparseEncodingDoc( + indexName, + String.valueOf(i + 10), + List.of(TEST_NEURAL_SPARSE_FIELD_NAME_1), + List.of(twoPhaseRandFeatures) + ); + ; + } + assertEquals(20, getDocCount(indexName)); + } } /** @@ -460,7 +497,7 @@ public void testBasicQueryUsingQueryText_whenTwoPhaseEnabled_thenGetExpectedScor .queryText(TEST_QUERY_TEXT) .modelId(modelId) .boost(2.0f) - .neuralSparseTwoPhaseParameters(getDefaultTwoPhaseParameter(true, 2.0f, 0.4f)); + .neuralSparseTwoPhaseParameters(getCustomTwoPhaseParameter(true, 2.0f, 0.4f)); Map searchResponseAsMap = search(TEST_BASIC_INDEX_NAME, sparseEncodingQueryBuilder, 1); Map firstInnerHit = getFirstInnerHit(searchResponseAsMap); @@ -511,6 +548,41 @@ public void testUpdateTwoPhaseSettings_whenTwoPhasedSettingsOverEdge_thenFail() expectThrows(ResponseException.class, () -> 
updateTwoPhaseClusterSettings(true, -10f, 1.4f, 10000)); } + @SneakyThrows + public void testBasicQueryUsingQueryText_whenTwoPhaseParameterOverEdge_thenFail() { + String modelId = null; + try { + initializeIndexIfNotExist(TEST_BASIC_INDEX_NAME); + modelId = prepareSparseEncodingModel(); + // windows_size_expansion over edge [0.f + NeuralSparseQueryBuilder sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder().fieldName(TEST_NEURAL_SPARSE_FIELD_NAME_1) + .queryText(TEST_QUERY_TEXT) + .modelId(modelId) + .boost(2.0f) + .neuralSparseTwoPhaseParameters(NeuralSparseTwoPhaseParameters.getDefaultSettings().window_size_expansion(-0.001f)); + NeuralSparseQueryBuilder finalSparseEncodingQueryBuilder = sparseEncodingQueryBuilder; + expectThrows(ResponseException.class, () -> search(TEST_BASIC_INDEX_NAME, finalSparseEncodingQueryBuilder, 1)); + // pruning_ratio over edge [0.f + sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder().fieldName(TEST_NEURAL_SPARSE_FIELD_NAME_1) + .queryText(TEST_QUERY_TEXT) + .modelId(modelId) + .boost(2.0f) + .neuralSparseTwoPhaseParameters(NeuralSparseTwoPhaseParameters.getDefaultSettings().pruning_ratio(-0.001f)); + NeuralSparseQueryBuilder finalSparseEncodingQueryBuilder1 = sparseEncodingQueryBuilder; + expectThrows(ResponseException.class, () -> search(TEST_BASIC_INDEX_NAME, finalSparseEncodingQueryBuilder1, 1)); + // pruning_ratio over edge 1.f] + sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder().fieldName(TEST_NEURAL_SPARSE_FIELD_NAME_1) + .queryText(TEST_QUERY_TEXT) + .modelId(modelId) + .boost(2.0f) + .neuralSparseTwoPhaseParameters(NeuralSparseTwoPhaseParameters.getDefaultSettings().pruning_ratio(1.001f)); + NeuralSparseQueryBuilder finalSparseEncodingQueryBuilder2 = sparseEncodingQueryBuilder; + expectThrows(ResponseException.class, () -> search(TEST_BASIC_INDEX_NAME, finalSparseEncodingQueryBuilder2, 1)); + } finally { + wipeOfTestResources(TEST_BASIC_INDEX_NAME, null, modelId, null); + } + } + /** * Tests 
neuralSparseQuery as rescoreQuery with DSL query: * { @@ -550,7 +622,7 @@ public void testNeuralSparseQueryAsRescoreQuery_whenTwoPhase_thenGetExpectedScor .queryText(TEST_QUERY_TEXT) .modelId(modelId) .boost(2.0f) - .neuralSparseTwoPhaseParameters(getDefaultTwoPhaseParameter(true, 4.0f, 0.5f)); + .neuralSparseTwoPhaseParameters(getCustomTwoPhaseParameter(true, 4.0f, 0.5f)); QueryBuilder queryBuilder = new MatchAllQueryBuilder(); Map searchResponseAsMap = search(TEST_BASIC_INDEX_NAME, queryBuilder, sparseEncodingQueryBuilder, 1); Map firstInnerHit = getFirstInnerHit(searchResponseAsMap); @@ -610,7 +682,7 @@ public void testMultiNeuralSparseQuery_whenTwoPhase_thenGetExpectedScore() { .queryText(TEST_QUERY_TEXT) .modelId(modelId) .boost(2.0f) - .neuralSparseTwoPhaseParameters(getDefaultTwoPhaseParameter(true, 4.0f, 0.2f)); + .neuralSparseTwoPhaseParameters(getCustomTwoPhaseParameter(true, 4.0f, 0.2f)); boolQueryBuilder.should(sparseEncodingQueryBuilder); boolQueryBuilder.should(sparseEncodingQueryBuilder); Map searchResponseAsMap = search(TEST_BASIC_INDEX_NAME, boolQueryBuilder, 1); @@ -623,6 +695,39 @@ public void testMultiNeuralSparseQuery_whenTwoPhase_thenGetExpectedScore() { } } + /** + * This test case aims to test the different scores caused by different two-phase parameters. + * First, with a default parameter, two-phase gets the same score most of the time. + * Second, with a high ratio, some docs containing lots of low-score tokens may be missed. + * Finally, a lower ratio or a larger window size can improve accuracy. 
+ */ + @SneakyThrows + public void testNeuralSparseQuery_whenDifferentTwoPhaseParameter_thenGetDifferentResult() { + try { + initializeIndexIfNotExist(TEST_TWO_PHASE_BASIC_INDEX_NAME); + Map queryToken = new HashMap<>(); + for (int i = 1; i < 6; i++) { + queryToken.put(String.valueOf(i + 10), (float) i); + } + for (int i = 1; i < 8; i++) { + queryToken.put(String.valueOf(i), (float) i); + } + Supplier> queryTokenSupplier = () -> queryToken; + NeuralSparseQueryBuilder sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder().fieldName(TEST_NEURAL_SPARSE_FIELD_NAME_1) + .queryTokensSupplier(queryTokenSupplier); + sparseEncodingQueryBuilder.neuralSparseTwoPhaseParameters(getCustomTwoPhaseParameter(false, 1.f, 0.7f)); + assertSearchScore(sparseEncodingQueryBuilder, TEST_TWO_PHASE_BASIC_INDEX_NAME, 110); + sparseEncodingQueryBuilder.neuralSparseTwoPhaseParameters(getCustomTwoPhaseParameter(true, 1.f, 0.3f)); + assertSearchScore(sparseEncodingQueryBuilder, TEST_TWO_PHASE_BASIC_INDEX_NAME, 110); + sparseEncodingQueryBuilder.neuralSparseTwoPhaseParameters(getCustomTwoPhaseParameter(true, 1.f, 0.7f)); + assertSearchScore(sparseEncodingQueryBuilder, TEST_TWO_PHASE_BASIC_INDEX_NAME, 61); + sparseEncodingQueryBuilder.neuralSparseTwoPhaseParameters(getCustomTwoPhaseParameter(true, 30f, 0.7f)); + assertSearchScore(sparseEncodingQueryBuilder, TEST_TWO_PHASE_BASIC_INDEX_NAME, 110); + } finally { + wipeOfTestResources(TEST_TWO_PHASE_BASIC_INDEX_NAME, null, null, null); + } + } + @SneakyThrows public void testMultiNeuralSparseQuery_whenTwoPhaseAndFilter_thenGetExpectedScore() { String modelId = null; @@ -634,7 +739,7 @@ public void testMultiNeuralSparseQuery_whenTwoPhaseAndFilter_thenGetExpectedScor .queryText(TEST_QUERY_TEXT) .modelId(modelId) .boost(2.0f) - .neuralSparseTwoPhaseParameters(getDefaultTwoPhaseParameter(true, 5.0f, 0.8f)); + .neuralSparseTwoPhaseParameters(getCustomTwoPhaseParameter(true, 5.0f, 0.8f)); boolQueryBuilder.should(sparseEncodingQueryBuilder); 
boolQueryBuilder.filter(sparseEncodingQueryBuilder); Map searchResponseAsMap = search(TEST_BASIC_INDEX_NAME, boolQueryBuilder, 1); @@ -658,7 +763,7 @@ public void testMultiNeuralSparseQuery_whenTwoPhaseAndMultiBoolean_thenGetExpect .queryText(TEST_QUERY_TEXT) .modelId(modelId) .boost(1.0f) - .neuralSparseTwoPhaseParameters(getDefaultTwoPhaseParameter(true, 5.0f, 0.6f)); + .neuralSparseTwoPhaseParameters(getCustomTwoPhaseParameter(true, 5.0f, 0.6f)); boolQueryBuilder.should(sparseEncodingQueryBuilder1); boolQueryBuilder.should(sparseEncodingQueryBuilder1); BoolQueryBuilder subBoolQueryBuilder = new BoolQueryBuilder(); @@ -666,12 +771,12 @@ public void testMultiNeuralSparseQuery_whenTwoPhaseAndMultiBoolean_thenGetExpect .queryText(TEST_QUERY_TEXT) .modelId(modelId) .boost(2.0f) - .neuralSparseTwoPhaseParameters(getDefaultTwoPhaseParameter(true, 5.0f, 0.6f)); + .neuralSparseTwoPhaseParameters(getCustomTwoPhaseParameter(true, 5.0f, 0.6f)); NeuralSparseQueryBuilder sparseEncodingQueryBuilder3 = new NeuralSparseQueryBuilder().fieldName(TEST_NEURAL_SPARSE_FIELD_NAME_1) .queryText(TEST_QUERY_TEXT) .modelId(modelId) .boost(3.0f) - .neuralSparseTwoPhaseParameters(getDefaultTwoPhaseParameter(true, 5.0f, 0.6f)); + .neuralSparseTwoPhaseParameters(getCustomTwoPhaseParameter(true, 5.0f, 0.6f)); subBoolQueryBuilder.should(sparseEncodingQueryBuilder2); subBoolQueryBuilder.should(sparseEncodingQueryBuilder3); subBoolQueryBuilder.boost(2.0f); @@ -698,7 +803,7 @@ public void testMultiNeuralSparseQuery_whenTwoPhaseAndNoLowScoreToken_thenGetExp NeuralSparseQueryBuilder sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder().fieldName(TEST_NEURAL_SPARSE_FIELD_NAME_1) .queryTokensSupplier(() -> queryTokens) .boost(2.0f) - .neuralSparseTwoPhaseParameters(getDefaultTwoPhaseParameter(true, 5.0f, 0.6f)); + .neuralSparseTwoPhaseParameters(getCustomTwoPhaseParameter(true, 5.0f, 0.6f)); Map searchResponseAsMap = search(TEST_BASIC_INDEX_NAME, sparseEncodingQueryBuilder, 1); Map firstInnerHit 
= getFirstInnerHit(searchResponseAsMap); assertEquals("1", firstInnerHit.get("_id")); @@ -736,7 +841,7 @@ public void testNeuralSParseQuery_whenTwoPhaseAndNestedInConstantScoreQuery_then .queryText(TEST_QUERY_TEXT) .modelId(modelId) .boost(1.0f) - .neuralSparseTwoPhaseParameters(getDefaultTwoPhaseParameter(true, 5.0f, 0.6f)); + .neuralSparseTwoPhaseParameters(getCustomTwoPhaseParameter(true, 5.0f, 0.6f)); ConstantScoreQueryBuilder constantScoreQueryBuilder = new ConstantScoreQueryBuilder(sparseEncodingQueryBuilder); Map searchResponseAsMap = search(TEST_BASIC_INDEX_NAME, constantScoreQueryBuilder, 1); Map firstInnerHit = getFirstInnerHit(searchResponseAsMap); @@ -779,7 +884,7 @@ public void testNeuralSParseQuery_whenTwoPhaseAndNestedInDisjunctionMaxQuery_the .queryText(TEST_QUERY_TEXT) .modelId(modelId) .boost(5.0f) - .neuralSparseTwoPhaseParameters(getDefaultTwoPhaseParameter(true, 5.0f, 0.6f)); + .neuralSparseTwoPhaseParameters(getCustomTwoPhaseParameter(true, 5.0f, 0.6f)); DisMaxQueryBuilder disMaxQueryBuilder = new DisMaxQueryBuilder(); disMaxQueryBuilder.add(sparseEncodingQueryBuilder); disMaxQueryBuilder.add(new MatchAllQueryBuilder()); @@ -820,7 +925,7 @@ public void testNeuralSParseQuery_whenTwoPhaseAndNestedInFunctionScoreQuery_then .queryText(TEST_QUERY_TEXT) .modelId(modelId) .boost(5.0f) - .neuralSparseTwoPhaseParameters(getDefaultTwoPhaseParameter(true, 5.0f, 0.6f)); + .neuralSparseTwoPhaseParameters(getCustomTwoPhaseParameter(true, 5.0f, 0.6f)); FunctionScoreQueryBuilder functionScoreQueryBuilder = new FunctionScoreQueryBuilder(sparseEncodingQueryBuilder); functionScoreQueryBuilder.boost(2.0f); Map searchResponseAsMap = search(TEST_BASIC_INDEX_NAME, functionScoreQueryBuilder, 1); @@ -832,4 +937,10 @@ public void testNeuralSParseQuery_whenTwoPhaseAndNestedInFunctionScoreQuery_then wipeOfTestResources(TEST_BASIC_INDEX_NAME, null, modelId, null); } } + + private void assertSearchScore(NeuralSparseQueryBuilder builder, String indexName, float 
expectedScore) { + Map searchResponse = search(indexName, builder, 10); + Map firstInnerHit = getFirstInnerHit(searchResponse); + assertEquals(expectedScore, objectToFloat(firstInnerHit.get("_score")), DELTA); + } }