From 86dcd51fba25b8c2ee54504234091bc5c2174b68 Mon Sep 17 00:00:00 2001 From: Forest Vey <36905077+forestmvey@users.noreply.github.com> Date: Mon, 27 Jun 2022 15:28:06 -0700 Subject: [PATCH] Integ match bool prefix #187 (#634) Signed-off-by: MaxKsyunz Co-authored-by: MaxKsyunz Co-authored-by: Max Ksyunz <99687200+MaxKsyunz@users.noreply.github.com> --- .../opensearch/sql/common/antlr/Parser.java | 7 ++ .../org/opensearch/sql/expression/DSL.java | 4 + .../function/BuiltinFunctionName.java | 1 + .../function/OpenSearchFunctions.java | 8 ++ .../sql/analysis/ExpressionAnalyzerTest.java | 11 ++ docs/user/dql/functions.rst | 41 +++++++ docs/user/ppl/functions/relevance.rst | 44 ++++++- .../opensearch/sql/legacy/SQLFunctionsIT.java | 6 +- .../opensearch/sql/ppl/MatchBoolPrefixIT.java | 59 +++++++++ .../org/opensearch/sql/sql/ConditionalIT.java | 8 +- .../opensearch/sql/sql/MatchBoolPrefixIT.java | 55 +++++++++ .../semantic/SemanticAnalyzerBasicTest.java | 2 +- .../antlr/semantic/scope/EnvironmentTest.java | 2 +- .../script/filter/FilterQueryBuilder.java | 2 + .../relevance/MatchBoolPrefixQuery.java | 47 ++++++++ .../script/filter/FilterQueryBuilderTest.java | 22 ++++ .../lucene/MatchBoolPrefixQueryTest.java | 114 ++++++++++++++++++ ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 1 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 1 + .../org/opensearch/sql/ppl/PPLService.java | 2 +- .../sql/ppl/antlr/PPLSyntaxParser.java | 6 +- ...ntaxParserMatchBoolPrefixSamplesTests.java | 64 ++++++++++ ...PPLSyntaxParserMatchPhraseSamplesTest.java | 2 +- .../sql/ppl/antlr/PPLSyntaxParserTest.java | 72 +++++------ .../sql/ppl/parser/AstBuilderTest.java | 4 +- .../ppl/utils/PPLQueryDataAnonymizerTest.java | 4 +- sql/src/main/antlr/OpenSearchSQLLexer.g4 | 2 + sql/src/main/antlr/OpenSearchSQLParser.g4 | 1 + .../sql/sql/antlr/SQLSyntaxParser.java | 4 +- .../common/antlr/SyntaxParserTestBase.java | 35 ++++++ .../sql/antlr/MatchBoolPrefixParserTest.java | 42 +++++++ .../sql/sql/antlr/SQLParserTest.java 
| 9 ++ 32 files changed, 627 insertions(+), 55 deletions(-) create mode 100644 common/src/main/java/org/opensearch/sql/common/antlr/Parser.java create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/MatchBoolPrefixIT.java create mode 100644 integ-test/src/test/java/org/opensearch/sql/sql/MatchBoolPrefixIT.java create mode 100644 opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/relevance/MatchBoolPrefixQuery.java create mode 100644 opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/MatchBoolPrefixQueryTest.java create mode 100644 ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserMatchBoolPrefixSamplesTests.java create mode 100644 sql/src/test/java/org/opensearch/sql/common/antlr/SyntaxParserTestBase.java create mode 100644 sql/src/test/java/org/opensearch/sql/sql/antlr/MatchBoolPrefixParserTest.java create mode 100644 sql/src/test/java/org/opensearch/sql/sql/antlr/SQLParserTest.java diff --git a/common/src/main/java/org/opensearch/sql/common/antlr/Parser.java b/common/src/main/java/org/opensearch/sql/common/antlr/Parser.java new file mode 100644 index 0000000000..7962f53ef6 --- /dev/null +++ b/common/src/main/java/org/opensearch/sql/common/antlr/Parser.java @@ -0,0 +1,7 @@ +package org.opensearch.sql.common.antlr; + +import org.antlr.v4.runtime.tree.ParseTree; + +public interface Parser { + ParseTree parse(String query); +} diff --git a/core/src/main/java/org/opensearch/sql/expression/DSL.java b/core/src/main/java/org/opensearch/sql/expression/DSL.java index 2fcf5e7c6b..e1d558ce55 100644 --- a/core/src/main/java/org/opensearch/sql/expression/DSL.java +++ b/core/src/main/java/org/opensearch/sql/expression/DSL.java @@ -666,6 +666,10 @@ public FunctionExpression simple_query_string(Expression... args) { return compile(BuiltinFunctionName.SIMPLE_QUERY_STRING, args); } + public FunctionExpression match_bool_prefix(Expression... 
args) { + return compile(BuiltinFunctionName.MATCH_BOOL_PREFIX, args); + } + private FunctionExpression compile(BuiltinFunctionName bfn, Expression... args) { return (FunctionExpression) repository.compile(bfn.getName(), Arrays.asList(args.clone())); } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java index b59c767828..baf715a2ad 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java @@ -190,6 +190,7 @@ public enum BuiltinFunctionName { SIMPLE_QUERY_STRING(FunctionName.of("simple_query_string")), MATCH_PHRASE(FunctionName.of("match_phrase")), MATCHPHRASE(FunctionName.of("matchphrase")), + MATCH_BOOL_PREFIX(FunctionName.of("match_bool_prefix")), /** * Legacy Relevance Function. diff --git a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java index 6f41075224..60b9174e09 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/OpenSearchFunctions.java @@ -8,6 +8,7 @@ import static org.opensearch.sql.data.type.ExprCoreType.STRING; import static org.opensearch.sql.data.type.ExprCoreType.STRUCT; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.util.ArrayList; import java.util.Collections; @@ -27,6 +28,7 @@ public class OpenSearchFunctions { public static final int MATCH_MAX_NUM_PARAMETERS = 14; + public static final int MATCH_BOOL_PREFIX_MAX_NUM_PARAMETERS = 9; public static final int MATCH_PHRASE_MAX_NUM_PARAMETERS = 5; public static final int MIN_NUM_PARAMETERS = 2; public static final int MULTI_MATCH_MAX_NUM_PARAMETERS = 17; @@ -36,6 
+38,7 @@ public class OpenSearchFunctions { * Add functions specific to OpenSearch to repository. */ public void register(BuiltinFunctionRepository repository) { + repository.register(match_bool_prefix()); repository.register(match()); repository.register(multi_match()); repository.register(simple_query_string()); @@ -45,6 +48,11 @@ public void register(BuiltinFunctionRepository repository) { repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASE)); } + private static FunctionResolver match_bool_prefix() { + FunctionName name = BuiltinFunctionName.MATCH_BOOL_PREFIX.getName(); + return getRelevanceFunctionResolver(name, MATCH_BOOL_PREFIX_MAX_NUM_PARAMETERS, STRING); + } + private static FunctionResolver match() { FunctionName funcName = BuiltinFunctionName.MATCH.getName(); return getRelevanceFunctionResolver(funcName, MATCH_MAX_NUM_PARAMETERS, STRING); diff --git a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java index f6fe679328..5aaf4e8b3e 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/ExpressionAnalyzerTest.java @@ -342,6 +342,17 @@ public void named_non_parse_expression() { assertAnalyzeEqual(DSL.ref("string_field", STRING), qualifiedName("string_field")); } + @Test + void match_bool_prefix_expression() { + assertAnalyzeEqual( + dsl.match_bool_prefix( + dsl.namedArgument("field", DSL.literal("fieldA")), + dsl.namedArgument("query", DSL.literal("sample query"))), + AstDSL.function("match_bool_prefix", + AstDSL.unresolvedArg("field", stringLiteral("fieldA")), + AstDSL.unresolvedArg("query", stringLiteral("sample query")))); + } + @Test void visit_span() { assertAnalyzeEqual( diff --git a/docs/user/dql/functions.rst b/docs/user/dql/functions.rst index c5711c7e7f..cc64e2d591 100644 --- a/docs/user/dql/functions.rst +++ b/docs/user/dql/functions.rst @@ -2233,6 
+2233,47 @@ Another example to show how to set custom values for the optional parameters:: +----------------------+--------------------------+ +MATCH_BOOL_PREFIX +----------------- + +Description +>>>>>>>>>>> + +``match_bool_prefix(field_expression, query_expression)`` + +The match_bool_prefix function maps to the match_bool_prefix query in the search engine. match_bool_prefix creates a match query from all but the last term in the query string. The last term is used to create a prefix query. The following optional parameters are supported: + +- fuzziness +- max_expansions +- prefix_length +- fuzzy_transpositions +- fuzzy_rewrite +- minimum_should_match +- boost +- operator +- analyzer + +Example with only ``field`` and ``query`` expressions, and all other parameters are set to default values:: + + os> SELECT firstname, address FROM accounts WHERE match_bool_prefix(address, 'Bristol Stre'); + fetched rows / total rows = 2/2 + +-------------+--------------------+ + | firstname | address | + |-------------+--------------------| + | Hattie | 671 Bristol Street | + | Nanette | 789 Madison Street | + +-------------+--------------------+ + +Another example to show how to set custom values for the optional parameters:: + + os> SELECT firstname, address FROM accounts WHERE match_bool_prefix(address, 'Bristol Street', minimum_should_match=2); + fetched rows / total rows = 1/1 + +-------------+--------------------+ + | firstname | address | + |-------------+--------------------| + | Hattie | 671 Bristol Street | + +-------------+--------------------+ + MULTI_MATCH ----------- diff --git a/docs/user/ppl/functions/relevance.rst b/docs/user/ppl/functions/relevance.rst index e6e787e3d7..7262aea3e9 100644 --- a/docs/user/ppl/functions/relevance.rst +++ b/docs/user/ppl/functions/relevance.rst @@ -204,7 +204,49 @@ Another example to show how to set custom values for the optional parameters:: | 1 | The House at Pooh Corner | Alan Alexander Milne | +------+--------------------------+----------------------+ +MATCH_BOOL_PREFIX +----------------- + +Description 
+>>>>>>>>>>> + +``match_bool_prefix(field_expression, query_expression)`` + +The match_bool_prefix function maps to the match_bool_prefix query in the search engine. match_bool_prefix creates a match query from all but the last term in the query string. The last term is used to create a prefix query. The following optional parameters are supported: + +- analyzer +- fuzziness +- max_expansions +- prefix_length +- fuzzy_transpositions +- operator +- fuzzy_rewrite +- minimum_should_match +- boost + +Example with only ``field`` and ``query`` expressions, and all other parameters are set to default values:: + + os> source=accounts | where match_bool_prefix(address, 'Bristol Stre') | fields firstname, address + fetched rows / total rows = 2/2 + +-------------+--------------------+ + | firstname | address | + |-------------+--------------------| + | Hattie | 671 Bristol Street | + | Nanette | 789 Madison Street | + +-------------+--------------------+ + +Another example to show how to set custom values for the optional parameters:: + + os> source=accounts | where match_bool_prefix(address, 'Bristol Stre', minimum_should_match = 2) | fields firstname, address + fetched rows / total rows = 1/1 + +-------------+--------------------+ + | firstname | address | + |-------------+--------------------| + | Hattie | 671 Bristol Street | + +-------------+--------------------+ + Limitations >>>>>>>>>>> -The relevance functions are available to execute only in OpenSearch DSL but not in memory as of now, so the relevance search might fail for queries that are too complex to translate into DSL if the relevance function is following after a complex PPL query. To make your queries always work-able, it is recommended to place the relevance commands as close to the search command as possible, to ensure the relevance functions are eligible to push down. 
For example, a complex query like ``search source = people | rename firstname as name | dedup account_number | fields name, account_number, balance, employer | where match(employer, 'Open Search') | stats count() by city`` could fail because it is difficult to translate to DSL, but it would be better if we rewrite it to an equivalent query as ``search source = people | where match(employer, 'Open Search') | rename firstname as name | dedup account_number | fields name, account_number, balance, employer | stats count() by city`` by moving the where command with relevance function to the second command right after the search command, and the relevance would be optimized and executed smoothly in OpenSearch DSL. See `Optimization <../../optimization/optimization.rst>`_ to get more details about the query engine optimization. \ No newline at end of file +The relevance functions are available to execute only in OpenSearch DSL but not in memory as of now, so the relevance search might fail for queries that are too complex to translate into DSL if the relevance function is following after a complex PPL query. To make your queries always work-able, it is recommended to place the relevance commands as close to the search command as possible, to ensure the relevance functions are eligible to push down. 
For example, a complex query like ``search source = people | rename firstname as name | dedup account_number | fields name, account_number, balance, employer | where match(employer, 'Open Search') | stats count() by city`` could fail because it is difficult to translate to DSL, but it would be better if we rewrite it to an equivalent query as ``search source = people | where match(employer, 'Open Search') | rename firstname as name | dedup account_number | fields name, account_number, balance, employer | stats count() by city`` by moving the where command with relevance function to the second command right after the search command, and the relevance would be optimized and executed smoothly in OpenSearch DSL. See `Optimization <../../optimization/optimization.rst>`_ to get more details about the query engine optimization. + diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLFunctionsIT.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLFunctionsIT.java index 50fc93ee9b..a19ed4db21 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLFunctionsIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLFunctionsIT.java @@ -480,7 +480,7 @@ public void castStatementInWhereClauseLessThanConstantTest() { public void castStatementInWhereClauseDatetimeCastTest() { JSONObject response = executeJdbcRequest("SELECT date_keyword FROM " + TestsConstants.TEST_INDEX_DATE - + " WHERE (CAST(date_keyword AS DATETIME) = \'2014-08-19T07:09:13.434Z\')"); + + " WHERE (CAST(date_keyword AS DATETIME) = '2014-08-19T07:09:13.434Z')"); String schema_result = "{\"name\":\"date_keyword\",\"type\":\"keyword\"}"; assertEquals(response.getJSONArray("schema").get(0).toString(), schema_result); @@ -704,7 +704,7 @@ public void ifFuncShouldPassJDBC() { JSONObject response = executeJdbcRequest( "SELECT IF(age > 30, 'True', 'False') AS Ages FROM " + TEST_INDEX_ACCOUNT + " WHERE age IS NOT NULL GROUP BY Ages"); - assertEquals("IF(age > 30, \'True\', 
\'False\')", response.query("/schema/0/name")); + assertEquals("IF(age > 30, 'True', 'False')", response.query("/schema/0/name")); assertEquals("Ages", response.query("/schema/0/alias")); assertEquals("keyword", response.query("/schema/0/type")); } @@ -742,7 +742,7 @@ public void ifnullShouldPassJDBC() throws IOException { JSONObject response = executeJdbcRequest( "SELECT IFNULL(lastname, 'unknown') AS name FROM " + TEST_INDEX_ACCOUNT + " GROUP BY name"); - assertEquals("IFNULL(lastname, \'unknown\')", response.query("/schema/0/name")); + assertEquals("IFNULL(lastname, 'unknown')", response.query("/schema/0/name")); assertEquals("name", response.query("/schema/0/alias")); assertEquals("keyword", response.query("/schema/0/type")); } diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/MatchBoolPrefixIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/MatchBoolPrefixIT.java new file mode 100644 index 0000000000..42ba8bea53 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/MatchBoolPrefixIT.java @@ -0,0 +1,59 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_PHRASE; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; + +public class MatchBoolPrefixIT extends PPLIntegTestCase { + + @Override + public void init() throws IOException { + loadIndex(Index.PHRASE); + } + + @Test + public void valid_query_match_test() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | where match_bool_prefix(phrase, 'qui') | fields phrase", + TEST_INDEX_PHRASE)); + + verifyDataRows(result, + rows("quick fox"), + rows("quick fox here")); + } + + @Test + public void optional_parameter_match_test() throws IOException { + 
JSONObject result = + executeQuery( + String.format( + "source=%s | where match_bool_prefix(phrase, '2 tes', minimum_should_match=1, fuzziness=2) | fields phrase", + TEST_INDEX_PHRASE)); + + verifyDataRows(result, + rows("my test"), + rows("my test 2")); + } + + @Test + public void no_matches_test() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | where match_bool_prefix(phrase, 'rice') | fields phrase", + TEST_INDEX_PHRASE)); + + assertEquals(0, result.getInt("total")); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/ConditionalIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/ConditionalIT.java index 385341327d..8b41e16551 100644 --- a/integ-test/src/test/java/org/opensearch/sql/sql/ConditionalIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/sql/ConditionalIT.java @@ -45,7 +45,7 @@ public void ifnullShouldPassJDBC() throws IOException { JSONObject response = executeJdbcRequest( "SELECT IFNULL(lastname, 'unknown') AS name FROM " + TEST_INDEX_ACCOUNT + " GROUP BY name"); - assertEquals("IFNULL(lastname, \'unknown\')", response.query("/schema/0/name")); + assertEquals("IFNULL(lastname, 'unknown')", response.query("/schema/0/name")); assertEquals("name", response.query("/schema/0/alias")); assertEquals("keyword", response.query("/schema/0/type")); } @@ -92,7 +92,7 @@ public void ifnullWithMissingInputTest() { public void nullifShouldPassJDBC() throws IOException { JSONObject response = executeJdbcRequest( "SELECT NULLIF(lastname, 'unknown') AS name FROM " + TEST_INDEX_ACCOUNT); - assertEquals("NULLIF(lastname, \'unknown\')", response.query("/schema/0/name")); + assertEquals("NULLIF(lastname, 'unknown')", response.query("/schema/0/name")); assertEquals("name", response.query("/schema/0/alias")); assertEquals("keyword", response.query("/schema/0/type")); } @@ -181,8 +181,8 @@ public void isnullWithMathExpr() throws IOException{ @Test public void ifShouldPassJDBC() throws IOException { 
JSONObject response = executeJdbcRequest( - "SELECT IF(2 > 0, \'hello\', \'world\') AS name FROM " + TEST_INDEX_ACCOUNT); - assertEquals("IF(2 > 0, \'hello\', \'world\')", response.query("/schema/0/name")); + "SELECT IF(2 > 0, 'hello', 'world') AS name FROM " + TEST_INDEX_ACCOUNT); + assertEquals("IF(2 > 0, 'hello', 'world')", response.query("/schema/0/name")); assertEquals("name", response.query("/schema/0/alias")); assertEquals("keyword", response.query("/schema/0/type")); } diff --git a/integ-test/src/test/java/org/opensearch/sql/sql/MatchBoolPrefixIT.java b/integ-test/src/test/java/org/opensearch/sql/sql/MatchBoolPrefixIT.java new file mode 100644 index 0000000000..1c959c5460 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/sql/MatchBoolPrefixIT.java @@ -0,0 +1,55 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_PHRASE; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.sql.legacy.SQLIntegTestCase; + +public class MatchBoolPrefixIT extends SQLIntegTestCase { + public void init() throws IOException { + loadIndex(SQLIntegTestCase.Index.PHRASE); + } + + @Test + public void query_matches_test() throws IOException { + String query = "SELECT phrase FROM " + + TEST_INDEX_PHRASE + " WHERE match_bool_prefix(phrase, 'quick')"; + var result = new JSONObject(executeQuery(query, "jdbc")); + verifySchema(result, schema("phrase", "text")); + + verifyDataRows(result, + rows("quick fox"), + rows("quick fox here")); + } + + @Test + public void additional_parameters_test() throws IOException { + String query = "SELECT 
phrase FROM " + + TEST_INDEX_PHRASE + " WHERE match_bool_prefix(phrase, '2 test', minimum_should_match=1, fuzziness=2)"; + var result = new JSONObject(executeQuery(query, "jdbc")); + verifySchema(result, schema("phrase", "text")); + + verifyDataRows(result, + rows("my test"), + rows("my test 2")); + } + + @Test + public void no_matches_test() throws IOException { + String query = "SELECT * FROM " + + TEST_INDEX_PHRASE + " WHERE match_bool_prefix(phrase, 'rice')"; + var result = new JSONObject(executeQuery(query, "jdbc")); + assertEquals(0, result.getInt("total")); + } +} diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/antlr/semantic/SemanticAnalyzerBasicTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/antlr/semantic/SemanticAnalyzerBasicTest.java index 1d594db2bc..67af723f8c 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/antlr/semantic/SemanticAnalyzerBasicTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/antlr/semantic/SemanticAnalyzerBasicTest.java @@ -594,4 +594,4 @@ public void fieldWithSpacesInNameShouldPass() { Assert.assertSame(TEXT, type.get()); } -} +} \ No newline at end of file diff --git a/legacy/src/test/java/org/opensearch/sql/legacy/antlr/semantic/scope/EnvironmentTest.java b/legacy/src/test/java/org/opensearch/sql/legacy/antlr/semantic/scope/EnvironmentTest.java index dfc49ad2fb..d9e9271728 100644 --- a/legacy/src/test/java/org/opensearch/sql/legacy/antlr/semantic/scope/EnvironmentTest.java +++ b/legacy/src/test/java/org/opensearch/sql/legacy/antlr/semantic/scope/EnvironmentTest.java @@ -29,7 +29,7 @@ public class EnvironmentTest { /** Use context class for push/pop */ - private SemanticContext context = new SemanticContext(); + private final SemanticContext context = new SemanticContext(); @Test public void defineFieldSymbolInDifferentEnvironmentsShouldBeAbleToResolve() { diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java 
b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java index 3bba48d24c..7a69f265c9 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java @@ -29,6 +29,7 @@ import org.opensearch.sql.opensearch.storage.script.filter.lucene.RangeQuery.Comparison; import org.opensearch.sql.opensearch.storage.script.filter.lucene.TermQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.WildcardQuery; +import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchBoolPrefixQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchPhraseQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchQuery; import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MultiMatchQuery; @@ -62,6 +63,7 @@ public class FilterQueryBuilder extends ExpressionNodeVisitor { + /** + * Constructor for MatchBoolPrefixQuery to configure RelevanceQuery + * with support of optional parameters. 
+ */ + public MatchBoolPrefixQuery() { + super(ImmutableMap.>builder() + .put("minimum_should_match", (b, v) -> b.minimumShouldMatch(v.stringValue())) + .put("fuzziness", (b, v) -> b.fuzziness(v.stringValue())) + .put("prefix_length", (b, v) -> b.prefixLength(Integer.parseInt(v.stringValue()))) + .put("max_expansions", (b, v) -> b.maxExpansions(Integer.parseInt(v.stringValue()))) + .put("fuzzy_transpositions", + (b, v) -> b.fuzzyTranspositions(Boolean.parseBoolean(v.stringValue()))) + .put("fuzzy_rewrite", (b, v) -> b.fuzzyRewrite(v.stringValue())) + .put("boost", (b, v) -> b.boost(Float.parseFloat(v.stringValue()))) + .put("analyzer", (b, v) -> b.analyzer(v.stringValue())) + .put("operator", (b,v) -> b.operator(Operator.fromString(v.stringValue()))) + .build()); + } + + /** + * Maps correct query builder function to class. + * @param field Field to execute query in + * @param query Text used to search field + * @return Object of executed query + */ + @Override + protected MatchBoolPrefixQueryBuilder createQueryBuilder(String field, String query) { + return QueryBuilders.matchBoolPrefixQuery(field, query); + } +} diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java index 43f3d3170e..615b542302 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java @@ -765,6 +765,28 @@ void match_phrase_too_many_args() { + "[STRING,STRING,STRING,STRING,STRING,STRING]", msg); } + + @Test + void should_build_match_bool_prefix_query_with_default_parameters() { + assertJsonEquals( + "{\n" + + " \"match_bool_prefix\" : {\n" + + " \"message\" : {\n" + + " \"query\" : \"search query\",\n" + + " \"operator\" : \"OR\",\n" + + " \"prefix_length\" : 
0,\n" + + " \"max_expansions\" : 50,\n" + + " \"fuzzy_transpositions\" : true,\n" + + " \"boost\" : 1.0\n" + + " }\n" + + " }\n" + + "}", + buildQuery( + dsl.match_bool_prefix( + dsl.namedArgument("field", literal("message")), + dsl.namedArgument("query", literal("search query"))))); + } + @Test void multi_match_missing_fields() { var msg = assertThrows(ExpressionEvaluationException.class, () -> diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/MatchBoolPrefixQueryTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/MatchBoolPrefixQueryTest.java new file mode 100644 index 0000000000..00cf3158c4 --- /dev/null +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/lucene/MatchBoolPrefixQueryTest.java @@ -0,0 +1,114 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.opensearch.storage.script.filter.lucene; + +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.util.List; +import java.util.stream.Stream; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.DisplayNameGenerator; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; +import org.opensearch.sql.common.antlr.SyntaxCheckException; +import org.opensearch.sql.data.model.ExprValue; +import org.opensearch.sql.data.type.ExprType; +import org.opensearch.sql.exception.SemanticCheckException; +import org.opensearch.sql.expression.DSL; +import org.opensearch.sql.expression.Expression; +import org.opensearch.sql.expression.FunctionExpression; +import org.opensearch.sql.expression.NamedArgumentExpression; +import org.opensearch.sql.expression.config.ExpressionConfig; +import org.opensearch.sql.expression.env.Environment; +import 
org.opensearch.sql.expression.function.FunctionName; +import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchBoolPrefixQuery; + +@DisplayNameGeneration(DisplayNameGenerator.ReplaceUnderscores.class) +public class MatchBoolPrefixQueryTest { + private final DSL dsl = new ExpressionConfig().dsl(new ExpressionConfig().functionRepository()); + private final MatchBoolPrefixQuery matchBoolPrefixQuery = new MatchBoolPrefixQuery(); + private final FunctionName matchBoolPrefix = FunctionName.of("match_bool_prefix"); + + static Stream> generateValidData() { + final DSL dsl = new ExpressionConfig().dsl(new ExpressionConfig().functionRepository()); + NamedArgumentExpression field = dsl.namedArgument("field", DSL.literal("field_value")); + NamedArgumentExpression query = dsl.namedArgument("query", DSL.literal("query_value")); + return List.of( + dsl.namedArgument("fuzziness", DSL.literal("AUTO")), + dsl.namedArgument("max_expansions", DSL.literal("50")), + dsl.namedArgument("prefix_length", DSL.literal("0")), + dsl.namedArgument("fuzzy_transpositions", DSL.literal("true")), + dsl.namedArgument("fuzzy_rewrite", DSL.literal("constant_score")), + dsl.namedArgument("minimum_should_match", DSL.literal("3")), + dsl.namedArgument("boost", DSL.literal("1")), + dsl.namedArgument("analyzer", DSL.literal("simple")), + dsl.namedArgument("operator", DSL.literal("Or")), + dsl.namedArgument("operator", DSL.literal("and")) + ).stream().map(arg -> List.of(field, query, arg)); + } + + @ParameterizedTest + @MethodSource("generateValidData") + public void test_valid_arguments(List validArgs) { + Assertions.assertNotNull(matchBoolPrefixQuery.build(new MatchExpression(validArgs))); + } + + @Test + public void test_valid_when_two_arguments() { + List arguments = List.of( + namedArgument("field", "field_value"), + namedArgument("query", "query_value")); + Assertions.assertNotNull(matchBoolPrefixQuery.build(new MatchExpression(arguments))); + } + + @Test + public void 
test_SyntaxCheckException_when_no_arguments() { + List arguments = List.of(); + assertThrows(SyntaxCheckException.class, + () -> matchBoolPrefixQuery.build(new MatchExpression(arguments))); + } + + @Test + public void test_SyntaxCheckException_when_one_argument() { + List arguments = List.of(namedArgument("field", "field_value")); + assertThrows(SyntaxCheckException.class, + () -> matchBoolPrefixQuery.build(new MatchExpression(arguments))); + } + + @Test + public void test_SemanticCheckException_when_invalid_argument() { + List arguments = List.of( + namedArgument("field", "field_value"), + namedArgument("query", "query_value"), + namedArgument("unsupported", "unsupported_value")); + Assertions.assertThrows(SemanticCheckException.class, + () -> matchBoolPrefixQuery.build(new MatchExpression(arguments))); + } + + private NamedArgumentExpression namedArgument(String name, String value) { + return dsl.namedArgument(name, DSL.literal(value)); + } + + private class MatchExpression extends FunctionExpression { + public MatchExpression(List arguments) { + super(MatchBoolPrefixQueryTest.this.matchBoolPrefix, arguments); + } + + @Override + public ExprValue valueOf(Environment valueEnv) { + throw new UnsupportedOperationException("Invalid function call, " + + "valueOf function need implementation only to support Expression interface"); + } + + @Override + public ExprType type() { + throw new UnsupportedOperationException("Invalid function call, " + + "type function need implementation only to support Expression interface"); + } + } +} diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 22e0bebac7..68fb402a2a 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -271,6 +271,7 @@ IF: 'IF'; // RELEVANCE FUNCTIONS AND PARAMETERS MATCH: 'MATCH'; MATCH_PHRASE: 'MATCH_PHRASE'; +MATCH_BOOL_PREFIX: 'MATCH_BOOL_PREFIX'; SIMPLE_QUERY_STRING: 'SIMPLE_QUERY_STRING'; MULTI_MATCH: 
'MULTI_MATCH'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index e9c9814191..c0ab5ebb84 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -389,6 +389,7 @@ binaryOperator singleFieldRelevanceFunctionName : MATCH | MATCH_PHRASE + | MATCH_BOOL_PREFIX ; multiFieldRelevanceFunctionName diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java b/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java index 5a3f8bfe77..a1a831c7cd 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/PPLService.java @@ -80,7 +80,7 @@ public void explain(PPLQueryRequest request, ResponseListener<ExplainResponse> l private PhysicalPlan plan(PPLQueryRequest request) { // 1.Parse query and convert parse tree (CST) to abstract syntax tree (AST) - ParseTree cst = parser.analyzeSyntax(request.getRequest()); + ParseTree cst = parser.parse(request.getRequest()); UnresolvedPlan ast = cst.accept( new AstBuilder(new AstExpressionBuilder(), request.getRequest())); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParser.java b/ppl/src/main/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParser.java index 0a18462275..168ba33a8a 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParser.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParser.java @@ -10,6 +10,7 @@ import org.antlr.v4.runtime.Lexer; import org.antlr.v4.runtime.tree.ParseTree; import org.opensearch.sql.common.antlr.CaseInsensitiveCharStream; +import org.opensearch.sql.common.antlr.Parser; import org.opensearch.sql.common.antlr.SyntaxAnalysisErrorListener; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLLexer; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser; @@ -17,11 +18,12 @@ /** * PPL Syntax Parser. 
*/ -public class PPLSyntaxParser { +public class PPLSyntaxParser implements Parser { /** * Analyze the query syntax. */ - public ParseTree analyzeSyntax(String query) { + @Override + public ParseTree parse(String query) { OpenSearchPPLParser parser = createParser(createLexer(query)); parser.addErrorListener(new SyntaxAnalysisErrorListener()); return parser.root(); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserMatchBoolPrefixSamplesTests.java b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserMatchBoolPrefixSamplesTests.java new file mode 100644 index 0000000000..dd146ea2cf --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserMatchBoolPrefixSamplesTests.java @@ -0,0 +1,64 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.antlr; + +import static org.junit.Assert.assertNotEquals; + +import java.util.List; +import org.antlr.v4.runtime.tree.ParseTree; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + + +@RunWith(Parameterized.class) +public class PPLSyntaxParserMatchBoolPrefixSamplesTests { + + + /** Returns sample queries that the PPLSyntaxParser is expected to parse successfully. + * @return an Iterable of sample queries. 
+ */ + @Parameterized.Parameters(name = "{0}") + public static Iterable<Object> sampleQueries() { + return List.of( + "source=t a= 1 | where match_bool_prefix(a, 'hello world')", + "source=t a = 1 | where match_bool_prefix(a, 'hello world'," + + " minimum_should_match = 3)", + "source=t a = 1 | where match_bool_prefix(a, 'hello world', fuzziness='AUTO')", + "source=t a = 1 | where match_bool_prefix(a, 'hello world', fuzziness='AUTO:4,6')", + "source=t a= 1 | where match_bool_prefix(a, 'hello world', prefix_length=0)", + "source=t a= 1 | where match_bool_prefix(a, 'hello world', max_expansions=1)", + "source=t a= 1 | where match_bool_prefix(a, 'hello world'," + + " fuzzy_transpositions=true)", + "source=t a= 1 | where match_bool_prefix(a, 'hello world'," + + " fuzzy_rewrite=constant_score)", + "source=t a= 1 | where match_bool_prefix(a, 'hello world'," + + " fuzzy_rewrite=constant_score_boolean)", + "source=t a= 1 | where match_bool_prefix(a, 'hello world'," + + " fuzzy_rewrite=scoring_boolean)", + "source=t a= 1 | where match_bool_prefix(a, 'hello world'," + + " fuzzy_rewrite=top_terms_blended_freqs_1)", + "source=t a= 1 | where match_bool_prefix(a, 'hello world'," + + " fuzzy_rewrite=top_terms_boost_1)", + "source=t a= 1 | where match_bool_prefix(a, 'hello world'," + + " fuzzy_rewrite=top_terms_1)", + "source=t a= 1 | where match_bool_prefix(a, 'hello world', boost=1)", + "source=t a = 1 | where match_bool_prefix(a, 'hello world', analyzer = 'standard'," + + "prefix_length = '0', boost = 1)"); + } + + private final String query; + + public PPLSyntaxParserMatchBoolPrefixSamplesTests(String query) { + this.query = query; + } + + @Test + public void test() { + ParseTree tree = new PPLSyntaxParser().parse(query); + assertNotEquals(null, tree); + } +} diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserMatchPhraseSamplesTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserMatchPhraseSamplesTest.java index a4fbee44e3..aef6d1d69e 
100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserMatchPhraseSamplesTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserMatchPhraseSamplesTest.java @@ -39,7 +39,7 @@ public PPLSyntaxParserMatchPhraseSamplesTest(String query) { @Test public void test() { - ParseTree tree = new PPLSyntaxParser().analyzeSyntax(query); + ParseTree tree = new PPLSyntaxParser().parse(query); assertNotEquals(null, tree); } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java index 41e2168d2f..47285751dc 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java @@ -20,19 +20,19 @@ public class PPLSyntaxParserTest { @Test public void testSearchCommandShouldPass() { - ParseTree tree = new PPLSyntaxParser().analyzeSyntax("search source=t a=1 b=2"); + ParseTree tree = new PPLSyntaxParser().parse("search source=t a=1 b=2"); assertNotEquals(null, tree); } @Test public void testSearchCommandIgnoreSearchKeywordShouldPass() { - ParseTree tree = new PPLSyntaxParser().analyzeSyntax("source=t a=1 b=2"); + ParseTree tree = new PPLSyntaxParser().parse("source=t a=1 b=2"); assertNotEquals(null, tree); } @Test public void testSearchFieldsCommandShouldPass() { - ParseTree tree = new PPLSyntaxParser().analyzeSyntax("search source=t a=1 b=2 | fields a,b"); + ParseTree tree = new PPLSyntaxParser().parse("search source=t a=1 b=2 | fields a,b"); assertNotEquals(null, tree); } @@ -41,107 +41,107 @@ public void testSearchCommandWithoutSourceShouldFail() { exceptionRule.expect(RuntimeException.class); exceptionRule.expectMessage("Failed to parse query due to offending symbol"); - new PPLSyntaxParser().analyzeSyntax("search a=1"); + new PPLSyntaxParser().parse("search a=1"); } @Test public void testRareCommandShouldPass() { - ParseTree tree = new 
PPLSyntaxParser().analyzeSyntax("source=t a=1 | rare a"); + ParseTree tree = new PPLSyntaxParser().parse("source=t a=1 | rare a"); assertNotEquals(null, tree); } @Test public void testRareCommandWithGroupByShouldPass() { - ParseTree tree = new PPLSyntaxParser().analyzeSyntax("source=t a=1 | rare a by b"); + ParseTree tree = new PPLSyntaxParser().parse("source=t a=1 | rare a by b"); assertNotEquals(null, tree); } @Test public void testTopCommandWithoutNShouldPass() { - ParseTree tree = new PPLSyntaxParser().analyzeSyntax("source=t a=1 | top a"); + ParseTree tree = new PPLSyntaxParser().parse("source=t a=1 | top a"); assertNotEquals(null, tree); } @Test public void testTopCommandWithNShouldPass() { - ParseTree tree = new PPLSyntaxParser().analyzeSyntax("source=t a=1 | top 1 a"); + ParseTree tree = new PPLSyntaxParser().parse("source=t a=1 | top 1 a"); assertNotEquals(null, tree); } @Test public void testTopCommandWithNAndGroupByShouldPass() { - ParseTree tree = new PPLSyntaxParser().analyzeSyntax("source=t a=1 | top 1 a by b"); + ParseTree tree = new PPLSyntaxParser().parse("source=t a=1 | top 1 a by b"); assertNotEquals(null, tree); } @Test public void testTopCommandWithoutNAndGroupByShouldPass() { - ParseTree tree = new PPLSyntaxParser().analyzeSyntax("source=t a=1 | top a by b"); + ParseTree tree = new PPLSyntaxParser().parse("source=t a=1 | top a by b"); assertNotEquals(null, tree); } @Test public void can_parse_multi_match_relevance_function() { - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE multi_match(['address'], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE multi_match(['address', 'notes'], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE multi_match([\"*\"], 'query')")); - 
assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE multi_match([\"address\"], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE multi_match([`address`], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE multi_match([address], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE multi_match(['address' ^ 1.0, 'notes' ^ 2.2], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE multi_match(['address' ^ 1.1, 'notes'], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE multi_match(['address', 'notes' ^ 1.5], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE multi_match(['address', 'notes' 3], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE multi_match(['address' ^ .3, 'notes' 3], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE multi_match([\"Tags\" ^ 1.5, Title, `Body` 4.2], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE multi_match([\"Tags\" ^ 1.5, Title, `Body` 4.2], 'query'," + "analyzer=keyword, quote_field_suffix=\".exact\", fuzzy_prefix_length = 4)")); } @Test public void 
can_parse_simple_query_string_relevance_function() { - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE simple_query_string(['address'], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE simple_query_string(['address', 'notes'], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE simple_query_string([\"*\"], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE simple_query_string([\"address\"], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE simple_query_string([`address`], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE simple_query_string([address], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE simple_query_string(['address' ^ 1.0, 'notes' ^ 2.2], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE simple_query_string(['address' ^ 1.1, 'notes'], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE simple_query_string(['address', 'notes' ^ 1.5], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE simple_query_string(['address', 'notes' 3], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + 
assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE simple_query_string(['address' ^ .3, 'notes' 3], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE simple_query_string([\"Tags\" ^ 1.5, Title, `Body` 4.2], 'query')")); - assertNotEquals(null, new PPLSyntaxParser().analyzeSyntax( + assertNotEquals(null, new PPLSyntaxParser().parse( "SOURCE=test | WHERE simple_query_string([\"Tags\" ^ 1.5, Title, `Body` 4.2], 'query'," + "analyzer=keyword, quote_field_suffix=\".exact\", fuzzy_prefix_length = 4)")); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index 3d3fbc06f5..df8ca769d1 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -58,7 +58,7 @@ public class AstBuilderTest { @Rule public ExpectedException exceptionRule = ExpectedException.none(); - private PPLSyntaxParser parser = new PPLSyntaxParser(); + private final PPLSyntaxParser parser = new PPLSyntaxParser(); @Test public void testSearchCommand() { @@ -664,6 +664,6 @@ protected void assertEqual(String query, String expected) { private Node plan(String query) { AstBuilder astBuilder = new AstBuilder(new AstExpressionBuilder(), query); - return astBuilder.visit(parser.analyzeSyntax(query)); + return astBuilder.visit(parser.parse(query)); } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index 57ba12eae9..46af993fc1 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -20,7 +20,7 @@ public class PPLQueryDataAnonymizerTest { - private 
PPLSyntaxParser parser = new PPLSyntaxParser(); + private final PPLSyntaxParser parser = new PPLSyntaxParser(); @Test public void testSearchCommand() { @@ -160,7 +160,7 @@ public void anonymizeFieldsNoArg() { private String anonymize(String query) { AstBuilder astBuilder = new AstBuilder(new AstExpressionBuilder(), query); - return anonymize(astBuilder.visit(parser.analyzeSyntax(query))); + return anonymize(astBuilder.visit(parser.parse(query))); } private String anonymize(UnresolvedPlan plan) { diff --git a/sql/src/main/antlr/OpenSearchSQLLexer.g4 b/sql/src/main/antlr/OpenSearchSQLLexer.g4 index 28b1cea9fd..5070eefab8 100644 --- a/sql/src/main/antlr/OpenSearchSQLLexer.g4 +++ b/sql/src/main/antlr/OpenSearchSQLLexer.g4 @@ -344,6 +344,8 @@ TIME_ZONE: 'TIME_ZONE'; TYPE: 'TYPE'; ZERO_TERMS_QUERY: 'ZERO_TERMS_QUERY'; +// RELEVANCE FUNCTIONS +MATCH_BOOL_PREFIX: 'MATCH_BOOL_PREFIX'; // Operators // Operators. Arithmetics diff --git a/sql/src/main/antlr/OpenSearchSQLParser.g4 b/sql/src/main/antlr/OpenSearchSQLParser.g4 index 603ea50e77..2bea8afbc8 100644 --- a/sql/src/main/antlr/OpenSearchSQLParser.g4 +++ b/sql/src/main/antlr/OpenSearchSQLParser.g4 @@ -395,6 +395,7 @@ flowControlFunctionName singleFieldRelevanceFunctionName : MATCH | MATCH_PHRASE | MATCHPHRASE + | MATCH_BOOL_PREFIX ; multiFieldRelevanceFunctionName diff --git a/sql/src/main/java/org/opensearch/sql/sql/antlr/SQLSyntaxParser.java b/sql/src/main/java/org/opensearch/sql/sql/antlr/SQLSyntaxParser.java index b42aa2fd6c..ee1e991bd4 100644 --- a/sql/src/main/java/org/opensearch/sql/sql/antlr/SQLSyntaxParser.java +++ b/sql/src/main/java/org/opensearch/sql/sql/antlr/SQLSyntaxParser.java @@ -9,6 +9,7 @@ import org.antlr.v4.runtime.CommonTokenStream; import org.antlr.v4.runtime.tree.ParseTree; import org.opensearch.sql.common.antlr.CaseInsensitiveCharStream; +import org.opensearch.sql.common.antlr.Parser; import org.opensearch.sql.common.antlr.SyntaxAnalysisErrorListener; import 
org.opensearch.sql.sql.antlr.parser.OpenSearchSQLLexer; import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser; @@ -16,13 +17,14 @@ /** * SQL syntax parser which encapsulates an ANTLR parser. */ -public class SQLSyntaxParser { +public class SQLSyntaxParser implements Parser { /** * Parse a SQL query by ANTLR parser. * @param query a SQL query * @return parse tree root */ + @Override public ParseTree parse(String query) { OpenSearchSQLLexer lexer = new OpenSearchSQLLexer(new CaseInsensitiveCharStream(query)); OpenSearchSQLParser parser = new OpenSearchSQLParser(new CommonTokenStream(lexer)); diff --git a/sql/src/test/java/org/opensearch/sql/common/antlr/SyntaxParserTestBase.java b/sql/src/test/java/org/opensearch/sql/common/antlr/SyntaxParserTestBase.java new file mode 100644 index 0000000000..526dc4e816 --- /dev/null +++ b/sql/src/test/java/org/opensearch/sql/common/antlr/SyntaxParserTestBase.java @@ -0,0 +1,35 @@ +package org.opensearch.sql.common.antlr; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import lombok.AccessLevel; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import org.opensearch.sql.sql.antlr.SQLSyntaxParser; + +/** + * A base class for tests for SQL or PPL parser. + */ +@RequiredArgsConstructor(access = AccessLevel.PROTECTED) +public abstract class SyntaxParserTestBase { + @Getter + private final Parser parser; + + /** + * A helper function that fails a test if the parser rejects a given query. + * @param query Query to test. + */ + protected void acceptQuery(String query) { + assertNotNull(parser.parse(query)); + } + + /** + * A helper function that fails a test if the parser accepts a given query. + * @param query Query to test. 
+ */ + protected void rejectQuery(String query) { + assertThrows(SyntaxCheckException.class, () -> parser.parse(query)); + } +} diff --git a/sql/src/test/java/org/opensearch/sql/sql/antlr/MatchBoolPrefixParserTest.java b/sql/src/test/java/org/opensearch/sql/sql/antlr/MatchBoolPrefixParserTest.java new file mode 100644 index 0000000000..66c4d5be9d --- /dev/null +++ b/sql/src/test/java/org/opensearch/sql/sql/antlr/MatchBoolPrefixParserTest.java @@ -0,0 +1,42 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.sql.antlr; + +import java.util.stream.Stream; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +public class MatchBoolPrefixParserTest extends SQLParserTest { + + @Test + public void testDefaultArguments() { + acceptQuery("SELECT * FROM T WHERE MATCH_BOOL_PREFIX(message, 'query')"); + } + + static Stream<String> generateValidArguments() { + return Stream.of( + new String("minimum_should_match=3"), + new String("fuzziness=AUTO"), + new String("prefix_length=0"), + new String("max_expansions=50"), + new String("fuzzy_transpositions=true"), + new String("fuzzy_rewrite=constant_score"), + new String("boost=1") + ); + } + + @ParameterizedTest + @MethodSource("generateValidArguments") + public void testValidArguments(String arg) { + acceptQuery("SELECT * FROM T WHERE MATCH_BOOL_PREFIX(message, 'query', " + arg + ")"); + } + + @Test + public void testOneParameter() { + rejectQuery("SELECT * FROM T WHERE MATCH_BOOL_PREFIX(message)"); + } +} diff --git a/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLParserTest.java b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLParserTest.java new file mode 100644 index 0000000000..7b8b415ee7 --- /dev/null +++ b/sql/src/test/java/org/opensearch/sql/sql/antlr/SQLParserTest.java @@ -0,0 +1,9 @@ +package org.opensearch.sql.sql.antlr; + +import 
org.opensearch.sql.common.antlr.SyntaxParserTestBase; + +public class SQLParserTest extends SyntaxParserTestBase { + public SQLParserTest() { + super(new SQLSyntaxParser()); + } +}