Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add MatchPhraseQuery As Alternate Syntax for Match_Phrase Function #165

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ public enum BuiltinFunctionName {
SIMPLE_QUERY_STRING(FunctionName.of("simple_query_string")),
MATCH_PHRASE(FunctionName.of("match_phrase")),
MATCHPHRASE(FunctionName.of("matchphrase")),
MATCHPHRASEQUERY(FunctionName.of("matchphrasequery")),
QUERY_STRING(FunctionName.of("query_string")),
MATCH_BOOL_PREFIX(FunctionName.of("match_bool_prefix")),
HIGHLIGHT(FunctionName.of("highlight")),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ public void register(BuiltinFunctionRepository repository) {
// compatibility.
repository.register(match_phrase(BuiltinFunctionName.MATCH_PHRASE));
repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASE));
repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASEQUERY));
repository.register(match_phrase_prefix());
}

Expand Down
40 changes: 40 additions & 0 deletions docs/user/dql/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2769,6 +2769,46 @@ Another example to show how to set custom values for the optional parameters::
+----------------------+--------------------------+


MATCHPHRASEQUERY
----------------

Description
>>>>>>>>>>>

``matchphrasequery(field_expression, query_expression[, option=<option_value>]*)``

The matchphrasequery function maps to the match_phrase query used in the search engine, returning the documents that match the provided text in a given field.
It is an alternate syntax for the `match_phrase`_ function. Available parameters include:

- analyzer
- slop
- zero_terms_query

For backward compatibility, matchphrase is also supported and mapped to the match_phrase query.

Example with only the ``field`` and ``query`` expressions; all other parameters are set to their default values::

os> SELECT author, title FROM books WHERE matchphrasequery(author, 'Alexander Milne');
fetched rows / total rows = 2/2
+----------------------+--------------------------+
| author | title |
|----------------------+--------------------------|
| Alan Alexander Milne | The House at Pooh Corner |
| Alan Alexander Milne | Winnie-the-Pooh |
+----------------------+--------------------------+

Another example to show how to set custom values for the optional parameters::

os> SELECT author, title FROM books WHERE matchphrasequery(author, 'Alan Milne', slop = 2);
fetched rows / total rows = 2/2
+----------------------+--------------------------+
| author | title |
|----------------------+--------------------------|
| Alan Alexander Milne | The House at Pooh Corner |
| Alan Alexander Milne | Winnie-the-Pooh |
+----------------------+--------------------------+


MATCH_BOOL_PREFIX
-----------------

Expand Down
19 changes: 19 additions & 0 deletions integ-test/src/test/java/org/opensearch/sql/sql/MatchPhraseIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,29 @@ public void test_matchphrase_legacy_function() throws IOException {
verifyDataRows(result, rows("quick fox"), rows("quick fox here"));
}

/**
 * Runs {@code matchphrasequery(phrase, 'quick fox')} against {@code TEST_INDEX_PHRASE}
 * and expects exactly the rows "quick fox" and "quick fox here".
 */
@Test
public void test_matchphrasequery_legacy_function() throws IOException {
  String sql = String.format(
      "SELECT phrase FROM %s WHERE matchphrasequery(phrase, 'quick fox')",
      TEST_INDEX_PHRASE);
  JSONObject response = executeJdbcRequest(sql);
  verifyDataRows(response, rows("quick fox"), rows("quick fox here"));
}

/**
 * Runs {@code match_phrase(phrase, 'brown fox', slop = 2)} against
 * {@code TEST_INDEX_PHRASE} and expects the rows "brown fox" and "fox brown".
 */
@Test
public void test_match_phrase_with_slop() throws IOException {
  String sql = String.format(
      "SELECT phrase FROM %s WHERE match_phrase(phrase, 'brown fox', slop = 2)",
      TEST_INDEX_PHRASE);
  JSONObject response = executeJdbcRequest(sql);
  verifyDataRows(response, rows("brown fox"), rows("fox brown"));
}

/**
 * Issues the same phrase search through {@code matchphrase}, {@code match_phrase} and
 * {@code matchphrasequery}, then checks that the first response is similar to each of
 * the other two.
 */
@Test
public void test_alternate_syntax_for_match_phrase_returns_same_result() throws IOException {
  JSONObject viaMatchphrase = executeJdbcRequest(String.format(
      "SELECT phrase FROM %s WHERE matchphrase(phrase, 'quick fox')", TEST_INDEX_PHRASE));
  JSONObject viaMatchPhrase = executeJdbcRequest(String.format(
      "SELECT phrase FROM %s WHERE match_phrase(phrase, 'quick fox')", TEST_INDEX_PHRASE));
  JSONObject viaMatchPhraseQuery = executeJdbcRequest(String.format(
      "SELECT phrase FROM %s WHERE matchphrasequery(phrase, 'quick fox')", TEST_INDEX_PHRASE));

  assertTrue(viaMatchphrase.similar(viaMatchPhrase));
  assertTrue(viaMatchphrase.similar(viaMatchPhraseQuery));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ public class FilterQueryBuilder extends ExpressionNodeVisitor<QueryBuilder, Obje
.put(BuiltinFunctionName.MATCH.getName(), new MatchQuery())
.put(BuiltinFunctionName.MATCH_PHRASE.getName(), new MatchPhraseQuery())
.put(BuiltinFunctionName.MATCHPHRASE.getName(), new MatchPhraseQuery())
.put(BuiltinFunctionName.MATCHPHRASEQUERY.getName(), new MatchPhraseQuery())
.put(BuiltinFunctionName.QUERY.getName(), new QueryQuery())
.put(BuiltinFunctionName.MATCH_QUERY.getName(), new MatchQuery())
.put(BuiltinFunctionName.MATCHQUERY.getName(), new MatchQuery())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@
public class MatchPhraseQueryTest {

private final MatchPhraseQuery matchPhraseQuery = new MatchPhraseQuery();
private final FunctionName matchPhrase = FunctionName.of("match_phrase");
private final FunctionName matchPhraseName = FunctionName.of("matchphrase");
private final FunctionName matchPhraseWithUnderscoreName = FunctionName.of("match_phrase");
private final FunctionName matchPhraseQueryName = FunctionName.of("matchphrasequery");

@Test
public void test_SyntaxCheckException_when_no_arguments() {
Expand Down Expand Up @@ -102,9 +104,175 @@ public void test_zero_terms_query_parameter_lower_case() {
Assertions.assertNotNull(matchPhraseQuery.build(new MatchPhraseExpression(arguments)));
}

/**
 * Building from an empty argument list must throw {@link SyntaxCheckException} when the
 * expression is created with {@code matchPhraseWithUnderscoreName}.
 */
@Test
public void test_SyntaxCheckException_when_no_arguments_match_phrase_syntax() {
  List<Expression> noArgs = List.of();
  assertThrows(SyntaxCheckException.class, () -> {
    matchPhraseQuery.build(new MatchPhraseExpression(noArgs, matchPhraseWithUnderscoreName));
  });
}

/**
 * Building from only a {@code field} argument must throw {@link SyntaxCheckException}
 * when the expression is created with {@code matchPhraseWithUnderscoreName}.
 */
@Test
public void test_SyntaxCheckException_when_one_argument_match_phrase_syntax() {
  List<Expression> fieldOnly = List.of(DSL.namedArgument("field", "test"));
  assertThrows(SyntaxCheckException.class, () -> {
    matchPhraseQuery.build(new MatchPhraseExpression(fieldOnly, matchPhraseWithUnderscoreName));
  });
}

/**
 * Building with an extra {@code unsupported} argument must throw
 * {@link SemanticCheckException} when the expression is created with
 * {@code matchPhraseWithUnderscoreName}.
 *
 * <p>NOTE(review): the method name mentions SyntaxCheckException, but the assertion
 * expects SemanticCheckException.
 */
@Test
public void test_SyntaxCheckException_when_invalid_parameter_match_phrase_syntax() {
  List<Expression> withUnknownOption = List.of(
      DSL.namedArgument("field", "test"),
      DSL.namedArgument("query", "test2"),
      DSL.namedArgument("unsupported", "3"));
  Assertions.assertThrows(SemanticCheckException.class, () -> {
    matchPhraseQuery.build(
        new MatchPhraseExpression(withUnknownOption, matchPhraseWithUnderscoreName));
  });
}

/**
 * Building with {@code field}, {@code query} and {@code analyzer} arguments must return a
 * non-null result when the expression is created with {@code matchPhraseWithUnderscoreName}.
 */
@Test
public void test_analyzer_parameter_match_phrase_syntax() {
  List<Expression> args = List.of(
      DSL.namedArgument("field", "t1"),
      DSL.namedArgument("query", "t2"),
      DSL.namedArgument("analyzer", "standard"));
  MatchPhraseExpression expression =
      new MatchPhraseExpression(args, matchPhraseWithUnderscoreName);
  Assertions.assertNotNull(matchPhraseQuery.build(expression));
}

/**
 * Building with just {@code field} and {@code query} arguments must return a non-null
 * result when the expression is created with {@code matchPhraseWithUnderscoreName}.
 */
@Test
public void build_succeeds_with_two_arguments_match_phrase_syntax() {
  List<Expression> args = List.of(
      DSL.namedArgument("field", "test"),
      DSL.namedArgument("query", "test2"));
  MatchPhraseExpression expression =
      new MatchPhraseExpression(args, matchPhraseWithUnderscoreName);
  Assertions.assertNotNull(matchPhraseQuery.build(expression));
}

/**
 * Building with {@code field}, {@code query} and {@code slop} arguments must return a
 * non-null result when the expression is created with {@code matchPhraseWithUnderscoreName}.
 */
@Test
public void test_slop_parameter_match_phrase_syntax() {
  List<Expression> args = List.of(
      DSL.namedArgument("field", "t1"),
      DSL.namedArgument("query", "t2"),
      DSL.namedArgument("slop", "2"));
  MatchPhraseExpression expression =
      new MatchPhraseExpression(args, matchPhraseWithUnderscoreName);
  Assertions.assertNotNull(matchPhraseQuery.build(expression));
}

/**
 * Building with an upper-case {@code zero_terms_query} value ("ALL") must return a
 * non-null result when the expression is created with {@code matchPhraseWithUnderscoreName}.
 */
@Test
public void test_zero_terms_query_parameter_match_phrase_syntax() {
  List<Expression> args = List.of(
      DSL.namedArgument("field", "t1"),
      DSL.namedArgument("query", "t2"),
      DSL.namedArgument("zero_terms_query", "ALL"));
  MatchPhraseExpression expression =
      new MatchPhraseExpression(args, matchPhraseWithUnderscoreName);
  Assertions.assertNotNull(matchPhraseQuery.build(expression));
}

/**
 * Building with a lower-case {@code zero_terms_query} value ("all") must return a
 * non-null result when the expression is created with {@code matchPhraseWithUnderscoreName}.
 */
@Test
public void test_zero_terms_query_parameter_lower_case_match_phrase_syntax() {
  List<Expression> args = List.of(
      DSL.namedArgument("field", "t1"),
      DSL.namedArgument("query", "t2"),
      DSL.namedArgument("zero_terms_query", "all"));
  MatchPhraseExpression expression =
      new MatchPhraseExpression(args, matchPhraseWithUnderscoreName);
  Assertions.assertNotNull(matchPhraseQuery.build(expression));
}

/**
 * Building from an empty argument list must throw {@link SyntaxCheckException} when the
 * expression is created with {@code matchPhraseQueryName}.
 *
 * <p>NOTE(review): the method name says "matchphrase", but the function name under test
 * is {@code matchPhraseQueryName}.
 */
@Test
public void test_SyntaxCheckException_when_no_arguments_matchphrase_syntax() {
  List<Expression> noArgs = List.of();
  assertThrows(SyntaxCheckException.class, () -> {
    matchPhraseQuery.build(new MatchPhraseExpression(noArgs, matchPhraseQueryName));
  });
}

/**
 * Building from only a {@code field} argument must throw {@link SyntaxCheckException}
 * when the expression is created with {@code matchPhraseQueryName}.
 *
 * <p>NOTE(review): the method name says "matchphrase", but the function name under test
 * is {@code matchPhraseQueryName}.
 */
@Test
public void test_SyntaxCheckException_when_one_argument_matchphrase_syntax() {
  List<Expression> fieldOnly = List.of(DSL.namedArgument("field", "test"));
  assertThrows(SyntaxCheckException.class, () -> {
    matchPhraseQuery.build(new MatchPhraseExpression(fieldOnly, matchPhraseQueryName));
  });
}

/**
 * Building with an extra {@code unsupported} argument must throw
 * {@link SemanticCheckException} when the expression is created with
 * {@code matchPhraseQueryName}.
 *
 * <p>NOTE(review): the method name mentions SyntaxCheckException and "matchphrase", but
 * the assertion expects SemanticCheckException and the function name under test is
 * {@code matchPhraseQueryName}.
 */
@Test
public void test_SyntaxCheckException_when_invalid_parameter_matchphrase_syntax() {
  List<Expression> withUnknownOption = List.of(
      DSL.namedArgument("field", "test"),
      DSL.namedArgument("query", "test2"),
      DSL.namedArgument("unsupported", "3"));
  Assertions.assertThrows(SemanticCheckException.class, () -> {
    matchPhraseQuery.build(new MatchPhraseExpression(withUnknownOption, matchPhraseQueryName));
  });
}

/**
 * Building with {@code field}, {@code query} and {@code analyzer} arguments must return a
 * non-null result when the expression is created with {@code matchPhraseQueryName}.
 *
 * <p>NOTE(review): the method name says "matchphrase", but the function name under test
 * is {@code matchPhraseQueryName}.
 */
@Test
public void test_analyzer_parameter_matchphrase_syntax() {
  List<Expression> args = List.of(
      DSL.namedArgument("field", "t1"),
      DSL.namedArgument("query", "t2"),
      DSL.namedArgument("analyzer", "standard"));
  MatchPhraseExpression expression = new MatchPhraseExpression(args, matchPhraseQueryName);
  Assertions.assertNotNull(matchPhraseQuery.build(expression));
}

/**
 * Building with just {@code field} and {@code query} arguments must return a non-null
 * result when the expression is created with {@code matchPhraseQueryName}.
 *
 * <p>NOTE(review): the method name says "matchphrase", but the function name under test
 * is {@code matchPhraseQueryName}.
 */
@Test
public void build_succeeds_with_two_arguments_matchphrase_syntax() {
  List<Expression> args = List.of(
      DSL.namedArgument("field", "test"),
      DSL.namedArgument("query", "test2"));
  MatchPhraseExpression expression = new MatchPhraseExpression(args, matchPhraseQueryName);
  Assertions.assertNotNull(matchPhraseQuery.build(expression));
}

/**
 * Building with {@code field}, {@code query} and {@code slop} arguments must return a
 * non-null result when the expression is created with {@code matchPhraseQueryName}.
 *
 * <p>NOTE(review): the method name says "matchphrase", but the function name under test
 * is {@code matchPhraseQueryName}.
 */
@Test
public void test_slop_parameter_matchphrase_syntax() {
  List<Expression> args = List.of(
      DSL.namedArgument("field", "t1"),
      DSL.namedArgument("query", "t2"),
      DSL.namedArgument("slop", "2"));
  MatchPhraseExpression expression = new MatchPhraseExpression(args, matchPhraseQueryName);
  Assertions.assertNotNull(matchPhraseQuery.build(expression));
}

/**
 * Building with an upper-case {@code zero_terms_query} value ("ALL") must return a
 * non-null result when the expression is created with {@code matchPhraseQueryName}.
 *
 * <p>NOTE(review): the method name says "matchphrase", but the function name under test
 * is {@code matchPhraseQueryName}.
 */
@Test
public void test_zero_terms_query_parameter_matchphrase_syntax() {
  List<Expression> args = List.of(
      DSL.namedArgument("field", "t1"),
      DSL.namedArgument("query", "t2"),
      DSL.namedArgument("zero_terms_query", "ALL"));
  MatchPhraseExpression expression = new MatchPhraseExpression(args, matchPhraseQueryName);
  Assertions.assertNotNull(matchPhraseQuery.build(expression));
}

@Test
public void test_zero_terms_query_parameter_lower_case_matchphrase_syntax() {
List<Expression> arguments = List.of(
DSL.namedArgument("field", "t1"),
DSL.namedArgument("query", "t2"),
DSL.namedArgument("zero_terms_query", "all")
);
Assertions.assertNotNull(matchPhraseQuery.build(new MatchPhraseExpression(
MaxKsyunz marked this conversation as resolved.
Show resolved Hide resolved
arguments, matchPhraseQueryName)));
}

private class MatchPhraseExpression extends FunctionExpression {
public MatchPhraseExpression(List<Expression> arguments) {
super(MatchPhraseQueryTest.this.matchPhrase, arguments);
super(matchPhraseName, arguments);
}

public MatchPhraseExpression(List<Expression> arguments, FunctionName funcName) {
super(funcName, arguments);
}

@Override
Expand Down
1 change: 1 addition & 0 deletions sql/src/main/antlr/OpenSearchSQLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ INCLUDE: 'INCLUDE';
IN_TERMS: 'IN_TERMS';
MATCHPHRASE: 'MATCHPHRASE';
MATCH_PHRASE: 'MATCH_PHRASE';
MATCHPHRASEQUERY: 'MATCHPHRASEQUERY';
SIMPLE_QUERY_STRING: 'SIMPLE_QUERY_STRING';
QUERY_STRING: 'QUERY_STRING';
MATCH_PHRASE_PREFIX: 'MATCH_PHRASE_PREFIX';
Expand Down
2 changes: 1 addition & 1 deletion sql/src/main/antlr/OpenSearchSQLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ systemFunctionName
;

// Function names accepted by relevance functions that search a single field.
singleFieldRelevanceFunctionName
    : MATCH | MATCH_PHRASE | MATCHPHRASE | MATCHPHRASEQUERY
    | MATCH_BOOL_PREFIX | MATCH_PHRASE_PREFIX
    ;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,7 @@ public void can_parse_match_relevance_function() {
"matchPhraseComplexQueries",
"matchPhraseGeneratedQueries",
"generateMatchPhraseQueries",
"matchPhraseQueryComplexQueries"
})
public void canParseComplexMatchPhraseArgsTest(String query) {
assertNotNull(parser.parse(query));
Expand Down Expand Up @@ -420,6 +421,22 @@ private static Stream<String> matchPhraseComplexQueries() {
);
}

/**
 * Supplies the {@code matchphrasequery(...)} SQL statements, each with a different
 * combination of optional arguments, as a stream of query strings.
 */
private static Stream<String> matchPhraseQueryComplexQueries() {
  final String[] queries = {
      "SELECT * FROM t WHERE matchphrasequery(c, 3)",
      "SELECT * FROM t WHERE matchphrasequery(c, 3, fuzziness=AUTO)",
      "SELECT * FROM t WHERE matchphrasequery(c, 3, zero_terms_query=\"all\")",
      "SELECT * FROM t WHERE matchphrasequery(c, 3, lenient=true)",
      "SELECT * FROM t WHERE matchphrasequery(c, 3, lenient='true')",
      "SELECT * FROM t WHERE matchphrasequery(c, 3, operator=xor)",
      "SELECT * FROM t WHERE matchphrasequery(c, 3, cutoff_frequency=0.04)",
      "SELECT * FROM t WHERE matchphrasequery(c, 3, cutoff_frequency=0.04, analyzer = english, "
          + "prefix_length=34, fuzziness='auto', minimum_should_match='2<-25% 9<-3')",
      "SELECT * FROM t WHERE matchphrasequery(c, 3, minimum_should_match='2<-25% 9<-3')",
      "SELECT * FROM t WHERE matchphrasequery(c, 3, operator='AUTO')"
  };
  // Stream.of(T...) takes the array as its varargs, yielding one element per query.
  return Stream.of(queries);
}

private static Stream<String> matchPhraseGeneratedQueries() {
var matchArgs = new HashMap<String, Object[]>();
matchArgs.put("fuzziness", new String[]{ "AUTO", "AUTO:1,5", "1" });
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,22 @@ public void filteredDistinctCount() {
);
}

/**
 * Checks that a {@code matchphrasequery} call carrying {@code slop}, {@code analyzer} and
 * {@code zero_terms_query} options is built into a {@code matchphrasequery} function node
 * whose unresolved arguments are all string literals, in the order given.
 */
@Test
public void matchPhraseQueryAllParameters() {
  String expression = "matchphrasequery(test, 'search query', slop = 3, "
      + "analyzer = 'standard', zero_terms_query='NONE')";
  assertEquals(
      AstDSL.function(
          "matchphrasequery",
          unresolvedArg("field", stringLiteral("test")),
          unresolvedArg("query", stringLiteral("search query")),
          unresolvedArg("slop", stringLiteral("3")),
          unresolvedArg("analyzer", stringLiteral("standard")),
          unresolvedArg("zero_terms_query", stringLiteral("NONE"))),
      buildExprAst(expression));
}

@Test
public void matchPhrasePrefixAllParameters() {
assertEquals(
Expand Down