Skip to content

Commit

Permalink
Support match_phrase filter function in SQL and PPL (opensearch-proje…
Browse files Browse the repository at this point in the history
…ct#604)

Signed-off-by: MaxKsyunz <[email protected]>

Co-authored-by: Yury Fridlyand <[email protected]>
  • Loading branch information
MaxKsyunz and Yury-Fridlyand authored May 25, 2022
1 parent 760e377 commit 26058b8
Show file tree
Hide file tree
Showing 24 changed files with 906 additions and 141 deletions.
13 changes: 11 additions & 2 deletions core/src/main/java/org/opensearch/sql/expression/DSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -651,7 +651,16 @@ public FunctionExpression castDatetime(Expression value) {
}

public FunctionExpression match(Expression... args) {
return (FunctionExpression) repository
.compile(BuiltinFunctionName.MATCH.getName(), Arrays.asList(args.clone()));
return compile(BuiltinFunctionName.MATCH, args);
}

/**
 * Builds a {@code match_phrase} function expression from the supplied arguments.
 *
 * @param args arguments handed on to {@code compile}
 * @return the expression compiled for {@link BuiltinFunctionName#MATCH_PHRASE}
 */
public FunctionExpression match_phrase(Expression... args) {
  final BuiltinFunctionName function = BuiltinFunctionName.MATCH_PHRASE;
  return compile(function, args);
}

/**
 * Resolves the named built-in function against the repository and compiles it.
 *
 * @param bfn built-in function whose name is looked up in the repository
 * @param args arguments of the call
 * @return the compiled expression, cast to {@link FunctionExpression}
 */
private FunctionExpression compile(BuiltinFunctionName bfn, Expression... args) {
  // Work on a copy so the list handed to the repository is not backed by the caller's array.
  Expression[] copiedArgs = args.clone();
  return (FunctionExpression) repository.compile(bfn.getName(), Arrays.asList(copiedArgs));
}


}
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ public enum BuiltinFunctionName {
* Relevance Function.
*/
MATCH(FunctionName.of("match")),
MATCH_PHRASE(FunctionName.of("match_phrase")),
MATCHPHRASE(FunctionName.of("matchphrase")),

/**
* Legacy Relevance Function.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@

import static org.opensearch.sql.data.type.ExprCoreType.STRING;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import lombok.ToString;
import lombok.experimental.UtilityClass;
import org.opensearch.sql.data.model.ExprValue;
import org.opensearch.sql.data.type.ExprCoreType;
Expand All @@ -23,56 +23,49 @@

@UtilityClass
public class OpenSearchFunctions {

// NOTE: despite the "MAX" in its name, this value is passed to
// getRelevanceFunctionSignatureMap as numOptionalParameters, so match() is registered with
// signatures of MIN_NUM_PARAMETERS up to MIN_NUM_PARAMETERS + 12 STRING arguments.
public static final int MATCH_MAX_NUM_PARAMETERS = 12;
// Same convention as above: the count of optional arguments for match_phrase/matchphrase,
// i.e. signatures of MIN_NUM_PARAMETERS up to MIN_NUM_PARAMETERS + 3 arguments.
public static final int MATCH_PHRASE_MAX_NUM_PARAMETERS = 3;
// Smallest argument count registered for every relevance function in this class
// (presumably the mandatory field and query arguments; confirm against the grammar).
public static final int MIN_NUM_PARAMETERS = 2;

/**
 * Registers the OpenSearch-specific relevance functions with the given repository.
 *
 * @param repository repository receiving the MATCH, MATCH_PHRASE and MATCHPHRASE resolvers
 */
public void register(BuiltinFunctionRepository repository) {
  FunctionResolver matchResolver = match();
  repository.register(matchResolver);

  // MATCHPHRASE is registered next to MATCH_PHRASE purely for backwards compatibility;
  // both names get a resolver built the same way.
  List<BuiltinFunctionName> phraseNames =
      List.of(BuiltinFunctionName.MATCH_PHRASE, BuiltinFunctionName.MATCHPHRASE);
  for (BuiltinFunctionName phraseName : phraseNames) {
    repository.register(match_phrase(phraseName));
  }
}

/**
 * Creates the resolver for the MATCH relevance function.
 */
private static FunctionResolver match() {
  return getRelevanceFunctionResolver(
      BuiltinFunctionName.MATCH.getName(), MATCH_MAX_NUM_PARAMETERS);
}

/**
 * Creates a match_phrase resolver under the name carried by {@code matchPhrase}.
 *
 * @param matchPhrase built-in function name to register the resolver under
 */
private static FunctionResolver match_phrase(BuiltinFunctionName matchPhrase) {
  return getRelevanceFunctionResolver(
      matchPhrase.getName(), MATCH_PHRASE_MAX_NUM_PARAMETERS);
}

private static FunctionResolver getRelevanceFunctionResolver(
FunctionName funcName, int maxNumParameters) {
return new FunctionResolver(funcName,
ImmutableMap.<FunctionSignature, FunctionBuilder>builder()
.put(new FunctionSignature(funcName, ImmutableList.of(STRING, STRING)),
args -> new OpenSearchFunction(funcName, args))
.put(new FunctionSignature(funcName, ImmutableList.of(STRING, STRING, STRING)),
args -> new OpenSearchFunction(funcName, args))
.put(new FunctionSignature(funcName, ImmutableList.of(STRING, STRING, STRING, STRING)),
args -> new OpenSearchFunction(funcName, args))
.put(new FunctionSignature(funcName, ImmutableList
.of(STRING, STRING, STRING, STRING, STRING)),
args -> new OpenSearchFunction(funcName, args))
.put(new FunctionSignature(funcName, ImmutableList
.of(STRING, STRING, STRING, STRING, STRING, STRING)),
args -> new OpenSearchFunction(funcName, args))
.put(new FunctionSignature(funcName, ImmutableList
.of(STRING, STRING, STRING, STRING, STRING, STRING, STRING)),
args -> new OpenSearchFunction(funcName, args))
.put(new FunctionSignature(funcName, ImmutableList
.of(STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING)),
args -> new OpenSearchFunction(funcName, args))
.put(new FunctionSignature(funcName, ImmutableList
.of(STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING)),
args -> new OpenSearchFunction(funcName, args))
.put(new FunctionSignature(funcName, ImmutableList
.of(STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING,
STRING)),
args -> new OpenSearchFunction(funcName, args))
.put(new FunctionSignature(funcName, ImmutableList
.of(STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING,
STRING, STRING)),
args -> new OpenSearchFunction(funcName, args))
.put(new FunctionSignature(funcName, ImmutableList
.of(STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING,
STRING, STRING, STRING)),
args -> new OpenSearchFunction(funcName, args))
.put(new FunctionSignature(funcName, ImmutableList
.of(STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING,
STRING, STRING, STRING, STRING)),
args -> new OpenSearchFunction(funcName, args))
.put(new FunctionSignature(funcName, ImmutableList
.of(STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING, STRING,
STRING, STRING, STRING, STRING, STRING)),
args -> new OpenSearchFunction(funcName, args))
.build());
getRelevanceFunctionSignatureMap(funcName, maxNumParameters));
}

/**
 * Builds the signature table of a relevance function: one all-STRING signature for every
 * argument count from {@code MIN_NUM_PARAMETERS} up to
 * {@code MIN_NUM_PARAMETERS + numOptionalParameters}, inclusive.
 *
 * @param funcName name the signatures and the created functions are bound to
 * @param numOptionalParameters how many arguments may follow the mandatory ones
 * @return immutable map from each signature to a shared function builder
 */
private static Map<FunctionSignature, FunctionBuilder> getRelevanceFunctionSignatureMap(
    FunctionName funcName, int numOptionalParameters) {
  final int largestArity = MIN_NUM_PARAMETERS + numOptionalParameters;
  ImmutableMap.Builder<FunctionSignature, FunctionBuilder> signatures = ImmutableMap.builder();
  // Every signature shares one builder; only the number of STRING parameters differs.
  FunctionBuilder factory = arguments -> new OpenSearchFunction(funcName, arguments);

  int arity = MIN_NUM_PARAMETERS;
  while (arity <= largestArity) {
    List<ExprType> parameterTypes = Collections.nCopies(arity, STRING);
    signatures.put(new FunctionSignature(funcName, parameterTypes), factory);
    arity++;
  }
  return signatures.build();
}

private static class OpenSearchFunction extends FunctionExpression {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,15 @@
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN;

import java.util.List;
import org.junit.jupiter.api.Test;
import org.opensearch.sql.expression.DSL;
import org.opensearch.sql.expression.ExpressionTestBase;
import org.opensearch.sql.expression.FunctionExpression;
import org.opensearch.sql.expression.NamedArgumentExpression;



public class OpenSearchFunctionsTest extends ExpressionTestBase {
private final NamedArgumentExpression field = new NamedArgumentExpression(
"field", DSL.literal("message"));
Expand All @@ -40,10 +43,14 @@ public class OpenSearchFunctionsTest extends ExpressionTestBase {
"operator", DSL.literal("OR"));
private final NamedArgumentExpression minimumShouldMatch = new NamedArgumentExpression(
"minimum_should_match", DSL.literal("1"));
private final NamedArgumentExpression zeroTermsQuery = new NamedArgumentExpression(
"zero_terms_query", DSL.literal("ALL"));
private final NamedArgumentExpression zeroTermsQueryAll = new NamedArgumentExpression(
"zero_terms_query", DSL.literal("ALL"));
private final NamedArgumentExpression zeroTermsQueryNone = new NamedArgumentExpression(
"zero_terms_query", DSL.literal("None"));
private final NamedArgumentExpression boost = new NamedArgumentExpression(
"boost", DSL.literal("2.0"));
private final NamedArgumentExpression slop = new NamedArgumentExpression(
"slop", DSL.literal("3"));

@Test
void match() {
Expand Down Expand Up @@ -98,16 +105,34 @@ void match() {

expr = dsl.match(
field, query, analyzer, autoGenerateSynonymsPhrase, fuzziness, maxExpansions, prefixLength,
fuzzyTranspositions, fuzzyRewrite, lenient, operator, minimumShouldMatch, zeroTermsQuery);
fuzzyTranspositions, fuzzyRewrite, lenient, operator, minimumShouldMatch,
zeroTermsQueryAll);
assertEquals(BOOLEAN, expr.type());

expr = dsl.match(
field, query, analyzer, autoGenerateSynonymsPhrase, fuzziness, maxExpansions, prefixLength,
fuzzyTranspositions, fuzzyRewrite, lenient, operator, minimumShouldMatch, zeroTermsQuery,
fuzzyTranspositions, fuzzyRewrite, lenient, operator, minimumShouldMatch, zeroTermsQueryAll,
boost);
assertEquals(BOOLEAN, expr.type());
}

@Test
void match_phrase() {
  // Every supported match_phrase call shape must resolve to a BOOLEAN expression.
  match_phrase_dsl_expressions()
      .forEach(expression -> assertEquals(BOOLEAN, expression.type()));
}


/**
 * Returns match_phrase expressions built with two, three, four and five arguments.
 */
List<FunctionExpression> match_phrase_dsl_expressions() {
  FunctionExpression requiredOnly = dsl.match_phrase(field, query);
  FunctionExpression withAnalyzer = dsl.match_phrase(field, query, analyzer);
  FunctionExpression withZeroTerms =
      dsl.match_phrase(field, query, analyzer, zeroTermsQueryAll);
  FunctionExpression withSlop =
      dsl.match_phrase(field, query, analyzer, zeroTermsQueryNone, slop);
  return List.of(requiredOnly, withAnalyzer, withZeroTerms, withSlop);
}

@Test
void match_in_memory() {
FunctionExpression expr = dsl.match(field, query);
Expand Down
40 changes: 40 additions & 0 deletions docs/user/dql/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2195,3 +2195,43 @@ Another example to show how to set custom values for the optional parameters::
| Bond |
+------------+

MATCH_PHRASE
------------

Description
>>>>>>>>>>>

``match_phrase(field_expression, query_expression[, option=<option_value>]*)``

The match_phrase function maps to the match_phrase query used in the search engine and returns the documents in which the given field matches the provided phrase. Available parameters include:

- analyzer
- slop
- zero_terms_query

For backward compatibility, matchphrase is also supported and maps to the same match_phrase query.

Example with only the ``field`` and ``query`` expressions; all other parameters are left at their default values::

os> SELECT author, title FROM books WHERE match_phrase(author, 'Alexander Milne');
fetched rows / total rows = 2/2
+----------------------+--------------------------+
| author | title |
|----------------------+--------------------------|
| Alan Alexander Milne | The House at Pooh Corner |
| Alan Alexander Milne | Winnie-the-Pooh |
+----------------------+--------------------------+



Another example to show how to set custom values for the optional parameters::

os> SELECT author, title FROM books WHERE match_phrase(author, 'Alan Milne', slop = 2);
fetched rows / total rows = 2/2
+----------------------+--------------------------+
| author | title |
|----------------------+--------------------------|
| Alan Alexander Milne | The House at Pooh Corner |
| Alan Alexander Milne | Winnie-the-Pooh |
+----------------------+--------------------------+

40 changes: 40 additions & 0 deletions docs/user/ppl/functions/relevance.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,46 @@ Another example to show how to set custom values for the optional parameters::
| Bond |
+------------+

MATCH_PHRASE
------------

Description
>>>>>>>>>>>

``match_phrase(field_expression, query_expression[, option=<option_value>]*)``

The match_phrase function maps to the match_phrase query used in the search engine and returns the documents in which the given field matches the provided phrase. Available parameters include:

- analyzer
- slop
- zero_terms_query

For backward compatibility, matchphrase is also supported and maps to the same match_phrase query.

Example with only the ``field`` and ``query`` expressions; all other parameters are left at their default values::

os> source=books | where match_phrase(author, 'Alexander Milne') | fields author, title
fetched rows / total rows = 2/2
+----------------------+--------------------------+
| author | title |
|----------------------+--------------------------|
| Alan Alexander Milne | The House at Pooh Corner |
| Alan Alexander Milne | Winnie-the-Pooh |
+----------------------+--------------------------+



Another example to show how to set custom values for the optional parameters::

os> source=books | where match_phrase(author, 'Alan Milne', slop = 2) | fields author, title
fetched rows / total rows = 2/2
+----------------------+--------------------------+
| author | title |
|----------------------+--------------------------|
| Alan Alexander Milne | The House at Pooh Corner |
| Alan Alexander Milne | Winnie-the-Pooh |
+----------------------+--------------------------+

Limitations
>>>>>>>>>>>

Expand Down
2 changes: 2 additions & 0 deletions doctest/test_data/books.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"id": 1, "author": "Alan Alexander Milne", "title": "The House at Pooh Corner"}
{"id": 2, "author": "Alan Alexander Milne", "title": "Winnie-the-Pooh"}
4 changes: 3 additions & 1 deletion doctest/test_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
PEOPLE = "people"
ACCOUNT2 = "account2"
NYC_TAXI = "nyc_taxi"
BOOKS = "books"


class DocTestConnection(OpenSearchConnection):
Expand Down Expand Up @@ -88,6 +89,7 @@ def set_up_test_indices(test):
load_file("people.json", index_name=PEOPLE)
load_file("account2.json", index_name=ACCOUNT2)
load_file("nyc_taxi.json", index_name=NYC_TAXI)
load_file("books.json", index_name=BOOKS)


def load_file(filename, index_name):
Expand Down Expand Up @@ -116,7 +118,7 @@ def set_up(test):

def tear_down(test):
# drop leftover tables after each test
test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI], ignore_unavailable=True)
test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS], ignore_unavailable=True)


docsuite = partial(doctest.DocFileSuite,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT;
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK;
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES;
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_PHRASE;
import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;

Expand All @@ -23,6 +24,8 @@ public void init() throws IOException {
loadIndex(Index.ACCOUNT);
loadIndex(Index.BANK_WITH_NULL_VALUES);
loadIndex(Index.BANK);
loadIndex(Index.GAME_OF_THRONES);
loadIndex(Index.PHRASE);
}

@Test
Expand Down Expand Up @@ -110,4 +113,22 @@ public void testRelevanceFunction() throws IOException {
TEST_INDEX_BANK));
verifyDataRows(result, rows("Hattie"));
}

@Test
public void testMatchPhraseFunction() throws IOException {
  // match_phrase without options, run against the phrase test index.
  String ppl = String.format(
      "source=%s | where match_phrase(phrase, 'quick fox') | fields phrase", TEST_INDEX_PHRASE);
  JSONObject result = executeQuery(ppl);
  verifyDataRows(result, rows("quick fox"), rows("quick fox here"));
}

// NOTE(review): the method name reads "Math" where "Match" was presumably intended; the name
// is left untouched here so anything referring to this test by name keeps working.
@Test
public void testMathPhraseWithSlop() throws IOException {
  // match_phrase with the optional slop argument, run against the phrase test index.
  String ppl = String.format(
      "source=%s | where match_phrase(phrase, 'brown fox', slop = 2) | fields phrase", TEST_INDEX_PHRASE);
  JSONObject result = executeQuery(ppl);
  verifyDataRows(result, rows("brown fox"), rows("fox brown"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.opensearch.sql.opensearch.storage.script.filter.lucene.RangeQuery.Comparison;
import org.opensearch.sql.opensearch.storage.script.filter.lucene.TermQuery;
import org.opensearch.sql.opensearch.storage.script.filter.lucene.WildcardQuery;
import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchPhraseQuery;
import org.opensearch.sql.opensearch.storage.script.filter.lucene.relevance.MatchQuery;
import org.opensearch.sql.opensearch.storage.serialization.ExpressionSerializer;

Expand All @@ -52,6 +53,8 @@ public class FilterQueryBuilder extends ExpressionNodeVisitor<QueryBuilder, Obje
.put(BuiltinFunctionName.GTE.getName(), new RangeQuery(Comparison.GTE))
.put(BuiltinFunctionName.LIKE.getName(), new WildcardQuery())
.put(BuiltinFunctionName.MATCH.getName(), new MatchQuery())
.put(BuiltinFunctionName.MATCH_PHRASE.getName(), new MatchPhraseQuery())
.put(BuiltinFunctionName.MATCHPHRASE.getName(), new MatchPhraseQuery())
.put(BuiltinFunctionName.QUERY.getName(), new MatchQuery())
.put(BuiltinFunctionName.MATCH_QUERY.getName(), new MatchQuery())
.put(BuiltinFunctionName.MATCHQUERY.getName(), new MatchQuery())
Expand Down
Loading

0 comments on commit 26058b8

Please sign in to comment.