Skip to content

Commit

Permalink
Merge branch 'main' into integ-query_string-#191
Browse files Browse the repository at this point in the history
  • Loading branch information
forestmvey authored Jul 11, 2022
2 parents c68aecf + e180d56 commit 77b6162
Show file tree
Hide file tree
Showing 41 changed files with 1,260 additions and 241 deletions.
2 changes: 1 addition & 1 deletion DEVELOPER_GUIDE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ OpenSearch & OpenSearch Dashboards

For convenience, we recommend installing `OpenSearch <https://www.opensearch.org/downloads.html>`_ and `OpenSearch Dashboards <https://www.opensearch.org/downloads.html>`_ on your local machine. You can download the open source ZIP for each and extract them to a folder.

If you just want to have a quick look, you can also get an OpenSearch running with plugin installed by ``./gradlew :plugin:run``.
If you just want to have a quick look, you can also get an OpenSearch running with plugin installed by ``./gradlew :opensearch-sql-plugin:run``.

OpenSearch Dashboards is optional, but makes it easier to test your queries. Alternately, you can use curl from the terminal to run queries against the plugin.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<connector-plugin class='opensearch_jdbc' superclass='jdbc' plugin-version='2.0.0.0' name='OpenSearch' version='18.1' min-version-tableau='2021.1'>
<vendor-information>
<company name="OpenSearch Project"/>
<support-link url="https://github.com/opensearch-project/sql"/>
<support-link url="https://forum.opensearch.org/"/>
</vendor-information>
<connection-customization class="opensearch_jdbc" enabled="true" version="10.0">
<vendor name="OpenSearch Project"/>
Expand Down
8 changes: 8 additions & 0 deletions core/src/main/java/org/opensearch/sql/expression/DSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,10 @@ public NamedArgumentExpression namedArgument(String argName, Expression value) {
return new NamedArgumentExpression(argName, value);
}

/**
 * Convenience overload: wraps the string {@code value} with {@code literal(...)} and
 * delegates to the overload that takes an {@link Expression} value.
 */
public NamedArgumentExpression namedArgument(String name, String value) {
  final Expression literalValue = literal(value);
  return namedArgument(name, literalValue);
}

public static ParseExpression parsed(Expression expression, Expression pattern,
Expression identifier) {
return new ParseExpression(expression, pattern, identifier);
Expand Down Expand Up @@ -658,6 +662,10 @@ public FunctionExpression match_phrase(Expression... args) {
return compile(BuiltinFunctionName.MATCH_PHRASE, args);
}

/**
 * Builds the function expression for {@code BuiltinFunctionName.MATCH_PHRASE_PREFIX}
 * by passing the given arguments to {@code compile}.
 */
public FunctionExpression match_phrase_prefix(Expression... args) {
  final BuiltinFunctionName functionName = BuiltinFunctionName.MATCH_PHRASE_PREFIX;
  return compile(functionName, args);
}

/**
 * Builds the function expression for {@code BuiltinFunctionName.MULTI_MATCH}
 * by passing the given arguments to {@code compile}.
 */
public FunctionExpression multi_match(Expression... args) {
  final BuiltinFunctionName functionName = BuiltinFunctionName.MULTI_MATCH;
  return compile(functionName, args);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ public enum BuiltinFunctionName {
MATCHPHRASE(FunctionName.of("matchphrase")),
QUERY_STRING(FunctionName.of("query_string")),
MATCH_BOOL_PREFIX(FunctionName.of("match_bool_prefix")),

MATCH_PHRASE_PREFIX(FunctionName.of("match_phrase_prefix")),
/**
* Legacy Relevance Function.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public class OpenSearchFunctions {
public static final int MULTI_MATCH_MAX_NUM_PARAMETERS = 17;
public static final int SIMPLE_QUERY_STRING_MAX_NUM_PARAMETERS = 14;
public static final int QUERY_STRING_MAX_NUM_PARAMETERS = 25;
public static final int MATCH_PHRASE_PREFIX_MAX_NUM_PARAMETERS = 7;

/**
* Add functions specific to OpenSearch to repository.
Expand All @@ -48,6 +49,7 @@ public void register(BuiltinFunctionRepository repository) {
// compatibility.
repository.register(match_phrase(BuiltinFunctionName.MATCH_PHRASE));
repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASE));
repository.register(match_phrase_prefix());
}

private static FunctionResolver match_bool_prefix() {
Expand All @@ -60,6 +62,11 @@ private static FunctionResolver match() {
return getRelevanceFunctionResolver(funcName, MATCH_MAX_NUM_PARAMETERS, STRING);
}

/**
 * Creates the resolver for the {@code match_phrase_prefix} relevance function, limited to
 * {@code MATCH_PHRASE_PREFIX_MAX_NUM_PARAMETERS} parameters.
 */
private static FunctionResolver match_phrase_prefix() {
  return getRelevanceFunctionResolver(
      BuiltinFunctionName.MATCH_PHRASE_PREFIX.getName(),
      MATCH_PHRASE_PREFIX_MAX_NUM_PARAMETERS,
      STRING);
}

private static FunctionResolver match_phrase(BuiltinFunctionName matchPhrase) {
FunctionName funcName = matchPhrase.getName();
return getRelevanceFunctionResolver(funcName, MATCH_PHRASE_MAX_NUM_PARAMETERS, STRING);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import static org.opensearch.sql.ast.dsl.AstDSL.intLiteral;
import static org.opensearch.sql.ast.dsl.AstDSL.qualifiedName;
import static org.opensearch.sql.ast.dsl.AstDSL.stringLiteral;
import static org.opensearch.sql.ast.dsl.AstDSL.unresolvedArg;
import static org.opensearch.sql.data.model.ExprValueUtils.LITERAL_TRUE;
import static org.opensearch.sql.data.model.ExprValueUtils.integerValue;
import static org.opensearch.sql.data.type.ExprCoreType.BOOLEAN;
Expand Down Expand Up @@ -509,6 +510,29 @@ void query_string_expression_two_fields() {
AstDSL.unresolvedArg("fields", new RelevanceFieldList(ImmutableMap.of(
"field1", 1.F, "field2", .3F))),
AstDSL.unresolvedArg("query", stringLiteral("query_value"))));

/**
 * Analyzing an unresolved {@code match_phrase_prefix} call that carries every supported
 * argument should yield the equivalent DSL function expression.
 */
@Test
public void match_phrase_prefix_all_params() {
  final Expression expected = dsl.match_phrase_prefix(
      dsl.namedArgument("field", "test"),
      dsl.namedArgument("query", "search query"),
      dsl.namedArgument("slop", "3"),
      dsl.namedArgument("boost", "1.5"),
      dsl.namedArgument("analyzer", "standard"),
      dsl.namedArgument("max_expansions", "4"),
      dsl.namedArgument("zero_terms_query", "NONE"));

  final UnresolvedExpression unresolved = AstDSL.function("match_phrase_prefix",
      unresolvedArg("field", stringLiteral("test")),
      unresolvedArg("query", stringLiteral("search query")),
      unresolvedArg("slop", stringLiteral("3")),
      unresolvedArg("boost", stringLiteral("1.5")),
      unresolvedArg("analyzer", stringLiteral("standard")),
      unresolvedArg("max_expansions", stringLiteral("4")),
      unresolvedArg("zero_terms_query", stringLiteral("NONE")));

  assertAnalyzeEqual(expected, unresolved);
}

protected Expression analyze(UnresolvedExpression unresolvedExpression) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,19 @@ List<FunctionExpression> match_phrase_dsl_expressions() {
);
}

/** Returns the {@code match_phrase_prefix} expressions exercised by the type test. */
List<FunctionExpression> match_phrase_prefix_dsl_expressions() {
  final FunctionExpression requiredArgsOnly = dsl.match_phrase_prefix(field, query);
  return List.of(requiredArgsOnly);
}

/** Every {@code match_phrase_prefix} DSL expression must report the BOOLEAN type. */
@Test
public void match_phrase_prefix() {
  match_phrase_prefix_dsl_expressions()
      .forEach(expression -> assertEquals(BOOLEAN, expression.type()));
}

@Test
void match_in_memory() {
FunctionExpression expr = dsl.match(field, query);
Expand Down
41 changes: 41 additions & 0 deletions docs/user/dql/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2274,6 +2274,47 @@ Another example to show how to set custom values for the optional parameters::
| Hattie | 671 Bristol Street |
+-------------+--------------------+

MATCH_PHRASE_PREFIX
-------------------

Description
>>>>>>>>>>>

``match_phrase_prefix(field_expression, query_expression[, option=<option_value>]*)``

The match_phrase_prefix function maps to the match_phrase_prefix query used in the search engine
and returns the documents in which the given field matches the provided text. Available parameters include:

- analyzer
- slop
- zero_terms_query
- max_expansions
- boost


Example with only the ``field`` and ``query`` expressions; all other parameters are set to their default values::

os> SELECT author, title FROM books WHERE match_phrase_prefix(author, 'Alexander Mil');
fetched rows / total rows = 2/2
+----------------------+--------------------------+
| author | title |
|----------------------+--------------------------|
| Alan Alexander Milne | The House at Pooh Corner |
| Alan Alexander Milne | Winnie-the-Pooh |
+----------------------+--------------------------+

Another example to show how to set custom values for the optional parameters::

os> SELECT author, title FROM books WHERE match_phrase_prefix(author, 'Alan Mil', slop = 2);
fetched rows / total rows = 2/2
+----------------------+--------------------------+
| author | title |
|----------------------+--------------------------|
| Alan Alexander Milne | The House at Pooh Corner |
| Alan Alexander Milne | Winnie-the-Pooh |
+----------------------+--------------------------+


MULTI_MATCH
-----------

Expand Down
43 changes: 43 additions & 0 deletions docs/user/ppl/functions/relevance.rst
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,49 @@ Another example to show how to set custom values for the optional parameters::
+----------------------+--------------------------+



MATCH_PHRASE_PREFIX
-------------------

Description
>>>>>>>>>>>

``match_phrase_prefix(field_expression, query_expression[, option=<option_value>]*)``

The match_phrase_prefix function maps to the match_phrase_prefix query used in the search engine and returns the documents in which the given field matches the provided text. Available parameters include:

- analyzer
- slop
- max_expansions
- boost
- zero_terms_query

Example with only the ``field`` and ``query`` expressions; all other parameters are set to their default values::

os> source=books | where match_phrase_prefix(author, 'Alexander Mil') | fields author, title
fetched rows / total rows = 2/2
+----------------------+--------------------------+
| author | title |
|----------------------+--------------------------|
| Alan Alexander Milne | The House at Pooh Corner |
| Alan Alexander Milne | Winnie-the-Pooh |
+----------------------+--------------------------+



Another example to show how to set custom values for the optional parameters::

os> source=books | where match_phrase_prefix(author, 'Alan Mil', slop = 2) | fields author, title
fetched rows / total rows = 2/2
+----------------------+--------------------------+
| author | title |
|----------------------+--------------------------|
| Alan Alexander Milne | The House at Pooh Corner |
| Alan Alexander Milne | Winnie-the-Pooh |
+----------------------+--------------------------+



MULTI_MATCH
-----------

Expand Down
2 changes: 1 addition & 1 deletion doctest/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ testClusters {
}
}
}))
plugin ':plugin'
plugin ':opensearch-sql-plugin'
testDistribution = 'integ_test'
}
}
Expand Down
8 changes: 4 additions & 4 deletions integ-test/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ dependencies {
testImplementation group: 'org.opensearch.client', name: 'opensearch-rest-client', version: "${opensearch_version}"
testImplementation group: 'org.hamcrest', name: 'hamcrest', version: '2.1'
implementation group: 'org.apache.logging.log4j', name: 'log4j-core', version:'2.17.1'
testImplementation project(':plugin')
testImplementation project(':opensearch-sql-plugin')
testImplementation project(':legacy')
testImplementation('org.junit.jupiter:junit-jupiter-api:5.6.2')
testRuntimeOnly('org.junit.jupiter:junit-jupiter-engine:5.6.2')
Expand Down Expand Up @@ -98,12 +98,12 @@ testClusters.all {
}

testClusters.integTest {
plugin ":plugin"
plugin ":opensearch-sql-plugin"
}

// Run PPL ITs and new, legacy and comparison SQL ITs with new SQL engine enabled
integTest {
dependsOn ':plugin:bundlePlugin'
dependsOn ':opensearch-sql-plugin:bundlePlugin'

systemProperty 'tests.security.manager', 'false'
systemProperty('project.root', project.projectDir.absolutePath)
Expand Down Expand Up @@ -147,7 +147,7 @@ integTest {


task comparisonTest(type: RestIntegTestTask) {
dependsOn ':plugin:bundlePlugin'
dependsOn ':opensearch-sql-plugin:bundlePlugin'

systemProperty 'tests.security.manager', 'false'
systemProperty('project.root', project.projectDir.absolutePath)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.ppl;

import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BEER;
import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;

import java.io.IOException;
import org.json.JSONObject;
import org.junit.Test;

/**
 * Integration tests for the {@code match_phrase_prefix} relevance function when used in a
 * PPL {@code where} command. All queries run against the beer test index loaded in
 * {@link #init()}.
 */
public class MatchPhrasePrefixWhereCommandIT extends PPLIntegTestCase {

  /** Loads the beer index that every test in this class queries. */
  @Override
  public void init() throws IOException {
    loadIndex(Index.BEER);
  }

  /** Only the mandatory field and query arguments are supplied. */
  @Test
  public void required_parameters() throws IOException {
    String query = "source = %s | WHERE match_phrase_prefix(Title, 'champagne be') | fields Title";
    JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER));
    verifyDataRows(result,
        rows("Can old flat champagne be used for vinegar?"),
        rows("Elder flower champagne best to use natural yeast or add a wine yeast?"));
  }

  /** Every optional parameter is supplied in a single call. */
  @Test
  public void all_optional_parameters() throws IOException {
    // The values for optional parameters are valid but arbitrary.
    String query = "source = %s " +
        "| WHERE match_phrase_prefix(Title, 'flat champ', boost = 1.0, " +
        "zero_terms_query='ALL', max_expansions = 2, analyzer=standard, slop=0) " +
        "| fields Title";
    JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER));
    verifyDataRows(result, rows("Can old flat champagne be used for vinegar?"));
  }

  @Test
  public void max_expansions_is_3() throws IOException {
    // max_expansions applies to the last term in the query -- 'bottl'.
    // It tells OpenSearch to consider only the first 3 terms that start with 'bottl'.
    // In this dataset these are 'bottle-conditioning', 'bottling', 'bottles'.
    String query = "source = %s " +
        "| WHERE match_phrase_prefix(Tags, 'draught bottl', max_expansions=3) | fields Tags";
    JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER));
    verifyDataRows(result, rows("brewing draught bottling"),
        rows("draught bottles"));
  }

  @Test
  public void analyzer_english() throws IOException {
    // English analyzer removes 'in' and 'to' as they are common words.
    // This results in an empty query, so no rows are expected.
    String query = "source = %s " +
        "| WHERE match_phrase_prefix(Title, 'in to', analyzer=english)" +
        "| fields Title";
    JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER));
    // assertEquals (rather than assertTrue on a comparison) reports the actual total on failure.
    assertEquals("Expect English analyzer to filter out common words 'in' and 'to'",
        0, result.getInt("total"));
  }

  @Test
  public void analyzer_standard() throws IOException {
    // Standard analyzer does not treat 'in' and 'to' as special terms.
    // This results in 'to' being used as a phrase prefix, giving us 'Tokyo'.
    String query = "source = %s " +
        "| WHERE match_phrase_prefix(Title, 'in to', analyzer=standard)" +
        "| fields Title";
    JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER));
    verifyDataRows(result, rows("Local microbreweries and craft beer in Tokyo"));
  }

  @Test
  public void zero_term_query_all() throws IOException {
    // English analyzer removes 'in' and 'to' as they are common words.
    // zero_terms_query of 'ALL' causes all rows to be returned.
    // 'sort -Title | head 1' keeps only one row so the expected result stays small.
    String query = "source = %s" +
        "| WHERE match_phrase_prefix(Title, 'in to', analyzer=english, zero_terms_query='ALL') " +
        "| sort -Title | head 1 | fields Title";
    JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER));
    verifyDataRows(result, rows("was working great, now all foam"));
  }

  @Test
  public void slop_is_2() throws IOException {
    // With slop=2 the query terms may match in transposed order:
    // 'gas ta' is expected to match 'taste gas' ('ta' is the prefix of 'taste').
    // NOTE(review): presumably a transposition of adjacent terms costs 2 slop positions --
    // confirm against the OpenSearch match_phrase documentation.
    String query = "source = %s" +
        "| where match_phrase_prefix(Tags, 'gas ta', slop=2) " +
        "| fields Tags";
    JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER));
    verifyDataRows(result, rows("taste gas"));
  }

  @Test
  public void slop_is_3() throws IOException {
    // With slop=3 the transposed terms may additionally be separated by another term:
    // 'gas ta' is expected to match 'taste draught gas' as well as 'taste gas'.
    String query = "source = %s" +
        "| where match_phrase_prefix(Tags, 'gas ta', slop=3)" +
        "| fields Tags";
    JSONObject result = executeQuery(String.format(query, TEST_INDEX_BEER));
    verifyDataRows(result,
        rows("taste draught gas"),
        rows("taste gas"));
  }
}
Loading

0 comments on commit 77b6162

Please sign in to comment.