Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for wildcard_query function #156

Merged
merged 35 commits into from
Nov 25, 2022
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
a96a740
Implemented wildcard_query and added tests in core
Nov 2, 2022
885f04c
Implemented and added tests for sql
Nov 2, 2022
4d0dadb
Implemented and added tests for ppl
Nov 3, 2022
9c25e66
Implemented and added tests for lucene
Nov 3, 2022
efe0ee5
Fixed test for like expression
Nov 3, 2022
bedf662
Added parameters to wildcard_query
Nov 4, 2022
afbeb44
Added integration tests for ppl and sql
Nov 4, 2022
e25d4da
Added docs for doctests
Nov 4, 2022
84fcf88
Pulled from 2.x
Nov 7, 2022
4708ae2
Fixed issues introduced during merging
Nov 7, 2022
e05e5c5
Addressed PR comment
Nov 7, 2022
988b40b
Added annotation that was deleted from merging
Nov 8, 2022
5c225ce
Fixed merge conflict issues
Nov 8, 2022
0b9752c
Addressed some PR comments and handled escaping wildcards
Nov 9, 2022
a2ca906
Added tests for wildcard conversion and created data for testing
Nov 11, 2022
68c97d6
Added javadoc
Nov 14, 2022
a873b27
Changed index name
Nov 14, 2022
518c09f
Temporarily changed jackson_version to run GH actions
Nov 14, 2022
1101ca3
Added comparison test for wildcard conversion
Nov 14, 2022
fc8883c
Removed PPL implementation of wildcard_query
Nov 14, 2022
ae46f96
Reverted ppl docs change
Nov 14, 2022
a9f5be7
Made namedArgument a static function
Nov 14, 2022
bba63ce
Removed extra space
Nov 14, 2022
8b7b442
Merge branch 'integ-add-wildcardquery' of github.com:Bit-Quill/opense…
Nov 15, 2022
4af9eef
Fixed LIKE query
Nov 17, 2022
57aac8e
Fixed LIKE tests and added more tests
Nov 18, 2022
0a4af9d
Addressed PR comments
Nov 18, 2022
288e29c
Implemented converting text field to keyword. Still needs testing
Nov 21, 2022
1fb4973
Added test cases for LIKE in sql and ppl
Nov 22, 2022
34ffe13
Addressed PR comments regarding docs
Nov 24, 2022
eed3294
Fixed backslashes in docs
Nov 24, 2022
86fa737
Added missed backticks in docs
Nov 24, 2022
b54a934
Moved escaping wildcard test to common/utils
Nov 25, 2022
fede123
Fixed merge conflicts
Nov 25, 2022
9da6140
Fixed checkstyle error
Nov 25, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ buildscript {
ext {
opensearch_version = System.getProperty("opensearch.version", "2.4.0-SNAPSHOT")
spring_version = "5.3.22"
jackson_version = "2.13.4"
jackson_databind_version = "2.13.4.2"
jackson_version = "2.14.0"
GumpacG marked this conversation as resolved.
Show resolved Hide resolved
jackson_databind_version = "2.14.0"
isSnapshot = "true" == System.getProperty("build.snapshot", "true")
buildVersionQualifier = System.getProperty("build.version_qualifier", "")
version_tokens = opensearch_version.tokenize('-')
Expand Down
4 changes: 4 additions & 0 deletions core/src/main/java/org/opensearch/sql/expression/DSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,10 @@ public FunctionExpression match_bool_prefix(Expression... args) {
return compile(BuiltinFunctionName.MATCH_BOOL_PREFIX, args);
}

/**
 * Builds a function expression for {@code BuiltinFunctionName.WILDCARD_QUERY}.
 *
 * @param args arguments forwarded unchanged to {@code compile}
 * @return the expression produced by {@code compile}
 */
public FunctionExpression wildcard_query(Expression... args) {
Yury-Fridlyand marked this conversation as resolved.
Show resolved Hide resolved
return compile(BuiltinFunctionName.WILDCARD_QUERY, args);
}

/**
 * Builds a function expression for {@code BuiltinFunctionName.NOW}.
 *
 * @param args arguments forwarded unchanged to {@code compile}
 * @return the expression produced by {@code compile}
 */
public FunctionExpression now(Expression... args) {
return compile(BuiltinFunctionName.NOW, args);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,9 @@ public enum BuiltinFunctionName {
QUERY(FunctionName.of("query")),
MATCH_QUERY(FunctionName.of("match_query")),
MATCHQUERY(FunctionName.of("matchquery")),
MULTI_MATCH(FunctionName.of("multi_match"));
MULTI_MATCH(FunctionName.of("multi_match")),
WILDCARDQUERY(FunctionName.of("wildcardquery")),
WILDCARD_QUERY(FunctionName.of("wildcard_query"));

private final FunctionName name;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ public void register(BuiltinFunctionRepository repository) {
repository.register(match_phrase(BuiltinFunctionName.MATCH_PHRASE));
repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASE));
repository.register(match_phrase_prefix());
repository.register(wildcard_query(BuiltinFunctionName.WILDCARD_QUERY));
repository.register(wildcard_query(BuiltinFunctionName.WILDCARDQUERY));
}

private static FunctionResolver match_bool_prefix() {
Expand Down Expand Up @@ -79,6 +81,11 @@ private static FunctionResolver query_string() {
return new RelevanceFunctionResolver(funcName, STRUCT);
}

/**
 * Creates the resolver registered for one spelling of the wildcard query function.
 *
 * @param wildcardQuery built-in function whose name the resolver is created for
 * @return a {@code RelevanceFunctionResolver} built from that name and {@code STRING}
 */
private static FunctionResolver wildcard_query(BuiltinFunctionName wildcardQuery) {
  return new RelevanceFunctionResolver(wildcardQuery.getName(), STRING);
}

public static class OpenSearchFunction extends FunctionExpression {
private final FunctionName functionName;
private final List<Expression> arguments;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,34 @@ void query_string_expression_two_fields() {
AstDSL.unresolvedArg("query", stringLiteral("query_value"))));
}

/**
 * Compares, via {@code assertAnalyzeEqual}, the DSL wildcard_query expression built from
 * the named arguments "field" and "query" with the AST function "wildcard_query" built
 * from the same two unresolved arguments.
 */
@Test
void wildcard_query_expression() {
assertAnalyzeEqual(
// First argument: the DSL expression.
dsl.wildcard_query(
dsl.namedArgument("field", DSL.literal("test")),
dsl.namedArgument("query", DSL.literal("query_value*"))),
// Second argument: the unresolved AST form of the same call.
AstDSL.function("wildcard_query",
unresolvedArg("field", stringLiteral("test")),
unresolvedArg("query", stringLiteral("query_value*"))));
}

/**
 * Same comparison as the two-argument case, but with the optional parameters
 * "boost", "case_insensitive" and "rewrite" supplied as well. All option values are
 * passed as string literals on both sides.
 */
@Test
void wildcard_query_expression_all_params() {
assertAnalyzeEqual(
// First argument: the DSL expression with every named argument.
dsl.wildcard_query(
dsl.namedArgument("field", DSL.literal("test")),
dsl.namedArgument("query", DSL.literal("query_value*")),
dsl.namedArgument("boost", DSL.literal("1.5")),
dsl.namedArgument("case_insensitive", DSL.literal("true")),
dsl.namedArgument("rewrite", DSL.literal("scoring_boolean"))),
// Second argument: the unresolved AST form with the same arguments in the same order.
AstDSL.function("wildcard_query",
unresolvedArg("field", stringLiteral("test")),
unresolvedArg("query", stringLiteral("query_value*")),
unresolvedArg("boost", stringLiteral("1.5")),
unresolvedArg("case_insensitive", stringLiteral("true")),
unresolvedArg("rewrite", stringLiteral("scoring_boolean"))));
}

@Test
public void match_phrase_prefix_all_params() {
assertAnalyzeEqual(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,4 +197,12 @@ void query_string() {
fields.getValue(), query.getValue()),
expr.toString());
}

/**
 * The string form of a wildcard_query expression lists the function name followed by
 * its field and query arguments.
 */
@Test
void wildcard_query() {
  FunctionExpression wildcardExpr = dsl.wildcard_query(field, query);
  String expectedText = String.format(
      "wildcard_query(field=%s, query=%s)",
      field.getValue(),
      query.getValue());
  assertEquals(expectedText, wildcardExpr.toString());
}
}
53 changes: 53 additions & 0 deletions docs/user/dql/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3127,3 +3127,56 @@ Example::
|----------------+---------------+-----------------+------------------|
| DATE | INTEGER | DATETIME | STRUCT |
+----------------+---------------+-----------------+------------------+


WILDCARD_QUERY
--------------

Description
>>>>>>>>>>>

``wildcard_query(field_expression, query_expression[, option=<option_value>]*)``

The ``wildcard_query`` function maps to the ``wildcard`` query used in the search engine. It returns documents whose specified field matches the provided wildcard pattern.
acarbonetto marked this conversation as resolved.
Show resolved Hide resolved
Supported wildcard characters can be found here: https://opensearch.org/docs/latest/opensearch/query-dsl/term/#wildcards
The SQL wildcards ``%`` and ``_`` are converted to ``*`` and ``?`` respectively. To match a literal ``%`` or ``_``, escape it with a backslash prefix.
acarbonetto marked this conversation as resolved.
Show resolved Hide resolved

Available parameters include:

- boost
- case_insensitive
- rewrite

For backward compatibility, ``wildcardquery`` is also supported and is mapped to the same query.

Example with only the ``field`` and ``query`` expressions; all other parameters are set to their default values::

os> select Body from wildcard where wildcard_query(Body, 'test wildcard*');
fetched rows / total rows = 7/7
+-------------------------------------------+
| Body |
|-------------------------------------------|
| test wildcard |
| test wildcard in the end of the text% |
| test wildcard in % the middle of the text |
| test wildcard %% beside each other |
| test wildcard in the end of the text_ |
| test wildcard in _ the middle of the text |
| test wildcard __ beside each other |
+-------------------------------------------+

Another example to show how to set custom values for the optional parameters::

os> select Body from wildcard where wildcard_query(Body, 'test wildcard*', boost=0.7, case_insensitive=true, rewrite='constant_score');
fetched rows / total rows = 7/7
+-------------------------------------------+
| Body |
|-------------------------------------------|
| test wildcard |
| test wildcard in the end of the text% |
| test wildcard in % the middle of the text |
| test wildcard %% beside each other |
| test wildcard in the end of the text_ |
| test wildcard in _ the middle of the text |
| test wildcard __ beside each other |
+-------------------------------------------+
20 changes: 20 additions & 0 deletions doctest/test_data/wildcard.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{"index":{"_id":"0"}}
{"Body":"test wildcard"}
{"index":{"_id":"1"}}
{"Body":"test wildcard in the end of the text%"}
{"index":{"_id":"2"}}
{"Body":"%test wildcard in the beginning of the text"}
{"index":{"_id":"3"}}
{"Body":"test wildcard in % the middle of the text"}
{"index":{"_id":"4"}}
{"Body":"test wildcard %% beside each other"}
{"index":{"_id":"5"}}
{"Body":"test wildcard in the end of the text_"}
{"index":{"_id":"6"}}
{"Body":"_test wildcard in the beginning of the text"}
{"index":{"_id":"7"}}
{"Body":"test wildcard in _ the middle of the text"}
{"index":{"_id":"8"}}
{"Body":"test wildcard __ beside each other"}
{"index":{"_id":"9"}}
{"Body":"test backslash wildcard \\_"}
4 changes: 3 additions & 1 deletion doctest/test_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
NYC_TAXI = "nyc_taxi"
BOOKS = "books"
APACHE = "apache"
WILDCARD = "wildcard"


class DocTestConnection(OpenSearchConnection):
Expand Down Expand Up @@ -92,6 +93,7 @@ def set_up_test_indices(test):
load_file("nyc_taxi.json", index_name=NYC_TAXI)
load_file("books.json", index_name=BOOKS)
load_file("apache.json", index_name=APACHE)
load_file("wildcard.json", index_name=WILDCARD)


def load_file(filename, index_name):
Expand Down Expand Up @@ -120,7 +122,7 @@ def set_up(test):

def tear_down(test):
# drop leftover tables after each test
test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE], ignore_unavailable=True)
test_data_client.indices.delete(index=[ACCOUNTS, EMPLOYEES, PEOPLE, ACCOUNT2, NYC_TAXI, BOOKS, APACHE, WILDCARD], ignore_unavailable=True)


docsuite = partial(doctest.DocFileSuite,
Expand Down
9 changes: 9 additions & 0 deletions doctest/test_mapping/wildcard.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"mappings" : {
"properties" : {
"Body" : {
"type" : "keyword"
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,11 @@ public enum Index {
CALCS(TestsConstants.TEST_INDEX_CALCS,
"calcs",
getMappingFile("calcs_index_mappings.json"),
"src/test/resources/calcs.json"),;
"src/test/resources/calcs.json"),
WILDCARD(TestsConstants.TEST_INDEX_WILDCARD,
"wildcard",
getMappingFile("wildcard_index_mappings.json"),
"src/test/resources/wildcard.json"),;

private final String name;
private final String type;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ public class TestsConstants {
public final static String TEST_INDEX_BEER = TEST_INDEX + "_beer";
public final static String TEST_INDEX_NULL_MISSING = TEST_INDEX + "_null_missing";
public final static String TEST_INDEX_CALCS = TEST_INDEX + "_calcs";
public final static String TEST_INDEX_WILDCARD = TEST_INDEX + "_wildcard";

public final static String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'";
public final static String TS_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS";
Expand Down
163 changes: 163 additions & 0 deletions integ-test/src/test/java/org/opensearch/sql/sql/WildcardQueryIT.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/


package org.opensearch.sql.sql;

import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WILDCARD;

import java.io.IOException;
import org.json.JSONObject;
import org.junit.Test;
import org.opensearch.sql.legacy.SQLIntegTestCase;

import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;

public class WildcardQueryIT extends SQLIntegTestCase {
/**
 * Loads the wildcard test index ({@code Index.WILDCARD}) that the queries in this class run against.
 */
@Override
protected void init() throws Exception {
loadIndex(Index.WILDCARD);
}

/**
 * The {@code *} wildcard works for both spellings of the function: each query is
 * limited to one row and that row is expected to be "test wildcard".
 */
@Test
public void test_wildcard_query_asterisk_function() throws IOException {
  // wildcard_query first, then the backward-compatible wildcardquery spelling.
  for (String functionName : new String[] {"wildcard_query", "wildcardquery"}) {
    String sql = "SELECT * FROM " + TEST_INDEX_WILDCARD
        + " WHERE " + functionName + "(Body, 't*') LIMIT 1";
    JSONObject response = executeJdbcRequest(sql);
    verifyDataRows(response, rows("test wildcard"));
  }
}

/**
 * The {@code ?} single-character wildcard works for both spellings of the function:
 * the pattern 'test wild??rd' is expected to return only "test wildcard".
 */
@Test
public void test_wildcard_query_question_mark_function() throws IOException {
  // wildcard_query first, then the backward-compatible wildcardquery spelling.
  for (String functionName : new String[] {"wildcard_query", "wildcardquery"}) {
    String sql = "SELECT * FROM " + TEST_INDEX_WILDCARD
        + " WHERE " + functionName + "(Body, 'test wild??rd')";
    JSONObject response = executeJdbcRequest(sql);
    verifyDataRows(response, rows("test wildcard"));
  }
}

// SQL uses % as a wildcard which is converted to * in WildcardQuery.java
/**
 * Checks that a {@code %} in a wildcard_query pattern gives the same total as the
 * equivalent {@code *} pattern.
 */
@Test
public void test_wildcard_query_sql_wildcard_percent_conversion() throws IOException {
GabeFernandez310 marked this conversation as resolved.
Show resolved Hide resolved
// With LIMIT 1 the single returned row is expected to be "test wildcard".
String query1 = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(Body, 'test%') LIMIT 1";
JSONObject result1 = executeJdbcRequest(query1);
verifyDataRows(result1, rows("test wildcard"));

// Without the limit, 'test%' is expected to report a total of 8.
query1 = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(Body, 'test%')";
result1 = executeJdbcRequest(query1);
assertEquals(8, result1.getInt("total"));

// The 'test*' form must report the same total as the 'test%' form.
String query2 = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(Body, 'test*')";
JSONObject result2 = executeJdbcRequest(query2);
assertEquals(result1.getInt("total"), result2.getInt("total"));
}

// SQL uses _ as a wildcard which is converted to ? in WildcardQuery.java
/**
 * Checks that a {@code _} in a wildcard_query pattern gives the same total as the
 * equivalent {@code ?} pattern.
 */
@Test
public void test_wildcard_query_sql_wildcard_underscore_conversion() throws IOException {
// With LIMIT 1 the single returned row is expected to be "test wildcard".
String query1 = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(Body, 'test wild_ard') LIMIT 1";
JSONObject result1 = executeJdbcRequest(query1);
verifyDataRows(result1, rows("test wildcard"));

// With a trailing '*', the '_' pattern is expected to report a total of 7.
query1 = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(Body, 'test wild_ard*')";
acarbonetto marked this conversation as resolved.
Show resolved Hide resolved
result1 = executeJdbcRequest(query1);
assertEquals(7, result1.getInt("total"));

// The '?' form must report the same total as the '_' form.
String query2 = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(Body, 'test wild?ard*')";
JSONObject result2 = executeJdbcRequest(query2);
assertEquals(result1.getInt("total"), result2.getInt("total"));
}

/**
 * A backslash-escaped {@code %} at the start of the pattern, followed by {@code *},
 * is expected to return only the row that begins with a percent sign.
 */
@Test
public void test_escaping_wildcard_percent_in_the_beginning_of_text() throws IOException {
  String sql =
      "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(Body, '\\\\%*')";
  verifyDataRows(
      executeJdbcRequest(sql),
      rows("%test wildcard in the beginning of the text"));
}

/**
 * A backslash-escaped {@code %} placed between {@code *} and an unescaped {@code %}
 * is expected to return the four rows listed below, all of which contain a percent sign.
 */
@Test
public void test_escaping_wildcard_percent_in_text() throws IOException {
  String sql =
      "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(Body, '*\\\\%%')";
  JSONObject response = executeJdbcRequest(sql);
  verifyDataRows(
      response,
      rows("test wildcard in % the middle of the text"),
      rows("test wildcard %% beside each other"),
      rows("test wildcard in the end of the text%"),
      rows("%test wildcard in the beginning of the text"));
}

/**
 * {@code *} followed by a backslash-escaped {@code %} is expected to return only the
 * row that ends with a percent sign.
 */
@Test
public void test_escaping_wildcard_percent_in_the_end_of_text() throws IOException {
  String sql =
      "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(Body, '*\\\\%')";
  verifyDataRows(
      executeJdbcRequest(sql),
      rows("test wildcard in the end of the text%"));
}

/**
 * Two consecutive backslash-escaped {@code %} characters surrounded by {@code *} are
 * expected to return only the row containing "%%".
 */
@Test
public void test_double_escaped_wildcard_percent() throws IOException {
  String sql =
      "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(Body, '*\\\\%\\\\%*')";
  verifyDataRows(
      executeJdbcRequest(sql),
      rows("test wildcard %% beside each other"));
}

/**
 * A backslash-escaped {@code _} at the start of the pattern, followed by {@code *},
 * is expected to return only the row that begins with an underscore.
 */
@Test
public void test_escaping_wildcard_underscore_in_the_beginning_of_text() throws IOException {
  String sql =
      "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(Body, '\\\\_*')";
  verifyDataRows(
      executeJdbcRequest(sql),
      rows("_test wildcard in the beginning of the text"));
}

/**
 * A backslash-escaped {@code _} surrounded by {@code *} is expected to return the five
 * rows listed below, all of which contain an underscore.
 */
@Test
public void test_escaping_wildcard_underscore_in_text() throws IOException {
  String sql =
      "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(Body, '*\\\\_*')";
  JSONObject response = executeJdbcRequest(sql);
  verifyDataRows(
      response,
      rows("test wildcard in _ the middle of the text"),
      rows("test wildcard __ beside each other"),
      rows("test wildcard in the end of the text_"),
      rows("_test wildcard in the beginning of the text"),
      rows("test backslash wildcard \\_"));
}

/**
 * {@code *} followed by a backslash-escaped {@code _} is expected to return the two
 * rows that end with an underscore.
 */
@Test
public void test_escaping_wildcard_underscore_in_the_end_of_text() throws IOException {
  String sql =
      "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(Body, '*\\\\_')";
  JSONObject response = executeJdbcRequest(sql);
  verifyDataRows(
      response,
      rows("test wildcard in the end of the text_"),
      rows("test backslash wildcard \\_"));
}

/**
 * Two consecutive backslash-escaped {@code _} characters surrounded by {@code *} are
 * expected to return only the row containing "__".
 */
@Test
public void test_double_escaped_wildcard_underscore() throws IOException {
  String sql =
      "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(Body, '*\\\\_\\\\_*')";
  verifyDataRows(
      executeJdbcRequest(sql),
      rows("test wildcard __ beside each other"));
}

/**
 * Queries for a pattern that ends in escaped backslashes followed by an underscore and
 * expects only the row whose text ends with a backslash and an underscore.
 */
@Test
public void test_backslash_wildcard() throws IOException {
// NOTE(review): the run of backslashes is presumably reduced by several unescaping
// layers (Java literal, request encoding, wildcard conversion) before it reaches the
// engine; confirm the exact count against the conversion code.
String query = "SELECT * FROM " + TEST_INDEX_WILDCARD + " WHERE wildcard_query(Body, '*\\\\\\\\\\\\_')";
acarbonetto marked this conversation as resolved.
Show resolved Hide resolved
JSONObject result = executeJdbcRequest(query);
verifyDataRows(result, rows("test backslash wildcard \\_"));
}

/**
 * Supplies every optional parameter (boost, case_insensitive, rewrite) together with
 * the 'test*' pattern and expects the response to report a total of 8.
 */
@Test
public void all_params_test() throws IOException {
  String sql = "SELECT * FROM " + TEST_INDEX_WILDCARD
      + " WHERE wildcard_query(Body, 'test*', boost = 0.9,"
      + " case_insensitive=true, rewrite='constant_score')";
  int total = executeJdbcRequest(sql).getInt("total");
  assertEquals(8, total);
}
}
Loading