From 924dd46ecf2a0e293d4f999111861261f03a11e9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 8 Dec 2022 15:07:51 +0000 Subject: [PATCH] [Bug] fix case sensitivity for wildcard queries (#5462) Fixes the wildcard query to not normalize the pattern when case_insensitive is set by the user. This is achieved by creating a new normalizedWildcardQuery method so that query_string queries (which do not support case sensitivity) can still normalize the pattern when the default analyzer is used; maintaining existing behavior. Signed-off-by: Nicholas Walter Knize (cherry picked from commit ce25dec68521cc3ee13d42fe52b5912f4448ae71) Signed-off-by: github-actions[bot] --- CHANGELOG.md | 1 + .../search/query/QueryStringIT.java | 33 ++++++++++++++++ .../search/query/SearchQueryIT.java | 39 +++++++++++++++++++ .../index/mapper/KeywordFieldMapper.java | 15 +++++++ .../index/mapper/MappedFieldType.java | 17 +++++--- .../index/mapper/StringFieldType.java | 28 ++++++++++++- .../index/search/QueryStringQueryParser.java | 3 +- .../index/query/PrefixQueryBuilderTests.java | 2 +- .../query/QueryStringQueryBuilderTests.java | 2 +- 9 files changed, 130 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b892cfebc6904..1ab0531c52bc8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Removed ### Fixed - Fix 1.x compatibility bug with stored Tasks ([#5412](https://github.com/opensearch-project/OpenSearch/pull/5412)) +- Fix case sensitivity for wildcard queries ([#5462](https://github.com/opensearch-project/OpenSearch/pull/5462)) ### Security [Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.4...2.x diff --git a/server/src/internalClusterTest/java/org/opensearch/search/query/QueryStringIT.java b/server/src/internalClusterTest/java/org/opensearch/search/query/QueryStringIT.java index 5c7e53fda3f23..9837c86cd8608 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/query/QueryStringIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/query/QueryStringIT.java @@ -216,6 +216,39 @@ public void testKeywordWithWhitespace() throws Exception { assertHitCount(resp, 3L); } + public void testRegexCaseInsensitivity() throws Exception { + createIndex("messages"); + List indexRequests = new ArrayList<>(); + indexRequests.add(client().prepareIndex("messages").setId("1").setSource("message", "message: this is a TLS handshake")); + indexRequests.add(client().prepareIndex("messages").setId("2").setSource("message", "message: this is a tcp handshake")); + indexRandom(true, false, indexRequests); + + SearchResponse response = client().prepareSearch("messages").setQuery(queryStringQuery("/TLS/").defaultField("message")).get(); + assertNoFailures(response); + assertHitCount(response, 1); + assertHits(response.getHits(), "1"); + + response = client().prepareSearch("messages").setQuery(queryStringQuery("/tls/").defaultField("message")).get(); + assertNoFailures(response); + assertHitCount(response, 1); + assertHits(response.getHits(), "1"); + + response = client().prepareSearch("messages").setQuery(queryStringQuery("/TCP/").defaultField("message")).get(); + assertNoFailures(response); + assertHitCount(response, 1); + assertHits(response.getHits(), "2"); + + response = client().prepareSearch("messages").setQuery(queryStringQuery("/tcp/").defaultField("message")).get(); + assertNoFailures(response); + assertHitCount(response, 1); + assertHits(response.getHits(), "2"); + + response = client().prepareSearch("messages").setQuery(queryStringQuery("/HANDSHAKE/").defaultField("message")).get(); + assertNoFailures(response); + assertHitCount(response, 2); + assertHits(response.getHits(), "1", "2"); + } + public void testAllFields() throws Exception { String indexBody = copyToStringFromClasspath("/org/opensearch/search/query/all-query-index.json"); diff --git a/server/src/internalClusterTest/java/org/opensearch/search/query/SearchQueryIT.java b/server/src/internalClusterTest/java/org/opensearch/search/query/SearchQueryIT.java index c51043f02174d..e90d4e8e12c10 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/query/SearchQueryIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/query/SearchQueryIT.java @@ -89,12 +89,15 @@ import java.time.format.DateTimeFormatter; import java.util.Arrays; import java.util.Collection; +import java.util.HashSet; import java.util.Map; import java.util.Random; +import java.util.Set; import java.util.concurrent.ExecutionException; import java.util.regex.Pattern; import static java.util.Collections.singletonMap; +import static org.hamcrest.Matchers.containsInAnyOrder; import static org.opensearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; @@ -2089,8 +2092,14 @@ public void testWildcardQueryNormalizationOnTextField() { refresh(); { + // test default case insensitivity: false WildcardQueryBuilder wildCardQuery = wildcardQuery("field1", "Bb*"); SearchResponse searchResponse = client().prepareSearch().setQuery(wildCardQuery).get(); + assertHitCount(searchResponse, 0L); + + // test case insensitivity set to true + wildCardQuery = wildcardQuery("field1", "Bb*").caseInsensitive(true); + searchResponse = client().prepareSearch().setQuery(wildCardQuery).get(); assertHitCount(searchResponse, 1L); wildCardQuery = wildcardQuery("field1", "bb*"); @@ -2099,6 +2108,24 @@ public void testWildcardQueryNormalizationOnTextField() { } } + /** tests wildcard case sensitivity */ + public void testWildcardCaseSensitivity() { + assertAcked(prepareCreate("test").setMapping("field", "type=text")); + client().prepareIndex("test").setId("1").setSource("field", "lowercase text").get(); + refresh(); + + // test case sensitive + SearchResponse response = client().prepareSearch("test").setQuery(wildcardQuery("field", "Text").caseInsensitive(false)).get(); + assertNoFailures(response); + assertHitCount(response, 0); + + // test case insensitive + response = client().prepareSearch("test").setQuery(wildcardQuery("field", "Text").caseInsensitive(true)).get(); + assertNoFailures(response); + assertHitCount(response, 1); + assertHits(response.getHits(), "1"); + } + /** * Reserved characters should be excluded when the normalization is applied for keyword fields. * See https://github.com/elastic/elasticsearch/issues/46300 for details. @@ -2175,4 +2202,16 @@ public void testIssueFuzzyInsideSpanMulti() { SearchResponse response = client().prepareSearch("test").setQuery(query).get(); assertHitCount(response, 1); } + + /** + * asserts the search response hits include the expected ids + */ + private void assertHits(SearchHits hits, String... ids) { + assertThat(hits.getTotalHits().value, equalTo((long) ids.length)); + Set hitIds = new HashSet<>(); + for (SearchHit hit : hits.getHits()) { + hitIds.add(hit.getId()); + } + assertThat(hitIds, containsInAnyOrder(ids)); + } } diff --git a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java index 0b85ba0d2ccd8..42069ac165b25 100644 --- a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java @@ -38,7 +38,10 @@ import org.apache.lucene.document.FieldType; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; +import org.opensearch.common.Nullable; import org.opensearch.common.lucene.Lucene; import org.opensearch.common.xcontent.XContentParser; import org.opensearch.index.analysis.IndexAnalyzers; @@ -368,6 +371,18 @@ protected BytesRef indexedValueForSearch(Object value) { } return getTextSearchInfo().getSearchAnalyzer().normalize(name(), value.toString()); } + + @Override + public Query wildcardQuery( + String value, + @Nullable MultiTermQuery.RewriteMethod method, + boolean caseInsensitve, + QueryShardContext context + ) { + // keyword field types are always normalized, so ignore case sensitivity and force normalize the wildcard + // query text + return super.wildcardQuery(value, method, caseInsensitve, true, context); + } } private final boolean indexed; diff --git a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java index ead901a25e6fd..0804ad1a524a9 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java @@ -281,7 +281,7 @@ public Query prefixQuery( ) { throw new QueryShardException( context, - "Can only use prefix queries on keyword, text and wildcard fields - not on [" + name + "] which is of type [" + typeName() + "]" + "Can only use prefix queries on keyword and text fields - not on [" + name + "] which is of type [" + typeName() + "]" ); } @@ -290,6 +290,7 @@ public final Query wildcardQuery(String value, @Nullable MultiTermQuery.RewriteM return wildcardQuery(value, method, false, context); } + /** optionally normalize the wildcard pattern based on the value of {@code caseInsensitive} */ public Query wildcardQuery( String value, @Nullable MultiTermQuery.RewriteMethod method, @@ -298,11 +299,15 @@ public Query wildcardQuery( ) { throw new QueryShardException( context, - "Can only use wildcard queries on keyword, text and wildcard fields - not on [" - + name - + "] which is of type [" - + typeName() - + "]" + "Can only use wildcard queries on keyword and text fields - not on [" + name + "] which is of type [" + typeName() + "]" + ); + } + + /** always normalizes the wildcard pattern to lowercase */ + public Query normalizedWildcardQuery(String value, @Nullable MultiTermQuery.RewriteMethod method, QueryShardContext context) { + throw new QueryShardException( + context, + "Can only use wildcard queries on keyword and text fields - not on [" + name + "] which is of type [" + typeName() + "]" ); } diff --git a/server/src/main/java/org/opensearch/index/mapper/StringFieldType.java b/server/src/main/java/org/opensearch/index/mapper/StringFieldType.java index fa9c02c3cf14e..fbfca44c3062a 100644 --- a/server/src/main/java/org/opensearch/index/mapper/StringFieldType.java +++ b/server/src/main/java/org/opensearch/index/mapper/StringFieldType.java @@ -152,8 +152,34 @@ public static final String normalizeWildcardPattern(String fieldname, String val return sb.toBytesRef().utf8ToString(); } + /** optionally normalize the wildcard pattern based on the value of {@code caseInsensitive} */ @Override public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, boolean caseInsensitive, QueryShardContext context) { + return wildcardQuery(value, method, caseInsensitive, false, context); + } + + /** always normalizes the wildcard pattern to lowercase */ + @Override + public Query normalizedWildcardQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) { + return wildcardQuery(value, method, false, true, context); + } + + /** + * return a wildcard query + * + * @param value the pattern + * @param method rewrite method + * @param caseInsensitive should ignore case; note, only used if there is no analyzer, else we use the analyzer rules + * @param normalizeIfAnalyzed force normalize casing if an analyzer is used + * @param context the query shard context + */ + public Query wildcardQuery( + String value, + MultiTermQuery.RewriteMethod method, + boolean caseInsensitive, + boolean normalizeIfAnalyzed, + QueryShardContext context + ) { failIfNotIndexed(); if (context.allowExpensiveQueries() == false) { throw new OpenSearchException( @@ -162,7 +188,7 @@ public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, bo } Term term; - if (getTextSearchInfo().getSearchAnalyzer() != null) { + if (getTextSearchInfo().getSearchAnalyzer() != null && normalizeIfAnalyzed) { value = normalizeWildcardPattern(name(), value, getTextSearchInfo().getSearchAnalyzer()); term = new Term(name(), value); } else { diff --git a/server/src/main/java/org/opensearch/index/search/QueryStringQueryParser.java b/server/src/main/java/org/opensearch/index/search/QueryStringQueryParser.java index 6d59e861eb32f..9a121fe55a7e7 100644 --- a/server/src/main/java/org/opensearch/index/search/QueryStringQueryParser.java +++ b/server/src/main/java/org/opensearch/index/search/QueryStringQueryParser.java @@ -729,7 +729,8 @@ private Query getWildcardQuerySingle(String field, String termStr) throws ParseE if (getAllowLeadingWildcard() == false && (termStr.startsWith("*") || termStr.startsWith("?"))) { throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery"); } - return currentFieldType.wildcardQuery(termStr, getMultiTermRewriteMethod(), context); + // query string query is always normalized + return currentFieldType.normalizedWildcardQuery(termStr, getMultiTermRewriteMethod(), context); } catch (RuntimeException e) { if (lenient) { return newLenientFieldQuery(field, e); diff --git a/server/src/test/java/org/opensearch/index/query/PrefixQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/PrefixQueryBuilderTests.java index 48b309ea4eca3..8f4f70e96e2b4 100644 --- a/server/src/test/java/org/opensearch/index/query/PrefixQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/PrefixQueryBuilderTests.java @@ -130,7 +130,7 @@ public void testNumeric() throws Exception { QueryShardContext context = createShardContext(); QueryShardException e = expectThrows(QueryShardException.class, () -> query.toQuery(context)); assertEquals( - "Can only use prefix queries on keyword, text and wildcard fields - not on [mapped_int] which is of type [integer]", + "Can only use prefix queries on keyword and text fields - not on [mapped_int] which is of type [integer]", e.getMessage() ); } diff --git a/server/src/test/java/org/opensearch/index/query/QueryStringQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/QueryStringQueryBuilderTests.java index 393d4cb3f2121..611223e067cff 100644 --- a/server/src/test/java/org/opensearch/index/query/QueryStringQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/QueryStringQueryBuilderTests.java @@ -873,7 +873,7 @@ public void testPrefixNumeric() throws Exception { QueryShardContext context = createShardContext(); QueryShardException e = expectThrows(QueryShardException.class, () -> query.toQuery(context)); assertEquals( - "Can only use prefix queries on keyword, text and wildcard fields - not on [mapped_int] which is of type [integer]", + "Can only use prefix queries on keyword and text fields - not on [mapped_int] which is of type [integer]", e.getMessage() ); query.lenient(true);