Skip to content

Commit

Permalink
Add support for wildcard field type (opensearch-project#13461) (opens…
Browse files Browse the repository at this point in the history
…earch-project#14167)

This adds the "wildcard" field type, which efficiently executes wildcard,
prefix, and regexp queries in two phases: it first matches against trigrams
(or bigrams or individual characters), then post-filters the candidates by
evaluating the original field value against the pattern.

---------

Signed-off-by: Michael Froh <[email protected]>
(cherry picked from commit b71e547)
Signed-off-by: kkewwei <[email protected]>
  • Loading branch information
msfroh authored and kkewwei committed Jul 24, 2024
1 parent 66e9270 commit 84e2833
Show file tree
Hide file tree
Showing 7 changed files with 1,601 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [Remote Store] Upload translog checkpoint as object metadata to translog.tlog ([#13637](https://github.com/opensearch-project/OpenSearch/pull/13637))
- [Remote Store] Add dynamic cluster settings to set timeout for segments upload to Remote Store ([#13679](https://github.com/opensearch-project/OpenSearch/pull/13679))
- Add getMetadataFields to MapperService ([#13819](https://github.com/opensearch-project/OpenSearch/pull/13819))
- Add "wildcard" field type that supports efficient wildcard, prefix, and regexp queries ([#13461](https://github.com/opensearch-project/OpenSearch/pull/13461))
- Allow setting query parameters on requests ([#13776](https://github.com/opensearch-project/OpenSearch/issues/13776))
- Add dynamic action retry timeout setting ([#14022](https://github.com/opensearch-project/OpenSearch/issues/14022))
- Add capability to disable source recovery_source for an index ([#13590](https://github.com/opensearch-project/OpenSearch/pull/13590))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
# Shared fixture for every test in this file: creates the `test` index with a
# `wildcard` field (plus two sub-fields) and indexes six documents.
setup:
  # NOTE(review): this range skips every version up to 2.99.99, and the reason
  # string itself says the feature was added in 2.15. If this file lives on a
  # 2.x branch the suite presumably never runs there -- confirm whether the
  # bound should be tightened (e.g. " - 2.14.99") once the backport has landed.
  - skip:
      version: " - 2.99.99"
      reason: "Added in 2.15, but need to skip pre-3.0 before backport"

  # `my_field` is the plain wildcard field. Its sub-fields are:
  #   my_field.lower      - wildcard field using the `lowercase` normalizer
  #   my_field.doc_values - wildcard field with doc_values explicitly enabled
  - do:
      indices.create:
        index: test
        body:
          mappings:
            properties:
              my_field:
                type: wildcard
                fields:
                  lower:
                    type: wildcard
                    normalizer: lowercase
                  doc_values:
                    type: wildcard
                    doc_values: true

  # Doc 1: exception message; target of the "*Node*Exception*" wildcard tests.
  - do:
      index:
        index: test
        id: 1
        body:
          my_field: "org.opensearch.transport.NodeDisconnectedException: [node_s0][127.0.0.1:39953][disconnected] disconnected"

  # Docs 2-4: log lines sharing the "[2024-06-08T" prefix; only docs 2 and 3
  # contain the phrase "cluster-manager node".
  - do:
      index:
        index: test
        id: 2
        body:
          my_field: "[2024-06-08T06:31:37,443][INFO ][o.o.c.c.Coordinator ] [node_s2] cluster-manager node [{node_s0}{Nj7FjR7hRP2lh_zur8KN_g}{OTGOoWmmSsWP_RQ3tIKJ9g}{127.0.0.1}{127.0.0.1:39953}{imr}{shard_indexing_pressure_enabled=true}] failed, restarting discovery"

  - do:
      index:
        index: test
        id: 3
        body:
          my_field: "[2024-06-08T06:31:37,451][INFO ][o.o.c.s.ClusterApplierService] [node_s2] cluster-manager node changed {previous [{node_s0}{Nj7FjR7hRP2lh_zur8KN_g}{OTGOoWmmSsWP_RQ3tIKJ9g}{127.0.0.1}{127.0.0.1:39953}{imr}{shard_indexing_pressure_enabled=true}], current []}, term: 1, version: 24, reason: becoming candidate: onLeaderFailure"

  - do:
      index:
        index: test
        id: 4
        body:
          my_field: "[2024-06-08T06:31:37,452][WARN ][o.o.c.NodeConnectionsService] [node_s1] failed to connect to {node_s0}{Nj7FjR7hRP2lh_zur8KN_g}{OTGOoWmmSsWP_RQ3tIKJ9g}{127.0.0.1}{127.0.0.1:39953}{imr}{shard_indexing_pressure_enabled=true} (tried [1] times)"

  # Doc 5: short mixed-case value used by the term and case-sensitivity tests.
  - do:
      index:
        index: test
        id: 5
        body:
          my_field: "AbCd"

  # Doc 6: has no `my_field` at all, so the match-all tests expect 5 hits
  # rather than 6.
  - do:
      index:
        index: test
        id: 6
        body:
          other_field: "test"

  # Refresh so the documents indexed above are visible to the searches.
  - do:
      indices.refresh: {}

---
# A term query with the exact stored value must find doc 5, both on the plain
# wildcard field and on the sub-field that has doc_values enabled.
"term query matches exact value":
  - do:
      search:
        index: test
        body:
          query:
            term:
              my_field: "AbCd"
  - match: { hits.total.value: 1 }
  - match: { hits.hits.0._id: "5" }

  # Same query against the doc_values-enabled sub-field.
  - do:
      search:
        index: test
        body:
          query:
            term:
              my_field.doc_values: "AbCd"
  - match: { hits.total.value: 1 }
  - match: { hits.hits.0._id: "5" }

---
# Term queries on `my_field.lower` are expected to match regardless of the
# case of the query text, while the non-normalized `my_field` stays
# case-sensitive.
"term query matches lowercase-normalized value":
  # Lowercase query text against the normalized sub-field.
  - do:
      search:
        index: test
        body:
          query:
            term:
              my_field.lower: "abcd"
  - match: { hits.total.value: 1 }
  - match: { hits.hits.0._id: "5" }

  # Uppercase query text must match too.
  - do:
      search:
        index: test
        body:
          query:
            term:
              my_field.lower: "ABCD"
  - match: { hits.total.value: 1 }
  - match: { hits.hits.0._id: "5" }

  # Negative check: the raw field holds "AbCd", so "abcd" must not match.
  - do:
      search:
        index: test
        body:
          query:
            term:
              my_field: "abcd"
  - match: { hits.total.value: 0 }

---
# A multi-segment wildcard pattern on the plain field must find only doc 1
# (the NodeDisconnectedException message).
"wildcard query matches":
  - do:
      search:
        index: test
        body:
          query:
            wildcard:
              my_field:
                value: "*Node*Exception*"
  - match: { hits.total.value: 1 }
  - match: { hits.hits.0._id: "1" }

---
# Wildcard patterns against `my_field.lower` are expected to match in any
# case; the same lowercase pattern against the raw field must not match.
"wildcard query matches lowercase-normalized field":
  # Lowercase pattern on the normalized sub-field.
  - do:
      search:
        index: test
        body:
          query:
            wildcard:
              my_field.lower:
                value: "*node*exception*"
  - match: { hits.total.value: 1 }
  - match: { hits.hits.0._id: "1" }

  # Uppercase pattern on the normalized sub-field.
  - do:
      search:
        index: test
        body:
          query:
            wildcard:
              my_field.lower:
                value: "*NODE*EXCEPTION*"
  - match: { hits.total.value: 1 }
  - match: { hits.hits.0._id: "1" }

  # Negative check: the raw field is case-sensitive, so the lowercase pattern
  # finds nothing.
  - do:
      search:
        index: test
        body:
          query:
            wildcard:
              my_field:
                value: "*node*exception*"
  - match: { hits.total.value: 0 }

---
# Prefix query on the plain field: docs 2, 3 and 4 are the three log lines
# that start with "[2024-06-08T".
"prefix query matches":
  - do:
      search:
        index: test
        body:
          query:
            prefix:
              my_field:
                value: "[2024-06-08T"
  - match: { hits.total.value: 3 }

---
# Regexp query on the plain field: only docs 2 and 3 contain both "06-08" and
# the later phrase "cluster-manager node".
"regexp query matches":
  - do:
      search:
        index: test
        body:
          query:
            regexp:
              my_field:
                value: ".*06-08.*cluster-manager node.*"
  - match: { hits.total.value: 2 }

---
# A mixed-case regexp is expected to match on `my_field.lower` (2 hits) but
# not on the case-sensitive raw field (0 hits).
"regexp query matches lowercase-normalized field":
  # Mixed-case pattern against the normalized sub-field.
  - do:
      search:
        index: test
        body:
          query:
            regexp:
              my_field.lower:
                value: ".*06-08.*Cluster-Manager Node.*"
  - match: { hits.total.value: 2 }

  # Negative check: the same pattern against the raw field finds nothing,
  # because the stored text is "cluster-manager node" in lowercase.
  - do:
      search:
        index: test
        body:
          query:
            regexp:
              my_field:
                value: ".*06-08.*Cluster-Manager Node.*"
  - match: { hits.total.value: 0 }

---
# The bare "*" pattern must match every document that has a value for
# `my_field`: docs 1-5. Doc 6 only has `other_field`, so it is excluded.
"wildcard match-all works":
  - do:
      search:
        index: test
        body:
          query:
            wildcard:
              my_field:
                value: "*"
  - match: { hits.total.value: 5 }
---
# The ".*" regexp must match every document that has a value for `my_field`:
# docs 1-5. Doc 6 only has `other_field`, so it is excluded.
"regexp match-all works":
  - do:
      search:
        index: test
        body:
          query:
            regexp:
              my_field:
                value: ".*"
  - match: { hits.total.value: 5 }
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,7 @@ protected void parseCreateField(ParseContext context) throws IOException {
}
}

private static String normalizeValue(NamedAnalyzer normalizer, String field, String value) throws IOException {
static String normalizeValue(NamedAnalyzer normalizer, String field, String value) throws IOException {
try (TokenStream ts = normalizer.tokenStream(field, value)) {
final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
ts.reset();
Expand Down
Loading

0 comments on commit 84e2833

Please sign in to comment.