From 29372cb91d796e4d9f357b75ef45298f95095628 Mon Sep 17 00:00:00 2001 From: Gao Binlong Date: Fri, 11 Aug 2023 12:59:10 +0800 Subject: [PATCH 1/4] Fix dissect ingest processor parsing empty brackets failed Signed-off-by: Gao Binlong --- CHANGELOG.md | 3 +- .../org/opensearch/dissect/DissectParser.java | 6 +++- .../ingest/common/DissectProcessorTests.java | 24 +++++++++++++ .../test/ingest/200_dissect_processor.yml | 35 +++++++++++++++++++ 4 files changed, 66 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b6bdbafb0fe59..c2af1b79dcf61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -128,8 +128,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Fixed - Fix flaky ResourceAwareTasksTests.testBasicTaskResourceTracking test ([#8993](https://github.com/opensearch-project/OpenSearch/pull/8993)) +- Fix dissect ingest processor parsing empty brackets failed ### Security [Unreleased 3.0]: https://github.com/opensearch-project/OpenSearch/compare/2.x...HEAD -[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.10...2.x \ No newline at end of file +[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.10...2.x diff --git a/libs/dissect/src/main/java/org/opensearch/dissect/DissectParser.java b/libs/dissect/src/main/java/org/opensearch/dissect/DissectParser.java index 9861847c9e1ea..58c5d9ab51b6e 100644 --- a/libs/dissect/src/main/java/org/opensearch/dissect/DissectParser.java +++ b/libs/dissect/src/main/java/org/opensearch/dissect/DissectParser.java @@ -232,7 +232,7 @@ public Map parse(String inputString) { int lookAheadMatches; // start walking the input string byte by byte, look ahead for matches where needed // if a match is found jump forward to the end of the match - for (; i < input.length; i++) { + while (i < input.length) { lookAheadMatches = 0; // potential match between delimiter and input string if (delimiter.length > 0 && input[i] == delimiter[0]) { @@ -284,7 +284,11 @@ public Map parse(String inputString) { delimiter = dissectPair.getDelimiter().getBytes(StandardCharsets.UTF_8); // i is always one byte after the last found delimiter, aka the start of the next value valueStart = i; + } else { + i++; } + } else { + i++; } } // the last key, grab the rest of the input (unless consecutive delimiters already grabbed the last key) diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DissectProcessorTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DissectProcessorTests.java index ca0c0df40f009..e42a1147825d1 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DissectProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DissectProcessorTests.java @@ -155,4 +155,28 @@ public void testNullValueWithOutIgnoreMissing() { IngestDocument ingestDocument = new IngestDocument(originalIngestDocument); expectThrows(IllegalArgumentException.class, () -> processor.execute(ingestDocument)); } + + public void testMatchEmptyBrackets() { + IngestDocument ingestDocument = new IngestDocument( + "_index", + "_id", + null, + null, + null, + Collections.singletonMap("message", "[foo],[bar],[]") + ); + DissectProcessor dissectProcessor = new DissectProcessor("", null, "message", "[%{a}],[%{b}],[%{c}]", "", true); + dissectProcessor.execute(ingestDocument); + assertEquals("foo", ingestDocument.getFieldValue("a", String.class)); + assertEquals("bar", ingestDocument.getFieldValue("b", String.class)); + assertEquals("", ingestDocument.getFieldValue("c", String.class)); + + ingestDocument = new IngestDocument("_index", "_id", null, null, null, Collections.singletonMap("message", "{}{}{}{baz}")); + dissectProcessor = new DissectProcessor("", null, "message", "{%{a}}{%{b}}{%{c}}{%{d}}", "", true); + dissectProcessor.execute(ingestDocument); + assertEquals("", ingestDocument.getFieldValue("a", String.class)); + assertEquals("", ingestDocument.getFieldValue("b", String.class)); + assertEquals("", ingestDocument.getFieldValue("c", String.class)); + assertEquals("baz", ingestDocument.getFieldValue("d", String.class)); + } } diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/200_dissect_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/200_dissect_processor.yml index 916a7fe656cc2..d90e5fbf2362b 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/200_dissect_processor.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/200_dissect_processor.yml @@ -84,3 +84,38 @@ teardown: } ] } + +--- +"Test dissect processor can match empty brackets": + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "dissect" : { + "field" : "message", + "pattern" : "[%{a}][%{b}][%{c}]" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "my_pipeline" + body: {message: "[foo][bar][]"} + + - do: + get: + index: test + id: 1 + - match: { _source.message: "[foo][bar][]" } + - match: { _source.a: "foo" } + - match: { _source.b: "bar" } + - match: { _source.c: "" } From 24f2ae95d8f01e68844d72bbc3d91ba3d1b4deba Mon Sep 17 00:00:00 2001 From: Gao Binlong Date: Fri, 11 Aug 2023 14:23:20 +0800 Subject: [PATCH 2/4] Modify change log Signed-off-by: Gao Binlong --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c2af1b79dcf61..7689bd60c16bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -128,7 +128,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Fixed - Fix flaky ResourceAwareTasksTests.testBasicTaskResourceTracking test ([#8993](https://github.com/opensearch-project/OpenSearch/pull/8993)) -- Fix dissect ingest processor parsing empty brackets failed +- Fix dissect ingest processor parsing empty brackets failed ([#9225](https://github.com/opensearch-project/OpenSearch/pull/9255))) ### Security From c558848d4e754d2f8618e51784d5e465ce7d9beb Mon Sep 17 00:00:00 2001 From: Gao Binlong Date: Fri, 11 Aug 2023 14:26:12 +0800 Subject: [PATCH 3/4] Modify change log Signed-off-by: Gao Binlong --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7689bd60c16bc..3a09c159e0f66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -128,7 +128,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Fixed - Fix flaky ResourceAwareTasksTests.testBasicTaskResourceTracking test ([#8993](https://github.com/opensearch-project/OpenSearch/pull/8993)) -- Fix dissect ingest processor parsing empty brackets failed ([#9225](https://github.com/opensearch-project/OpenSearch/pull/9255))) +- Fix dissect ingest processor parsing empty brackets failed ([#9225](https://github.com/opensearch-project/OpenSearch/pull/9255)) ### Security From 8a79835df02e20af5e3889d274fa5486d2b8a349 Mon Sep 17 00:00:00 2001 From: Gao Binlong Date: Sun, 8 Oct 2023 18:52:07 +0800 Subject: [PATCH 4/4] Add assertion Signed-off-by: Gao Binlong --- CHANGELOG.md | 2 +- .../src/main/java/org/opensearch/dissect/DissectParser.java | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a5f618fb2d66f..19d6f1244d159 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -148,4 +148,4 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Security [Unreleased 3.0]: https://github.com/opensearch-project/OpenSearch/compare/2.x...HEAD -[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.11...2.x \ No newline at end of file +[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.11...2.x diff --git a/libs/dissect/src/main/java/org/opensearch/dissect/DissectParser.java b/libs/dissect/src/main/java/org/opensearch/dissect/DissectParser.java index 58c5d9ab51b6e..7b61ee144586a 100644 --- a/libs/dissect/src/main/java/org/opensearch/dissect/DissectParser.java +++ b/libs/dissect/src/main/java/org/opensearch/dissect/DissectParser.java @@ -233,6 +233,9 @@ public Map parse(String inputString) { // start walking the input string byte by byte, look ahead for matches where needed // if a match is found jump forward to the end of the match while (i < input.length) { + // start is only used to record the value of i + int start = i; + lookAheadMatches = 0; // potential match between delimiter and input string if (delimiter.length > 0 && input[i] == delimiter[0]) { @@ -290,6 +293,8 @@ public Map parse(String inputString) { } else { i++; } + // i should change anyway + assert (i != start); } // the last key, grab the rest of the input (unless consecutive delimiters already grabbed the last key) // and there is no trailing delimiter