Skip to content

Commit

Permalink
Fix Dissect with leading non-ascii characters (#111184)
Browse files Browse the repository at this point in the history
  • Loading branch information
luigidellaquila authored Jul 23, 2024
1 parent c1dcc6e commit 8f3244d
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 1 deletion.
5 changes: 5 additions & 0 deletions docs/changelog/111184.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 111184
summary: Fix Dissect with leading non-ascii characters
area: Ingest Node
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ public Map<String, String> parse(String inputString) {
DissectKey key = dissectPair.key();
byte[] delimiter = dissectPair.delimiter().getBytes(StandardCharsets.UTF_8);
// start dissection after the first delimiter
- int i = leadingDelimiter.length();
+ int i = leadingDelimiter.getBytes(StandardCharsets.UTF_8).length;
int valueStart = i;
int lookAheadMatches;
// start walking the input string byte by byte, look ahead for matches where needed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,18 @@ public void testMatchUnicode() {
assertMatch("%{a->}࿏%{b}", "⟳༒࿏࿏࿏࿏࿏༒⟲", Arrays.asList("a", "b"), Arrays.asList("⟳༒", "༒⟲"));
assertMatch("%{*a}࿏%{&a}", "⟳༒࿏༒⟲", Arrays.asList("⟳༒"), Arrays.asList("༒⟲"));
assertMatch("%{}࿏%{a}", "⟳༒࿏༒⟲", Arrays.asList("a"), Arrays.asList("༒⟲"));
assertMatch(
"Zürich, the %{adjective} city in Switzerland",
"Zürich, the largest city in Switzerland",
Arrays.asList("adjective"),
Arrays.asList("largest")
);
assertMatch(
"Zürich, the %{one} city in Switzerland; Zürich, the %{two} city in Switzerland",
"Zürich, the largest city in Switzerland; Zürich, the LARGEST city in Switzerland",
Arrays.asList("one", "two"),
Arrays.asList("largest", "LARGEST")
);
}

public void testMatchRemainder() {
Expand Down

0 comments on commit 8f3244d

Please sign in to comment.