From c694e50ecde36daefd0f3dc429f97e7d74fc2a02 Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Wed, 1 Mar 2023 14:11:41 -0800 Subject: [PATCH] replacing unicode control chars (#6486) (#6522) (cherry picked from commit 41cc4dfc030fc9166084ad97482d47bfc407efbf) Signed-off-by: Ramakrishna Chilaka Signed-off-by: github-actions[bot] Co-authored-by: github-actions[bot] --- .../highlight/HighlightFieldTests.java | 19 ++++++++++++++++--- .../opensearch/test/OpenSearchTestCase.java | 9 +++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/HighlightFieldTests.java b/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/HighlightFieldTests.java index 6fb53c2ed1da0..d3d7c1aa8d411 100644 --- a/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/HighlightFieldTests.java +++ b/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/HighlightFieldTests.java @@ -53,19 +53,32 @@ public class HighlightFieldTests extends OpenSearchTestCase { public static HighlightField createTestItem() { String name = frequently() ? randomAlphaOfLengthBetween(5, 20) : randomRealisticUnicodeOfCodepointLengthBetween(5, 20); + name = replaceUnicodeControlCharacters(name); Text[] fragments = null; if (frequently()) { int size = randomIntBetween(0, 5); fragments = new Text[size]; for (int i = 0; i < size; i++) { - fragments[i] = new Text( - frequently() ? randomAlphaOfLengthBetween(10, 30) : randomRealisticUnicodeOfCodepointLengthBetween(10, 30) - ); + String fragmentText = frequently() + ? randomAlphaOfLengthBetween(10, 30) + : randomRealisticUnicodeOfCodepointLengthBetween(10, 30); + fragmentText = replaceUnicodeControlCharacters(fragmentText); + fragments[i] = new Text(fragmentText); } } return new HighlightField(name, fragments); } + public void testReplaceUnicodeControlCharacters() { + assertEquals("æÆ ¢¡Èýñ«Ò", replaceUnicodeControlCharacters("æÆ\u0000¢¡Èýñ«Ò")); + assertEquals("test_string_without_control_characters", replaceUnicodeControlCharacters("test_string_without_control_characters")); + assertEquals("æÆ@¢¡Èýñ«Ò", replaceUnicodeControlCharacters("æÆ\u0000¢¡Èýñ«Ò", "@")); + assertEquals( + "test_string_without_control_characters", + replaceUnicodeControlCharacters("test_string_without_control_characters", "@") + ); + } + public void testFromXContent() throws IOException { HighlightField highlightField = createTestItem(); XContentType xcontentType = randomFrom(XContentType.values()); diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java index d4090fdfdfdad..904f30a2edf95 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java @@ -883,6 +883,15 @@ public static String randomRealisticUnicodeOfCodepointLength(int codePoints) { return RandomizedTest.randomRealisticUnicodeOfCodepointLength(codePoints); } + public static String replaceUnicodeControlCharacters(String uniCodeStr, String toReplaceWith) { + // replace control characters (https://stackoverflow.com/questions/3438854/replace-unicode-control-characters/) + return uniCodeStr.replaceAll("\\p{Cc}", toReplaceWith); + } + + public static String replaceUnicodeControlCharacters(String uniCodeStr) { + return replaceUnicodeControlCharacters(uniCodeStr, " "); + } + /** * @param maxArraySize The maximum number of elements in the random array * @param stringSize The length of each String in the array