diff --git a/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/HighlightFieldTests.java b/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/HighlightFieldTests.java
index 6fb53c2ed1da0..d3d7c1aa8d411 100644
--- a/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/HighlightFieldTests.java
+++ b/server/src/test/java/org/opensearch/search/fetch/subphase/highlight/HighlightFieldTests.java
@@ -53,19 +53,35 @@ public class HighlightFieldTests extends OpenSearchTestCase {
 
     public static HighlightField createTestItem() {
         String name = frequently() ? randomAlphaOfLengthBetween(5, 20) : randomRealisticUnicodeOfCodepointLengthBetween(5, 20);
+        name = replaceUnicodeControlCharacters(name);
         Text[] fragments = null;
         if (frequently()) {
             int size = randomIntBetween(0, 5);
             fragments = new Text[size];
             for (int i = 0; i < size; i++) {
-                fragments[i] = new Text(
-                    frequently() ? randomAlphaOfLengthBetween(10, 30) : randomRealisticUnicodeOfCodepointLengthBetween(10, 30)
-                );
+                String fragmentText = frequently()
+                    ? randomAlphaOfLengthBetween(10, 30)
+                    : randomRealisticUnicodeOfCodepointLengthBetween(10, 30);
+                fragmentText = replaceUnicodeControlCharacters(fragmentText);
+                fragments[i] = new Text(fragmentText);
             }
         }
         return new HighlightField(name, fragments);
     }
 
+    public void testReplaceUnicodeControlCharacters() {
+        assertEquals("æÆ ¢¡Èýñ«Ò", replaceUnicodeControlCharacters("æÆ\u0000¢¡Èýñ«Ò"));
+        assertEquals("test_string_without_control_characters", replaceUnicodeControlCharacters("test_string_without_control_characters"));
+        assertEquals("æÆ@¢¡Èýñ«Ò", replaceUnicodeControlCharacters("æÆ\u0000¢¡Èýñ«Ò", "@"));
+        assertEquals(
+            "test_string_without_control_characters",
+            replaceUnicodeControlCharacters("test_string_without_control_characters", "@")
+        );
+        // '$' and '\' are special in regex replacement strings; they must be inserted literally
+        assertEquals("æÆ$¢¡Èýñ«Ò", replaceUnicodeControlCharacters("æÆ\u0000¢¡Èýñ«Ò", "$"));
+        assertEquals("æÆ\\¢¡Èýñ«Ò", replaceUnicodeControlCharacters("æÆ\u0000¢¡Èýñ«Ò", "\\"));
+    }
+
     public void testFromXContent() throws IOException {
         HighlightField highlightField = createTestItem();
         XContentType xcontentType = randomFrom(XContentType.values());
diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java
index d4090fdfdfdad..904f30a2edf95 100644
--- a/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java
+++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java
@@ -883,6 +883,29 @@ public static String randomRealisticUnicodeOfCodepointLength(int codePoints) {
         return RandomizedTest.randomRealisticUnicodeOfCodepointLength(codePoints);
     }
 
+    /**
+     * Replaces every Unicode control character (general category {@code Cc}) in the given string.
+     *
+     * @param uniCodeStr    the string to sanitize
+     * @param toReplaceWith the literal text substituted for each control character
+     * @return the input with each control character replaced by {@code toReplaceWith}
+     */
+    public static String replaceUnicodeControlCharacters(String uniCodeStr, String toReplaceWith) {
+        // replace control characters (https://stackoverflow.com/questions/3438854/replace-unicode-control-characters/)
+        // quoteReplacement keeps '$' and '\' in toReplaceWith literal instead of being parsed as group references
+        return uniCodeStr.replaceAll("\\p{Cc}", java.util.regex.Matcher.quoteReplacement(toReplaceWith));
+    }
+
+    /**
+     * Replaces every Unicode control character (general category {@code Cc}) in the given string with a single space.
+     *
+     * @param uniCodeStr the string to sanitize
+     * @return the input with each control character replaced by a space
+     */
+    public static String replaceUnicodeControlCharacters(String uniCodeStr) {
+        return replaceUnicodeControlCharacters(uniCodeStr, " ");
+    }
+
     /**
      * @param maxArraySize The maximum number of elements in the random array
      * @param stringSize The length of each String in the array