Skip to content

Commit

Permalink
replacing unicode control chars (#6486) (#6522)
Browse files: browse the repository at this point in the history
(cherry picked from commit 41cc4df)

Signed-off-by: Ramakrishna Chilaka <[email protected]>
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
  • Loading branch information
1 parent 940a3f9 commit c694e50
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -53,19 +53,32 @@ public class HighlightFieldTests extends OpenSearchTestCase {

/**
 * Builds a randomized {@link HighlightField} for use in tests.
 *
 * <p>The field name is usually a random alphabetic string of 5-20 characters and occasionally a
 * "realistic" Unicode string of 5-20 code points. The fragments array is either {@code null} or
 * holds 0-5 entries whose text is drawn the same way (10-30 characters / code points). Both the
 * name and every fragment text are passed through {@code replaceUnicodeControlCharacters} before
 * they are used.
 *
 * @return a new {@code HighlightField} with a random name and random (possibly {@code null}) fragments
 */
public static HighlightField createTestItem() {
    String name = frequently() ? randomAlphaOfLengthBetween(5, 20) : randomRealisticUnicodeOfCodepointLengthBetween(5, 20);
    name = replaceUnicodeControlCharacters(name);
    Text[] fragments = null;
    if (frequently()) {
        int size = randomIntBetween(0, 5);
        fragments = new Text[size];
        for (int i = 0; i < size; i++) {
            // Each slot is assigned exactly once, with sanitized text; the earlier unsanitized
            // assignment was a dead store that was immediately overwritten.
            String fragmentText = frequently()
                ? randomAlphaOfLengthBetween(10, 30)
                : randomRealisticUnicodeOfCodepointLengthBetween(10, 30);
            fragmentText = replaceUnicodeControlCharacters(fragmentText);
            fragments[i] = new Text(fragmentText);
        }
    }
    return new HighlightField(name, fragments);
}

/**
 * Verifies both overloads of {@code replaceUnicodeControlCharacters}: a NUL character (U+0000)
 * embedded in a string is replaced, and a string without control characters is returned unchanged.
 */
public void testReplaceUnicodeControlCharacters() {
    final String withControlChar = "æÆ\u0000¢¡Èýñ«Ò";
    final String withoutControlChars = "test_string_without_control_characters";

    // Single-argument overload: the control character becomes a space.
    assertEquals("æÆ ¢¡Èýñ«Ò", replaceUnicodeControlCharacters(withControlChar));
    assertEquals("test_string_without_control_characters", replaceUnicodeControlCharacters(withoutControlChars));

    // Two-argument overload: the control character becomes the supplied replacement.
    final String replacement = "@";
    assertEquals("æÆ@¢¡Èýñ«Ò", replaceUnicodeControlCharacters(withControlChar, replacement));
    assertEquals("test_string_without_control_characters", replaceUnicodeControlCharacters(withoutControlChars, replacement));
}

public void testFromXContent() throws IOException {
HighlightField highlightField = createTestItem();
XContentType xcontentType = randomFrom(XContentType.values());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -883,6 +883,15 @@ public static String randomRealisticUnicodeOfCodepointLength(int codePoints) {
return RandomizedTest.randomRealisticUnicodeOfCodepointLength(codePoints);
}

/**
 * Replaces every Unicode control character in {@code uniCodeStr} with {@code toReplaceWith}.
 *
 * <p>Control characters are matched with the regular expression {@code \p{Cc}}. The replacement
 * text is inserted literally, so {@code $} and backslash characters in {@code toReplaceWith}
 * have no special meaning.
 *
 * @param uniCodeStr the string to sanitize
 * @param toReplaceWith the text substituted for each control character
 * @return {@code uniCodeStr} with each control character replaced by {@code toReplaceWith}
 */
public static String replaceUnicodeControlCharacters(String uniCodeStr, String toReplaceWith) {
    // replace control characters (https://stackoverflow.com/questions/3438854/replace-unicode-control-characters/)
    // String.replaceAll treats '$' and '\' in the replacement as group references / escapes and
    // throws on a bare "$" or "\", so the replacement is quoted to be taken literally.
    return uniCodeStr.replaceAll("\\p{Cc}", java.util.regex.Matcher.quoteReplacement(toReplaceWith));
}

/**
 * Replaces every Unicode control character in {@code uniCodeStr} with a single space.
 *
 * @param uniCodeStr the string to sanitize
 * @return the result of the two-argument overload invoked with {@code " "} as the replacement
 */
public static String replaceUnicodeControlCharacters(String uniCodeStr) {
    final String singleSpace = " ";
    return replaceUnicodeControlCharacters(uniCodeStr, singleSpace);
}

/**
* @param maxArraySize The maximum number of elements in the random array
* @param stringSize The length of each String in the array
Expand Down

0 comments on commit c694e50

Please sign in to comment.