diff --git a/docs/changelog/109173.yaml b/docs/changelog/109173.yaml new file mode 100644 index 0000000000000..9f4f73a6f74c8 --- /dev/null +++ b/docs/changelog/109173.yaml @@ -0,0 +1,5 @@ +pr: 109173 +summary: Wrap "Pattern too complex" exception into an `IllegalArgumentException` +area: Mapping +type: bug +issues: [] diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/analysis-common/50_char_filters.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/analysis-common/50_char_filters.yml index 67e68428c07c7..76f17dddd3f0e 100644 --- a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/analysis-common/50_char_filters.yml +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/analysis-common/50_char_filters.yml @@ -27,6 +27,33 @@ - length: { tokens: 1 } - match: { tokens.0.token: "replacedSample 6 sample1" } +--- +"pattern_replace error handling (too complex pattern)": + - do: + catch: bad_request + indices.create: + index: test_too_complex_regex_pattern + body: + settings: + index: + analysis: + analyzer: + my_analyzer: + tokenizer: standard + char_filter: + - my_char_filter + char_filter: + my_char_filter: + type: "pattern_replace" + # This pattern intentionally uses special characters designed to throw an error. + # It's expected that the pattern may not render correctly. 
+ pattern: "(\\d+)-(?=\\d\nͭͭͭͭͭͭͭͭͭͭͭͭͭͭͭ" + flags: CASE_INSENSITIVE|MULTILINE|DOTALL|UNICODE_CASE|CANON_EQ + replacement: "_$1" + - match: { status: 400 } + - match: { error.type: illegal_argument_exception } + - match: { error.reason: "Too complex regex pattern" } + --- "mapping": - do: diff --git a/server/src/main/java/org/elasticsearch/common/regex/Regex.java b/server/src/main/java/org/elasticsearch/common/regex/Regex.java index 039f484f1ebca..983144c7cee89 100644 --- a/server/src/main/java/org/elasticsearch/common/regex/Regex.java +++ b/server/src/main/java/org/elasticsearch/common/regex/Regex.java @@ -230,8 +230,26 @@ public static boolean simpleMatch(final List patterns, final String str) } public static Pattern compile(String regex, String flags) { - int pFlags = flags == null ? 0 : flagsFromString(flags); - return Pattern.compile(regex, pFlags); + try { + int pFlags = flags == null ? 0 : flagsFromString(flags); + return Pattern.compile(regex, pFlags); + } catch (OutOfMemoryError e) { + if ("Pattern too complex".equals(e.getMessage())) { + // Normally, we do not try to handle OutOfMemoryError errors, as they typically indicate the JVM is not healthy. + // + // In the context of Pattern::compile, an OutOfMemoryError can occur if the pattern is too complex. + // In this case, the OutOfMemoryError is thrown by a pre-check rather than actual memory exhaustion. + // + // Because the JVM has not encountered a real memory issue, we can treat this as a recoverable exception by wrapping + // the original OutOfMemoryError in an IllegalArgumentException. + // + // For additional details, see: + // - https://bugs.openjdk.org/browse/JDK-8300207 + // - https://github.com/openjdk/jdk/commit/030b071db1fb6197a2633a04b20aa95432a903bc + throw new IllegalArgumentException("Too complex regex pattern", e); + } + throw e; + } } public static int flagsFromString(String flags) {