From 5e3955f8a5f901eeba40d14f0d5e0bdba026bbdd Mon Sep 17 00:00:00 2001
From: Stephen Toub
Date: Mon, 17 Jun 2024 13:02:58 -0400
Subject: [PATCH] Avoid concatenating adjacent regex loops+strings under right-to-left

The pattern gets reversed but the actual strings in multis aren't
(instead the evaluation just compares them in reverse), which means
optimizations based on comparing nodes in a sequence and the text of
such a string either need to take that into account or be disabled for
right-to-left.
---
 .../src/System/Text/RegularExpressions/RegexNode.cs | 5 ++++-
 .../tests/FunctionalTests/Regex.Match.Tests.cs      | 5 +++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
index 4d9b7a0efdabe..8326cf4ef3eac 100644
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
@@ -1702,7 +1702,10 @@ static bool CanCombineCounts(int nodeMin, int nodeMax, int nextMin, int nextMax)
                             break;
 
                         // Coalescing a loop with a subsequent string
-                        case RegexNodeKind.Oneloop or RegexNodeKind.Onelazy when nextNode.Kind == RegexNodeKind.Multi && currentNode.Ch == nextNode.Str![0]:
+                        case RegexNodeKind.Oneloop or RegexNodeKind.Onelazy when
+                            nextNode.Kind == RegexNodeKind.Multi &&
+                            (nextNode.Options & RegexOptions.RightToLeft) == 0 && // RTL multi nodes don't have their text reversed, and it's not worth the code to optimize further
+                            currentNode.Ch == nextNode.Str![0]:
                             {
                                 // Determine how many of the multi's characters can be combined.
                                 // We already checked for the first, so we know it's at least one.
diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs
index 234b416a9b46d..57780531253d3 100644
--- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs
@@ -870,6 +870,11 @@ public static IEnumerable Match_MemberData()
             yield return (@"\s+\d+", " asdf12 ", RegexOptions.RightToLeft, 0, 6, false, string.Empty);
             yield return ("aaa", "aaabbb", RegexOptions.None, 3, 3, false, string.Empty);
             yield return ("abc|def", "123def456", RegexOptions.RightToLeft | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 9, true, "def");
+            yield return (@"^says?$", "says", RegexOptions.RightToLeft, 0, 4, true, "says");
+            yield return (@"^says?$", "say", RegexOptions.RightToLeft, 0, 3, true, "say");
+            yield return (@"^say(s?)$", "says", RegexOptions.RightToLeft, 0, 4, true, "says");
+            yield return (@"^(say)s?$", "says", RegexOptions.RightToLeft, 0, 4, true, "says");
+            yield return (@"^(.+?) (says?),\s'(.+)'$", "User says, 'adventure'", RegexOptions.RightToLeft, 0, 22, true, "User says, 'adventure'");
 
             // .* : RTL, Case-sensitive
             yield return (@".*\nfoo", "This shouldn't match", RegexOptions.None | RegexOptions.RightToLeft, 0, 20, false, "");