From 98d4df2224e7d6eb94ec4b574056ab1b0562eb91 Mon Sep 17 00:00:00 2001 From: Viktor Date: Fri, 8 Mar 2019 17:13:05 +0100 Subject: [PATCH 1/2] Check regex timeout in loops and repetitions Check the regex timeout in SetLoop and SetRepetition opcodes to avoid the timeout not being handled. --- .../Text/RegularExpressions/RegexCompiler.cs | 44 +++++++++++++++++++ .../RegularExpressions/RegexInterpreter.cs | 19 ++++++++ .../tests/Regex.Match.Tests.cs | 23 ++++++++++ 3 files changed, 86 insertions(+) diff --git a/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs index 3da864922acd..30886bff1546 100644 --- a/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs +++ b/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs @@ -73,6 +73,7 @@ internal abstract class RegexCompiler private LocalBuilder _temp2V; private LocalBuilder _temp3V; private LocalBuilder _cultureV; // current culture is cached in local variable to prevent many thread local storage accesses for CultureInfo.CurrentCulture + private LocalBuilder _loopV; // counter for setrep and setloop protected RegexCode _code; // the RegexCode object (used for debugging only) protected int[] _codes; // the RegexCodes being translated @@ -111,6 +112,7 @@ internal abstract class RegexCompiler private const int Lazybranchcountback2 = 8; // back2 part of lazybranchcount private const int Forejumpback = 9; // back part of forejump private const int Uniquecount = 10; + private const int LoopTimeoutCheckCount = 2000; // A conservative value to guarantee the correct timeout handling. static RegexCompiler() { @@ -368,6 +370,22 @@ private void Ret() _ilg.Emit(OpCodes.Ret); } + /* + * A macro for _ilg.Emit(OpCodes.Rem) + */ + private void Rem() + { + _ilg.Emit(OpCodes.Rem); + } + + /* + * A macro for _ilg.Emit(OpCodes.Ceq) + */ + private void Ceq() + { + _ilg.Emit(OpCodes.Ceq); + } + /* * A macro for _ilg.Emit(OpCodes.Pop) */ @@ -1602,6 +1620,7 @@ protected void GenerateGo() _tempV = DeclareInt(); _temp2V = DeclareInt(); _temp3V = DeclareInt(); + _loopV = DeclareInt(); _textbegV = DeclareInt(); _textendV = DeclareInt(); _textstartV = DeclareInt(); @@ -2787,6 +2806,7 @@ private void GenerateOneCode() if (Code() == RegexCode.Setrep) { + EmitTimeoutCheck(); Ldstr(_strings[Operand(0)]); Call(s_charInSetM); @@ -2896,6 +2916,7 @@ private void GenerateOneCode() if (Code() == RegexCode.Setloop) { + EmitTimeoutCheck(); Ldstr(_strings[Operand(0)]); Call(s_charInSetM); @@ -3105,5 +3126,28 @@ private void GenerateOneCode() throw new NotImplementedException(SR.UnimplementedState); } } + + private void EmitTimeoutCheck() + { + Label label = DefineLabel(); + + // Increment counter for each loop iteration. + Ldloc(_loopV); + Ldc(1); + Add(); + Stloc(_loopV); + + // Emit code to check the timeout every 2000th-iteration. + Ldloc(_loopV); + Ldc(LoopTimeoutCheckCount); + Rem(); + Ldc(0); + Ceq(); + Brfalse(label); + Ldthis(); + Callvirt(s_checkTimeoutM); + + MarkLabel(label); + } } } diff --git a/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs b/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs index 917075739218..0f3233440943 100644 --- a/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs +++ b/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs @@ -18,6 +18,7 @@ internal sealed class RegexInterpreter : RegexRunner private int _codepos; private bool _rightToLeft; private bool _caseInsensitive; + private const int LoopTimeoutCheckCount = 2000; // A conservative value to guarantee the correct timeout handling. public RegexInterpreter(RegexCode code, CultureInfo culture) { @@ -964,8 +965,18 @@ protected override void Go() string set = _code.Strings[Operand(0)]; while (c-- > 0) + { + // Check the timeout every 2000th iteration. The aditional if check + // in every iteration can be neglected as the cost of the CharInClass + // check is many times higher. + if (c % LoopTimeoutCheckCount == 0) + { + CheckTimeout(); + } + if (!RegexCharClass.CharInClass(Forwardcharnext(), set)) goto BreakBackward; + } advance = 2; continue; @@ -1035,6 +1046,14 @@ protected override void Go() for (i = c; i > 0; i--) { + // Check the timeout every 2000th iteration. The aditional if check + // in every iteration can be neglected as the cost of the CharInClass + // check is many times higher. + if (i % LoopTimeoutCheckCount == 0) + { + CheckTimeout(); + } + if (!RegexCharClass.CharInClass(Forwardcharnext(), set)) { Backwardnext(); diff --git a/src/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs b/src/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs index 1571757ca5db..ef913275beff 100644 --- a/src/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs +++ b/src/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs @@ -380,6 +380,29 @@ public void Match_Timeout_Throws() }).Dispose(); } + [Theory] + [InlineData(RegexOptions.Compiled)] + [InlineData(RegexOptions.None)] + public void Match_Timeout_Loop_Throws(RegexOptions options) + { + var regex = new Regex(@"a\s+", options, TimeSpan.FromSeconds(1)); + string input = @"a" + new string(' ', 800_000_000) + @"b"; + + Assert.Throws(() => regex.Match(input)); + } + + [Theory] + [InlineData(RegexOptions.Compiled)] + [InlineData(RegexOptions.None)] + public void Match_Timeout_Repetition_Throws(RegexOptions options) + { + int repetitionCount = 800_000_000; + var regex = new Regex(@"a\s{" + repetitionCount+ "}", options, TimeSpan.FromSeconds(1)); + string input = @"a" + new string(' ', repetitionCount) + @"b"; + + Assert.Throws(() => regex.Match(input)); + } + public static IEnumerable Match_Advanced_TestData() { // \B special character escape: ".*\\B(SUCCESS)\\B.*" From e5a3a7f4aa3e97c6d6215943f9b71e60738799d0 Mon Sep 17 00:00:00 2001 From: Viktor Hofer Date: Fri, 31 May 2019 18:59:19 +0200 Subject: [PATCH 2/2] PR feedback and fix OOM --- .../System/Text/RegularExpressions/RegexCompiler.cs | 2 +- .../System/Text/RegularExpressions/RegexInterpreter.cs | 10 +++++----- .../tests/Regex.Match.Tests.cs | 6 ++++-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs index 30886bff1546..84cd50fb9621 100644 --- a/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs +++ b/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs @@ -112,7 +112,7 @@ internal abstract class RegexCompiler private const int Lazybranchcountback2 = 8; // back2 part of lazybranchcount private const int Forejumpback = 9; // back part of forejump private const int Uniquecount = 10; - private const int LoopTimeoutCheckCount = 2000; // A conservative value to guarantee the correct timeout handling. + private const int LoopTimeoutCheckCount = 2048; // A conservative value to guarantee the correct timeout handling. static RegexCompiler() { diff --git a/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs b/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs index 0f3233440943..f006522dfa59 100644 --- a/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs +++ b/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs @@ -18,7 +18,7 @@ internal sealed class RegexInterpreter : RegexRunner private int _codepos; private bool _rightToLeft; private bool _caseInsensitive; - private const int LoopTimeoutCheckCount = 2000; // A conservative value to guarantee the correct timeout handling. + private const int LoopTimeoutCheckCount = 2048; // A conservative value to guarantee the correct timeout handling. public RegexInterpreter(RegexCode code, CultureInfo culture) { @@ -966,10 +966,10 @@ protected override void Go() while (c-- > 0) { - // Check the timeout every 2000th iteration. The aditional if check + // Check the timeout every 2000th iteration. The additional if check // in every iteration can be neglected as the cost of the CharInClass // check is many times higher. - if (c % LoopTimeoutCheckCount == 0) + if ((uint)c % LoopTimeoutCheckCount == 0) { CheckTimeout(); } @@ -1046,10 +1046,10 @@ protected override void Go() for (i = c; i > 0; i--) { - // Check the timeout every 2000th iteration. The aditional if check + // Check the timeout every 2000th iteration. The additional if check // in every iteration can be neglected as the cost of the CharInClass // check is many times higher. - if (i % LoopTimeoutCheckCount == 0) + if ((uint)i % LoopTimeoutCheckCount == 0) { CheckTimeout(); } diff --git a/src/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs b/src/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs index ef913275beff..9ca5c39f740a 100644 --- a/src/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs +++ b/src/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs @@ -380,7 +380,8 @@ public void Match_Timeout_Throws() }).Dispose(); } - [Theory] + // On 32-bit we can't test these high inputs as they cause OutOfMemoryExceptions. + [ConditionalTheory(typeof(Environment), nameof(Environment.Is64BitProcess))] [InlineData(RegexOptions.Compiled)] [InlineData(RegexOptions.None)] public void Match_Timeout_Loop_Throws(RegexOptions options) @@ -391,7 +392,8 @@ public void Match_Timeout_Loop_Throws(RegexOptions options) Assert.Throws(() => regex.Match(input)); } - [Theory] + // On 32-bit we can't test these high inputs as they cause OutOfMemoryExceptions. + [ConditionalTheory(typeof(Environment), nameof(Environment.Is64BitProcess))] [InlineData(RegexOptions.Compiled)] [InlineData(RegexOptions.None)] public void Match_Timeout_Repetition_Throws(RegexOptions options)