Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.
/ corefx Public archive

Check regex timeout in loops and repetitions #38091

Merged
merged 2 commits into from
May 31, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ internal abstract class RegexCompiler
private LocalBuilder _temp2V;
private LocalBuilder _temp3V;
private LocalBuilder _cultureV; // current culture is cached in local variable to prevent many thread local storage accesses for CultureInfo.CurrentCulture
private LocalBuilder _loopV; // counter for setrep and setloop

protected RegexCode _code; // the RegexCode object (used for debugging only)
protected int[] _codes; // the RegexCodes being translated
Expand Down Expand Up @@ -111,6 +112,7 @@ internal abstract class RegexCompiler
private const int Lazybranchcountback2 = 8; // back2 part of lazybranchcount
private const int Forejumpback = 9; // back part of forejump
private const int Uniquecount = 10;
private const int LoopTimeoutCheckCount = 2048; // A conservative value to guarantee the correct timeout handling.

static RegexCompiler()
{
Expand Down Expand Up @@ -368,6 +370,22 @@ private void Ret()
_ilg.Emit(OpCodes.Ret);
}

/*
* A macro for _ilg.Emit(OpCodes.Rem)
*/
private void Rem()
{
_ilg.Emit(OpCodes.Rem);
}

/*
* A macro for _ilg.Emit(OpCodes.Ceq)
*/
private void Ceq()
{
_ilg.Emit(OpCodes.Ceq);
}

/*
* A macro for _ilg.Emit(OpCodes.Pop)
*/
Expand Down Expand Up @@ -1602,6 +1620,7 @@ protected void GenerateGo()
_tempV = DeclareInt();
_temp2V = DeclareInt();
_temp3V = DeclareInt();
_loopV = DeclareInt();
_textbegV = DeclareInt();
_textendV = DeclareInt();
_textstartV = DeclareInt();
Expand Down Expand Up @@ -2787,6 +2806,7 @@ private void GenerateOneCode()

if (Code() == RegexCode.Setrep)
{
EmitTimeoutCheck();
Ldstr(_strings[Operand(0)]);
Call(s_charInSetM);

Expand Down Expand Up @@ -2896,6 +2916,7 @@ private void GenerateOneCode()

if (Code() == RegexCode.Setloop)
{
EmitTimeoutCheck();
Ldstr(_strings[Operand(0)]);
Call(s_charInSetM);

Expand Down Expand Up @@ -3105,5 +3126,28 @@ private void GenerateOneCode()
throw new NotImplementedException(SR.UnimplementedState);
}
}

private void EmitTimeoutCheck()
{
Label label = DefineLabel();

// Increment counter for each loop iteration.
Ldloc(_loopV);
Ldc(1);
Add();
Stloc(_loopV);

// Emit code to check the timeout every 2000th-iteration.
Ldloc(_loopV);
Ldc(LoopTimeoutCheckCount);
Rem();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See comment on the non-emit version.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment wasn't address. Fine for this PR, but consider addressing it subsequently.

Ldc(0);
Ceq();
Brfalse(label);
Ldthis();
Callvirt(s_checkTimeoutM);

MarkLabel(label);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ internal sealed class RegexInterpreter : RegexRunner
private int _codepos;
private bool _rightToLeft;
private bool _caseInsensitive;
private const int LoopTimeoutCheckCount = 2048; // A conservative value to guarantee the correct timeout handling.

public RegexInterpreter(RegexCode code, CultureInfo culture)
{
Expand Down Expand Up @@ -964,8 +965,18 @@ protected override void Go()
string set = _code.Strings[Operand(0)];

while (c-- > 0)
{
// Check the timeout every 2000th iteration. The additional if check
// in every iteration can be neglected as the cost of the CharInClass
// check is many times higher.
if ((uint)c % LoopTimeoutCheckCount == 0)
{
CheckTimeout();
}

if (!RegexCharClass.CharInClass(Forwardcharnext(), set))
goto BreakBackward;
}

advance = 2;
continue;
Expand Down Expand Up @@ -1035,6 +1046,14 @@ protected override void Go()

for (i = c; i > 0; i--)
{
// Check the timeout every 2000th iteration. The additional if check
// in every iteration can be neglected as the cost of the CharInClass
// check is many times higher.
if ((uint)i % LoopTimeoutCheckCount == 0)
{
CheckTimeout();
}

if (!RegexCharClass.CharInClass(Forwardcharnext(), set))
{
Backwardnext();
Expand Down
25 changes: 25 additions & 0 deletions src/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,31 @@ public void Match_Timeout_Throws()
}).Dispose();
}

// On 32-bit we can't test these high inputs as they cause OutOfMemoryExceptions.
[ConditionalTheory(typeof(Environment), nameof(Environment.Is64BitProcess))]
[InlineData(RegexOptions.Compiled)]
[InlineData(RegexOptions.None)]
public void Match_Timeout_Loop_Throws(RegexOptions options)
{
var regex = new Regex(@"a\s+", options, TimeSpan.FromSeconds(1));
string input = @"a" + new string(' ', 800_000_000) + @"b";

Assert.Throws<RegexMatchTimeoutException>(() => regex.Match(input));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we validate that the exception is thrown within some window of time?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would have to have significant leeway (eg., within 2x or 3x) to account for vagaries of CI machines.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I see it's a second. Maybe within 30 sec?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Assuming 800M will take much longer.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

30 seconds, 1 minute, whatever we think is reasonable. My goal would just be that the test doesn't pass after the regex runs for an hour and then upon completion sees there was a timeout requested and throws :)

}

// On 32-bit we can't test these high inputs as they cause OutOfMemoryExceptions.
[ConditionalTheory(typeof(Environment), nameof(Environment.Is64BitProcess))]
[InlineData(RegexOptions.Compiled)]
[InlineData(RegexOptions.None)]
public void Match_Timeout_Repetition_Throws(RegexOptions options)
{
int repetitionCount = 800_000_000;
var regex = new Regex(@"a\s{" + repetitionCount+ "}", options, TimeSpan.FromSeconds(1));
string input = @"a" + new string(' ', repetitionCount) + @"b";

Assert.Throws<RegexMatchTimeoutException>(() => regex.Match(input));
}

public static IEnumerable<object[]> Match_Advanced_TestData()
{
// \B special character escape: ".*\\B(SUCCESS)\\B.*"
Expand Down