Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Regex.IsMatch(Span) and RegexRunner.Scan() methods #65473

Merged
merged 18 commits into from
Feb 26, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,10 @@ public static void CompileToAssembly(System.Text.RegularExpressions.RegexCompila
public string GroupNameFromNumber(int i) { throw null; }
public int GroupNumberFromName(string name) { throw null; }
protected void InitializeReferences() { }
public bool IsMatch(System.ReadOnlySpan<char> input) { throw null; }
public static bool IsMatch(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex)] string pattern) { throw null; }
public static bool IsMatch(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options) { throw null; }
public static bool IsMatch(System.ReadOnlySpan<char> input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex, "options")] string pattern, System.Text.RegularExpressions.RegexOptions options, System.TimeSpan matchTimeout) { throw null; }
public bool IsMatch(string input) { throw null; }
public bool IsMatch(string input, int startat) { throw null; }
public static bool IsMatch(string input, [System.Diagnostics.CodeAnalysis.StringSyntaxAttribute(System.Diagnostics.CodeAnalysis.StringSyntaxAttribute.Regex)] string pattern) { throw null; }
Expand Down Expand Up @@ -330,9 +334,9 @@ protected void DoubleCrawl() { }
protected void DoubleStack() { }
protected void DoubleTrack() { }
protected void EnsureStorage() { }
protected abstract bool FindFirstChar();
protected abstract void Go();
protected abstract void InitTrackCount();
protected virtual bool FindFirstChar() { throw null; }
protected virtual void Go() { throw null; }
protected virtual void InitTrackCount() { throw null; }
protected bool IsBoundary(int index, int startpos, int endpos) { throw null; }
protected bool IsECMABoundary(int index, int startpos, int endpos) { throw null; }
protected bool IsMatched(int cap) { throw null; }
Expand All @@ -341,6 +345,7 @@ protected void EnsureStorage() { }
protected int Popcrawl() { throw null; }
protected internal System.Text.RegularExpressions.Match? Scan(System.Text.RegularExpressions.Regex regex, string text, int textbeg, int textend, int textstart, int prevlen, bool quick) { throw null; }
protected internal System.Text.RegularExpressions.Match? Scan(System.Text.RegularExpressions.Regex regex, string text, int textbeg, int textend, int textstart, int prevlen, bool quick, System.TimeSpan timeout) { throw null; }
protected internal virtual void Scan(System.ReadOnlySpan<char> text) { throw null; }
protected void TransferCapture(int capnum, int uncapnum, int start, int end) { }
protected void Uncapture() { }
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,64 @@
<?xml version="1.0" encoding="utf-8"?>
<root>
<!--
Microsoft ResX Schema

Version 2.0

The primary goal of this format is to allow a simple XML format
that is mostly human readable. The generation and parsing of the
various data types are done through the TypeConverter classes
associated with the data types.

Example:

... ado.net/XML headers & schema ...
<resheader name="resmimetype">text/microsoft-resx</resheader>
<resheader name="version">2.0</resheader>
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
<value>[base64 mime encoded serialized .NET Framework object]</value>
</data>
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
<comment>This is a comment</comment>
</data>

There are any number of "resheader" rows that contain simple
name/value pairs.

Each data row contains a name, and value. The row also contains a
type or mimetype. Type corresponds to a .NET class that supports
text/value conversion through the TypeConverter architecture.
Classes that don't support this are serialized and stored with the
mimetype set.

The mimetype is used for serialized objects, and tells the
ResXResourceReader how to depersist the object. This is currently not
extensible. For a given mimetype the value must be set accordingly:

Note - application/x-microsoft.net.object.binary.base64 is the format
that the ResXResourceWriter will generate, however the reader can
read any of the formats listed below.

mimetype: application/x-microsoft.net.object.binary.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
: and then encoded with base64 encoding.

mimetype: application/x-microsoft.net.object.soap.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
: and then encoded with base64 encoding.

mimetype: application/x-microsoft.net.object.bytearray.base64
value : The object must be serialized into a byte array
: using a System.ComponentModel.TypeConverter
: and then encoded with base64 encoding.
-->
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
<xsd:element name="root" msdata:IsDataSet="true">
Expand Down Expand Up @@ -254,4 +313,7 @@
<data name="ExpressionDescription_BalancingGroup" xml:space="preserve">
<value>balancing group (?&lt;name1-name2&gt;subexpression) or (?'name1-name2' subexpression)</value>
</data>
</root>
<data name="UsingSpanAPIsWithCompiledToAssembly" xml:space="preserve">
<value>Calling Span overloads with a precompiled Regex engine is not supported. Please use the string overloads instead.</value>
joperezr marked this conversation as resolved.
Show resolved Hide resolved
</data>
</root>
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace System.Text.RegularExpressions
/// </summary>
public class Capture
{
internal Capture(string text, int index, int length)
internal Capture(string? text, int index, int length)
{
Text = text;
Index = index;
Expand All @@ -19,27 +19,38 @@ internal Capture(string text, int index, int length)
/// <summary>Returns the position in the original string where the first character of captured substring was found.</summary>
public int Index { get; private protected set; }

/// <summary>
/// Shifts <see cref="Index"/> forward by <paramref name="beginning"/>. This should only be called when the text used for
/// matching was sliced with a different beginning, so the recorded index is relative to the start of the slice rather than
/// the start of the original text; adding the offset back makes the index relative to the original text again.
/// </summary>
/// <param name="beginning">The original text's beginning offset.</param>
internal void AddBeginningToIndex(int beginning) => Index += beginning;

/// <summary>Returns the length of the captured substring.</summary>
public int Length { get; private protected set; }

/// <summary>The original string</summary>
internal string Text { get; set; }
internal string? Text { get; set; }

/// <summary>Gets the captured substring from the input string.</summary>
/// <value>The substring that is captured by the match.</value>
public string Value => Text.Substring(Index, Length);
public string Value => Text is string text ? text.Substring(Index, Length) : string.Empty;

/// <summary>Gets the captured span from the input string.</summary>
/// <value>The span that is captured by the match.</value>
public ReadOnlySpan<char> ValueSpan => Text.AsSpan(Index, Length);
public ReadOnlySpan<char> ValueSpan => Text is string text ? text.AsSpan(Index, Length) : ReadOnlySpan<char>.Empty;
joperezr marked this conversation as resolved.
Show resolved Hide resolved

/// <summary>Returns the substring that was matched.</summary>
public override string ToString() => Value;

/// <summary>The substring to the left of the capture</summary>
internal ReadOnlyMemory<char> GetLeftSubstring() => Text.AsMemory(0, Index);
internal ReadOnlyMemory<char> GetLeftSubstring() => Text is string text ? text.AsMemory(0, Index) : ReadOnlyMemory<char>.Empty;

/// <summary>The substring to the right of the capture</summary>
internal ReadOnlyMemory<char> GetRightSubstring() => Text.AsMemory(Index + Length, Text.Length - Index - Length);
// Slice through the non-null local bound by the pattern instead of re-reading the nullable Text property.
internal ReadOnlyMemory<char> GetRightSubstring() => Text is string text ? text.AsMemory(Index + Length, text.Length - Index - Length) : ReadOnlyMemory<char>.Empty;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,16 @@ namespace System.Text.RegularExpressions
{
internal sealed class CompiledRegexRunner : RegexRunner
{
private readonly Action<RegexRunner> _goMethod;
private readonly Func<RegexRunner, bool> _findFirstCharMethod;
private readonly ScanDelegate _scanMethod;

public CompiledRegexRunner(Action<RegexRunner> go, Func<RegexRunner, bool> findFirstChar, int trackCount)
internal delegate void ScanDelegate(RegexRunner runner, ReadOnlySpan<char> text);

public CompiledRegexRunner(ScanDelegate scan)
{
_goMethod = go;
_findFirstCharMethod = findFirstChar;
runtrackcount = trackCount;
_scanMethod = scan;
}

protected override void Go() => _goMethod(this);

protected override bool FindFirstChar() => _findFirstCharMethod(this);

protected override void InitTrackCount() { }
protected internal override void Scan(ReadOnlySpan<char> text)
=> _scanMethod(this, text);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,18 @@ namespace System.Text.RegularExpressions
{
internal sealed class CompiledRegexRunnerFactory : RegexRunnerFactory
{
private readonly DynamicMethod _goMethod;
private readonly DynamicMethod _findFirstCharMethod;
private readonly int _trackcount;
private readonly DynamicMethod _scanMethod;

// Delegates are lazily created to avoid forcing JIT'ing until the regex is actually executed.
joperezr marked this conversation as resolved.
Show resolved Hide resolved
private Action<RegexRunner>? _go;
private Func<RegexRunner, bool>? _findFirstChar;
private CompiledRegexRunner.ScanDelegate? _scan;

public CompiledRegexRunnerFactory(DynamicMethod goMethod, DynamicMethod findFirstCharMethod, int trackcount)
public CompiledRegexRunnerFactory(DynamicMethod scanMethod)
{
_goMethod = goMethod;
_findFirstCharMethod = findFirstCharMethod;
_trackcount = trackcount;
_scanMethod = scanMethod;
}

protected internal override RegexRunner CreateInstance() =>
new CompiledRegexRunner(
_go ??= _goMethod.CreateDelegate<Action<RegexRunner>>(),
_findFirstChar ??= _findFirstCharMethod.CreateDelegate<Func<RegexRunner, bool>>(),
_trackcount);
_scan ??= _scanMethod.CreateDelegate<CompiledRegexRunner.ScanDelegate>());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public class Group : Capture
internal int _capcount;
internal CaptureCollection? _capcoll;

internal Group(string text, int[] caps, int capcount, string name)
internal Group(string? text, int[] caps, int capcount, string name)
: base(text, capcount == 0 ? 0 : caps[(capcount - 1) * 2], capcount == 0 ? 0 : caps[(capcount * 2) - 1])
{
_caps = caps;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public class Match : Group
internal bool _balancing; // whether we've done any balancing with this match. If we
// have done balancing, we'll need to do extra work in Tidy().

internal Match(Regex? regex, int capcount, string text, int begpos, int len, int startpos) :
internal Match(Regex? regex, int capcount, string? text, int begpos, int len, int startpos) :
base(text, new int[2], 0, "0")
{
_regex = regex;
Expand All @@ -66,7 +66,7 @@ internal Match(Regex? regex, int capcount, string text, int begpos, int len, int
/// <summary>Returns an empty Match object.</summary>
public static Match Empty { get; } = new Match(null, 1, string.Empty, 0, 0, 0);

internal void Reset(Regex regex, string text, int textbeg, int textend, int textstart)
internal void Reset(Regex regex, string? text, int textbeg, int textend, int textstart)
{
_regex = regex;
Text = text;
Expand All @@ -84,6 +84,11 @@ internal void Reset(Regex regex, string text, int textbeg, int textend, int text
_groupcoll?.Reset();
}

internal bool FoundAMatch
joperezr marked this conversation as resolved.
Show resolved Hide resolved
{
get => _matchcount[0] > 0;
}

public virtual GroupCollection Groups => _groupcoll ??= new GroupCollection(this, null);

/// <summary>
Expand All @@ -94,6 +99,7 @@ internal void Reset(Regex regex, string text, int textbeg, int textend, int text
public Match NextMatch()
{
Regex? r = _regex;
Debug.Assert(Text != null);
return r != null ?
r.Run(false, Length, Text, _textbeg, _textend - _textbeg, _textpos)! :
this;
Expand Down Expand Up @@ -338,7 +344,7 @@ internal sealed class MatchSparse : Match
{
private new readonly Hashtable _caps;

internal MatchSparse(Regex regex, Hashtable caps, int capcount, string text, int begpos, int len, int startpos) :
internal MatchSparse(Regex regex, Hashtable caps, int capcount, string? text, int begpos, int len, int startpos) :
base(regex, capcount, text, begpos, len, startpos)
{
_caps = caps;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,15 @@ public partial class Regex
public static bool IsMatch(string input, [StringSyntax(StringSyntaxAttribute.Regex)] string pattern) =>
RegexCache.GetOrAdd(pattern).IsMatch(input);

/// <summary>
/// Searches the input span for one or more occurrences of the text supplied in the given pattern.
joperezr marked this conversation as resolved.
Show resolved Hide resolved
/// </summary>
/// <param name="input">The input span to be searched on.</param>
joperezr marked this conversation as resolved.
Show resolved Hide resolved
/// <param name="pattern">The Regex pattern to be used for matching.</param>
joperezr marked this conversation as resolved.
Show resolved Hide resolved
/// <returns><see langword="true"/> if the input matches the pattern, <see langword="false"/> otherwise.</returns>
joperezr marked this conversation as resolved.
Show resolved Hide resolved
public static bool IsMatch(ReadOnlySpan<char> input, [StringSyntax(StringSyntaxAttribute.Regex)] string pattern) =>
RegexCache.GetOrAdd(pattern).IsMatch(input);
joperezr marked this conversation as resolved.
Show resolved Hide resolved

/// <summary>
/// Searches the input string for one or more occurrences of the text
/// supplied in the pattern parameter with matching options supplied in the options
Expand All @@ -21,9 +30,30 @@ public static bool IsMatch(string input, [StringSyntax(StringSyntaxAttribute.Reg
public static bool IsMatch(string input, [StringSyntax(StringSyntaxAttribute.Regex, "options")] string pattern, RegexOptions options) =>
RegexCache.GetOrAdd(pattern, options, s_defaultMatchTimeout).IsMatch(input);

/// <summary>
/// Searches the input span for one or more occurrences of the text supplied in the given pattern. It uses the passed in options.
/// </summary>
/// <param name="input">The input span to be searched on.</param>
/// <param name="pattern">The Regex pattern to be used for matching.</param>
/// <param name="options">The options to be used for matching.</param>
/// <returns><see langword="true"/> if the input matches the pattern, <see langword="false"/> otherwise.</returns>
joperezr marked this conversation as resolved.
Show resolved Hide resolved
public static bool IsMatch(ReadOnlySpan<char> input, [StringSyntax(StringSyntaxAttribute.Regex, "options")] string pattern, RegexOptions options) =>
RegexCache.GetOrAdd(pattern, options, s_defaultMatchTimeout).IsMatch(input);

public static bool IsMatch(string input, [StringSyntax(StringSyntaxAttribute.Regex, "options")] string pattern, RegexOptions options, TimeSpan matchTimeout) =>
RegexCache.GetOrAdd(pattern, options, matchTimeout).IsMatch(input);

/// <summary>
/// Searches the input span for one or more occurrences of the text supplied in the given pattern under the specified timeout. It uses the passed in options.
/// </summary>
/// <param name="input">The input span to be searched on.</param>
/// <param name="pattern">The Regex pattern to be used for matching.</param>
/// <param name="options">The options to be used for matching.</param>
/// <param name="matchTimeout">Max time to be used for matching before returning.</param>
/// <returns><see langword="true"/> if the input matches the pattern, <see langword="false"/> otherwise. Also returns <see langword="false"/> for time out.</returns>
public static bool IsMatch(ReadOnlySpan<char> input, [StringSyntax(StringSyntaxAttribute.Regex, "options")] string pattern, RegexOptions options, TimeSpan matchTimeout) =>
RegexCache.GetOrAdd(pattern, options, matchTimeout).IsMatch(input);

/// <summary>
/// Searches the input string for one or more matches using the previous pattern,
/// options, and starting position.
Expand All @@ -38,6 +68,14 @@ public bool IsMatch(string input)
return Run(quick: true, -1, input, 0, input.Length, UseOptionR() ? input.Length : 0) is null;
}

/// <summary>
/// Searches the input span for one or more matches using the previous pattern,
/// options, and starting position.
/// </summary>
/// <returns><see langword="true"/> if the input matches the pattern, <see langword="false"/> otherwise.</returns>
public bool IsMatch(ReadOnlySpan<char> input) =>
Run(input, UseOptionR() ? input.Length : 0) is null;

/// <summary>
/// Searches the input string for one or more matches using the previous pattern and options,
/// with a new starting position.
Expand Down
Loading