Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unify parsing part of BigInteger with CoreLib #85978

Merged
merged 20 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
378 changes: 378 additions & 0 deletions src/libraries/Common/src/System/Number.Parsing.Common.cs

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,6 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Number.Dragon4.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Number.Formatting.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Number.Grisu3.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Number.NumberBuffer.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Number.NumberToFloatingPointBits.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Number.Parsing.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Numerics\BitOperations.cs" />
Expand Down Expand Up @@ -1334,6 +1333,12 @@
<Compile Include="$(CommonPath)System\NotImplemented.cs">
<Link>Common\System\NotImplemented.cs</Link>
</Compile>
<Compile Include="$(CommonPath)System\Number.NumberBuffer.cs">
<Link>System\Number.NumberBuffer.cs</Link>
</Compile>
<Compile Include="$(CommonPath)System\Number.Parsing.Common.cs">
<Link>System\Number.Parsing.Common.cs</Link>
adamsitnik marked this conversation as resolved.
Show resolved Hide resolved
</Compile>
<Compile Include="$(CommonPath)System\Numerics\Crc32ReflectedTable.cs">
<Link>Common\System\Numerics\Crc32ReflectedTable.cs</Link>
</Compile>
Expand Down
336 changes: 0 additions & 336 deletions src/libraries/System.Private.CoreLib/src/System/Number.Parsing.cs
Original file line number Diff line number Diff line change
Expand Up @@ -138,265 +138,6 @@ internal static TInteger ParseBinaryInteger<TInteger>(ReadOnlySpan<char> value,
return result;
}

/// <summary>
/// Scans the characters in [<paramref name="str"/>, <paramref name="strEnd"/>) as a decimal number and records
/// the digits, scale and sign in <paramref name="number"/>. Scanning proceeds in four phases: leading
/// whitespace/sign/parenthesis/currency, the digit run (with decimal and group separators), an optional
/// exponent, and trailing whitespace/sign/parenthesis/currency.
/// </summary>
/// <param name="str">On entry, the first character to scan. On return, the position at which scanning stopped (set on both success and failure).</param>
/// <param name="strEnd">One past the last character. Any read at or beyond this position is treated as '\0'.</param>
/// <param name="styles">The syntax elements that are permitted. Must not contain AllowHexSpecifier or AllowBinarySpecifier (asserted).</param>
/// <param name="number">Receives Digits, DigitsCount, Scale, IsNegative and HasNonZeroTail. Must be in its initial state on entry (asserted).</param>
/// <param name="info">Supplies the sign, currency symbol, decimal separator and group separator strings.</param>
/// <returns>
/// true when at least one digit was read and any opening parenthesis was matched by a closing one; otherwise false.
/// This method does not itself require that the whole input was consumed; the caller compares <paramref name="str"/> with the end of input.
/// </returns>
private static unsafe bool TryParseNumber(scoped ref char* str, char* strEnd, NumberStyles styles, ref NumberBuffer number, NumberFormatInfo info)
{
Debug.Assert(str != null);
Debug.Assert(strEnd != null);
Debug.Assert(str <= strEnd);
Debug.Assert((styles & (NumberStyles.AllowHexSpecifier | NumberStyles.AllowBinarySpecifier)) == 0);

// Bit flags accumulated in 'state' while scanning.
const int StateSign = 0x0001;     // a sign (or an opening parenthesis) has been seen
const int StateParens = 0x0002;   // an opening parenthesis is still waiting for its ')'
const int StateDigits = 0x0004;   // at least one digit has been seen
const int StateNonZero = 0x0008;  // at least one non-zero digit has been seen
const int StateDecimal = 0x0010;  // the decimal separator has been seen
const int StateCurrency = 0x0020; // the currency symbol was seen in the leading section

Debug.Assert(number.DigitsCount == 0);
Debug.Assert(number.Scale == 0);
Debug.Assert(!number.IsNegative);
Debug.Assert(!number.HasNonZeroTail);

number.CheckConsistency();

string decSep; // decimal separator from NumberFormatInfo.
string groupSep; // group separator from NumberFormatInfo.
string? currSymbol = null; // currency symbol from NumberFormatInfo.

bool parsingCurrency = false;
if ((styles & NumberStyles.AllowCurrencySymbol) != 0)
{
currSymbol = info.CurrencySymbol;

// The idea here is to match the currency separators and on failure match the number separators to keep the perf of VB's IsNumeric fast.
// The values of decSep are setup to use the correct relevant separator (currency in the if part and decimal in the else part).
decSep = info.CurrencyDecimalSeparator;
groupSep = info.CurrencyGroupSeparator;
parsingCurrency = true;
}
else
{
decSep = info.NumberDecimalSeparator;
groupSep = info.NumberGroupSeparator;
}

int state = 0;
char* p = str;
// Reads past the end of input yield '\0', which matches none of the tokens below and so ends each loop.
char ch = p < strEnd ? *p : '\0';
char* next;

// Phase 1: leading whitespace, sign, opening parenthesis and currency symbol.
while (true)
{
// Eat whitespace unless we've found a sign which isn't followed by a currency symbol.
// "-Kr 1231.47" is legal but "- 1231.47" is not.
if (!IsWhite(ch) || (styles & NumberStyles.AllowLeadingWhite) == 0 || ((state & StateSign) != 0 && ((state & StateCurrency) == 0 && info.NumberNegativePattern != 2)))
{
// Note: "(number.IsNegative = true)" is an assignment used as a condition; it marks the number
// negative as a side effect when the negative sign matches.
if ((((styles & NumberStyles.AllowLeadingSign) != 0) && (state & StateSign) == 0) && ((next = MatchChars(p, strEnd, info.PositiveSign)) != null || ((next = MatchNegativeSignChars(p, strEnd, info)) != null && (number.IsNegative = true))))
{
state |= StateSign;
// 'next' is one past the matched text; step back one because the loop tail pre-increments p.
p = next - 1;
}
else if (ch == '(' && ((styles & NumberStyles.AllowParentheses) != 0) && ((state & StateSign) == 0))
{
// A parenthesised number is negative; StateParens is cleared again when ')' is found in phase 4.
state |= StateSign | StateParens;
number.IsNegative = true;
}
else if (currSymbol != null && (next = MatchChars(p, strEnd, currSymbol)) != null)
{
state |= StateCurrency;
currSymbol = null;
// We already found the currency symbol. There should not be more currency symbols. Set
// currSymbol to NULL so that we won't search it again in the later code path.
p = next - 1;
}
else
{
break;
}
}
ch = ++p < strEnd ? *p : '\0';
}

// Phase 2: digits, decimal separator and group separators.
int digCount = 0; // number of significant digits seen (may exceed what fits in the buffer)
int digEnd = 0; // one past the last stored digit that counts towards DigitsCount
int maxDigCount = number.Digits.Length - 1; // the last slot is reserved for the '\0' terminator written below
int numberOfTrailingZeros = 0;

while (true)
{
if (IsDigit(ch))
{
state |= StateDigits;

// Leading zeros are not stored; digits are stored only from the first non-zero digit onwards.
if (ch != '0' || (state & StateNonZero) != 0)
{
if (digCount < maxDigCount)
{
number.Digits[digCount] = (byte)(ch);
// For Integer buffers a stored '0' does not advance digEnd, so trailing zeros are
// excluded from DigitsCount; a later non-zero digit moves digEnd past them.
if ((ch != '0') || (number.Kind != NumberBufferKind.Integer))
{
digEnd = digCount + 1;
}
}
else if (ch != '0')
{
// For decimal and binary floating-point numbers, we only
// need to store digits up to maxDigCount. However, we still
// need to keep track of whether any additional digits past
// maxDigCount were non-zero, as that can impact rounding
// for an input that falls evenly between two representable
// results.

number.HasNonZeroTail = true;
}

// Each significant digit before the decimal separator raises the scale by one.
if ((state & StateDecimal) == 0)
{
number.Scale++;
}

if (digCount < maxDigCount)
{
// Handle a case like "53.0". We need to ignore trailing zeros in the fractional part for floating point numbers, so we keep a count of the number of trailing zeros and update digCount later
if (ch == '0')
{
numberOfTrailingZeros++;
}
else
{
numberOfTrailingZeros = 0;
}
}
digCount++;
state |= StateNonZero;
}
else if ((state & StateDecimal) != 0)
{
// A zero after the decimal separator but before any non-zero digit (e.g. "0.001")
// is not stored; it lowers the scale instead.
number.Scale--;
}
}
// When parsing currency and no currency symbol was seen, the plain number separators are accepted as a fallback.
else if (((styles & NumberStyles.AllowDecimalPoint) != 0) && ((state & StateDecimal) == 0) && ((next = MatchChars(p, strEnd, decSep)) != null || (parsingCurrency && (state & StateCurrency) == 0) && (next = MatchChars(p, strEnd, info.NumberDecimalSeparator)) != null))
{
state |= StateDecimal;
p = next - 1;
}
// Group separators are accepted only after at least one digit and before the decimal separator.
else if (((styles & NumberStyles.AllowThousands) != 0) && ((state & StateDigits) != 0) && ((state & StateDecimal) == 0) && ((next = MatchChars(p, strEnd, groupSep)) != null || (parsingCurrency && (state & StateCurrency) == 0) && (next = MatchChars(p, strEnd, info.NumberGroupSeparator)) != null))
{
p = next - 1;
}
else
{
break;
}
ch = ++p < strEnd ? *p : '\0';
}

bool negExp = false;
number.DigitsCount = digEnd;
number.Digits[digEnd] = (byte)('\0');
if ((state & StateDigits) != 0)
{
// Phase 3: optional exponent.
if ((ch == 'E' || ch == 'e') && ((styles & NumberStyles.AllowExponent) != 0))
{
// Remember the position of 'E'/'e' so scanning can rewind if no exponent digits follow.
char* temp = p;
ch = ++p < strEnd ? *p : '\0';
// NOTE(review): this reads info._positiveSign directly, whereas the other sign checks in this
// method use info.PositiveSign — confirm the two are interchangeable here.
if ((next = MatchChars(p, strEnd, info._positiveSign)) != null)
{
ch = (p = next) < strEnd ? *p : '\0';
}
else if ((next = MatchNegativeSignChars(p, strEnd, info)) != null)
{
ch = (p = next) < strEnd ? *p : '\0';
negExp = true;
}
if (IsDigit(ch))
{
int exp = 0;
do
{
exp = exp * 10 + (ch - '0');
ch = ++p < strEnd ? *p : '\0';
// Once the exponent exceeds 1000 it is pinned to 9999 and the remaining exponent
// digits are consumed without accumulating, so 'exp' cannot overflow.
if (exp > 1000)
{
exp = 9999;
while (IsDigit(ch))
{
ch = ++p < strEnd ? *p : '\0';
}
}
} while (IsDigit(ch));
if (negExp)
{
exp = -exp;
}
number.Scale += exp;
}
else
{
// No digits after 'E'/'e' (and optional sign): rewind so the 'E' is left unconsumed.
p = temp;
ch = p < strEnd ? *p : '\0';
}
}

if (number.Kind == NumberBufferKind.FloatingPoint && !number.HasNonZeroTail)
{
// Adjust the number buffer for trailing zeros
int numberOfFractionalDigits = digEnd - number.Scale;
if (numberOfFractionalDigits > 0)
{
// Only zeros that fall in the fractional part are dropped.
numberOfTrailingZeros = Math.Min(numberOfTrailingZeros, numberOfFractionalDigits);
Debug.Assert(numberOfTrailingZeros >= 0);
number.DigitsCount = digEnd - numberOfTrailingZeros;
number.Digits[number.DigitsCount] = (byte)('\0');
}
}

// Phase 4: trailing whitespace, sign, closing parenthesis and currency symbol.
while (true)
{
if (!IsWhite(ch) || (styles & NumberStyles.AllowTrailingWhite) == 0)
{
// A trailing sign is accepted only if no leading sign or parenthesis was seen.
if ((styles & NumberStyles.AllowTrailingSign) != 0 && ((state & StateSign) == 0) && ((next = MatchChars(p, strEnd, info.PositiveSign)) != null || (((next = MatchNegativeSignChars(p, strEnd, info)) != null) && (number.IsNegative = true))))
{
state |= StateSign;
p = next - 1;
}
else if (ch == ')' && ((state & StateParens) != 0))
{
// Closing parenthesis found: clear the pending-parenthesis flag.
state &= ~StateParens;
}
else if (currSymbol != null && (next = MatchChars(p, strEnd, currSymbol)) != null)
{
// currSymbol is still non-null only if it was not already matched in phase 1.
currSymbol = null;
p = next - 1;
}
else
{
break;
}
}
ch = ++p < strEnd ? *p : '\0';
}
// Succeed only if every opening parenthesis was closed.
if ((state & StateParens) == 0)
{
// The value is zero (only zero digits were seen).
if ((state & StateNonZero) == 0)
{
// Non-Decimal buffers get their scale reset for a zero value; Decimal buffers keep it.
if (number.Kind != NumberBufferKind.Decimal)
{
number.Scale = 0;
}
// An Integer zero written without a decimal separator is reported as non-negative.
if ((number.Kind == NumberBufferKind.Integer) && (state & StateDecimal) == 0)
{
number.IsNegative = false;
}
}
str = p;
return true;
}
}
// No digits were read, or a parenthesis was left open.
str = p;
return false;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static ParsingStatus TryParseBinaryInteger<TInteger>(ReadOnlySpan<char> value, NumberStyles styles, NumberFormatInfo info, out TInteger result)
where TInteger : unmanaged, IBinaryIntegerParseAndFormatInfo<TInteger>
Expand Down Expand Up @@ -1198,83 +939,6 @@ internal static unsafe bool TryParseSingle(ReadOnlySpan<char> value, NumberStyle
return true;
}

/// <summary>
/// Parses <paramref name="value"/> into <paramref name="number"/> and requires that everything after the
/// parsed number, if anything, consists solely of '\0' characters.
/// </summary>
/// <param name="value">The characters to parse.</param>
/// <param name="styles">The permitted number syntax, forwarded to TryParseNumber.</param>
/// <param name="number">The buffer that receives the parsed digits, scale and sign.</param>
/// <param name="info">The culture-specific formatting data; must not be null (asserted).</param>
/// <returns>true if the number parsed and no non-'\0' characters remain after it; otherwise false.</returns>
internal static unsafe bool TryStringToNumber(ReadOnlySpan<char> value, NumberStyles styles, ref NumberBuffer number, NumberFormatInfo info)
{
    Debug.Assert(info != null);

    bool succeeded;
    fixed (char* start = &MemoryMarshal.GetReference(value))
    {
        char* cursor = start;
        char* end = start + value.Length;

        succeeded = TryParseNumber(ref cursor, end, styles, ref number, info);
        if (succeeded)
        {
            // The parser may stop before the end of the input; anything left over must be '\0' padding.
            int consumed = (int)(cursor - start);
            if (consumed < value.Length)
            {
                succeeded = TrailingZeros(value, consumed);
            }
        }
    }

    // The buffer is validated on both the success and the failure path.
    number.CheckConsistency();
    return succeeded;
}

/// <summary>
/// Reports whether every character of <paramref name="value"/> from <paramref name="index"/> onward is '\0'.
/// For compatibility, a number string is allowed to end with such trailing null characters.
/// </summary>
/// <param name="value">The full input being parsed.</param>
/// <param name="index">The position at which the trailing section starts.</param>
/// <returns>true if the trailing section is empty or holds only '\0'; otherwise false.</returns>
[MethodImpl(MethodImplOptions.NoInlining)] // rare slow path that shouldn't impact perf of the main use case
private static bool TrailingZeros(ReadOnlySpan<char> value, int index)
{
    ReadOnlySpan<char> remainder = value.Slice(index);
    int firstNonNull = remainder.IndexOfAnyExcept('\0');
    return firstNonNull == -1;
}

/// <summary>
/// Returns true for U+00A0 (no-break space) and U+202F (narrow no-break space), the two characters
/// this parser treats as interchangeable with an ordinary U+0020 space.
/// </summary>
private static bool IsSpaceReplacingChar(char c)
{
    return c is '\u00a0' or '\u202f';
}

/// <summary>
/// Tries to match a negative sign at <paramref name="p"/>: first the NegativeSign string from
/// <paramref name="info"/>, then, if <c>info.AllowHyphenDuringParsing</c> is set, a plain '-'.
/// </summary>
/// <param name="p">The current scan position.</param>
/// <param name="pEnd">One past the last readable character.</param>
/// <param name="info">Supplies NegativeSign and AllowHyphenDuringParsing.</param>
/// <returns>A pointer just past the matched sign, or null if no negative sign is present.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe char* MatchNegativeSignChars(char* p, char* pEnd, NumberFormatInfo info)
{
    char* afterSign = MatchChars(p, pEnd, info.NegativeSign);
    if (afterSign != null)
    {
        return afterSign;
    }

    // Fall back to the ASCII hyphen when the format info permits it.
    if (info.AllowHyphenDuringParsing && p < pEnd && *p == '-')
    {
        return p + 1;
    }

    return null;
}

/// <summary>
/// Checks whether the text starting at <paramref name="p"/> begins with <paramref name="value"/>.
/// Comparison of <paramref name="value"/> stops at its first '\0' character.
/// </summary>
/// <param name="p">The current scan position.</param>
/// <param name="pEnd">One past the last readable character; positions at or beyond it read as '\0'.</param>
/// <param name="value">The token to match. An empty token never matches.</param>
/// <returns>A pointer just past the matched token, or null if there is no match.</returns>
private static unsafe char* MatchChars(char* p, char* pEnd, string value)
{
    Debug.Assert(p != null && pEnd != null && p <= pEnd && value != null);

    fixed (char* pattern = value)
    {
        char* expected = pattern;

        // We only hurt the failure case
        // This fix is for French or Kazakh cultures. Since a user cannot type 0xA0 or 0x202F as a
        // space character we use 0x20 space character instead to mean the same.
        while (*expected != '\0')
        {
            char actual = p < pEnd ? *p : '\0';
            bool sameChar = actual == *expected;
            bool spaceStandIn = actual == '\u0020' && IsSpaceReplacingChar(*expected);
            if (!sameChar && !spaceStandIn)
            {
                return null;
            }

            p++;
            expected++;
            if (*expected == '\0')
            {
                // Reached the end of the token: everything matched.
                return p;
            }
        }
    }

    return null;
}

/// <summary>
/// Returns true if <paramref name="ch"/> is the space character (0x20) or lies in the range 0x09 to 0x0D inclusive.
/// </summary>
private static bool IsWhite(int ch)
{
    if (ch == 0x20)
    {
        return true;
    }

    return ch >= 0x09 && ch <= 0x0D;
}

/// <summary>
/// Returns true if <paramref name="ch"/> is one of the ASCII digits '0' through '9'.
/// </summary>
private static bool IsDigit(int ch)
{
    return ch >= '0' && ch <= '9';
}

/// <summary>
/// Result code produced by the TryParse* helpers and consumed by
/// ThrowOverflowOrFormatException to select the exception to raise.
/// </summary>
internal enum ParsingStatus
{
    OK = 0,
    Failed = 1,
    Overflow = 2,
}

/// <summary>
/// Throws the exception that GetException builds for the given parsing status, input and target type.
/// This method never returns.
/// </summary>
/// <param name="status">The parsing outcome that is being reported.</param>
/// <param name="value">The input text that was being parsed.</param>
/// <param name="type">The target type code passed through to GetException; defaults to 0.</param>
[DoesNotReturn]
internal static void ThrowOverflowOrFormatException(ParsingStatus status, ReadOnlySpan<char> value, TypeCode type = 0)
{
    throw GetException(status, value, type);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,23 @@
<Compile Include="System\Numerics\BigIntegerCalculator.SquMul.cs" />
<Compile Include="System\Numerics\BigIntegerCalculator.Utils.cs" />
<Compile Include="System\Numerics\BigInteger.cs" />
<Compile Include="System\Numerics\BigNumber.cs" />
<Compile Include="System\Number.BigInteger.cs" />
<Compile Include="System\Numerics\NumericsHelpers.cs" />
<Compile Include="System\Numerics\Complex.cs" />
<Compile Include="System\Globalization\FormatProvider.BigInteger.cs" />
<Compile Include="System\Globalization\FormatProvider.Number.cs" />
<Compile Include="System\Globalization\FormatProvider.NumberBuffer.cs" />
<Compile Include="Properties\InternalsVisibleTo.cs" />
</ItemGroup>
<ItemGroup>
<Compile Include="$(CommonPath)System\Globalization\FormatProvider.Number.cs"
Link="System\Globalization\FormatProvider.Number.cs" />
<Compile Include="$(CommonPath)System\Text\ValueStringBuilder.cs"
Link="CoreLib\System\Text\ValueStringBuilder.cs" />
<Compile Include="$(CommonPath)System\HexConverter.cs"
Link="Common\System\HexConverter.cs" />
<Compile Include="$(CommonPath)System\Number.NumberBuffer.cs"
Link="Common\System\Number.NumberBuffer.cs" />
<Compile Include="$(CommonPath)System\Number.Parsing.Common.cs"
Link="Common\System\Number.Parsing.Common.cs" />
</ItemGroup>
<ItemGroup>
<Reference Include="System.Memory" />
Expand Down
Loading