diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Signed.D.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Signed.D.cs index ee2e046901fe1..07723d04810f7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Signed.D.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Signed.D.cs @@ -336,107 +336,121 @@ private static bool TryParseInt32D(ReadOnlySpan source, out int value, out private static bool TryParseInt64D(ReadOnlySpan source, out long value, out int bytesConsumed) { - if (source.Length < 1) - { - bytesConsumed = 0; - value = default; - return false; - } + long sign = 0; // 0 if the value is positive, -1 if the value is negative + int idx = 0; - int indexOfFirstDigit = 0; - int sign = 1; - if (source[0] == '-') + // We use 'nuint' for the firstChar and nextChar data types in this method because + // it gives us a free early zero-extension to 64 bits when running on a 64-bit platform. + + nuint firstChar; + while (true) { - indexOfFirstDigit = 1; - sign = -1; + if ((uint)idx >= (uint)source.Length) { goto FalseExit; } + firstChar = (uint)source[idx] - '0'; + if ((uint)firstChar <= 9) { break; } - if (source.Length <= indexOfFirstDigit) + // We saw something that wasn't a digit. If it's a '+' or a '-', + // we'll set the 'sign' value appropriately and resume the "read + // first char" loop from the next index. If this loops more than + // once (idx != 0), it means we saw a sign character followed by + // a non-digit character, which should be considered an error. + + if (idx != 0) { - bytesConsumed = 0; - value = default; - return false; + goto FalseExit; } - } - else if (source[0] == '+') - { - indexOfFirstDigit = 1; - if (source.Length <= indexOfFirstDigit) + idx++; + + if ((uint)firstChar == unchecked((uint)('-' - '0'))) + { + sign--; // set to -1 + } + else if ((uint)firstChar != unchecked((uint)('+' - '0'))) { - bytesConsumed = 0; - value = default; - return false; + goto FalseExit; // not a digit, not '-', and not '+'; fail } } - int overflowLength = ParserHelpers.Int64OverflowLength + indexOfFirstDigit; + ulong parsedValue = firstChar; + int overflowLength = ParserHelpers.Int64OverflowLength + idx; // +idx to account for any sign char we read + idx++; - // Parse the first digit separately. If invalid here, we need to return false. - long firstDigit = source[indexOfFirstDigit] - 48; // '0' - if (firstDigit < 0 || firstDigit > 9) - { - bytesConsumed = 0; - value = default; - return false; - } - ulong parsedValue = (ulong)firstDigit; + // At this point, we successfully read a single digit character. + // The only failure condition from here on out is integer overflow. if (source.Length < overflowLength) { - // Length is less than Parsers.Int64OverflowLength; overflow is not possible - for (int index = indexOfFirstDigit + 1; index < source.Length; index++) + // If the input span is short enough such that integer overflow isn't an issue, + // don't bother performing overflow checks. Just keep shifting in new digits + // until we see a non-digit character or until we've exhausted our input buffer. + + while (true) { - long nextDigit = source[index] - 48; // '0' - if (nextDigit < 0 || nextDigit > 9) - { - bytesConsumed = index; - value = ((long)parsedValue) * sign; - return true; - } - parsedValue = parsedValue * 10 + (ulong)nextDigit; + if ((uint)idx >= (uint)source.Length) { break; } // EOF + nuint nextChar = (uint)source[idx] - '0'; + if ((uint)nextChar > 9) { break; } // not a digit + parsedValue = parsedValue * 10 + nextChar; + idx++; } } else { - // Length is greater than Parsers.Int64OverflowLength; overflow is only possible after Parsers.Int64OverflowLength - // digits. There may be no overflow after Parsers.Int64OverflowLength if there are leading zeroes. - for (int index = indexOfFirstDigit + 1; index < overflowLength - 1; index++) - { - long nextDigit = source[index] - 48; // '0' - if (nextDigit < 0 || nextDigit > 9) - { - bytesConsumed = index; - value = ((long)parsedValue) * sign; - return true; - } - parsedValue = parsedValue * 10 + (ulong)nextDigit; - } - for (int index = overflowLength - 1; index < source.Length; index++) + while (true) { - long nextDigit = source[index] - 48; // '0' - if (nextDigit < 0 || nextDigit > 9) + if ((uint)idx >= (uint)source.Length) { break; } // EOF + nuint nextChar = (uint)source[idx] - '0'; + if ((uint)nextChar > 9) { break; } // not a digit + idx++; + + // The const below is the smallest unsigned x for which "x * 10 + 9" + // might overflow long.MaxValue. If the current accumulator is below + // this const, there's no risk of overflowing. + + const ulong OverflowRisk = 0x0CCC_CCCC_CCCC_CCCCul; + + if (parsedValue < OverflowRisk) { - bytesConsumed = index; - value = ((long)parsedValue) * sign; - return true; + parsedValue = parsedValue * 10 + nextChar; + continue; } - // If parsedValue > (long.MaxValue / 10), any more appended digits will cause overflow. - // if parsedValue == (long.MaxValue / 10), any nextDigit greater than 7 or 8 (depending on sign) implies overflow. - bool positive = sign > 0; - bool nextDigitTooLarge = nextDigit > 8 || (positive && nextDigit > 7); - if (parsedValue > long.MaxValue / 10 || parsedValue == long.MaxValue / 10 && nextDigitTooLarge) + + // If the current accumulator is exactly equal to the const above, + // then "accumulator * 10 + 7" is the highest we can go without overflowing + // long.MaxValue. (If we know the value is negative, we can instead allow + // +8, since the range of negative numbers is one higher than the range of + // positive numbers.) This also implies that if the current accumulator + // is higher than the const above, there's no hope that we'll succeed, + // so we may as well just fail now. + // + // The (nextChar + sign) trick below works because sign is 0 or -1, + // so if sign is -1 then this actually checks that nextChar > 8. + // n.b. signed arithmetic below because nextChar may be 0. + + if (parsedValue != OverflowRisk || (int)nextChar + (int)sign > 7) { - bytesConsumed = 0; - value = default; - return false; + goto FalseExit; } - parsedValue = parsedValue * 10 + (ulong)nextDigit; + + parsedValue = OverflowRisk * 10 + nextChar; } } - bytesConsumed = source.Length; - value = ((long)parsedValue) * sign; + // 'sign' is 0 for non-negative and -1 for negative. This allows us to perform + // cheap arithmetic + bitwise operations to mimic a multiplication by 1 or -1 + // without incurring the cost of an actual multiplication operation. + // + // If sign = 0, this becomes value = (parsedValue ^ 0) - 0 = parsedValue + // If sign = -1, this becomes value = (parsedValue ^ -1) - (-1) = ~parsedValue + 1 = -parsedValue + + bytesConsumed = idx; + value = ((long)parsedValue ^ sign) - sign; return true; + + FalseExit: + bytesConsumed = 0; + value = default; + return false; } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Unsigned.D.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Unsigned.D.cs index 0b9cca720b425..dd4572afaf7fa 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Unsigned.D.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Utf8Parser/Utf8Parser.Integer.Unsigned.D.cs @@ -277,77 +277,83 @@ private static bool TryParseUInt32D(ReadOnlySpan source, out uint value, o private static bool TryParseUInt64D(ReadOnlySpan source, out ulong value, out int bytesConsumed) { - if (source.Length < 1) + if (source.IsEmpty) { - bytesConsumed = 0; - value = default; - return false; + goto FalseExit; } + // We use 'nuint' for the firstDigit and nextChar data types in this method because + // it gives us a free early zero-extension to 64 bits when running on a 64-bit platform. + // // Parse the first digit separately. If invalid here, we need to return false. - ulong firstDigit = source[0] - 48u; // '0' - if (firstDigit > 9) - { - bytesConsumed = 0; - value = default; - return false; - } + + nuint firstDigit = (uint)source[0] - '0'; + if ((uint)firstDigit > 9) { goto FalseExit; } ulong parsedValue = firstDigit; - if (source.Length < ParserHelpers.Int64OverflowLength) + // At this point, we successfully read a single digit character. + // The only failure condition from here on out is integer overflow. + + int idx = 1; + if (source.Length < ParserHelpers.UInt64OverflowLength) { - // Length is less than Parsers.Int64OverflowLength; overflow is not possible - for (int index = 1; index < source.Length; index++) + // If the input span is short enough such that integer overflow isn't an issue, + // don't bother performing overflow checks. Just keep shifting in new digits + // until we see a non-digit character or until we've exhausted our input buffer. + + while (true) { - ulong nextDigit = source[index] - 48u; // '0' - if (nextDigit > 9) - { - bytesConsumed = index; - value = parsedValue; - return true; - } - parsedValue = parsedValue * 10 + nextDigit; + if ((uint)idx >= (uint)source.Length) { break; } // EOF + nuint nextChar = (uint)source[idx] - '0'; + if ((uint)nextChar > 9) { break; } // not a digit + parsedValue = parsedValue * 10 + nextChar; + idx++; } } else { - // Length is greater than Parsers.Int64OverflowLength; overflow is only possible after Parsers.Int64OverflowLength - // digits. There may be no overflow after Parsers.Int64OverflowLength if there are leading zeroes. - for (int index = 1; index < ParserHelpers.Int64OverflowLength - 1; index++) + while (true) { - ulong nextDigit = source[index] - 48u; // '0' - if (nextDigit > 9) - { - bytesConsumed = index; - value = parsedValue; - return true; - } - parsedValue = parsedValue * 10 + nextDigit; - } - for (int index = ParserHelpers.Int64OverflowLength - 1; index < source.Length; index++) - { - ulong nextDigit = source[index] - 48u; // '0' - if (nextDigit > 9) + if ((uint)idx >= (uint)source.Length) { break; } // EOF + nuint nextChar = (uint)source[idx] - '0'; + if ((uint)nextChar > 9) { break; } // not a digit + idx++; + + // The const below is the smallest unsigned x for which "x * 10 + 9" + // might overflow ulong.MaxValue. If the current accumulator is below + // this const, there's no risk of overflowing. + + const ulong OverflowRisk = 0x1999_9999_9999_9999ul; + + if (parsedValue < OverflowRisk) { - bytesConsumed = index; - value = parsedValue; - return true; + parsedValue = parsedValue * 10 + nextChar; + continue; } - // If parsedValue > (ulong.MaxValue / 10), any more appended digits will cause overflow. - // if parsedValue == (ulong.MaxValue / 10), any nextDigit greater than 5 implies overflow. - if (parsedValue > ulong.MaxValue / 10 || (parsedValue == ulong.MaxValue / 10 && nextDigit > 5)) + + // If the current accumulator is exactly equal to the const above, + // then "accumulator * 10 + 5" is the highest we can go without overflowing + // ulong.MaxValue. This also implies that if the current accumulator + // is higher than the const above, there's no hope that we'll succeed, + // so we may as well just fail now. + + if (parsedValue != OverflowRisk || (uint)nextChar > 5) { - bytesConsumed = 0; - value = default; - return false; + goto FalseExit; } - parsedValue = parsedValue * 10 + nextDigit; + + parsedValue = OverflowRisk * 10 + nextChar; } } - bytesConsumed = source.Length; + bytesConsumed = idx; value = parsedValue; return true; + + FalseExit: + bytesConsumed = 0; + value = default; + return false; } } }