Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Format/Parse binary from/to BigInteger #85392

Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
3c127ea
Initial draft commit: add FormatBigIntegerToBin().
lateapexearlyspeed Apr 26, 2023
64a0ead
Fix comment: use '?:' to assign ValueStringBuilder variable to make i…
lateapexearlyspeed Apr 28, 2023
4216498
Refine FormatBigIntegerToBin(); and consider chars overflow scenario.
lateapexearlyspeed Apr 29, 2023
1bcf7f8
Update Format code for final binary format definition.
lateapexearlyspeed May 15, 2023
63923da
Refine FormatBigIntegerToBin().
lateapexearlyspeed May 23, 2023
19c701d
consider case where output is span
lateapexearlyspeed May 25, 2023
56e701f
Turn to use try..finally to return array pool.
lateapexearlyspeed May 27, 2023
817c58c
Initial add method BinNumberToBigInteger().
lateapexearlyspeed May 27, 2023
24d88c7
Update FormatProvider.Number.cs to support AllowBinarySpecifier.
lateapexearlyspeed May 30, 2023
5a90e15
Use BinNumberToBigInteger().
lateapexearlyspeed May 30, 2023
3987f71
Add tests of Format.
lateapexearlyspeed May 30, 2023
8b58eb7
Add tests of Parse().
lateapexearlyspeed Jun 1, 2023
6cea91a
Improve Format(): use ValueStringBuilder just as wrapper for destinat…
lateapexearlyspeed Jun 1, 2023
3007b48
Fix comment: use ch == '0' || ch == '1'
lateapexearlyspeed Jun 2, 2023
cd0a03d
Fix comment: refactor ParseNumber() to extract common abstract operat…
lateapexearlyspeed Aug 25, 2023
f22d2e9
Fix comment: refine naming; make BinNumberToBigInteger() general patt…
lateapexearlyspeed Sep 1, 2023
17434aa
Fix comment: use internal 'kcbitUint'.
lateapexearlyspeed Sep 15, 2023
bdfddf5
Fix comment: rename 'Bin' method names to 'Binary' ones; remove unnec…
lateapexearlyspeed Oct 5, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,54 @@ private static unsafe bool AllowHyphenDuringParsing(NumberFormatInfo info)
return ret;
}

/// <summary>
/// Describes, through static members only, which characters one numeric notation accepts as digits.
/// Implementations are meant to be supplied as a generic type argument rather than instantiated.
/// </summary>
private interface IDigitParser
{
    /// <summary>Reports whether the implementing parser handles hexadecimal or binary notation.</summary>
    /// <returns><see langword="true"/> for hexadecimal/binary parsers; otherwise <see langword="false"/>.</returns>
    static abstract bool IsHexOrBinaryParser();

    /// <summary>Reports whether <paramref name="c"/> is a digit in the notation handled by the implementing parser.</summary>
    /// <param name="c">The character to classify.</param>
    /// <returns><see langword="true"/> when <paramref name="c"/> is an acceptable digit.</returns>
    static abstract bool IsValidChar(char c);
}

/// <summary>Digit classifier for decimal notation: only the ASCII characters '0' through '9' are digits.</summary>
private readonly struct IntegerDigitParser : IDigitParser
{
    /// <summary>Always <see langword="false"/>: this parser is neither hexadecimal nor binary.</summary>
    public static bool IsHexOrBinaryParser()
    {
        return false;
    }

    /// <summary>Returns <see langword="true"/> when <paramref name="c"/> lies in the ASCII range '0'..'9'.</summary>
    public static bool IsValidChar(char c)
    {
        return c is >= '0' and <= '9';
    }
}

/// <summary>Digit classifier for hexadecimal notation; the decision is delegated to <c>HexConverter.IsHexChar</c>.</summary>
private readonly struct HexDigitParser : IDigitParser
{
    /// <summary>Always <see langword="true"/>: this parser handles hexadecimal notation.</summary>
    public static bool IsHexOrBinaryParser()
    {
        return true;
    }

    /// <summary>Returns whatever <c>HexConverter.IsHexChar</c> reports for the UTF-16 code unit of <paramref name="c"/>.</summary>
    public static bool IsValidChar(char c)
    {
        // Widen to int first so the int-taking overload of IsHexChar is the one selected.
        int codeUnit = c;
        return HexConverter.IsHexChar(codeUnit);
    }
}

/// <summary>Digit classifier for binary notation: only '0' and '1' are digits.</summary>
private readonly struct BinaryDigitParser : IDigitParser
{
    /// <summary>Always <see langword="true"/>: this parser handles binary notation.</summary>
    public static bool IsHexOrBinaryParser() => true;

    /// <summary>Returns <see langword="true"/> only for the characters '0' and '1'.</summary>
    public static bool IsValidChar(char c) => c is '0' or '1';
}


// Chooses the digit classifier that matches 'options' and forwards every argument unchanged to
// ParseNumberStyle<T>. The hexadecimal flag is examined first, so it takes priority when both the
// hexadecimal and the binary specifier flags are present; with neither flag the decimal classifier is used.
// Returns whatever the selected ParseNumberStyle<T> instantiation returns.
private static unsafe bool ParseNumber(ref char* str, char* strEnd, NumberStyles options, scoped ref NumberBuffer number, StringBuilder? sb, NumberFormatInfo numfmt, bool parseDecimal)
{
    bool wantsHex = (options & NumberStyles.AllowHexSpecifier) != 0;
    if (!wantsHex)
    {
        bool wantsBinary = (options & NumberStyles.AllowBinarySpecifier) != 0;

        return wantsBinary
            ? ParseNumberStyle<BinaryDigitParser>(ref str, strEnd, options, ref number, sb, numfmt, parseDecimal)
            : ParseNumberStyle<IntegerDigitParser>(ref str, strEnd, options, ref number, sb, numfmt, parseDecimal);
    }

    return ParseNumberStyle<HexDigitParser>(ref str, strEnd, options, ref number, sb, numfmt, parseDecimal);
}

private static unsafe bool ParseNumberStyle<TDigitParser>(ref char* str, char* strEnd, NumberStyles options, scoped ref NumberBuffer number, StringBuilder? sb, NumberFormatInfo numfmt, bool parseDecimal)
where TDigitParser : struct, IDigitParser
{
Debug.Assert(str != null);
Debug.Assert(strEnd != null);
Expand Down Expand Up @@ -440,11 +487,11 @@ private static unsafe bool ParseNumber(ref char* str, char* strEnd, NumberStyles
int digEnd = 0;
while (true)
{
if (char.IsAsciiDigit(ch) || (((options & NumberStyles.AllowHexSpecifier) != 0) && char.IsBetween((char)(ch | 0x20), 'a', 'f')))
if (TDigitParser.IsValidChar(ch))
{
state |= StateDigits;

if (ch != '0' || (state & StateNonZero) != 0 || (bigNumber && ((options & NumberStyles.AllowHexSpecifier) != 0)))
if (ch != '0' || (state & StateNonZero) != 0 || (bigNumber && TDigitParser.IsHexOrBinaryParser()))
{
if (digCount < maxParseDigits)
{
Expand Down
222 changes: 218 additions & 4 deletions src/libraries/System.Runtime.Numerics/src/System/Numerics/BigNumber.cs
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,8 @@ internal static class BigNumber
| NumberStyles.AllowLeadingSign | NumberStyles.AllowTrailingSign
| NumberStyles.AllowParentheses | NumberStyles.AllowDecimalPoint
| NumberStyles.AllowThousands | NumberStyles.AllowExponent
| NumberStyles.AllowCurrencySymbol | NumberStyles.AllowHexSpecifier);
| NumberStyles.AllowCurrencySymbol | NumberStyles.AllowHexSpecifier
| NumberStyles.AllowBinarySpecifier);

private static ReadOnlySpan<uint> UInt32PowersOfTen => new uint[] { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000 };

Expand Down Expand Up @@ -371,10 +372,13 @@ internal static ParsingStatus TryParseBigInteger(ReadOnlySpan<char> value, Numbe
{
return HexNumberToBigInteger(ref bigNumber, out result);
}
else

if ((style & NumberStyles.AllowBinarySpecifier) != 0)
{
return NumberToBigInteger(ref bigNumber, out result);
return BinNumberToBigInteger(ref bigNumber, out result);
}

return NumberToBigInteger(ref bigNumber, out result);
}

internal static BigInteger ParseBigInteger(string value, NumberStyles style, NumberFormatInfo info)
Expand Down Expand Up @@ -511,6 +515,114 @@ private static ParsingStatus HexNumberToBigInteger(ref BigNumberBuffer number, o
}
}

// Converts the binary digit characters held in number.digits into a BigInteger.
// The first digit is treated as the sign bit ('1' means negative) and the digits are packed,
// most significant first, into 32-bit blocks.
//   number - buffer whose 'digits' member holds '0'/'1' characters followed by a trailing '\0'.
//   result - receives the parsed value; set to default when parsing fails.
// Returns ParsingStatus.Failed when there is no digit buffer (null or empty), otherwise ParsingStatus.OK.
private static ParsingStatus BinNumberToBigInteger(ref BigNumberBuffer number, out BigInteger result)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: can we expand "Bin" to "Binary" in the name? I realize this is likely an attempt to match the conciseness of "Hex", but "Bin" and "Big" are so close that this keeps making me do a double-take to know which it was.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renamed method names, also for other 'Bin' related ones.

{
// Nothing to convert: report failure with a default result.
if (number.digits is null || number.digits.Length == 0)
{
result = default;
return ParsingStatus.Failed;
}

// Number of binary digits that fit into one uint block.
const int DigitsPerBlock = 32;
tannergooding marked this conversation as resolved.
Show resolved Hide resolved

int totalDigitCount = number.digits.Length - 1; // Ignore trailing '\0'
int partialDigitCount;

// blockCount is the digit count divided by 32, rounded up. When the digits do not fill the
// most significant block completely, partialDigitCount starts at the number of missing
// (padding) bits so that block is flushed after exactly 'remainder' digits.
(int blockCount, int remainder) = int.DivRem(totalDigitCount, DigitsPerBlock);
if (remainder == 0)
{
partialDigitCount = 0;
}
else
{
blockCount++;
partialDigitCount = DigitsPerBlock - remainder;
}

// The leading digit is the sign bit. For a negative value the working block starts as all ones,
// so the padding bits above the first digits are ones rather than zeros.
Debug.Assert(number.digits[0] is '0' or '1');
bool isNegative = number.digits[0] == '1';
uint currentBlock = isNegative ? 0xFF_FF_FF_FFu : 0x0;

// Use stack memory up to BigIntegerCalculator.StackAllocThreshold blocks; otherwise rent from
// the shared pool (returned in the finally below). Either way the span is cut to blockCount.
uint[]? arrayFromPool = null;
Span<uint> buffer = ((uint)blockCount <= BigIntegerCalculator.StackAllocThreshold
? stackalloc uint[BigIntegerCalculator.StackAllocThreshold]
: arrayFromPool = ArrayPool<uint>.Shared.Rent(blockCount)).Slice(0, blockCount);

// Digits arrive most significant first, so blocks are stored from the highest index downwards;
// index 0 ends up holding the least significant block.
int bufferPos = blockCount - 1;

try
{
foreach (ReadOnlyMemory<char> digitsChunkMem in number.digits.GetChunks())
{
ReadOnlySpan<char> chunkDigits = digitsChunkMem.Span;
for (int i = 0; i < chunkDigits.Length; i++)
{
char digitChar = chunkDigits[i];
// A '\0' ends the digits of this chunk; the break leaves only the inner loop.
if (digitChar == '\0')
{
break;
}

// Shift the new bit in at the low end of the working block.
Debug.Assert(digitChar is '0' or '1');
currentBlock = (currentBlock << 1) | (uint)(digitChar - '0');
partialDigitCount++;

// A full block has been collected: store it and move to the next lower index.
if (partialDigitCount == DigitsPerBlock)
{
buffer[bufferPos--] = currentBlock;
partialDigitCount = 0;

// we do not need to reset currentBlock now, because it should always set all its bits by left shift in subsequent iterations
}
}
}

// Every block must have been written exactly once.
Debug.Assert(partialDigitCount == 0 && bufferPos == -1);

// Drop zero blocks at the most significant (high index) end.
buffer = buffer.TrimEnd(0u);

int sign;
uint[]? bits;

if (buffer.IsEmpty)
{
// All blocks were zero: the value is zero.
sign = 0;
bits = null;
}
else if (buffer.Length == 1)
{
// Single block: the value is carried in 'sign' itself with no bits array ...
sign = (int)buffer[0];
bits = null;

// ... unless reinterpreting the block as int gives the wrong value (a positive number whose
// top bit is set) or it equals int.MinValue; then the raw block goes into a one-element
// array and 'sign' carries only the direction.
if ((!isNegative && sign < 0) || sign == int.MinValue)
{
bits = new[] { (uint)sign };
sign = isNegative ? -1 : 1;
}
}
else
{
// Multiple blocks: 'sign' carries the direction and 'bits' a heap copy of the blocks.
sign = isNegative ? -1 : 1;
bits = buffer.ToArray();

// Presumably converts the two's-complement blocks into a magnitude in place - TODO confirm
// against NumericsHelpers.
// NOTE(review): nothing trims 'bits' after this call. If the complement can leave zero blocks
// at the high end (e.g. an input of 33 or more '1' digits), confirm the BigInteger(int, uint[])
// constructor accepts that.
if (isNegative)
{
NumericsHelpers.DangerousMakeTwosComplement(bits);
}
}

result = new BigInteger(sign, bits);
return ParsingStatus.OK;
}
finally
{
// Hand the rented array back whether or not the conversion completed.
if (arrayFromPool is not null)
{
ArrayPool<uint>.Shared.Return(arrayFromPool);
}
}
}

//
// This threshold is for choosing the algorithm to use based on the number of digits.
//
Expand Down Expand Up @@ -1002,6 +1114,105 @@ internal static char ParseFormatSpecifier(ReadOnlySpan<char> format, out int dig
}
}

// Formats 'value' as binary digit characters, most significant bit first.
// The required character count is computed up front, so the output is written once into a
// buffer of the final size.
//   targetSpan   - true to write into 'destination'; false to return a new string.
//   value        - the number to format.
//   digits       - minimum number of characters; shorter output is left-padded with '0' for
//                  non-negative values and '1' for negative values.
//   destination  - output span, used only when targetSpan is true.
//   charsWritten - number of characters written to 'destination' (0 on the string path or on failure).
//   spanSuccess  - true only when targetSpan is true and 'destination' was large enough.
// Returns the formatted string when targetSpan is false, otherwise null.
// Throws FormatException when the required character count exceeds Array.MaxLength.
private static string? FormatBigIntegerToBin(bool targetSpan, BigInteger value, int digits, Span<char> destination, out int charsWritten, out bool spanSuccess)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unlike existing FormatBigIntegerToHex(), this FormatBigIntegerToBin() is implemented by calculating required char length before format, this can:

  • avoids from extending ValueStringBuilder's capacity during append char;
  • for targetSpan flow, formatted chars can write to wanted destination span directly rather than allocate buffer in ValueStringBuilder and copy to destination at the end

Please give advice if not proper, thanks !

{
// Get the bytes that make up the BigInteger.
// A 64-byte stack buffer is tried first; when it is too small, a pooled array of the reported size is used.
byte[]? arrayToReturnToPool = null;
Span<byte> bytes = stackalloc byte[64]; // arbitrary threshold
stephentoub marked this conversation as resolved.
Show resolved Hide resolved
if (!value.TryWriteOrCountBytes(bytes, out int bytesWrittenOrNeeded))
{
bytes = arrayToReturnToPool = ArrayPool<byte>.Shared.Rent(bytesWrittenOrNeeded);
bool success = value.TryWriteBytes(bytes, out _);
Debug.Assert(success);
stephentoub marked this conversation as resolved.
Show resolved Hide resolved
}
bytes = bytes.Slice(0, bytesWrittenOrNeeded);

Debug.Assert(!bytes.IsEmpty);

// The last byte is emitted first below, i.e. it is treated as the most significant byte.
byte highByte = bytes[^1];

// Characters needed for the high byte: its significant bits plus one leading sign bit.
// For negative values the byte is inverted first, so redundant leading ones are the bits skipped.
int charsInHighByte = 9 - byte.LeadingZeroCount(value._sign >= 0 ? highByte : (byte)~highByte);
// Every remaining byte contributes 8 characters; computed as long so the sum cannot overflow int.
long tmpCharCount = charsInHighByte + ((long)(bytes.Length - 1) << 3);

if (tmpCharCount > Array.MaxLength)
{
// This runs before the try/finally below, so the rented array is handed back here before throwing.
Debug.Assert(arrayToReturnToPool is not null);
ArrayPool<byte>.Shared.Return(arrayToReturnToPool);

throw new FormatException(SR.Format_TooLarge);
}

int charsForBits = (int)tmpCharCount;

// Final output length: the bit characters, or the requested minimum width if that is larger.
Debug.Assert(digits < Array.MaxLength);
int charsIncludeDigits = Math.Max(digits, charsForBits);

try
{
scoped ValueStringBuilder sb;
if (targetSpan)
{
// Destination too small: report failure without writing anything.
if (charsIncludeDigits > destination.Length)
{
charsWritten = 0;
spanSuccess = false;
return null;
}

// Because we have ensured destination can take actual char length, so now just use ValueStringBuilder as wrapper so that subsequent logic can be reused by 2 flows (targetSpan and non-targetSpan);
// meanwhile there is no need to copy to destination again after format data for targetSpan flow.
sb = new ValueStringBuilder(destination);
}
else
{
// each byte is typically eight chars
// Outputs of up to 512 chars use a stack buffer; larger ones construct the builder from the required length.
sb = charsIncludeDigits > 512
? new ValueStringBuilder(charsIncludeDigits)
: new ValueStringBuilder(stackalloc char[512].Slice(0, charsIncludeDigits));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why slice it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes it is unnecessary, removed.

}

// Left-pad up to the requested width with the sign digit ('0' non-negative, '1' negative).
if (digits > charsForBits)
{
sb.Append(value._sign >= 0 ? '0' : '1', digits - charsForBits);
}

// High byte first, starting at the bit position that yields exactly charsInHighByte characters.
AppendByte(ref sb, highByte, charsInHighByte - 1);

// Remaining bytes from the second-to-last down to the first, 8 characters each.
for (int i = bytes.Length - 2; i >= 0; i--)
{
AppendByte(ref sb, bytes[i]);
}

Debug.Assert(sb.Length == charsIncludeDigits);

// Span path: the characters are already in 'destination' (the builder wraps it), so only report the count.
if (targetSpan)
{
charsWritten = charsIncludeDigits;
spanSuccess = true;
return null;
}

// String path: the span-related outputs are not meaningful and are zeroed.
charsWritten = 0;
spanSuccess = false;
return sb.ToString();
}
finally
{
// Hand the rented byte array back on every exit path of the try block.
if (arrayToReturnToPool is not null)
{
ArrayPool<byte>.Shared.Return(arrayToReturnToPool);
}
}

// Appends the bits of 'b' as '0'/'1' characters, from bit index 'startHighBit' down to bit 0.
static void AppendByte(ref ValueStringBuilder sb, byte b, int startHighBit = 7)
{
for (int i = startHighBit; i >= 0; i--)
{
sb.Append((char)('0' + ((b >> i) & 0x1)));
}
}
}

internal static string FormatBigInteger(BigInteger value, string? format, NumberFormatInfo info)
{
return FormatBigInteger(targetSpan: false, value, format, format, info, default, out _, out _)!;
Expand All @@ -1026,7 +1237,10 @@ internal static bool TryFormatBigInteger(BigInteger value, ReadOnlySpan<char> fo
{
return FormatBigIntegerToHex(targetSpan, value, fmt, digits, info, destination, out charsWritten, out spanSuccess);
}

if (fmt == 'b' || fmt == 'B')
{
return FormatBigIntegerToBin(targetSpan, value, digits, destination, out charsWritten, out spanSuccess);
}

if (value._bits == null)
{
Expand Down
Loading
Loading