Skip to content

Commit

Permalink
(parser) Add support for binary literals (#219)
Browse files Browse the repository at this point in the history
Related issue: #69.
  • Loading branch information
perlun authored Oct 13, 2021
1 parent 56f943c commit 80c0bf5
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 27 deletions.
73 changes: 46 additions & 27 deletions src/Perlang.Parser/Scanner.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// TODO: Remove once https://github.com/DotNetAnalyzers/StyleCopAnalyzers/issues/3392 has been resolved
#pragma warning disable SA1515 // SingleLineCommentMustBePrecededByBlankLine

using System;
using System.Collections.Generic;
using System.Collections.Immutable;
Expand Down Expand Up @@ -302,13 +305,24 @@ private void Number()
var numberBase = Base.DECIMAL;
int startOffset = 0;

if (Char.ToLower(Peek()) == 'x')
char currentChar = Char.ToLower(Peek());

if (currentChar is 'b' or 'x')
{
numberStyles = NumberStyles.HexNumber;
numberBase = Base.HEXADECIMAL;
switch (currentChar)
{
case 'b':
numberBase = Base.BINARY;
break;

case 'x':
numberStyles = NumberStyles.HexNumber;
numberBase = Base.HEXADECIMAL;
break;
}

// Moving the `start` pointer forward is important, since `BigInteger.Parse()` does not accept a prefix
// like 0x or 0X being present. Adding a `startOffset` here feels safer than mutating `start`,
// Moving the `start` pointer forward is important, since the parsing methods do not accept a prefix
// like 0b or 0x being present. Adding a `startOffset` here feels safer than mutating `start`,
// especially in case parsing fails somehow.
Advance();
startOffset = 2;
Expand All @@ -326,7 +340,6 @@ private void Number()

// Consume the "."
Advance();

while (IsDigit(Peek(), numberBase))
{
Advance();
Expand All @@ -339,32 +352,37 @@ private void Number()
}
else
{
string numberCharacters = source[(start + startOffset)..current];

// Any potential preceding '-' character has already been taken care of at this stage => we can treat
// the number as an unsigned value. However, we still try to coerce it to the smallest signed or
// unsigned integer type in which it will fit (but never smaller than 32-bit). This coincidentally
// follows the same semantics as how C# does it, for simplicity.
BigInteger value;

if (numberBase == Base.HEXADECIMAL)
{
string numberCharacters = source[(start + startOffset)..current];

// Quoting from
//https://docs.microsoft.com/en-us/dotnet/api/system.numerics.biginteger.parse?view=net-5.0#System_Numerics_BigInteger_Parse_System_ReadOnlySpan_System_Char__System_Globalization_NumberStyles_System_IFormatProvider_
//
// If value is a hexadecimal string, the Parse(String, NumberStyles) method interprets value as a
// negative number stored by using two's complement representation if its first two hexadecimal
// digits are greater than or equal to 0x80. In other words, the method interprets the highest-order
// bit of the first byte in value as the sign bit. To make sure that a hexadecimal string is
// correctly interpreted as a positive number, the first digit in value must have a value of zero.
//
// We presume that all hexadecimals should be treated as positive numbers for now.
value = BigInteger.Parse('0' + numberCharacters, numberStyles);
}
else
BigInteger value = numberBase switch
{
value = BigInteger.Parse(source[(start + startOffset)..current], numberStyles);
}
Base.DECIMAL =>
BigInteger.Parse(source[(start + startOffset)..current], numberStyles),

Base.BINARY =>
Convert.ToUInt64(numberCharacters, 2),

Base.HEXADECIMAL =>
// Quoting from
// https://docs.microsoft.com/en-us/dotnet/api/system.numerics.biginteger.parse?view=net-5.0#System_Numerics_BigInteger_Parse_System_ReadOnlySpan_System_Char__System_Globalization_NumberStyles_System_IFormatProvider_
//
// If value is a hexadecimal string, the Parse(String, NumberStyles) method interprets value as a
// negative number stored by using two's complement representation if its first two hexadecimal
// digits are greater than or equal to 0x80. In other words, the method interprets the highest-order
// bit of the first byte in value as the sign bit. To make sure that a hexadecimal string is
// correctly interpreted as a positive number, the first digit in value must have a value of zero.
//
// We presume that all hexadecimals should be treated as positive numbers for now.
BigInteger.Parse('0' + numberCharacters, numberStyles),

_ =>
throw new InvalidOperationException($"Base {(int)numberBase} not supported")
};

if (value < Int32.MaxValue)
{
Expand Down Expand Up @@ -504,8 +522,9 @@ private void AddToken(TokenType type, object literal = null)

// The numeric bases in which the scanner can read an integer literal. Each member's underlying value is the
// radix itself, which is what lets the "not supported" error message print `(int)numberBase` directly.
//
// NOTE(review): HEXADECIMAL is listed twice below because this view appears to interleave the pre-change and
// post-change lines of the diff (the change only adds a trailing comma) -- confirm against the actual file.
private enum Base
{
BINARY = 2,
DECIMAL = 10,
HEXADECIMAL = 16
HEXADECIMAL = 16,
}
}
}
12 changes: 12 additions & 0 deletions src/Perlang.Tests.Integration/Number/NumberTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,18 @@ public void literal_negative_float()
Assert.Equal(-0.001, result);
}

[Fact]
public void literal_binary()
{
    // The literal below is 42 written in base 2, using the `0b` prefix.
    object evaluated = Eval(@"
0b00101010
");

    Assert.Equal(42, evaluated);
}

[Fact]
public void literal_hexadecimal()
{
Expand Down
13 changes: 13 additions & 0 deletions src/Perlang.Tests/Interpreter/Typing/TypeResolverTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,19 @@ public void Resolve_var_with_long_type_defines_variable_with_expected_ClrType()
Assert.Equal(typeof(Int64), ((Stmt.Var)singleStatement).TypeReference.ClrType);
}

[Fact]
public void Resolve_implicitly_typed_var_initialized_from_binary_literal_has_expected_ClrType()
{
    // Arrange & Act: run a single `var` declaration, initialized from a binary literal, through the pipeline.
    (Stmt statement, NameResolver nameResolver) = ScanParseResolveAndTypeResolveSingleStatement(@"
var v = 0b00101010;
");

    // Assert: the statement is a variable declaration, `v` is a known global, and its type is inferred as Int32.
    Stmt.Var declaration = Assert.IsType<Stmt.Var>(statement);
    Assert.True(nameResolver.Globals.ContainsKey("v"));
    Assert.Equal(typeof(Int32), declaration.TypeReference.ClrType);
}

[Fact]
public void Resolve_implicitly_typed_var_initialized_from_hexadecimal_literal_has_expected_ClrType()
{
Expand Down

0 comments on commit 80c0bf5

Please sign in to comment.