Skip to content

Commit

Permalink
(parser) Support hexadecimal number literals (#217)
Browse files Browse the repository at this point in the history
Implements part of #69.
  • Loading branch information
perlun authored Oct 11, 2021
1 parent 2aa5deb commit 56f943c
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 9 deletions.
2 changes: 1 addition & 1 deletion Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<PackageReference Include="StyleCop.Analyzers" Version="1.2.0-beta.321" PrivateAssets="all" />
<AdditionalFiles Include="$(SolutionDir)stylecop.json" Link="stylecop.json" />

<PackageReference Include="SonarAnalyzer.CSharp" Version="8.17.0.26580" />
<PackageReference Include="SonarAnalyzer.CSharp" Version="8.22.0.31243" />
</ItemGroup>

<!--
Expand Down
68 changes: 60 additions & 8 deletions src/Perlang.Parser/Scanner.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Globalization;
using System.Linq;
using System.Numerics;
using static Perlang.TokenType;
Expand Down Expand Up @@ -261,7 +262,9 @@ private void ScanToken()
break;

default:
if (IsDigit(c))
// Even if the number is a number in a different base than 10 (binary, hexadecimal etc), it always
// starts with a "normal" (decimal) digit because of the prefix characters - e.g. 0x1234.
if (IsDigit(c, Base.DECIMAL))
{
Number();
}
Expand Down Expand Up @@ -295,21 +298,36 @@ private void Identifier()
private void Number()
{
bool isFractional = false;
var numberStyles = NumberStyles.Any;
var numberBase = Base.DECIMAL;
int startOffset = 0;

while (IsDigit(Peek()))
if (Char.ToLower(Peek()) == 'x')
{
numberStyles = NumberStyles.HexNumber;
numberBase = Base.HEXADECIMAL;

// Skipping past the prefix is important, since `BigInteger.Parse()` does not accept a prefix like 0x or
// 0X being present. Recording a `startOffset` here feels safer than mutating `start` itself,
// especially in case parsing fails somehow.
Advance();
startOffset = 2;
}

while (IsDigit(Peek(), numberBase))
{
Advance();
}

// Look for a fractional part.
if (Peek() == '.' && IsDigit(PeekNext()))
if (Peek() == '.' && IsDigit(PeekNext(), numberBase))
{
isFractional = true;

// Consume the "."
Advance();

while (IsDigit(Peek()))
while (IsDigit(Peek(), numberBase))
{
Advance();
}
Expand All @@ -325,7 +343,28 @@ private void Number()
// the number as an unsigned value. However, we still try to coerce it to the smallest signed or
// unsigned integer type in which it will fit (but never smaller than 32-bit). This coincidentally
// follows the same semantics as how C# does it, for simplicity.
BigInteger value = BigInteger.Parse(source[start..current]);
BigInteger value;

if (numberBase == Base.HEXADECIMAL)
{
string numberCharacters = source[(start + startOffset)..current];

// Quoting from
//https://docs.microsoft.com/en-us/dotnet/api/system.numerics.biginteger.parse?view=net-5.0#System_Numerics_BigInteger_Parse_System_ReadOnlySpan_System_Char__System_Globalization_NumberStyles_System_IFormatProvider_
//
// If value is a hexadecimal string, the Parse(String, NumberStyles) method interprets value as a
// negative number stored by using two's complement representation if its first two hexadecimal
// digits are greater than or equal to 0x80. In other words, the method interprets the highest-order
// bit of the first byte in value as the sign bit. To make sure that a hexadecimal string is
// correctly interpreted as a positive number, the first digit in value must have a value of zero.
//
// We presume that all hexadecimals should be treated as positive numbers for now.
value = BigInteger.Parse('0' + numberCharacters, numberStyles);
}
else
{
value = BigInteger.Parse(source[(start + startOffset)..current], numberStyles);
}

if (value < Int32.MaxValue)
{
Expand Down Expand Up @@ -436,10 +475,17 @@ private static bool IsAlpha(char c)
}

private static bool IsAlphaNumeric(char c) =>
IsAlpha(c) || IsDigit(c);
IsAlpha(c) || IsDigit(c, Base.DECIMAL);

private static bool IsDigit(char c) =>
c >= '0' && c <= '9';
/// <summary>
/// Determines whether <paramref name="c"/> is a valid digit in the given numeric base.
/// </summary>
/// <param name="c">The character to test.</param>
/// <param name="base">
/// The numeric base. Its underlying integer value is used as the radix; 2, 8, 10 and 16 are recognized.
/// </param>
/// <returns><c>true</c> if the character is a digit in that base; otherwise <c>false</c>.</returns>
/// <exception cref="ArgumentException">The radix is not one of the recognized values.</exception>
private static bool IsDigit(char c, Base @base) =>
    (int)@base switch
    {
        2 => c == '0' || c == '1',
        8 => c >= '0' && c <= '7',
        10 => c >= '0' && c <= '9',

        // Explicit ASCII ranges rather than Char.ToUpper(): the latter is culture-sensitive and was being
        // evaluated twice per character. Only 0-9, a-f and A-F are hexadecimal digits.
        16 => (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'),
        _ => throw new ArgumentException($"Base {@base} is not supported", nameof(@base))
    };

private bool IsAtEnd() =>
current >= source.Length;
Expand All @@ -455,5 +501,11 @@ private void AddToken(TokenType type, object literal = null)
string text = source[start..current];
tokens.Add(new Token(type, text, literal, line));
}

/// <summary>
/// Numeric bases for number literals. Each member's underlying value is the radix itself, which
/// <c>IsDigit</c> relies on when it casts the value to <c>int</c>.
/// </summary>
private enum Base
{
/// <summary>Base 10; the default used by <c>Number()</c>.</summary>
DECIMAL = 10,
/// <summary>Base 16; selected by <c>Number()</c> when the character after the first digit is <c>x</c> or <c>X</c>.</summary>
HEXADECIMAL = 16
}
}
}
12 changes: 12 additions & 0 deletions src/Perlang.Tests.Integration/Number/NumberTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -106,5 +106,17 @@ public void literal_negative_float()

Assert.Equal(-0.001, result);
}

// Verifies that evaluating a hexadecimal literal yields its numeric value.
[Fact]
public void literal_hexadecimal()
{
string source = @"
0xC0CAC01A
";

object result = Eval(source);

// 0xC0CAC01A == 3234512922, which exceeds Int32.MaxValue, so the unsuffixed C# literal below is a uint.
// NOTE(review): this presumably relies on Eval returning a boxed UInt32 for the comparison to succeed -
// confirm against the scanner's integer-narrowing logic.
Assert.Equal(3234512922, result);
}
}
}
13 changes: 13 additions & 0 deletions src/Perlang.Tests/Interpreter/Typing/TypeResolverTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,19 @@ public void Resolve_var_with_long_type_defines_variable_with_expected_ClrType()
Assert.Equal(typeof(Int64), ((Stmt.Var)singleStatement).TypeReference.ClrType);
}

// Verifies that an implicitly typed variable initialized from a hexadecimal literal is resolved to UInt32.
[Fact]
public void Resolve_implicitly_typed_var_initialized_from_hexadecimal_literal_has_expected_ClrType()
{
// Arrange & Act: 0xC0CAC01A == 3234512922, which is above Int32.MaxValue but within the UInt32 range.
(Stmt singleStatement, NameResolver resolver) = ScanParseResolveAndTypeResolveSingleStatement(@"
var v = 0xC0CAC01A;
");

// Assert
Assert.IsType<Stmt.Var>(singleStatement);
Assert.True(resolver.Globals.ContainsKey("v"));
Assert.Equal(typeof(UInt32), ((Stmt.Var)singleStatement).TypeReference.ClrType);
}

[Fact]
public void Resolve_implicitly_typed_var_initialized_from_long_var_has_expected_ClrType()
{
Expand Down

0 comments on commit 56f943c

Please sign in to comment.