Skip to content

Commit

Permalink
(parser) Preserve exact floating point representation when parsing
Browse files Browse the repository at this point in the history
This will be useful in the compilation part (#409), where we want to
avoid losing precision because of round-tripping.
  • Loading branch information
perlun committed Oct 17, 2023
1 parent 69367d3 commit 9b4a133
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 60 deletions.
2 changes: 2 additions & 0 deletions release-notes/v0.4.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
### Added
#### Experimental compilation
- Add C++-based stdlib project [[#407][407]]
- Preserve exact floating point representation while parsing [[#412][412]]

### Changed
#### Data types
Expand Down Expand Up @@ -37,3 +38,4 @@
[389]: https://github.com/perlang-org/perlang/pull/389
[407]: https://github.com/perlang-org/perlang/pull/407
[410]: https://github.com/perlang-org/perlang/pull/410
[412]: https://github.com/perlang-org/perlang/pull/412
10 changes: 5 additions & 5 deletions src/Perlang.Parser/FloatingPointLiteral.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,21 @@

namespace Perlang.Parser;

internal readonly struct FloatingPointLiteral<T> : INumericLiteral
internal readonly struct FloatingPointLiteral<T> : IFloatingPointLiteral, INumericLiteral
where T : notnull
{
internal T Value { get; }
public object Value { get; }
public string NumberCharacters { get; }

/// <inheritdoc cref="INumericLiteral.BitsUsed"/>
public long BitsUsed { get; }

public bool IsPositive { get; }

object INumericLiteral.Value => Value;

public FloatingPointLiteral(T value)
public FloatingPointLiteral(T value, string numberCharacters)
{
Value = value;
NumberCharacters = numberCharacters;

BitsUsed = value switch
{
Expand Down
12 changes: 12 additions & 0 deletions src/Perlang.Parser/IFloatingPointLiteral.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#nullable enable
namespace Perlang.Parser;

public interface IFloatingPointLiteral
{
    /// <summary>
    /// Gets the characters that make up this floating point literal, in textual form. Carrying the text along
    /// with the parsed value lets the compiler avoid precision loss, because converting a <c>float</c> or
    /// <c>double</c> to a string via <c>ToString()</c> and parsing it back is not necessarily round-trip safe.
    /// </summary>
    string NumberCharacters { get; }
}
103 changes: 54 additions & 49 deletions src/Perlang.Parser/NumberParser.cs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#nullable enable
using System;
using System.Globalization;
using System.Numerics;
using Perlang.Internal.Extensions;

#nullable enable
namespace Perlang.Parser;

internal static class NumberParser
Expand All @@ -22,16 +22,21 @@ public static INumericLiteral Parse(NumericToken numericToken)
{
// The explicit IFormatProvider is required to ensure we use 123.45 format, regardless of host OS
// language/region settings. See #263 for more details.
//
// An interesting detail: we preserve the `numberCharacters` here (the unparsed floating point
// value), since we might otherwise lose valuable precision. Round-tripping via ToString() will
// otherwise risk losing precision. This is particularly important to get proper semantics for
// `double + float` operations in compiled mode.
float value = Single.Parse(numberCharacters, CultureInfo.InvariantCulture);
return new FloatingPointLiteral<float>(value);
return new FloatingPointLiteral<float>(value, numberCharacters);
}

case 'd':
{
// The explicit IFormatProvider is required to ensure we use 123.45 format, regardless of host OS
// language/region settings. See #263 for more details.
double value = Double.Parse(numberCharacters, CultureInfo.InvariantCulture);
return new FloatingPointLiteral<double>(value);
return new FloatingPointLiteral<double>(value, numberCharacters);
}

default:
Expand All @@ -40,9 +45,9 @@ public static INumericLiteral Parse(NumericToken numericToken)
}
else
{
// No suffix provided => use `double` precision by default, just like C#
// No suffix provided => use `double` precision by default, just like C++, Java and C#.
double value = Double.Parse(numberCharacters, CultureInfo.InvariantCulture);
return new FloatingPointLiteral<double>(value);
return new FloatingPointLiteral<double>(value, numberCharacters);
}
}
else
Expand Down Expand Up @@ -104,72 +109,72 @@ public static INumericLiteral Parse(NumericToken numericToken)
}
}

public static object MakeNegative(object value)
public static object MakeNegative(INumericLiteral numericLiteral)
{
if (value is INumericLiteral numericLiteral)
if (numericLiteral is IFloatingPointLiteral floatingPointLiteral)
{
if (numericLiteral.Value is float floatValue)
{
return new FloatingPointLiteral<float>(-floatValue);
return new FloatingPointLiteral<float>(-floatValue, "-" + floatingPointLiteral.NumberCharacters);
}
else if (numericLiteral.Value is double doubleValue)
{
return new FloatingPointLiteral<double>(-doubleValue);
return new FloatingPointLiteral<double>(-doubleValue, "-" + floatingPointLiteral.NumberCharacters);
}
else if (numericLiteral.Value is int intValue)
else
{
return new IntegerLiteral<int>(-intValue);
throw new ArgumentException($"Type {numericLiteral.Value.GetType().ToTypeKeyword()} not supported");
}
else if (numericLiteral.Value is uint uintValue)
{
long negativeValue = -uintValue;
}
else if (numericLiteral.Value is int intValue)
{
return new IntegerLiteral<int>(-intValue);
}
else if (numericLiteral.Value is uint uintValue)
{
long negativeValue = -uintValue;

// This is a special hack to ensure that the value -2147483648 gets returned as an `int` and not a `long`.
// Some details available in #302, summarized here in brief:
//
// The value 2147483648 is too large for an `int` => gets parsed into a `ulong` where it will fit. Once it
// has been made negative, the value -2147483648 is again small enough to fit in an `int` => the code below
// will narrow it down to comply with the "smallest type possible" design principle.
//
// Rationale: Two's complement: https://en.wikipedia.org/wiki/Two%27s_complement
if (negativeValue >= Int32.MinValue)
{
return new IntegerLiteral<int>((int)negativeValue);
}
else
{
return new IntegerLiteral<long>(negativeValue);
}
}
else if (numericLiteral.Value is long longValue)
// This is a special hack to ensure that the value -2147483648 gets returned as an `int` and not a `long`.
// Some details available in #302, summarized here in brief:
//
// The value 2147483648 is too large for an `int` => gets parsed into a `uint` where it will fit. Once it
// has been made negative, the value -2147483648 is again small enough to fit in an `int` => the code below
// will narrow it down to comply with the "smallest type possible" design principle.
//
// Rationale: Two's complement: https://en.wikipedia.org/wiki/Two%27s_complement
if (negativeValue >= Int32.MinValue)
{
return new IntegerLiteral<long>(-longValue);
return new IntegerLiteral<int>((int)negativeValue);
}
else if (numericLiteral.Value is ulong ulongValue)
else
{
// Again, this needs to be handled specially to ensure that numbers that fit in a `long` doesn't use
// BigInteger unnecessarily.
BigInteger negativeValue = -new BigInteger(ulongValue);
return new IntegerLiteral<long>(negativeValue);
}
}
else if (numericLiteral.Value is long longValue)
{
return new IntegerLiteral<long>(-longValue);
}
else if (numericLiteral.Value is ulong ulongValue)
{
// Again, this needs to be handled specially to ensure that numbers that fit in a `long` don't use
// BigInteger unnecessarily.
BigInteger negativeValue = -new BigInteger(ulongValue);

if (negativeValue >= Int64.MinValue)
{
return new IntegerLiteral<long>((long)negativeValue);
}
else
{
// All negative numbers that are too big to fit in any of the smaller signed integer types will go
// through this code path.
return new IntegerLiteral<BigInteger>(negativeValue);
}
if (negativeValue >= Int64.MinValue)
{
return new IntegerLiteral<long>((long)negativeValue);
}
else
{
throw new ArgumentException($"Type {numericLiteral.Value.GetType().ToTypeKeyword()} not supported");
// All negative numbers that are too big to fit in any of the smaller signed integer types will go
// through this code path.
return new IntegerLiteral<BigInteger>(negativeValue);
}
}
else
{
throw new ArgumentException($"Type {value.GetType().ToTypeKeyword()} not supported");
throw new ArgumentException($"Type {numericLiteral.Value.GetType().ToTypeKeyword()} not supported");
}
}
}
16 changes: 15 additions & 1 deletion src/Perlang.Parser/PerlangParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using Perlang.Internal.Extensions;
using static Perlang.Internal.Utils;
using static Perlang.TokenType;

Expand Down Expand Up @@ -570,7 +571,20 @@ private Expr UnaryPrefix()
// changed.
if (@operator.Type == MINUS && right is Expr.Literal rightLiteral)
{
return new Expr.Literal(NumberParser.MakeNegative(rightLiteral.Value!));
if (rightLiteral.Value is INumericLiteral numericLiteral)
{
return new Expr.Literal(NumberParser.MakeNegative(numericLiteral));
}
else if (rightLiteral.Value is null)
{
Error(Peek(), "Unary minus operator does not support null operand");
return new Expr.Literal(null);
}
else
{
// TODO: Call Error() here to produce a context-aware error instead of just throwing a raw exception
throw new ArgumentException($"Type {rightLiteral.Value.GetType().ToTypeKeyword()} not supported");
}
}
else
{
Expand Down
7 changes: 3 additions & 4 deletions src/Perlang.Stdlib/Internal/Utils.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
using System;
using System.Globalization;
using System.Numerics;
using Perlang.Internal.Extensions;
using Perlang.Lang;
using String = Perlang.Lang.String;
Expand All @@ -12,15 +11,15 @@ namespace Perlang.Internal
/// </summary>
public static class Utils
{
private static readonly Lang.String NullString = AsciiString.from("null");
private static readonly String NullString = AsciiString.from("null");

public static Lang.String Stringify(object @object)
public static String Stringify(object @object)
{
if (@object == null)
{
return NullString;
}
else if (@object is Lang.String nativeString)
else if (@object is String nativeString)
{
return nativeString;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
Expand Down

0 comments on commit 9b4a133

Please sign in to comment.