From 0dcea42fb936236f5caf4343fedebbab020075bf Mon Sep 17 00:00:00 2001 From: Per Lundberg Date: Sat, 27 Apr 2024 08:30:29 +0300 Subject: [PATCH] (stdlib) Support ASCII-to-UTF8 string reassignment in compiled mode --- .../Operator/Binary/DivisionTests.cs | 2 +- .../Typing/StringTests.cs | 34 +++++++++++-------- src/stdlib/src/ascii_string.cc | 2 +- src/stdlib/src/utf8_string.cc | 13 +++++++ src/stdlib/src/utf8_string.h | 7 ++++ 5 files changed, 42 insertions(+), 16 deletions(-) diff --git a/src/Perlang.Tests.Integration/Operator/Binary/DivisionTests.cs b/src/Perlang.Tests.Integration/Operator/Binary/DivisionTests.cs index 473e95d4..3c506685 100644 --- a/src/Perlang.Tests.Integration/Operator/Binary/DivisionTests.cs +++ b/src/Perlang.Tests.Integration/Operator/Binary/DivisionTests.cs @@ -132,7 +132,7 @@ public void division_by_zero_throws_expected_runtime_error() [SkippableFact] public void division_by_zero_halts_execution() { - Skip.If(PerlangMode.ExperimentalCompilation, "Not supported in compiled mode"); + Skip.If(PerlangMode.ExperimentalCompilation, "Division by zero has undefined behavior in compiled mode."); string source = @" 1 / 0; diff --git a/src/Perlang.Tests.Integration/Typing/StringTests.cs b/src/Perlang.Tests.Integration/Typing/StringTests.cs index 21e5f115..176f7a04 100644 --- a/src/Perlang.Tests.Integration/Typing/StringTests.cs +++ b/src/Perlang.Tests.Integration/Typing/StringTests.cs @@ -7,11 +7,9 @@ namespace Perlang.Tests.Integration.Typing; public class StringTests { - [SkippableFact] + [Fact] public void string_variable_can_be_printed() { - Skip.If(PerlangMode.ExperimentalCompilation, "Not supported in compiled mode"); - string source = @" var s: string = ""this is a string""; @@ -24,11 +22,9 @@ public void string_variable_can_be_printed() .Be("this is a string"); } - [SkippableFact] + [Fact] public void string_variable_can_be_reassigned() { - Skip.If(PerlangMode.ExperimentalCompilation, "Not supported in compiled mode"); - string source = @" 
var s: string = ""this is a string""; s = ""this is another string""; @@ -42,16 +38,9 @@ public void string_variable_can_be_reassigned() .Be("this is another string"); } - [SkippableFact] + [Fact] public void ascii_string_inferred_variable_can_be_reassigned_with_non_ascii_value() { - // The code below is incredibly hard to support in compiled mode, because: an AsciiString cannot be assigned to a - // String variable in C++ (because the latter is an abstract class; I believe the C++ compiler will try to make a - // copy of it). `const perlang::string& s = ...` works, but then the problem is that the variable can obviously - // not be reassigned on the second line... because it is constant. We'll have to think through how to solve this - // properly. - Skip.If(PerlangMode.ExperimentalCompilation, "Not yet supported in compiled mode"); - string source = @" var s: string = ""this is a string""; s = ""this is a string with non-ASCII characters: åäöÅÄÖéèüÜÿŸïÏ""; @@ -65,6 +54,23 @@ public void ascii_string_inferred_variable_can_be_reassigned_with_non_ascii_valu .Be("this is a string with non-ASCII characters: åäöÅÄÖéèüÜÿŸïÏ"); } + [Fact] + public void non_ascii_string_inferred_variable_can_be_reassigned_with_ascii_value() + { + // Same as the ASCIIString to UTF8String above, but the other way around + string source = @" + var s: string = ""this is a string with non-ASCII characters: åäöÅÄÖéèüÜÿŸïÏ""; + s = ""this is a string""; + + print(s); + "; + + var output = EvalReturningOutputString(source); + + output.Should() + .Be("this is a string"); + } + [SkippableFact] public void ascii_string_variable_has_expected_type() { diff --git a/src/stdlib/src/ascii_string.cc b/src/stdlib/src/ascii_string.cc index 475d3fcb..54e17a48 100644 --- a/src/stdlib/src/ascii_string.cc +++ b/src/stdlib/src/ascii_string.cc @@ -13,7 +13,7 @@ namespace perlang throw std::invalid_argument("string argument cannot be null"); } - // TODO: Mark this string as "static" in some way, to ensure the 
destructor doesn't try to delete `bytes`. + // TODO: Mark this string as "static" in some way, to ensure the destructor doesn't try to delete `bytes_`. auto result = ASCIIString(); result.bytes_ = s; result.length_ = strlen(s); diff --git a/src/stdlib/src/utf8_string.cc b/src/stdlib/src/utf8_string.cc index 13fdedc8..80550dd7 100644 --- a/src/stdlib/src/utf8_string.cc +++ b/src/stdlib/src/utf8_string.cc @@ -1,5 +1,6 @@ #include <cstring> #include <stdexcept> +#include <memory> #include "utf8_string.h" @@ -11,6 +12,7 @@ namespace perlang throw std::invalid_argument("string argument cannot be null"); } + // TODO: Mark this string as "static" in some way, to ensure the destructor doesn't try to delete `bytes_`. auto result = UTF8String(); result.bytes_ = s; result.length_ = strlen(s); @@ -26,17 +28,28 @@ namespace perlang length_ = -1; } + // TODO: Implement deallocation here for non-static strings, but MAKE SURE to keep a distinction between static and + // TODO: non-static strings! + UTF8String::~UTF8String() = default; + const char* UTF8String::bytes() const { return bytes_; } + bool UTF8String::operator==(const UTF8String& rhs) const { return bytes_ == rhs.bytes_ && length_ == rhs.length_; } + bool UTF8String::operator!=(const UTF8String& rhs) const { return !(rhs == *this); } + + UTF8String::operator std::shared_ptr<String>() const + { + return std::make_shared<UTF8String>(*this); + } } diff --git a/src/stdlib/src/utf8_string.h b/src/stdlib/src/utf8_string.h index b49c1d89..c58f32c9 100644 --- a/src/stdlib/src/utf8_string.h +++ b/src/stdlib/src/utf8_string.h @@ -35,10 +35,17 @@ namespace perlang // documentation for more semantic details about the implementation. bool operator!=(const UTF8String& rhs) const; + // Implicit conversion to String pointer, which is required for reassignment between different string types. + operator std::shared_ptr<String>() const; // NOLINT(*-explicit-constructor) + private: // Private constructor for creating a `null` string, not yet initialized with any sensible content. 
UTF8String(); + public: + virtual ~UTF8String(); + + private: // The backing byte array for this string. This is to be considered immutable and MUST NOT be modified at any // point. There might be multiple UTF8String objects pointing to the same `bytes_`, so modifying one of them // would unintentionally spread the modifications to these other objects too.