Skip to content

Commit

Permalink
(stdlib) Support ASCII-to-UTF8 string reassignment in compiled mode
Browse files Browse the repository at this point in the history
  • Loading branch information
perlun committed Apr 27, 2024
1 parent 315b642 commit 0dcea42
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ public void division_by_zero_throws_expected_runtime_error()
[SkippableFact]
public void division_by_zero_halts_execution()
{
Skip.If(PerlangMode.ExperimentalCompilation, "Not supported in compiled mode");
Skip.If(PerlangMode.ExperimentalCompilation, "Division by zero has undefined behavior in compiled mode.");

string source = @"
1 / 0;
Expand Down
34 changes: 20 additions & 14 deletions src/Perlang.Tests.Integration/Typing/StringTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,9 @@ namespace Perlang.Tests.Integration.Typing;

public class StringTests
{
[SkippableFact]
[Fact]
public void string_variable_can_be_printed()
{
Skip.If(PerlangMode.ExperimentalCompilation, "Not supported in compiled mode");

string source = @"
var s: string = ""this is a string"";
Expand All @@ -24,11 +22,9 @@ public void string_variable_can_be_printed()
.Be("this is a string");
}

[SkippableFact]
[Fact]
public void string_variable_can_be_reassigned()
{
Skip.If(PerlangMode.ExperimentalCompilation, "Not supported in compiled mode");

string source = @"
var s: string = ""this is a string"";
s = ""this is another string"";
Expand All @@ -42,16 +38,9 @@ public void string_variable_can_be_reassigned()
.Be("this is another string");
}

[SkippableFact]
[Fact]
public void ascii_string_inferred_variable_can_be_reassigned_with_non_ascii_value()
{
// The code below is incredibly hard to support in compiled mode, because: an AsciiString cannot be assigned to a
// String variable in C++ (because the latter is an abstract class; I believe the C++ compiler will try to make a
// copy of it). `const perlang::string& s = ...` works, but then the problem is that the variable can obviously
// not be reassigned on the second line... because it is constant. We'll have to think through how to solve this
// properly.
Skip.If(PerlangMode.ExperimentalCompilation, "Not yet supported in compiled mode");

string source = @"
var s: string = ""this is a string"";
s = ""this is a string with non-ASCII characters: åäöÅÄÖéèüÜÿŸïÏ"";
Expand All @@ -65,6 +54,23 @@ public void ascii_string_inferred_variable_can_be_reassigned_with_non_ascii_valu
.Be("this is a string with non-ASCII characters: åäöÅÄÖéèüÜÿŸïÏ");
}

[Fact]
public void non_ascii_string_inferred_variable_can_be_reassigned_with_ascii_value()
{
    // Mirror image of the ASCII-to-non-ASCII reassignment test: here the variable starts out holding a value with
    // non-ASCII characters and is then reassigned an ASCII-only value, which is what must be printed.
    string source = @"
var s: string = ""this is a string with non-ASCII characters: åäöÅÄÖéèüÜÿŸïÏ"";
s = ""this is a string"";
print(s);
";

    EvalReturningOutputString(source)
        .Should()
        .Be("this is a string");
}

[SkippableFact]
public void ascii_string_variable_has_expected_type()
{
Expand Down
2 changes: 1 addition & 1 deletion src/stdlib/src/ascii_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ namespace perlang
throw std::invalid_argument("string argument cannot be null");
}

// TODO: Mark this string as "static" in some way, to ensure the destructor doesn't try to delete `bytes`.
// TODO: Mark this string as "static" in some way, to ensure the destructor doesn't try to delete `bytes_`.
auto result = ASCIIString();
result.bytes_ = s;
result.length_ = strlen(s);
Expand Down
13 changes: 13 additions & 0 deletions src/stdlib/src/utf8_string.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <cstring>
#include <stdexcept>
#include <memory>

#include "utf8_string.h"

Expand All @@ -11,6 +12,7 @@ namespace perlang
throw std::invalid_argument("string argument cannot be null");
}

// TODO: Mark this string as "static" in some way, to ensure the destructor doesn't try to delete `bytes_`.
auto result = UTF8String();
result.bytes_ = s;
result.length_ = strlen(s);
Expand All @@ -26,17 +28,28 @@ namespace perlang
length_ = -1;
}

// Destructor. It is currently defaulted, so it performs no explicit deallocation of `bytes_`.
//
// TODO: Implement deallocation here for non-static strings, but MAKE SURE to keep a distinction between static and
// non-static strings!
UTF8String::~UTF8String() = default;

const char* UTF8String::bytes() const
{
return bytes_;
}

// Equality check: two strings are considered equal when their `bytes_` members compare equal and their lengths
// match. Note that `bytes_` itself is compared, not the content it refers to.
bool UTF8String::operator==(const UTF8String& rhs) const
{
    if (length_ != rhs.length_) {
        return false;
    }

    return bytes_ == rhs.bytes_;
}

// Inequality check, defined as the exact negation of operator==.
bool UTF8String::operator!=(const UTF8String& rhs) const
{
    const bool equal = (rhs == *this);
    return !equal;
}

// Conversion to a shared String pointer. A new UTF8String is copy-constructed from this object and returned through
// a shared_ptr typed as the String base class.
UTF8String::operator std::shared_ptr<const String>() const
{
    std::shared_ptr<const String> result = std::make_shared<const UTF8String>(*this);
    return result;
}
}
7 changes: 7 additions & 0 deletions src/stdlib/src/utf8_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,17 @@ namespace perlang
// documentation for more semantic details about the implementation.
bool operator!=(const UTF8String& rhs) const;

// Implicit conversion to String pointer, which is required for reassignment between different string types.
operator std::shared_ptr<const String>() const; // NOLINT(*-explicit-constructor)

private:
// Private constructor for creating a `null` string, not yet initialized with any sensible content.
UTF8String();

public:
virtual ~UTF8String();

private:
// The backing byte array for this string. This is to be considered immutable and MUST NOT be modified at any
// point. There might be multiple UTF8String objects pointing to the same `bytes_`, so modifying one of them
// would unintentionally spread the modifications to these other objects too.
Expand Down

0 comments on commit 0dcea42

Please sign in to comment.