Skip to content

Commit

Permalink
(stdlib) Support ASCII-to-UTF8 string reassignment in compiled mode (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
perlun authored Apr 27, 2024
1 parent 315b642 commit e85afaf
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 19 deletions.
2 changes: 2 additions & 0 deletions release-notes/v0.5.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
- Add partial inline C++ support [[#462][462]]
- Detect method being redefined [[#464][464]]
- Make compiled mode be the default and drop interpreted mode [[#465][465]]
- Support ASCII-to-UTF8 string reassignment in compiled mode [[#466][466]]

### Changed
#### Data types
Expand Down Expand Up @@ -53,3 +54,4 @@
[463]: https://github.com/perlang-org/perlang/pull/463
[464]: https://github.com/perlang-org/perlang/pull/464
[465]: https://github.com/perlang-org/perlang/pull/465
[466]: https://github.com/perlang-org/perlang/pull/466
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ public void division_by_zero_throws_expected_runtime_error()
[SkippableFact]
public void division_by_zero_halts_execution()
{
Skip.If(PerlangMode.ExperimentalCompilation, "Not supported in compiled mode");
Skip.If(PerlangMode.ExperimentalCompilation, "Division by zero has undefined behavior in compiled mode.");

string source = @"
1 / 0;
Expand Down
34 changes: 20 additions & 14 deletions src/Perlang.Tests.Integration/Typing/StringTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,9 @@ namespace Perlang.Tests.Integration.Typing;

public class StringTests
{
[SkippableFact]
[Fact]
public void string_variable_can_be_printed()
{
Skip.If(PerlangMode.ExperimentalCompilation, "Not supported in compiled mode");

string source = @"
var s: string = ""this is a string"";
Expand All @@ -24,11 +22,9 @@ public void string_variable_can_be_printed()
.Be("this is a string");
}

[SkippableFact]
[Fact]
public void string_variable_can_be_reassigned()
{
Skip.If(PerlangMode.ExperimentalCompilation, "Not supported in compiled mode");

string source = @"
var s: string = ""this is a string"";
s = ""this is another string"";
Expand All @@ -42,16 +38,9 @@ public void string_variable_can_be_reassigned()
.Be("this is another string");
}

[SkippableFact]
[Fact]
public void ascii_string_inferred_variable_can_be_reassigned_with_non_ascii_value()
{
// The code below is incredibly hard to support in compiled mode, because: an AsciiString cannot be assigned to a
// String variable in C++ (because the latter is an abstract class; I believe the C++ compiler will try to make a
// copy of it). `const perlang::string& s = ...` works, but then the problem is that the variable can obviously
// not be reassigned on the second line... because it is constant. We'll have to think through how to solve this
// properly.
Skip.If(PerlangMode.ExperimentalCompilation, "Not yet supported in compiled mode");

string source = @"
var s: string = ""this is a string"";
s = ""this is a string with non-ASCII characters: åäöÅÄÖéèüÜÿŸïÏ"";
Expand All @@ -65,6 +54,23 @@ public void ascii_string_inferred_variable_can_be_reassigned_with_non_ascii_valu
.Be("this is a string with non-ASCII characters: åäöÅÄÖéèüÜÿŸïÏ");
}

[Fact]
public void non_ascii_string_inferred_variable_can_be_reassigned_with_ascii_value()
{
// Same as the ASCIIString to UTF8String above, but the other way around
string source = @"
var s: string = ""this is a string with non-ASCII characters: åäöÅÄÖéèüÜÿŸïÏ"";
s = ""this is a string"";
print(s);
";

var output = EvalReturningOutputString(source);

output.Should()
.Be("this is a string");
}

[SkippableFact]
public void ascii_string_variable_has_expected_type()
{
Expand Down
2 changes: 1 addition & 1 deletion src/stdlib/src/ascii_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ namespace perlang
throw std::invalid_argument("string argument cannot be null");
}

// TODO: Mark this string as "static" in some way, to ensure the destructor doesn't try to delete `bytes`.
// TODO: Mark this string as "static" in some way, to ensure the destructor doesn't try to delete `bytes_`.
auto result = ASCIIString();
result.bytes_ = s;
result.length_ = strlen(s);
Expand Down
1 change: 1 addition & 0 deletions src/stdlib/src/ascii_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ namespace perlang
// Because of the above assumptions, we know that we can make the new ASCIIString constructed from the `s`
// parameter "borrow" the actual bytes_ used by the string. Since no deallocation will take place, and no
// mutation, copying the string at this point would just waste CPU cycles for no added benefit.
[[nodiscard]]
static std::shared_ptr<const ASCIIString> from_static_string(const char* s);

// Returns the backing byte array for this ASCIIString. This method is generally to be avoided; it is safer to
Expand Down
12 changes: 10 additions & 2 deletions src/stdlib/src/utf8_string.cc
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
#include <cstring>
#include <stdexcept>
#include <memory>

#include "utf8_string.h"

namespace perlang
{
UTF8String UTF8String::from_static_string(const char* s)
std::shared_ptr<const UTF8String> UTF8String::from_static_string(const char* s)
{
if (s == nullptr) {
throw std::invalid_argument("string argument cannot be null");
}

// TODO: Mark this string as "static" in some way, to ensure the destructor doesn't try to delete `bytes_`.
auto result = UTF8String();
result.bytes_ = s;
result.length_ = strlen(s);

return result;
return std::make_shared<UTF8String>(result);
}

UTF8String::UTF8String()
Expand All @@ -26,15 +28,21 @@ namespace perlang
length_ = -1;
}

// TODO: Implement deallocation here for non-static strings, but MAKE SURE to keep a distinction between static and
// TODO: non-static strings!
UTF8String::~UTF8String() = default;

const char* UTF8String::bytes() const
{
return bytes_;
}

bool UTF8String::operator==(const UTF8String& rhs) const
{
return bytes_ == rhs.bytes_ &&
length_ == rhs.length_;
}

bool UTF8String::operator!=(const UTF8String& rhs) const
{
return !(rhs == *this);
Expand Down
7 changes: 6 additions & 1 deletion src/stdlib/src/utf8_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ namespace perlang
// Because of the above assumptions, we know that we can make the new UTF8String constructed from the `s`
// parameter "borrow" the actual bytes_ used by the string. Since no deallocation will take place, and no
// mutation, copying the string at this point would just waste CPU cycles for no added benefit.
static UTF8String from_static_string(const char* s);
[[nodiscard]]
static std::shared_ptr<const UTF8String> from_static_string(const char* s);

// Returns the backing byte array for this UTF8String. This method is generally to be avoided; it is safer to
// use the UTF8String throughout the code and only call this when you really must.
Expand All @@ -39,6 +40,10 @@ namespace perlang
// Private constructor for creating a `null` string, not yet initialized with any sensible content.
UTF8String();

public:
virtual ~UTF8String();

private:
// The backing byte array for this string. This is to be considered immutable and MUST NOT be modified at any
// point. There might be multiple UTF8String objects pointing to the same `bytes_`, so modifying one of them
// would unintentionally spread the modifications to these other objects too.
Expand Down

0 comments on commit e85afaf

Please sign in to comment.