Skip to content

Commit

Permalink
(many) Support string+bigint and bigint+string concatenation
Browse files Browse the repository at this point in the history
  • Loading branch information
perlun committed May 10, 2024
1 parent 2b1c441 commit d4ba0d4
Show file tree
Hide file tree
Showing 11 changed files with 215 additions and 77 deletions.
1 change: 1 addition & 0 deletions Perlang.sln.DotSettings
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
<s:String x:Key="/Default/CodeStyle/CodeFormatting/CSharpCodeStyle/ThisQualifier/INSTANCE_MEMBERS_QUALIFY_DECLARED_IN/@EntryValue">BaseClass</s:String>
<s:String x:Key="/Default/CodeStyle/CodeFormatting/CSharpCodeStyle/ThisQualifier/INSTANCE_MEMBERS_QUALIFY_MEMBERS/@EntryValue">Property, Event</s:String>
<s:Int64 x:Key="/Default/CodeStyle/CodeFormatting/CSharpFormat/BLANK_LINES_BEFORE_BLOCK_STATEMENTS/@EntryValue">1</s:Int64>
<s:String x:Key="/Default/CodeStyle/CodeFormatting/CSharpFormat/INDENT_RAW_LITERAL_STRING/@EntryValue">DO_NOT_CHANGE</s:String>
<s:Boolean x:Key="/Default/CodeStyle/CodeFormatting/CSharpFormat/LINE_FEED_AT_FILE_END/@EntryValue">True</s:Boolean>
<s:String x:Key="/Default/CodeStyle/CodeFormatting/CSharpFormat/OTHER_BRACES/@EntryValue">END_OF_LINE</s:String>
<s:Boolean x:Key="/Default/CodeStyle/CodeFormatting/CSharpFormat/PLACE_WHILE_ON_NEW_LINE/@EntryValue">True</s:Boolean>
Expand Down
2 changes: 2 additions & 0 deletions release-notes/v0.5.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
- Support string+integer concatenation in compiled mode [[#472][472]]
- Support more types in string+int and int+string concatenation [[#473][473]]
- Fix ASCIIString+ASCIIString concatenation to return ASCIIString [[#474][474]]
- Support string+bigint and bigint+string concatenation [[#475][475]]

### Changed
#### Data types
Expand Down Expand Up @@ -61,3 +62,4 @@
[472]: https://github.com/perlang-org/perlang/pull/472
[473]: https://github.com/perlang-org/perlang/pull/473
[474]: https://github.com/perlang-org/perlang/pull/474
[475]: https://github.com/perlang-org/perlang/pull/475
12 changes: 6 additions & 6 deletions src/Perlang.Interpreter/Typing/TypeResolver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -127,36 +127,36 @@ public override VoidObject VisitBinaryExpr(Expr.Binary expr)
}
else if (expr.Operator.Type == TokenType.PLUS &&
(leftTypeReference.ClrType == typeof(Lang.String) &&
new[] { typeof(int), typeof(long), typeof(uint), typeof(ulong) }.Contains(rightTypeReference.ClrType))) {
new[] { typeof(int), typeof(long), typeof(uint), typeof(ulong), typeof(BigInteger) }.Contains(rightTypeReference.ClrType))) {
// "string" + 42
expr.TypeReference.ClrType = typeof(Lang.String);
}
else if (expr.Operator.Type == TokenType.PLUS &&
(leftTypeReference.ClrType == typeof(AsciiString) &&
new[] { typeof(int), typeof(long), typeof(uint), typeof(ulong) }.Contains(rightTypeReference.ClrType))) {
new[] { typeof(int), typeof(long), typeof(uint), typeof(ulong), typeof(BigInteger) }.Contains(rightTypeReference.ClrType))) {
// "string" + 42
expr.TypeReference.ClrType = typeof(AsciiString);
}
else if (expr.Operator.Type == TokenType.PLUS &&
(leftTypeReference.ClrType == typeof(Utf8String) &&
new[] { typeof(int), typeof(long), typeof(uint), typeof(ulong) }.Contains(rightTypeReference.ClrType))) {
new[] { typeof(int), typeof(long), typeof(uint), typeof(ulong), typeof(BigInteger) }.Contains(rightTypeReference.ClrType))) {
// "åäö string" + 42
expr.TypeReference.ClrType = typeof(Utf8String);
}
else if (expr.Operator.Type == TokenType.PLUS &&
(new[] { typeof(int), typeof(long), typeof(uint), typeof(ulong) }.Contains(leftTypeReference.ClrType) &&
(new[] { typeof(int), typeof(long), typeof(uint), typeof(ulong), typeof(BigInteger) }.Contains(leftTypeReference.ClrType) &&
rightTypeReference.ClrType == typeof(Lang.String))) {
// 42 + "string"
expr.TypeReference.ClrType = typeof(Lang.String);
}
else if (expr.Operator.Type == TokenType.PLUS &&
(new[] { typeof(int), typeof(long), typeof(uint), typeof(ulong) }.Contains(leftTypeReference.ClrType) &&
(new[] { typeof(int), typeof(long), typeof(uint), typeof(ulong), typeof(BigInteger) }.Contains(leftTypeReference.ClrType) &&
rightTypeReference.ClrType == typeof(AsciiString))) {
// 42 + "string"
expr.TypeReference.ClrType = typeof(AsciiString);
}
else if (expr.Operator.Type == TokenType.PLUS &&
(new[] { typeof(int), typeof(long), typeof(uint), typeof(ulong) }.Contains(leftTypeReference.ClrType) &&
(new[] { typeof(int), typeof(long), typeof(uint), typeof(ulong), typeof(BigInteger) }.Contains(leftTypeReference.ClrType) &&
rightTypeReference.ClrType == typeof(Utf8String))) {
// 42 + "åäö string"
expr.TypeReference.ClrType = typeof(Utf8String);
Expand Down
26 changes: 19 additions & 7 deletions src/Perlang.Tests.Integration/Operator/Binary/AdditionTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -167,11 +167,9 @@ void addition_of_ulong_and_string_coerces_number_to_string()
.Be("18446744073709551615abc");
}

[SkippableFact]
void addition_of_bigint_and_string_coerces_number_to_string()
[Fact]
void addition_of_bigint_and_ascii_string_coerces_number_to_string()
{
Skip.If(PerlangMode.ExperimentalCompilation, "bigint+string is not yet supported in compiled mode");

string source = @"
var i = 18446744073709551616;
var s = ""xyz"";
Expand All @@ -185,6 +183,22 @@ void addition_of_bigint_and_string_coerces_number_to_string()
.Be("18446744073709551616xyz");
}

// Verifies that adding a string containing non-ASCII characters to an integer literal too large for 64 bits
// prints the number's decimal digits immediately followed by the string content.
[Fact]
void addition_of_bigint_and_utf8_string_coerces_number_to_string()
{
// 18446744073709551616 is 2^64, i.e. one more than the largest 64-bit unsigned value, so it cannot be
// represented as a ulong (presumably making `i` a bigint, as the test name states).
string source = """
var i = 18446744073709551616;
var s = "åäöÅÄÖéèüÜÿŸïÏすし";

print i + s;
""";

string result = EvalReturningOutputString(source);

// The expected output is the number's digits with the string appended unchanged.
result.Should()
.Be("18446744073709551616åäöÅÄÖéèüÜÿŸïÏすし");
}

[Fact]
void addition_of_string_and_int_coerces_number_to_string()
{
Expand Down Expand Up @@ -249,11 +263,9 @@ void addition_of_string_and_ulong_coerces_number_to_string()
.Be("abc18446744073709551615");
}

[SkippableFact]
[Fact]
void addition_of_string_and_bigint_coerces_number_to_string()
{
Skip.If(PerlangMode.ExperimentalCompilation, "string+bigint is not yet supported in compiled mode");

string source = @"
var s = ""abc"";
var i = 18446744073709551616;
Expand Down
89 changes: 62 additions & 27 deletions src/stdlib/src/ascii_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,47 @@
#include <memory>
#include <new>

#include "ascii_string.h"
#include "bigint.hpp"

namespace perlang
{
std::shared_ptr<const ASCIIString> ASCIIString::from_static_string(const char* s)
std::shared_ptr<const ASCIIString> ASCIIString::from_static_string(const char* str)
{
if (s == nullptr) {
if (str == nullptr) {
throw std::invalid_argument("string argument cannot be null");
}

auto result = new ASCIIString(s, strlen(s), false);
// Cannot use std::make_shared() since it forces the ASCIIString constructor to be made public.
auto result = new ASCIIString(str, strlen(str), false);

return std::shared_ptr<ASCIIString>(result);
}

std::shared_ptr<const ASCIIString> ASCIIString::from_owned_string(const char* s, size_t length)
std::shared_ptr<const ASCIIString> ASCIIString::from_owned_string(const char* str, size_t length)
{
if (s == nullptr) {
throw std::invalid_argument("string argument cannot be null");
if (str == nullptr) {
throw std::invalid_argument("str argument cannot be null");
}

auto result = new ASCIIString(s, length, true);
auto result = new ASCIIString(str, length, true);

return std::shared_ptr<ASCIIString>(result);
}

// Creates a new ASCIIString holding a private, heap-allocated copy of the NUL-terminated string `str`.
//
// Throws std::invalid_argument if `str` is null, and std::bad_alloc if the copy buffer cannot be allocated.
std::shared_ptr<const ASCIIString> ASCIIString::from_copied_string(const char* str)
{
    if (str == nullptr) {
        throw std::invalid_argument("str argument cannot be null");
    }

    // Since we need to know the length anyway, we can use memcpy() instead of strcpy() to avoid an extra
    // iteration over the string.
    size_t length = strlen(str);

    // The buffer is allocated with malloc() because the ASCIIString destructor releases owned buffers with
    // free(). Holding it in a unique_ptr until ownership has been handed over ensures it is not leaked if
    // constructing the ASCIIString throws.
    std::unique_ptr<char, decltype(&free)> buffer(static_cast<char*>(malloc(length + 1)), &free);

    if (buffer == nullptr) {
        // malloc() signals failure by returning null; writing through that pointer would be undefined behaviour.
        throw std::bad_alloc();
    }

    // Copy length + 1 bytes so that the terminating NUL is included.
    memcpy(buffer.get(), str, length + 1);

    // Cannot use std::make_shared() since the ASCIIString constructor is private.
    auto result = new ASCIIString(buffer.get(), length, true);

    // The ASCIIString now owns the buffer. Give up our ownership *before* creating the shared_ptr: should the
    // shared_ptr constructor throw, it deletes `result`, which in turn frees the buffer.
    buffer.release();

    return std::shared_ptr<ASCIIString>(result);
}
Expand All @@ -39,7 +59,7 @@ namespace perlang
ASCIIString::~ASCIIString()
{
if (owned_) {
delete[] bytes_;
free((void*)bytes_);
}
}

Expand Down Expand Up @@ -78,25 +98,26 @@ namespace perlang
std::shared_ptr<const String> ASCIIString::operator+(const String& rhs) const
{
size_t length = this->length_ + rhs.length();
char *bytes = new char[length + 1];
char *bytes = (char*)malloc(length + 1);

// TODO: This won't work once we bring in UTF16String into the picture.
memcpy((void*)bytes, this->bytes_, this->length_);
memcpy((void*)(bytes + this->length_), rhs.bytes(), rhs.length());
memcpy(bytes, this->bytes_, this->length_);
memcpy((bytes + this->length_), rhs.bytes(), rhs.length());
bytes[length] = '\0';

return from_owned_string(bytes, length);
}

std::shared_ptr<const ASCIIString> ASCIIString::operator+(const ASCIIString& rhs) const
{
// The alternative to copy-paste here would be to use a bunch of casting.
// Copy-paste is a bit ugly, but the alternative would perhaps also not be so pretty, calling the above method
// and doing some semi-ugly casting of the result.

size_t length = this->length_ + rhs.length();
char *bytes = new char[length + 1];
char *bytes = (char*)malloc(length + 1);

memcpy((void*)bytes, this->bytes_, this->length_);
memcpy((void*)(bytes + this->length_), rhs.bytes(), rhs.length());
memcpy(bytes, this->bytes_, this->length_);
memcpy(bytes + this->length_, rhs.bytes(), rhs.length());
bytes[length] = '\0';

return from_owned_string(bytes, length);
Expand All @@ -107,10 +128,10 @@ namespace perlang
std::string str = std::to_string(rhs);

size_t length = str.length() + this->length_;
char *bytes = new char[length + 1];
char *bytes = (char*)malloc(length + 1);

memcpy((void*)bytes, this->bytes_, this->length_);
memcpy((void*)(bytes + this->length_), str.c_str(), str.length());
memcpy(bytes, this->bytes_, this->length_);
memcpy((bytes + this->length_), str.c_str(), str.length());
bytes[length] = '\0';

return from_owned_string(bytes, length);
Expand All @@ -121,10 +142,24 @@ namespace perlang
std::string str = std::to_string(rhs);

size_t length = str.length() + this->length_;
char *bytes = new char[length + 1];
char *bytes = (char*)malloc(length + 1);

memcpy(bytes, this->bytes_, this->length_);
memcpy((bytes + this->length_), str.c_str(), str.length());
bytes[length] = '\0';

return from_owned_string(bytes, length);
}

std::shared_ptr<const String> ASCIIString::operator+(const BigInt& rhs) const
{
std::string str = rhs.to_string();

size_t length = str.length() + this->length_;
char *bytes = (char*)malloc(length + 1);

memcpy((void*)bytes, this->bytes_, this->length_);
memcpy((void*)(bytes + this->length_), str.c_str(), str.length());
memcpy(bytes, this->bytes_, this->length_);
memcpy((bytes + this->length_), str.c_str(), str.length());
bytes[length] = '\0';

return from_owned_string(bytes, length);
Expand All @@ -142,10 +177,10 @@ namespace perlang
{
std::string str = std::to_string(lhs);
size_t length = str.length() + rhs.length();
char *bytes = new char[length + 1];
char *bytes = (char*)malloc(length + 1);

memcpy((void*)bytes, str.c_str(), str.length());
memcpy((void*)(bytes + str.length()), rhs.bytes(), rhs.length());
memcpy(bytes, str.c_str(), str.length());
memcpy((bytes + str.length()), rhs.bytes(), rhs.length());
bytes[length] = '\0';

return ASCIIString::from_owned_string(bytes, length);
Expand All @@ -155,10 +190,10 @@ namespace perlang
{
std::string str = std::to_string(lhs);
size_t length = str.length() + rhs.length();
char *bytes = new char[length + 1];
char *bytes = (char*)malloc(length + 1);

memcpy((void*)bytes, str.c_str(), str.length());
memcpy((void*)(bytes + str.length()), rhs.bytes(), rhs.length());
memcpy(bytes, str.c_str(), str.length());
memcpy((bytes + str.length()), rhs.bytes(), rhs.length());
bytes[length] = '\0';

return ASCIIString::from_owned_string(bytes, length);
Expand Down
27 changes: 20 additions & 7 deletions src/stdlib/src/ascii_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

#include "perlang_string.h"

// Forward declaration to avoid circular dependencies
class BigInt;

namespace perlang
{
// A class for representing immutable ASCII strings.
Expand All @@ -15,22 +18,28 @@ namespace perlang
// running program. We also extend this to presume that the content of the string remains the same during this
// whole lifetime.
//
// Because of the above assumptions, we know that we can make the new ASCIIString constructed from the `s`
// Because of the above assumptions, we know that we can make the new ASCIIString constructed from the `str`
// parameter "borrow" the actual bytes_ used by the string. Since no deallocation will take place, and no
// mutation, copying the string at this point would just waste CPU cycles for no added benefit.
[[nodiscard]]
static std::shared_ptr<const ASCIIString> from_static_string(const char* s);
static std::shared_ptr<const ASCIIString> from_static_string(const char* str);

// Creates a new ASCIIString from an "owned string", like a string that has been allocated on the heap. The
// Creates a new ASCIIString from an "owned string", like a string that has been allocated on the heap. The
// ownership of the memory is transferred to the ASCIIString, which is then responsible for deallocating the
// memory when it is no longer needed (i.e. when no references to it remain).
[[nodiscard]]
static std::shared_ptr<const ASCIIString> from_owned_string(const char* s, size_t length);
static std::shared_ptr<const ASCIIString> from_owned_string(const char* str, size_t length);

// Creates a new ASCIIString from an existing string, by copying its content to a new buffer allocated on the
// heap. The ASCIIString class takes ownership of the newly allocated buffer, which will be deallocated when the
// ASCIIString goes out of scope.
[[nodiscard]]
static std::shared_ptr<const ASCIIString> from_copied_string(const char* str);

private:
// Private constructor for creating a new ASCIIString from a C-style string. The `owned` parameter indicates
// whether the ASCIIString should take ownership of the memory it points to, and thus be responsible for
// deallocating it when it is no longer needed.
// Private constructor for creating a new ASCIIString from a C-style (NUL-terminated) string. The `owned`
// parameter indicates whether the ASCIIString should take ownership of the memory it points to, and thus be
// responsible for deallocating it when it is no longer needed.
ASCIIString(const char* string, size_t length, bool owned);

public:
Expand Down Expand Up @@ -90,6 +99,10 @@ namespace perlang
[[nodiscard]]
std::shared_ptr<const String> operator+(uint64_t rhs) const override;

// Concatenates this string with a BigInt. The memory for the new string is allocated from the heap.
[[nodiscard]]
std::shared_ptr<const String> operator+(const BigInt& rhs) const override;

// Alias for [], which is easier to use from Perlang-generated C++ code in a pointer context.
[[nodiscard]]
char char_at(int index) const;
Expand Down
12 changes: 12 additions & 0 deletions src/stdlib/src/bigint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
// Extracted from bigint.hpp, to avoid "multiple definition" errors when linking.

#include "bigint.hpp"
#include "ascii_string.h"
#include "perlang_string.h"

#include <iostream>
#include <cmath>
Expand Down Expand Up @@ -945,6 +947,16 @@ BigInt BigInt::operator+(const BigInt& num) const
return result;
}

// Concatenates the textual form of this BigInt (as produced by to_string()) with the ASCII string `rhs`,
// returning the result as a newly created string.
std::shared_ptr<const perlang::String> BigInt::operator+(const perlang::ASCIIString& rhs) const
{
    // Render the number as text first; from_copied_string() makes its own copy of the characters, so the
    // resulting string does not depend on the lifetime of `digits`.
    const std::string digits = to_string();
    const auto lhs = perlang::ASCIIString::from_copied_string(digits.c_str());

    // Delegate the actual concatenation to the string class.
    return *lhs + rhs;
}

// Concatenates the textual form of this BigInt (as produced by to_string()) with the UTF-8 string `rhs`,
// returning the result as a newly created string.
std::shared_ptr<const perlang::String> BigInt::operator+(const perlang::UTF8String& rhs) const
{
    // Render the number as text first; from_copied_string() is handed the characters of `digits`, which
    // stays alive for the remainder of this function.
    const std::string digits = to_string();
    const auto lhs = perlang::UTF8String::from_copied_string(digits.c_str());

    // Delegate the actual concatenation to the string class.
    return *lhs + rhs;
}

/*
BigInt - BigInt
---------------
Expand Down
Loading

0 comments on commit d4ba0d4

Please sign in to comment.