From 6ba28162cacf408218babf7041141fd72028c429 Mon Sep 17 00:00:00 2001 From: "P. Oscar Boykin" Date: Tue, 26 Nov 2024 16:13:11 -1000 Subject: [PATCH] Implement remaining string and integer methods in runtime (#1276) * Implement string methods used for pattern matching in runtime * try to fix ci in C * try to rename * fix it I think * add string find and integer equality * add more predef implementations * fill out all std-lib --- .github/workflows/ci.yml | 4 +- c_runtime/Makefile | 16 +- c_runtime/bosatsu_ext_Bosatsu_l_Predef.c | 267 +++ c_runtime/bosatsu_ext_Bosatsu_l_Prog.c | 50 + c_runtime/bosatsu_runtime.c | 1773 ++++++++++++++++- c_runtime/bosatsu_runtime.h | 62 +- c_runtime/test.c | 44 +- .../bykn/bosatsu/codegen/clang/ClangGen.scala | 2 +- 8 files changed, 2184 insertions(+), 34 deletions(-) create mode 100644 c_runtime/bosatsu_ext_Bosatsu_l_Predef.c create mode 100644 c_runtime/bosatsu_ext_Bosatsu_l_Prog.c diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9d6a34a8b..8f2912b19 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -121,9 +121,9 @@ jobs: - name: "test runtime code" run: | cd c_runtime + rm -f test make && git diff --quiet - ./test - cd .. + ./test_exe && cd .. - name: "build assembly" run: "sbt \"++${{matrix.scala}}; cli/assembly\"" - name: "generate c code" diff --git a/c_runtime/Makefile b/c_runtime/Makefile index 1a54699fb..d36d6a353 100644 --- a/c_runtime/Makefile +++ b/c_runtime/Makefile @@ -1,4 +1,4 @@ -all: bosatsu_runtime.o test +all: bosatsu_runtime.o test_out bosatsu_ext_Bosatsu_l_Predef.o bosatsu_ext_Bosatsu_l_Prog.o bosatsu_generated.h: typegen.py python3 typegen.py impls > bosatsu_generated.h @@ -10,5 +10,15 @@ bosatsu_runtime.o: bosatsu_runtime.h bosatsu_runtime.c bosatsu_decls_generated.h gcc -c -Wall -Werror bosatsu_runtime.c # this will eventually have test code for the runtime and predef -test: test.c bosatsu_runtime.o - gcc -O3 -Wall -o test test.c bosatsu_runtime.o +test_exe: test.c bosatsu_runtime.o + gcc -O3 -Wall -o test_exe test.c bosatsu_runtime.o + +test_out: test_exe + ./test_exe > output.log 2>&1 || { cat output.log; rm -f output.log; false; } + touch test_out + +bosatsu_ext_Bosatsu_l_Predef.o: bosatsu_ext_Bosatsu_l_Predef.c bosatsu_runtime.o + gcc -Wall -Werror -c bosatsu_ext_Bosatsu_l_Predef.c + +bosatsu_ext_Bosatsu_l_Prog.o: bosatsu_ext_Bosatsu_l_Prog.c bosatsu_runtime.o + gcc -Wall -Werror -c bosatsu_ext_Bosatsu_l_Prog.c \ No newline at end of file diff --git a/c_runtime/bosatsu_ext_Bosatsu_l_Predef.c b/c_runtime/bosatsu_ext_Bosatsu_l_Predef.c new file mode 100644 index 000000000..d0bb63c97 --- /dev/null +++ b/c_runtime/bosatsu_ext_Bosatsu_l_Predef.c @@ -0,0 +1,267 @@ +#include "bosatsu_runtime.h" +#include +#include +#include + +BValue ___bsts_g_Bosatsu_l_Predef_l_add(BValue a, BValue b) { + BValue result = bsts_integer_add(a, b); + release_value(a); + release_value(b); + return result; +} + +BValue ___bsts_g_Bosatsu_l_Predef_l_and__Int(BValue a, BValue b) { + BValue result = bsts_integer_and(a, b); + release_value(a); + release_value(b); + return result; +} + +BValue ___bsts_g_Bosatsu_l_Predef_l_char__to__String(BValue a) { + int codepoint = (int)PURE_VALUE(a); + char bytes[4]; + int len = bsts_string_code_point_to_utf8(codepoint, bytes); + return bsts_string_from_utf8_bytes_owned(len, bytes); +} + +BValue ___bsts_g_Bosatsu_l_Predef_l_cmp__Int(BValue a, BValue b) { + int result = bsts_integer_cmp(a, b); + release_value(a); + release_value(b); + // -1, 0, 1, but we map to 0, 1, 2 which are the adt tags for LT, EQ, GT + return alloc_enum0(result + 1); +} + +// a is a List[String] +BValue ___bsts_g_Bosatsu_l_Predef_l_concat__String(BValue a) { + BValue amut = a; + BValue res; + ENUM_TAG v = get_variant(amut); + if (v == 0) { + // this is the empty list + res = bsts_string_from_utf8_bytes_static(0, NULL); + goto done; + } + // otherwise we have at least one + size_t total_len = 0; + size_t count = 0; + while (v != 0) { + BValue str = get_enum_index(amut, 0); + amut = get_enum_index(amut, 1); + v = get_variant(amut); + total_len += bsts_string_utf8_len(str); + count++; + } + // now we know the total length and count + if (count == 1) { + // this is List(s), just increment the ref count of s and return + res = clone_value(get_enum_index(a, 0)); + } + else { + // we allocate some bytes and copy + char* bytes = malloc(sizeof(char) * total_len); + char* current_pos = bytes; + while (v != 0) { + BValue str = get_enum_index(amut, 0); + size_t str_len = bsts_string_utf8_len(str); + char* str_bytes = bsts_string_utf8_bytes(str); + memcpy(current_pos, str_bytes, str_len); + current_pos += str_len; + amut = get_enum_index(amut, 1); + v = get_variant(amut); + } + res = bsts_string_from_utf8_bytes_owned(total_len, bytes); + } + + done: + release_value(a); + return res; +} + +BValue ___bsts_g_Bosatsu_l_Predef_l_div(BValue a, BValue b) { + // TODO this is non-trivial, just return something wrong + release_value(b); + return a; +} + +BValue ___bsts_g_Bosatsu_l_Predef_l_eq__Int(BValue a, BValue b) { + BValue res = bsts_integer_equals(a, b) ? alloc_enum0(1) : alloc_enum0(0); + release_value(a); + release_value(b); + return res; +} + +BValue ___bsts_g_Bosatsu_l_Predef_l_gcd__Int(BValue a, BValue b) { + // TODO this is non-trivial, just return something wrong + release_value(b); + return a; +} + +/* +this loops until the returned Int is <= 0 or the returned Int is >= intValue +external def int_loop(intValue: Int, state: a, fn: (Int, a) -> (Int, a)) -> a +*/ +BValue ___bsts_g_Bosatsu_l_Predef_l_int__loop(BValue i, BValue a, BValue fn) { + // def int_loop(i, a, fn): + // cont = (0 < i) + // res = a + // _i = i + // _a = a + // while cont: + // res = fn(_i, _a) + // tmp_i = res[0] + // _a = res[1][0] + // cont = (0 < tmp_i) and (tmp_i < _i) + // _i = tmp_i + // return _a + BValue zero = bsts_integer_from_int(0); + int cont = bsts_integer_cmp(zero, i) > 0; + BValue _i = i; + BValue _a = a; + while (cont) { + // we have to keep a ref to _i to compare below + BValue i_clone = clone_value(_i); + // _i and _a are consumed here, so + BValue res = call_fn2(fn, _i, _a); + BValue tmp_i = clone_value(get_struct_index(res, 0)); + _a = clone_value(get_struct_index(res, 1)); + release_value(res); + // we have to be strictly decreasing _i but > 0 + cont = (bsts_integer_cmp(zero, tmp_i) < 0) && (bsts_integer_cmp(tmp_i, i_clone) < 0); + release_value(i_clone); + _i = tmp_i; + } + // all the rest of the values are references + release_value(i); + release_value(a); + release_value(fn); + return _a; +} + +BValue ___bsts_g_Bosatsu_l_Predef_l_int__to__String(BValue a) { + BValue str = bsts_integer_to_string(a); + release_value(a); + return str; +} + +BValue ___bsts_g_Bosatsu_l_Predef_l_mod__Int(BValue a, BValue b) { + // TODO this is non-trivial, just return something wrong + release_value(b); + return a; +} + +BValue ___bsts_g_Bosatsu_l_Predef_l_not__Int(BValue a) { + // ~x = (-1 - x) + return bsts_integer_negate(bsts_integer_add(a, bsts_integer_from_int(1))); +} + +BValue ___bsts_g_Bosatsu_l_Predef_l_or__Int(BValue a, BValue b) { + BValue result = bsts_integer_or(a, b); + release_value(a); + release_value(b); + return result; +} + +BValue ___bsts_g_Bosatsu_l_Predef_l_partition__String(BValue a, BValue b) { + size_t blen = bsts_string_utf8_len(b); + BValue res; + if (blen == 0) { + // the result has to give proper substrings, so here we return None + res = alloc_enum0(0); + goto done; + } + int offset = bsts_string_find(a, b, 0); + if (offset < 0) { + // return None + res = alloc_enum0(0); + goto done; + } + // we return substrings + // Some((x, y)) with x = a[0:offset], y = a[offset + b.len():] + BValue x = bsts_string_substring(a, 0, offset); + BValue y = bsts_string_substring_tail(a, offset + blen); + res = alloc_enum2(1, x, y); + + done: + release_value(a); + release_value(b); + return res; +} + +BValue ___bsts_g_Bosatsu_l_Predef_l_rpartition__String(BValue a, BValue b) { + size_t blen = bsts_string_utf8_len(b); + BValue res; + if (blen == 0) { + // the result has to give proper substrings, so here we return None + res = alloc_enum0(0); + goto done; + } + size_t alen = bsts_string_utf8_len(a); + int offset = bsts_string_rfind(a, b, alen - 1); + if (offset < 0) { + // return None + res = alloc_enum0(0); + goto done; + } + // we return substrings + // Some((x, y)) with x = a[0:offset], y = a[offset + b.len():] + BValue x = bsts_string_substring(a, 0, offset); + BValue y = bsts_string_substring_tail(a, offset + blen); + res = alloc_enum2(1, x, y); + + done: + release_value(a); + release_value(b); + return res; +} + +BValue ___bsts_g_Bosatsu_l_Predef_l_shift__left__Int(BValue a, BValue b) { + BValue res = bsts_integer_shift_left(a, b); + release_value(a); + release_value(b); + return res; +} + +BValue ___bsts_g_Bosatsu_l_Predef_l_shift__right__Int(BValue a, BValue b) { + BValue negb = bsts_integer_negate(b); + BValue res = bsts_integer_shift_left(a, negb); + release_value(a); + release_value(negb); + return res; +} + +BValue ___bsts_g_Bosatsu_l_Predef_l_string__Order__fn(BValue a, BValue b) { + int result = bsts_string_cmp(a, b); + release_value(a); + release_value(b); + // -1, 0, 1, but we map to 0, 1, 2 which are the adt tags for LT, EQ, GT + return alloc_enum0(result + 1); +} + +BValue ___bsts_g_Bosatsu_l_Predef_l_sub(BValue a, BValue b) { + return ___bsts_g_Bosatsu_l_Predef_l_add(a, bsts_integer_negate(b)); +} + +BValue ___bsts_g_Bosatsu_l_Predef_l_times(BValue a, BValue b) { + BValue result = bsts_integer_times(a, b); + release_value(a); + release_value(b); + return result; +} + +BValue ___bsts_g_Bosatsu_l_Predef_l_trace(BValue a, BValue b) { + char* bytes = bsts_string_utf8_bytes(a); + size_t len = bsts_string_utf8_len(a); + // TODO: if this string is somehow too big for an int this may fail + printf("%.*s\n", (int)len, bytes); + release_value(a); + + return b; +} + +BValue ___bsts_g_Bosatsu_l_Predef_l_xor__Int(BValue a, BValue b) { + BValue result = bsts_integer_xor(a, b); + release_value(a); + release_value(b); + return result; +} \ No newline at end of file diff --git a/c_runtime/bosatsu_ext_Bosatsu_l_Prog.c b/c_runtime/bosatsu_ext_Bosatsu_l_Prog.c new file mode 100644 index 000000000..3798f92bb --- /dev/null +++ b/c_runtime/bosatsu_ext_Bosatsu_l_Prog.c @@ -0,0 +1,50 @@ +#include "bosatsu_runtime.h" + +/* +# Prog is an ADT with the following values: +# Pure(a) => (0, a) +# Raise(e) => (1, e) +# FlatMap(p, f) => (2, p, f) +# Recover(p, f) => (3, p, f) +# ApplyFix(a, f) => (4, a, f) +# ReadEnv() => (5, ) +# RemapEnv(f, p) => (6, f, p) +*/ + +BValue ___bsts_g_Bosatsu_l_Prog_l_apply__fix(BValue a, BValue f) { + return alloc_enum2(4, a, f); +} + +BValue ___bsts_g_Bosatsu_l_Prog_l_flat__map(BValue p, BValue f) { + return alloc_enum2(2, p, f); +} + +BValue ___bsts_g_Bosatsu_l_Prog_l_get__args() { + // TODO + return (BValue)1; +} + +BValue ___bsts_g_Bosatsu_l_Prog_l_println(BValue a) { + // TODO + return (BValue)1; +} + +BValue ___bsts_g_Bosatsu_l_Prog_l_pure(BValue a) { + return alloc_enum1(0, a); +} + +BValue ___bsts_g_Bosatsu_l_Prog_l_raise__error(BValue a) { + return alloc_enum1(1, a); +} + +BValue ___bsts_g_Bosatsu_l_Prog_l_read__env() { + return alloc_enum0(5); +} + +BValue ___bsts_g_Bosatsu_l_Prog_l_recover(BValue p, BValue f) { + return alloc_enum2(3, p, f); +} + +BValue ___bsts_g_Bosatsu_l_Prog_l_remap__env(BValue f, BValue p) { + return alloc_enum2(6, f, p); +} \ No newline at end of file diff --git a/c_runtime/bosatsu_runtime.c b/c_runtime/bosatsu_runtime.c index f6be05c70..c4b2d05ba 100644 --- a/c_runtime/bosatsu_runtime.c +++ b/c_runtime/bosatsu_runtime.c @@ -38,6 +38,7 @@ DEFINE_RC_ENUM(Enum0,); DEFINE_RC_STRUCT(External, void* external; FreeFn ex_free;); DEFINE_RC_STRUCT(BSTS_String, size_t len; char* bytes;); +DEFINE_RC_STRUCT(BSTS_Integer, size_t len; _Bool sign; uint32_t* words;); // A general structure for a reference counted memory block // it is always allocated with len BValue array immediately after @@ -128,10 +129,14 @@ void free_external(External* ex) { free(ex); } +void bsts_init_rc(RefCounted* rc, FreeFn free) { + atomic_init(&rc->ref_count, 1); + rc->free = free; +} + BValue alloc_external(void* data, FreeFn free) { External* rc = malloc(sizeof(External)); - atomic_init(&rc->ref_count, 1); - rc->free = (FreeFn)free_external; + bsts_init_rc((RefCounted*)rc, free); rc->external = data; rc->ex_free = free; return (BValue)rc; @@ -162,40 +167,916 @@ BValue bsts_string_from_utf8_bytes_copy(size_t len, char* bytes) { } str->len = len; str->bytes = bytes_copy; - atomic_init(&str->ref_count, 1); - str->free = (FreeFn)free_string; + bsts_init_rc((RefCounted*)str, free_string); return (BValue)str; } +BValue bsts_string_from_utf8_bytes_owned(size_t len, char* bytes) { + BSTS_String* str = malloc(sizeof(BSTS_String)); + str->len = len; + str->bytes = bytes; + bsts_init_rc((RefCounted*)str, free_string); + + return (BValue)str; +} + +BValue bsts_string_from_utf8_bytes_static(size_t len, char* bytes) { + BSTS_String* str = malloc(sizeof(BSTS_String)); + str->len = len; + str->bytes = bytes; + bsts_init_rc((RefCounted*)str, free_static_string); + + return (BValue)str; +} + +int bsts_string_code_point_to_utf8(int code_point, char* output) { + // Validate the code point + if (code_point < 0 || code_point > 0x10FFFF || + (code_point >= 0xD800 && code_point <= 0xDFFF)) { + // Invalid code point + return -1; + } + + if (code_point <= 0x7F) { + // 1-byte sequence (ASCII) + output[0] = (char)code_point; + return 1; + } else if (code_point <= 0x7FF) { + // 2-byte sequence + output[0] = (char)(0xC0 | ((code_point >> 6) & 0x1F)); + output[1] = (char)(0x80 | (code_point & 0x3F)); + return 2; + } else if (code_point <= 0xFFFF) { + // 3-byte sequence + output[0] = (char)(0xE0 | ((code_point >> 12) & 0x0F)); + output[1] = (char)(0x80 | ((code_point >> 6) & 0x3F)); + output[2] = (char)(0x80 | (code_point & 0x3F)); + return 3; + } else if (code_point <= 0x10FFFF) { + // 4-byte sequence + output[0] = (char)(0xF0 | ((code_point >> 18) & 0x07)); + output[1] = (char)(0x80 | ((code_point >> 12) & 0x3F)); + output[2] = (char)(0x80 | ((code_point >> 6) & 0x3F)); + output[3] = (char)(0x80 | (code_point & 0x3F)); + return 4; + } + + // Should not reach here + return -1; +} + _Bool bsts_string_equals(BValue left, BValue right) { + if (left == right) { + return 1; + } + BSTS_String* lstr = (BSTS_String*)left; BSTS_String* rstr = (BSTS_String*)right; - if (lstr->len == rstr->len) { + size_t llen = lstr->len; + if (llen == rstr->len) { return (strncmp( lstr->bytes, rstr->bytes, - lstr->len) == 0); + llen) == 0); } else { return 0; } } +int bsts_string_cmp(BValue left, BValue right) { + if (left == right) { + return 0; + } + + BSTS_String* lstr = (BSTS_String*)left; + BSTS_String* rstr = (BSTS_String*)right; + + size_t llen = lstr->len; + size_t rlen = rstr->len; + size_t min_len = (llen <= rlen) ? llen : rlen; + int cmp = strncmp(lstr->bytes, rstr->bytes, min_len); + + if (cmp == 0) { + return (llen < rlen) ? -1 : ((llen > rlen) ? 1 : 0); + } + else { + return cmp; + } +} + size_t bsts_string_utf8_len(BValue str) { BSTS_String* strptr = (BSTS_String*)str; return strptr->len; } -BValue bsts_string_from_utf8_bytes_static(size_t len, char* bytes) { - BSTS_String* str = malloc(sizeof(BSTS_String)); - str->len = len; - str->bytes = bytes; - atomic_init(&str->ref_count, 1); - str->free = (FreeFn)free_static_string; +char* bsts_string_utf8_bytes(BValue str) { + BSTS_String* strptr = (BSTS_String*)str; + return strptr->bytes; +} - return (BValue)str; +/** + * return the number of bytes at this position, 1, 2, 3, 4 or -1 on error + * TODO: the runtime maybe should assume everything is safe, which the + * compiler should have guaranteed, so doing error checks here is probably + * wasteful once we debug the compiler. + */ +int bsts_string_code_point_bytes(BValue value, int offset) { + BSTS_String* str = (BSTS_String*)value; + if (str == NULL || offset < 0 || offset >= str->len) { + // Invalid input + return -1; + } + + // cast to an unsigned char for the math below + unsigned char *s = (unsigned char*)(str->bytes + offset); + unsigned char c = s[0]; + int remaining = str->len - offset; + int bytes = -1; + + if (c <= 0x7F) { + // 1-byte sequence (ASCII) + bytes = 1; + } else if ((c & 0xE0) == 0xC0) { + // 2-byte sequence + if (remaining < 2 || (s[1] & 0xC0) != 0x80) { + // Invalid continuation byte + bytes = -1; + } + else { + bytes = 2; + } + } else if ((c & 0xF0) == 0xE0) { + // 3-byte sequence + if (remaining < 3 || (s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80) { + // Invalid continuation bytes + bytes = -1; + } + else { + bytes = 3; + } + } else if ((c & 0xF8) == 0xF0) { + // 4-byte sequence + if (remaining < 4 || (s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80 || (s[3] & 0xC0) != 0x80) { + // Invalid continuation bytes + bytes = -1; + } + else { + bytes = 4; + } + } else { + // Invalid UTF-8 leading byte + bytes = -1; + } + + // Return the code point value + return bytes; +} + +/** + * return char at the given offset + * TODO: the runtime maybe should assume everything is safe, which the + * compiler should have guaranteed, so doing error checks here is probably + * wasteful once we debug the compiler. + */ +BValue bsts_string_char_at(BValue value, int offset) { + BSTS_String* str = (BSTS_String*)value; + if (str == NULL || offset < 0 || offset >= str->len) { + // Invalid input + return 0; + } + + // cast to an unsigned char for the math below + unsigned char *s = (unsigned char*)(str->bytes + offset); + unsigned char c = s[0]; + int remaining = str->len - offset; + uint32_t code_point = 0; + + if (c <= 0x7F) { + // 1-byte sequence (ASCII) + code_point = c; + } else if ((c & 0xE0) == 0xC0) { + // 2-byte sequence + if (remaining < 2 || (s[1] & 0xC0) != 0x80) { + // Invalid continuation byte + return 0; + } + code_point = ((c & 0x1F) << 6) | (s[1] & 0x3F); + } else if ((c & 0xF0) == 0xE0) { + // 3-byte sequence + if (remaining < 3 || (s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80) { + // Invalid continuation bytes + return 0; + } + code_point = ((c & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F); + } else if ((c & 0xF8) == 0xF0) { + // 4-byte sequence + if (remaining < 4 || (s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80 || (s[3] & 0xC0) != 0x80) { + // Invalid continuation bytes + return 0; + } + code_point = ((c & 0x07) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F); + } else { + // Invalid UTF-8 leading byte + return 0; + } + + // Return the code point value + return BSTS_TO_CHAR((intptr_t)code_point); +} + +_Bool bsts_rc_value_is_unique(RefCounted* value) { + return atomic_load(&(value->ref_count)) == 1; +} + +// (&string, int, int) -> string +BValue bsts_string_substring(BValue value, int start, int end) { + BSTS_String* str = (BSTS_String*)value; + size_t len = str->len; + if (len < end || end <= start) { + // this is invalid + return 0; + } + size_t new_len = end - start; + if (str->free == free_static_string) { + if (new_len > 0) { + return bsts_string_from_utf8_bytes_static(new_len, str->bytes + start); + } + else { + // empty string, should probably be a constant + return bsts_string_from_utf8_bytes_static(0, ""); + } + } + else { + // ref-counted bytes + // TODO: we could keep track of an offset into the string to optimize + // this case when refcount == 1, which may matter for tail recursion + // taking substrings.... + return bsts_string_from_utf8_bytes_copy(new_len, str->bytes + start); + } +} + +// this takes ownership since it can possibly reuse (if it is a static string, or count is 1) +// (String, int) -> String +BValue bsts_string_substring_tail(BValue value, int byte_offset) { + BSTS_String* str = (BSTS_String*)value; + return bsts_string_substring(str, byte_offset, str->len); +} + +int bsts_string_find(BValue haystack, BValue needle, int start) { + BSTS_String* haystack_str = (BSTS_String*)haystack; + BSTS_String* needle_str = (BSTS_String*)needle; + + size_t haystack_len = haystack_str->len; + size_t needle_len = needle_str->len; + if (needle_len == 0) { + // Empty needle matches at start + return (start <= (int)haystack_len) ? start : -1; + } + + if (start < 0 || start > (int)(haystack_len - needle_len)) { + // Start position is out of bounds + return -1; + } + + + // The maximum valid start index is haystack_len - needle_len + for (size_t i = (size_t)start; i <= haystack_len - needle_len; i++) { + if (haystack_str->bytes[i] == needle_str->bytes[0]) { + // Potential match found, check the rest of the needle + size_t j; + for (j = 1; j < needle_len; j++) { + if (haystack_str->bytes[i + j] != needle_str->bytes[j]) { + break; + } + } + if (j == needle_len) { + // Full match found + return (int)i; + } + } + } + + // No match found + return -1; +} + +int bsts_string_rfind(BValue haystack, BValue needle, int start) { + BSTS_String* haystack_str = (BSTS_String*)haystack; + BSTS_String* needle_str = (BSTS_String*)needle; + + size_t haystack_len = haystack_str->len; + size_t needle_len = needle_str->len; + if (needle_len == 0) { + // Empty needle matches at end + if (haystack_len == 0) { + return 0; + } + return (start < (int)haystack_len) ? start : -1; + } + + if (start < 0 || start > (int)(haystack_len - needle_len)) { + // Start position is out of bounds + return -1; + } + + + // The maximum valid start index is haystack_len - needle_len + for (size_t i = (size_t)start; i <= 0; i--) { + if (haystack_str->bytes[i] == needle_str->bytes[0]) { + // Potential match found, check the rest of the needle + size_t j; + for (j = 1; j < needle_len; j++) { + if (haystack_str->bytes[i + j] != needle_str->bytes[j]) { + break; + } + } + if (j == needle_len) { + // Full match found + return (int)i; + } + } + } + + // No match found + return -1; +} + +// Helper macros and functions +#define IS_SMALL(v) (((uintptr_t)(v)) & 1) +#define GET_SMALL_INT(v) ((intptr_t)((uintptr_t)(v) >> 1)) +#define GET_BIG_INT(v) ((BSTS_Integer*)(v)) + +BValue bsts_integer_from_int(int small_int) { + // chatgpt + uintptr_t value = (((uintptr_t)(intptr_t)small_int) << 1) | 1; + return (BValue)value; +} + +void free_integer(void* integer) { + BSTS_Integer* bint = GET_BIG_INT(integer); + free(bint->words); + free(integer); +} + +BValue bsts_integer_from_words_copy(_Bool is_pos, size_t size, uint32_t* words) { + // chatgpt authored this + BSTS_Integer* integer = (BSTS_Integer*)malloc(sizeof(BSTS_Integer)); + if (integer == NULL) { + // Handle allocation failure + return NULL; + } + + integer->sign = !is_pos; // sign: 0 for positive, 1 for negative + // remove any leading 0 words + while ((size > 1) && (words[size - 1] == 0)) { + size--; + } + integer->len = size; + integer->words = (uint32_t*)malloc(size * sizeof(uint32_t)); + if (integer->words == NULL) { + // Handle allocation failure + free(integer); + return NULL; + } + bsts_init_rc((RefCounted*)integer, free_integer); + memcpy(integer->words, words, size * sizeof(uint32_t)); + return (BValue)integer; // Low bit is 0 since it's a pointer +} + +// Function to check equality between two BValues +_Bool bsts_integer_equals(BValue left, BValue right) { + if (left == right) { return 1; } + + uintptr_t lval = (uintptr_t)left; + uintptr_t rval = (uintptr_t)right; + + _Bool l_is_small = lval & 1; + _Bool r_is_small = rval & 1; + + if (l_is_small && r_is_small) { + // Both are small integers, but they aren't equal + return 0; + } else if (!l_is_small && !r_is_small) { + // Both are BSTS_Integer pointers + BSTS_Integer* l_int = GET_BIG_INT(left); + BSTS_Integer* r_int = GET_BIG_INT(right); + + // Compare sign + if (l_int->sign != r_int->sign) + return 0; + // Compare length + if (l_int->len != r_int->len) + return 0; + // Compare words + for (size_t i = 0; i < l_int->len; ++i) { + if (l_int->words[i] != r_int->words[i]) + return 0; + } + return 1; // All equal + } else { + // One is small integer, one is BSTS_Integer* + // Ensure left is the small integer + if (!l_is_small) { + BValue temp = left; + left = right; + right = temp; + _Bool temp_is_small = l_is_small; + l_is_small = r_is_small; + r_is_small = temp_is_small; + } + + // Extract small integer value + intptr_t small_int_value = GET_SMALL_INT(left); + BSTS_Integer* big_int = GET_BIG_INT(right); + + // Check sign + _Bool big_int_sign = big_int->sign; // 0 for positive, 1 for negative + _Bool small_int_sign = (small_int_value < 0) ? 1 : 0; + if (big_int_sign != small_int_sign) { + return 0; // Different signs + } + + // Compare absolute values + uintptr_t abs_small_int_value = (uintptr_t)(small_int_value < 0 ? -small_int_value : small_int_value); + + // Check if big_int can fit in uintptr_t + size_t bits_in_uintptr_t = sizeof(uintptr_t) * 8; + if (big_int->len * 32 > bits_in_uintptr_t) { + return 0; // big_int is too large + } + + // Reconstruct big integer value + uintptr_t big_int_value = 0; + for (size_t i = 0; i < big_int->len; ++i) { + big_int_value |= ((uintptr_t)big_int->words[i]) << (32 * i); + } + + // Compare values + if (big_int_value != abs_small_int_value) { + return 0; + } + + return 1; // Values are equal + } +} + +int compare_abs(size_t len_a, uint32_t* words_a, size_t len_b, uint32_t* words_b) { + if (len_a > len_b) { + return 1; + } else if (len_a < len_b) { + return -1; + } else { + // Same length, compare from most significant word + for (size_t i = len_a; i > 0; i--) { + uint32_t word_a = words_a[i - 1]; + uint32_t word_b = words_b[i - 1]; + if (word_a > word_b) { + return 1; + } else if (word_a < word_b) { + return -1; + } + } + return 0; + } +} + +BValue bsts_integer_add(BValue l, BValue r) { + _Bool l_is_small = IS_SMALL(l); + _Bool r_is_small = IS_SMALL(r); + + // Case 1: Both are small integers + if (l_is_small && r_is_small) { + intptr_t l_int = GET_SMALL_INT(l); + intptr_t r_int = GET_SMALL_INT(r); + intptr_t result = l_int + r_int; + + // Check for overflow + if ((result > (INTPTR_MAX >> 1)) || (result < (INTPTR_MIN >> 1))) { + // Promote to big integer + _Bool is_positive = result >= 0; + uintptr_t abs_result = (uintptr_t)(result >= 0 ? result : -result); + + size_t word_count = 0; + uintptr_t temp = abs_result; + while (temp > 0) { + temp >>= 32; + word_count++; + } + if (word_count == 0) { + return bsts_integer_from_int(0); + } + uint32_t* words = (uint32_t*)malloc(word_count * sizeof(uint32_t)); + if (words == NULL) { + return NULL; + } + temp = abs_result; + for (size_t i = 0; i < word_count; i++) { + words[i] = (uint32_t)(temp & 0xFFFFFFFF); + temp >>= 32; + } + BValue big_result = bsts_integer_from_words_copy(is_positive, word_count, words); + free(words); + return big_result; + } else { + // Result fits in small integer + return bsts_integer_from_int((int)result); + } + } else { + // At least one operand is a big integer + typedef struct { + _Bool sign; + size_t len; + uint32_t* words; + } Operand; + + Operand left_operand; + Operand right_operand; + + // Process left operand + if (l_is_small) { + intptr_t l_int = GET_SMALL_INT(l); + left_operand.sign = l_int < 0; + uintptr_t abs_l_int = (uintptr_t)(l_int < 0 ? -l_int : l_int); + + size_t l_word_count = (abs_l_int == 0) ? 1 : 0; + uintptr_t temp = abs_l_int; + while (temp > 0) { + temp >>= 32; + l_word_count++; + } + left_operand.len = l_word_count; + left_operand.words = (uint32_t*)calloc(l_word_count, sizeof(uint32_t)); + if (left_operand.words == NULL) { + return NULL; + } + temp = abs_l_int; + for (size_t i = 0; i < l_word_count; i++) { + left_operand.words[i] = (uint32_t)(temp & 0xFFFFFFFF); + temp >>= 32; + } + } else { + BSTS_Integer* l_big = GET_BIG_INT(l); + left_operand.sign = l_big->sign; + left_operand.len = l_big->len; + left_operand.words = (uint32_t*)malloc(l_big->len * sizeof(uint32_t)); + if (left_operand.words == NULL) { + return NULL; + } + memcpy(left_operand.words, l_big->words, l_big->len * sizeof(uint32_t)); + } + + // Process right operand + if (r_is_small) { + intptr_t r_int = GET_SMALL_INT(r); + right_operand.sign = r_int < 0; + uintptr_t abs_r_int = (uintptr_t)(r_int < 0 ? -r_int : r_int); + + size_t r_word_count = (abs_r_int == 0) ? 1 : 0; + uintptr_t temp = abs_r_int; + while (temp > 0) { + temp >>= 32; + r_word_count++; + } + right_operand.len = r_word_count; + right_operand.words = (uint32_t*)calloc(r_word_count, sizeof(uint32_t)); + if (right_operand.words == NULL) { + free(left_operand.words); + return NULL; + } + temp = abs_r_int; + for (size_t i = 0; i < r_word_count; i++) { + right_operand.words[i] = (uint32_t)(temp & 0xFFFFFFFF); + temp >>= 32; + } + } else { + BSTS_Integer* r_big = GET_BIG_INT(r); + right_operand.sign = r_big->sign; + right_operand.len = r_big->len; + right_operand.words = (uint32_t*)malloc(r_big->len * sizeof(uint32_t)); + if (right_operand.words == NULL) { + free(left_operand.words); + return NULL; + } + memcpy(right_operand.words, r_big->words, r_big->len * sizeof(uint32_t)); + } + + BValue result = NULL; + if (left_operand.sign == right_operand.sign) { + // Addition + _Bool result_sign = left_operand.sign; + size_t max_len = (left_operand.len > right_operand.len) ? left_operand.len : right_operand.len; + uint32_t* result_words = (uint32_t*)calloc(max_len + 1, sizeof(uint32_t)); + if (result_words == NULL) { + free(left_operand.words); + free(right_operand.words); + return NULL; + } + + uint64_t carry = 0; + size_t i = 0; + for (; i < max_len; i++) { + uint64_t left_word = (i < left_operand.len) ? left_operand.words[i] : 0; + uint64_t right_word = (i < right_operand.len) ? right_operand.words[i] : 0; + uint64_t sum = left_word + right_word + carry; + result_words[i] = (uint32_t)(sum & 0xFFFFFFFF); + carry = sum >> 32; + } + if (carry) { + result_words[i++] = (uint32_t)carry; + } + size_t result_len = i; + + // Normalize result + while (result_len > 1 && result_words[result_len - 1] == 0) { + result_len--; + } + + // Check for small integer representation + if (result_len == 1) { + intptr_t small_int = (intptr_t)result_words[0]; + if (result_sign) { + small_int = -small_int; + } + result = bsts_integer_from_int((int)small_int); + free(result_words); + } else { + result = bsts_integer_from_words_copy(!result_sign, result_len, result_words); + free(result_words); + } + } else { + // Subtraction + int cmp = compare_abs(left_operand.len, left_operand.words, right_operand.len, right_operand.words); + if (cmp == 0) { + result = bsts_integer_from_int(0); + } else { + Operand* larger; + Operand* smaller; + _Bool result_sign; + if (cmp > 0) { + larger = &left_operand; + smaller = &right_operand; + result_sign = left_operand.sign; + } else { + larger = &right_operand; + smaller = &left_operand; + result_sign = right_operand.sign; + } + + size_t result_len = larger->len; + uint32_t* result_words = (uint32_t*)calloc(result_len, sizeof(uint32_t)); + if (result_words == NULL) { + free(left_operand.words); + free(right_operand.words); + return NULL; + } + + int64_t borrow = 0; + for (size_t i = 0; i < result_len; i++) { + int64_t large_word = (int64_t)larger->words[i]; + int64_t small_word = (i < smaller->len) ? (int64_t)smaller->words[i] : 0; + int64_t diff = large_word - small_word - borrow; + if (diff < 0) { + diff += ((int64_t)1 << 32); + borrow = 1; + } else { + borrow = 0; + } + result_words[i] = (uint32_t)(diff & 0xFFFFFFFF); + } + + // Normalize result + while (result_len > 1 && result_words[result_len - 1] == 0) { + result_len--; + } + + // Check for small integer representation + if (result_len == 1) { + intptr_t small_int = (intptr_t)result_words[0]; + if (result_sign) { + small_int = -small_int; + } + result = bsts_integer_from_int((int)small_int); + free(result_words); + } else { + result = bsts_integer_from_words_copy(!result_sign, result_len, result_words); + free(result_words); + } + } + } + + free(left_operand.words); + free(right_operand.words); + + return result; + } +} + +// Function to negate a BValue +BValue bsts_integer_negate(BValue v) { + if (IS_SMALL(v)) { + intptr_t small_int = GET_SMALL_INT(v); + if (small_int != INTPTR_MIN) { + intptr_t negated_int = -small_int; + return bsts_integer_from_int((int)negated_int); + } else { + // Handle INT_MIN, which cannot be negated in two's complement + uintmax_t abs_value = (uintmax_t)INTPTR_MAX + 1; // Absolute value of INTPTR_MIN + // Determine the number of 32-bit words needed + size_t num_words = 0; + uintmax_t temp = abs_value; + do { + temp >>= 32; + num_words++; + } while (temp != 0); + + uint32_t* words = (uint32_t*)malloc(num_words * sizeof(uint32_t)); + if (words == NULL) { + return NULL; + } + temp = abs_value; + for (size_t i = 0; i < num_words; ++i) { + words[i] = (uint32_t)(temp & 0xFFFFFFFF); + temp >>= 32; + } + // Create a big integer with positive sign + BValue result = bsts_integer_from_words_copy(1, num_words, words); + free(words); + return result; + } + } else { + // Negate big integer + BSTS_Integer* integer = GET_BIG_INT(v); + // Check if the integer is zero + _Bool is_zero = 1; + for (size_t i = 0; i < integer->len; ++i) { + if (integer->words[i] != 0) { + is_zero = 0; + break; + } + } + if (is_zero) { + // Zero remains zero when negated + return bsts_integer_from_int(0); + } + if (bsts_rc_value_is_unique((RefCounted*)integer)) { + // we can reuse the data + _Bool sign = integer->sign; + integer->sign = !sign; + return v; + } + // Create a new big integer with flipped sign + BSTS_Integer* negated_integer = (BSTS_Integer*)malloc(sizeof(BSTS_Integer)); + if (negated_integer == NULL) { + return NULL; + } + negated_integer->len = integer->len; + negated_integer->sign = !integer->sign; // Flip the sign + negated_integer->words = (uint32_t*)malloc(integer->len * sizeof(uint32_t)); + if (negated_integer->words == NULL) { + free(negated_integer); + return NULL; + } + memcpy(negated_integer->words, integer->words, integer->len * sizeof(uint32_t)); + release_value(v); + return (BValue)negated_integer; + } +} + +// Helper function to divide big integer by 10 +uint32_t bigint_divide_by_10(uint32_t* words, size_t len, uint32_t* quotient_words, size_t* quotient_len_ptr) { + uint64_t remainder = 0; + for (size_t i = len; i > 0; i--) { + uint64_t dividend = (remainder << 32) | words[i - 1]; + uint32_t quotient = (uint32_t)(dividend / 10); + remainder = dividend % 10; + quotient_words[i - 1] = quotient; + } + + // Remove leading zeros + size_t quotient_len = len; + while (quotient_len > 0 && quotient_words[quotient_len - 1] == 0) { + quotient_len--; + } + + if (quotient_len_ptr) { + *quotient_len_ptr = quotient_len; + } + + return (uint32_t)remainder; +} + +// &Integer -> String +BValue bsts_integer_to_string(BValue v) { + if (IS_SMALL(v)) { + intptr_t value = GET_SMALL_INT(v); + + // Convert small integer to string + char buffer[32]; // Enough for 64-bit integer + int length = snprintf(buffer, sizeof(buffer), "%ld", value); + + if (length < 0) { + // snprintf error + return NULL; + } + + return bsts_string_from_utf8_bytes_copy(length, buffer); + } else { + // Big integer + BSTS_Integer* bigint = GET_BIG_INT(v); + + // Check for zero + int is_zero = 1; + for (size_t i = 0; i < bigint->len; i++) { + if (bigint->words[i] != 0) { + is_zero = 0; + break; + } + } + if (is_zero) { + // Return "0" + return bsts_string_from_utf8_bytes_static(1, "0"); + } + + // Estimate the maximum number of digits + size_t bits = bigint->len * 32; + size_t max_digits = (size_t)(bits * 0.30103) + 2; // +1 for sign, +1 for safety + + // Allocate array for digits + char* digits = (char*)malloc(max_digits); + if (digits == NULL) { + // Memory allocation error + return NULL; + } + + size_t digit_count = 0; + + // Make a copy of the bigint words + size_t len = bigint->len; + uint32_t* words_copy = (uint32_t*)malloc(len * sizeof(uint32_t)); + if (words_copy == NULL) { + // Memory allocation error + free(digits); + return NULL; + } + memcpy(words_copy, bigint->words, len * sizeof(uint32_t)); + + uint32_t* quotient_words = (uint32_t*)malloc(len * sizeof(uint32_t)); + if (quotient_words == NULL) { + // Memory allocation error + free(digits); + free(words_copy); + return NULL; + } + + // Handle sign + _Bool sign = bigint->sign; + + // Repeatedly divide words_copy by 10 + while (len > 0) { + size_t quotient_len = 0; + uint32_t remainder = bigint_divide_by_10(words_copy, len, quotient_words, "ient_len); + + // Store the remainder as a digit + digits[digit_count++] = '0' + (char)remainder; + + // Prepare for next iteration + len = quotient_len; + uint32_t* temp = words_copy; + words_copy = quotient_words; + quotient_words = temp; + } + + // Free the last quotient_words + free(quotient_words); + + // If negative, add '-' sign + if (sign) { + digits[digit_count++] = '-'; + } + + // Now, reverse the digits to get the correct order + char* data = (char*)malloc(digit_count); + if (data == NULL) { + // Memory allocation error + free(digits); + free(words_copy); + return NULL; + } + + // reverse the data + for (size_t i = 0; i < digit_count; i++) { + data[i] = digits[digit_count - i - 1]; + } + + // Free temporary allocations + free(digits); + free(words_copy); + + return bsts_string_from_utf8_bytes_owned(digit_count, data); + } } // Function to determine the type of the given value pointer and clone if necessary @@ -225,6 +1106,872 @@ static void release_ref_counted(RefCounted *block) { } } +// Function to convert sign-magnitude to two's complement representation +void sign_magnitude_to_twos_complement(_Bool sign, size_t len, uint32_t* words, uint32_t* result_words, size_t result_len) { + if (sign == 0) { + // Positive number + memcpy(result_words, words, len * sizeof(uint32_t)); + for (size_t i = len; i < result_len; i++) { + result_words[i] = 0; + } + } else { + // Negative number + memcpy(result_words, words, len * sizeof(uint32_t)); + for (size_t i = len; i < result_len; i++) { + result_words[i] = 0; + } + // Invert all bits + for (size_t i = 0; i < result_len; i++) { + result_words[i] = ~result_words[i]; + } + // Add 1 + uint64_t carry = 1; + for (size_t i = 0; i < result_len; i++) { + uint64_t sum = (uint64_t)result_words[i] + carry; + result_words[i] = (uint32_t)(sum & 0xFFFFFFFF); + carry = sum >> 32; + if (carry == 0) { + break; + } + } + } +} + +// Function to convert two's complement to sign-magnitude representation +void twos_complement_to_sign_magnitude(size_t len, uint32_t* words, _Bool* sign, size_t* result_len, uint32_t* result_words) { + // Determine sign from the most significant bit + uint32_t msb = words[len - 1]; + if (msb & 0x80000000) { + // Negative number + *sign = 1; + // Take two's complement to get magnitude + uint32_t* temp_words = (uint32_t*)malloc(len * sizeof(uint32_t)); + if (temp_words == NULL) { + *result_len = 0; + return; + } + for (size_t i = 0; i < len; i++) { + temp_words[i] = ~words[i]; + } + // Add 1 + uint64_t carry = 1; + for (size_t i = 0; i < len; i++) { + uint64_t sum = (uint64_t)temp_words[i] + carry; + temp_words[i] = (uint32_t)(sum & 0xFFFFFFFF); + carry = sum >> 32; + if (carry == 0) { + break; + } + } + // Remove leading zeros + size_t mag_len = len; + while (mag_len > 1 && temp_words[mag_len - 1] == 0) { + mag_len--; + } + memcpy(result_words, temp_words, mag_len * sizeof(uint32_t)); + *result_len = mag_len; + free(temp_words); + } else { + // Positive number + *sign = 0; + size_t mag_len = len; + while (mag_len > 1 && words[mag_len - 1] == 0) { + mag_len--; + } + memcpy(result_words, words, mag_len * sizeof(uint32_t)); + *result_len = mag_len; + } +} + +// Function to perform bitwise AND on two BValues +BValue bsts_integer_and(BValue l, BValue r) { + _Bool l_is_small = IS_SMALL(l); + _Bool r_is_small = IS_SMALL(r); + + // Determine maximum length in words + size_t l_len = l_is_small ? sizeof(intptr_t) * 8 / 32 : GET_BIG_INT(l)->len; + size_t r_len = r_is_small ? sizeof(intptr_t) * 8 / 32 : GET_BIG_INT(r)->len; + size_t max_len = (l_len > r_len) ? l_len : r_len; + + // Ensure at least one word + if (max_len == 0) { + max_len = 1; + } + + // Allocate arrays for two's complement representations + uint32_t* l_twos = (uint32_t*)calloc(max_len, sizeof(uint32_t)); + uint32_t* r_twos = (uint32_t*)calloc(max_len, sizeof(uint32_t)); + if (l_twos == NULL || r_twos == NULL) { + free(l_twos); + free(r_twos); + return NULL; + } + + // Convert left operand to two's complement + if (l_is_small) { + intptr_t l_int = GET_SMALL_INT(l); + _Bool l_sign = (l_int < 0) ? 1 : 0; + uintptr_t l_abs = (uintptr_t)(l_sign ? -l_int : l_int); + for (size_t i = 0; i < max_len && l_abs > 0; i++) { + l_twos[i] = (uint32_t)(l_abs & 0xFFFFFFFF); + l_abs >>= 32; + } + if (l_sign) { + // Negative number: invert bits and add 1 + for (size_t i = 0; i < max_len; i++) { + l_twos[i] = ~l_twos[i]; + } + uint64_t carry = 1; + for (size_t i = 0; i < max_len; i++) { + uint64_t sum = (uint64_t)l_twos[i] + carry; + l_twos[i] = (uint32_t)(sum & 0xFFFFFFFF); + carry = sum >> 32; + if (carry == 0) break; + } + } + } else { + BSTS_Integer* l_big = GET_BIG_INT(l); + sign_magnitude_to_twos_complement(l_big->sign, l_big->len, l_big->words, l_twos, max_len); + } + + // Convert right operand to two's complement + if (r_is_small) { + intptr_t r_int = GET_SMALL_INT(r); + _Bool r_sign = (r_int < 0) ? 1 : 0; + uintptr_t r_abs = (uintptr_t)(r_sign ? -r_int : r_int); + for (size_t i = 0; i < max_len && r_abs > 0; i++) { + r_twos[i] = (uint32_t)(r_abs & 0xFFFFFFFF); + r_abs >>= 32; + } + if (r_sign) { + // Negative number: invert bits and add 1 + for (size_t i = 0; i < max_len; i++) { + r_twos[i] = ~r_twos[i]; + } + uint64_t carry = 1; + for (size_t i = 0; i < max_len; i++) { + uint64_t sum = (uint64_t)r_twos[i] + carry; + r_twos[i] = (uint32_t)(sum & 0xFFFFFFFF); + carry = sum >> 32; + if (carry == 0) break; + } + } + } else { + BSTS_Integer* r_big = GET_BIG_INT(r); + sign_magnitude_to_twos_complement(r_big->sign, r_big->len, r_big->words, r_twos, max_len); + } + + // Perform bitwise AND + uint32_t* result_twos = (uint32_t*)malloc(max_len * sizeof(uint32_t)); + if (result_twos == NULL) { + free(l_twos); + free(r_twos); + return NULL; + } + for (size_t i = 0; i < max_len; i++) { + result_twos[i] = l_twos[i] & r_twos[i]; + } + + free(l_twos); + free(r_twos); + + // Convert result from two's complement to sign-magnitude + _Bool result_sign; + size_t result_len = max_len; + uint32_t* result_words = (uint32_t*)malloc(max_len * sizeof(uint32_t)); + if (result_words == NULL) { + free(result_twos); + return NULL; + } + + twos_complement_to_sign_magnitude(max_len, result_twos, &result_sign, &result_len, result_words); + + free(result_twos); + + // Check if result can be represented as small integer + if (result_len * 32 <= sizeof(intptr_t) * 8) { + // Attempt to pack into small integer + intptr_t result_int = 0; + for (size_t i = 0; i < result_len; i++) { + result_int |= ((intptr_t)result_words[i]) << (32 * i); + } + if (result_sign) { + result_int = -result_int; + } + if (result_int <= (INTPTR_MAX >> 1) && result_int >= (INTPTR_MIN >> 1)) { + BValue result = bsts_integer_from_int((int)result_int); + free(result_words); + return result; + } + } + + // Return result as big integer + BValue result = bsts_integer_from_words_copy(!result_sign, result_len, result_words); + free(result_words); + return result; +} + +// Function to multiply two BValues +BValue bsts_integer_times(BValue left, BValue right) { + _Bool left_is_small = IS_SMALL(left); + _Bool right_is_small = IS_SMALL(right); + + if (left_is_small && right_is_small) { + // Both are small integers + intptr_t l_int = GET_SMALL_INT(left); + intptr_t r_int = GET_SMALL_INT(right); + // Multiply and check for overflow + __int128 result = (__int128)l_int * (__int128)r_int; + // Check if result fits in small integer + if (result >= (INTPTR_MIN >> 1) && result <= (INTPTR_MAX >> 1)) { + return bsts_integer_from_int((int)result); + } else { + // Promote to big integer + _Bool is_positive = result >= 0; + __uint128_t abs_result = result >= 0 ? result : -result; + // Convert abs_result to words + size_t word_count = 0; + __uint128_t temp = abs_result; + while (temp > 0) { + temp >>= 32; + word_count++; + } + uint32_t* words = (uint32_t*)malloc(word_count * sizeof(uint32_t)); + if (words == NULL) { + return NULL; + } + temp = abs_result; + for (size_t i = 0; i < word_count; i++) { + words[i] = (uint32_t)(temp & 0xFFFFFFFF); + temp >>= 32; + } + BValue big_result = bsts_integer_from_words_copy(is_positive, word_count, words); + free(words); + return big_result; + } + } else { + // At least one operand is big integer + typedef struct { + _Bool sign; + size_t len; + uint32_t* words; + } Operand; + + Operand l_operand; + Operand r_operand; + + // Prepare left operand + if (left_is_small) { + intptr_t l_int = GET_SMALL_INT(left); + l_operand.sign = l_int < 0; + uintptr_t abs_l_int = (uintptr_t)(l_int < 0 ? -l_int : l_int); + l_operand.len = 0; + uintptr_t temp = abs_l_int; + while (temp > 0) { + temp >>= 32; + l_operand.len++; + } + if (l_operand.len == 0) { + // Zero + return bsts_integer_from_int(0); + } + l_operand.words = (uint32_t*)malloc(l_operand.len * sizeof(uint32_t)); + if (l_operand.words == NULL) { + return NULL; + } + temp = abs_l_int; + for (size_t i = 0; i < l_operand.len; i++) { + l_operand.words[i] = (uint32_t)(temp & 0xFFFFFFFF); + temp >>= 32; + } + } else { + BSTS_Integer* l_big = GET_BIG_INT(left); + l_operand.sign = l_big->sign; + l_operand.len = l_big->len; + l_operand.words = (uint32_t*)malloc(l_operand.len * sizeof(uint32_t)); + if (l_operand.words == NULL) { + return NULL; + } + memcpy(l_operand.words, l_big->words, l_big->len * sizeof(uint32_t)); + } + + // Prepare right operand + if (right_is_small) { + intptr_t r_int = GET_SMALL_INT(right); + r_operand.sign = r_int < 0; + uintptr_t abs_r_int = (uintptr_t)(r_int < 0 ? -r_int : r_int); + r_operand.len = 0; + uintptr_t temp = abs_r_int; + while (temp > 0) { + temp >>= 32; + r_operand.len++; + } + if (r_operand.len == 0) { + // Zero + free(l_operand.words); + return bsts_integer_from_int(0); + } + r_operand.words = (uint32_t*)malloc(r_operand.len * sizeof(uint32_t)); + if (r_operand.words == NULL) { + free(l_operand.words); + return NULL; + } + temp = abs_r_int; + for (size_t i = 0; i < r_operand.len; i++) { + r_operand.words[i] = (uint32_t)(temp & 0xFFFFFFFF); + temp >>= 32; + } + } else { + BSTS_Integer* r_big = GET_BIG_INT(right); + r_operand.sign = r_big->sign; + r_operand.len = r_big->len; + r_operand.words = (uint32_t*)malloc(r_operand.len * sizeof(uint32_t)); + if (r_operand.words == NULL) { + free(l_operand.words); + return NULL; + } + memcpy(r_operand.words, r_big->words, r_big->len * sizeof(uint32_t)); + } + + // Multiply operands + size_t result_len = l_operand.len + r_operand.len; + uint32_t* result_words = (uint32_t*)calloc(result_len, sizeof(uint32_t)); + if (result_words == NULL) { + free(l_operand.words); + free(r_operand.words); + return NULL; + } + + for (size_t i = 0; i < l_operand.len; i++) { + uint64_t carry = 0; + uint64_t a = l_operand.words[i]; + for (size_t j = 0; j < r_operand.len; j++) { + uint64_t b = r_operand.words[j]; + uint64_t sum = (uint64_t)result_words[i + j] + a * b + carry; + result_words[i + j] = (uint32_t)(sum & 0xFFFFFFFF); + carry = sum >> 32; + } + result_words[i + r_operand.len] += (uint32_t)carry; + } + + // Determine sign of result + _Bool result_sign = l_operand.sign != r_operand.sign; + + // Normalize result + while (result_len > 1 && result_words[result_len - 1] == 0) { + result_len--; + } + + // Check if result fits in small integer + if (result_len == 1) { + intptr_t small_result = (intptr_t)result_words[0]; + if (result_sign) { + small_result = -small_result; + } + free(result_words); + free(l_operand.words); + free(r_operand.words); + return bsts_integer_from_int((int)small_result); + } else { + BValue result = bsts_integer_from_words_copy(!result_sign, result_len, result_words); + free(result_words); + free(l_operand.words); + free(r_operand.words); + return result; + } + } +} + +// Function to perform bitwise OR on two BValues +BValue bsts_integer_or(BValue l, BValue r) { + _Bool l_is_small = IS_SMALL(l); + _Bool r_is_small = IS_SMALL(r); + + // Determine maximum length in words + size_t l_len = l_is_small ? sizeof(intptr_t) * 8 / 32 : GET_BIG_INT(l)->len; + size_t r_len = r_is_small ? sizeof(intptr_t) * 8 / 32 : GET_BIG_INT(r)->len; + size_t max_len = (l_len > r_len) ? l_len : r_len; + + // Ensure at least one word + if (max_len == 0) { + max_len = 1; + } + + // Allocate arrays for two's complement representations + uint32_t* l_twos = (uint32_t*)calloc(max_len, sizeof(uint32_t)); + uint32_t* r_twos = (uint32_t*)calloc(max_len, sizeof(uint32_t)); + if (l_twos == NULL || r_twos == NULL) { + free(l_twos); + free(r_twos); + return NULL; + } + + // Convert left operand to two's complement + if (l_is_small) { + intptr_t l_int = GET_SMALL_INT(l); + memcpy(l_twos, &l_int, sizeof(intptr_t)); + } else { + BSTS_Integer* l_big = GET_BIG_INT(l); + sign_magnitude_to_twos_complement(l_big->sign, l_big->len, l_big->words, l_twos, max_len); + } + + // Convert right operand to two's complement + if (r_is_small) { + intptr_t r_int = GET_SMALL_INT(r); + memcpy(r_twos, &r_int, sizeof(intptr_t)); + } else { + BSTS_Integer* r_big = GET_BIG_INT(r); + sign_magnitude_to_twos_complement(r_big->sign, r_big->len, r_big->words, r_twos, max_len); + } + + // Perform bitwise OR + uint32_t* result_twos = (uint32_t*)malloc(max_len * sizeof(uint32_t)); + if (result_twos == NULL) { + free(l_twos); + free(r_twos); + return NULL; + } + for (size_t i = 0; i < max_len; i++) { + result_twos[i] = l_twos[i] | r_twos[i]; + } + + free(l_twos); + free(r_twos); + + // Convert result from two's complement to sign-magnitude + _Bool result_sign; + size_t result_len = max_len; + uint32_t* result_words = (uint32_t*)malloc(max_len * sizeof(uint32_t)); + if (result_words == NULL) { + free(result_twos); + return NULL; + } + + twos_complement_to_sign_magnitude(max_len, result_twos, &result_sign, &result_len, result_words); + + free(result_twos); + + // Check if result can be represented as small integer + if (result_len * 32 <= sizeof(intptr_t) * 8) { + // Attempt to pack into small integer + intptr_t result_int = 0; + for (size_t i = 0; i < result_len; i++) { + result_int |= ((intptr_t)result_words[i]) << (32 * i); + } + if (result_sign) { + result_int = -result_int; + } + if (result_int <= (INTPTR_MAX >> 1) && result_int >= (INTPTR_MIN >> 1)) { + BValue result = bsts_integer_from_int((int)result_int); + free(result_words); + return result; + } + } + + // Return result as big integer + BValue result = bsts_integer_from_words_copy(!result_sign, result_len, result_words); + free(result_words); + return result; +} + +// Function to perform bitwise XOR on two BValues +BValue bsts_integer_xor(BValue l, BValue r) { + _Bool l_is_small = IS_SMALL(l); + _Bool r_is_small = IS_SMALL(r); + + // Determine maximum length in words + size_t l_len = l_is_small ? sizeof(intptr_t) * 8 / 32 : GET_BIG_INT(l)->len; + size_t r_len = r_is_small ? sizeof(intptr_t) * 8 / 32 : GET_BIG_INT(r)->len; + size_t max_len = (l_len > r_len) ? l_len : r_len; + + // Ensure at least one word + if (max_len == 0) { + max_len = 1; + } + + // Allocate arrays for two's complement representations + uint32_t* l_twos = (uint32_t*)calloc(max_len, sizeof(uint32_t)); + uint32_t* r_twos = (uint32_t*)calloc(max_len, sizeof(uint32_t)); + if (l_twos == NULL || r_twos == NULL) { + free(l_twos); + free(r_twos); + return NULL; + } + + // Convert left operand to two's complement + if (l_is_small) { + intptr_t l_int = GET_SMALL_INT(l); + memcpy(l_twos, &l_int, sizeof(intptr_t)); + } else { + BSTS_Integer* l_big = GET_BIG_INT(l); + sign_magnitude_to_twos_complement(l_big->sign, l_big->len, l_big->words, l_twos, max_len); + } + + // Convert right operand to two's complement + if (r_is_small) { + intptr_t r_int = GET_SMALL_INT(r); + memcpy(r_twos, &r_int, sizeof(intptr_t)); + } else { + BSTS_Integer* r_big = GET_BIG_INT(r); + sign_magnitude_to_twos_complement(r_big->sign, r_big->len, r_big->words, r_twos, max_len); + } + + // Perform bitwise XOR + uint32_t* result_twos = (uint32_t*)malloc(max_len * sizeof(uint32_t)); + if (result_twos == NULL) { + free(l_twos); + free(r_twos); + return NULL; + } + for (size_t i = 0; i < max_len; i++) { + result_twos[i] = l_twos[i] ^ r_twos[i]; + } + + free(l_twos); + free(r_twos); + + // Convert result from two's complement to sign-magnitude + _Bool result_sign; + size_t result_len = max_len; + uint32_t* result_words = (uint32_t*)malloc(max_len * sizeof(uint32_t)); + if (result_words == NULL) { + free(result_twos); + return NULL; + } + + twos_complement_to_sign_magnitude(max_len, result_twos, &result_sign, &result_len, result_words); + + free(result_twos); + + // Check if result can be represented as small integer + if (result_len * 32 <= sizeof(intptr_t) * 8) { + // Attempt to pack into small integer + intptr_t result_int = 0; + for (size_t i = 0; i < result_len; i++) { + result_int |= ((intptr_t)result_words[i]) << (32 * i); + } + if (result_sign) { + result_int = -result_int; + } + if (result_int <= (INTPTR_MAX >> 1) && result_int >= (INTPTR_MIN >> 1)) { + BValue result = bsts_integer_from_int((int)result_int); + free(result_words); + return result; + } + } + + // Return result as big integer + BValue result = bsts_integer_from_words_copy(!result_sign, result_len, result_words); + free(result_words); + return result; +} + +// Function to compare two BValues +// (&Integer, &Integer) -> Integer +int bsts_integer_cmp(BValue l, BValue r) { + _Bool l_is_small = IS_SMALL(l); + _Bool r_is_small = IS_SMALL(r); + + if (l_is_small && r_is_small) { + // Both are small integers + intptr_t l_int = GET_SMALL_INT(l); + intptr_t r_int = GET_SMALL_INT(r); + if (l_int < r_int) return -1; + else if (l_int > r_int) return 1; + else return 0; + } else if (!l_is_small && !r_is_small) { + // Both are big integers + BSTS_Integer* l_big = GET_BIG_INT(l); + BSTS_Integer* r_big = GET_BIG_INT(r); + + // Compare signs + if (l_big->sign != r_big->sign) { + // If signs differ, the positive one is greater + return l_big->sign ? -1 : 1; + } + + // Signs are the same, compare magnitudes + if (l_big->len != r_big->len) { + if (l_big->len > r_big->len) + return l_big->sign ? -1 : 1; + else + return l_big->sign ? 1 : -1; + } else { + // Same length, compare words from most significant to least significant + for (size_t i = l_big->len; i > 0; i--) { + uint32_t l_word = l_big->words[i - 1]; + uint32_t r_word = r_big->words[i - 1]; + if (l_word != r_word) { + if (l_word > r_word) + return l_big->sign ? -1 : 1; + else + return l_big->sign ? 1 : -1; + } + } + // All words are equal + return 0; + } + } else { + // One is small, one is big + // Ensure 'l' is the big integer + if (l_is_small) { + // Swap 'l' and 'r' + BValue temp = l; + l = r; + r = temp; + _Bool temp_is_small = l_is_small; + l_is_small = r_is_small; + r_is_small = temp_is_small; + + // Negate the result + int cmp = bsts_integer_cmp(l, r); + return -cmp; + } + + // Now 'l' is big, 'r' is small + intptr_t r_int = GET_SMALL_INT(r); + BSTS_Integer* l_big = GET_BIG_INT(l); + + // Compare signs + _Bool l_sign = l_big->sign; + _Bool r_sign = (r_int < 0) ? 1 : 0; + + if (l_sign != r_sign) { + return l_sign ? -1 : 1; + } + + // Signs are the same, compare magnitudes + uintptr_t r_abs = (uintptr_t)(r_int < 0 ? -r_int : r_int); + + // Calculate number of words in 'r_abs' + size_t r_abs_len = 0; + uintptr_t temp_r_abs = r_abs; + do { + temp_r_abs >>= 32; + r_abs_len++; + } while (temp_r_abs > 0); + + if (l_big->len != r_abs_len) { + if (l_big->len > r_abs_len) + return l_sign ? -1 : 1; + else + return l_sign ? 1 : -1; + } else { + // Lengths are equal, compare words + uint32_t r_words[r_abs_len]; + uintptr_t temp = r_abs; + for (size_t i = 0; i < r_abs_len; i++) { + r_words[i] = (uint32_t)(temp & 0xFFFFFFFF); + temp >>= 32; + } + // Compare words from most significant to least significant + for (size_t i = l_big->len; i > 0; i--) { + uint32_t l_word = l_big->words[i - 1]; + uint32_t r_word = r_words[i - 1]; + if (l_word != r_word) { + if (l_word > r_word) + return l_sign ? -1 : 1; + else + return l_sign ? 1 : -1; + } + } + // All words are equal + return 0; + } + } +} + +// Function to shift a BValue left or right +BValue bsts_integer_shift_left(BValue l, BValue r) { + // Check if r is a small integer + if (!IS_SMALL(r)) { + // r is not a small integer, return NULL + return NULL; + } + + // Get the shift amount + intptr_t shift_amount = GET_SMALL_INT(r); + + // If shift_amount is zero, return l as is + if (shift_amount == 0) { + return l; + } + + // Determine direction of shift + _Bool shift_left = shift_amount > 0; + intptr_t shift_abs = shift_left ? shift_amount : -shift_amount; + + // Prepare the operand (l) + typedef struct { + _Bool sign; + size_t len; + uint32_t* words; + } Operand; + + Operand operand; + + // Convert l to Operand + if (IS_SMALL(l)) { + intptr_t l_int = GET_SMALL_INT(l); + operand.sign = l_int < 0; + uintptr_t abs_l_int = (uintptr_t)(l_int < 0 ? -l_int : l_int); + + // Convert abs_l_int to words + size_t word_count = 0; + uintptr_t temp = abs_l_int; + do { + temp >>= 32; + word_count++; + } while (temp != 0); + + operand.len = word_count; + operand.words = (uint32_t*)calloc(word_count, sizeof(uint32_t)); + if (operand.words == NULL) { + return NULL; + } + temp = abs_l_int; + for (size_t i = 0; i < word_count; i++) { + operand.words[i] = (uint32_t)(temp & 0xFFFFFFFF); + temp >>= 32; + } + } else { + BSTS_Integer* l_big = GET_BIG_INT(l); + operand.sign = l_big->sign; + operand.len = l_big->len; + operand.words = (uint32_t*)malloc(operand.len * sizeof(uint32_t)); + if (operand.words == NULL) { + return NULL; + } + memcpy(operand.words, l_big->words, operand.len * sizeof(uint32_t)); + } + + // Perform shifting on operand.words + if (shift_left) { + // Left shift + size_t word_shift = shift_abs / 32; + size_t bit_shift = shift_abs % 32; + + size_t new_len = operand.len + word_shift + 1; // +1 for possible carry + uint32_t* new_words = (uint32_t*)calloc(new_len, sizeof(uint32_t)); + if (new_words == NULL) { + free(operand.words); + return NULL; + } + + // Shift bits + uint64_t carry = 0; + for (size_t i = 0; i < operand.len; i++) { + uint64_t shifted = ((uint64_t)operand.words[i] << bit_shift) | carry; + new_words[i + word_shift] = (uint32_t)(shifted & 0xFFFFFFFF); + carry = shifted >> 32; + } + if (carry != 0) { + new_words[operand.len + word_shift] = (uint32_t)carry; + } + + // Remove leading zeros + size_t result_len = new_len; + while (result_len > 1 && new_words[result_len - 1] == 0) { + result_len--; + } + + // Check if result fits in small integer + if (result_len == 1) { + intptr_t result_int = (intptr_t)new_words[0]; + if (operand.sign) { + result_int = -result_int; + } + free(new_words); + free(operand.words); + return bsts_integer_from_int((int)result_int); + } else { + // Create new big integer + BSTS_Integer* result = (BSTS_Integer*)malloc(sizeof(BSTS_Integer)); + if (result == NULL) { + free(new_words); + free(operand.words); + return NULL; + } + result->len = result_len; + result->sign = operand.sign; + result->words = new_words; + + free(operand.words); + return (BValue)result; + } + } else { + // Right shift + size_t word_shift = shift_abs / 32; + size_t bit_shift = shift_abs % 32; + + if (word_shift >= operand.len) { + // All bits are shifted out + if (operand.sign) { + // Negative number, result is -1 + free(operand.words); + return bsts_integer_from_int(-1); + } else { + // Positive number, result is 0 + free(operand.words); + return bsts_integer_from_int(0); + } + } + + size_t new_len = operand.len - word_shift; + uint32_t* new_words = (uint32_t*)calloc(new_len, sizeof(uint32_t)); + if (new_words == NULL) { + free(operand.words); + return NULL; + } + + uint32_t sign_extension = operand.sign ? 0xFFFFFFFF : 0x00000000; + + for (size_t i = 0; i < new_len; i++) { + uint64_t high = (i + word_shift + 1 < operand.len) ? operand.words[i + word_shift + 1] : sign_extension; + uint64_t low = operand.words[i + word_shift]; + uint64_t combined = (high << 32) | low; + new_words[i] = (uint32_t)((combined >> bit_shift) & 0xFFFFFFFF); + } + + // Remove leading redundant words + size_t result_len = new_len; + if (operand.sign) { + while (result_len > 1 && new_words[result_len - 1] == 0xFFFFFFFF) { + result_len--; + } + } else { + while (result_len > 1 && new_words[result_len - 1] == 0) { + result_len--; + } + } + + // Check if result fits in small integer + if (result_len == 1) { + intptr_t result_int = (intptr_t)new_words[0]; + if (operand.sign) { + result_int = -result_int; + } + free(new_words); + free(operand.words); + return bsts_integer_from_int((int)result_int); + } else { + // Create new big integer + BSTS_Integer* result = (BSTS_Integer*)malloc(sizeof(BSTS_Integer)); + if (result == NULL) { + free(new_words); + free(operand.words); + return NULL; + } + result->len = result_len; + result->sign = operand.sign; + result->words = new_words; + + free(operand.words); + return (BValue)result; + } + } +} + void free_statics() { RefCounted* rc; do { diff --git a/c_runtime/bosatsu_runtime.h b/c_runtime/bosatsu_runtime.h index 26c6e8e99..b1f02363b 100644 --- a/c_runtime/bosatsu_runtime.h +++ b/c_runtime/bosatsu_runtime.h @@ -78,45 +78,83 @@ typedef void (*FreeFn)(void*); typedef BValue (*BConstruct)(); // Function to determine the type of the given value pointer and clone if necessary +// &BValue -> BValue BValue clone_value(BValue value); +// BValue -> () void release_value(BValue value); +// (&BValue, int) -> &BValue BValue get_struct_index(BValue v, int idx); +// &BValue -> Tag ENUM_TAG get_variant(BValue v); +// (&BValue, int) -> &BValue BValue get_enum_index(BValue v, int idx); + // This one is not auto generated because it can always be fit into the BValue directly BValue alloc_enum0(ENUM_TAG tag); BValue bsts_string_from_utf8_bytes_copy(size_t len, char* bytes); +BValue bsts_string_from_utf8_bytes_owned(size_t len, char* bytes); BValue bsts_string_from_utf8_bytes_static(size_t len, char* bytes); +/* + * write the codepoint into bytes, which must be >= 4 in length + * and return the number of bytes written + */ +int bsts_string_code_point_to_utf8(int codepoint, char* bytes); +// (&String, &String) -> Bool _Bool bsts_string_equals(BValue left, BValue right); -// string -> int (lenght in bytes) +// (&String, &String) -> int +int bsts_string_cmp(BValue left, BValue right); +// &String -> int (length in bytes) size_t bsts_string_utf8_len(BValue); +char* bsts_string_utf8_bytes(BValue); -// (string, int) -> int +// How many bytes is the codepoint at this offset, 1, 2, 3, 4, or -1 on error +// (&String, int) -> int int bsts_string_code_point_bytes(BValue, int offset); -// (string, int) -> char +// (&String, int) -> char BValue bsts_string_char_at(BValue, int); -// (string, int) -> string -BValue bsts_string_substring_tail(BValue, int start); - -// (string, int, int) -> string +// (&string, int, int) -> string BValue bsts_string_substring(BValue, int start, int end); +// (&String, int) -> String +BValue bsts_string_substring_tail(BValue, int byte_offset); + // return -1 if the needle isn't in the haystack, else the offset >= byteOffset it was found -// (string, string, int) -> int +// (&string, string, int) -> int int bsts_string_find(BValue haystack, BValue needle, int start); - -// basically python src.startswith(expected, _) but with utf8 byte offsets -// (string, int, string) -> _Bool -_Bool bsts_string_matches_at(BValue src, int start, BValue expected); +/* + * search from right to left. + * return -1 if the needle isn't in the haystack, else the offset >= byteOffset it was found + * (&string, string, int) -> int + */ +int bsts_string_rfind(BValue haystack, BValue needle, int start); BValue bsts_integer_from_int(int small_int); BValue bsts_integer_from_words_copy(_Bool is_pos, size_t size, uint32_t* words); _Bool bsts_integer_equals(BValue left, BValue right); +// (&Integer, &Integer) -> Integer +BValue bsts_integer_add(BValue left, BValue right); +// (&Integer, &Integer) -> Integer +BValue bsts_integer_times(BValue left, BValue right); +// (&Integer, &Integer) -> Integer +BValue bsts_integer_or(BValue left, BValue right); +// (&Integer, &Integer) -> Integer +BValue bsts_integer_xor(BValue left, BValue right); +// (&Integer, &Integer) -> Integer +BValue bsts_integer_and(BValue l, BValue r); +// (&Integer, &Integer) -> Integer +BValue bsts_integer_shift_left(BValue l, BValue r); +// (&Integer, &Integer) -> int +int bsts_integer_cmp(BValue l, BValue r); +// return the negative of this +// Integer -> Integer +BValue bsts_integer_negate(BValue v); +// &Integer -> String +BValue bsts_integer_to_string(BValue v); BValue alloc_external(void* eval, FreeFn free_fn); void* get_external(BValue v); diff --git a/c_runtime/test.c b/c_runtime/test.c index 119663c18..0a4c734b7 100644 --- a/c_runtime/test.c +++ b/c_runtime/test.c @@ -9,11 +9,14 @@ void assert(_Bool cond, char* message) { } } -int main(int argc, char** argv) { +void test_runtime_enum_struct() { BValue s1 = alloc_struct2(alloc_enum0(0), alloc_enum0(1)); assert(get_variant(get_struct_index(s1, 0)) == 0, "index0 == alloc_enum0"); assert(get_variant(get_struct_index(s1, 1)) == 1, "index0 == alloc_enum0(1)"); release_value(s1); +} + +void test_runtime_strings() { char* hello = "hello1"; @@ -23,9 +26,44 @@ int main(int argc, char** argv) { assert(bsts_string_equals(v1, v2), "v1 == v2"); assert(bsts_string_equals(v1, v1), "v1 == v1"); assert(bsts_string_equals(v2, v2), "v2 == v2"); - release_value(v1); - release_value(v2); + //codepoint tests + assert(bsts_string_code_point_bytes(v1, 0) == 1, "code_point_bytes(v[0]) == 1"); + assert(bsts_string_char_at(v1, 0) == BSTS_TO_CHAR(104), "char_at(v, 0) == 104"); + assert(bsts_string_char_at(v1, 1) == BSTS_TO_CHAR(101), "char_at(v, 1) == 101"); + BValue v3 = bsts_string_from_utf8_bytes_static(4, "\x00F0\x009F\x0098\x008A"); + assert(bsts_string_char_at(v3, 0) == BSTS_TO_CHAR(0x1F60A), "smiley check char"); + assert(bsts_string_code_point_bytes(v3, 0) == 4, "smiley length"); + + BValue v1tail = bsts_string_substring_tail(v1, 1); + BValue v2tail = bsts_string_substring_tail(v2, 1); + BValue tail_expected = bsts_string_from_utf8_bytes_static(4, "ello"); + assert(bsts_string_equals(v1tail, v2tail), "v1tail == v2tail"); + assert(bsts_string_equals(v1tail, tail_expected), "v1tail == expected"); + + release_value(v1tail); + release_value(v2tail); + release_value(tail_expected); + release_value(v3); + + { + BValue hello_world1 = bsts_string_from_utf8_bytes_static(11, "hello world"); + BValue hello1 = bsts_string_from_utf8_bytes_static(5, "world"); + int find1 = bsts_string_find(hello_world1, hello1, 0); + assert(find1 == 6, "find1"); + int find2 = bsts_string_find(hello_world1, hello1, 1); + assert(find2 == 6, "find2"); + int find3 = bsts_string_find(hello_world1, hello1, 7); + assert(find3 == -1, "find3"); + release_value(hello_world1); + release_value(hello1); + } + +} + +int main(int argc, char** argv) { + test_runtime_enum_struct(); + test_runtime_strings(); printf("success\n"); return 0; } \ No newline at end of file diff --git a/core/src/main/scala/org/bykn/bosatsu/codegen/clang/ClangGen.scala b/core/src/main/scala/org/bykn/bosatsu/codegen/clang/ClangGen.scala index 58eec9f6b..dd2d3197d 100644 --- a/core/src/main/scala/org/bykn/bosatsu/codegen/clang/ClangGen.scala +++ b/core/src/main/scala/org/bykn/bosatsu/codegen/clang/ClangGen.scala @@ -434,7 +434,7 @@ object ClangGen { // basically python src.startswith(expected, _) but with utf8 byte offsets // (string, int, string) -> _Bool def matchesAt(src: Expression, byteOffset: Expression, expected: Expression): Expression = - fn("matches_at")(src, byteOffset, expected) + find(src, expected, byteOffset) =:= byteOffset def staticString(s: String): T[Code.StrLiteral] = { // convert to utf8 and then to a literal array of bytes