From 65bea8a4d8c33081904fdd6ce30cc4051597ad49 Mon Sep 17 00:00:00 2001
From: Shane Liesegang <shane@techie.net>
Date: Mon, 28 Oct 2024 08:29:32 +0800
Subject: [PATCH] first draft zig generator; only deserializes

---
 beschi/cli.py                      |   3 +-
 beschi/writers/boilerplate/Zig.zig |  76 +++++++++++
 beschi/writers/c.py                |   8 +-
 beschi/writers/csharp.py           |   2 +-
 beschi/writers/go.py               |   2 +-
 beschi/writers/rust.py             |   2 +-
 beschi/writers/swift.py            |   2 +-
 beschi/writers/typescript.py       |   2 +-
 beschi/writers/zig.py              | 212 +++++++++++++++++++++++++++++
 docs/dev/todo.md                   |  12 +-
 docs/languages/c.md                |   1 +
 11 files changed, 310 insertions(+), 12 deletions(-)
 create mode 100644 beschi/writers/boilerplate/Zig.zig
 create mode 100644 beschi/writers/zig.py

diff --git a/beschi/cli.py b/beschi/cli.py
index 2a984da..fa58aeb 100644
--- a/beschi/cli.py
+++ b/beschi/cli.py
@@ -1,5 +1,6 @@
 import sys
 import argparse
+import traceback
 
 from .protocol import Protocol
 from .writers import all_writers, experimental_writers
@@ -74,7 +75,7 @@ def main():
     try:
         output = writer.generate()
     except NotImplementedError as nie:
-        sys.stderr.write(f"{nie}\n")
+        sys.stderr.write(f"{traceback.format_exc()}\n")
         sys.exit(1)
 
     if args.output == None:
diff --git a/beschi/writers/boilerplate/Zig.zig b/beschi/writers/boilerplate/Zig.zig
new file mode 100644
index 0000000..e0225b8
--- /dev/null
+++ b/beschi/writers/boilerplate/Zig.zig
@@ -0,0 +1,76 @@
+/// Reports whether `T` is one of the fixed-width numeric types listed below.
+/// Written as a `switch` on the type, so it is resolved while compiling.
+/// Used to guard `readNumber` and to pick the numeric path in `readList`.
+fn _numberTypeIsValid(comptime T: type) bool {
+    return switch (T) {
+        u8, i8,
+        u16, i16,
+        u32, i32,
+        u64, i64,
+        f32, f64,
+        => true,
+        // any other type (bool, slices, structs, ...) is not a number here
+        else => false,
+    };
+}
+
+pub fn readNumber(comptime T: type, offset: usize, buffer: []u8) struct { value: T, bytes_read: usize } { // reads one little-endian `T` starting at buffer[offset]
+    comptime { // reject unsupported types while compiling instead of at run time
+        if (!_numberTypeIsValid(T)) {
+            @compileError("Invalid number type");
+        }
+    }
+
+    switch (T) { // floats are read as the same-width unsigned integer and bit-cast; every other type is read directly
+        f32 => return .{ .value = @bitCast(std.mem.readInt(u32, buffer[offset..][0..@sizeOf(T)], .little)), .bytes_read = @sizeOf(T) },
+        f64 => return .{ .value = @bitCast(std.mem.readInt(u64, buffer[offset..][0..@sizeOf(T)], .little)), .bytes_read = @sizeOf(T) },
+        else => return .{ .value = std.mem.readInt(T, buffer[offset..][0..@sizeOf(T)], .little), .bytes_read = @sizeOf(T) }, // bytes_read is always @sizeOf(T)
+    }
+}
+
+/// Reads a length-prefixed string at `offset`; the returned bytes are a copy allocated with `allocator`.
+pub fn readString(allocator: std.mem.Allocator, offset: usize, buffer: []u8) !struct { value: []u8, bytes_read: usize } {
+    const len_read = readNumber({# STRING_SIZE_TYPE #}, offset, buffer);
+    const len = len_read.value;
+    // `const`: the slice itself is never reassigned, only the bytes it points at are written
+    const str = try allocator.alloc(u8, len);
+    @memcpy(str, buffer[offset + len_read.bytes_read ..][0..len]);
+    return .{ .value = str, .bytes_read = @sizeOf({# STRING_SIZE_TYPE #}) + len };
+}
+
+/// Reads a length-prefixed list of `T` starting at `offset`; the returned slice is allocated with `allocator`.
+pub fn readList(comptime T: type, allocator: std.mem.Allocator, offset: usize, buffer: []u8) !struct { value: []T, bytes_read: usize } {
+    var local_offset = offset;
+    const len_read = readNumber({# LIST_SIZE_TYPE #}, local_offset, buffer);
+    const len = len_read.value;
+    local_offset += len_read.bytes_read;
+    const list = try allocator.alloc(T, len);
+    errdefer allocator.free(list); // a failed element read must not leak the list (elements already read are not released here)
+    for (0..len) |i| {
+        if (comptime _numberTypeIsValid(T)) {
+            const list_read = readNumber(T, local_offset, buffer);
+            list[i] = list_read.value;
+            local_offset += list_read.bytes_read;
+        } else {
+            switch (T) {
+                []u8 => {
+                    const list_read = try readString(allocator, local_offset, buffer);
+                    list[i] = list_read.value;
+                    local_offset += list_read.bytes_read;
+                },
+                else => { // NOTE(review): a list of `bool` lands here and `bool` has no `fromBytes` - confirm how bool lists should be read
+                    if (comptime _typeIsSimple(T)) {
+                        const list_read = try T.fromBytes(local_offset, buffer);
+                        list[i] = list_read.value;
+                        local_offset += list_read.bytes_read;
+                    } else {
+                        const list_read = try T.fromBytes(allocator, local_offset, buffer);
+                        list[i] = list_read.value;
+                        local_offset += list_read.bytes_read;
+                    }
+                },
+            }
+        }
+    }
+    return .{ .value = list, .bytes_read = local_offset - offset };
+}
diff --git a/beschi/writers/c.py b/beschi/writers/c.py
index 5649f5c..68ca7d0 100644
--- a/beschi/writers/c.py
+++ b/beschi/writers/c.py
@@ -309,9 +309,11 @@ def gen_implementation(self, sname: str, sdata: Struct):
 
     def generate(self) -> str:
         self.output = []
-        self.write_line(f"// This file was automatically generated by {LIB_NAME} v{LIB_VERSION}")
-        self.write_line( "// <https://github.com/sjml/beschi>")
-        self.write_line(f"// Do not edit directly.")
+        self.write_line( "/*")
+        self.write_line(f"   This file was automatically generated by {LIB_NAME} v{LIB_VERSION}")
+        self.write_line( "   <https://github.com/sjml/beschi>")
+        self.write_line( "   Do not edit directly.")
+        self.write_line( "*/")
         self.write_line()
 
         if self.embed_protocol:
diff --git a/beschi/writers/csharp.py b/beschi/writers/csharp.py
index eecd061..8b37ae1 100644
--- a/beschi/writers/csharp.py
+++ b/beschi/writers/csharp.py
@@ -212,7 +212,7 @@ def generate(self) -> str:
 
         self.write_line(f"// This file was automatically generated by {LIB_NAME} v{LIB_VERSION}")
         self.write_line( "// <https://github.com/sjml/beschi>")
-        self.write_line(f"// Do not edit directly.")
+        self.write_line( "// Do not edit directly.")
         self.write_line()
 
         if self.embed_protocol:
diff --git a/beschi/writers/go.py b/beschi/writers/go.py
index 318bc49..bf0025d 100644
--- a/beschi/writers/go.py
+++ b/beschi/writers/go.py
@@ -205,7 +205,7 @@ def generate(self) -> str:
 
         self.write_line(f"// This file was automatically generated by {LIB_NAME} v{LIB_VERSION}")
         self.write_line( "// <https://github.com/sjml/beschi>")
-        self.write_line(f"// Do not edit directly.")
+        self.write_line( "// Do not edit directly.")
         self.write_line()
 
         if self.embed_protocol:
diff --git a/beschi/writers/rust.py b/beschi/writers/rust.py
index fe61add..e107919 100644
--- a/beschi/writers/rust.py
+++ b/beschi/writers/rust.py
@@ -194,7 +194,7 @@ def generate(self) -> str:
         self.output = []
         self.write_line(f"// This file was automatically generated by {LIB_NAME} v{LIB_VERSION}")
         self.write_line( "// <https://github.com/sjml/beschi>")
-        self.write_line(f"// Do not edit directly.")
+        self.write_line( "// Do not edit directly.")
         self.write_line()
 
         if self.embed_protocol:
diff --git a/beschi/writers/swift.py b/beschi/writers/swift.py
index e7617b1..178c9b1 100644
--- a/beschi/writers/swift.py
+++ b/beschi/writers/swift.py
@@ -204,7 +204,7 @@ def generate(self) -> str:
 
         self.write_line(f"// This file was automatically generated by {LIB_NAME} v{LIB_VERSION}")
         self.write_line( "// <https://github.com/sjml/beschi>")
-        self.write_line(f"// Do not edit directly.")
+        self.write_line( "// Do not edit directly.")
         self.write_line()
         self.write_line("import Foundation")
         self.write_line()
diff --git a/beschi/writers/typescript.py b/beschi/writers/typescript.py
index 66b239d..c52d8b6 100644
--- a/beschi/writers/typescript.py
+++ b/beschi/writers/typescript.py
@@ -233,7 +233,7 @@ def generate(self) -> str:
 
         self.write_line(f"// This file was automatically generated by {LIB_NAME} v{LIB_VERSION}")
         self.write_line( "// <https://github.com/sjml/beschi>")
-        self.write_line(f"// Do not edit directly.")
+        self.write_line( "// Do not edit directly.")
         self.write_line()
 
         if self.embed_protocol:
diff --git a/beschi/writers/zig.py b/beschi/writers/zig.py
new file mode 100644
index 0000000..bc96c12
--- /dev/null
+++ b/beschi/writers/zig.py
@@ -0,0 +1,212 @@
+import argparse
+
+from ..protocol import Protocol, Struct, Variable, NUMERIC_TYPE_SIZES
+from ..writer import Writer, TextUtil
+from .. import LIB_NAME, LIB_VERSION
+
+LANGUAGE_NAME = "Zig"
+
+
+class ZigWriter(Writer):
+    language_name = LANGUAGE_NAME
+    default_extension = ".zig"
+    in_progress = True
+
+    def __init__(self, p: Protocol, extra_args: dict[str,any] = {}):
+        """Set up the protocol-type to Zig-type table and the default values used for struct fields."""
+        super().__init__(protocol=p)
+        self.embed_protocol = extra_args["embed_protocol"] # NOTE(review): raises KeyError when the key is absent, which includes the `{}` default
+
+        for protocol_type, zig_type in (
+            ("byte", "u8"),
+            ("bool", "bool"),
+            ("uint16", "u16"),
+            ("int16", "i16"),
+            ("uint32", "u32"),
+            ("int32", "i32"),
+            ("uint64", "u64"),
+            ("int64", "i64"),
+            ("float", "f32"),
+            ("double", "f64"),
+            ("string", "[]u8"),
+        ):
+            self.type_mapping[protocol_type] = zig_type
+
+        # NOTE(review): '""' is a const string literal in Zig - confirm it is accepted as the default of a `[]u8` field
+        self.base_defaults: dict[str,str] = {
+            "byte": "0",
+            "bool": "false",
+            "uint16": "0", "int16": "0",
+            "uint32": "0", "int32": "0",
+            "uint64": "0", "int64": "0",
+            "float": "0.0", "double": "0.0",
+            "string": '""',
+        }
+
+    def deserializer(self, var: Variable, accessor: str, parent_is_simple: bool, simple_offset: int):
+        """Write the Zig statements that read `var` from `buffer` into a `{accessor}_{var.name}` const.
+
+        Members of a simple parent are read at the fixed position `offset + simple_offset`;
+        all other members are read at the running `local_offset`, which is then advanced.
+        """
+        target = f"{accessor}_{var.name}"
+        if parent_is_simple: # also means that *var* is simple because recursion!
+            if var.vartype == "bool":
+                self.write_line(f"const {target} = readNumber(u8, offset + {simple_offset}, buffer).value != 0;")
+            elif var.vartype in NUMERIC_TYPE_SIZES:
+                self.write_line(f"const {target} = readNumber({self.type_mapping[var.vartype]}, offset + {simple_offset}, buffer).value;")
+            else:
+                # nested struct: fromBytes returns an error union (needs `try`) and is positioned relative to the parent's offset
+                self.write_line(f"const {target}_read = try {var.vartype}.fromBytes(offset + {simple_offset}, buffer);")
+                self.write_line(f"const {target} = {target}_read.value;")
+            return
+        suffix = ""
+        if var.is_list:
+            reader = f"try readList({self.type_mapping[var.vartype]}, allocator, local_offset, buffer)"
+        elif var.vartype == "bool":
+            reader, suffix = "readNumber(u8, local_offset, buffer)", " != 0"
+        elif var.vartype in NUMERIC_TYPE_SIZES:
+            reader = f"readNumber({self.type_mapping[var.vartype]}, local_offset, buffer)"
+        elif var.vartype == "string":
+            reader = "try readString(allocator, local_offset, buffer)"
+        else:
+            reader = f"try {var.vartype}.fromBytes({'' if var.is_simple() else 'allocator, '}local_offset, buffer)"
+        self.write_line(f"const {target}_read = {reader};")
+        self.write_line(f"const {target} = {target}_read.value{suffix};")
+        self.write_line(f"local_offset += {target}_read.bytes_read;")
+        self.write_line()
+
+    def destructor(self, var: Variable, accessor: str):
+        """Write the Zig statements that release the memory held by `var`; simple values write nothing."""
+        if var.is_simple():
+            return
+        owner = f"{accessor}{var.name}"
+        if not var.is_list:
+            self.write_line(f"allocator.free({owner});" if var.vartype == "string" else f"{owner}.deinit(allocator);")
+            return
+        if not var.is_simple(True):
+            depth = self.indent_level # the capture name is derived from the current indent level
+            capture = f"item{depth}" if var.vartype == "string" else f"*item{depth}"
+            self.write_line(f"for ({owner}) |{capture}| {{")
+            self.indent_level += 1
+            self.destructor(Variable(self.protocol, f"item{depth}", var.vartype), "")
+            self.indent_level -= 1
+            self.write_line("}")
+        self.write_line(f"allocator.free({owner});")
+
+
+    def gen_struct(self, sname: str, sdata: Struct):
+        """Write the Zig definition of one struct or message: fields, `fromBytes`, and (if needed) `deinit`."""
+        simple = sdata.is_simple()
+        self.write_line(f"pub const {sname} = struct {{")
+        self.indent_level += 1
+
+        # field declarations; lists and non-simple struct members get no default value
+        for member in sdata.members:
+            zig_type = self.type_mapping[member.vartype]
+            if member.is_list:
+                self.write_line(f"{member.name}: []{zig_type},")
+                continue
+            initial = self.base_defaults.get(member.vartype)
+            if initial is None and member.is_simple():
+                initial = f"{member.vartype}{{}}"
+            if initial is None:
+                self.write_line(f"{member.name}: {zig_type},")
+            else:
+                self.write_line(f"{member.name}: {zig_type} = {initial},")
+        self.write_line()
+
+        alloc_param = "" if simple else "allocator: std.mem.Allocator, "
+        self.write_line(f"pub fn fromBytes({alloc_param}offset: usize, buffer: []u8) !struct {{ value: {sname}, bytes_read: usize }} {{")
+        self.indent_level += 1
+        if not simple:
+            self.write_line("var local_offset = offset;")
+            self.write_line()
+        # fixed position of the next member inside a simple struct; stays -1 when unused
+        simple_offset = 0 if simple else -1
+        for member in sdata.members:
+            self.deserializer(member, sname, simple, simple_offset)
+            if simple:
+                simple_offset += self.protocol.get_size_of(member.vartype)
+        self.write_line(f"return .{{ .value = {sname}{{")
+        self.indent_level += 1
+        for member in sdata.members:
+            self.write_line(f".{member.name} = {sname}_{member.name},")
+        self.indent_level -= 1
+        consumed = self.protocol.get_size_of(sdata.name) if simple else "local_offset - offset"
+        self.write_line(f"}}, .bytes_read = {consumed} }};")
+        self.indent_level -= 1
+        self.write_line("}")
+
+        if not simple:
+            self.write_line()
+            self.write_line(f"pub fn deinit(self: *{sname}, allocator: std.mem.Allocator) void {{")
+            self.indent_level += 1
+            for member in sdata.members:
+                self.destructor(member, "self.")
+            self.indent_level -= 1
+            self.write_line("}")
+
+        self.indent_level -= 1
+        self.write_line("};")
+        self.write_line()
+
+    def generate(self) -> str:
+        """Build the complete Zig source for the protocol and return it as one newline-joined string."""
+        self.output = []
+
+        self.write_line(f"// This file was automatically generated by {LIB_NAME} v{LIB_VERSION}")
+        self.write_line( "// <https://github.com/sjml/beschi>")
+        self.write_line( "// Do not edit directly.")
+        self.write_line()
+
+        if self.embed_protocol:
+            self.write_line("// DATA PROTOCOL")
+            self.write_line("// -----------------")
+            for protocol_line in self.protocol.protocol_string.splitlines():
+                self.write_line(f"// {protocol_line}")
+            self.write_line("// -----------------")
+            self.write_line("// END DATA PROTOCOL")
+            self.write_line()
+            self.write_line()
+
+        self.write_line("const std = @import(\"std\");")
+        self.write_line()
+
+        # _typeIsSimple lists this protocol's simple structs/messages, so it is written here line by line
+        self.write_line( "fn _typeIsSimple(comptime T: type) bool {")
+        self.write_line( "    if (comptime _numberTypeIsValid(T)) {")
+        self.write_line( "        return true;")
+        self.write_line( "    }")
+        self.write_line( "    const simpleTypes = [_]type{")
+        simple_structs  = [sname for sname, sdata in self.protocol.structs.items()  if sdata.is_simple()]
+        simple_messages = [mname for mname, mdata in self.protocol.messages.items() if mdata.is_simple()]
+        if simple_structs:
+            self.write_line(f"        {', '.join(simple_structs )},")
+        if simple_messages:
+            self.write_line(f"        {', '.join(simple_messages)},")
+        self.write_line( "    };")
+        self.write_line( "    for (simpleTypes) |vt| {")
+        self.write_line( "        if (T == vt) {")
+        self.write_line( "            return true;")
+        self.write_line( "        }")
+        self.write_line( "    }")
+        self.write_line( "    return false;")
+        self.write_line( "}")
+        self.write_line()
+
+        subs = [
+            ("{# STRING_SIZE_TYPE #}", self.get_native_string_size()),
+            ("{# LIST_SIZE_TYPE #}"  , self.get_native_list_size()),
+        ]
+        self.add_boilerplate(subs)
+        self.write_line()
+
+        for sname, sdata in self.protocol.structs.items():
+            self.gen_struct(sname, sdata)
+        for mname, mdata in self.protocol.messages.items():
+            self.gen_struct(mname, mdata)
+
+        self.write_line()
+        assert self.indent_level == 0
+        return "\n".join(self.output)
diff --git a/docs/dev/todo.md b/docs/dev/todo.md
index bc7ad0f..5c2071a 100644
--- a/docs/dev/todo.md
+++ b/docs/dev/todo.md
@@ -2,9 +2,12 @@ This file is a rough todo list for the tool itself.
 
 ## dustoff notes
 - check documentation to make sure it's not promising to be exception-free 
+- is `list_size_type` actually implemented?
 
 ## Zig writer 
 - <sigh> buckle up
+- fix "this was auto created" to not use format strings if needed
+- test with a protocol that has no simple structs/messages
 
 ## protocol features:
 - static values, so you can, say, version a message and it will be automatically written to every instance of it
@@ -19,10 +22,9 @@ This file is a rough todo list for the tool itself.
         # ..."
         ```
     - proposal:
-        - statics are allowed for numeric types and strings only; no lists, no structs
+        - statics are allowed for numeric types (and lists of numeric types) only
         - statics cannot be set from target language; will be overwritten with static value when put into the buffer
             - note that this might lead to leaked memory or trashed pointers if you're not careful in C
-            - (perhaps a reason to only allow numeric types, but strings are just _so_ useful)
 - inline string and array length types so they don't have to be protocol-wide like they are now
     - not pressing, but worth thinking of
         ```toml
@@ -31,8 +33,12 @@ This file is a rough todo list for the tool itself.
         shorty = "string[u8]"
         medium = "string[u16]"
         gargantuan = "string[u64]"
+        regular = "string[]" # will use the default
+        regular2 = "string" # will also use the default
         smallList = "[f32][u8]"
-        universe = "[f64][f64]"
+        universe = "[f64][u64]"
+        regList = "[DataType][]" # default
+        regList2 = "[DataType]" # default
         ```
 
 ## "immediate" todo
diff --git a/docs/languages/c.md b/docs/languages/c.md
index a487e98..0cd1514 100644
--- a/docs/languages/c.md
+++ b/docs/languages/c.md
@@ -49,3 +49,4 @@ Note that this requires the `stdint.h` and `stdbool.h` header files, which are s
 * Reading a message from a buffer copies all the data it needs, so the buffer can be discarded safely afterwards. This *does* mean, though, that the reading functions will allocate memory if there are lists or strings in the structure. They will need to be `free`-ed or will leak. 
     - Every message struct has an associated `{namespace}_Destroy{message_type}` function that handles that for you. 
 * `ProcessRawBytes` fills an array of pointers to `void` (`void**`), so you need to pass it a *pointer* to such an array, a `void***`. I know, I know. Anyway, once it's filled, you can check each one for its type with `{namespace}_GetMessageType` and then cast as you need to. (There is also a `{namespace}_DestroyMessageList` to help with cleaning that up when you're done.)
+    - Note that each of the messages in the resulting list has been allocated, so will need to be freed.