From 65bea8a4d8c33081904fdd6ce30cc4051597ad49 Mon Sep 17 00:00:00 2001 From: Shane Liesegang Date: Mon, 28 Oct 2024 08:29:32 +0800 Subject: [PATCH] first draft zig generator; only deserializes --- beschi/cli.py | 3 +- beschi/writers/boilerplate/Zig.zig | 76 +++++++++++ beschi/writers/c.py | 8 +- beschi/writers/csharp.py | 2 +- beschi/writers/go.py | 2 +- beschi/writers/rust.py | 2 +- beschi/writers/swift.py | 2 +- beschi/writers/typescript.py | 2 +- beschi/writers/zig.py | 212 +++++++++++++++++++++++++++++ docs/dev/todo.md | 12 +- docs/languages/c.md | 1 + 11 files changed, 310 insertions(+), 12 deletions(-) create mode 100644 beschi/writers/boilerplate/Zig.zig create mode 100644 beschi/writers/zig.py diff --git a/beschi/cli.py b/beschi/cli.py index 2a984da..fa58aeb 100644 --- a/beschi/cli.py +++ b/beschi/cli.py @@ -1,5 +1,6 @@ import sys import argparse +import traceback from .protocol import Protocol from .writers import all_writers, experimental_writers @@ -74,7 +75,7 @@ def main(): try: output = writer.generate() except NotImplementedError as nie: - sys.stderr.write(f"{nie}\n") + sys.stderr.write(f"{traceback.format_exc()}\n") sys.exit(1) if args.output == None: diff --git a/beschi/writers/boilerplate/Zig.zig b/beschi/writers/boilerplate/Zig.zig new file mode 100644 index 0000000..e0225b8 --- /dev/null +++ b/beschi/writers/boilerplate/Zig.zig @@ -0,0 +1,76 @@ +fn _numberTypeIsValid(comptime T: type) bool { + const validNumericTypes = [_]type{ + u8, i8, + u16, i16, + u32, i32, + u64, i64, + f32, f64, + }; + for (validNumericTypes) |vt| { + if (T == vt) { + return true; + } + } + return false; +} + +pub fn readNumber(comptime T: type, offset: usize, buffer: []u8) struct { value: T, bytes_read: usize } { + comptime { + if (!_numberTypeIsValid(T)) { + @compileError("Invalid number type"); + } + } + + switch (T) { + f32 => return .{ .value = @bitCast(std.mem.readInt(u32, buffer[offset..][0..@sizeOf(T)], .little)), .bytes_read = @sizeOf(T) }, + f64 => return .{ .value = @bitCast(std.mem.readInt(u64, buffer[offset..][0..@sizeOf(T)], .little)), .bytes_read = @sizeOf(T) }, + else => return .{ .value = std.mem.readInt(T, buffer[offset..][0..@sizeOf(T)], .little), .bytes_read = @sizeOf(T) }, + } +} + +pub fn readString(allocator: std.mem.Allocator, offset: usize, buffer: []u8) !struct { value: []u8, bytes_read: usize } { + const len_read = readNumber({# STRING_SIZE_TYPE #}, offset, buffer); + const len = len_read.value; + var str = try allocator.alloc(u8, len); + for (0..len) |i| { + str[i] = buffer[offset + len_read.bytes_read + i]; + } + return .{ .value = str, .bytes_read = @sizeOf({# STRING_SIZE_TYPE #}) + len }; +} + +pub fn readList(comptime T: type, allocator: std.mem.Allocator, offset: usize, buffer: []u8) !struct { value: []T, bytes_read: usize } { + var local_offset = offset; + const len_read = readNumber({# LIST_SIZE_TYPE #}, local_offset, buffer); + const len = len_read.value; + local_offset += len_read.bytes_read; + var list = try allocator.alloc(T, len); + + for (0..len) |i| { + if (comptime _numberTypeIsValid(T)) { + const list_read = readNumber(T, local_offset, buffer); + list[i] = list_read.value; + local_offset += list_read.bytes_read; + } else { + switch (T) { + []u8 => { + const list_read = try readString(allocator, local_offset, buffer); + list[i] = list_read.value; + local_offset += list_read.bytes_read; + }, + else => { + if (comptime _typeIsSimple(T)) { + const list_read = try T.fromBytes(local_offset, buffer); + list[i] = list_read.value; + local_offset += list_read.bytes_read; + } + else { + const list_read = try T.fromBytes(allocator, local_offset, buffer); + list[i] = list_read.value; + local_offset += list_read.bytes_read; + } + }, + } + } + } + return .{ .value = list, .bytes_read = local_offset - offset }; +} diff --git a/beschi/writers/c.py b/beschi/writers/c.py index 5649f5c..68ca7d0 100644 --- a/beschi/writers/c.py +++ b/beschi/writers/c.py @@ -309,9 +309,11 @@ def gen_implementation(self, sname: str, sdata: Struct): def generate(self) -> str: self.output = [] - self.write_line(f"// This file was automatically generated by {LIB_NAME} v{LIB_VERSION}") - self.write_line( "// ") - self.write_line(f"// Do not edit directly.") + self.write_line( "/*") + self.write_line(f" This file was automatically generated by {LIB_NAME} v{LIB_VERSION}") + self.write_line( " ") + self.write_line( " Do not edit directly.") + self.write_line( "*/") self.write_line() if self.embed_protocol: diff --git a/beschi/writers/csharp.py b/beschi/writers/csharp.py index eecd061..8b37ae1 100644 --- a/beschi/writers/csharp.py +++ b/beschi/writers/csharp.py @@ -212,7 +212,7 @@ def generate(self) -> str: self.write_line(f"// This file was automatically generated by {LIB_NAME} v{LIB_VERSION}") self.write_line( "// ") - self.write_line(f"// Do not edit directly.") + self.write_line( "// Do not edit directly.") self.write_line() if self.embed_protocol: diff --git a/beschi/writers/go.py b/beschi/writers/go.py index 318bc49..bf0025d 100644 --- a/beschi/writers/go.py +++ b/beschi/writers/go.py @@ -205,7 +205,7 @@ def generate(self) -> str: self.write_line(f"// This file was automatically generated by {LIB_NAME} v{LIB_VERSION}") self.write_line( "// ") - self.write_line(f"// Do not edit directly.") + self.write_line( "// Do not edit directly.") self.write_line() if self.embed_protocol: diff --git a/beschi/writers/rust.py b/beschi/writers/rust.py index fe61add..e107919 100644 --- a/beschi/writers/rust.py +++ b/beschi/writers/rust.py @@ -194,7 +194,7 @@ def generate(self) -> str: self.output = [] self.write_line(f"// This file was automatically generated by {LIB_NAME} v{LIB_VERSION}") self.write_line( "// ") - self.write_line(f"// Do not edit directly.") + self.write_line( "// Do not edit directly.") self.write_line() if self.embed_protocol: diff --git a/beschi/writers/swift.py b/beschi/writers/swift.py index e7617b1..178c9b1 100644 --- a/beschi/writers/swift.py +++ b/beschi/writers/swift.py @@ -204,7 +204,7 @@ def generate(self) -> str: self.write_line(f"// This file was automatically generated by {LIB_NAME} v{LIB_VERSION}") self.write_line( "// ") - self.write_line(f"// Do not edit directly.") + self.write_line( "// Do not edit directly.") self.write_line() self.write_line("import Foundation") self.write_line() diff --git a/beschi/writers/typescript.py b/beschi/writers/typescript.py index 66b239d..c52d8b6 100644 --- a/beschi/writers/typescript.py +++ b/beschi/writers/typescript.py @@ -233,7 +233,7 @@ def generate(self) -> str: self.write_line(f"// This file was automatically generated by {LIB_NAME} v{LIB_VERSION}") self.write_line( "// ") - self.write_line(f"// Do not edit directly.") + self.write_line( "// Do not edit directly.") self.write_line() if self.embed_protocol: diff --git a/beschi/writers/zig.py b/beschi/writers/zig.py new file mode 100644 index 0000000..bc96c12 --- /dev/null +++ b/beschi/writers/zig.py @@ -0,0 +1,212 @@ +import argparse + +from ..protocol import Protocol, Struct, Variable, NUMERIC_TYPE_SIZES +from ..writer import Writer, TextUtil +from .. import LIB_NAME, LIB_VERSION + +LANGUAGE_NAME = "Zig" + + +class ZigWriter(Writer): + language_name = LANGUAGE_NAME + default_extension = ".zig" + in_progress = True + + def __init__(self, p: Protocol, extra_args: dict[str,any] = {}): + super().__init__(protocol=p) + + self.embed_protocol = extra_args["embed_protocol"] + + self.type_mapping["byte"] = "u8" + self.type_mapping["bool"] = "bool" + self.type_mapping["uint16"] = "u16" + self.type_mapping["int16"] = "i16" + self.type_mapping["uint32"] = "u32" + self.type_mapping["int32"] = "i32" + self.type_mapping["uint64"] = "u64" + self.type_mapping["int64"] = "i64" + self.type_mapping["float"] = "f32" + self.type_mapping["double"] = "f64" + self.type_mapping["string"] = "[]u8" + + self.base_defaults: dict[str,str] = { + "byte": "0", + "bool": "false", + "uint16": "0", + "int16": "0", + "uint32": "0", + "int32": "0", + "uint64": "0", + "int64": "0", + "float": "0.0", + "double": "0.0", + "string": '""', + } + + def deserializer(self, var: Variable, accessor: str, parent_is_simple: bool, simple_offset: int): + if parent_is_simple: # also means that *var* is simple because recursion! + if var.vartype == "bool": + self.write_line(f"const {accessor}_{var.name} = readNumber(u8, offset + {simple_offset}, buffer).value != 0;") + elif var.vartype in NUMERIC_TYPE_SIZES.keys(): + self.write_line(f"const {accessor}_{var.name} = readNumber({self.type_mapping[var.vartype]}, offset + {simple_offset}, buffer).value;") + else: + self.write_line(f"const {accessor}_{var.name}_read = {var.vartype}.fromBytes({simple_offset}, buffer);") + self.write_line(f"const {accessor}_{var.name} = {accessor}_{var.name}_read.value;") + else: + if var.is_list: + self.write_line(f"const {accessor}_{var.name}_read = try readList({self.type_mapping[var.vartype]}, allocator, local_offset, buffer);") + self.write_line(f"const {accessor}_{var.name} = {accessor}_{var.name}_read.value;") + self.write_line(f"local_offset += {accessor}_{var.name}_read.bytes_read;") + elif var.vartype == "bool": + self.write_line(f"const {accessor}_{var.name}_read = readNumber(u8, local_offset, buffer);") + self.write_line(f"const {accessor}_{var.name} = {accessor}_{var.name}_read.value != 0;") + self.write_line(f"local_offset += {accessor}_{var.name}_read.bytes_read;") + elif var.vartype in NUMERIC_TYPE_SIZES.keys(): + self.write_line(f"const {accessor}_{var.name}_read = readNumber({self.type_mapping[var.vartype]}, local_offset, buffer);") + self.write_line(f"const {accessor}_{var.name} = {accessor}_{var.name}_read.value;") + self.write_line(f"local_offset += {accessor}_{var.name}_read.bytes_read;") + elif var.vartype == "string": + self.write_line(f"const {accessor}_{var.name}_read = try readString(allocator, local_offset, buffer);") + self.write_line(f"const {accessor}_{var.name} = {accessor}_{var.name}_read.value;") + self.write_line(f"local_offset += {accessor}_{var.name}_read.bytes_read;") + else: + self.write_line(f"const {accessor}_{var.name}_read = try {var.vartype}.fromBytes({'' if var.is_simple() else 'allocator, '}local_offset, buffer);") + self.write_line(f"const {accessor}_{var.name} = {accessor}_{var.name}_read.value;") + self.write_line(f"local_offset += {accessor}_{var.name}_read.bytes_read;") + + self.write_line() + + def destructor(self, var: Variable, accessor: str): + if var.is_simple(): + return + elif var.is_list: + if not var.is_simple(True): + idx = self.indent_level + self.write_line(f"for ({accessor}{var.name}) |{'*' if not var.vartype == 'string' else ''}item{idx}| {{") + self.indent_level += 1 + inner = Variable(self.protocol, f"item{idx}", var.vartype) + self.destructor(inner, "") + self.indent_level -= 1 + self.write_line("}") + self.write_line(f"allocator.free({accessor}{var.name});") + elif var.vartype == "string": + self.write_line(f"allocator.free({accessor}{var.name});") + else: + self.write_line(f"{accessor}{var.name}.deinit(allocator);") + + + def gen_struct(self, sname: str, sdata: Struct): + self.write_line(f"pub const {sname} = struct {{") + self.indent_level += 1 + for var in sdata.members: + if var.is_list: + self.write_line(f"{var.name}: []{self.type_mapping[var.vartype]},") + else: + default_value = self.base_defaults.get(var.vartype) + if default_value == None: + if var.is_simple(): + default_value = f"{var.vartype}{{}}" + else: + default_value = None + if default_value != None: + self.write_line(f"{var.name}: {self.type_mapping[var.vartype]} = {default_value},") + else: + self.write_line(f"{var.name}: {self.type_mapping[var.vartype]},") + self.write_line() + + self.write_line(f"pub fn fromBytes({'' if sdata.is_simple() else 'allocator: std.mem.Allocator, '}offset: usize, buffer: []u8) !struct {{ value: {sname}, bytes_read: usize }} {{") + self.indent_level += 1 + simple_offset = -1 + if sdata.is_simple(): + simple_offset = 0 + else: + self.write_line("var local_offset = offset;") + self.write_line() + for mem in sdata.members: + self.deserializer(mem, sname, sdata.is_simple(), simple_offset) + if sdata.is_simple(): + simple_offset += self.protocol.get_size_of(mem.vartype) + self.write_line(f"return .{{ .value = {sname}{{") + self.indent_level += 1 + for var in sdata.members: + self.write_line(f".{var.name} = {sname}_{var.name},") + self.indent_level -= 1 + if sdata.is_simple(): + self.write_line(f"}}, .bytes_read = {self.protocol.get_size_of(sdata.name)} }};") + else: + self.write_line(f"}}, .bytes_read = local_offset - offset }};") + self.indent_level -= 1 + self.write_line("}") + + if not sdata.is_simple(): + self.write_line() + self.write_line(f"pub fn deinit(self: *{sname}, allocator: std.mem.Allocator) void {{") + self.indent_level += 1 + [self.destructor(mem, "self.") for mem in sdata.members] + self.indent_level -= 1 + self.write_line("}") + + + self.indent_level -= 1 + self.write_line("};") + self.write_line() + + def generate(self) -> str: + self.output = [] + + self.write_line(f"// This file was automatically generated by {LIB_NAME} v{LIB_VERSION}") + self.write_line( "// ") + self.write_line(f"// Do not edit directly.") + self.write_line() + + if self.embed_protocol: + self.write_line("// DATA PROTOCOL") + self.write_line("// -----------------") + [self.write_line(f"// {l}") for l in self.protocol.protocol_string.splitlines()] + self.write_line("// -----------------") + self.write_line("// END DATA PROTOCOL") + self.write_line() + self.write_line() + + self.write_line("const std = @import(\"std\");") + self.write_line() + + self.write_line( "fn _typeIsSimple(comptime T: type) bool {") + self.write_line( " if (comptime _numberTypeIsValid(T)) {") + self.write_line( " return true;") + self.write_line( " }") + self.write_line( " const simpleTypes = [_]type{") + simple_structs = [sname for sname, sdata in self.protocol.structs.items() if sdata.is_simple()] + simple_messages = [mname for mname, mdata in self.protocol.messages.items() if mdata.is_simple()] + if len(simple_structs): + self.write_line(f" {', '.join(simple_structs )},") + if len(simple_messages): + self.write_line(f" {', '.join(simple_messages)},") + self.write_line( " };") + self.write_line( " for (simpleTypes) |vt| {") + self.write_line( " if (T == vt) {") + self.write_line( " return true;") + self.write_line( " }") + self.write_line( " }") + self.write_line( " return false;") + self.write_line( "}") + self.write_line() + + subs = [ + ("{# STRING_SIZE_TYPE #}", self.get_native_string_size()), + ("{# LIST_SIZE_TYPE #}" , self.get_native_list_size()), + ] + self.add_boilerplate(subs) + self.write_line() + + for sname, sdata in self.protocol.structs.items(): + self.gen_struct(sname, sdata) + + for mname, mdata in self.protocol.messages.items(): + self.gen_struct(mname, mdata) + + + self.write_line() + assert self.indent_level == 0 + + return "\n".join(self.output) diff --git a/docs/dev/todo.md b/docs/dev/todo.md index bc7ad0f..5c2071a 100644 --- a/docs/dev/todo.md +++ b/docs/dev/todo.md @@ -2,9 +2,12 @@ This file is a rough todo list for the tool itself. ## dustoff notes - check documentation to make sure it's not promising to be exception-free +- is `list_size_type` actually implemented? ## Zig writer - buckle up +- fix "this was auto created" to not use format strings if needed +- test with a protocol that has no simple structs/messages ## protocol features: - static values, so you can, say, version a message and it will be automatically written to every instance of it @@ -19,10 +22,9 @@ This file is a rough todo list for the tool itself. # ..." ``` - proposal: - - statics are allowed for numeric types and strings only; no lists, no structs + - statics are allowed for numeric types (and lists of numeric types) only - statics cannot be set from target language; will be overwritten with static value when put into the buffer - note that this might lead to leaked memory or trashed pointers if you're not careful in C - - (perhaps a reason to only allow numeric types, but strings are just _so_ useful) - inline string and array length types so they don't have to be protocol-wide like they are now - not pressing, but worth thinking of ```toml @@ -31,8 +33,12 @@ This file is a rough todo list for the tool itself. shorty = "string[u8]" medium = "string[u16]" gargantuan = "string[u64]" + regular = "string[]" # will use the default + regular2 = "string" # will also use the default smallList = "[f32][u8]" - universe = "[f64][f64]" + universe = "[f64][u64]" + regList = "[DataType][]" # default + regList2 = "[DataType]" # default ``` ## "immediate" todo diff --git a/docs/languages/c.md b/docs/languages/c.md index a487e98..0cd1514 100644 --- a/docs/languages/c.md +++ b/docs/languages/c.md @@ -49,3 +49,4 @@ Note that this requires the `stdint.h` and `stdbool.h` header files, which are s * Reading a message from a buffer copies all the data it needs, so the buffer can be discarded safely afterwards. This *does* mean, though, that the reading functions will allocate memory if there are lists or strings in the structure. They will need to be `free`-ed or will leak. - Every message struct has an associated `{namespace}_Destroy{message_type}` function that handles that for you. * `ProcessRawBytes` fills an array of pointers to `void` (`void**`), so you need to pass it a *pointer* to such an array, a `void***`. I know, I know. Anyway, once it's filled, you can check each one for its type with `{namespace}_GetMessageType` and then cast as you need to. (There is also a `{namespace}_DestroyMessageList` to help with cleaning that up when you're done.) + - Note that each of the messages in the resulting list has been allocated, so will need to be freed.