From 894fba0e62ba6af3180ff5710fb9cbc3c70b2288 Mon Sep 17 00:00:00 2001
From: Malcolm Still
Date: Sat, 15 Jul 2023 18:12:03 +0100
Subject: [PATCH] wasi: implementations

---
 src/instance.zig                  |  51 ++++
 src/instance/vm.zig               |  29 ++-
 src/main.zig                      |   1 +
 src/wasi/wasi.zig                 | 423 ++++++++++++++++++++++++++++++
 tools/zware-gen/src/zware_gen.zig |   8 +-
 5 files changed, 508 insertions(+), 4 deletions(-)
 create mode 100644 src/wasi/wasi.zig

diff --git a/src/instance.zig b/src/instance.zig
index 5193c17b..ee5d4faf 100644
--- a/src/instance.zig
+++ b/src/instance.zig
@@ -1,6 +1,8 @@
 const std = @import("std");
 const mem = std.mem;
 const math = std.math;
+const os = std.os;
+const wasi = std.os.wasi;
 const ArrayList = std.ArrayList;
 const Module = @import("module.zig").Module;
 const Store = @import("store.zig").ArrayListStore;
@@ -40,6 +42,22 @@ pub const Instance = struct {
     elemaddrs: ArrayList(usize),
     dataaddrs: ArrayList(usize),
 
+    // wasi-specific fields
+    //
+    // They are defined on an instance but only really on an
+    // initial instance that is invoked. When initialising a
+    // VirtualMachine this initial instance will pass its wasi
+    // data to the VirtualMachine (via pointers).
+    //
+    // The wasi implementations can (and must) then lookup this data via
+    // the VirtualMachine, it shouldn't call e.g. `vm.inst...` because
+    // a VirtualMachine swaps out its `inst` (instance) pointer as
+    // it executes; an arbitrary `inst` will not contain the correct
+    // data.
+    wasi_preopens: std.AutoHashMap(wasi.fd_t, WasiPreopen),
+    wasi_args: std.ArrayList([:0]u8),
+    wasi_env: std.StringHashMap([]const u8),
+
     pub fn init(alloc: mem.Allocator, store: *Store, module: Module) Instance {
         return Instance{
             .module = module,
@@ -50,6 +68,10 @@ pub const Instance = struct {
             .globaladdrs = ArrayList(usize).init(alloc),
             .elemaddrs = ArrayList(usize).init(alloc),
             .dataaddrs = ArrayList(usize).init(alloc),
+
+            .wasi_preopens = std.AutoHashMap(os.wasi.fd_t, WasiPreopen).init(alloc),
+            .wasi_args = ArrayList([:0]u8).init(alloc),
+            .wasi_env = std.StringHashMap([]const u8).init(alloc),
         };
     }
 
@@ -60,6 +82,10 @@ pub const Instance = struct {
         self.globaladdrs.deinit();
         self.elemaddrs.deinit();
         self.dataaddrs.deinit();
+
+        self.wasi_preopens.deinit();
+        self.wasi_args.deinit();
+        self.wasi_env.deinit();
     }
 
     pub fn getFunc(self: *Instance, funcidx: usize) !Function {
@@ -411,4 +437,29 @@ pub const Instance = struct {
             else => return vm.popOperand(Result),
         }
     }
+
+    pub fn addWasiPreopen(self: *Instance, wasi_fd: os.wasi.fd_t, name: []const u8, host_fd: os.fd_t) !void {
+        return self.wasi_preopens.put(wasi_fd, .{
+            .wasi_fd = wasi_fd,
+            .name = name,
+            .host_fd = host_fd,
+        });
+    }
+
+    // Caller must call std.process.argsFree on returned args
+    pub fn forwardArgs(self: *Instance, alloc: mem.Allocator) ![][:0]u8 {
+        const args = try std.process.argsAlloc(alloc);
+
+        for (args) |arg| {
+            try self.wasi_args.append(arg);
+        }
+
+        return args;
+    }
+};
+
+pub const WasiPreopen = struct {
+    wasi_fd: wasi.fd_t,
+    name: []const u8,
+    host_fd: os.fd_t,
 };
diff --git a/src/instance/vm.zig b/src/instance/vm.zig
index f20027a2..e351550e 100644
--- a/src/instance/vm.zig
+++ b/src/instance/vm.zig
@@ -1,10 +1,13 @@
 const std = @import("std");
+const os = std.os;
 const mem = std.mem;
 const math = std.math;
+const wasi = std.os.wasi;
 const ArrayList = std.ArrayList;
 const Module = @import("../module.zig").Module;
 const ValType = @import("../module.zig").ValType;
 const Instance = @import("../instance.zig").Instance;
+const WasiPreopen = @import("../instance.zig").WasiPreopen;
 const Rr = @import("../rr.zig").Rr;
 
 // VirtualMachine:
@@ -32,6 +35,15 @@ pub const VirtualMachine = struct {
     inst: *Instance = undefined,
     ip: usize = 0,
 
+    // wasi support
+    //
+    // These fields match the types in Instance but are
+    // instead pointers. These will point to the Instance
+    // that initialises the VirtualMachine
+    wasi_preopens: *std.AutoHashMap(wasi.fd_t, WasiPreopen),
+    wasi_args: *std.ArrayList([:0]u8),
+    wasi_env: *std.StringHashMap([]const u8),
+
     pub const Frame = struct {
         locals: []u64 = undefined, // TODO: we're in trouble if we move our stacks in memory
         return_arity: usize = 0,
@@ -55,9 +67,22 @@ pub const VirtualMachine = struct {
             .frame_stack = frame_stack,
             .label_stack = label_stack,
             .inst = inst,
+            .wasi_preopens = &inst.wasi_preopens,
+            .wasi_args = &inst.wasi_args,
+            .wasi_env = &inst.wasi_env,
         };
     }
 
+    pub fn lookupWasiPreopen(self: *VirtualMachine, wasi_fd: os.wasi.fd_t) ?WasiPreopen {
+        return self.wasi_preopens.get(wasi_fd);
+    }
+
+    pub fn getHostFd(self: *VirtualMachine, wasi_fd: wasi.fd_t) os.fd_t {
+        const preopen = self.lookupWasiPreopen(wasi_fd) orelse return wasi_fd;
+
+        return preopen.host_fd;
+    }
+
     pub fn invoke(self: *VirtualMachine, ip: usize) !void {
         const instr = self.inst.module.parsed_code.items[ip];
 
@@ -2306,9 +2331,9 @@ pub const VirtualMachine = struct {
         // FIXME: move initial bounds check into Memory implementation
         const data = memory.memory();
         if (dest <= src) {
-            memory.uncheckedCopy(dest, data[src..src+n]);
+            memory.uncheckedCopy(dest, data[src .. src + n]);
         } else {
-            memory.uncheckedCopyBackwards(dest, data[src..src+n]);
+            memory.uncheckedCopyBackwards(dest, data[src .. src + n]);
         }
 
         return dispatch(self, ip + 1, code);
diff --git a/src/main.zig b/src/main.zig
index d3663c3f..accfb60f 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -7,6 +7,7 @@ pub const Function = @import("store/function.zig").Function;
 pub const Global = @import("store/global.zig").Global;
 pub const Memory = @import("store/memory.zig").Memory;
 pub const ValType = @import("valtype.zig").ValType;
+pub const wasi = @import("wasi/wasi.zig");
 
 test {
     _ = @import("module/validator.zig");
diff --git a/src/wasi/wasi.zig b/src/wasi/wasi.zig
new file mode 100644
index 00000000..d7a4ccff
--- /dev/null
+++ b/src/wasi/wasi.zig
@@ -0,0 +1,423 @@
+const std = @import("std");
+const mem = std.mem;
+const fs = std.fs;
+const os = std.os;
+const math = std.math;
+const wasi = std.os.wasi;
+
+const VirtualMachine = @import("../instance/vm.zig").VirtualMachine;
+const WasmError = @import("../instance/vm.zig").WasmError;
+
+pub fn args_get(vm: *VirtualMachine) WasmError!void {
+    const argv_buf_ptr = vm.popOperand(u32);
+    const argv_ptr = vm.popOperand(u32);
+
+    const memory = try vm.inst.getMemory(0);
+    const data = memory.memory();
+
+    var argv_buf_i: usize = 0;
+    for (vm.wasi_args.items, 0..) |arg, i| {
+        const argv_i_ptr = argv_buf_ptr + argv_buf_i;
+        const arg_len = arg.len + 1;
+
+        mem.copy(u8, try guestSlice(data, argv_i_ptr, arg_len), arg[0..arg_len]);
+        argv_buf_i += arg_len;
+
+        try memory.write(u32, argv_ptr, 4 * @as(u32, @intCast(i)), @as(u32, @intCast(argv_i_ptr)));
+    }
+
+    try vm.pushOperand(u64, @intFromEnum(wasi.errno_t.SUCCESS));
+}
+
+pub fn args_sizes_get(vm: *VirtualMachine) WasmError!void {
+    const argv_buf_size_ptr = vm.popOperand(u32);
+    const argc_ptr = vm.popOperand(u32);
+
+    const memory = try vm.inst.getMemory(0);
+
+    const argc = vm.wasi_args.items.len;
+    try memory.write(u32, argc_ptr, 0, @as(u32, @intCast(argc)));
+
+    var buf_size: usize = 0;
+    for (vm.wasi_args.items) |arg| {
+        buf_size += arg.len + 1;
+    }
+    try memory.write(u32, argv_buf_size_ptr, 0, @as(u32, @intCast(buf_size)));
+
+    try vm.pushOperand(u64, @intFromEnum(wasi.errno_t.SUCCESS));
+}
+
+pub fn clock_time_get(vm: *VirtualMachine) WasmError!void {
+    const timestamp_ptr = vm.popOperand(u32);
+    const precision = vm.popOperand(i64); // FIXME: we should probably be using this
+    _ = precision;
+    const clock_id = vm.popOperand(i32); // FIXME: we should probably be using this
+    _ = clock_id;
+
+    const memory = try vm.inst.getMemory(0);
+
+    const timestamp = toWasiTimestamp(std.time.nanoTimestamp());
+
+    try memory.write(u64, timestamp_ptr, 0, timestamp);
+
+    try vm.pushOperand(u64, @intFromEnum(wasi.errno_t.SUCCESS));
+}
+
+pub fn fd_close(vm: *VirtualMachine) WasmError!void {
+    const fd = vm.popOperand(i32);
+
+    const host_fd = vm.getHostFd(fd);
+    os.close(host_fd);
+
+    try vm.pushOperand(u64, @intFromEnum(wasi.errno_t.SUCCESS));
+}
+
+pub fn fd_fdstat_get(vm: *VirtualMachine) WasmError!void {
+    const stat_ptr = vm.popOperand(u32);
+    const fd = vm.popOperand(i32);
+
+    const memory = try vm.inst.getMemory(0);
+
+    const host_fd = vm.getHostFd(fd);
+    const file = fs.File{ .handle = host_fd };
+    const stat = file.stat() catch |err| {
+        try vm.pushOperand(u64, @intFromEnum(toWasiError(err)));
+        return;
+    };
+
+    try memory.write(u16, stat_ptr, 0x00, @intFromEnum(toWasiFileType(stat.kind)));
+    try memory.write(u16, stat_ptr, 0x02, 0);
+    try memory.write(u64, stat_ptr, 0x08, math.maxInt(u64));
+    try memory.write(u64, stat_ptr, 0x10, math.maxInt(u64));
+
+    try vm.pushOperand(u64, @intFromEnum(wasi.errno_t.SUCCESS));
+}
+
+// FIXME: implement
+pub fn fd_fdstat_set_flags(vm: *VirtualMachine) WasmError!void {
+    const param0 = vm.popOperand(i32);
+    const param1 = vm.popOperand(i32);
+    std.debug.print("Unimplemented: fd_fdstat_set_flags({}, {})\n", .{ param0, param1 });
+    try vm.pushOperand(u64, 0);
+    @panic("Unimplemented: fd_fdstat_set_flags");
+}
+
+// FIXME: implement
+pub fn fd_filestat_get(vm: *VirtualMachine) WasmError!void {
+    const param0 = vm.popOperand(i32);
+    const param1 = vm.popOperand(i32);
+    std.debug.print("Unimplemented: fd_filestat_get({}, {})\n", .{ param0, param1 });
+    try vm.pushOperand(u64, 0);
+    @panic("Unimplemented: fd_filestat_get");
+}
+
+pub fn fd_prestat_get(vm: *VirtualMachine) WasmError!void {
+    const prestat_ptr = vm.popOperand(u32);
+    const fd = vm.popOperand(i32);
+
+    const memory = try vm.inst.getMemory(0);
+
+    if (vm.lookupWasiPreopen(fd)) |preopen| {
+        // `prestat_ptr` addresses the guest's `prestat` struct itself:
+        // a tag at offset 0 (0 = directory) and `pr_name_len` at offset 4.
+        try memory.write(u32, prestat_ptr, 0, 0);
+        try memory.write(u32, prestat_ptr, 4, @as(u32, @intCast(preopen.name.len)));
+
+        try vm.pushOperand(u64, @intFromEnum(wasi.errno_t.SUCCESS));
+    } else {
+        try vm.pushOperand(u64, @intFromEnum(wasi.errno_t.BADF));
+    }
+}
+
+pub fn fd_prestat_dir_name(vm: *VirtualMachine) WasmError!void {
+    const path_len = vm.popOperand(u32); // FIXME: we should probably be using this
+    _ = path_len;
+    const path_ptr = vm.popOperand(u32);
+    const fd = vm.popOperand(i32);
+
+    const memory = try vm.inst.getMemory(0);
+
+    const preopen = vm.lookupWasiPreopen(fd) orelse return WasmError.Trap;
+    try memory.copy(path_ptr, preopen.name);
+
+    try vm.pushOperand(u64, @intFromEnum(wasi.errno_t.SUCCESS));
+}
+
+pub fn fd_read(vm: *VirtualMachine) WasmError!void {
+    const n_read_ptr = vm.popOperand(u32);
+    const iovs_len = vm.popOperand(u32);
+    const iovs_ptr = vm.popOperand(u32);
+    const fd = vm.popOperand(i32);
+
+    const memory = try vm.inst.getMemory(0);
+    const data = memory.memory();
+
+    const host_fd = vm.getHostFd(fd);
+
+    var i: u32 = 0;
+    var total_read: usize = 0;
+    while (i < iovs_len) : (i += 1) {
+        const offset: u32 = i * 8; // Each iov is 8 bytes...
+        const iov_i_ptr = try memory.read(u32, iovs_ptr, offset); // 4 bytes (u32) for the ith ptr of where to read into
+        const iov_i_len = try memory.read(u32, iovs_ptr, offset + 4); // 4 bytes (u32) for the length of data to read
+
+        const buf = try guestSlice(data, iov_i_ptr, iov_i_len);
+
+        // read data from fd into buffer defined by iov
+        const read = os.read(host_fd, buf) catch |err| {
+            try vm.pushOperand(u64, @intFromEnum(toWasiError(err)));
+            return;
+        };
+
+        total_read += read;
+        if (read != buf.len) break;
+    }
+
+    try memory.write(u32, n_read_ptr, 0, @as(u32, @intCast(total_read)));
+
+    try vm.pushOperand(u64, @intFromEnum(wasi.errno_t.SUCCESS));
+}
+
+pub fn fd_seek(vm: *VirtualMachine) WasmError!void {
+    const new_offset_ptr = vm.popOperand(u32);
+    const relative_to: wasi.whence_t = @enumFromInt(vm.popOperand(i32));
+    const offset = vm.popOperand(i64);
+    const fd = vm.popOperand(i32);
+
+    // Translate the guest fd the same way the other fd_* calls do.
+    const host_fd = vm.getHostFd(fd);
+
+    switch (relative_to) {
+        wasi.whence_t.CUR => {
+            os.lseek_CUR(host_fd, offset) catch |err| {
+                try vm.pushOperand(u64, @intFromEnum(toWasiError(err)));
+                return;
+            };
+        },
+        wasi.whence_t.END => {
+            os.lseek_END(host_fd, offset) catch |err| {
+                try vm.pushOperand(u64, @intFromEnum(toWasiError(err)));
+                return;
+            };
+        },
+        wasi.whence_t.SET => {
+            os.lseek_SET(host_fd, @intCast(offset)) catch |err| {
+                try vm.pushOperand(u64, @intFromEnum(toWasiError(err)));
+                return;
+            };
+        },
+    }
+
+    const new_offset = os.lseek_CUR_get(host_fd) catch |err| {
+        try vm.pushOperand(u64, @intFromEnum(toWasiError(err)));
+        return;
+    };
+
+    const memory = try vm.inst.getMemory(0);
+    try memory.write(u64, new_offset_ptr, 0, new_offset);
+
+    try vm.pushOperand(u64, @intFromEnum(wasi.errno_t.SUCCESS));
+}
+
+pub fn fd_write(vm: *VirtualMachine) WasmError!void {
+    const ret_ptr = vm.popOperand(u32);
+    const iovs_len = vm.popOperand(u32);
+    const iovs_ptr = vm.popOperand(u32);
+    const fd = vm.popOperand(i32);
+
+    const memory = try vm.inst.getMemory(0);
+    const data = memory.memory();
+
+    const host_fd = vm.getHostFd(fd);
+
+    var n: usize = 0;
+    var i: u32 = 0;
+    while (i < iovs_len) : (i += 1) {
+        const offset: u32 = i * 8;
+        const iov_i_ptr = try memory.read(u32, iovs_ptr, offset);
+        const iov_i_len = try memory.read(u32, iovs_ptr, offset + 4);
+
+        const bytes = try guestSlice(data, iov_i_ptr, iov_i_len);
+
+        const written = os.write(host_fd, bytes) catch |err| {
+            try vm.pushOperand(u64, @intFromEnum(toWasiError(err)));
+            return;
+        };
+
+        n += written;
+
+        if (written != bytes.len) break;
+    }
+
+    try memory.write(u32, ret_ptr, 0, @as(u32, @intCast(n)));
+
+    try vm.pushOperand(u64, @intFromEnum(wasi.errno_t.SUCCESS));
+}
+
+// FIXME: implement
+pub fn path_create_directory(vm: *VirtualMachine) WasmError!void {
+    const param0 = vm.popOperand(i32);
+    const param1 = vm.popOperand(i32);
+    const param2 = vm.popOperand(i32);
+    std.debug.print("Unimplemented: path_create_directory({}, {}, {})\n", .{ param0, param1, param2 });
+    try vm.pushOperand(u64, 0);
+    @panic("Unimplemented: path_create_directory");
+}
+
+pub fn path_filestat_get(vm: *VirtualMachine) WasmError!void {
+    const stat_ptr = vm.popOperand(u32);
+    const path_len = vm.popOperand(u32);
+    const path_ptr = vm.popOperand(u32);
+    const flags = vm.popOperand(u32); // FIXME: we should probably be using this
+    _ = flags;
+    const fd = vm.popOperand(i32);
+
+    const memory = try vm.inst.getMemory(0);
+    const data = memory.memory();
+
+    const sub_path = try guestSlice(data, path_ptr, path_len);
+
+    const host_fd = vm.getHostFd(fd);
+    const dir: fs.Dir = .{ .fd = host_fd };
+    const stat = dir.statFile(sub_path) catch |err| {
+        try vm.pushOperand(u64, @intFromEnum(toWasiError(err)));
+        return;
+    };
+
+    try memory.write(u64, stat_ptr, 0, 0);
+    try memory.write(u64, stat_ptr, 0x08, stat.inode);
+    try memory.write(u64, stat_ptr, 0x10, @intFromEnum(toWasiFileType(stat.kind)));
+    try memory.write(u64, stat_ptr, 0x18, 1);
+    try memory.write(u64, stat_ptr, 0x20, stat.size);
+    try memory.write(u64, stat_ptr, 0x28, toWasiTimestamp(stat.atime));
+    try memory.write(u64, stat_ptr, 0x30, toWasiTimestamp(stat.mtime));
+    try memory.write(u64, stat_ptr, 0x38, toWasiTimestamp(stat.ctime));
+
+    try vm.pushOperand(u64, @intFromEnum(wasi.errno_t.SUCCESS));
+}
+
+pub fn path_open(vm: *VirtualMachine) WasmError!void {
+    const fd_ptr = vm.popOperand(u32);
+    const fs_flags = vm.popOperand(u32);
+    const fs_rights_inheriting = vm.popOperand(u64); // FIXME: we should probably be using this
+    _ = fs_rights_inheriting;
+    const fs_rights_base = vm.popOperand(u64);
+    const oflags = vm.popOperand(u32);
+    const path_len = vm.popOperand(u32);
+    const path_ptr = vm.popOperand(u32);
+    const dir_flags = vm.popOperand(u32); // FIXME: we should probably be using this
+    _ = dir_flags;
+    const dir_fd = vm.popOperand(i32);
+
+    const memory = try vm.inst.getMemory(0);
+    const data = memory.memory();
+
+    const sub_path = try guestSlice(data, path_ptr, path_len);
+
+    const host_fd = vm.getHostFd(dir_fd);
+
+    var flags: u32 = @as(u32, if (oflags & wasi.O.CREAT != 0) os.O.CREAT else 0) |
+        @as(u32, if (oflags & wasi.O.DIRECTORY != 0) os.O.DIRECTORY else 0) |
+        @as(u32, if (oflags & wasi.O.EXCL != 0) os.O.EXCL else 0) |
+        @as(u32, if (oflags & wasi.O.TRUNC != 0) os.O.TRUNC else 0) |
+        @as(u32, if (fs_flags & wasi.FDFLAG.APPEND != 0) os.O.APPEND else 0) |
+        @as(u32, if (fs_flags & wasi.FDFLAG.DSYNC != 0) os.O.DSYNC else 0) |
+        @as(u32, if (fs_flags & wasi.FDFLAG.NONBLOCK != 0) os.O.NONBLOCK else 0) |
+        @as(u32, if (fs_flags & wasi.FDFLAG.SYNC != 0) os.O.SYNC else 0);
+
+    if ((fs_rights_base & wasi.RIGHT.FD_READ != 0) and
+        (fs_rights_base & wasi.RIGHT.FD_WRITE != 0))
+    {
+        flags |= os.O.RDWR;
+    } else if (fs_rights_base & wasi.RIGHT.FD_WRITE != 0) {
+        flags |= os.O.WRONLY;
+    } else if (fs_rights_base & wasi.RIGHT.FD_READ != 0) {
+        flags |= os.O.RDONLY;
+    }
+
+    const mode = 0o644;
+    const opened_fd = os.openat(host_fd, sub_path, flags, mode) catch |err| {
+        try vm.pushOperand(u64, @intFromEnum(toWasiError(err)));
+        return;
+    };
+
+    try memory.write(i32, fd_ptr, 0, opened_fd);
+
+    try vm.pushOperand(u64, @intFromEnum(wasi.errno_t.SUCCESS));
+}
+
+// FIXME: implement
+pub fn poll_oneoff(vm: *VirtualMachine) WasmError!void {
+    const param0 = vm.popOperand(i32);
+    const param1 = vm.popOperand(i32);
+    const param2 = vm.popOperand(i32);
+    const param3 = vm.popOperand(i32);
+    std.debug.print("Unimplemented: poll_oneoff({}, {}, {}, {})\n", .{ param0, param1, param2, param3 });
+    try vm.pushOperand(u64, 0);
+    @panic("Unimplemented: poll_oneoff");
+}
+
+// FIXME: implement
+pub fn proc_exit(vm: *VirtualMachine) WasmError!void {
+    const param0 = vm.popOperand(i32);
+    std.debug.print("Unimplemented: proc_exit({})\n", .{param0});
+    @panic("Unimplemented: proc_exit");
+}
+
+pub fn random_get(vm: *VirtualMachine) WasmError!void {
+    const buf_len = vm.popOperand(u32);
+    const buf_ptr = vm.popOperand(u32);
+
+    const memory = try vm.inst.getMemory(0);
+    const data = memory.memory();
+
+    std.crypto.random.bytes(try guestSlice(data, buf_ptr, buf_len));
+
+    try vm.pushOperand(u64, @intFromEnum(wasi.errno_t.SUCCESS));
+}
+
+fn toWasiError(err: anyerror) wasi.errno_t {
+    return switch (err) {
+        error.AccessDenied => .ACCES,
+        error.DiskQuota => .DQUOT,
+        error.InputOutput => .IO,
+        error.FileTooBig => .FBIG,
+        error.NoSpaceLeft => .NOSPC,
+        error.BrokenPipe => .PIPE,
+        error.NotOpenForWriting => .BADF,
+        error.SystemResources => .NOMEM,
+        error.FileNotFound => .NOENT,
+        error.PathAlreadyExists => .EXIST,
+        error.IsDir => .ISDIR,
+        else => std.debug.panic("unexpected error: {s}", .{@errorName(err)}),
+    };
+}
+
+fn toWasiFileType(kind: fs.File.Kind) wasi.filetype_t {
+    return switch (kind) {
+        .block_device => .BLOCK_DEVICE,
+        .character_device => .CHARACTER_DEVICE,
+        .directory => .DIRECTORY,
+        .sym_link => .SYMBOLIC_LINK,
+        .file => .REGULAR_FILE,
+        .unknown => .UNKNOWN,
+
+        .named_pipe,
+        .unix_domain_socket,
+        .whiteout,
+        .door,
+        .event_port,
+        => .UNKNOWN,
+    };
+}
+
+fn toWasiTimestamp(ns: i128) u64 {
+    return @as(u64, @intCast(ns));
+}
+
+/// Returns `data[ptr .. ptr + len]`, or `WasmError.Trap` when the
+/// guest-supplied range does not lie entirely within `data`.
+fn guestSlice(data: []u8, ptr: usize, len: usize) WasmError![]u8 {
+    if (ptr > data.len or len > data.len - ptr) return WasmError.Trap;
+    return data[ptr..][0..len];
+}
diff --git a/tools/zware-gen/src/zware_gen.zig b/tools/zware-gen/src/zware_gen.zig
index ea172b85..9218e3d1 100644
--- a/tools/zware-gen/src/zware_gen.zig
+++ b/tools/zware-gen/src/zware_gen.zig
@@ -79,8 +79,12 @@ pub fn main() !void {
             try stdout.print("\t_ = vm;\n", .{});
         }
 
-        for (function_type.params, 0..) |param, i| {
-            try stdout.print("\tconst param{} = vm.popOperand({s});\n", .{ i, zigType(param) });
+        // Insert pops. Note our first argument to a function is the _last_ argument that will be popped off
+        // the stack, so we pop the last argument first which is why this is working backwards through params.
+        for (function_type.params, 0..) |_, i| {
+            const j = function_type.params.len - 1 - i;
+            const param = function_type.params[j];
+            try stdout.print("\tconst param{} = vm.popOperand({s});\n", .{ j, zigType(param) });
         }
 
         try stdout.print("\tstd.debug.print(\"Unimplemented: {s}(", .{function_import.name});