Big rework of the parser!
It now follows a more functional style and should be much easier to extend. The parser is probably a bit slower than the previous one, but the code is much cleaner, and a good enough compiler should be able to inline the function calls and bring it on par with the old version. As a TODO, the runtime structs should not depend on the parser, but that is a topic for another commit.
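For reference, a minimal sketch of how the reworked parser is meant to be driven (adapted from the commented-out code in src/main.zig below; the asset path, the 1 MB read limit, and the parseAndRun wrapper are illustrative, not part of this commit):

    const std = @import("std");
    const Parser = @import("mods/Parser.zig");
    const vm = @import("mods/vm.zig");
    const wasm = @import("mods/wasm.zig");

    fn parseAndRun(allocator: std.mem.Allocator, global_runtime: *wasm.GlobalRuntime) !void {
        // The new parser works on a byte slice, so read the whole module up front.
        const file = try std.fs.cwd().openFile("assets/core.wasm", .{});
        defer file.close();
        const bytes = try file.readToEndAlloc(allocator, 1_000_000);

        var parser = Parser{
            .bytes = bytes,
            .byte_idx = 0,
            .allocator = allocator,
        };
        const module = try parser.parseModule();

        var runtime = try vm.Runtime.init(allocator, module, global_runtime);
        defer runtime.deinit(allocator);

        var parameters = [_]usize{};
        try runtime.callExternal(allocator, "preinit", &parameters);
    }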
parent 00d695e5f0
commit b7854d7325
4 changed files with 605 additions and 362 deletions
src/main.zig (19 changed lines)
@@ -5,7 +5,7 @@ const window = @import("rendering/window.zig");
 const config = @import("config");
 const Renderer = @import("rendering/renderer_vulkan.zig");
 const math = @import("math.zig");
-const Parser = @import("mods/parse.zig");
+const Parser = @import("mods/Parser.zig");
 const vm = @import("mods/vm.zig");
 const wasm = @import("mods/wasm.zig");
 const components = @import("ecs/components.zig");
@@ -28,10 +28,19 @@ pub fn main() !void {
     //defer global_runtime.deinit();
     //try global_runtime.addFunction("debug", wasm.debug);

-    //const file = try std.fs.cwd().openFile("assets/core.wasm", .{});
-    //const module = try Parser.parseWasm(allocator, file.reader());
-    //var runtime = try vm.Runtime.init(allocator, module, &global_runtime);
-    //defer runtime.deinit(allocator);
+    // const file = try std.fs.cwd().openFile("assets/core.wasm", .{});
+    // const all = try file.readToEndAlloc(allocator, 1_000_000); // 1 MB
+    // var parser = Parser{
+    //     .bytes = all,
+    //     .byte_idx = 0,
+    //     .allocator = allocator,
+    // };
+    // const module = parser.parseModule() catch |err| {
+    //     std.debug.print("[ERROR]: error at byte {x}(0x{x})\n", .{ parser.byte_idx, parser.bytes[parser.byte_idx] });
+    //     return err;
+    // };
+    // var runtime = try vm.Runtime.init(allocator, module, &global_runtime);
+    // defer runtime.deinit(allocator);

     //var parameters = [_]usize{};
     //try runtime.callExternal(allocator, "preinit", &parameters);
src/mods/Parser.zig (new file, 486 lines)
@@ -0,0 +1,486 @@
const std = @import("std");
const vm = @import("vm.zig");
const Allocator = std.mem.Allocator;

bytes: []const u8,
byte_idx: usize,
allocator: Allocator,

// TODO: We don't really need ArrayLists
types: std.ArrayListUnmanaged(Functype) = .{},
imports: std.ArrayListUnmanaged(Import) = .{},
exports: std.StringHashMapUnmanaged(u32) = .{},
functions: std.ArrayListUnmanaged(u32) = .{},
memory: ?Memtype = null,
code: std.ArrayListUnmanaged(Func) = .{},
funcs: std.ArrayListUnmanaged(vm.Func) = .{},

pub const FunctionType = struct {
    parameters: []u8,
    results: []u8,

    pub fn deinit(self: FunctionType, allocator: Allocator) void {
        allocator.free(self.parameters);
        allocator.free(self.results);
    }
};

pub const FunctionBody = struct {
    locals: []Local,
    code: []u8,
};

pub const FunctionScope = enum {
    external,
    internal,
};

const Parser = @This();

pub const Error = error{
    invalid_magic,
    invalid_version,
    invalid_section,
    invalid_functype,
    invalid_vectype,
    invalid_numtype,
    invalid_reftype,
    invalid_valtype,
    invalid_string,
    invalid_limits,
    invalid_globaltype,
    invalid_importdesc,
    invalid_exportdesc,
    unterminated_wasm,
};

// TODO: This function should not exist
fn warn(self: Parser, s: []const u8) void {
    std.debug.print("[WARN]: Parsing of {s} unimplemented at byte index {d}\n", .{ s, self.byte_idx });
}

// TODO: remove peek
fn peek(self: Parser) ?u8 {
    return if (self.byte_idx < self.bytes.len) self.bytes[self.byte_idx] else null;
}

fn read(self: *Parser, n: usize) ![]const u8 {
    if (self.byte_idx + n > self.bytes.len) return Error.unterminated_wasm;
    defer self.byte_idx += n;
    return self.bytes[self.byte_idx .. self.byte_idx + n];
}

// ==========
// = VALUES =
// ==========

pub fn readByte(self: *Parser) !u8 {
    return (try self.read(1))[0];
}

fn readU32(self: *Parser) !u32 {
    return std.leb.readUleb128(u32, self);
}

fn readName(self: *Parser) ![]const u8 {
    // NOTE: This should be the only vector not parsed through parseVector
    const size = try self.readU32();
    const str = try self.allocator.alloc(u8, size);
    @memcpy(str, try self.read(size));
    if (!std.unicode.utf8ValidateSlice(str)) return Error.invalid_string;
    return str;
}

// =========
// = TYPES =
// =========
// NOTE: This should return a value

fn VectorFnResult(parse_fn: anytype) type {
    const type_info = @typeInfo(@TypeOf(parse_fn));
    if (type_info != .@"fn") {
        @compileError("cannot determine return type of " ++ @typeName(@TypeOf(parse_fn)));
    }
    const ret_type = type_info.@"fn".return_type.?;
    const ret_type_info = @typeInfo(ret_type);
    return switch (ret_type_info) {
        .error_union => ret_type_info.error_union.payload,
        else => ret_type,
    };
}
fn parseVector(self: *Parser, parse_fn: anytype) ![]VectorFnResult(parse_fn) {
    const n = try self.readU32();
    const ret = try self.allocator.alloc(VectorFnResult(parse_fn), n);
    for (ret) |*i| {
        i.* = try parse_fn(self);
    }
    return ret;
}

fn parseNumtype(self: *Parser) !std.wasm.Valtype {
    return switch (try self.readByte()) {
        0x7F => .i32,
        0x7E => .i64,
        0x7D => .f32,
        0x7C => .f64,
        else => Error.invalid_numtype,
    };
}

fn parseVectype(self: *Parser) !std.wasm.Valtype {
    return switch (try self.readByte()) {
        0x7B => .v128,
        else => Error.invalid_vectype,
    };
}

fn parseReftype(self: *Parser) !std.wasm.RefType {
    return switch (try self.readByte()) {
        0x70 => .funcref,
        0x6F => .externref,
        else => Error.invalid_reftype,
    };
}

// NOTE: Parsing of Valtype can be improved but it makes it less close to spec so...
// TODO: Do we really need Valtype?
const Valtype = union(enum) {
    val: std.wasm.Valtype,
    ref: std.wasm.RefType,
};
fn parseValtype(self: *Parser) !Valtype {
    const pb = self.peek() orelse return Error.unterminated_wasm;
    return switch (pb) {
        0x7F, 0x7E, 0x7D, 0x7C => .{ .val = try self.parseNumtype() },
        0x7B => .{ .val = try self.parseVectype() },
        0x70, 0x6F => .{ .ref = try self.parseReftype() },
        else => Error.invalid_valtype,
    };
}

fn parseResultType(self: *Parser) ![]Valtype {
    return try self.parseVector(Parser.parseValtype);
}

pub const Functype = struct {
    parameters: []Valtype,
    rt2: []Valtype,

    pub fn deinit(self: Functype, allocator: Allocator) void {
        allocator.free(self.parameters);
        allocator.free(self.rt2);
    }
};
fn parseFunctype(self: *Parser) !Functype {
    if (try self.readByte() != 0x60) return Error.invalid_functype;
    return .{
        .parameters = try self.parseResultType(),
        .rt2 = try self.parseResultType(),
    };
}

const Limits = struct {
    min: u32,
    max: ?u32,
};

fn parseLimits(self: *Parser) !Limits {
    return switch (try self.readByte()) {
        0x00 => .{
            .min = try self.readU32(),
            .max = null,
        },
        0x01 => .{
            .min = try self.readU32(),
            .max = try self.readU32(),
        },
        else => Error.invalid_limits,
    };
}

const Memtype = struct {
    lim: Limits,
};
fn parseMemtype(self: *Parser) !Memtype {
    return .{ .lim = try self.parseLimits() };
}

const Tabletype = struct {
    et: std.wasm.RefType,
    lim: Limits,
};
fn parseTabletype(self: *Parser) !Tabletype {
    return .{
        .et = try self.parseReftype(),
        .lim = try self.parseLimits(),
    };
}

const Globaltype = struct {
    t: Valtype,
    m: enum {
        @"const",
        @"var",
    },
};
fn parseGlobaltype(self: *Parser) !Globaltype {
    return .{
        .t = try self.parseValtype(),
        .m = switch (try self.readByte()) {
            0x00 => .@"const",
            0x01 => .@"var",
            else => return Error.invalid_globaltype,
        },
    };
}

// ===========
// = MODULES =
// ===========
// NOTE: This should not return anything but modify IR

pub fn parseModule(self: *Parser) !vm.Module {
    if (!std.mem.eql(u8, try self.read(4), &.{ 0x00, 0x61, 0x73, 0x6d })) return Error.invalid_magic;
    if (!std.mem.eql(u8, try self.read(4), &.{ 0x01, 0x00, 0x00, 0x00 })) return Error.invalid_version;
    // TODO: Ensure only one section of each type (except for custom section), some code depends on it
    while (self.byte_idx < self.bytes.len) {
        try switch (try self.readByte()) {
            0 => self.parseCustomsec(),
            1 => self.parseTypesec(),
            2 => self.parseImportsec(),
            3 => self.parseFuncsec(),
            4 => self.parseTablesec(),
            5 => self.parseMemsec(),
            6 => self.parseGlobalsec(),
            7 => self.parseExportsec(),
            8 => self.parseStartsec(),
            9 => self.parseElemsec(),
            10 => self.parseCodesec(),
            11 => self.parseDatasec(),
            12 => self.parseDatacountsec(),
            else => return Error.invalid_section,
        };
    }

    return .{
        .memory = .{
            .min = self.memory.?.lim.min,
            .max = self.memory.?.lim.max,
        },
        .exports = self.exports,
        .funcs = try self.funcs.toOwnedSlice(self.allocator),
        .types = try self.types.toOwnedSlice(self.allocator),
        .functions = try self.functions.toOwnedSlice(self.allocator),
        .imports = try self.imports.toOwnedSlice(self.allocator),
        .code = try self.code.toOwnedSlice(self.allocator),
    };
}

fn parseCustomsec(self: *Parser) !void {
    self.warn("customsec");
    const size = try self.readU32();
    _ = try self.read(size);
}

fn parseTypesec(self: *Parser) !void {
    const size = try self.readU32();
    const end_idx = self.byte_idx + size;

    const ft = try self.parseVector(Parser.parseFunctype);
    // TODO: Maybe the interface should be better?
    try self.types.appendSlice(self.allocator, ft);

    // TODO: run this check not only on debug
    std.debug.assert(self.byte_idx == end_idx);
}

pub const Import = struct {
    name: []const u8,
    module: []const u8,
    importdesc: union { func: u32, table: Tabletype, mem: Memtype, global: Globaltype },
    pub fn deinit(self: Import, allocator: Allocator) void {
        allocator.free(self.name);
        allocator.free(self.module);
    }
};
fn parseImport(self: *Parser) !Import {
    return .{
        // NOTE: in the binary format the module name comes before the import name
        .module = try self.readName(),
        .name = try self.readName(),
        .importdesc = switch (try self.readByte()) {
            0x00 => .{ .func = try self.readU32() },
            0x01 => .{ .table = try self.parseTabletype() },
            0x02 => .{ .mem = try self.parseMemtype() },
            0x03 => .{ .global = try self.parseGlobaltype() },
            else => return Error.invalid_importdesc,
        },
    };
}

fn parseImportsec(self: *Parser) !void {
    const size = try self.readU32();
    const end_idx = self.byte_idx + size;

    const imports = try self.parseVector(Parser.parseImport);
    try self.imports.appendSlice(self.allocator, imports);

    // TODO: run this check not only on debug
    std.debug.assert(self.byte_idx == end_idx);
}

fn parseFuncsec(self: *Parser) !void {
    const size = try self.readU32();
    const end_idx = self.byte_idx + size;

    const types = try self.parseVector(Parser.readU32);
    try self.functions.appendSlice(self.allocator, types);

    // TODO: run this check not only on debug
    std.debug.assert(self.byte_idx == end_idx);
}

fn parseTablesec(self: *Parser) !void {
    self.warn("tablesec");
    const size = try self.readU32();
    _ = try self.read(size);
}

fn parseMemsec(self: *Parser) !void {
    const size = try self.readU32();
    const end_idx = self.byte_idx + size;

    const mems = try self.parseVector(Parser.parseMemtype);
    if (mems.len == 0) {
        // WTF?
    } else if (mems.len == 1) {
        self.memory = mems[0];
    } else {
        std.debug.print("[WARN]: Parsing more than one memory is not yet supported\n", .{});
    }

    // TODO: run this check not only on debug
    std.debug.assert(self.byte_idx == end_idx);
}

fn parseGlobalsec(self: *Parser) !void {
    self.warn("globalsec");
    const size = try self.readU32();
    _ = try self.read(size);
}

pub const Export = struct {
    name: []const u8,
    exportdesc: union(enum) { func: u32, table: u32, mem: u32, global: u32 },
    pub fn deinit(self: Export, allocator: Allocator) void {
        allocator.free(self.name);
    }
};

fn parseExport(self: *Parser) !Export {
    return .{
        .name = try self.readName(),
        .exportdesc = switch (try self.readByte()) {
            0x00 => .{ .func = try self.readU32() },
            0x01 => .{ .table = try self.readU32() },
            0x02 => .{ .mem = try self.readU32() },
            0x03 => .{ .global = try self.readU32() },
            else => return Error.invalid_exportdesc,
        },
    };
}

fn parseExportsec(self: *Parser) !void {
    const size = try self.readU32();
    const end_idx = self.byte_idx + size;

    const exports = try self.parseVector(Parser.parseExport);
    for (exports) |e| {
        switch (e.exportdesc) {
            .func => try self.exports.put(self.allocator, e.name, e.exportdesc.func),
            else => std.debug.print("[WARN]: export ignored\n", .{}),
        }
    }

    // TODO: run this check not only on debug
    std.debug.assert(self.byte_idx == end_idx);
}

fn parseStartsec(self: *Parser) !void {
    self.warn("startsec");
    const size = try self.readU32();
    _ = try self.read(size);
}

fn parseElemsec(self: *Parser) !void {
    self.warn("elemsec");
    const size = try self.readU32();
    _ = try self.read(size);
}

pub const Func = struct {
    locals: []Valtype,
    code: []const u8,
};
const Local = struct {
    n: u32,
    t: Valtype,
};
fn parseLocal(self: *Parser) !Local {
    return .{
        .n = try self.readU32(),
        .t = try self.parseValtype(),
    };
}

fn parseCode(self: *Parser) !Func {
    const size = try self.readU32();
    const end_idx = self.byte_idx + size;

    const locals = try self.parseVector(Parser.parseLocal);
    var local_count: usize = 0;
    for (locals) |l| {
        local_count += l.n;
    }

    const func = Func{
        .locals = try self.allocator.alloc(Valtype, local_count),
        .code = try self.read(end_idx - self.byte_idx),
    };

    var li: usize = 0;
    for (locals) |l| {
        @memset(func.locals[li .. li + l.n], l.t);
        li += l.n;
    }

    // TODO: run this check not only on debug
    std.debug.assert(self.byte_idx == end_idx);

    return func;
}

fn parseCodesec(self: *Parser) !void {
    const size = try self.readU32();
    const end_idx = self.byte_idx + size;

    const codes = try self.parseVector(Parser.parseCode);
    for (codes, 0..) |_, i| {
        try self.funcs.append(self.allocator, .{ .internal = @intCast(i) });
    }
    try self.code.appendSlice(self.allocator, codes);

    // TODO: run this check not only on debug
    std.debug.assert(self.byte_idx == end_idx);
}

fn parseDatasec(self: *Parser) !void {
    self.warn("datasec");
    const size = try self.readU32();
    _ = try self.read(size);
}

fn parseDatacountsec(self: *Parser) !void {
    self.warn("datacountsec");
    const size = try self.readU32();
    _ = try self.read(size);
}
src/mods/parse.zig (file deleted, 334 lines)
@@ -1,334 +0,0 @@
const std = @import("std");
const wasm = @import("wasm.zig");
const Allocator = std.mem.Allocator;

pub fn leb128Result(T: type) type {
    return struct { len: usize, val: T };
}

pub fn leb128Decode(comptime T: type, stream: anytype) !leb128Result(T) {
    switch (@typeInfo(T)) {
        .int => {},
        else => @compileError("LEB128 integer decoding only support integers, but got " ++ @typeName(T)),
    }
    if (@typeInfo(T).int.bits != 32 and @typeInfo(T).int.bits != 64) {
        @compileError("LEB128 integer decoding only supports 32 or 64 bits integers but got " ++ std.fmt.comptimePrint("{d} bits", .{@typeInfo(T).int.bits}));
    }

    var result: T = 0;
    // TODO: is the type of shift important. Reading Wikipedia (not very much tho) it seems like we can use u32 and call it a day...
    var shift: if (@typeInfo(T).int.bits == 32) u5 else u6 = 0;
    var byte: u8 = undefined;
    var len: usize = 0;
    while (stream.readByte()) |b| {
        len += 1;
        result |= @as(T, @intCast((b & 0x7f))) << shift;
        if ((b & (0x1 << 7)) == 0) {
            byte = b;
            break;
        }
        shift += 7;
    } else |err| {
        return err;
    }

    if (@typeInfo(T).int.signedness == .signed) {
        const size = @sizeOf(T) * 8;
        if (shift < size and (byte & 0x40) != 0) {
            result |= (~@as(T, 0) << shift);
        }
    }

    return .{ .len = len, .val = result };
}

pub const Error = error{
    malformed_wasm,
    invalid_utf8,
};

pub const Module = struct {
    types: []FunctionType,
    imports: std.ArrayList(Import),
    exports: std.StringHashMap(u32),
    functions: []u32,
    memory: Memory,
    code: []FunctionBody,
    funcs: std.ArrayList(Function),

    pub fn deinit(self: *Module, allocator: Allocator) void {
        for (self.types) |t| {
            t.deinit(allocator);
        }
        allocator.free(self.types);

        for (self.imports.items) |i| {
            i.deinit(allocator);
        }
        self.imports.deinit();

        var iter = self.exports.iterator();
        while (iter.next()) |entry| {
            allocator.free(entry.key_ptr.*);
        }
        self.exports.deinit();

        allocator.free(self.functions);

        for (self.code) |f| {
            for (f.locals) |l| {
                allocator.free(l.types);
            }
            allocator.free(f.code);
        }
        allocator.free(self.code);

        self.funcs.deinit();
    }
};

pub const FunctionScope = enum {
    external,
    internal,
};

pub const Function = union(FunctionScope) {
    external: u8,
    internal: u8,
};

// TODO: refactor locals
pub const Local = struct {
    types: []u8,
};

pub const FunctionBody = struct {
    locals: []Local,
    code: []u8,
};

pub const Memory = struct {
    initial: u32,
    max: u32,
};

pub const FunctionType = struct {
    parameters: []u8,
    results: []u8,

    pub fn deinit(self: FunctionType, allocator: Allocator) void {
        allocator.free(self.parameters);
        allocator.free(self.results);
    }
};

pub const Import = struct {
    name: []u8,
    module: []u8,
    signature: u32,

    pub fn deinit(self: Import, allocator: Allocator) void {
        allocator.free(self.name);
        allocator.free(self.module);
    }
};

pub fn parseType(t: u8) wasm.Type {
    return @enumFromInt(t);
}

pub fn parseName(allocator: Allocator, stream: anytype) ![]u8 {
    const size = try std.leb.readULEB128(u32, stream);
    const str = try allocator.alloc(u8, size);
    if (try stream.read(str) != size) {
        // TODO: better error
        return Error.malformed_wasm;
    }

    if (!std.unicode.utf8ValidateSlice(str)) return Error.invalid_utf8;

    return str;
}

// TODO: parse Global Section
// TODO: Consider Arena allocator
pub fn parseWasm(allocator: Allocator, stream: anytype) !Module {
    var types: []FunctionType = undefined;
    var imports = std.ArrayList(Import).init(allocator);
    var exports = std.StringHashMap(u32).init(allocator);
    var funcs = std.ArrayList(Function).init(allocator);
    var functions: []u32 = undefined;
    var memory: Memory = undefined;
    var code: []FunctionBody = undefined;

    // Parse magic
    if (!(try stream.isBytes(&[_]u8{ 0x00, 0x61, 0x73, 0x6d }))) return Error.malformed_wasm;
    // Parse version
    if (!(try stream.isBytes(&[_]u8{ 0x01, 0x00, 0x00, 0x00 }))) return Error.malformed_wasm;

    // NOTE: This ensures that (in this block) illegal behavior is safety-checked.
    // This slows down the code but since this function is only called at the start
    // I believe it is better to take the ``hit'' in performance (should only be @enumFromInt)
    // rather than having undefined behavior when user provides an invalid wasm file.
    @setRuntimeSafety(true);
    loop: while (stream.readByte()) |byte| {
        const section_size = try std.leb.readULEB128(u32, stream);
        switch (@as(std.wasm.Section, @enumFromInt(byte))) {
            std.wasm.Section.custom => {
                // TODO: unimplemented
                break :loop;
            },
            std.wasm.Section.type => {
                const type_count = try std.leb.readULEB128(u32, stream);
                types = try allocator.alloc(FunctionType, type_count);
                for (types) |*t| {
                    if (!(try stream.isBytes(&.{0x60}))) return Error.malformed_wasm;
                    const params_count = try std.leb.readULEB128(u32, stream);
                    t.parameters = try allocator.alloc(u8, params_count);
                    if (try stream.read(t.parameters) != params_count) {
                        // TODO: better errors
                        return Error.malformed_wasm;
                    }
                    const results = try std.leb.readULEB128(u32, stream);
                    t.results = try allocator.alloc(u8, results);
                    if (try stream.read(t.results) != results) {
                        // TODO: better errors
                        return Error.malformed_wasm;
                    }
                }
            },
            std.wasm.Section.import => {
                // Can there be more than one import section?
                const import_count = try std.leb.readULEB128(u32, stream);
                for (0..import_count) |i| {
                    const mod = try parseName(allocator, stream);
                    const nm = try parseName(allocator, stream);

                    const b = try stream.readByte();
                    switch (@as(std.wasm.ExternalKind, @enumFromInt(b))) {
                        std.wasm.ExternalKind.function => {
                            try funcs.append(.{ .external = @intCast(i) });

                            const idx = try std.leb.readULEB128(u32, stream);
                            try imports.append(.{
                                .module = mod,
                                .name = nm,
                                .signature = idx,
                            });
                        },
                        // TODO: not implemented
                        std.wasm.ExternalKind.table => try stream.skipBytes(3, .{}),
                        std.wasm.ExternalKind.memory => try stream.skipBytes(2, .{}),
                        std.wasm.ExternalKind.global => try stream.skipBytes(2, .{}),
                    }
                }
            },
            std.wasm.Section.function => {
                const function_count = try std.leb.readULEB128(u32, stream);
                functions = try allocator.alloc(u32, function_count);
                for (functions) |*f| {
                    f.* = try std.leb.readULEB128(u32, stream);
                }
            },
            std.wasm.Section.table => {
                // TODO: not implemented
                try stream.skipBytes(section_size, .{});
            },
            std.wasm.Section.memory => {
                const memory_count = try std.leb.readULEB128(u32, stream);
                for (0..memory_count) |_| {
                    const b = try stream.readByte();
                    const n = try std.leb.readULEB128(u32, stream);
                    var m: u32 = 0;
                    switch (b) {
                        0x00 => {},
                        0x01 => m = try std.leb.readULEB128(u32, stream),
                        else => return Error.malformed_wasm,
                    }
                    // TODO: support multiple memories
                    memory = .{
                        .initial = n,
                        .max = m,
                    };
                }
            },
            std.wasm.Section.global => {
                // TODO: unimplemented
                try stream.skipBytes(section_size, .{});
            },
            // TODO: Can there be more than one export section? Otherwise we can optimize allocations
            std.wasm.Section.@"export" => {
                const export_count = try std.leb.readULEB128(u32, stream);
                for (0..export_count) |_| {
                    const nm = try parseName(allocator, stream);
                    const b = try stream.readByte();
                    const idx = try std.leb.readULEB128(u32, stream);
                    switch (@as(std.wasm.ExternalKind, @enumFromInt(b))) {
                        std.wasm.ExternalKind.function => try exports.put(nm, idx),
                        // TODO: unimplemented,
                        std.wasm.ExternalKind.table => allocator.free(nm),
                        std.wasm.ExternalKind.memory => allocator.free(nm),
                        std.wasm.ExternalKind.global => allocator.free(nm),
                    }
                }
            },
            std.wasm.Section.start => {
                // TODO: unimplemented
                try stream.skipBytes(section_size, .{});
            },
            std.wasm.Section.element => {
                // TODO: unimplemented
                try stream.skipBytes(section_size, .{});
            },
            std.wasm.Section.code => {
                const code_count = try std.leb.readULEB128(u32, stream);
                code = try allocator.alloc(FunctionBody, code_count);
                for (0..code_count) |i| {
                    const code_size = try std.leb.readULEB128(u32, stream);
                    var locals_size: usize = 0;
                    const local_count = try leb128Decode(u32, stream);
                    locals_size += local_count.len;
                    const locals = try allocator.alloc(Local, local_count.val);
                    for (locals) |*l| {
                        const n = try leb128Decode(u32, stream);
                        l.types = try allocator.alloc(u8, n.val);
                        @memset(l.types, try stream.readByte());
                        locals_size += n.len + 1;
                    }
                    code[i].locals = locals;

                    // TODO: maybe is better to parse code into ast here and not do it every frame?
                    // FIXME: This calculation is plain wrong. Resolving above TODO should help
                    code[i].code = try allocator.alloc(u8, code_size - locals_size);
                    // TODO: better error reporting
                    if (try stream.read(code[i].code) != code_size - locals_size) return Error.malformed_wasm;

                    const f = Function{ .internal = @intCast(i) };
                    try funcs.append(f);
                }
            },
            std.wasm.Section.data => {
                // TODO: unimplemented
                try stream.skipBytes(section_size, .{});
            },
            std.wasm.Section.data_count => {
                // TODO: unimplemented
                try stream.skipBytes(section_size, .{});
            },
            else => return Error.malformed_wasm,
        }
    } else |err| switch (err) {
        error.EndOfStream => {},
        else => return err,
    }

    return Module{
        .types = types,
        .imports = imports,
        .functions = functions,
        .memory = memory,
        .exports = exports,
        .code = code,
        .funcs = funcs,
    };
}
src/mods/vm.zig (104 changed lines)
@@ -1,13 +1,83 @@
 const std = @import("std");
 const wasm = @import("wasm.zig");
-const Parser = @import("parse.zig");
+const Parser = @import("Parser.zig");
 const Allocator = std.mem.Allocator;
 const AllocationError = error{OutOfMemory};

-fn leb128Decode(comptime T: type, bytes: []u8) Parser.leb128Result(T) {
+pub const Memory = struct {
+    min: u32,
+    max: ?u32,
+};
+// TODO: Resolve function calls at parse time
+// TODO: Resolve function types at compile time
+pub const Func = union(enum) {
+    internal: u32,
+    external: u32,
+};
+
+pub const Module = struct {
+    memory: Memory,
+    funcs: []Func,
+    exports: std.StringHashMapUnmanaged(u32),
+    imports: []Parser.Import,
+    types: []Parser.Functype,
+    functions: []u32,
+    code: []Parser.Func,
+
+    fn deinit(self: *Module, allocator: Allocator) void {
+        self.exports.deinit(allocator);
+        allocator.free(self.funcs);
+        allocator.free(self.imports);
+        allocator.free(self.types);
+        allocator.free(self.functions);
+        allocator.free(self.code);
+    }
+};
+
+pub fn leb128Result(T: type) type {
+    return struct { len: usize, val: T };
+}
+
+pub fn leb128Decode_stream(comptime T: type, stream: anytype) !leb128Result(T) {
+    switch (@typeInfo(T)) {
+        .int => {},
+        else => @compileError("LEB128 integer decoding only support integers, but got " ++ @typeName(T)),
+    }
+    if (@typeInfo(T).int.bits != 32 and @typeInfo(T).int.bits != 64) {
+        @compileError("LEB128 integer decoding only supports 32 or 64 bits integers but got " ++ std.fmt.comptimePrint("{d} bits", .{@typeInfo(T).int.bits}));
+    }
+
+    var result: T = 0;
+    // TODO: is the type of shift important. Reading Wikipedia (not very much tho) it seems like we can use u32 and call it a day...
+    var shift: if (@typeInfo(T).int.bits == 32) u5 else u6 = 0;
+    var byte: u8 = undefined;
+    var len: usize = 0;
+    while (stream.readByte()) |b| {
+        len += 1;
+        result |= @as(T, @intCast((b & 0x7f))) << shift;
+        if ((b & (0x1 << 7)) == 0) {
+            byte = b;
+            break;
+        }
+        shift += 7;
+    } else |err| {
+        return err;
+    }
+
+    if (@typeInfo(T).int.signedness == .signed) {
+        const size = @sizeOf(T) * 8;
+        if (shift < size and (byte & 0x40) != 0) {
+            result |= (~@as(T, 0) << shift);
+        }
+    }
+
+    return .{ .len = len, .val = result };
+}
+
+fn leb128Decode(comptime T: type, bytes: []const u8) leb128Result(T) {
     var fbs = std.io.fixedBufferStream(bytes);
     // TODO: this catch should be unreachable
-    return Parser.leb128Decode(T, fbs.reader()) catch .{ .len = 0, .val = 0 };
+    return leb128Decode_stream(T, fbs.reader()) catch .{ .len = 0, .val = 0 };
 }

 pub fn decodeLittleEndian(comptime T: type, bytes: []u8) T {
@@ -30,7 +100,7 @@ pub fn encodeLittleEndian(comptime T: type, bytes: *[]u8, value: T) void {

 pub const CallFrame = struct {
     program_counter: usize,
-    code: []u8,
+    code: []const u8,
     locals: []Value,
 };

@@ -45,15 +115,20 @@ pub const Value = union(ValueType) {
 };

 pub const Runtime = struct {
-    module: Parser.Module,
+    module: Module,
     stack: std.ArrayList(Value),
     call_stack: std.ArrayList(CallFrame),
     memory: []u8,
     global_runtime: *wasm.GlobalRuntime,
     labels: std.ArrayList(usize),

-    pub fn init(allocator: Allocator, module: Parser.Module, global_runtime: *wasm.GlobalRuntime) !Runtime {
-        const memory = try allocator.alloc(u8, module.memory.max);
+    pub fn init(allocator: Allocator, module: Module, global_runtime: *wasm.GlobalRuntime) !Runtime {
+        // if memory max is not set, the memory is allowed to grow, but that is not supported at the moment
+        const max = module.memory.max orelse 1_000;
+        if (module.memory.max == null) {
+            std.debug.print("[WARN]: growing memory is not yet supported, using a default value of 1 KB\n", .{});
+        }
+        const memory = try allocator.alloc(u8, max);
         return Runtime{
             .module = module,
             .stack = try std.ArrayList(Value).initCapacity(allocator, 10),
@@ -492,6 +567,7 @@ pub const Runtime = struct {
         }
     }

+    // TODO: Do name resolution
    pub fn callExternal(self: *Runtime, allocator: Allocator, name: []const u8, parameters: []usize) !void {
        if (self.module.exports.get(name)) |function| {
            try self.call(allocator, function, parameters);
@@ -501,7 +577,7 @@
     }

     pub fn call(self: *Runtime, allocator: Allocator, function: usize, parameters: []usize) AllocationError!void {
-        const f = self.module.funcs.items[function];
+        const f = self.module.funcs[function];
         switch (f) {
             .internal => {
                 const function_type = self.module.types[self.module.functions[f.internal]];
@@ -512,7 +588,8 @@
             };

             for (parameters, 0..) |p, i| {
-                switch (Parser.parseType(function_type.parameters[i])) {
+                switch (function_type.parameters[i]) {
+                    .val => |v| switch (v) {
                         .i32 => {
                             frame.locals[i] = .{ .i32 = @intCast(p) };
                         },
@@ -520,11 +597,14 @@
                             frame.locals[i] = .{ .i64 = @intCast(p) };
                         },
                         else => unreachable,
+                    },
+                    .ref => unreachable,
                 }
             }

             for (self.module.code[f.internal].locals, function_type.parameters.len..) |local, i| {
-                switch (Parser.parseType(local.types[0])) {
+                switch (local) {
+                    .val => |v| switch (v) {
                         .i32 => {
                             frame.locals[i] = .{ .i32 = 0 };
                         },
@@ -532,6 +612,8 @@
                             frame.locals[i] = .{ .i64 = 0 };
                         },
                         else => unreachable,
+                    },
+                    .ref => unreachable,
                 }
             }

@@ -540,7 +622,7 @@
                 allocator.free(frame.locals);
             },
             .external => {
-                const name = self.module.imports.items[f.external].name;
+                const name = self.module.imports[f.external].name;
                 if (self.global_runtime.functions.get(name)) |external| {
                     external(&self.stack);
                 }