Big rework of the parser!

It now follows a more functional style, which
should make it much easier to add functionality.

The parser is probably a bit slower than the
previous one, but the code is much cleaner, and a
good enough compiler should be able to inline the
function calls and bring it on par with the
previous one.

As a TODO, runtime structs should not depend on
the parser, but I think that is a topic for
another commit.
This commit is contained in:
Ernesto Lanchares 2025-03-23 13:38:57 +00:00 committed by Lorenzo Torres
parent 00d695e5f0
commit b7854d7325
4 changed files with 605 additions and 362 deletions

View file

@ -5,7 +5,7 @@ const window = @import("rendering/window.zig");
const config = @import("config"); const config = @import("config");
const Renderer = @import("rendering/renderer_vulkan.zig"); const Renderer = @import("rendering/renderer_vulkan.zig");
const math = @import("math.zig"); const math = @import("math.zig");
const Parser = @import("mods/parse.zig"); const Parser = @import("mods/Parser.zig");
const vm = @import("mods/vm.zig"); const vm = @import("mods/vm.zig");
const wasm = @import("mods/wasm.zig"); const wasm = @import("mods/wasm.zig");
const components = @import("ecs/components.zig"); const components = @import("ecs/components.zig");
@ -29,7 +29,16 @@ pub fn main() !void {
//try global_runtime.addFunction("debug", wasm.debug); //try global_runtime.addFunction("debug", wasm.debug);
// const file = try std.fs.cwd().openFile("assets/core.wasm", .{}); // const file = try std.fs.cwd().openFile("assets/core.wasm", .{});
//const module = try Parser.parseWasm(allocator, file.reader()); // const all = try file.readToEndAlloc(allocator, 1_000_000); // 1 MB
// var parser = Parser{
// .bytes = all,
// .byte_idx = 0,
// .allocator = allocator,
// };
// const module = parser.parseModule() catch |err| {
// std.debug.print("[ERROR]: error at byte {x}(0x{x})\n", .{ parser.byte_idx, parser.bytes[parser.byte_idx] });
// return err;
// };
// var runtime = try vm.Runtime.init(allocator, module, &global_runtime); // var runtime = try vm.Runtime.init(allocator, module, &global_runtime);
// defer runtime.deinit(allocator); // defer runtime.deinit(allocator);

486
src/mods/Parser.zig Normal file
View file

@ -0,0 +1,486 @@
const std = @import("std");
const vm = @import("vm.zig");
const Allocator = std.mem.Allocator;
/// Bytes of the wasm binary being parsed.
bytes: []const u8,
/// Offset into `bytes` of the next byte to consume.
byte_idx: usize,
/// Allocator used for every allocation the parser performs.
allocator: Allocator,
// TODO: We don't really need ArrayLists
/// Function types collected by the type section.
types: std.ArrayListUnmanaged(Functype) = .{},
/// Imports collected by the import section.
imports: std.ArrayListUnmanaged(Import) = .{},
/// Exported functions: export name -> function index (non-function exports are ignored).
exports: std.StringHashMapUnmanaged(u32) = .{},
/// One `u32` per entry of the function section.
functions: std.ArrayListUnmanaged(u32) = .{},
/// Memory type from the memory section; stays `null` when none was parsed.
memory: ?Memtype = null,
/// Function bodies collected by the code section.
code: std.ArrayListUnmanaged(Func) = .{},
/// One `.internal` entry per parsed function body.
funcs: std.ArrayListUnmanaged(vm.Func) = .{},
/// Function signature with parameter and result types stored as raw bytes.
/// NOTE(review): nothing else in this file references it; it looks like a
/// leftover from the previous parser — confirm before relying on it.
pub const FunctionType = struct {
parameters: []u8,
results: []u8,
/// Frees both slices with `allocator`.
pub fn deinit(self: FunctionType, allocator: Allocator) void {
allocator.free(self.parameters);
allocator.free(self.results);
}
};
/// Function body as a list of `Local` groups plus its raw code bytes.
/// NOTE(review): not referenced elsewhere in this file (the code section
/// produces `Func` instead) — presumably a leftover; confirm.
pub const FunctionBody = struct {
locals: []Local,
code: []u8,
};
/// Distinguishes external from internal functions.
/// NOTE(review): not referenced elsewhere in this file — confirm it is still needed.
pub const FunctionScope = enum {
external,
internal,
};
const Parser = @This();
/// Errors reported by the parser for malformed or truncated input.
pub const Error = error{
// first four bytes are not `\0asm`
invalid_magic,
// version field is not 1
invalid_version,
// unknown section id
invalid_section,
// function type does not start with 0x60
invalid_functype,
invalid_vectype,
invalid_numtype,
invalid_reftype,
invalid_valtype,
// name is not valid UTF-8
invalid_string,
// limits flag is neither 0x00 nor 0x01
invalid_limits,
// mutability byte is neither 0x00 nor 0x01
invalid_globaltype,
invalid_importdesc,
invalid_exportdesc,
// input ended before the requested bytes could be read
unterminated_wasm,
};
// TODO: This function should not exist
/// Prints a warning that parsing of `s` is unimplemented, together with the
/// current byte offset.
fn warn(self: Parser, s: []const u8) void {
    const offset = self.byte_idx;
    std.debug.print("[WARN]: Parsing of {s} unimplemented at byte index {d}\n", .{ s, offset });
}
// TODO: remove peek
/// Returns the next byte without consuming it, or `null` at end of input.
fn peek(self: Parser) ?u8 {
    if (self.byte_idx >= self.bytes.len) return null;
    return self.bytes[self.byte_idx];
}
/// Consumes the next `n` bytes and returns them as a slice into `bytes`
/// (no copy is made). Returns `Error.unterminated_wasm` when fewer than `n`
/// bytes remain; the cursor is left untouched in that case.
fn read(self: *Parser, n: usize) ![]const u8 {
    const start = self.byte_idx;
    // Compare against the remaining length instead of computing `start + n`,
    // which could wrap around for a large `n` on targets with a narrow usize.
    if (start > self.bytes.len or n > self.bytes.len - start) return Error.unterminated_wasm;
    self.byte_idx = start + n;
    return self.bytes[start..self.byte_idx];
}
// ==========
// = VALUES =
// ==========
/// Consumes and returns a single byte; fails with the error of `read` at end
/// of input.
pub fn readByte(self: *Parser) !u8 {
    const one = try self.read(1);
    return one[0];
}
/// Decodes an unsigned LEB128 `u32`, using the parser itself as the byte
/// source (`std.leb` pulls bytes through `readByte`).
fn readU32(self: *Parser) !u32 {
    const value = try std.leb.readUleb128(u32, self);
    return value;
}
/// Reads a length-prefixed UTF-8 name and returns a freshly allocated copy.
/// Caller owns the returned slice.
///
/// Fails with `Error.invalid_string` when the bytes are not valid UTF-8 and
/// with the error of `read` when the input is truncated.
fn readName(self: *Parser) ![]const u8 {
    // NOTE: This should be the only vector not parsed through parseVector
    const size = try self.readU32();
    // Read and validate before allocating so that no buffer is leaked when
    // the input is truncated or not valid UTF-8.
    const raw = try self.read(size);
    if (!std.unicode.utf8ValidateSlice(raw)) return Error.invalid_string;
    return try self.allocator.dupe(u8, raw);
}
// =========
// = TYPES =
// =========
// NOTE: This should return a value
/// Computes the element type produced by `parse_fn`: its return type, with
/// the error union stripped when there is one. Emits a compile error if
/// `parse_fn` is not a function.
fn VectorFnResult(parse_fn: anytype) type {
    const FnType = @TypeOf(parse_fn);
    const fn_info = @typeInfo(FnType);
    if (fn_info != .@"fn") {
        @compileError("cannot determine return type of " ++ @typeName(FnType));
    }
    const Ret = fn_info.@"fn".return_type.?;
    return switch (@typeInfo(Ret)) {
        .error_union => |eu| eu.payload,
        else => Ret,
    };
}
/// Parses a wasm vector: a `u32` element count followed by that many
/// elements, each parsed by `parse_fn(self)`. Caller owns the returned slice.
///
/// If an element fails to parse, the slice itself is released before the
/// error propagates. Allocations owned by already-parsed elements are not
/// released here.
fn parseVector(self: *Parser, parse_fn: anytype) ![]VectorFnResult(parse_fn) {
    const n = try self.readU32();
    const ret = try self.allocator.alloc(VectorFnResult(parse_fn), n);
    errdefer self.allocator.free(ret);
    for (ret) |*i| {
        i.* = try parse_fn(self);
    }
    return ret;
}
/// Parses a number type byte: 0x7F i32, 0x7E i64, 0x7D f32, 0x7C f64.
/// Any other byte yields `Error.invalid_numtype`.
fn parseNumtype(self: *Parser) !std.wasm.Valtype {
    return switch (try self.readByte()) {
        0x7F => .i32,
        // 0x7E encodes i64; it was previously mapped to i32 by mistake.
        0x7E => .i64,
        0x7D => .f32,
        0x7C => .f64,
        else => Error.invalid_numtype,
    };
}
/// Parses a vector type byte; only 0x7B (v128) is accepted, anything else
/// yields `Error.invalid_vectype`.
fn parseVectype(self: *Parser) !std.wasm.Valtype {
    const tag = try self.readByte();
    if (tag != 0x7B) return Error.invalid_vectype;
    return std.wasm.Valtype.v128;
}
/// Parses a reference type byte: 0x70 funcref, 0x6F externref. Any other
/// byte yields `Error.invalid_reftype`.
fn parseReftype(self: *Parser) !std.wasm.RefType {
    const tag = try self.readByte();
    if (tag == 0x70) return std.wasm.RefType.funcref;
    if (tag == 0x6F) return std.wasm.RefType.externref;
    return Error.invalid_reftype;
}
// NOTE: Parsing of Valtype can be improved but it makes it less close to spec so...
// TODO: Do we really need Valtype?
/// A value type: either a number/vector type (`val`) or a reference type (`ref`).
const Valtype = union(enum) {
val: std.wasm.Valtype,
ref: std.wasm.RefType,
};
/// Parses a value type by peeking at the next byte and dispatching to the
/// number, vector or reference type parser (which then consumes the byte).
/// Yields `Error.unterminated_wasm` at end of input and
/// `Error.invalid_valtype` for an unknown tag.
fn parseValtype(self: *Parser) !Valtype {
    const tag = self.peek() orelse return Error.unterminated_wasm;
    switch (tag) {
        0x7C...0x7F => return Valtype{ .val = try self.parseNumtype() },
        0x7B => return Valtype{ .val = try self.parseVectype() },
        0x6F, 0x70 => return Valtype{ .ref = try self.parseReftype() },
        else => return Error.invalid_valtype,
    }
}
/// Parses a result type, i.e. a vector of value types. Caller owns the slice.
fn parseResultType(self: *Parser) ![]Valtype {
    const valtypes = try self.parseVector(Parser.parseValtype);
    return valtypes;
}
/// Function signature: parameter types and result types (`rt2`).
pub const Functype = struct {
parameters: []Valtype,
// result types (second result type parsed by parseFunctype)
rt2: []Valtype,
/// Frees both type slices with `allocator`.
pub fn deinit(self: Functype, allocator: Allocator) void {
allocator.free(self.parameters);
allocator.free(self.rt2);
}
};
/// Parses a function type: the 0x60 marker followed by the parameter and
/// result type vectors. Caller owns both slices of the returned `Functype`.
///
/// Yields `Error.invalid_functype` when the marker byte is missing.
fn parseFunctype(self: *Parser) !Functype {
    if (try self.readByte() != 0x60) return Error.invalid_functype;
    const parameters = try self.parseResultType();
    // Do not leak the parameters when parsing the results fails.
    errdefer self.allocator.free(parameters);
    const results = try self.parseResultType();
    return .{
        .parameters = parameters,
        .rt2 = results,
    };
}
/// Size limits: a minimum and an optional maximum (`null` when absent).
const Limits = struct {
min: u32,
max: ?u32,
};
/// Parses limits: flag 0x00 is followed by a minimum only, flag 0x01 by a
/// minimum and a maximum. Any other flag yields `Error.invalid_limits`.
fn parseLimits(self: *Parser) !Limits {
    const flag = try self.readByte();
    if (flag > 0x01) return Error.invalid_limits;
    const min = try self.readU32();
    const max: ?u32 = if (flag == 0x01) try self.readU32() else null;
    return Limits{ .min = min, .max = max };
}
/// Memory type: just its limits.
const Memtype = struct {
lim: Limits,
};
/// Parses a memory type, which consists solely of limits.
fn parseMemtype(self: *Parser) !Memtype {
    const lim = try self.parseLimits();
    return Memtype{ .lim = lim };
}
/// Table type: element reference type plus limits.
const Tabletype = struct {
et: std.wasm.RefType,
lim: Limits,
};
/// Parses a table type: a reference type followed by limits.
fn parseTabletype(self: *Parser) !Tabletype {
    const element_type = try self.parseReftype();
    const limits = try self.parseLimits();
    return Tabletype{ .et = element_type, .lim = limits };
}
/// Global type: value type `t` and mutability `m`.
const Globaltype = struct {
t: Valtype,
// mutability flag as parsed by parseGlobaltype (0x00 const, 0x01 var)
m: enum {
@"const",
@"var",
},
};
/// Parses a global type: a value type followed by a mutability byte
/// (0x00 const, 0x01 var). Any other mutability byte yields
/// `Error.invalid_globaltype`.
fn parseGlobaltype(self: *Parser) !Globaltype {
    const value_type = try self.parseValtype();
    const mut_flag = try self.readByte();
    if (mut_flag == 0x00) return Globaltype{ .t = value_type, .m = .@"const" };
    if (mut_flag == 0x01) return Globaltype{ .t = value_type, .m = .@"var" };
    return Error.invalid_globaltype;
}
// ===========
// = MODULES =
// ===========
// NOTE: This should not return anything but modify IR
/// Parses a complete wasm binary from `bytes` and returns the resulting
/// module.
///
/// Checks the magic number and version, then dispatches on each section id
/// until the input is exhausted. Ownership of the accumulated lists is
/// transferred to the returned `vm.Module`.
///
/// Errors: `Error.invalid_magic`, `Error.invalid_version`,
/// `Error.invalid_section` for an unknown section id, plus whatever the
/// section parsers and the allocator report.
pub fn parseModule(self: *Parser) !vm.Module {
    if (!std.mem.eql(u8, try self.read(4), &.{ 0x00, 0x61, 0x73, 0x6d })) return Error.invalid_magic;
    if (!std.mem.eql(u8, try self.read(4), &.{ 0x01, 0x00, 0x00, 0x00 })) return Error.invalid_version;
    // TODO: Ensure only one section of each type (except for custom section), some code depends on it
    while (self.byte_idx < self.bytes.len) {
        try switch (try self.readByte()) {
            0 => self.parseCustomsec(),
            1 => self.parseTypesec(),
            2 => self.parseImportsec(),
            3 => self.parseFuncsec(),
            4 => self.parseTablesec(),
            5 => self.parseMemsec(),
            6 => self.parseGlobalsec(),
            7 => self.parseExportsec(),
            8 => self.parseStartsec(),
            9 => self.parseElemsec(),
            10 => self.parseCodesec(),
            11 => self.parseDatasec(),
            12 => self.parseDatacountsec(),
            else => return Error.invalid_section,
        };
    }
    // A module is not required to declare a memory. Unwrapping `self.memory`
    // unconditionally would panic on such modules, so fall back to an empty
    // memory (min = max = 0) instead.
    const limits: Limits = if (self.memory) |m| m.lim else .{ .min = 0, .max = 0 };
    return .{
        .memory = .{
            .min = limits.min,
            .max = limits.max,
        },
        .exports = self.exports,
        .funcs = try self.funcs.toOwnedSlice(self.allocator),
        .types = try self.types.toOwnedSlice(self.allocator),
        .functions = try self.functions.toOwnedSlice(self.allocator),
        .imports = try self.imports.toOwnedSlice(self.allocator),
        .code = try self.code.toOwnedSlice(self.allocator),
    };
}
/// Custom sections are not interpreted: emits a warning and skips the
/// section's payload.
fn parseCustomsec(self: *Parser) !void {
    self.warn("customsec");
    const section_len = try self.readU32();
    const skipped = try self.read(section_len);
    _ = skipped;
}
/// Parses the type section and appends its function types to `self.types`.
///
/// Returns `Error.invalid_section` when the bytes consumed do not match the
/// declared section size.
fn parseTypesec(self: *Parser) !void {
    const size = try self.readU32();
    const end_idx = self.byte_idx + size;
    const ft = try self.parseVector(Parser.parseFunctype);
    // The elements are copied into `self.types`; the temporary slice itself
    // must be released or it leaks.
    defer self.allocator.free(ft);
    // TODO: Maybe the interface should be better?
    try self.types.appendSlice(self.allocator, ft);
    // The input is untrusted, so a size mismatch is reported as an error in
    // every build mode instead of being a debug-only assertion.
    if (self.byte_idx != end_idx) return Error.invalid_section;
}
/// A single import entry: the two names read for it plus its descriptor.
pub const Import = struct {
name: []const u8,
module: []const u8,
// NOTE(review): untagged union — the active field cannot be queried by
// callers; presumably a tagged union is wanted here, confirm.
importdesc: union { func: u32, table: Tabletype, mem: Memtype, global: Globaltype },
/// Frees `name` and `module` with `allocator`.
pub fn deinit(self: Import, allocator: Allocator) void {
allocator.free(self.name);
allocator.free(self.module);
}
};
/// Parses one import entry: module name, field name, then the import
/// descriptor (0x00 func, 0x01 table, 0x02 mem, 0x03 global).
///
/// Caller owns `name` and `module` of the returned `Import`. Yields
/// `Error.invalid_importdesc` for an unknown descriptor tag.
fn parseImport(self: *Parser) !Import {
    // The binary format stores the module name first and the field name
    // second. Struct initialisers run in source order, so reading straight
    // into `.name` and then `.module` swapped the two.
    const module = try self.readName();
    errdefer self.allocator.free(module);
    const name = try self.readName();
    errdefer self.allocator.free(name);
    return .{
        .name = name,
        .module = module,
        .importdesc = switch (try self.readByte()) {
            0x00 => .{ .func = try self.readU32() },
            0x01 => .{ .table = try self.parseTabletype() },
            0x02 => .{ .mem = try self.parseMemtype() },
            0x03 => .{ .global = try self.parseGlobaltype() },
            else => return Error.invalid_importdesc,
        },
    };
}
/// Parses the import section and appends its entries to `self.imports`.
///
/// Returns `Error.invalid_section` when the bytes consumed do not match the
/// declared section size.
fn parseImportsec(self: *Parser) !void {
    const size = try self.readU32();
    const end_idx = self.byte_idx + size;
    const imports = try self.parseVector(Parser.parseImport);
    // Entries are copied into `self.imports`; release the temporary slice.
    defer self.allocator.free(imports);
    try self.imports.appendSlice(self.allocator, imports);
    // Untrusted input: report a size mismatch in every build mode.
    if (self.byte_idx != end_idx) return Error.invalid_section;
}
/// Parses the function section (a vector of `u32` indices) and appends the
/// indices to `self.functions`.
///
/// Returns `Error.invalid_section` when the bytes consumed do not match the
/// declared section size.
fn parseFuncsec(self: *Parser) !void {
    const size = try self.readU32();
    const end_idx = self.byte_idx + size;
    const types = try self.parseVector(Parser.readU32);
    // Indices are copied into `self.functions`; release the temporary slice.
    defer self.allocator.free(types);
    try self.functions.appendSlice(self.allocator, types);
    // Untrusted input: report a size mismatch in every build mode.
    if (self.byte_idx != end_idx) return Error.invalid_section;
}
/// Table sections are not supported yet: emits a warning and skips the
/// section's payload.
fn parseTablesec(self: *Parser) !void {
    self.warn("tablesec");
    const section_len = try self.readU32();
    const skipped = try self.read(section_len);
    _ = skipped;
}
/// Parses the memory section. A single memory is stored in `self.memory`;
/// an empty vector leaves it untouched and more than one memory only
/// produces a warning.
///
/// Returns `Error.invalid_section` when the bytes consumed do not match the
/// declared section size.
fn parseMemsec(self: *Parser) !void {
    const size = try self.readU32();
    const end_idx = self.byte_idx + size;
    const mems = try self.parseVector(Parser.parseMemtype);
    // Only a copy of the first element is kept; release the temporary slice.
    defer self.allocator.free(mems);
    switch (mems.len) {
        // WTF?
        0 => {},
        1 => self.memory = mems[0],
        else => std.debug.print("[WARN]: Parsing more than one memory is not yet supported\n", .{}),
    }
    // Untrusted input: report a size mismatch in every build mode.
    if (self.byte_idx != end_idx) return Error.invalid_section;
}
/// Global sections are not supported yet: emits a warning and skips the
/// section's payload.
fn parseGlobalsec(self: *Parser) !void {
    self.warn("globalsec");
    const section_len = try self.readU32();
    const skipped = try self.read(section_len);
    _ = skipped;
}
/// A single export entry: its name and a descriptor holding the index of
/// the exported function, table, memory or global.
pub const Export = struct {
    name: []const u8,
    exportdesc: union(enum) { func: u32, table: u32, mem: u32, global: u32 },

    /// Frees `name` with `allocator`.
    // The receiver used to be typed `Import`, which made `export.deinit(...)`
    // unusable as a method call on an `Export` value.
    pub fn deinit(self: Export, allocator: Allocator) void {
        allocator.free(self.name);
    }
};
/// Parses one export entry: its name followed by the export descriptor
/// (0x00 func, 0x01 table, 0x02 mem, 0x03 global) and an index.
///
/// Caller owns `name` of the returned `Export`. Yields
/// `Error.invalid_exportdesc` for an unknown descriptor tag.
fn parseExport(self: *Parser) !Export {
    const name = try self.readName();
    // Do not leak the name when the descriptor turns out to be invalid.
    errdefer self.allocator.free(name);
    return .{
        .name = name,
        .exportdesc = switch (try self.readByte()) {
            0x00 => .{ .func = try self.readU32() },
            0x01 => .{ .table = try self.readU32() },
            0x02 => .{ .mem = try self.readU32() },
            0x03 => .{ .global = try self.readU32() },
            else => return Error.invalid_exportdesc,
        },
    };
}
/// Parses the export section. Function exports are recorded in
/// `self.exports` (name -> function index, the map takes ownership of the
/// name); every other kind of export is ignored with a warning.
///
/// Returns `Error.invalid_section` when the bytes consumed do not match the
/// declared section size.
fn parseExportsec(self: *Parser) !void {
    const size = try self.readU32();
    const end_idx = self.byte_idx + size;
    const exports = try self.parseVector(Parser.parseExport);
    // The names move into the map (or are freed below); release the slice.
    defer self.allocator.free(exports);
    for (exports) |e| {
        switch (e.exportdesc) {
            .func => |func_idx| {
                const entry = try self.exports.getOrPut(self.allocator, e.name);
                // On a duplicate name the map keeps its existing key, so the
                // freshly allocated copy would leak. The last index still wins.
                if (entry.found_existing) self.allocator.free(e.name);
                entry.value_ptr.* = func_idx;
            },
            else => {
                std.debug.print("[WARN]: export ignored\n", .{});
                // Nothing keeps a reference to the name of an ignored export.
                self.allocator.free(e.name);
            },
        }
    }
    // Untrusted input: report a size mismatch in every build mode.
    if (self.byte_idx != end_idx) return Error.invalid_section;
}
/// Start sections are not supported yet: emits a warning and skips the
/// section's payload.
fn parseStartsec(self: *Parser) !void {
    self.warn("startsec");
    const section_len = try self.readU32();
    const skipped = try self.read(section_len);
    _ = skipped;
}
/// Element sections are not supported yet: emits a warning and skips the
/// section's payload.
fn parseElemsec(self: *Parser) !void {
    self.warn("elemsec");
    const section_len = try self.readU32();
    const skipped = try self.read(section_len);
    _ = skipped;
}
/// A parsed function body.
pub const Func = struct {
// one entry per declared local (local groups are expanded by parseCode)
locals: []Valtype,
// slice into the parser's input bytes, not a copy
code: []const u8,
};
/// A run of `n` locals that all share the value type `t`.
const Local = struct {
n: u32,
t: Valtype,
};
/// Parses one local group: a `u32` count followed by a value type.
fn parseLocal(self: *Parser) !Local {
    const count = try self.readU32();
    const value_type = try self.parseValtype();
    return Local{ .n = count, .t = value_type };
}
/// Parses one code entry: its byte size, the local groups and the
/// instruction bytes that fill the remainder of the entry.
///
/// The returned `Func.locals` is a freshly allocated slice with one element
/// per declared local (caller owns it); `Func.code` is a slice into the
/// parser's input and is not copied.
///
/// Returns `Error.invalid_section` when the local declarations run past the
/// declared entry size.
fn parseCode(self: *Parser) !Func {
    const size = try self.readU32();
    const end_idx = self.byte_idx + size;
    const locals = try self.parseVector(Parser.parseLocal);
    // The groups are only needed to build the expanded list below.
    defer self.allocator.free(locals);
    // Guard the subtraction below: a bogus size would otherwise underflow.
    if (self.byte_idx > end_idx) return Error.invalid_section;
    var local_count: usize = 0;
    for (locals) |l| {
        local_count += l.n;
    }
    // Read the code before allocating so that a truncated body cannot leak
    // the expanded locals. After this read `byte_idx == end_idx` by
    // construction.
    const code = try self.read(end_idx - self.byte_idx);
    const expanded = try self.allocator.alloc(Valtype, local_count);
    var li: usize = 0;
    for (locals) |l| {
        @memset(expanded[li .. li + l.n], l.t);
        li += l.n;
    }
    return Func{
        .locals = expanded,
        .code = code,
    };
}
/// Parses the code section: registers one `.internal` function per body in
/// `self.funcs` (indexed by position within this section) and appends the
/// bodies to `self.code`.
///
/// Returns `Error.invalid_section` when the bytes consumed do not match the
/// declared section size.
fn parseCodesec(self: *Parser) !void {
    const size = try self.readU32();
    const end_idx = self.byte_idx + size;
    const codes = try self.parseVector(Parser.parseCode);
    // Bodies are copied into `self.code`; release the temporary slice.
    defer self.allocator.free(codes);
    for (0..codes.len) |i| {
        try self.funcs.append(self.allocator, .{ .internal = @intCast(i) });
    }
    try self.code.appendSlice(self.allocator, codes);
    // Untrusted input: report a size mismatch in every build mode.
    if (self.byte_idx != end_idx) return Error.invalid_section;
}
/// Data sections are not supported yet: emits a warning and skips the
/// section's payload.
fn parseDatasec(self: *Parser) !void {
    self.warn("datasec");
    const section_len = try self.readU32();
    const skipped = try self.read(section_len);
    _ = skipped;
}
/// Data count sections are not supported yet: emits a warning and skips the
/// section's payload.
fn parseDatacountsec(self: *Parser) !void {
    self.warn("datacountsec");
    const section_len = try self.readU32();
    const skipped = try self.read(section_len);
    _ = skipped;
}

View file

@ -1,334 +0,0 @@
const std = @import("std");
const wasm = @import("wasm.zig");
const Allocator = std.mem.Allocator;
pub fn leb128Result(T: type) type {
return struct { len: usize, val: T };
}
pub fn leb128Decode(comptime T: type, stream: anytype) !leb128Result(T) {
switch (@typeInfo(T)) {
.int => {},
else => @compileError("LEB128 integer decoding only support integers, but got " ++ @typeName(T)),
}
if (@typeInfo(T).int.bits != 32 and @typeInfo(T).int.bits != 64) {
@compileError("LEB128 integer decoding only supports 32 or 64 bits integers but got " ++ std.fmt.comptimePrint("{d} bits", .{@typeInfo(T).int.bits}));
}
var result: T = 0;
// TODO: is the type of shift important. Reading Wikipedia (not very much tho) it seems like we can use u32 and call it a day...
var shift: if (@typeInfo(T).int.bits == 32) u5 else u6 = 0;
var byte: u8 = undefined;
var len: usize = 0;
while (stream.readByte()) |b| {
len += 1;
result |= @as(T, @intCast((b & 0x7f))) << shift;
if ((b & (0x1 << 7)) == 0) {
byte = b;
break;
}
shift += 7;
} else |err| {
return err;
}
if (@typeInfo(T).int.signedness == .signed) {
const size = @sizeOf(T) * 8;
if (shift < size and (byte & 0x40) != 0) {
result |= (~@as(T, 0) << shift);
}
}
return .{ .len = len, .val = result };
}
pub const Error = error{
malformed_wasm,
invalid_utf8,
};
pub const Module = struct {
types: []FunctionType,
imports: std.ArrayList(Import),
exports: std.StringHashMap(u32),
functions: []u32,
memory: Memory,
code: []FunctionBody,
funcs: std.ArrayList(Function),
pub fn deinit(self: *Module, allocator: Allocator) void {
for (self.types) |t| {
t.deinit(allocator);
}
allocator.free(self.types);
for (self.imports.items) |i| {
i.deinit(allocator);
}
self.imports.deinit();
var iter = self.exports.iterator();
while (iter.next()) |entry| {
allocator.free(entry.key_ptr.*);
}
self.exports.deinit();
allocator.free(self.functions);
for (self.code) |f| {
for (f.locals) |l| {
allocator.free(l.types);
}
allocator.free(f.code);
}
allocator.free(self.code);
self.funcs.deinit();
}
};
pub const FunctionScope = enum {
external,
internal,
};
pub const Function = union(FunctionScope) {
external: u8,
internal: u8,
};
// TODO: refactor locals
pub const Local = struct {
types: []u8,
};
pub const FunctionBody = struct {
locals: []Local,
code: []u8,
};
pub const Memory = struct {
initial: u32,
max: u32,
};
pub const FunctionType = struct {
parameters: []u8,
results: []u8,
pub fn deinit(self: FunctionType, allocator: Allocator) void {
allocator.free(self.parameters);
allocator.free(self.results);
}
};
pub const Import = struct {
name: []u8,
module: []u8,
signature: u32,
pub fn deinit(self: Import, allocator: Allocator) void {
allocator.free(self.name);
allocator.free(self.module);
}
};
pub fn parseType(t: u8) wasm.Type {
return @enumFromInt(t);
}
pub fn parseName(allocator: Allocator, stream: anytype) ![]u8 {
const size = try std.leb.readULEB128(u32, stream);
const str = try allocator.alloc(u8, size);
if (try stream.read(str) != size) {
// TODO: better error
return Error.malformed_wasm;
}
if (!std.unicode.utf8ValidateSlice(str)) return Error.invalid_utf8;
return str;
}
// TODO: parse Global Section
// TODO: Consider Arena allocator
pub fn parseWasm(allocator: Allocator, stream: anytype) !Module {
var types: []FunctionType = undefined;
var imports = std.ArrayList(Import).init(allocator);
var exports = std.StringHashMap(u32).init(allocator);
var funcs = std.ArrayList(Function).init(allocator);
var functions: []u32 = undefined;
var memory: Memory = undefined;
var code: []FunctionBody = undefined;
// Parse magic
if (!(try stream.isBytes(&[_]u8{ 0x00, 0x61, 0x73, 0x6d }))) return Error.malformed_wasm;
// Parse version
if (!(try stream.isBytes(&[_]u8{ 0x01, 0x00, 0x00, 0x00 }))) return Error.malformed_wasm;
// NOTE: This ensures that (in this block) illegal behavior is safety-checked.
// This slows down the code but since this function is only called at the start
// I believe it is better to take the ``hit'' in performance (should only be @enumFromInt)
// rather than having undefined behavior when user provides an invalid wasm file.
@setRuntimeSafety(true);
loop: while (stream.readByte()) |byte| {
const section_size = try std.leb.readULEB128(u32, stream);
switch (@as(std.wasm.Section, @enumFromInt(byte))) {
std.wasm.Section.custom => {
// TODO: unimplemented
break :loop;
},
std.wasm.Section.type => {
const type_count = try std.leb.readULEB128(u32, stream);
types = try allocator.alloc(FunctionType, type_count);
for (types) |*t| {
if (!(try stream.isBytes(&.{0x60}))) return Error.malformed_wasm;
const params_count = try std.leb.readULEB128(u32, stream);
t.parameters = try allocator.alloc(u8, params_count);
if (try stream.read(t.parameters) != params_count) {
// TODO: better errors
return Error.malformed_wasm;
}
const results = try std.leb.readULEB128(u32, stream);
t.results = try allocator.alloc(u8, results);
if (try stream.read(t.results) != results) {
// TODO: better errors
return Error.malformed_wasm;
}
}
},
std.wasm.Section.import => {
// Can there be more than one import section?
const import_count = try std.leb.readULEB128(u32, stream);
for (0..import_count) |i| {
const mod = try parseName(allocator, stream);
const nm = try parseName(allocator, stream);
const b = try stream.readByte();
switch (@as(std.wasm.ExternalKind, @enumFromInt(b))) {
std.wasm.ExternalKind.function => {
try funcs.append(.{ .external = @intCast(i) });
const idx = try std.leb.readULEB128(u32, stream);
try imports.append(.{
.module = mod,
.name = nm,
.signature = idx,
});
},
// TODO: not implemented
std.wasm.ExternalKind.table => try stream.skipBytes(3, .{}),
std.wasm.ExternalKind.memory => try stream.skipBytes(2, .{}),
std.wasm.ExternalKind.global => try stream.skipBytes(2, .{}),
}
}
},
std.wasm.Section.function => {
const function_count = try std.leb.readULEB128(u32, stream);
functions = try allocator.alloc(u32, function_count);
for (functions) |*f| {
f.* = try std.leb.readULEB128(u32, stream);
}
},
std.wasm.Section.table => {
// TODO: not implemented
try stream.skipBytes(section_size, .{});
},
std.wasm.Section.memory => {
const memory_count = try std.leb.readULEB128(u32, stream);
for (0..memory_count) |_| {
const b = try stream.readByte();
const n = try std.leb.readULEB128(u32, stream);
var m: u32 = 0;
switch (b) {
0x00 => {},
0x01 => m = try std.leb.readULEB128(u32, stream),
else => return Error.malformed_wasm,
}
// TODO: support multiple memories
memory = .{
.initial = n,
.max = m,
};
}
},
std.wasm.Section.global => {
// TODO: unimplemented
try stream.skipBytes(section_size, .{});
},
// TODO: Can there be more than one export section? Otherwise we can optimize allocations
std.wasm.Section.@"export" => {
const export_count = try std.leb.readULEB128(u32, stream);
for (0..export_count) |_| {
const nm = try parseName(allocator, stream);
const b = try stream.readByte();
const idx = try std.leb.readULEB128(u32, stream);
switch (@as(std.wasm.ExternalKind, @enumFromInt(b))) {
std.wasm.ExternalKind.function => try exports.put(nm, idx),
// TODO: unimplemented,
std.wasm.ExternalKind.table => allocator.free(nm),
std.wasm.ExternalKind.memory => allocator.free(nm),
std.wasm.ExternalKind.global => allocator.free(nm),
}
}
},
std.wasm.Section.start => {
// TODO: unimplemented
try stream.skipBytes(section_size, .{});
},
std.wasm.Section.element => {
// TODO: unimplemented
try stream.skipBytes(section_size, .{});
},
std.wasm.Section.code => {
const code_count = try std.leb.readULEB128(u32, stream);
code = try allocator.alloc(FunctionBody, code_count);
for (0..code_count) |i| {
const code_size = try std.leb.readULEB128(u32, stream);
var locals_size: usize = 0;
const local_count = try leb128Decode(u32, stream);
locals_size += local_count.len;
const locals = try allocator.alloc(Local, local_count.val);
for (locals) |*l| {
const n = try leb128Decode(u32, stream);
l.types = try allocator.alloc(u8, n.val);
@memset(l.types, try stream.readByte());
locals_size += n.len + 1;
}
code[i].locals = locals;
// TODO: maybe is better to parse code into ast here and not do it every frame?
// FIXME: This calculation is plain wrong. Resolving above TODO should help
code[i].code = try allocator.alloc(u8, code_size - locals_size);
// TODO: better error reporting
if (try stream.read(code[i].code) != code_size - locals_size) return Error.malformed_wasm;
const f = Function{ .internal = @intCast(i) };
try funcs.append(f);
}
},
std.wasm.Section.data => {
// TODO: unimplemented
try stream.skipBytes(section_size, .{});
},
std.wasm.Section.data_count => {
// TODO: unimplemented
try stream.skipBytes(section_size, .{});
},
else => return Error.malformed_wasm,
}
} else |err| switch (err) {
error.EndOfStream => {},
else => return err,
}
return Module{
.types = types,
.imports = imports,
.functions = functions,
.memory = memory,
.exports = exports,
.code = code,
.funcs = funcs,
};
}

View file

@ -1,13 +1,83 @@
const std = @import("std"); const std = @import("std");
const wasm = @import("wasm.zig"); const wasm = @import("wasm.zig");
const Parser = @import("parse.zig"); const Parser = @import("Parser.zig");
const Allocator = std.mem.Allocator; const Allocator = std.mem.Allocator;
const AllocationError = error{OutOfMemory}; const AllocationError = error{OutOfMemory};
fn leb128Decode(comptime T: type, bytes: []u8) Parser.leb128Result(T) { pub const Memory = struct {
min: u32,
max: ?u32,
};
// TODO: Resolve function calls at parse time
// TODO: Resolve function types at compile time
/// A callable function of a module, tagged by where it lives.
pub const Func = union(enum) {
// index used by the runtime to look up the body in `Module.code`
internal: u32,
// index used by the runtime to look up the entry in `Module.imports`
external: u32,
};
/// A parsed wasm module as consumed by the runtime.
pub const Module = struct {
    memory: Memory,
    funcs: []Func,
    /// Export name -> function index.
    exports: std.StringHashMapUnmanaged(u32),
    imports: []Parser.Import,
    types: []Parser.Functype,
    functions: []u32,
    code: []Parser.Func,

    /// Releases everything the module owns.
    ///
    /// Assumes the module was produced by `Parser.parseModule` and that
    /// `allocator` is the allocator the parser used — TODO confirm no other
    /// producer exists. Besides the top-level slices this also frees the
    /// export names, the import names, the per-type parameter/result slices
    /// and the per-function locals, which were previously leaked.
    /// `Parser.Func.code` points into the parser's input and is not freed.
    pub fn deinit(self: *Module, allocator: Allocator) void {
        var export_names = self.exports.keyIterator();
        while (export_names.next()) |name| allocator.free(name.*);
        self.exports.deinit(allocator);

        allocator.free(self.funcs);

        for (self.imports) |imp| imp.deinit(allocator);
        allocator.free(self.imports);

        for (self.types) |t| t.deinit(allocator);
        allocator.free(self.types);

        allocator.free(self.functions);

        for (self.code) |body| allocator.free(body.locals);
        allocator.free(self.code);
    }
};
/// Result type of a LEB128 decode: the decoded value `val` and the number
/// of bytes `len` that were consumed to produce it.
pub fn leb128Result(T: type) type {
return struct { len: usize, val: T };
}
/// Decodes one LEB128-encoded integer of type `T` from `stream`.
///
/// `T` must be a 32- or 64-bit integer (checked at compile time); signed
/// types are decoded as signed LEB128. `stream` must provide `readByte()`.
/// Returns the value together with the number of bytes consumed.
///
/// Errors: whatever `stream.readByte()` reports, and `error.Overflow` when
/// the encoding has more groups than fit into `T`.
pub fn leb128Decode_stream(comptime T: type, stream: anytype) !leb128Result(T) {
    switch (@typeInfo(T)) {
        .int => {},
        else => @compileError("LEB128 integer decoding only support integers, but got " ++ @typeName(T)),
    }
    const bits = @typeInfo(T).int.bits;
    if (bits != 32 and bits != 64) {
        @compileError("LEB128 integer decoding only supports 32 or 64 bits integers but got " ++ std.fmt.comptimePrint("{d} bits", .{bits}));
    }
    const ShiftInt = std.math.Log2Int(T);

    var result: T = 0;
    // Tracked as usize so that advancing past the width of `T` cannot
    // overflow the counter itself (u5/u6 would trap at 28 + 7 / 63 + 7).
    var shift: usize = 0;
    var len: usize = 0;
    while (true) {
        const b = try stream.readByte();
        len += 1;
        // Another group although every bit of `T` is already covered.
        if (shift >= bits) return error.Overflow;
        result |= @as(T, @intCast(b & 0x7f)) << @as(ShiftInt, @intCast(shift));
        shift += 7;
        if ((b & 0x80) == 0) {
            // Sign-extend from the first bit *after* the last group. Extending
            // from the start of the last group would overwrite its payload
            // (e.g. 0x7e would decode to -1 instead of -2).
            if (@typeInfo(T).int.signedness == .signed and shift < bits and (b & 0x40) != 0) {
                result |= ~@as(T, 0) << @as(ShiftInt, @intCast(shift));
            }
            break;
        }
    }

    return .{ .len = len, .val = result };
}
fn leb128Decode(comptime T: type, bytes: []const u8) leb128Result(T) {
var fbs = std.io.fixedBufferStream(bytes); var fbs = std.io.fixedBufferStream(bytes);
// TODO: this catch should be unrecheable // TODO: this catch should be unrecheable
return Parser.leb128Decode(T, fbs.reader()) catch .{ .len = 0, .val = 0 }; return leb128Decode_stream(T, fbs.reader()) catch .{ .len = 0, .val = 0 };
} }
pub fn decodeLittleEndian(comptime T: type, bytes: []u8) T { pub fn decodeLittleEndian(comptime T: type, bytes: []u8) T {
@ -30,7 +100,7 @@ pub fn encodeLittleEndian(comptime T: type, bytes: *[]u8, value: T) void {
pub const CallFrame = struct { pub const CallFrame = struct {
program_counter: usize, program_counter: usize,
code: []u8, code: []const u8,
locals: []Value, locals: []Value,
}; };
@ -45,15 +115,20 @@ pub const Value = union(ValueType) {
}; };
pub const Runtime = struct { pub const Runtime = struct {
module: Parser.Module, module: Module,
stack: std.ArrayList(Value), stack: std.ArrayList(Value),
call_stack: std.ArrayList(CallFrame), call_stack: std.ArrayList(CallFrame),
memory: []u8, memory: []u8,
global_runtime: *wasm.GlobalRuntime, global_runtime: *wasm.GlobalRuntime,
labels: std.ArrayList(usize), labels: std.ArrayList(usize),
pub fn init(allocator: Allocator, module: Parser.Module, global_runtime: *wasm.GlobalRuntime) !Runtime { pub fn init(allocator: Allocator, module: Module, global_runtime: *wasm.GlobalRuntime) !Runtime {
const memory = try allocator.alloc(u8, module.memory.max); // if memory max is not set the memory is allowed to grow but it is not supported at the moment
const max = module.memory.max orelse 1_000;
if (module.memory.max == null) {
std.debug.print("[WARN]: growing memory is not yet supported, usign a default value of 1Kb\n", .{});
}
const memory = try allocator.alloc(u8, max);
return Runtime{ return Runtime{
.module = module, .module = module,
.stack = try std.ArrayList(Value).initCapacity(allocator, 10), .stack = try std.ArrayList(Value).initCapacity(allocator, 10),
@ -492,6 +567,7 @@ pub const Runtime = struct {
} }
} }
// TODO: Do name resolution
pub fn callExternal(self: *Runtime, allocator: Allocator, name: []const u8, parameters: []usize) !void { pub fn callExternal(self: *Runtime, allocator: Allocator, name: []const u8, parameters: []usize) !void {
if (self.module.exports.get(name)) |function| { if (self.module.exports.get(name)) |function| {
try self.call(allocator, function, parameters); try self.call(allocator, function, parameters);
@ -501,7 +577,7 @@ pub const Runtime = struct {
} }
pub fn call(self: *Runtime, allocator: Allocator, function: usize, parameters: []usize) AllocationError!void { pub fn call(self: *Runtime, allocator: Allocator, function: usize, parameters: []usize) AllocationError!void {
const f = self.module.funcs.items[function]; const f = self.module.funcs[function];
switch (f) { switch (f) {
.internal => { .internal => {
const function_type = self.module.types[self.module.functions[f.internal]]; const function_type = self.module.types[self.module.functions[f.internal]];
@ -512,7 +588,8 @@ pub const Runtime = struct {
}; };
for (parameters, 0..) |p, i| { for (parameters, 0..) |p, i| {
switch (Parser.parseType(function_type.parameters[i])) { switch (function_type.parameters[i]) {
.val => |v| switch (v) {
.i32 => { .i32 => {
frame.locals[i] = .{ .i32 = @intCast(p) }; frame.locals[i] = .{ .i32 = @intCast(p) };
}, },
@ -520,11 +597,14 @@ pub const Runtime = struct {
frame.locals[i] = .{ .i64 = @intCast(p) }; frame.locals[i] = .{ .i64 = @intCast(p) };
}, },
else => unreachable, else => unreachable,
},
.ref => unreachable,
} }
} }
for (self.module.code[f.internal].locals, function_type.parameters.len..) |local, i| { for (self.module.code[f.internal].locals, function_type.parameters.len..) |local, i| {
switch (Parser.parseType(local.types[0])) { switch (local) {
.val => |v| switch (v) {
.i32 => { .i32 => {
frame.locals[i] = .{ .i32 = 0 }; frame.locals[i] = .{ .i32 = 0 };
}, },
@ -532,6 +612,8 @@ pub const Runtime = struct {
frame.locals[i] = .{ .i64 = 0 }; frame.locals[i] = .{ .i64 = 0 };
}, },
else => unreachable, else => unreachable,
},
.ref => unreachable,
} }
} }
@ -540,7 +622,7 @@ pub const Runtime = struct {
allocator.free(frame.locals); allocator.free(frame.locals);
}, },
.external => { .external => {
const name = self.module.imports.items[f.external].name; const name = self.module.imports[f.external].name;
if (self.global_runtime.functions.get(name)) |external| { if (self.global_runtime.functions.get(name)) |external| {
external(&self.stack); external(&self.stack);
} }