The fix involves moving the function leb128Decode over to the parser. To me it makes more sense for the function to belong in that module, so I consider it a positive change. However, I do not think that returning two values is really necessary. I think a proper solution would be either to parse the code or to wrap the stream so that we can count how many bytes are read. That way we could use std.leb.readUleb128, which should be less error-prone.
331 lines
12 KiB
Zig
331 lines
12 KiB
Zig
const std = @import("std");
|
|
const wasm = @import("wasm.zig");
|
|
const Allocator = std.mem.Allocator;
|
|
|
|
/// Return type of `leb128Decode`: the decoded integer (`val`) together with
/// the number of bytes consumed from the stream to produce it (`len`).
pub fn leb128Result(comptime T: type) type {
    return struct { len: usize, val: T };
}

/// Decodes a single LEB128-encoded integer of type `T` from `stream`.
///
/// `T` must be a 32- or 64-bit integer (anything else is a compile error).
/// Unsigned types are decoded as ULEB128, signed types as SLEB128 with sign
/// extension. `stream` only needs a `readByte()` method returning `!u8`.
///
/// Returns the value and the number of bytes read. Errors:
/// - whatever `stream.readByte()` returns (e.g. `error.EndOfStream` on
///   truncated input),
/// - `error.Overflow` when the encoding has more bytes than a `T` can hold.
///
/// Payload bits of the final byte that do not fit into `T` are discarded.
pub fn leb128Decode(comptime T: type, stream: anytype) !leb128Result(T) {
    switch (@typeInfo(T)) {
        .int => {},
        else => @compileError("LEB128 integer decoding only support integers, but got " ++ @typeName(T)),
    }
    const info = @typeInfo(T).int;
    if (info.bits != 32 and info.bits != 64) {
        @compileError("LEB128 integer decoding only supports 32 or 64 bits integers but got " ++ std.fmt.comptimePrint("{d} bits", .{info.bits}));
    }

    // Accumulate in the unsigned domain so shifts and the sign-extension mask
    // are plain bit operations; the result is bit-cast to `T` at the end.
    const U = std.meta.Int(.unsigned, info.bits);
    const Shift = std.math.Log2Int(U);

    var bits: U = 0;
    // Number of payload bits consumed so far. Kept as `usize` (not the narrow
    // shift type) so that it can safely exceed the width of `T`.
    var shift: usize = 0;
    var len: usize = 0;

    while (true) {
        const b = try stream.readByte();
        len += 1;

        // More groups than fit into `T`: reject instead of overflowing the shift.
        if (shift >= info.bits) return error.Overflow;

        bits |= @as(U, b & 0x7f) << @as(Shift, @intCast(shift));
        shift += 7;

        // A clear continuation bit marks the last byte.
        if ((b & 0x80) == 0) {
            // Sign-extend from the first bit *after* the payload just stored.
            if (info.signedness == .signed and shift < info.bits and (b & 0x40) != 0) {
                bits |= ~@as(U, 0) << @as(Shift, @intCast(shift));
            }
            break;
        }
    }

    const val: T = @bitCast(bits);
    return .{ .len = len, .val = val };
}
|
|
|
|
/// Failures reported by this parser itself, in addition to any error coming
/// from the stream or the allocator.
pub const Error = error{
    /// The input does not have the structure the parser expects, e.g. a wrong
    /// header, an unexpected tag byte, or fewer bytes than announced.
    malformed_wasm,
    /// A name in the binary is not valid UTF-8.
    invalid_utf8,
};
|
|
|
|
/// In-memory representation of a parsed WebAssembly module.
///
/// The module owns every slice and container it holds. Release it with
/// `deinit`, passing the allocator that was used to build it.
pub const Module = struct {
    /// Function signatures; each entry owns its parameter and result slices.
    types: []FunctionType,
    /// Import entries; each one owns its module and field name.
    imports: std.ArrayList(Import),
    /// Exported name -> index. The keys are owned by the module.
    exports: std.StringHashMap(u32),
    /// One `u32` per entry of the function section
    /// (presumably indices into `types` — confirm against the interpreter).
    functions: []u32,
    memory: Memory,
    /// Function bodies; each one owns its locals and its code bytes.
    code: []FunctionBody,
    /// Flat list of imported and locally defined functions.
    funcs: std.ArrayList(Function),

    /// Frees everything owned by the module. `allocator` must be the one the
    /// slices and map keys were allocated with.
    pub fn deinit(self: *Module, allocator: Allocator) void {
        for (self.types) |t| {
            t.deinit(allocator);
        }
        allocator.free(self.types);

        for (self.imports.items) |i| {
            i.deinit(allocator);
        }
        self.imports.deinit();

        // The map does not own its keys, so they are released by hand before
        // the table itself.
        var iter = self.exports.iterator();
        while (iter.next()) |entry| {
            allocator.free(entry.key_ptr.*);
        }
        self.exports.deinit();

        allocator.free(self.functions);

        for (self.code) |f| {
            for (f.locals) |l| {
                allocator.free(l.types);
            }
            // The `locals` slice is its own allocation; freeing only the
            // per-local `types` would leak it.
            allocator.free(f.locals);
            allocator.free(f.code);
        }
        allocator.free(self.code);

        self.funcs.deinit();
    }
};
|
|
|
|
/// Tag of `Function`: tells imported functions apart from functions defined
/// in the module itself.
pub const FunctionScope = enum {
    /// The function comes from the import section.
    external,
    /// The function has a body in the code section.
    internal,
};
|
|
|
|
/// A function known to the module, tagged by where it comes from.
/// The payload is an 8-bit index, so at most 256 entries of each kind can be
/// addressed.
pub const Function = union(FunctionScope) {
    /// Position of the entry within the import section.
    external: u8,
    /// Position of the body within the code section.
    internal: u8,
};
|
|
|
|
// TODO: refactor locals
|
|
/// One local-variable group of a function body.
pub const Local = struct {
    /// Raw value-type bytes, one per local in the group. The parser fills the
    /// whole slice with the same type byte.
    types: []u8,
};
|
|
|
|
/// Body of a locally defined function as stored in the code section.
pub const FunctionBody = struct {
    /// Local-variable groups declared at the start of the body.
    locals: []Local,
    /// Remaining bytes of the body after the locals (the raw, undecoded
    /// instruction stream).
    code: []u8,
};
|
|
|
|
/// Limits of the module's linear memory as read from the memory section
/// (presumably counted in WebAssembly pages — confirm against the runtime).
pub const Memory = struct {
    /// Initial size.
    initial: u32,
    /// Upper bound; the parser stores 0 when the module declares none.
    max: u32,
};
|
|
|
|
/// A function signature from the type section.
pub const FunctionType = struct {
    /// Raw value-type bytes of the parameters, in declaration order.
    parameters: []u8,
    /// Raw value-type bytes of the results, in declaration order.
    results: []u8,

    /// Releases both slices. `allocator` must be the one they were allocated with.
    pub fn deinit(self: FunctionType, allocator: Allocator) void {
        const owned = [_][]u8{ self.parameters, self.results };
        for (owned) |buf| allocator.free(buf);
    }
};
|
|
|
|
/// One entry of the import section.
pub const Import = struct {
    /// Name of the imported item inside its module (owned).
    name: []u8,
    /// Name of the module the item is imported from (owned).
    module: []u8,
    /// Index read after the import kind byte
    /// (for function imports presumably a type index — confirm against the caller).
    signature: u32,

    /// Releases both names. `allocator` must be the one they were allocated with.
    pub fn deinit(self: Import, allocator: Allocator) void {
        const owned = [_][]u8{ self.name, self.module };
        for (owned) |buf| allocator.free(buf);
    }
};
|
|
|
|
/// Converts a raw type byte into the corresponding `wasm.Type` tag.
/// NOTE(review): no validation happens here; what an unknown byte does depends
/// on how `wasm.Type` is declared — confirm before feeding untrusted input.
pub fn parseType(t: u8) wasm.Type {
    const parsed: wasm.Type = @enumFromInt(t);
    return parsed;
}
|
|
|
|
/// Reads a length-prefixed name from `stream`: a ULEB128 `u32` byte count
/// followed by that many bytes, which must form valid UTF-8.
///
/// The caller owns the returned slice and frees it with `allocator`.
///
/// Errors:
/// - `Error.malformed_wasm` when the stream ends before the announced number
///   of bytes has been read,
/// - `Error.invalid_utf8` when the bytes are not valid UTF-8,
/// - anything returned by the stream or the allocator.
pub fn parseName(allocator: Allocator, stream: anytype) ![]u8 {
    const size = try std.leb.readULEB128(u32, stream);
    const str = try allocator.alloc(u8, size);
    // Nothing below may leak the buffer when it bails out.
    errdefer allocator.free(str);

    // `readAll` keeps reading until the buffer is full or the stream ends, so
    // a short count here means the input is truncated rather than that the
    // reader merely delivered the bytes in several pieces.
    // TODO: better error
    if (try stream.readAll(str) != size) return Error.malformed_wasm;

    if (!std.unicode.utf8ValidateSlice(str)) return Error.invalid_utf8;

    return str;
}
|
|
|
|
// TODO: parse Global Section
|
|
// TODO: Consider Arena allocator
|
|
/// Parses a WebAssembly binary from `stream` into a `Module`.
///
/// The type, import, function, memory, export and code sections are decoded.
/// Custom, table, global, start, element, data and data-count sections are
/// skipped. Sections missing from the binary leave the corresponding `Module`
/// field empty (zeroed for `memory`).
///
/// On success the caller owns the module and releases it with
/// `Module.deinit(allocator)`. On failure everything allocated so far is
/// released before the error is returned.
///
/// Errors: `Error.malformed_wasm` for structural problems (bad magic/version,
/// unknown section id, truncated data, inconsistent sizes),
/// `Error.invalid_utf8` for invalid names, plus stream and allocator errors.
/// Unknown import/export kind bytes are still a safety-checked panic.
pub fn parseWasm(allocator: Allocator, stream: anytype) !Module {
    // Every owned slice starts out empty so that neither the returned module
    // nor the error cleanup below ever touches undefined memory when a
    // section is absent. Freeing an empty slice is a no-op.
    var types: []FunctionType = &[_]FunctionType{};
    var imports = std.ArrayList(Import).init(allocator);
    var exports = std.StringHashMap(u32).init(allocator);
    var funcs = std.ArrayList(Function).init(allocator);
    var functions: []u32 = &[_]u32{};
    var memory: Memory = .{ .initial = 0, .max = 0 };
    var code: []FunctionBody = &[_]FunctionBody{};

    // On any error, tear down whatever has been built so far.
    errdefer {
        var partial = Module{
            .types = types,
            .imports = imports,
            .functions = functions,
            .memory = memory,
            .exports = exports,
            .code = code,
            .funcs = funcs,
        };
        partial.deinit(allocator);
    }

    // Parse magic
    if (!(try stream.isBytes(&[_]u8{ 0x00, 0x61, 0x73, 0x6d }))) return Error.malformed_wasm;
    // Parse version
    if (!(try stream.isBytes(&[_]u8{ 0x01, 0x00, 0x00, 0x00 }))) return Error.malformed_wasm;

    // NOTE: This ensures that (in this block) illegal behavior is safety-checked.
    // This slows down the code but since this function is only called at the start
    // I believe it is better to take the ``hit'' in performance (should only be @enumFromInt)
    // rather than having undefined behavior when user provides an invalid wasm file.
    @setRuntimeSafety(true);
    while (stream.readByte()) |byte| {
        const section_size = try std.leb.readULEB128(u32, stream);
        switch (@as(std.wasm.Section, @enumFromInt(byte))) {
            std.wasm.Section.custom => {
                // TODO: unimplemented
                // Custom sections may appear anywhere, so skip this one and
                // keep parsing instead of dropping the rest of the module.
                try stream.skipBytes(section_size, .{});
            },
            std.wasm.Section.type => {
                const type_count = try std.leb.readULEB128(u32, stream);
                types = try allocator.alloc(FunctionType, type_count);
                // Keep not-yet-parsed entries valid for the error cleanup.
                for (types) |*t| t.* = .{ .parameters = &[_]u8{}, .results = &[_]u8{} };
                for (types) |*t| {
                    if (!(try stream.isBytes(&.{0x60}))) return Error.malformed_wasm;
                    const params_count = try std.leb.readULEB128(u32, stream);
                    t.parameters = try allocator.alloc(u8, params_count);
                    // TODO: better errors
                    if (try stream.readAll(t.parameters) != params_count) return Error.malformed_wasm;
                    const results = try std.leb.readULEB128(u32, stream);
                    t.results = try allocator.alloc(u8, results);
                    // TODO: better errors
                    if (try stream.readAll(t.results) != results) return Error.malformed_wasm;
                }
            },
            std.wasm.Section.import => {
                // Can there be more than one import section?
                const import_count = try std.leb.readULEB128(u32, stream);
                for (0..import_count) |i| {
                    // Both names are owned locally until the entry has been
                    // appended to `imports`.
                    const mod = try parseName(allocator, stream);
                    errdefer allocator.free(mod);
                    const nm = try parseName(allocator, stream);
                    errdefer allocator.free(nm);

                    const b = try stream.readByte();
                    switch (@as(std.wasm.ExternalKind, @enumFromInt(b))) {
                        std.wasm.ExternalKind.function => try funcs.append(.{ .external = @intCast(i) }),
                        // TODO: not implemented
                        std.wasm.ExternalKind.table => {},
                        std.wasm.ExternalKind.memory => {},
                        std.wasm.ExternalKind.global => {},
                    }
                    const idx = try std.leb.readULEB128(u32, stream);
                    try imports.append(.{
                        .module = mod,
                        .name = nm,
                        .signature = idx,
                    });
                }
            },
            std.wasm.Section.function => {
                const function_count = try std.leb.readULEB128(u32, stream);
                functions = try allocator.alloc(u32, function_count);
                for (functions) |*f| {
                    f.* = try std.leb.readULEB128(u32, stream);
                }
            },
            std.wasm.Section.table => {
                // TODO: not implemented
                try stream.skipBytes(section_size, .{});
            },
            std.wasm.Section.memory => {
                const memory_count = try std.leb.readULEB128(u32, stream);
                for (0..memory_count) |_| {
                    // Flag byte: 0x00 = only a minimum, 0x01 = minimum and maximum.
                    const b = try stream.readByte();
                    const n = try std.leb.readULEB128(u32, stream);
                    var m: u32 = 0;
                    switch (b) {
                        0x00 => {},
                        0x01 => m = try std.leb.readULEB128(u32, stream),
                        else => return Error.malformed_wasm,
                    }
                    // TODO: support multiple memories
                    memory = .{
                        .initial = n,
                        .max = m,
                    };
                }
            },
            std.wasm.Section.global => {
                // TODO: unimplemented
                try stream.skipBytes(section_size, .{});
            },
            // TODO: Can there be more than one export section? Otherwise we can optimize allocations
            std.wasm.Section.@"export" => {
                const export_count = try std.leb.readULEB128(u32, stream);
                for (0..export_count) |_| {
                    const nm = try parseName(allocator, stream);
                    // Owned locally until the map takes it over (or it is freed below).
                    errdefer allocator.free(nm);
                    const b = try stream.readByte();
                    const idx = try std.leb.readULEB128(u32, stream);
                    switch (@as(std.wasm.ExternalKind, @enumFromInt(b))) {
                        std.wasm.ExternalKind.function => {
                            const entry = try exports.getOrPut(nm);
                            // A repeated name keeps the key already stored in
                            // the map, so the fresh copy must be released.
                            // The last index still wins.
                            if (entry.found_existing) allocator.free(nm);
                            entry.value_ptr.* = idx;
                        },
                        // TODO: unimplemented,
                        std.wasm.ExternalKind.table => allocator.free(nm),
                        std.wasm.ExternalKind.memory => allocator.free(nm),
                        std.wasm.ExternalKind.global => allocator.free(nm),
                    }
                }
            },
            std.wasm.Section.start => {
                // TODO: unimplemented
                try stream.skipBytes(section_size, .{});
            },
            std.wasm.Section.element => {
                // TODO: unimplemented
                try stream.skipBytes(section_size, .{});
            },
            std.wasm.Section.code => {
                const code_count = try std.leb.readULEB128(u32, stream);
                code = try allocator.alloc(FunctionBody, code_count);
                // Keep not-yet-parsed entries valid for the error cleanup.
                for (code) |*body| body.* = .{ .locals = &[_]Local{}, .code = &[_]u8{} };
                for (code, 0..) |*body, i| {
                    const code_size = try std.leb.readULEB128(u32, stream);

                    // Count the bytes used by the locals declaration; they
                    // are part of `code_size`.
                    const local_count = try leb128Decode(u32, stream);
                    var locals_size: usize = local_count.len;
                    body.locals = try allocator.alloc(Local, local_count.val);
                    for (body.locals) |*l| l.* = .{ .types = &[_]u8{} };
                    for (body.locals) |*l| {
                        const n = try leb128Decode(u32, stream);
                        l.types = try allocator.alloc(u8, n.val);
                        @memset(l.types, try stream.readByte());
                        // Group count plus one byte for the value type.
                        locals_size += n.len + 1;
                    }

                    // TODO: maybe is better to parse code into ast here and not do it every frame?
                    // The instruction bytes are what remains of the body after
                    // the locals; a body smaller than its own locals is malformed.
                    if (locals_size > code_size) return Error.malformed_wasm;
                    const instr_len = code_size - locals_size;
                    body.code = try allocator.alloc(u8, instr_len);
                    // TODO: better error reporting
                    if (try stream.readAll(body.code) != instr_len) return Error.malformed_wasm;

                    try funcs.append(.{ .internal = @intCast(i) });
                }
            },
            std.wasm.Section.data => {
                // TODO: unimplemented
                try stream.skipBytes(section_size, .{});
            },
            std.wasm.Section.data_count => {
                // TODO: unimplemented
                try stream.skipBytes(section_size, .{});
            },
            else => return Error.malformed_wasm,
        }
    } else |err| switch (err) {
        // Running out of input at a section boundary is the normal end of the module.
        error.EndOfStream => {},
        else => return err,
    }

    return Module{
        .types = types,
        .imports = imports,
        .functions = functions,
        .memory = memory,
        .exports = exports,
        .code = code,
        .funcs = funcs,
    };
}
|