[MODS/PARSING]: Moved over to the std.Io.Reader interface for the upcoming Zig update

This commit is contained in:
luccie 2025-08-26 01:15:06 +02:00
parent 330d9b7711
commit c12e5ef485
3 changed files with 43 additions and 41 deletions

View file

@ -3,8 +3,7 @@ const vm = @import("vm.zig");
const IR = @import("ir.zig"); const IR = @import("ir.zig");
const Allocator = std.mem.Allocator; const Allocator = std.mem.Allocator;
bytes: []const u8, reader: *std.Io.Reader,
byte_idx: usize,
allocator: Allocator, allocator: Allocator,
types: []vm.Functype, types: []vm.Functype,
@ -27,6 +26,8 @@ pub const Error = error{
OutOfMemory, OutOfMemory,
DivideBy0, DivideBy0,
Overflow, Overflow,
ReadFailed,
EndOfStream,
invalid_instruction, invalid_instruction,
invalid_magic, invalid_magic,
invalid_version, invalid_version,
@ -51,15 +52,14 @@ pub const Error = error{
unterminated_wasm, unterminated_wasm,
}; };
pub fn init(allocator: Allocator, bytes: []const u8) !Parser { pub fn init(allocator: Allocator, reader: std.fs.File.Reader) !Parser {
return .{ return .{
.elems = &.{}, .elems = &.{},
.tables = &.{}, .tables = &.{},
.parsedData = &.{}, .parsedData = &.{},
.exported_memory = 0, .exported_memory = 0,
.importCount = 0, .importCount = 0,
.bytes = bytes, .reader = @constCast(&reader.interface),
.byte_idx = 0,
.allocator = allocator, .allocator = allocator,
.types = &.{}, .types = &.{},
.functions = &.{}, .functions = &.{},
@ -103,18 +103,19 @@ pub fn module(self: *Parser) vm.Module {
// TODO: This function should not exist // TODO: This function should not exist
fn warn(self: Parser, s: []const u8) void { fn warn(self: Parser, s: []const u8) void {
std.debug.print("[WARN]: Parsing of {s} unimplemented at byte index {d}\n", .{ s, self.byte_idx }); std.debug.print("[WARN]: Parsing of {s} unimplemented at byte index {d}\n", .{ s, self.reader.seek });
} }
// TODO: remove peek? // TODO: remove peek?
pub fn peek(self: Parser) ?u8 { pub fn peek(self: Parser) ?u8 {
return if (self.byte_idx < self.bytes.len) self.bytes[self.byte_idx] else null; return self.reader.peekByte() catch return null;
} }
fn read(self: *Parser, n: usize) ![]const u8 { fn read(self: *Parser, n: usize) ![]const u8 {
if (self.byte_idx + n > self.bytes.len) return Error.unterminated_wasm; _ = self.reader.peek(n) catch {
defer self.byte_idx += n; return Error.unterminated_wasm;
return self.bytes[self.byte_idx .. self.byte_idx + n]; };
return try self.reader.readAlloc(self.allocator, n);
} }
// ========== // ==========
@ -309,7 +310,7 @@ pub fn parseModule(self: *Parser) !void {
if (!std.mem.eql(u8, try self.read(4), &.{ 0x00, 0x61, 0x73, 0x6d })) return Error.invalid_magic; if (!std.mem.eql(u8, try self.read(4), &.{ 0x00, 0x61, 0x73, 0x6d })) return Error.invalid_magic;
if (!std.mem.eql(u8, try self.read(4), &.{ 0x01, 0x00, 0x00, 0x00 })) return Error.invalid_version; if (!std.mem.eql(u8, try self.read(4), &.{ 0x01, 0x00, 0x00, 0x00 })) return Error.invalid_version;
// TODO: Ensure only one section of each type (except for custom section), some code depends on it // TODO: Ensure only one section of each type (except for custom section), some code depends on it
while (self.byte_idx < self.bytes.len) { while (self.reader.seek < self.reader.end) {
try switch (try self.readByte()) { try switch (try self.readByte()) {
0 => self.parseCustomsec(), 0 => self.parseCustomsec(),
1 => self.parseTypesec(), 1 => self.parseTypesec(),
@ -343,7 +344,7 @@ fn parseCustomsec(self: *Parser) !void {
fn parseTypesec(self: *Parser) !void { fn parseTypesec(self: *Parser) !void {
const size = try self.readU32(); const size = try self.readU32();
const end_idx = self.byte_idx + size; const end_idx = self.reader.seek + size;
const ft = try self.parseVector(Parser.parseFunctype); const ft = try self.parseVector(Parser.parseFunctype);
@ -351,7 +352,7 @@ fn parseTypesec(self: *Parser) !void {
self.types = ft; self.types = ft;
// TODO(ernesto): run this check not only on debug // TODO(ernesto): run this check not only on debug
std.debug.assert(self.byte_idx == end_idx); std.debug.assert(self.reader.seek == end_idx);
} }
pub const Import = struct { pub const Import = struct {
@ -379,7 +380,7 @@ fn parseImport(self: *Parser) !Import {
fn parseImportsec(self: *Parser) !void { fn parseImportsec(self: *Parser) !void {
const size = try self.readU32(); const size = try self.readU32();
const end_idx = self.byte_idx + size; const end_idx = self.reader.seek + size;
const imports = try self.parseVector(Parser.parseImport); const imports = try self.parseVector(Parser.parseImport);
@ -423,12 +424,12 @@ fn parseImportsec(self: *Parser) !void {
defer self.allocator.free(imports); defer self.allocator.free(imports);
// TODO: run this check not only on debug // TODO: run this check not only on debug
std.debug.assert(self.byte_idx == end_idx); std.debug.assert(self.reader.seek == end_idx);
} }
fn parseFuncsec(self: *Parser) !void { fn parseFuncsec(self: *Parser) !void {
const size = try self.readU32(); const size = try self.readU32();
const end_idx = self.byte_idx + size; const end_idx = self.reader.seek + size;
const types = try self.parseVector(Parser.readU32); const types = try self.parseVector(Parser.readU32);
defer self.allocator.free(types); defer self.allocator.free(types);
@ -449,7 +450,7 @@ fn parseFuncsec(self: *Parser) !void {
std.debug.assert(types.len + self.importCount == self.functions.len); std.debug.assert(types.len + self.importCount == self.functions.len);
// TODO: run this check not only on debug // TODO: run this check not only on debug
std.debug.assert(self.byte_idx == end_idx); std.debug.assert(self.reader.seek == end_idx);
} }
pub const Table = struct { pub const Table = struct {
@ -464,7 +465,7 @@ fn parseTable(self: *Parser) !Table {
fn parseTablesec(self: *Parser) !void { fn parseTablesec(self: *Parser) !void {
const size = try self.readU32(); const size = try self.readU32();
const end_idx = self.byte_idx + size; const end_idx = self.reader.seek + size;
const tables = try self.parseVector(Parser.parseTable); const tables = try self.parseVector(Parser.parseTable);
defer self.allocator.free(tables); defer self.allocator.free(tables);
@ -476,12 +477,12 @@ fn parseTablesec(self: *Parser) !void {
self.tables[i] = t.t; self.tables[i] = t.t;
} }
std.debug.assert(self.byte_idx == end_idx); std.debug.assert(self.reader.seek == end_idx);
} }
fn parseMemsec(self: *Parser) !void { fn parseMemsec(self: *Parser) !void {
const size = try self.readU32(); const size = try self.readU32();
const end_idx = self.byte_idx + size; const end_idx = self.reader.seek + size;
const mems = try self.parseVector(Parser.parseMemtype); const mems = try self.parseVector(Parser.parseMemtype);
defer self.allocator.free(mems); defer self.allocator.free(mems);
@ -498,7 +499,7 @@ fn parseMemsec(self: *Parser) !void {
} }
// TODO: run this check not only on debug // TODO: run this check not only on debug
std.debug.assert(self.byte_idx == end_idx); std.debug.assert(self.reader.seek == end_idx);
} }
pub const Global = struct { pub const Global = struct {
@ -515,7 +516,7 @@ fn parseGlobal(self: *Parser) !Global {
fn parseGlobalsec(self: *Parser) !void { fn parseGlobalsec(self: *Parser) !void {
const size = try self.readU32(); const size = try self.readU32();
const end_idx = self.byte_idx + size; const end_idx = self.reader.seek + size;
const globals = try self.parseVector(Parser.parseGlobal); const globals = try self.parseVector(Parser.parseGlobal);
defer self.allocator.free(globals); defer self.allocator.free(globals);
@ -530,7 +531,7 @@ fn parseGlobalsec(self: *Parser) !void {
self.globalTypes[i] = global.t; self.globalTypes[i] = global.t;
} }
std.debug.assert(self.byte_idx == end_idx); std.debug.assert(self.reader.seek == end_idx);
} }
pub const Export = struct { pub const Export = struct {
@ -556,7 +557,7 @@ fn parseExport(self: *Parser) !Export {
fn parseExportsec(self: *Parser) !void { fn parseExportsec(self: *Parser) !void {
const size = try self.readU32(); const size = try self.readU32();
const end_idx = self.byte_idx + size; const end_idx = self.reader.seek + size;
const exports = try self.parseVector(Parser.parseExport); const exports = try self.parseVector(Parser.parseExport);
defer { defer {
@ -582,7 +583,7 @@ fn parseExportsec(self: *Parser) !void {
} }
// TODO: run this check not only on debug // TODO: run this check not only on debug
std.debug.assert(self.byte_idx == end_idx); std.debug.assert(self.reader.seek == end_idx);
} }
fn parseStartsec(self: *Parser) !void { fn parseStartsec(self: *Parser) !void {
@ -636,7 +637,7 @@ fn parseElem(self: *Parser) !Elem {
fn parseElemsec(self: *Parser) !void { fn parseElemsec(self: *Parser) !void {
const size = try self.readU32(); const size = try self.readU32();
const end_idx = self.byte_idx + size; const end_idx = self.reader.seek + size;
const elems = try self.parseVector(Parser.parseElem); const elems = try self.parseVector(Parser.parseElem);
defer self.allocator.free(elems); defer self.allocator.free(elems);
@ -655,7 +656,7 @@ fn parseElemsec(self: *Parser) !void {
} }
} }
std.debug.assert(self.byte_idx == end_idx); std.debug.assert(self.reader.seek == end_idx);
} }
pub const Func = struct { pub const Func = struct {
@ -675,7 +676,7 @@ fn parseLocal(self: *Parser) !Local {
fn parseCode(self: *Parser) !Func { fn parseCode(self: *Parser) !Func {
const size = try self.readU32(); const size = try self.readU32();
const end_idx = self.byte_idx + size; const end_idx = self.reader.seek + size;
const locals = try self.parseVector(Parser.parseLocal); const locals = try self.parseVector(Parser.parseLocal);
defer self.allocator.free(locals); defer self.allocator.free(locals);
@ -700,14 +701,14 @@ fn parseCode(self: *Parser) !Func {
} }
// TODO: run this check not only on debug // TODO: run this check not only on debug
std.debug.assert(self.byte_idx == end_idx); std.debug.assert(self.reader.seek == end_idx);
return func; return func;
} }
fn parseCodesec(self: *Parser) !void { fn parseCodesec(self: *Parser) !void {
const size = try self.readU32(); const size = try self.readU32();
const end_idx = self.byte_idx + size; const end_idx = self.reader.seek + size;
const codes = try self.parseVector(Parser.parseCode); const codes = try self.parseVector(Parser.parseCode);
defer self.allocator.free(codes); defer self.allocator.free(codes);
@ -722,7 +723,7 @@ fn parseCodesec(self: *Parser) !void {
} }
// TODO: run this check not only on debug // TODO: run this check not only on debug
std.debug.assert(self.byte_idx == end_idx); std.debug.assert(self.reader.seek == end_idx);
} }
pub const Data = struct { pub const Data = struct {
@ -747,14 +748,14 @@ fn parseData(self: *Parser) !Data {
fn parseDatasec(self: *Parser) !void { fn parseDatasec(self: *Parser) !void {
const size = try self.readU32(); const size = try self.readU32();
const end_idx = self.byte_idx + size; const end_idx = self.reader.seek + size;
const datas = try self.parseVector(Parser.parseData); const datas = try self.parseVector(Parser.parseData);
defer self.allocator.free(datas); defer self.allocator.free(datas);
for (datas) |data| { for (datas) |data| {
self.parsedData = try self.allocator.realloc(self.parsedData, @as(usize, @intCast(data.offsetVal.i32)) + data.data.len); self.parsedData = try self.allocator.realloc(self.parsedData, @as(usize, @intCast(data.offsetVal.i32)) + data.data.len);
@memcpy(self.parsedData[@as(usize, @intCast(data.offsetVal.i32))..@as(usize, @intCast(data.offsetVal.i32))+data.data.len], data.data); @memcpy(self.parsedData[@as(usize, @intCast(data.offsetVal.i32))..@as(usize, @intCast(data.offsetVal.i32))+data.data.len], data.data);
} }
std.debug.assert(self.byte_idx == end_idx); std.debug.assert(self.reader.seek == end_idx);
} }
fn parseDatacountsec(self: *Parser) !void { fn parseDatacountsec(self: *Parser) !void {

View file

@ -649,7 +649,7 @@ const IRParserState = struct {
0xFD => self.parseVector(), 0xFD => self.parseVector(),
0xFC => self.parseMisc(), 0xFC => self.parseMisc(),
else => { else => {
std.log.err("Invalid instruction {x} at position {d}\n", .{ b, self.parser.byte_idx }); std.log.err("Invalid instruction {x} at position {d}\n", .{ b, self.parser.reader.seek });
return Parser.Error.invalid_instruction; return Parser.Error.invalid_instruction;
}, },
}; };
@ -697,7 +697,7 @@ const IRParserState = struct {
}, },
12...17 => @panic("UNIMPLEMENTED"), 12...17 => @panic("UNIMPLEMENTED"),
else => { else => {
std.log.err("Invalid misc instruction {d} at position {d}\n", .{ n, self.parser.byte_idx }); std.log.err("Invalid misc instruction {d} at position {d}\n", .{ n, self.parser.reader.seek });
return Parser.Error.invalid_instruction; return Parser.Error.invalid_instruction;
}, },
}; };
@ -845,7 +845,7 @@ const IRParserState = struct {
try self.push(.vecinst, .{ .vector = .{ .opcode = @enumFromInt(n), .memarg = .{ .alignment = 0, .offset = 0 }, .laneidx = 0 } }); try self.push(.vecinst, .{ .vector = .{ .opcode = @enumFromInt(n), .memarg = .{ .alignment = 0, .offset = 0 }, .laneidx = 0 } });
}, },
else => { else => {
std.log.err("Invalid vector instruction {d} at position {d}\n", .{ n, self.parser.byte_idx }); std.log.err("Invalid vector instruction {d} at position {d}\n", .{ n, self.parser.reader.seek });
return Parser.Error.invalid_instruction; return Parser.Error.invalid_instruction;
}, },
}; };

View file

@ -73,13 +73,14 @@ fn loadMod(entry: std.fs.Dir.Entry) !void {
std.debug.panic("Failed to delete {s} (reason: {any})", .{modDir, err}); std.debug.panic("Failed to delete {s} (reason: {any})", .{modDir, err});
}; };
defer file.close(); defer file.close();
const all = file.readToEndAlloc(allocator, 1_000_000) catch @panic("Unable to read main file"); // TODO(luccie): Make this be able to construct a buffer for the whole file
defer allocator.free(all); const buffer = try allocator.alloc(u8, 1_000_000);
var parser = mods.Parser.init(allocator, all) catch @panic("Failed to init parser"); var parser = mods.Parser.init(allocator, file.reader(buffer)) catch @panic("Failed to init parser");
defer parser.deinit(); defer parser.deinit();
parser.parseModule() catch |err| { parser.parseModule() catch |err| {
std.debug.print("[ERROR]: error {any} at byte {x}(0x{x})\n", .{ err, parser.byte_idx, parser.bytes[parser.byte_idx] }); // TODO(luccie): Find a better option for the expression `parser.reader.buffer[parser.reader.seek]`
return err; std.debug.print("[ERROR]: error {any} at byte {x}(0x{x})\n", .{ err, parser.reader.seek, parser.reader.buffer[parser.reader.seek] });
return err;
}; };
const module = parser.module(); const module = parser.module();