Some progress on IR parsing.

Although IR parsing is technically called while
parsing, since we lack the ability to parse
blocks or labels or if or any hard stuff really,
it does not affect code parsing. However, it is
nice to have it there as zig compiles it :)
This commit is contained in:
Ernesto Lanchares 2025-03-24 21:18:40 +00:00 committed by Lorenzo Torres
parent 7cf43ccb8b
commit 09691ec4d9
4 changed files with 196 additions and 66 deletions

View file

@ -21,58 +21,54 @@ fn testSystem2(pool: *ecs.Pool) void {
pub fn main() !void { pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){}; var gpa = std.heap.GeneralPurposeAllocator(.{}){};
const allocator = gpa.allocator(); const allocator = gpa.allocator();
{ defer if (gpa.deinit() != .ok) @panic("Leaked memory");
//var global_runtime = wasm.GlobalRuntime.init(allocator);
//defer global_runtime.deinit();
//try global_runtime.addFunction("debug", wasm.debug);
// const file = try std.fs.cwd().openFile("assets/core.wasm", .{}); var global_runtime = mods.GlobalRuntime.init(allocator);
// const all = try file.readToEndAlloc(allocator, 1_000_000); // 1 MB defer global_runtime.deinit();
// var parser = mods.Parser{ try global_runtime.addFunction("debug", mods.Wasm.debug);
// .bytes = all,
// .byte_idx = 0,
// .allocator = allocator,
// };
// const module = parser.parseModule() catch |err| {
// std.debug.print("[ERROR]: error at byte {x}(0x{x})\n", .{ parser.byte_idx, parser.bytes[parser.byte_idx] });
// return err;
// };
// var runtime = try vm.Runtime.init(allocator, module, &global_runtime);
// defer runtime.deinit(allocator);
//var parameters = [_]usize{}; const file = try std.fs.cwd().openFile("assets/core.wasm", .{});
//try runtime.callExternal(allocator, "preinit", &parameters); const all = try file.readToEndAlloc(allocator, 1_000_000); // 1 MB
const w = try window.Window.create(800, 600, "sideros"); var parser = mods.Parser{
defer w.destroy(); .bytes = all,
.byte_idx = 0,
.allocator = allocator,
};
const module = parser.parseModule() catch |err| {
std.debug.print("[ERROR]: error at byte {x}(0x{x})\n", .{ parser.byte_idx, parser.bytes[parser.byte_idx] });
return err;
};
var runtime = try mods.Runtime.init(allocator, module, &global_runtime);
defer runtime.deinit(allocator);
//var pool = try ecs.Pool.init(allocator); var parameters = [_]usize{};
//defer pool.deinit(allocator); try runtime.callExternal(allocator, "preinit", &parameters);
const w = try window.Window.create(800, 600, "sideros");
defer w.destroy();
////try pool.addSystemGroup(&[_]entities.System{ // var pool = try ecs.Pool.init(allocator);
//// testSystem, // defer pool.deinit(allocator);
////});
//try pool.addSystemGroup(&[_]ecs.System{
// testSystem2,
//});
//for (0..1000) |_| { //try pool.addSystemGroup(&[_]entities.System{
// const entity = try pool.createEntity(); // testSystem,
// try pool.addComponent(entity, ecs.components.Position{ .x = 1.0, .y = 0.5, .z = 3.0 }); //});
// try pool.addComponent(entity, ecs.components.Speed{ .speed = 5.0 }); // try pool.addSystemGroup(&[_]ecs.System{
//} // testSystem2,
// });
// TODO(luccie-cmd): Renderer.create shouldn't return an error // for (0..1000) |_| {
var r = try Renderer.create(allocator, w); // const entity = try pool.createEntity();
defer r.destroy(); // try pool.addComponent(entity, ecs.components.Position{ .x = 1.0, .y = 0.5, .z = 3.0 });
// try pool.addComponent(entity, ecs.components.Speed{ .speed = 5.0 });
// }
while (!w.shouldClose()) { // TODO(luccie-cmd): Renderer.create shouldn't return an error
c.glfwPollEvents(); // var r = try Renderer.create(allocator, w);
try r.tick(); // defer r.destroy();
//pool.tick();
}
}
if (gpa.detectLeaks()) { // while (!w.shouldClose()) {
return error.leaked_memory; // c.glfwPollEvents();
} // try r.tick();
// pool.tick();
// }
} }

View file

@ -1,5 +1,6 @@
const std = @import("std"); const std = @import("std");
const vm = @import("vm.zig"); const vm = @import("vm.zig");
const IR = @import("ir.zig");
const Allocator = std.mem.Allocator; const Allocator = std.mem.Allocator;
bytes: []const u8, bytes: []const u8,
@ -38,6 +39,7 @@ pub const FunctionScope = enum {
const Parser = @This(); const Parser = @This();
pub const Error = error{ pub const Error = error{
invalid_instruction,
invalid_magic, invalid_magic,
invalid_version, invalid_version,
invalid_section, invalid_section,
@ -78,10 +80,28 @@ pub fn readByte(self: *Parser) !u8 {
return (try self.read(1))[0]; return (try self.read(1))[0];
} }
fn readU32(self: *Parser) !u32 { pub fn readU32(self: *Parser) !u32 {
return std.leb.readUleb128(u32, self); return std.leb.readUleb128(u32, self);
} }
/// Decodes one signed LEB128-encoded `i32`, pulling bytes from this parser.
/// Any error produced while decoding is propagated to the caller.
pub fn readI32(self: *Parser) !i32 {
    const value = try std.leb.readIleb128(i32, self);
    return value;
}
/// Decodes one signed LEB128-encoded `i64`, pulling bytes from this parser.
/// Any error produced while decoding is propagated to the caller.
pub fn readI64(self: *Parser) !i64 {
    const value = try std.leb.readIleb128(i64, self);
    return value;
}
/// Reads the next `@sizeOf(f32)` bytes and decodes them as an `f32`.
///
/// WebAssembly stores floating-point immediates as little-endian IEEE 754
/// bit patterns, so the bytes are decoded explicitly as little-endian instead
/// of being reinterpreted in host byte order (which would give wrong values on
/// big-endian targets).
/// Propagates any error returned by `read`.
pub fn readF32(self: *Parser) !f32 {
    const bytes = try self.read(@sizeOf(f32));
    const bits = std.mem.readInt(u32, bytes[0..@sizeOf(f32)], .little);
    return @as(f32, @bitCast(bits));
}
/// Reads the next `@sizeOf(f64)` bytes and decodes them as an `f64`.
///
/// WebAssembly stores floating-point immediates as little-endian IEEE 754
/// bit patterns, so the bytes are decoded explicitly as little-endian instead
/// of being reinterpreted in host byte order (which would give wrong values on
/// big-endian targets).
/// Propagates any error returned by `read`.
pub fn readF64(self: *Parser) !f64 {
    const bytes = try self.read(@sizeOf(f64));
    const bits = std.mem.readInt(u64, bytes[0..@sizeOf(f64)], .little);
    return @as(f64, @bitCast(bits));
}
fn readName(self: *Parser) ![]const u8 { fn readName(self: *Parser) ![]const u8 {
// NOTE: This should be the only vector not parsed through parseVector // NOTE: This should be the only vector not parsed through parseVector
const size = try self.readU32(); const size = try self.readU32();
@ -442,6 +462,8 @@ fn parseCode(self: *Parser) !Func {
local_count += l.n; local_count += l.n;
} }
_ = try IR.parse(self);
const func = Func{ const func = Func{
.locals = try self.allocator.alloc(Valtype, local_count), .locals = try self.allocator.alloc(Valtype, local_count),
.code = try self.read(end_idx - self.byte_idx), .code = try self.read(end_idx - self.byte_idx),

View file

@ -3,6 +3,8 @@ const Parser = @import("Parser.zig");
const Allocator = std.mem.Allocator; const Allocator = std.mem.Allocator;
const IR = @This();
const VectorIndex = packed struct { const VectorIndex = packed struct {
opcode: VectorOpcode, opcode: VectorOpcode,
laneidx: u8, laneidx: u8,
@ -46,7 +48,7 @@ select_valtypes: []Parser.Valtype,
/// Opcodes. /// Opcodes.
/// This is a mix of wasm opcodes mixed with a few of our own. /// This is a mix of wasm opcodes mixed with a few of our own.
/// Mainly for `0xFC` opcodes we use `0xD3` to `0xF5`. /// Mainly for `0xFC` opcodes we use `0xD3` to `0xE4`.
pub const Opcode = enum(u8) { pub const Opcode = enum(u8) {
// CONTROL INSTRUCTIONS // CONTROL INSTRUCTIONS
// The rest of instructions should be implemented in terms of these ones // The rest of instructions should be implemented in terms of these ones
@ -97,17 +99,17 @@ pub const Opcode = enum(u8) {
/// Index: `u32`. Meaning: index into table index /// Index: `u32`. Meaning: index into table index
tableset = 0x26, tableset = 0x26,
/// Index: `DIndex`. Meaning: TODO /// Index: `DIndex`. Meaning: TODO
tableinit = 0xD3, tableinit = 0xDF,
/// Index: `u32`. Meaning: TODO /// Index: `u32`. Meaning: TODO
elemdrop = 0xD4, elemdrop = 0xE0,
/// Index: `DIndex`. Meaning: `DIndex.x` is destination `DIndex.y` is source /// Index: `DIndex`. Meaning: `DIndex.x` is destination `DIndex.y` is source
tablecopy = 0xD5, tablecopy = 0xE1,
/// Index: `u32`. Meaning: tableidx /// Index: `u32`. Meaning: tableidx
tablegrow = 0xD6, tablegrow = 0xE2,
/// Index: `u32`. Meaning: tableidx /// Index: `u32`. Meaning: tableidx
tablesize = 0xD7, tablesize = 0xE3,
/// Index: `u32`. Meaning: tableidx /// Index: `u32`. Meaning: tableidx
tablefill = 0xD8, tablefill = 0xE4,
// MEMORY INSTRUCTIONS // MEMORY INSTRUCTIONS
/// Index: `Memarg`. Meaning: memarg /// Index: `Memarg`. Meaning: memarg
@ -159,11 +161,11 @@ pub const Opcode = enum(u8) {
memorysize = 0x3F, memorysize = 0x3F,
memorygrow = 0x40, memorygrow = 0x40,
/// Index: `u32`. Meaning: dataidx /// Index: `u32`. Meaning: dataidx
memoryinit = 0xD9, memoryinit = 0xDB,
/// Index: `u32`. Meaning: dataidx /// Index: `u32`. Meaning: dataidx
datadrop = 0xDA, datadrop = 0xDC,
memorycopy = 0xDB, memorycopy = 0xDD,
memoryfill = 0xDC, memoryfill = 0xDE,
// NUMERIC INSTRUCTION // NUMERIC INSTRUCTION
/// Index: `i32`. Meaning: constant /// Index: `i32`. Meaning: constant
@ -313,14 +315,14 @@ pub const Opcode = enum(u8) {
i64_extend16_s = 0xC3, i64_extend16_s = 0xC3,
i64_extend32_s = 0xC4, i64_extend32_s = 0xC4,
i32_trunc_sat_f32_s = 0xDD, i32_trunc_sat_f32_s = 0xD3,
i32_trunc_sat_f32_u = 0xDF, i32_trunc_sat_f32_u = 0xD4,
i32_trunc_sat_f64_s = 0xF0, i32_trunc_sat_f64_s = 0xD5,
i32_trunc_sat_f64_u = 0xF1, i32_trunc_sat_f64_u = 0xD6,
i64_trunc_sat_f32_s = 0xF2, i64_trunc_sat_f32_s = 0xD7,
i64_trunc_sat_f32_u = 0xF3, i64_trunc_sat_f32_u = 0xD8,
i64_trunc_sat_f64_s = 0xF4, i64_trunc_sat_f64_s = 0xD9,
i64_trunc_sat_f64_u = 0xF5, i64_trunc_sat_f64_u = 0xDA,
// VECTOR INSTRUCTIONS // VECTOR INSTRUCTIONS
/// Index: `VectorIndex`. Meaning: See `VectorOpcode` /// Index: `VectorIndex`. Meaning: See `VectorOpcode`
@ -584,3 +586,110 @@ const VectorOpcode = enum(u8) {
f32x4_demote_f64x2_zero = 94, f32x4_demote_f64x2_zero = 94,
f64x2_promote_low_f32x4 = 95, f64x2_promote_low_f32x4 = 95,
}; };
/// Working state used while decoding instructions from a `Parser` into the
/// IR's parallel `opcodes` / `indices` lists.
const IRParserState = struct {
    /// Byte source the instructions are read from.
    parser: *Parser,
    /// Allocator used to grow `opcodes` and `indices`.
    allocator: Allocator,
    /// Decoded opcodes; entry `i` pairs with `indices.items[i]`.
    opcodes: std.ArrayListUnmanaged(Opcode),
    /// Immediate/index payload for each decoded opcode.
    indices: std.ArrayListUnmanaged(Index),

    /// Reads one opcode byte and decodes the instruction it introduces.
    ///
    /// Arms marked TODO are accepted but not decoded yet: nothing is pushed
    /// and none of their immediates are consumed.
    /// Returns `Parser.Error.invalid_instruction` for an unknown opcode byte.
    fn parseExpression(self: *IRParserState) !void {
        const b = try self.parser.readByte();
        try switch (b) {
            0x00...0x01 => {}, // TODO
            0x02...0x04 => {}, // TODO
            0x0C...0x11 => {}, // TODO
            0xD0...0xD2 => {}, // TODO
            0x1A...0x1C => {}, // TODO
            // Single `u32` immediate.
            0x20...0x24 => self.push(@enumFromInt(b), .{ .u32 = try self.parser.readU32() }),
            0x25...0x26 => self.push(@enumFromInt(b), .{ .u32 = try self.parser.readU32() }),
            // Memory accesses carry a memarg immediate.
            0x28...0x3E => self.push(@enumFromInt(b), .{ .memarg = try self.parseMemarg() }),
            0x3F...0x40 => self.parseMemsizeorgrow(b),
            0x41...0x44 => self.parseConst(b),
            // No immediate: the index slot is filled with a zero placeholder.
            0x45...0xC4 => self.push(@enumFromInt(b), .{ .u64 = 0 }),
            0xFD => self.parseVector(),
            0xFC => self.parseMisc(),
            else => {
                std.log.err("Invalid instruction {x} at position {d}\n", .{ b, self.parser.byte_idx });
                return Parser.Error.invalid_instruction;
            },
        };
    }

    /// Appends one opcode together with its index payload, keeping both lists
    /// the same length.
    fn push(self: *IRParserState, opcode: Opcode, index: Index) !void {
        try self.opcodes.append(self.allocator, opcode);
        try self.indices.append(self.allocator, index);
    }

    /// Reads a memarg: an alignment followed by an offset, both encoded as `u32`.
    ///
    /// The alignment comes from untrusted input and is narrowed to the type of
    /// `Memarg.alignment`; a value that does not fit is reported as
    /// `Parser.Error.invalid_instruction` instead of tripping an unchecked cast.
    fn parseMemarg(self: *IRParserState) !Memarg {
        const Alignment = @TypeOf(@as(Memarg, undefined).alignment);
        const raw_alignment = try self.parser.readU32();
        const alignment = std.math.cast(Alignment, raw_alignment) orelse
            return Parser.Error.invalid_instruction;
        return .{
            .alignment = alignment,
            .offset = try self.parser.readU32(),
        };
    }

    /// Decodes opcodes `0x3F` / `0x40`, which must be followed by a single
    /// `0x00` byte; any other byte is an invalid instruction.
    fn parseMemsizeorgrow(self: *IRParserState, b: u8) !void {
        if (try self.parser.readByte() != 0x00) return Parser.Error.invalid_instruction;
        try self.push(@enumFromInt(b), .{ .u64 = 0 });
    }

    /// Decodes the constant instructions `0x41`..`0x44` and their immediates.
    /// `b` must be in that range; the caller's switch guarantees it.
    fn parseConst(self: *IRParserState, b: u8) !void {
        try switch (b) {
            0x41 => self.push(.i32_const, .{ .i32 = try self.parser.readI32() }),
            0x42 => self.push(.i64_const, .{ .i64 = try self.parser.readI64() }),
            0x43 => self.push(.f32_const, .{ .f32 = try self.parser.readF32() }),
            0x44 => self.push(.f64_const, .{ .f64 = try self.parser.readF64() }),
            else => unreachable,
        };
    }

    /// Decodes an instruction behind the `0xFC` prefix.
    ///
    /// Sub-opcodes 0..7 are remapped onto `Opcode` values `0xD3 + n`.
    /// Sub-opcodes 8..17 are accepted but not decoded yet (TODO): nothing is
    /// pushed and their immediates are not consumed.
    fn parseMisc(self: *IRParserState) !void {
        const n = try self.parser.readU32();
        try switch (n) {
            // The range guarantees `n` fits in a `u8`, so the cast cannot fail.
            0...7 => self.push(@enumFromInt(0xD3 + @as(u8, @intCast(n))), .{ .u64 = 0 }),
            8...11 => {}, // TODO
            12...17 => {}, // TODO
            else => {
                std.log.err("Invalid misc instruction {d} at position {d}\n", .{ n, self.parser.byte_idx });
                return Parser.Error.invalid_instruction;
            },
        };
    }

    /// Decodes an instruction behind the `0xFD` prefix into a `.vecinst`
    /// opcode whose payload carries the vector sub-opcode plus, depending on
    /// the sub-opcode, a memarg and/or a lane index (zeroed when absent).
    ///
    /// Sub-opcodes 12 and 13 are accepted but not decoded yet: nothing is
    /// pushed and their immediates are not consumed.
    fn parseVector(self: *IRParserState) !void {
        const n = try self.parser.readU32();
        try switch (n) {
            // Memarg only.
            0...10, 92...93, 11 => self.push(.vecinst, .{ .vector = .{ .opcode = @enumFromInt(n), .memarg = try self.parseMemarg(), .laneidx = 0 } }),
            // Memarg followed by a lane index byte.
            84...91 => self.push(.vecinst, .{ .vector = .{ .opcode = @enumFromInt(n), .memarg = try self.parseMemarg(), .laneidx = try self.parser.readByte() } }),
            12 => {},
            13 => {},
            // Lane index byte only.
            21...34 => self.push(.vecinst, .{ .vector = .{ .opcode = @enumFromInt(n), .memarg = .{ .alignment = 0, .offset = 0 }, .laneidx = try self.parser.readByte() } }),
            // Yes, there are these random gaps in wasm vector instructions, don't ask me how I know...
            14...20, 35...83, 94...153, 155...161, 163...164, 167...174, 177, 181...186, 188...193, 195...196, 199...206, 209, 213...225, 227...237, 239...255 => {
                try self.push(.vecinst, .{ .vector = .{ .opcode = @enumFromInt(n), .memarg = .{ .alignment = 0, .offset = 0 }, .laneidx = 0 } });
            },
            else => {
                std.log.err("Invalid vector instruction {d} at position {d}\n", .{ n, self.parser.byte_idx });
                return Parser.Error.invalid_instruction;
            },
        };
    }
};
/// Decodes IR from `parser`, allocating with `parser.allocator`.
///
/// Currently a single `parseExpression` call is made, so at most one
/// instruction is decoded per invocation.
/// The caller owns the returned `opcodes` and `indices` slices and must free
/// them with the same allocator. On error nothing is leaked: both work lists
/// and any slice already handed out are released before returning.
pub fn parse(parser: *Parser) !IR {
    var state = IRParserState{
        .opcodes = .{},
        .indices = .{},
        .parser = parser,
        .allocator = parser.allocator,
    };
    // After a successful `toOwnedSlice` the list is empty, so these are
    // harmless no-ops on the paths where ownership has already moved.
    errdefer state.opcodes.deinit(state.allocator);
    errdefer state.indices.deinit(state.allocator);

    std.debug.print("Parsing\n", .{});
    try state.parseExpression();

    const opcodes = try state.opcodes.toOwnedSlice(state.allocator);
    // If converting `indices` fails, the opcode slice must not be leaked.
    errdefer state.allocator.free(opcodes);
    const indices = try state.indices.toOwnedSlice(state.allocator);

    return .{
        .opcodes = opcodes,
        .indices = indices,
        .select_valtypes = &.{},
    };
}

View file

@ -3,3 +3,6 @@ pub const VM = @import("vm.zig");
// TODO: is this really needed? // TODO: is this really needed?
pub const Wasm = @import("wasm.zig"); pub const Wasm = @import("wasm.zig");
pub const IR = @import("ir.zig"); pub const IR = @import("ir.zig");
pub const GlobalRuntime = Wasm.GlobalRuntime;
pub const Runtime = VM.Runtime;