const std = @import("std");
const builtin = @import("builtin");
const binary = @import("../binary.zig");
const module = @import("../module.zig");
const codebuf = @import("codebuf.zig");
const aarch64 = @import("aarch64.zig");

pub const JitResult = aarch64.JitResult;
pub const HelperAddrs = aarch64.HelperAddrs;

// Native frame layout established by `emitPrologue` (offsets from rsp):
//   [0]  saved rbx      [8]  saved r12      [16] saved r13
//   [local_base_bytes ..]    one 8-byte slot per wasm local (params first)
//   [operand_base ..]        wasm operand stack, one 8-byte slot per value
// Register roles inside generated code:
//   rbx = address of the next free operand-stack slot
//   r13 = the first native argument, forwarded as first argument to helpers
// Helper arguments are loaded into rdi, rsi, rdx, rcx, r8, r9 in that order.
// NOTE(review): that order matches the System V AMD64 C ABI; confirm the
// helpers are never built for an ABI with different argument registers.

/// Bytes subtracted from rsp by the prologue.
const frame_size_bytes: usize = 0x408;
/// Offset from rsp of local slot 0.
const local_base_bytes: usize = 32;

/// How a `compileBlock` invocation terminated.
const EndKind = enum { hit_end, hit_else };

const ControlKind = enum { block, loop, @"if" };

/// One entry of the structured-control stack.
const ControlFrame = struct {
    kind: ControlKind,
    /// Operand-stack depth when the construct was entered.
    entry_depth: usize,
    /// Number/type of values carried by a branch to this label (0 for loops).
    label_arity: u8,
    label_type: ?module.ValType,
    /// Number/type of values left on the stack when the construct ends.
    end_arity: u8,
    end_type: ?module.ValType,
    /// Code position of the loop head (branch target for `.loop` frames).
    loop_head_pos: usize,
    /// Positions of rel32 immediates that must be patched to the end label.
    end_patches: std.ArrayList(usize),
};

/// Per-function compilation state.
const Context = struct {
    allocator: std.mem.Allocator,
    mod: *const module.Module,
    num_imported_funcs: u32,
    helpers: HelperAddrs,
    buf: *codebuf.CodeBuffer,
    /// Compile-time model of the operand-stack depth (in 8-byte slots).
    stack_depth: usize,
    max_stack_depth: usize,
    local_count: u32,
    result_type: ?module.ValType,
    local_types: []const module.ValType,
    control: std.ArrayList(ControlFrame),
};

/// Short aliases for the register numbers used by the emitters.
const gpr = struct {
    const rax: u4 = @intFromEnum(Reg.rax);
    const rcx: u4 = @intFromEnum(Reg.rcx);
    const rdx: u4 = @intFromEnum(Reg.rdx);
    const rbx: u4 = @intFromEnum(Reg.rbx);
    const rsp: u4 = @intFromEnum(Reg.rsp);
    const rsi: u4 = @intFromEnum(Reg.rsi);
    const rdi: u4 = @intFromEnum(Reg.rdi);
    const r8: u4 = @intFromEnum(Reg.r8);
    const r9: u4 = @intFromEnum(Reg.r9);
    const r10: u4 = @intFromEnum(Reg.r10);
    const r11: u4 = @intFromEnum(Reg.r11);
    const r12: u4 = @intFromEnum(Reg.r12);
    const r13: u4 = @intFromEnum(Reg.r13);
};

/// Access width used when moving a value to or from an 8-byte stack slot.
const Width = enum { w32, w64 };

fn widthOf(t: module.ValType) Width {
    return switch (t) {
        .i32, .f32 => .w32,
        .i64, .f64 => .w64,
    };
}

fn isNumericValType(t: module.ValType) bool {
    return t == .i32 or t == .i64 or t == .f32 or t == .f64;
}

/// Compiles one function body to x86-64 machine code.
///
/// Returns `null` when the host is not x86-64, when the function uses a
/// feature this compiler does not handle (multiple results, too many locals
/// for the fixed frame, unsupported opcodes, ...), or when decoding the body
/// fails. On success the caller owns `JitResult.buf`.
///
/// Errors: allocation failures before code generation starts and failures of
/// `CodeBuffer.init` / `finalize` are propagated.
pub fn compileFunctionI32(
    allocator: std.mem.Allocator,
    mod: *const module.Module,
    num_imported_funcs: u32,
    current_func_idx: u32,
    body: *const module.FunctionBody,
    ft: *const module.FuncType,
    helpers: HelperAddrs,
) !?JitResult {
    if (builtin.cpu.arch != .x86_64) return null;
    _ = current_func_idx;
    if (ft.results.len > 1) return null;

    for (ft.params) |p| {
        if (!isNumericValType(p)) return null;
    }
    var local_count: usize = ft.params.len;
    for (body.locals) |decl| {
        if (!isNumericValType(decl.valtype)) return null;
        local_count += decl.count;
    }

    // Reject oversized frames BEFORE allocating anything sized by the
    // (untrusted) local count.
    const operand_base_bytes = std.mem.alignForward(usize, local_base_bytes + local_count * 8, 16);
    if (operand_base_bytes >= frame_size_bytes) return null;
    const max_stack_depth = (frame_size_bytes - operand_base_bytes) / 8;
    if (max_stack_depth == 0) return null;

    const local_types = try allocator.alloc(module.ValType, local_count);
    defer allocator.free(local_types);
    @memcpy(local_types[0..ft.params.len], ft.params);
    var next_local: usize = ft.params.len;
    for (body.locals) |decl| {
        @memset(local_types[next_local..][0..decl.count], decl.valtype);
        next_local += decl.count;
    }

    var buf = try codebuf.CodeBuffer.init(allocator, 8192);
    // The buffer must be released on EVERY exit that does not hand it to the
    // caller, including the `return null` bail-outs below (an `errdefer`
    // would not run for those).
    var buf_handed_off = false;
    defer if (!buf_handed_off) buf.deinit();

    emitPrologue(&buf, @intCast(ft.params.len), @intCast(local_count), @intCast(operand_base_bytes));

    const fn_type: ?module.ValType = if (ft.results.len == 1) ft.results[0] else null;
    const fn_arity: u8 = if (fn_type != null) 1 else 0;

    var cx = Context{
        .allocator = allocator,
        .mod = mod,
        .num_imported_funcs = num_imported_funcs,
        .helpers = helpers,
        .buf = &buf,
        .stack_depth = 0,
        .max_stack_depth = max_stack_depth,
        .local_count = @intCast(local_count),
        .result_type = fn_type,
        .local_types = local_types,
        .control = .empty,
    };
    defer {
        for (cx.control.items) |*fr| fr.end_patches.deinit(allocator);
        cx.control.deinit(allocator);
    }

    // The function body itself behaves like a block whose label is "return".
    try cx.control.append(allocator, .{
        .kind = .block,
        .entry_depth = 0,
        .label_arity = fn_arity,
        .label_type = fn_type,
        .end_arity = fn_arity,
        .end_type = fn_type,
        .loop_head_pos = 0,
        .end_patches = .empty,
    });

    var pos: usize = 0;
    // Any compile error (including OutOfMemory) is reported as "not compiled".
    const end_kind = compileBlock(&cx, body.code, &pos, false) catch return null;
    if (end_kind != .hit_end) return null;
    // Bytes after the function's final `end` mean the decoder lost sync.
    if (pos != body.code.len) return null;

    if (fn_type) |rt| {
        if (cx.stack_depth != 1) return null;
        try popSlot(&cx, widthOf(rt), gpr.rax);
    } else {
        if (cx.stack_depth != 0) return null;
    }

    emitEpilogueAndRet(&buf);
    try buf.finalize();
    buf_handed_off = true;
    return .{ .buf = buf, .arity = 0 };
}

/// Compiles instructions starting at `pos.*` until the `end` (0x0B) that
/// closes the innermost control frame, or until `else` (0x05) when
/// `allow_else` is set. Nested constructs recurse. `pos.*` is left just past
/// the terminating opcode.
fn compileBlock(cx: *Context, code: []const u8, pos: *usize, allow_else: bool) !EndKind {
    while (pos.* < code.len) {
        const op = code[pos.*];
        pos.* += 1;
        switch (op) {
            // end
            0x0B => {
                const fr = currentFrame(cx);
                // Normalise the FALLTHROUGH path first; branches to this
                // label already arrive with the final depth and must land
                // after any adjustment code.
                try setStackDepth(cx, fr.entry_depth + fr.end_arity);
                const end_pos = cx.buf.cursor();
                for (fr.end_patches.items) |patch_imm_pos| patchRel32(cx.buf, patch_imm_pos, end_pos);
                return .hit_end;
            },
            // else
            0x05 => {
                if (!allow_else) return error.MalformedControlFlow;
                return .hit_else;
            },
            // br
            0x0C => {
                const depth = try binary.readULEB128(u32, code, pos);
                try emitBrToDepth(cx, depth);
            },
            // br_if
            0x0D => {
                const depth = try binary.readULEB128(u32, code, pos);
                try popW(cx, gpr.r11);
                emitTestWReg(cx.buf, gpr.r11, gpr.r11);
                const not_taken = emitJccPlaceholder(cx.buf, .z);
                const fallthrough_depth = cx.stack_depth;
                try emitBrToDepth(cx, depth);
                // The branch sequence is skipped when the condition is zero,
                // so the modelled depth reverts to the pre-branch value.
                cx.stack_depth = fallthrough_depth;
                patchRel32(cx.buf, not_taken, cx.buf.cursor());
            },
            // br_table
            0x0E => try compileBrTable(cx, code, pos),
            // local.get / local.set / local.tee
            0x20, 0x21, 0x22 => try compileLocalAccess(cx, op, code, pos),
            // i32.const
            0x41 => {
                const v = try binary.readSLEB128(i32, code, pos);
                emitMovImm32(cx.buf, gpr.r9, @bitCast(v));
                try pushW(cx, gpr.r9);
            },
            // i64.const
            0x42 => {
                const v = try binary.readSLEB128(i64, code, pos);
                emitMovImm64(cx.buf, gpr.r9, @bitCast(v));
                try pushX(cx, gpr.r9);
            },
            // f32.const (raw bits)
            0x43 => {
                if (pos.* + 4 > code.len) return error.UnexpectedEof;
                const bits = std.mem.readInt(u32, code[pos.*..][0..4], .little);
                pos.* += 4;
                emitMovImm32(cx.buf, gpr.r9, bits);
                try pushW(cx, gpr.r9);
            },
            // f64.const (raw bits)
            0x44 => {
                if (pos.* + 8 > code.len) return error.UnexpectedEof;
                const bits = std.mem.readInt(u64, code[pos.*..][0..8], .little);
                pos.* += 8;
                emitMovImm64(cx.buf, gpr.r9, bits);
                try pushX(cx, gpr.r9);
            },
            // drop
            0x1A => try popX(cx, gpr.r9),
            // select (untyped): operands are moved as full 64-bit slots
            0x1B => try emitSelect(cx, .w64),
            // select t
            0x1C => {
                const n = try binary.readULEB128(u32, code, pos);
                if (n != 1) return error.UnsupportedOpcode;
                if (pos.* >= code.len) return error.UnexpectedEof;
                const vt = try decodeValType(code[pos.*]);
                pos.* += 1;
                try emitSelect(cx, widthOf(vt));
            },

            // Numeric operators are delegated to runtime helpers that take
            // the opcode as their first argument.
            0x45, 0x67...0x69 => try emitUnaryHelperCall(cx, cx.helpers.i32_unary, op, .w32, .w32),
            0x46...0x4F => try emitBinaryHelperCall(cx, cx.helpers.i32_cmp, op, .w32, .w32),
            0x6A...0x78 => try emitBinaryHelperCall(cx, cx.helpers.i32_binary, op, .w32, .w32),
            // i64.eqz: the helper takes only the operand
            0x50 => {
                try popX(cx, gpr.rdi);
                emitCallAbs(cx.buf, cx.helpers.i64_eqz);
                try pushW(cx, gpr.rax);
            },
            0x51...0x5A => try emitBinaryHelperCall(cx, cx.helpers.i64_cmp, op, .w64, .w32),
            0x79...0x7B => try emitUnaryHelperCall(cx, cx.helpers.i64_unary, op, .w64, .w64),
            0x7C...0x8A => try emitBinaryHelperCall(cx, cx.helpers.i64_binary, op, .w64, .w64),
            0x5B...0x60 => try emitBinaryHelperCall(cx, cx.helpers.f32_cmp, op, .w32, .w32),
            0x61...0x66 => try emitBinaryHelperCall(cx, cx.helpers.f64_cmp, op, .w64, .w32),
            0x8B...0x91 => try emitUnaryHelperCall(cx, cx.helpers.f32_unary, op, .w32, .w32),
            0x92...0x98 => try emitBinaryHelperCall(cx, cx.helpers.f32_binary, op, .w32, .w32),
            0x99...0x9F => try emitUnaryHelperCall(cx, cx.helpers.f64_unary, op, .w64, .w64),
            0xA0...0xA6 => try emitBinaryHelperCall(cx, cx.helpers.f64_binary, op, .w64, .w64),
            // conversions / reinterpretations
            0xA7...0xBF => try emitUnaryHelperCall(
                cx,
                cx.helpers.convert,
                op,
                convertOperandWidth(op),
                widthOf(convertResultType(op)),
            ),
            // sign-extension operators (0xC0/0xC1 are the i32 forms)
            0xC0...0xC4 => {
                const width: Width = if (op == 0xC0 or op == 0xC1) .w32 else .w64;
                try emitUnaryHelperCall(cx, cx.helpers.i_extend, op, width, width);
            },

            // call
            0x10 => {
                const fidx = try binary.readULEB128(u32, code, pos);
                const cft = try getFuncType(cx.mod, cx.num_imported_funcs, fidx);
                if (cft.results.len > 1) return error.UnsupportedOpcode;
                if (cx.stack_depth < cft.params.len) return error.StackUnderflow;
                emitMovXReg(cx.buf, gpr.rdi, gpr.r13);
                emitMovImm32(cx.buf, gpr.rsi, fidx);
                emitArgsPointer(cx, gpr.rdx, cft.params.len);
                emitMovImm32(cx.buf, gpr.rcx, @intCast(cft.params.len));
                emitCallAbs(cx.buf, cx.helpers.call);
                try finishCall(cx, cft);
            },
            // call_indirect
            0x11 => {
                const type_idx = try binary.readULEB128(u32, code, pos);
                const table_idx = try binary.readULEB128(u32, code, pos);
                if (type_idx >= cx.mod.types.len) return error.UnsupportedOpcode;
                const cft = &cx.mod.types[type_idx];
                if (cft.results.len > 1) return error.UnsupportedOpcode;
                if (cx.stack_depth < cft.params.len + 1) return error.StackUnderflow;
                emitMovXReg(cx.buf, gpr.rdi, gpr.r13);
                emitMovImm32(cx.buf, gpr.rsi, type_idx);
                emitMovImm32(cx.buf, gpr.rdx, table_idx);
                // Table element index sits on top of the arguments.
                try popW(cx, gpr.rcx);
                emitArgsPointer(cx, gpr.r8, cft.params.len);
                emitMovImm32(cx.buf, gpr.r9, @intCast(cft.params.len));
                emitCallAbs(cx.buf, cx.helpers.call_indirect);
                try finishCall(cx, cft);
            },
            // global.get
            0x23 => {
                const gidx = try binary.readULEB128(u32, code, pos);
                const gvt = try getGlobalValType(cx.mod, gidx);
                emitMovXReg(cx.buf, gpr.rdi, gpr.r13);
                emitMovImm32(cx.buf, gpr.rsi, gidx);
                emitCallAbs(cx.buf, cx.helpers.global_get);
                try pushSlot(cx, widthOf(gvt), gpr.rax);
            },
            // global.set
            0x24 => {
                const gidx = try binary.readULEB128(u32, code, pos);
                const gvt = try getGlobalValType(cx.mod, gidx);
                try popSlot(cx, widthOf(gvt), gpr.rdx);
                emitMovXReg(cx.buf, gpr.rdi, gpr.r13);
                emitMovImm32(cx.buf, gpr.rsi, gidx);
                emitCallAbs(cx.buf, cx.helpers.global_set);
            },
            // memory loads: memarg = (align, offset); align is ignored
            0x28...0x35 => {
                _ = try binary.readULEB128(u32, code, pos);
                const offset = try binary.readULEB128(u32, code, pos);
                try popW(cx, gpr.rsi);
                emitMovXReg(cx.buf, gpr.rdi, gpr.r13);
                emitMovImm32(cx.buf, gpr.rdx, offset);
                emitMovImm32(cx.buf, gpr.rcx, op);
                emitCallAbs(cx.buf, cx.helpers.mem_load);
                try pushSlot(cx, widthOf(memLoadResultType(op)), gpr.rax);
            },
            // memory stores
            0x36...0x3E => {
                _ = try binary.readULEB128(u32, code, pos);
                const offset = try binary.readULEB128(u32, code, pos);
                try popSlot(cx, widthOf(memStoreValueType(op)), gpr.r8);
                try popW(cx, gpr.rsi);
                emitMovXReg(cx.buf, gpr.rdi, gpr.r13);
                emitMovImm32(cx.buf, gpr.rdx, offset);
                emitMovImm32(cx.buf, gpr.rcx, op);
                emitCallAbs(cx.buf, cx.helpers.mem_store);
            },
            // memory.size
            0x3F => {
                _ = try binary.readULEB128(u8, code, pos);
                emitMovXReg(cx.buf, gpr.rdi, gpr.r13);
                emitCallAbs(cx.buf, cx.helpers.memory_size);
                try pushW(cx, gpr.rax);
            },
            // memory.grow
            0x40 => {
                _ = try binary.readULEB128(u8, code, pos);
                try popW(cx, gpr.rsi);
                emitMovXReg(cx.buf, gpr.rdi, gpr.r13);
                emitCallAbs(cx.buf, cx.helpers.memory_grow);
                try pushW(cx, gpr.rax);
            },
            // 0xFC-prefixed operators
            0xFC => try compilePrefixedOp(cx, code, pos),

            // block / loop
            0x02, 0x03 => {
                const is_loop = op == 0x03;
                const sig = try readBlockSig(cx, code, pos, is_loop);
                try cx.control.append(cx.allocator, .{
                    .kind = if (is_loop) .loop else .block,
                    .entry_depth = cx.stack_depth,
                    // A branch to a loop re-enters at its head and carries
                    // no values (block parameters are not supported).
                    .label_arity = if (is_loop) 0 else sig.arity,
                    .label_type = if (is_loop) null else sig.val_type,
                    .end_arity = sig.arity,
                    .end_type = sig.val_type,
                    .loop_head_pos = cx.buf.cursor(),
                    .end_patches = .empty,
                });
                const nested_end = try compileBlock(cx, code, pos, false);
                if (nested_end != .hit_end) return error.MalformedControlFlow;
                var fr = cx.control.pop().?;
                fr.end_patches.deinit(cx.allocator);
            },
            // if
            0x04 => {
                const sig = try readBlockSig(cx, code, pos, false);
                try popW(cx, gpr.r9);
                emitTestWReg(cx.buf, gpr.r9, gpr.r9);
                const entry_depth = cx.stack_depth;
                try cx.control.append(cx.allocator, .{
                    .kind = .@"if",
                    .entry_depth = entry_depth,
                    .label_arity = sig.arity,
                    .label_type = sig.val_type,
                    .end_arity = sig.arity,
                    .end_type = sig.val_type,
                    .loop_head_pos = 0,
                    .end_patches = .empty,
                });
                const jz_pos = emitJccPlaceholder(cx.buf, .z);
                const then_end = try compileBlock(cx, code, pos, true);
                if (then_end == .hit_else) {
                    // The then-arm skips over the else-arm.
                    const jump_end = emitJmpPlaceholder(cx.buf);
                    try currentFrame(cx).end_patches.append(cx.allocator, jump_end);
                    patchRel32(cx.buf, jz_pos, cx.buf.cursor());
                    // The else-arm is entered straight from the `jz`, i.e.
                    // with the runtime stack already at `entry_depth`. Only
                    // the compile-time model is reset; emitting an rbx
                    // adjustment here would corrupt the operand stack.
                    cx.stack_depth = entry_depth;
                    const else_end = try compileBlock(cx, code, pos, false);
                    if (else_end != .hit_end) return error.MalformedControlFlow;
                } else {
                    patchRel32(cx.buf, jz_pos, cx.buf.cursor());
                }
                var fr = cx.control.pop().?;
                fr.end_patches.deinit(cx.allocator);
            },
            // unreachable
            0x00 => {
                emitMovXReg(cx.buf, gpr.rdi, gpr.r13);
                emitCallAbs(cx.buf, cx.helpers.@"unreachable");
            },
            // nop
            0x01 => {},
            // return
            0x0F => {
                if (cx.result_type) |rt| try popSlot(cx, widthOf(rt), gpr.rax);
                emitEpilogueAndRet(cx.buf);
                // Keep decoding: `return` does not close the enclosing
                // construct. Whatever follows up to the matching `end` is
                // dead code; that `end` restores the modelled stack depth
                // and patches pending branches to the label.
            },
            else => return error.UnsupportedOpcode,
        }
    }
    return error.UnexpectedEof;
}

/// br_table: emits a linear compare-and-branch chain over the label vector,
/// followed by an unconditional branch to the default label.
fn compileBrTable(cx: *Context, code: []const u8, pos: *usize) !void {
    const n = try binary.readULEB128(u32, code, pos);
    // The n + 1 label indices occupy at least one byte each; checking this
    // first bounds the allocation by the size of the function body.
    if (n >= code.len -| pos.*) return error.UnexpectedEof;
    const table = try cx.allocator.alloc(u32, @as(usize, n) + 1);
    defer cx.allocator.free(table);
    for (table) |*d| d.* = try binary.readULEB128(u32, code, pos);

    try popW(cx, gpr.r11);
    const fallthrough_depth = cx.stack_depth;
    for (table[0..n], 0..) |label, i| {
        emitCmpWImm32(cx.buf, gpr.r11, @intCast(i));
        const skip = emitJccPlaceholder(cx.buf, .ne);
        try emitBrToDepth(cx, label);
        cx.stack_depth = fallthrough_depth;
        patchRel32(cx.buf, skip, cx.buf.cursor());
    }
    try emitBrToDepth(cx, table[n]);
}

/// local.get (0x20), local.set (0x21) and local.tee (0x22).
fn compileLocalAccess(cx: *Context, op: u8, code: []const u8, pos: *usize) !void {
    const idx = try binary.readULEB128(u32, code, pos);
    if (idx >= cx.local_count) return error.UnsupportedOpcode;
    const off: i32 = @intCast(local_base_bytes + @as(usize, idx) * 8);
    const width = widthOf(cx.local_types[idx]);
    switch (op) {
        0x20 => {
            switch (width) {
                .w32 => emitMovWFromRspDisp(cx.buf, gpr.r9, off),
                .w64 => emitMovXFromRspDisp(cx.buf, gpr.r9, off),
            }
            try pushSlot(cx, width, gpr.r9);
        },
        0x21, 0x22 => {
            try popSlot(cx, width, gpr.r9);
            switch (width) {
                .w32 => emitMovRspDispFromW(cx.buf, off, gpr.r9),
                .w64 => emitMovRspDispFromX(cx.buf, off, gpr.r9),
            }
            // tee leaves the value on the stack
            if (op == 0x22) try pushSlot(cx, width, gpr.r9);
        },
        else => unreachable,
    }
}

/// Operators behind the 0xFC prefix: saturating truncations (0..7), bulk
/// memory (8..11) and table.size (16). Everything else is unsupported.
fn compilePrefixedOp(cx: *Context, code: []const u8, pos: *usize) !void {
    const subop = try binary.readULEB128(u32, code, pos);
    switch (subop) {
        0...7 => {
            // 0,1,4,5 truncate from f32; 0..3 produce i32.
            const operand: Width = switch (subop) {
                0, 1, 4, 5 => .w32,
                else => .w64,
            };
            const result: Width = if (subop < 4) .w32 else .w64;
            try emitUnaryHelperCall(cx, cx.helpers.trunc_sat, subop, operand, result);
        },
        // memory.init
        8 => {
            const data_idx = try binary.readULEB128(u32, code, pos);
            const mem_idx = try binary.readULEB128(u32, code, pos);
            if (mem_idx != 0) return error.UnsupportedOpcode;
            try emitBulkMemoryCall(cx, cx.helpers.memory_init, data_idx);
        },
        // data.drop
        9 => {
            const data_idx = try binary.readULEB128(u32, code, pos);
            emitMovXReg(cx.buf, gpr.rdi, gpr.r13);
            emitMovImm32(cx.buf, gpr.rsi, data_idx);
            emitCallAbs(cx.buf, cx.helpers.data_drop);
        },
        // memory.copy
        10 => {
            const dst_mem = try binary.readULEB128(u32, code, pos);
            const src_mem = try binary.readULEB128(u32, code, pos);
            if (dst_mem != 0 or src_mem != 0) return error.UnsupportedOpcode;
            try emitBulkMemoryCall(cx, cx.helpers.memory_copy, null);
        },
        // memory.fill
        11 => {
            const mem_idx = try binary.readULEB128(u32, code, pos);
            if (mem_idx != 0) return error.UnsupportedOpcode;
            try emitBulkMemoryCall(cx, cx.helpers.memory_fill, null);
        },
        // table.size
        16 => {
            const table_idx = try binary.readULEB128(u32, code, pos);
            emitMovXReg(cx.buf, gpr.rdi, gpr.r13);
            emitMovImm32(cx.buf, gpr.rsi, table_idx);
            emitCallAbs(cx.buf, cx.helpers.table_size);
            try pushW(cx, gpr.rax);
        },
        else => return error.UnsupportedOpcode,
    }
}

/// Pops three i32 operands into rcx, rdx, rsi (top of stack first), loads
/// r13 into rdi and, if given, `data_idx` into r8, then calls `helper`.
fn emitBulkMemoryCall(cx: *Context, helper: usize, data_idx: ?u32) !void {
    try popW(cx, gpr.rcx);
    try popW(cx, gpr.rdx);
    try popW(cx, gpr.rsi);
    emitMovXReg(cx.buf, gpr.rdi, gpr.r13);
    if (data_idx) |d| emitMovImm32(cx.buf, gpr.r8, d);
    emitCallAbs(cx.buf, helper);
}

/// Emits `helper(op, operand)`: operand popped into rsi, `op` in rdi, result
/// pushed from rax.
fn emitUnaryHelperCall(cx: *Context, helper: usize, op: u32, operand: Width, result: Width) !void {
    try popSlot(cx, operand, gpr.rsi);
    emitMovImm32(cx.buf, gpr.rdi, op);
    emitCallAbs(cx.buf, helper);
    try pushSlot(cx, result, gpr.rax);
}

/// Emits `helper(op, lhs, rhs)`: rhs popped into rdx, lhs into rsi, `op` in
/// rdi, result pushed from rax.
fn emitBinaryHelperCall(cx: *Context, helper: usize, op: u32, operand: Width, result: Width) !void {
    try popSlot(cx, operand, gpr.rdx);
    try popSlot(cx, operand, gpr.rsi);
    emitMovImm32(cx.buf, gpr.rdi, op);
    emitCallAbs(cx.buf, helper);
    try pushSlot(cx, result, gpr.rax);
}

/// select: pops condition (r11), second value (r10) and first value (r9),
/// and pushes r9 after overwriting it with r10 when the condition is zero.
fn emitSelect(cx: *Context, width: Width) !void {
    try popW(cx, gpr.r11);
    try popSlot(cx, width, gpr.r10);
    try popSlot(cx, width, gpr.r9);
    emitTestWReg(cx.buf, gpr.r11, gpr.r11);
    const use_rhs = emitJccPlaceholder(cx.buf, .z);
    const done = emitJmpPlaceholder(cx.buf);
    patchRel32(cx.buf, use_rhs, cx.buf.cursor());
    switch (width) {
        .w32 => emitMovWReg(cx.buf, gpr.r9, gpr.r10),
        .w64 => emitMovXReg(cx.buf, gpr.r9, gpr.r10),
    }
    patchRel32(cx.buf, done, cx.buf.cursor());
    try pushSlot(cx, width, gpr.r9);
}

/// Loads into `dst` the address of the first of the `param_count` topmost
/// operand-stack slots (rbx itself when there are no parameters).
fn emitArgsPointer(cx: *Context, dst: u4, param_count: usize) void {
    emitMovXReg(cx.buf, dst, gpr.rbx);
    if (param_count != 0) emitSubXImm32(cx.buf, dst, @intCast(param_count * 8));
}

/// After a call helper returns: discards the argument slots and pushes the
/// result (taken from rax) if the callee type has one.
fn finishCall(cx: *Context, cft: *const module.FuncType) !void {
    if (cft.params.len > 0) {
        emitSubXImm32(cx.buf, gpr.rbx, @intCast(cft.params.len * 8));
        cx.stack_depth -= cft.params.len;
    }
    if (cft.results.len == 1) try pushSlot(cx, widthOf(cft.results[0]), gpr.rax);
}

const BlockSig = struct {
    /// Number of values the construct leaves on the stack at its `end`.
    arity: u8,
    val_type: ?module.ValType,
};

/// Decodes a block type. Supports the empty type, a single value type, and
/// type indices whose function type has no parameters and at most one result.
///
/// The returned arity is the construct's RESULT arity for blocks, ifs and
/// loops alike; callers derive the (always zero) branch arity of a loop
/// themselves. `is_loop` is accepted for call-site compatibility only.
fn readBlockSig(cx: *Context, code: []const u8, pos: *usize, is_loop: bool) !BlockSig {
    _ = is_loop;
    const bt = try binary.readSLEB128(i33, code, pos);
    switch (bt) {
        -0x40 => return BlockSig{ .arity = 0, .val_type = null },
        -0x01 => return BlockSig{ .arity = 1, .val_type = .i32 },
        -0x02 => return BlockSig{ .arity = 1, .val_type = .i64 },
        -0x03 => return BlockSig{ .arity = 1, .val_type = .f32 },
        -0x04 => return BlockSig{ .arity = 1, .val_type = .f64 },
        else => {},
    }
    if (bt < 0) return error.UnsupportedOpcode;

    const type_idx: u32 = @intCast(bt);
    if (type_idx >= cx.mod.types.len) return error.UnsupportedOpcode;
    const ft = &cx.mod.types[type_idx];
    if (ft.params.len != 0) return error.UnsupportedOpcode;
    if (ft.results.len == 0) return BlockSig{ .arity = 0, .val_type = null };
    if (ft.results.len == 1) return BlockSig{ .arity = 1, .val_type = ft.results[0] };
    return error.UnsupportedOpcode;
}

/// Maps a value-type byte to `module.ValType` (numeric types only).
fn decodeValType(b: u8) !module.ValType {
    return switch (b) {
        0x7F => .i32,
        0x7E => .i64,
        0x7D => .f32,
        0x7C => .f64,
        else => error.UnsupportedOpcode,
    };
}

/// Result type of a memory load opcode (0x28..0x35).
fn memLoadResultType(op: u8) module.ValType {
    return switch (op) {
        0x28, 0x2C, 0x2D, 0x2E, 0x2F => .i32,
        0x29, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35 => .i64,
        0x2A => .f32,
        0x2B => .f64,
        else => .i32,
    };
}

/// Type of the value operand of a memory store opcode (0x36..0x3E).
fn memStoreValueType(op: u8) module.ValType {
    return switch (op) {
        0x36, 0x3A, 0x3B => .i32,
        0x37, 0x3C, 0x3D, 0x3E => .i64,
        0x38 => .f32,
        0x39 => .f64,
        else => .i32,
    };
}

/// Result type of a conversion opcode (0xA7..0xBF).
fn convertResultType(op: u8) module.ValType {
    return switch (op) {
        0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xBC => .i32,
        0xAC, 0xAD, 0xAE, 0xAF, 0xB0, 0xB1, 0xBD => .i64,
        0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xBE => .f32,
        0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBF => .f64,
        else => .i32,
    };
}

/// Slot width of the operand of a conversion opcode (0xA7..0xBF).
/// 32-bit operands (i32 or f32 sources) must be popped with a 32-bit load:
/// `pushW` only writes the low four bytes of a slot, so the upper half holds
/// stale data. 0xAE/0xAF are `i64.trunc_f32_s/u` and therefore 32-bit too.
fn convertOperandWidth(op: u8) Width {
    return switch (op) {
        0xA8, 0xA9, 0xAC, 0xAD, 0xAE, 0xAF, 0xB2, 0xB3, 0xB7, 0xB8, 0xBB, 0xBC, 0xBE => .w32,
        else => .w64,
    };
}

/// Innermost control frame. The control stack is never empty while a body
/// is being compiled (the function-level frame is pushed first).
fn currentFrame(cx: *Context) *ControlFrame {
    return &cx.control.items[cx.control.items.len - 1];
}

/// Sets the modelled operand-stack depth to `depth` and emits the matching
/// runtime adjustment of rbx (nothing is emitted when the depth is unchanged).
fn setStackDepth(cx: *Context, depth: usize) !void {
    if (depth > cx.max_stack_depth) return error.StackOverflow;
    if (depth == cx.stack_depth) return;
    if (depth > cx.stack_depth) {
        const bytes = (depth - cx.stack_depth) * 8;
        emitAddXImm32(cx.buf, gpr.rbx, @intCast(bytes));
    } else {
        const bytes = (cx.stack_depth - depth) * 8;
        emitSubXImm32(cx.buf, gpr.rbx, @intCast(bytes));
    }
    cx.stack_depth = depth;
}

/// Emits an unconditional branch to the label `depth` frames out.
///
/// A carried value (label arity 1) is saved in r9, the stack is unwound to
/// the target's entry depth, and the value is pushed back for non-loop
/// targets. Loop targets jump backwards to the recorded head; other targets
/// get a forward jump that is patched when the target's `end` is reached.
fn emitBrToDepth(cx: *Context, depth: u32) !void {
    if (depth >= cx.control.items.len) return error.MalformedControlFlow;
    const target = &cx.control.items[cx.control.items.len - 1 - depth];

    const carried: ?module.ValType = switch (target.label_arity) {
        0 => null,
        1 => target.label_type orelse return error.UnsupportedOpcode,
        else => return error.UnsupportedOpcode,
    };

    if (carried) |t| try popSlot(cx, widthOf(t), gpr.r9);
    try setStackDepth(cx, target.entry_depth);

    if (target.kind == .loop) {
        const p = emitJmpPlaceholder(cx.buf);
        patchRel32(cx.buf, p, target.loop_head_pos);
    } else {
        if (carried) |t| try pushSlot(cx, widthOf(t), gpr.r9);
        const p = emitJmpPlaceholder(cx.buf);
        try target.end_patches.append(cx.allocator, p);
    }
}

/// Pushes `reg` using the access width selected by `width`.
fn pushSlot(cx: *Context, width: Width, reg: u4) !void {
    switch (width) {
        .w32 => try pushW(cx, reg),
        .w64 => try pushX(cx, reg),
    }
}

/// Pops into `reg` using the access width selected by `width`.
fn popSlot(cx: *Context, width: Width, reg: u4) !void {
    switch (width) {
        .w32 => try popW(cx, reg),
        .w64 => try popX(cx, reg),
    }
}

/// Stores the low 32 bits of `reg` at [rbx] and advances rbx by one slot.
fn pushW(cx: *Context, reg: u4) !void {
    if (cx.stack_depth >= cx.max_stack_depth) return error.StackOverflow;
    emitMovMemRbxFromW(cx.buf, reg);
    emitAddXImm32(cx.buf, gpr.rbx, 8);
    cx.stack_depth += 1;
}

/// Retreats rbx by one slot and loads 32 bits from [rbx] into `reg`.
fn popW(cx: *Context, reg: u4) !void {
    if (cx.stack_depth == 0) return error.StackUnderflow;
    emitSubXImm32(cx.buf, gpr.rbx, 8);
    emitMovWFromMemRbx(cx.buf, reg);
    cx.stack_depth -= 1;
}

/// Stores all 64 bits of `reg` at [rbx] and advances rbx by one slot.
fn pushX(cx: *Context, reg: u4) !void {
    if (cx.stack_depth >= cx.max_stack_depth) return error.StackOverflow;
    emitMovMemRbxFromX(cx.buf, reg);
    emitAddXImm32(cx.buf, gpr.rbx, 8);
    cx.stack_depth += 1;
}

/// Retreats rbx by one slot and loads 64 bits from [rbx] into `reg`.
fn popX(cx: *Context, reg: u4) !void {
    if (cx.stack_depth == 0) return error.StackUnderflow;
    emitSubXImm32(cx.buf, gpr.rbx, 8);
    emitMovXFromMemRbx(cx.buf, reg);
    cx.stack_depth -= 1;
}

/// Resolves the type of function `fidx` in the combined (imports first)
/// function index space.
fn getFuncType(mod: *const module.Module, num_imported: u32, fidx: u32) !*const module.FuncType {
    if (fidx < num_imported) {
        var seen: u32 = 0;
        for (mod.imports) |imp| {
            if (imp.desc != .func) continue;
            if (seen == fidx) {
                const type_idx = imp.desc.func;
                // Same bounds check as for locally defined functions below.
                if (type_idx >= mod.types.len) return error.InvalidTypeIndex;
                return &mod.types[type_idx];
            }
            seen += 1;
        }
        return error.InvalidFunctionIndex;
    }
    const local_idx = fidx - num_imported;
    if (local_idx >= mod.functions.len) return error.InvalidFunctionIndex;
    const type_idx = mod.functions[local_idx];
    if (type_idx >= mod.types.len) return error.InvalidTypeIndex;
    return &mod.types[type_idx];
}

/// Resolves the value type of global `gidx` in the combined (imports first)
/// global index space.
fn getGlobalValType(mod: *const module.Module, gidx: u32) !module.ValType {
    var import_global_count: u32 = 0;
    for (mod.imports) |imp| {
        if (imp.desc != .global) continue;
        if (import_global_count == gidx) return imp.desc.global.valtype;
        import_global_count += 1;
    }
    // Not an import, so gidx >= import_global_count and this cannot wrap.
    const local_idx = gidx - import_global_count;
    if (local_idx >= mod.globals.len) return error.InvalidGlobalIndex;
    return mod.globals[local_idx].type.valtype;
}

/// Emits the function prologue: reserves the frame, saves rbx/r12/r13,
/// keeps the first argument (rdi) in r13, copies `param_count` 8-byte values
/// from the array addressed by rsi into the first local slots, zeroes the
/// remaining locals and points rbx at the operand-stack base.
fn emitPrologue(buf: *codebuf.CodeBuffer, param_count: u32, local_count: u32, operand_base_bytes: u32) void {
    emitSubXImm32(buf, gpr.rsp, @intCast(frame_size_bytes));
    emitMovRspDispFromX(buf, 0, gpr.rbx);
    emitMovRspDispFromX(buf, 8, gpr.r12);
    emitMovRspDispFromX(buf, 16, gpr.r13);
    emitMovXReg(buf, gpr.r13, gpr.rdi);

    var i: u32 = 0;
    while (i < param_count) : (i += 1) {
        const src_off: i32 = @intCast(i * 8);
        const dst_off: i32 = @intCast(local_base_bytes + i * 8);
        emitMovXFromBaseDisp(buf, gpr.r9, gpr.rsi, src_off);
        emitMovBaseDispFromX(buf, gpr.rsp, dst_off, gpr.r9);
    }

    emitMovImm64(buf, gpr.r9, 0);
    var j: u32 = param_count;
    while (j < local_count) : (j += 1) {
        const dst_off: i32 = @intCast(local_base_bytes + j * 8);
        emitMovRspDispFromX(buf, dst_off, gpr.r9);
    }

    emitLeaRegRspDisp(buf, gpr.rbx, @intCast(operand_base_bytes));
}

/// Emits the epilogue: restores r13/r12/rbx, releases the frame and returns.
/// rax is left untouched.
fn emitEpilogueAndRet(buf: *codebuf.CodeBuffer) void {
    emitMovXFromRspDisp(buf, gpr.r13, 16);
    emitMovXFromRspDisp(buf, gpr.r12, 8);
    emitMovXFromRspDisp(buf, gpr.rbx, 0);
    emitAddXImm32(buf, gpr.rsp, @intCast(frame_size_bytes));
    buf.emit1(0xC3); // ret
}

/// x86-64 general-purpose register numbers (bit 3 goes into the REX prefix).
const Reg = enum(u4) {
    rax = 0,
    rcx = 1,
    rdx = 2,
    rbx = 3,
    rsp = 4,
    rbp = 5,
    rsi = 6,
    rdi = 7,
    r8 = 8,
    r9 = 9,
    r10 = 10,
    r11 = 11,
    r12 = 12,
    r13 = 13,
    r14 = 14,
    r15 = 15,
};

/// Second opcode byte of the two-byte (0x0F-prefixed) rel32 conditional jumps.
const Jcc = enum(u8) {
    z = 0x84,
    ne = 0x85,
};

/// Emits a REX prefix, omitting it when no bit is set (plain 0x40).
fn emitRex(buf: *codebuf.CodeBuffer, w: bool, r: u1, x: u1, b: u1) void {
    const rex_w: u8 = if (w) 0x08 else 0x00;
    const rex: u8 = @as(u8, 0x40) | rex_w | (@as(u8, r) << 2) | (@as(u8, x) << 1) | @as(u8, b);
    if (rex != 0x40) buf.emit1(rex);
}

fn emitModRM(buf: *codebuf.CodeBuffer, mod: u2, reg: u3, rm: u3) void {
    buf.emit1((@as(u8, mod) << 6) | (@as(u8, reg) << 3) | @as(u8, rm));
}

fn emitSIB(buf: *codebuf.CodeBuffer, scale: u2, index: u3, base: u3) void {
    buf.emit1((@as(u8, scale) << 6) | (@as(u8, index) << 3) | @as(u8, base));
}

/// mov r32, imm32
fn emitMovImm32(buf: *codebuf.CodeBuffer, reg: u4, imm: u32) void {
    emitRex(buf, false, 0, 0, @truncate(reg >> 3));
    buf.emit1(0xB8 + @as(u8, reg & 7));
    buf.emitU32Le(imm);
}

/// mov r64, imm64
fn emitMovImm64(buf: *codebuf.CodeBuffer, reg: u4, imm: u64) void {
    emitRex(buf, true, 0, 0, @truncate(reg >> 3));
    buf.emit1(0xB8 + @as(u8, reg & 7));
    // NOTE(review): this writes through `buf.buf`/`buf.pos` directly instead
    // of a CodeBuffer emit method; confirm the buffer guarantees 8 bytes of
    // headroom here.
    std.mem.writeInt(u64, buf.buf[buf.pos..][0..8], imm, .little);
    buf.pos += 8;
}

/// Register-to-register form `opcode /r` with mod = 11: `rm_reg` is the
/// r/m operand, `reg` the ModRM.reg operand.
fn emitRegReg(buf: *codebuf.CodeBuffer, wide: bool, opcode: u8, rm_reg: u4, reg: u4) void {
    emitRex(buf, wide, @truncate(reg >> 3), 0, @truncate(rm_reg >> 3));
    buf.emit1(opcode);
    emitModRM(buf, 0b11, @truncate(reg & 7), @truncate(rm_reg & 7));
}

/// mov dst64, src64
fn emitMovXReg(buf: *codebuf.CodeBuffer, dst: u4, src: u4) void {
    emitRegReg(buf, true, 0x89, dst, src);
}

/// mov dst32, src32
fn emitMovWReg(buf: *codebuf.CodeBuffer, dst: u4, src: u4) void {
    emitRegReg(buf, false, 0x89, dst, src);
}

/// add r64, imm32
fn emitAddXImm32(buf: *codebuf.CodeBuffer, reg: u4, imm: i32) void {
    emitRex(buf, true, 0, 0, @truncate(reg >> 3));
    buf.emit1(0x81);
    emitModRM(buf, 0b11, 0, @truncate(reg & 7));
    buf.emitI32Le(imm);
}

/// sub r64, imm32
fn emitSubXImm32(buf: *codebuf.CodeBuffer, reg: u4, imm: i32) void {
    emitRex(buf, true, 0, 0, @truncate(reg >> 3));
    buf.emit1(0x81);
    emitModRM(buf, 0b11, 5, @truncate(reg & 7));
    buf.emitI32Le(imm);
}

/// cmp r32, imm32
fn emitCmpWImm32(buf: *codebuf.CodeBuffer, reg: u4, imm: u32) void {
    emitRex(buf, false, 0, 0, @truncate(reg >> 3));
    buf.emit1(0x81);
    emitModRM(buf, 0b11, 7, @truncate(reg & 7));
    buf.emitU32Le(imm);
}

/// test a32, b32
fn emitTestWReg(buf: *codebuf.CodeBuffer, a: u4, b: u4) void {
    emitRegReg(buf, false, 0x85, a, b);
}

/// Shared encoder for `opcode reg, [base + disp32]` style instructions.
/// A base whose low three bits are 100 (rsp/r12) needs a SIB byte.
fn emitBaseDisp32(buf: *codebuf.CodeBuffer, wide: bool, opcode: u8, reg: u4, base: u4, disp: i32) void {
    emitRex(buf, wide, @truncate(reg >> 3), 0, @truncate(base >> 3));
    buf.emit1(opcode);
    if ((base & 7) == 4) {
        emitModRM(buf, 0b10, @truncate(reg & 7), 4);
        emitSIB(buf, 0, 4, @truncate(base & 7));
    } else {
        emitModRM(buf, 0b10, @truncate(reg & 7), @truncate(base & 7));
    }
    buf.emitI32Le(disp);
}

/// mov dst64, [base + disp]
fn emitMovXFromBaseDisp(buf: *codebuf.CodeBuffer, dst: u4, base: u4, disp: i32) void {
    emitBaseDisp32(buf, true, 0x8B, dst, base, disp);
}

/// mov [base + disp], src64
fn emitMovBaseDispFromX(buf: *codebuf.CodeBuffer, base: u4, disp: i32, src: u4) void {
    emitBaseDisp32(buf, true, 0x89, src, base, disp);
}

/// mov dst32, [base + disp]
fn emitMovWFromBaseDisp(buf: *codebuf.CodeBuffer, dst: u4, base: u4, disp: i32) void {
    emitBaseDisp32(buf, false, 0x8B, dst, base, disp);
}

/// mov [base + disp], src32
fn emitMovBaseDispFromW(buf: *codebuf.CodeBuffer, base: u4, disp: i32, src: u4) void {
    emitBaseDisp32(buf, false, 0x89, src, base, disp);
}

fn emitMovXFromRspDisp(buf: *codebuf.CodeBuffer, dst: u4, disp: i32) void {
    emitMovXFromBaseDisp(buf, dst, gpr.rsp, disp);
}

fn emitMovRspDispFromX(buf: *codebuf.CodeBuffer, disp: i32, src: u4) void {
    emitMovBaseDispFromX(buf, gpr.rsp, disp, src);
}

fn emitMovWFromRspDisp(buf: *codebuf.CodeBuffer, dst: u4, disp: i32) void {
    emitMovWFromBaseDisp(buf, dst, gpr.rsp, disp);
}

fn emitMovRspDispFromW(buf: *codebuf.CodeBuffer, disp: i32, src: u4) void {
    emitMovBaseDispFromW(buf, gpr.rsp, disp, src);
}

/// Shared encoder for `opcode reg, [rbx]` (mod = 00, rm = rbx).
fn emitMemRbx(buf: *codebuf.CodeBuffer, wide: bool, opcode: u8, reg: u4) void {
    emitRex(buf, wide, @truncate(reg >> 3), 0, 0);
    buf.emit1(opcode);
    emitModRM(buf, 0b00, @truncate(reg & 7), 3);
}

/// mov [rbx], src64
fn emitMovMemRbxFromX(buf: *codebuf.CodeBuffer, src: u4) void {
    emitMemRbx(buf, true, 0x89, src);
}

/// mov dst64, [rbx]
fn emitMovXFromMemRbx(buf: *codebuf.CodeBuffer, dst: u4) void {
    emitMemRbx(buf, true, 0x8B, dst);
}

/// mov [rbx], src32
fn emitMovMemRbxFromW(buf: *codebuf.CodeBuffer, src: u4) void {
    emitMemRbx(buf, false, 0x89, src);
}

/// mov dst32, [rbx]
fn emitMovWFromMemRbx(buf: *codebuf.CodeBuffer, dst: u4) void {
    emitMemRbx(buf, false, 0x8B, dst);
}

/// lea dst, [rsp + disp]
fn emitLeaRegRspDisp(buf: *codebuf.CodeBuffer, dst: u4, disp: i32) void {
    emitRex(buf, true, @truncate(dst >> 3), 0, 0);
    buf.emit1(0x8D);
    emitModRM(buf, 0b10, @truncate(dst & 7), 4);
    emitSIB(buf, 0, 4, 4);
    buf.emitI32Le(disp);
}

/// call reg
fn emitCallReg(buf: *codebuf.CodeBuffer, reg: u4) void {
    emitRex(buf, false, 0, 0, @truncate(reg >> 3));
    buf.emit1(0xFF);
    emitModRM(buf, 0b11, 2, @truncate(reg & 7));
}

/// Calls the absolute address `addr` through rax (rax is overwritten).
fn emitCallAbs(buf: *codebuf.CodeBuffer, addr: usize) void {
    emitMovImm64(buf, gpr.rax, @intCast(addr));
    emitCallReg(buf, gpr.rax);
}

/// Emits `jmp rel32` with a zero displacement; returns the position of the
/// displacement for `patchRel32`.
fn emitJmpPlaceholder(buf: *codebuf.CodeBuffer) usize {
    buf.emit1(0xE9);
    const imm_pos = buf.cursor();
    buf.emitI32Le(0);
    return imm_pos;
}

/// Emits `jcc rel32` with a zero displacement; returns the position of the
/// displacement for `patchRel32`.
fn emitJccPlaceholder(buf: *codebuf.CodeBuffer, cc: Jcc) usize {
    buf.emit1(0x0F);
    buf.emit1(@intFromEnum(cc));
    const imm_pos = buf.cursor();
    buf.emitI32Le(0);
    return imm_pos;
}

/// Patches the rel32 at `imm_pos` so the jump lands on `target_pos`. The
/// displacement is relative to the end of the 4-byte immediate.
fn patchRel32(buf: *codebuf.CodeBuffer, imm_pos: usize, target_pos: usize) void {
    const next_ip: isize = @intCast(imm_pos + 4);
    const target: isize = @intCast(target_pos);
    const rel: i32 = @intCast(target - next_ip);
    buf.patchI32(imm_pos, rel);
}

test "x86_64 compileFunctionI32 executes const return" {
    if (builtin.cpu.arch != .x86_64) return error.SkipZigTest;

    var params = [_]module.ValType{};
    var results = [_]module.ValType{.i32};
    const ft = module.FuncType{ .params = &params, .results = &results };
    var bodies = [_]module.FunctionBody{.{
        .locals = &.{},
        .code = &[_]u8{ 0x41, 0x2a, 0x0b },
    }};
    var types = [_]module.FuncType{ft};
    var funcs = [_]u32{0};
    const mod = module.Module{
        .types = &types,
        .imports = &.{},
        .functions = &funcs,
        .tables = &.{},
        .memories = &.{},
        .globals = &.{},
        .exports = &.{},
        .start = null,
        .elements = &.{},
        .codes = &bodies,
        .datas = &.{},
        .allocator = std.testing.allocator,
    };
    const helpers: HelperAddrs = .{
        .call = 0,
        .@"unreachable" = 0,
        .global_get = 0,
        .global_set = 0,
        .mem_load = 0,
        .mem_store = 0,
        .i32_unary = 0,
        .i32_cmp = 0,
        .i32_binary = 0,
        .i32_div_s = 0,
        .i32_div_u = 0,
        .i32_rem_s = 0,
        .i32_rem_u = 0,
        .i64_eqz = 0,
        .i64_cmp = 0,
        .i64_unary = 0,
        .i64_binary = 0,
        .f32_cmp = 0,
        .f64_cmp = 0,
        .f32_unary = 0,
        .f32_binary = 0,
        .f64_unary = 0,
        .f64_binary = 0,
        .convert = 0,
        .trunc_sat = 0,
        .i_extend = 0,
        .memory_init = 0,
        .data_drop = 0,
        .memory_copy = 0,
        .memory_fill = 0,
        .table_size = 0,
        .memory_size = 0,
        .memory_grow = 0,
        .call_indirect = 0,
    };

    var jit = (try compileFunctionI32(std.testing.allocator, &mod, 0, 0, &bodies[0], &ft, helpers)) orelse return error.TestUnexpectedResult;
    defer jit.buf.deinit();

    var zero: u64 = 0;
    const fn_ptr = jit.buf.funcPtr(fn (*anyopaque, [*]const u64, u32) callconv(.c) u64, 0);
    const r = fn_ptr(@ptrFromInt(1), @ptrCast(&zero), 0);
    try std.testing.expectEqual(@as(u64, 42), r);
}