lc/codegen.c

1168 lines
38 KiB
C

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "codegen.h"
#include "sema.h"
#include "stb_ds.h"
typedef struct {
char *key;
int value;
} var_map;
static var_map *variables = NULL;
static int stack_offset = 0;
static int label_counter = 0;
static int *break_stack = NULL;
void gen_expr(FILE *fp, ast_node *expr);
void gen_unary(FILE *fp, ast_node *expr);
void gen_statement_list(FILE *fp, ast_node *stmt);
int get_var_offset(char *name, usize name_len);
int get_var_offset_sized(char *name, usize name_len, usize size);
void gen_binary(FILE *fp, ast_node *expr)
{
switch (expr->expr.binary.operator) {
case OP_PLUS:
gen_expr(fp, expr->expr.binary.left);
fprintf(fp, "mov %%rax, %%rcx\n");
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "add %%rcx, %%rax\n");
break;
case OP_MINUS:
gen_expr(fp, expr->expr.binary.left);
fprintf(fp, "mov %%rax, %%rcx\n");
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "sub %%rax, %%rcx\n");
fprintf(fp, "mov %%rcx, %%rax\n");
break;
case OP_MUL:
gen_expr(fp, expr->expr.binary.left);
fprintf(fp, "mov %%rax, %%rcx\n");
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "imul %%rcx, %%rax\n");
break;
case OP_DIV:
gen_expr(fp, expr->expr.binary.left);
fprintf(fp, "mov %%rax, %%rcx\n");
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "mov %%rax, %%rbx\n");
fprintf(fp, "mov %%rcx, %%rax\n");
fprintf(fp, "cqo\n");
fprintf(fp, "idiv %%rbx\n");
break;
case OP_MOD:
gen_expr(fp, expr->expr.binary.left);
fprintf(fp, "mov %%rax, %%rcx\n");
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "mov %%rax, %%rbx\n");
fprintf(fp, "mov %%rcx, %%rax\n");
fprintf(fp, "cqo\n");
fprintf(fp, "idiv %%rbx\n");
fprintf(fp, "mov %%rdx, %%rax\n");
break;
case OP_BOR:
gen_expr(fp, expr->expr.binary.left);
fprintf(fp, "mov %%rax, %%rcx\n");
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "or %%rcx, %%rax\n");
break;
case OP_BAND:
gen_expr(fp, expr->expr.binary.left);
fprintf(fp, "mov %%rax, %%rcx\n");
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "and %%rcx, %%rax\n");
break;
case OP_BXOR:
gen_expr(fp, expr->expr.binary.left);
fprintf(fp, "mov %%rax, %%rcx\n");
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "xor %%rcx, %%rax\n");
break;
case OP_EQ:
gen_expr(fp, expr->expr.binary.left);
fprintf(fp, "mov %%rax, %%rcx\n");
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "cmp %%rax, %%rcx\n");
fprintf(fp, "sete %%al\n");
fprintf(fp, "movzx %%al, %%rax\n");
break;
case OP_NEQ:
gen_expr(fp, expr->expr.binary.left);
fprintf(fp, "mov %%rax, %%rcx\n");
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "cmp %%rax, %%rcx\n");
fprintf(fp, "setne %%al\n");
fprintf(fp, "movzx %%al, %%rax\n");
break;
case OP_LT:
gen_expr(fp, expr->expr.binary.left);
fprintf(fp, "mov %%rax, %%rcx\n");
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "cmp %%rax, %%rcx\n");
fprintf(fp, "setl %%al\n");
fprintf(fp, "movzx %%al, %%rax\n");
break;
case OP_GT:
gen_expr(fp, expr->expr.binary.left);
fprintf(fp, "mov %%rax, %%rcx\n");
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "cmp %%rax, %%rcx\n");
fprintf(fp, "setg %%al\n");
fprintf(fp, "movzx %%al, %%rax\n");
break;
case OP_LE:
gen_expr(fp, expr->expr.binary.left);
fprintf(fp, "mov %%rax, %%rcx\n");
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "cmp %%rax, %%rcx\n");
fprintf(fp, "setle %%al\n");
fprintf(fp, "movzx %%al, %%rax\n");
break;
case OP_GE:
gen_expr(fp, expr->expr.binary.left);
fprintf(fp, "mov %%rax, %%rcx\n");
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "cmp %%rax, %%rcx\n");
fprintf(fp, "setge %%al\n");
fprintf(fp, "movzx %%al, %%rax\n");
break;
case OP_AND:
gen_expr(fp, expr->expr.binary.left);
fprintf(fp, "test %%rax, %%rax\n");
fprintf(fp, "setne %%al\n");
fprintf(fp, "movzx %%al, %%rcx\n");
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "test %%rax, %%rax\n");
fprintf(fp, "setne %%al\n");
fprintf(fp, "movzx %%al, %%rax\n");
fprintf(fp, "and %%rcx, %%rax\n");
break;
case OP_OR:
gen_expr(fp, expr->expr.binary.left);
fprintf(fp, "test %%rax, %%rax\n");
fprintf(fp, "setne %%al\n");
fprintf(fp, "movzx %%al, %%rcx\n");
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "test %%rax, %%rax\n");
fprintf(fp, "setne %%al\n");
fprintf(fp, "movzx %%al, %%rax\n");
fprintf(fp, "or %%rcx, %%rax\n");
break;
case OP_ASSIGN: {
if (expr->expr.binary.left->type == NODE_IDENTIFIER) {
gen_expr(fp, expr->expr.binary.right);
int offset = get_var_offset(expr->expr.binary.left->expr.string.start,
expr->expr.binary.left->expr.string.len);
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset);
} else if (expr->expr.binary.left->type == NODE_ARRAY_SUBSCRIPT) {
ast_node *subscript = expr->expr.binary.left;
usize element_size = 8;
type *base_type = subscript->expr.subscript.expr->expr_type;
bool is_slice = false;
if (base_type) {
if (base_type->tag == TYPE_PTR && base_type->data.ptr.child) {
element_size = base_type->data.ptr.child->size;
} else if (base_type->tag == TYPE_SLICE && base_type->data.slice.child) {
element_size = base_type->data.slice.child->size;
is_slice = true;
}
}
if (subscript->expr.subscript.expr->type == NODE_IDENTIFIER && is_slice) {
int base_offset = get_var_offset(subscript->expr.subscript.expr->expr.string.start,
subscript->expr.subscript.expr->expr.string.len);
fprintf(fp, "mov -%d(%%rbp), %%rcx\n", base_offset);
gen_expr(fp, subscript->expr.subscript.index);
if (element_size != 1) {
fprintf(fp, "imul $%lu, %%rax\n", element_size);
}
fprintf(fp, "add %%rcx, %%rax\n");
fprintf(fp, "push %%rax\n");
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "pop %%rcx\n");
if (subscript->expr_type && subscript->expr_type->size == 4) {
fprintf(fp, "mov %%eax, (%%rcx)\n");
} else if (subscript->expr_type && subscript->expr_type->size == 2) {
fprintf(fp, "mov %%ax, (%%rcx)\n");
} else if (subscript->expr_type && subscript->expr_type->size == 1) {
fprintf(fp, "mov %%al, (%%rcx)\n");
} else {
fprintf(fp, "mov %%rax, (%%rcx)\n");
}
} else if (subscript->expr.subscript.expr->type == NODE_IDENTIFIER) {
int base_offset = get_var_offset(subscript->expr.subscript.expr->expr.string.start,
subscript->expr.subscript.expr->expr.string.len);
gen_expr(fp, subscript->expr.subscript.index);
if (element_size != 1) {
fprintf(fp, "imul $%lu, %%rax\n", element_size);
}
fprintf(fp, "add $%d, %%rax\n", base_offset);
fprintf(fp, "neg %%rax\n");
fprintf(fp, "add %%rbp, %%rax\n");
fprintf(fp, "push %%rax\n");
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "pop %%rcx\n");
if (subscript->expr_type && subscript->expr_type->size == 4) {
fprintf(fp, "mov %%eax, (%%rcx)\n");
} else if (subscript->expr_type && subscript->expr_type->size == 2) {
fprintf(fp, "mov %%ax, (%%rcx)\n");
} else if (subscript->expr_type && subscript->expr_type->size == 1) {
fprintf(fp, "mov %%al, (%%rcx)\n");
} else {
fprintf(fp, "mov %%rax, (%%rcx)\n");
}
} else {
gen_expr(fp, subscript->expr.subscript.expr);
fprintf(fp, "push %%rax\n");
gen_expr(fp, subscript->expr.subscript.index);
if (element_size != 1) {
fprintf(fp, "imul $%lu, %%rax\n", element_size);
}
fprintf(fp, "pop %%rcx\n");
fprintf(fp, "add %%rcx, %%rax\n");
fprintf(fp, "push %%rax\n");
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "pop %%rcx\n");
if (subscript->expr_type && subscript->expr_type->size == 4) {
fprintf(fp, "mov %%eax, (%%rcx)\n");
} else if (subscript->expr_type && subscript->expr_type->size == 2) {
fprintf(fp, "mov %%ax, (%%rcx)\n");
} else if (subscript->expr_type && subscript->expr_type->size == 1) {
fprintf(fp, "mov %%al, (%%rcx)\n");
} else {
fprintf(fp, "mov %%rax, (%%rcx)\n");
}
}
} else {
fprintf(fp, "# ERROR: left side of assignment must be identifier\n");
break;
}
break;
}
case OP_ASSIGN_PTR: {
gen_expr(fp, expr->expr.binary.left);
fprintf(fp, "push %%rax\n");
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "pop %%rcx\n");
fprintf(fp, "mov %%rax, (%%rcx)\n");
break;
}
case OP_PLUS_EQ: {
if (expr->expr.binary.left->type != NODE_IDENTIFIER) {
fprintf(fp, "# ERROR: left side of assignment must be identifier\n");
break;
}
int offset = get_var_offset(expr->expr.binary.left->expr.string.start,
expr->expr.binary.left->expr.string.len);
fprintf(fp, "mov -%d(%%rbp), %%rcx\n", offset);
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "add %%rcx, %%rax\n");
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset);
break;
}
case OP_MINUS_EQ: {
if (expr->expr.binary.left->type != NODE_IDENTIFIER) {
fprintf(fp, "# ERROR: left side of assignment must be identifier\n");
break;
}
int offset = get_var_offset(expr->expr.binary.left->expr.string.start,
expr->expr.binary.left->expr.string.len);
fprintf(fp, "mov -%d(%%rbp), %%rcx\n", offset);
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "sub %%rax, %%rcx\n");
fprintf(fp, "mov %%rcx, %%rax\n");
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset);
break;
}
case OP_MUL_EQ: {
if (expr->expr.binary.left->type != NODE_IDENTIFIER) {
fprintf(fp, "# ERROR: left side of assignment must be identifier\n");
break;
}
int offset = get_var_offset(expr->expr.binary.left->expr.string.start,
expr->expr.binary.left->expr.string.len);
fprintf(fp, "mov -%d(%%rbp), %%rcx\n", offset);
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "imul %%rcx, %%rax\n");
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset);
break;
}
case OP_DIV_EQ: {
if (expr->expr.binary.left->type != NODE_IDENTIFIER) {
fprintf(fp, "# ERROR: left side of assignment must be identifier\n");
break;
}
int offset = get_var_offset(expr->expr.binary.left->expr.string.start,
expr->expr.binary.left->expr.string.len);
fprintf(fp, "mov -%d(%%rbp), %%rcx\n", offset);
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "mov %%rax, %%rbx\n");
fprintf(fp, "mov %%rcx, %%rax\n");
fprintf(fp, "cqo\n");
fprintf(fp, "idiv %%rbx\n");
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset);
break;
}
case OP_MOD_EQ: {
if (expr->expr.binary.left->type != NODE_IDENTIFIER) {
fprintf(fp, "# ERROR: left side of assignment must be identifier\n");
break;
}
int offset = get_var_offset(expr->expr.binary.left->expr.string.start,
expr->expr.binary.left->expr.string.len);
fprintf(fp, "mov -%d(%%rbp), %%rcx\n", offset);
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "mov %%rax, %%rbx\n");
fprintf(fp, "mov %%rcx, %%rax\n");
fprintf(fp, "cqo\n");
fprintf(fp, "idiv %%rbx\n");
fprintf(fp, "mov %%rdx, -%d(%%rbp)\n", offset);
break;
}
case OP_BOR_EQ: {
if (expr->expr.binary.left->type != NODE_IDENTIFIER) {
fprintf(fp, "# ERROR: left side of assignment must be identifier\n");
break;
}
int offset = get_var_offset(expr->expr.binary.left->expr.string.start,
expr->expr.binary.left->expr.string.len);
fprintf(fp, "mov -%d(%%rbp), %%rcx\n", offset);
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "or %%rcx, %%rax\n");
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset);
break;
}
case OP_BAND_EQ: {
if (expr->expr.binary.left->type != NODE_IDENTIFIER) {
fprintf(fp, "# ERROR: left side of assignment must be identifier\n");
break;
}
int offset = get_var_offset(expr->expr.binary.left->expr.string.start,
expr->expr.binary.left->expr.string.len);
fprintf(fp, "mov -%d(%%rbp), %%rcx\n", offset);
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "and %%rcx, %%rax\n");
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset);
break;
}
case OP_BXOR_EQ: {
if (expr->expr.binary.left->type != NODE_IDENTIFIER) {
fprintf(fp, "# ERROR: left side of assignment must be identifier\n");
break;
}
int offset = get_var_offset(expr->expr.binary.left->expr.string.start,
expr->expr.binary.left->expr.string.len);
fprintf(fp, "mov -%d(%%rbp), %%rcx\n", offset);
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "xor %%rcx, %%rax\n");
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset);
break;
}
case OP_RSHIFT_EQ: {
if (expr->expr.binary.left->type != NODE_IDENTIFIER) {
fprintf(fp, "# ERROR: left side of assignment must be identifier\n");
break;
}
int offset = get_var_offset(expr->expr.binary.left->expr.string.start,
expr->expr.binary.left->expr.string.len);
fprintf(fp, "mov -%d(%%rbp), %%rax\n", offset);
fprintf(fp, "push %%rax\n");
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "mov %%rax, %%rcx\n");
fprintf(fp, "pop %%rax\n");
fprintf(fp, "sar %%cl, %%rax\n");
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset);
break;
}
case OP_LSHIFT_EQ: {
if (expr->expr.binary.left->type != NODE_IDENTIFIER) {
break;
}
int offset = get_var_offset(expr->expr.binary.left->expr.string.start,
expr->expr.binary.left->expr.string.len);
fprintf(fp, "mov -%d(%%rbp), %%rax\n", offset);
fprintf(fp, "push %%rax\n");
gen_expr(fp, expr->expr.binary.right);
fprintf(fp, "mov %%rax, %%rcx\n");
fprintf(fp, "pop %%rax\n");
fprintf(fp, "shl %%cl, %%rax\n");
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset);
break;
}
}
}
int get_var_offset(char *name, usize name_len)
{
char *var_name = strndup(name, name_len);
ptrdiff_t idx = shgeti(variables, var_name);
if (idx >= 0) {
free(var_name);
return variables[idx].value;
}
stack_offset += 8;
shput(variables, var_name, stack_offset);
return stack_offset;
}
int get_var_offset_sized(char *name, usize name_len, usize size)
{
char *var_name = strndup(name, name_len);
ptrdiff_t idx = shgeti(variables, var_name);
if (idx >= 0) {
free(var_name);
return variables[idx].value;
}
usize aligned_size = (size + 7) & ~7;
stack_offset += aligned_size;
shput(variables, var_name, stack_offset);
return stack_offset;
}
void gen_statement_list(FILE *fp, ast_node *stmt)
{
if (!stmt) return;
if (stmt->type == NODE_UNIT) {
ast_node *current = stmt;
while (current && current->type == NODE_UNIT) {
if (current->expr.unit_node.expr) {
gen_expr(fp, current->expr.unit_node.expr);
}
current = current->expr.unit_node.next;
}
} else {
gen_expr(fp, stmt);
}
}
void gen_unary(FILE *fp, ast_node *expr)
{
switch (expr->expr.unary.operator) {
case UOP_MINUS:
gen_expr(fp, expr->expr.unary.right);
fprintf(fp, "neg %%rax\n");
break;
case UOP_NOT:
gen_expr(fp, expr->expr.unary.right);
fprintf(fp, "test %%rax, %%rax\n");
fprintf(fp, "sete %%al\n");
fprintf(fp, "movzx %%al, %%rax\n");
break;
case UOP_INCR:
if (expr->expr.unary.right->type != NODE_IDENTIFIER) {
fprintf(fp, "# ERROR: increment requires identifier\n");
break;
}
int offset_incr = get_var_offset(expr->expr.unary.right->expr.string.start,
expr->expr.unary.right->expr.string.len);
fprintf(fp, "mov -%d(%%rbp), %%rax\n", offset_incr);
fprintf(fp, "inc %%rax\n");
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset_incr);
break;
case UOP_DECR:
if (expr->expr.unary.right->type != NODE_IDENTIFIER) {
fprintf(fp, "# ERROR: decrement requires identifier\n");
break;
}
int offset_decr = get_var_offset(expr->expr.unary.right->expr.string.start,
expr->expr.unary.right->expr.string.len);
fprintf(fp, "mov -%d(%%rbp), %%rax\n", offset_decr);
fprintf(fp, "dec %%rax\n");
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset_decr);
break;
case UOP_REF:
if (expr->expr.unary.right->type != NODE_IDENTIFIER) {
fprintf(fp, "# ERROR: address-of requires identifier\n");
break;
}
int offset_ref = get_var_offset(expr->expr.unary.right->expr.string.start,
expr->expr.unary.right->expr.string.len);
fprintf(fp, "lea -%d(%%rbp), %%rax\n", offset_ref);
break;
case UOP_DEREF:
gen_expr(fp, expr->expr.unary.right);
fprintf(fp, "mov (%%rax), %%rax\n");
break;
}
}
void gen_expr(FILE *fp, ast_node *expr)
{
switch (expr->type) {
case NODE_INTEGER:
fprintf(fp, "mov $%lu, %%rax\n", expr->expr.integer);
break;
case NODE_FLOAT: {
// TODO: do not truncate
i64 int_val = (i64)expr->expr.flt;
fprintf(fp, "mov $%ld, %%rax\n", int_val);
break;
}
case NODE_CHAR:
fprintf(fp, "mov $%d, %%rax\n", (int)(unsigned char)expr->expr.ch);
break;
case NODE_BOOL:
fprintf(fp, "mov $%d, %%rax\n", expr->expr.boolean ? 1 : 0);
break;
case NODE_IDENTIFIER: {
int offset = get_var_offset(expr->expr.string.start, expr->expr.string.len);
fprintf(fp, "mov -%d(%%rbp), %%rax\n", offset);
break;
}
case NODE_BINARY:
gen_binary(fp, expr);
break;
case NODE_UNARY:
gen_unary(fp, expr);
break;
case NODE_CAST:
gen_expr(fp, expr->expr.cast.value);
break;
case NODE_VAR_DECL: {
int offset = 0;
type *var_type = expr->expr_type;
if (!var_type && expr->expr.var_decl.type) {
var_type = expr->expr.var_decl.type->expr_type;
}
bool is_inline_slice = false;
if (var_type && var_type->tag == TYPE_SLICE && expr->expr.var_decl.value &&
(expr->expr.var_decl.value->type == NODE_STRUCT_INIT ||
expr->expr.var_decl.value->type == NODE_RANGE)) {
is_inline_slice = true;
}
if (!is_inline_slice) {
if (var_type && var_type->size > 0) {
offset = get_var_offset_sized(expr->expr.var_decl.name,
expr->expr.var_decl.name_len, var_type->size);
} else {
offset = get_var_offset(expr->expr.var_decl.name, expr->expr.var_decl.name_len);
}
}
if (expr->expr.var_decl.value) {
if (expr->expr.var_decl.value->type == NODE_RANGE && var_type && var_type->tag == TYPE_SLICE) {
ast_node *range = expr->expr.var_decl.value;
if (range->expr.binary.left->type == NODE_INTEGER &&
range->expr.binary.right->type == NODE_INTEGER) {
i64 start = range->expr.binary.left->expr.integer;
i64 end = range->expr.binary.right->expr.integer;
i64 count = end - start + 1;
type *element_type = var_type->data.slice.child;
usize element_size = element_type ? element_type->size : 8;
usize data_size = count * element_size;
usize aligned_data_size = (data_size + 7) & ~7;
stack_offset += aligned_data_size;
int data_offset = stack_offset;
stack_offset += 16;
offset = stack_offset;
char *var_name = strndup(expr->expr.var_decl.name, expr->expr.var_decl.name_len);
shput(variables, var_name, offset);
for (i64 i = 0; i < count; i++) {
i64 value = start + i;
int element_offset = data_offset - (i * element_size);
fprintf(fp, "mov $%ld, %%rax\n", value);
if (element_size == 4) {
fprintf(fp, "mov %%eax, -%d(%%rbp)\n", element_offset);
} else if (element_size == 2) {
fprintf(fp, "mov %%ax, -%d(%%rbp)\n", element_offset);
} else if (element_size == 1) {
fprintf(fp, "mov %%al, -%d(%%rbp)\n", element_offset);
} else {
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", element_offset);
}
}
fprintf(fp, "lea -%d(%%rbp), %%rax\n", data_offset);
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset);
fprintf(fp, "mov $%ld, %%rax\n", count);
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset - 8);
}
} else if (expr->expr.var_decl.value->type == NODE_STRING && var_type && var_type->tag == TYPE_SLICE) {
ast_node *str = expr->expr.var_decl.value;
usize str_len = str->expr.string.len;
char *str_data = str->expr.string.start;
usize aligned_data_size = (str_len + 7) & ~7;
stack_offset += aligned_data_size;
int data_offset = stack_offset;
stack_offset += 16;
offset = stack_offset;
char *var_name = strndup(expr->expr.var_decl.name, expr->expr.var_decl.name_len);
shput(variables, var_name, offset);
for (usize i = 0; i < str_len; i++) {
int byte_offset = data_offset - i;
if ((unsigned char)str_data[i] == '\\' && (unsigned char)str_data[i+1] == 'n') {
fprintf(fp, "movb $%d, -%d(%%rbp)\n", (unsigned char)'\n', byte_offset);
i += 1;
} else {
fprintf(fp, "movb $%d, -%d(%%rbp)\n", (unsigned char)str_data[i], byte_offset);
}
}
fprintf(fp, "lea -%d(%%rbp), %%rax\n", data_offset);
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset);
fprintf(fp, "mov $%lu, %%rax\n", str_len);
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset - 8);
} else if (expr->expr.var_decl.value->type == NODE_STRUCT_INIT) {
ast_node *member_list = expr->expr.var_decl.value->expr.struct_init.members;
ast_node *current = member_list;
if (var_type && var_type->tag == TYPE_STRUCT) {
while (current && current->type == NODE_UNIT) {
ast_node *assignment = current->expr.unit_node.expr;
if (assignment && assignment->type == NODE_BINARY &&
assignment->expr.binary.operator == OP_ASSIGN) {
ast_node *field = assignment->expr.binary.left;
ast_node *value = assignment->expr.binary.right;
if (field->type == NODE_IDENTIFIER) {
char *field_name = strndup(field->expr.string.start,
field->expr.string.len);
member *m = var_type->data.structure.members;
int field_offset = -1;
while (m) {
if (m->name_len == field->expr.string.len &&
strncmp(m->name, field->expr.string.start, m->name_len) == 0) {
field_offset = m->offset;
break;
}
m = m->next;
}
if (field_offset >= 0) {
gen_expr(fp, value);
type *field_type = shget(var_type->data.structure.member_types, field_name);
if (field_type && field_type->size == 4) {
fprintf(fp, "mov %%eax, -%d(%%rbp)\n", offset + field_offset);
} else if (field_type && field_type->size == 2) {
fprintf(fp, "mov %%ax, -%d(%%rbp)\n", offset + field_offset);
} else if (field_type && field_type->size == 1) {
fprintf(fp, "mov %%al, -%d(%%rbp)\n", offset + field_offset);
} else {
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset + field_offset);
}
}
free(field_name);
}
}
current = current->expr.unit_node.next;
}
}
else if (var_type && (var_type->tag == TYPE_PTR || var_type->tag == TYPE_SLICE)) {
usize element_size = 8;
type *element_type = NULL;
if (var_type->tag == TYPE_PTR && var_type->data.ptr.child) {
element_type = var_type->data.ptr.child;
element_size = element_type->size;
} else if (var_type->tag == TYPE_SLICE && var_type->data.slice.child) {
element_type = var_type->data.slice.child;
element_size = element_type->size;
}
int element_count = 0;
ast_node *count_node = current;
while (count_node && count_node->type == NODE_UNIT) {
element_count++;
count_node = count_node->expr.unit_node.next;
}
if (var_type->tag == TYPE_SLICE) {
usize data_size = element_count * element_size;
usize aligned_data_size = (data_size + 7) & ~7;
stack_offset += aligned_data_size;
int data_offset = stack_offset;
stack_offset += 16;
offset = stack_offset;
char *var_name = strndup(expr->expr.var_decl.name, expr->expr.var_decl.name_len);
shput(variables, var_name, offset);
int index = 0;
while (current && current->type == NODE_UNIT) {
ast_node *value = current->expr.unit_node.expr;
if (value) {
gen_expr(fp, value);
int element_offset = data_offset - (index * element_size);
if (element_type && element_type->size == 4) {
fprintf(fp, "mov %%eax, -%d(%%rbp)\n", element_offset);
} else if (element_type && element_type->size == 2) {
fprintf(fp, "mov %%ax, -%d(%%rbp)\n", element_offset);
} else if (element_type && element_type->size == 1) {
fprintf(fp, "mov %%al, -%d(%%rbp)\n", element_offset);
} else {
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", element_offset);
}
}
index++;
current = current->expr.unit_node.next;
}
fprintf(fp, "lea -%d(%%rbp), %%rax\n", data_offset);
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset);
fprintf(fp, "mov $%d, %%rax\n", element_count);
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset - 8);
} else {
int index = 0;
while (current && current->type == NODE_UNIT) {
ast_node *value = current->expr.unit_node.expr;
if (value) {
gen_expr(fp, value);
int element_offset = offset + (index * element_size);
if (element_type && element_type->size == 4) {
fprintf(fp, "mov %%eax, -%d(%%rbp)\n", element_offset);
} else if (element_type && element_type->size == 2) {
fprintf(fp, "mov %%ax, -%d(%%rbp)\n", element_offset);
} else if (element_type && element_type->size == 1) {
fprintf(fp, "mov %%al, -%d(%%rbp)\n", element_offset);
} else {
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", element_offset);
}
}
index++;
current = current->expr.unit_node.next;
}
}
}
} else {
gen_expr(fp, expr->expr.var_decl.value);
// If assigning a slice value, copy the 16-byte structure
if (var_type && var_type->tag == TYPE_SLICE) {
fprintf(fp, "mov (%%rax), %%rcx\n"); // Load ptr field
fprintf(fp, "mov 8(%%rax), %%rdx\n"); // Load len field
fprintf(fp, "mov %%rcx, -%d(%%rbp)\n", offset); // Store ptr
fprintf(fp, "mov %%rdx, -%d(%%rbp)\n", offset - 8); // Store len
} else {
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset);
}
}
}
break;
}
case NODE_RETURN: {
if (expr->expr.ret.value) {
gen_expr(fp, expr->expr.ret.value);
}
fprintf(fp, "mov %%rbp, %%rsp\n");
fprintf(fp, "pop %%rbp\n");
fprintf(fp, "ret\n");
break;
}
case NODE_IF: {
int label_else = label_counter++;
int label_end = label_counter++;
gen_expr(fp, expr->expr.if_stmt.condition);
fprintf(fp, "test %%rax, %%rax\n");
if (expr->expr.if_stmt.otherwise) {
fprintf(fp, "jz .L%d\n", label_else);
} else {
fprintf(fp, "jz .L%d\n", label_end);
}
gen_statement_list(fp, expr->expr.if_stmt.body);
if (expr->expr.if_stmt.otherwise) {
fprintf(fp, "jmp .L%d\n", label_end);
fprintf(fp, ".L%d:\n", label_else);
gen_statement_list(fp, expr->expr.if_stmt.otherwise);
}
fprintf(fp, ".L%d:\n", label_end);
break;
}
case NODE_WHILE: {
int label_start = label_counter++;
int label_end = label_counter++;
fprintf(fp, ".L%d:\n", label_start);
gen_expr(fp, expr->expr.whle.condition);
fprintf(fp, "test %%rax, %%rax\n");
fprintf(fp, "jz .L%d\n", label_end);
arrput(break_stack, label_end);
gen_statement_list(fp, expr->expr.whle.body);
arrpop(break_stack);
fprintf(fp, "jmp .L%d\n", label_start);
fprintf(fp, ".L%d:\n", label_end);
break;
}
case NODE_LABEL: {
char *label_name = strndup(expr->expr.label.name, expr->expr.label.name_len);
fprintf(fp, ".L_%s:\n", label_name);
free(label_name);
break;
}
case NODE_GOTO: {
char *label_name = strndup(expr->expr.label.name, expr->expr.label.name_len);
fprintf(fp, "jmp .L_%s\n", label_name);
free(label_name);
break;
}
case NODE_BREAK: {
if (arrlen(break_stack) > 0) {
int loop_end = break_stack[arrlen(break_stack) - 1];
fprintf(fp, "jmp .L%d\n", loop_end);
} else {
fprintf(fp, "# ERROR: break outside of loop\n");
}
break;
}
case NODE_ACCESS: {
ast_node *base = expr->expr.access.expr;
ast_node *member_node = expr->expr.access.member;
if (base->type == NODE_IDENTIFIER) {
int base_offset = get_var_offset(base->expr.string.start, base->expr.string.len);
type *base_type = base->expr_type;
if (base_type && base_type->tag == TYPE_SLICE && member_node->type == NODE_IDENTIFIER) {
char *field_name = strndup(member_node->expr.string.start, member_node->expr.string.len);
if (strcmp(field_name, "ptr") == 0) {
fprintf(fp, "mov -%d(%%rbp), %%rax\n", base_offset);
} else if (strcmp(field_name, "len") == 0) {
fprintf(fp, "mov -%d(%%rbp), %%rax\n", base_offset - 8);
} else {
fprintf(fp, "# ERROR: slice field '%s' not found\n", field_name);
}
free(field_name);
}
else if (member_node->type == NODE_IDENTIFIER && base_type &&
base_type->tag == TYPE_STRUCT) {
member *m = base_type->data.structure.members;
int field_offset = -1;
while (m) {
if (m->name_len == member_node->expr.string.len &&
strncmp(m->name, member_node->expr.string.start, m->name_len) == 0) {
field_offset = m->offset;
break;
}
m = m->next;
}
if (field_offset >= 0) {
fprintf(fp, "mov -%d(%%rbp), %%rax\n", base_offset + field_offset);
} else {
fprintf(fp, "# ERROR: field not found\n");
}
} else {
fprintf(fp, "# ERROR: not a struct or slice type\n");
}
} else {
fprintf(fp, "# ERROR: complex struct access not implemented\n");
}
break;
}
case NODE_RANGE: {
if (expr->expr.binary.left->type == NODE_INTEGER &&
expr->expr.binary.right->type == NODE_INTEGER) {
i64 start = expr->expr.binary.left->expr.integer;
i64 end = expr->expr.binary.right->expr.integer;
i64 count = end - start + 1;
usize element_size = 8;
usize data_size = count * element_size;
usize aligned_data_size = (data_size + 7) & ~7;
stack_offset += aligned_data_size;
int data_offset = stack_offset;
for (i64 i = 0; i < count; i++) {
i64 value = start + i;
int element_offset = data_offset - (i * element_size);
fprintf(fp, "mov $%ld, %%rax\n", value);
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", element_offset);
}
stack_offset += 16;
int slice_offset = stack_offset;
fprintf(fp, "lea -%d(%%rbp), %%rax\n", data_offset);
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", slice_offset);
fprintf(fp, "mov $%ld, %%rax\n", count);
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", slice_offset - 8);
fprintf(fp, "lea -%d(%%rbp), %%rax\n", slice_offset);
} else {
fprintf(fp, "# ERROR: range expression requires constant bounds\n");
}
break;
}
case NODE_STRUCT_INIT: {
fprintf(fp, "# ERROR: struct init outside of variable declaration\n");
break;
}
case NODE_ARRAY_SUBSCRIPT: {
usize element_size = 8;
type *base_type = expr->expr.subscript.expr->expr_type;
bool is_slice = false;
if (base_type) {
if (base_type->tag == TYPE_PTR && base_type->data.ptr.child) {
element_size = base_type->data.ptr.child->size;
} else if (base_type->tag == TYPE_SLICE && base_type->data.slice.child) {
element_size = base_type->data.slice.child->size;
is_slice = true;
}
}
if (expr->expr.subscript.index->type == NODE_RANGE) {
if (expr->expr.subscript.expr->type == NODE_IDENTIFIER) {
int base_offset = get_var_offset(expr->expr.subscript.expr->expr.string.start,
expr->expr.subscript.expr->expr.string.len);
fprintf(fp, "mov -%d(%%rbp), %%rcx\n", base_offset);
gen_expr(fp, expr->expr.subscript.index->expr.binary.left);
fprintf(fp, "push %%rax\n");
gen_expr(fp, expr->expr.subscript.index->expr.binary.right);
fprintf(fp, "mov %%rax, %%rdx\n"); // rdx = end
fprintf(fp, "pop %%rax\n"); // rax = start
fprintf(fp, "mov %%rdx, %%r8\n");
fprintf(fp, "sub %%rax, %%r8\n");
fprintf(fp, "inc %%r8\n"); // r8 = new length
if (element_size != 1) {
fprintf(fp, "imul $%lu, %%rax\n", element_size);
}
fprintf(fp, "add %%rcx, %%rax\n"); // rax = new ptr
// Allocate temporary slice struct (16 bytes)
stack_offset += 16;
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", stack_offset); // Store ptr
fprintf(fp, "mov %%r8, -%d(%%rbp)\n", stack_offset - 8); // Store len
fprintf(fp, "lea -%d(%%rbp), %%rax\n", stack_offset); // Return address of temp slice
}
}
else if (expr->expr.subscript.expr->type == NODE_IDENTIFIER && is_slice) {
int base_offset = get_var_offset(expr->expr.subscript.expr->expr.string.start,
expr->expr.subscript.expr->expr.string.len);
fprintf(fp, "mov -%d(%%rbp), %%rcx\n", base_offset);
gen_expr(fp, expr->expr.subscript.index);
if (element_size != 1) {
fprintf(fp, "imul $%lu, %%rax\n", element_size);
}
fprintf(fp, "add %%rcx, %%rax\n");
if (expr->expr_type && expr->expr_type->size == 4) {
fprintf(fp, "movl (%%rax), %%eax\n");
} else if (expr->expr_type && expr->expr_type->size == 2) {
fprintf(fp, "movzwl (%%rax), %%eax\n");
} else if (expr->expr_type && expr->expr_type->size == 1) {
fprintf(fp, "movzbl (%%rax), %%eax\n");
} else {
fprintf(fp, "mov (%%rax), %%rax\n");
}
} else if (expr->expr.subscript.expr->type == NODE_IDENTIFIER) {
int base_offset = get_var_offset(expr->expr.subscript.expr->expr.string.start,
expr->expr.subscript.expr->expr.string.len);
gen_expr(fp, expr->expr.subscript.index);
if (element_size != 1) {
fprintf(fp, "imul $%lu, %%rax\n", element_size);
}
fprintf(fp, "add $%d, %%rax\n", base_offset);
fprintf(fp, "neg %%rax\n");
fprintf(fp, "add %%rbp, %%rax\n");
if (expr->expr_type && expr->expr_type->size == 4) {
fprintf(fp, "movl (%%rax), %%eax\n");
} else if (expr->expr_type && expr->expr_type->size == 2) {
fprintf(fp, "movzwl (%%rax), %%eax\n");
} else if (expr->expr_type && expr->expr_type->size == 1) {
fprintf(fp, "movzbl (%%rax), %%eax\n");
} else {
fprintf(fp, "mov (%%rax), %%rax\n");
}
} else {
gen_expr(fp, expr->expr.subscript.expr);
fprintf(fp, "push %%rax\n");
gen_expr(fp, expr->expr.subscript.index);
if (element_size != 1) {
fprintf(fp, "imul $%lu, %%rax\n", element_size);
}
fprintf(fp, "pop %%rcx\n");
fprintf(fp, "add %%rcx, %%rax\n");
if (expr->expr_type && expr->expr_type->size == 4) {
fprintf(fp, "movl (%%rax), %%eax\n");
} else if (expr->expr_type && expr->expr_type->size == 2) {
fprintf(fp, "movzwl (%%rax), %%eax\n");
} else if (expr->expr_type && expr->expr_type->size == 1) {
fprintf(fp, "movzbl (%%rax), %%eax\n");
} else {
fprintf(fp, "mov (%%rax), %%rax\n");
}
}
break;
}
case NODE_CALL: {
const char *arg_regs[] = {"%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9"};
int param_count = 0;
ast_node *param = expr->expr.call.parameters;
while (param && param->type == NODE_UNIT) {
param_count++;
param = param->expr.unit_node.next;
}
param = expr->expr.call.parameters;
int i = 0;
while (param && param->type == NODE_UNIT) {
if (param->expr.unit_node.expr) {
gen_expr(fp, param->expr.unit_node.expr);
fprintf(fp, "push %%rax\n");
}
param = param->expr.unit_node.next;
i++;
}
for (int j = param_count - 1; j >= 0; j--) {
if (j < 6) {
fprintf(fp, "pop %s\n", arg_regs[j]);
} else {
// TODO: handle more than 6 arguments properly
fprintf(fp, "pop %%rax\n");
fprintf(fp, "push %%rax\n");
}
}
fprintf(fp, "call %.*s\n", (int)expr->expr.call.name_len, expr->expr.call.name);
if (param_count > 6) {
int stack_args = param_count - 6;
fprintf(fp, "add $%d, %%rsp\n", stack_args * 8);
}
break;
}
}
}
void gen_function(FILE *fp, ast_node *fn)
{
if (fn->expr.function.is_extern || fn->expr.function.body == NULL) {
return;
}
ast_node *current = fn->expr.function.body;
stack_offset = 0;
label_counter = 0;
shfree(variables);
variables = NULL;
arrfree(break_stack);
break_stack = NULL;
fprintf(fp, ".global %s\n%s:\n", fn->expr.function.name, fn->expr.function.name);
fprintf(fp, "push %%rbp\n");
fprintf(fp, "mov %%rsp, %%rbp\n");
fprintf(fp, "sub $256, %%rsp\n");
const char *param_regs[] = {"%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9"};
member *param = fn->expr.function.parameters;
int param_idx = 0;
while (param && param_idx < 6) {
int offset = get_var_offset(param->name, param->name_len);
fprintf(fp, "mov %s, -%d(%%rbp)\n", param_regs[param_idx], offset);
param = param->next;
param_idx++;
}
while (current && current->type == NODE_UNIT) {
if (current->expr.unit_node.expr) {
gen_expr(fp, current->expr.unit_node.expr);
}
current = current->expr.unit_node.next;
}
fprintf(fp, "mov %%rbp, %%rsp\n");
fprintf(fp, "pop %%rbp\n");
fprintf(fp, "ret\n");
}
void generate(ast_node *node)
{
FILE *fp = fopen("test.s", "w");
ast_node *current = node;
fprintf(fp, ".section .text\n");
while (current && current->type == NODE_UNIT) {
if (current->expr.unit_node.expr && current->expr.unit_node.expr->type == NODE_FUNCTION) {
gen_function(fp, current->expr.unit_node.expr);
}
current = current->expr.unit_node.next;
}
fclose(fp);
shfree(variables);
variables = NULL;
arrfree(break_stack);
break_stack = NULL;
}