codegen almost finished

This commit is contained in:
Lorenzo Torres 2026-01-16 23:26:28 +01:00
parent ed0ad1d095
commit 870cf8f0b4
15 changed files with 523 additions and 126 deletions

430
codegen.c
View file

@ -447,72 +447,240 @@ void gen_expr(FILE *fp, ast_node *expr)
gen_expr(fp, expr->expr.cast.value);
break;
case NODE_VAR_DECL: {
int offset;
if (expr->expr.var_decl.type && expr->expr.var_decl.type->expr_type) {
usize var_size = expr->expr.var_decl.type->expr_type->size;
offset = get_var_offset_sized(expr->expr.var_decl.name,
expr->expr.var_decl.name_len, var_size);
} else {
offset = get_var_offset(expr->expr.var_decl.name, expr->expr.var_decl.name_len);
int offset = 0;
type *var_type = expr->expr_type;
if (!var_type && expr->expr.var_decl.type) {
var_type = expr->expr.var_decl.type->expr_type;
}
bool is_inline_slice = false;
if (var_type && var_type->tag == TYPE_SLICE && expr->expr.var_decl.value &&
(expr->expr.var_decl.value->type == NODE_STRUCT_INIT ||
expr->expr.var_decl.value->type == NODE_RANGE)) {
is_inline_slice = true;
}
if (!is_inline_slice) {
if (var_type && var_type->size > 0) {
offset = get_var_offset_sized(expr->expr.var_decl.name,
expr->expr.var_decl.name_len, var_type->size);
} else {
offset = get_var_offset(expr->expr.var_decl.name, expr->expr.var_decl.name_len);
}
}
if (expr->expr.var_decl.value) {
if (expr->expr.var_decl.value->type == NODE_STRUCT_INIT) {
if (expr->expr.var_decl.value->type == NODE_RANGE && var_type && var_type->tag == TYPE_SLICE) {
ast_node *range = expr->expr.var_decl.value;
if (range->expr.binary.left->type == NODE_INTEGER &&
range->expr.binary.right->type == NODE_INTEGER) {
i64 start = range->expr.binary.left->expr.integer;
i64 end = range->expr.binary.right->expr.integer;
i64 count = end - start + 1;
type *element_type = var_type->data.slice.child;
usize element_size = element_type ? element_type->size : 8;
usize data_size = count * element_size;
usize aligned_data_size = (data_size + 7) & ~7;
stack_offset += aligned_data_size;
int data_offset = stack_offset;
stack_offset += 16;
offset = stack_offset;
char *var_name = strndup(expr->expr.var_decl.name, expr->expr.var_decl.name_len);
shput(variables, var_name, offset);
for (i64 i = 0; i < count; i++) {
i64 value = start + i;
int element_offset = data_offset - (i * element_size);
fprintf(fp, "mov $%ld, %%rax\n", value);
if (element_size == 4) {
fprintf(fp, "mov %%eax, -%d(%%rbp)\n", element_offset);
} else if (element_size == 2) {
fprintf(fp, "mov %%ax, -%d(%%rbp)\n", element_offset);
} else if (element_size == 1) {
fprintf(fp, "mov %%al, -%d(%%rbp)\n", element_offset);
} else {
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", element_offset);
}
}
fprintf(fp, "lea -%d(%%rbp), %%rax\n", data_offset);
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset);
fprintf(fp, "mov $%ld, %%rax\n", count);
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset - 8);
}
} else if (expr->expr.var_decl.value->type == NODE_STRING && var_type && var_type->tag == TYPE_SLICE) {
ast_node *str = expr->expr.var_decl.value;
usize str_len = str->expr.string.len;
char *str_data = str->expr.string.start;
usize aligned_data_size = (str_len + 7) & ~7;
stack_offset += aligned_data_size;
int data_offset = stack_offset;
stack_offset += 16;
offset = stack_offset;
char *var_name = strndup(expr->expr.var_decl.name, expr->expr.var_decl.name_len);
shput(variables, var_name, offset);
for (usize i = 0; i < str_len; i++) {
int byte_offset = data_offset - i;
if ((unsigned char)str_data[i] == '\\' && (unsigned char)str_data[i+1] == 'n') {
fprintf(fp, "movb $%d, -%d(%%rbp)\n", (unsigned char)'\n', byte_offset);
i += 1;
} else {
fprintf(fp, "movb $%d, -%d(%%rbp)\n", (unsigned char)str_data[i], byte_offset);
}
}
fprintf(fp, "lea -%d(%%rbp), %%rax\n", data_offset);
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset);
fprintf(fp, "mov $%lu, %%rax\n", str_len);
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset - 8);
} else if (expr->expr.var_decl.value->type == NODE_STRUCT_INIT) {
ast_node *member_list = expr->expr.var_decl.value->expr.struct_init.members;
ast_node *current = member_list;
type *struct_type = expr->expr_type;
if (!struct_type && expr->expr.var_decl.type) {
struct_type = expr->expr.var_decl.type->expr_type;
}
if (var_type && var_type->tag == TYPE_STRUCT) {
while (current && current->type == NODE_UNIT) {
ast_node *assignment = current->expr.unit_node.expr;
if (assignment && assignment->type == NODE_BINARY &&
assignment->expr.binary.operator == OP_ASSIGN) {
ast_node *field = assignment->expr.binary.left;
ast_node *value = assignment->expr.binary.right;
while (current && current->type == NODE_UNIT) {
ast_node *assignment = current->expr.unit_node.expr;
if (assignment && assignment->type == NODE_BINARY &&
assignment->expr.binary.operator == OP_ASSIGN) {
ast_node *field = assignment->expr.binary.left;
ast_node *value = assignment->expr.binary.right;
if (field->type == NODE_IDENTIFIER) {
char *field_name = strndup(field->expr.string.start,
field->expr.string.len);
if (field->type == NODE_IDENTIFIER && struct_type &&
struct_type->tag == TYPE_STRUCT) {
char *field_name = strndup(field->expr.string.start,
field->expr.string.len);
member *m = struct_type->data.structure.members;
int field_offset = -1;
while (m) {
if (m->name_len == field->expr.string.len &&
strncmp(m->name, field->expr.string.start, m->name_len) == 0) {
field_offset = m->offset;
break;
member *m = var_type->data.structure.members;
int field_offset = -1;
while (m) {
if (m->name_len == field->expr.string.len &&
strncmp(m->name, field->expr.string.start, m->name_len) == 0) {
field_offset = m->offset;
break;
}
m = m->next;
}
m = m->next;
}
if (field_offset >= 0) {
if (field_offset >= 0) {
gen_expr(fp, value);
type *field_type = shget(var_type->data.structure.member_types, field_name);
if (field_type && field_type->size == 4) {
fprintf(fp, "mov %%eax, -%d(%%rbp)\n", offset + field_offset);
} else if (field_type && field_type->size == 2) {
fprintf(fp, "mov %%ax, -%d(%%rbp)\n", offset + field_offset);
} else if (field_type && field_type->size == 1) {
fprintf(fp, "mov %%al, -%d(%%rbp)\n", offset + field_offset);
} else {
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset + field_offset);
}
}
free(field_name);
}
}
current = current->expr.unit_node.next;
}
}
else if (var_type && (var_type->tag == TYPE_PTR || var_type->tag == TYPE_SLICE)) {
usize element_size = 8;
type *element_type = NULL;
if (var_type->tag == TYPE_PTR && var_type->data.ptr.child) {
element_type = var_type->data.ptr.child;
element_size = element_type->size;
} else if (var_type->tag == TYPE_SLICE && var_type->data.slice.child) {
element_type = var_type->data.slice.child;
element_size = element_type->size;
}
int element_count = 0;
ast_node *count_node = current;
while (count_node && count_node->type == NODE_UNIT) {
element_count++;
count_node = count_node->expr.unit_node.next;
}
if (var_type->tag == TYPE_SLICE) {
usize data_size = element_count * element_size;
usize aligned_data_size = (data_size + 7) & ~7;
stack_offset += aligned_data_size;
int data_offset = stack_offset;
stack_offset += 16;
offset = stack_offset;
char *var_name = strndup(expr->expr.var_decl.name, expr->expr.var_decl.name_len);
shput(variables, var_name, offset);
int index = 0;
while (current && current->type == NODE_UNIT) {
ast_node *value = current->expr.unit_node.expr;
if (value) {
gen_expr(fp, value);
type *field_type = shget(struct_type->data.structure.member_types, field_name);
int element_offset = data_offset - (index * element_size);
if (field_type && field_type->size == 4) {
fprintf(fp, "mov %%eax, -%d(%%rbp)\n", offset + field_offset);
} else if (field_type && field_type->size == 2) {
fprintf(fp, "mov %%ax, -%d(%%rbp)\n", offset + field_offset);
} else if (field_type && field_type->size == 1) {
fprintf(fp, "mov %%al, -%d(%%rbp)\n", offset + field_offset);
if (element_type && element_type->size == 4) {
fprintf(fp, "mov %%eax, -%d(%%rbp)\n", element_offset);
} else if (element_type && element_type->size == 2) {
fprintf(fp, "mov %%ax, -%d(%%rbp)\n", element_offset);
} else if (element_type && element_type->size == 1) {
fprintf(fp, "mov %%al, -%d(%%rbp)\n", element_offset);
} else {
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset + field_offset);
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", element_offset);
}
}
index++;
current = current->expr.unit_node.next;
}
free(field_name);
fprintf(fp, "lea -%d(%%rbp), %%rax\n", data_offset);
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset);
fprintf(fp, "mov $%d, %%rax\n", element_count);
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset - 8);
} else {
int index = 0;
while (current && current->type == NODE_UNIT) {
ast_node *value = current->expr.unit_node.expr;
if (value) {
gen_expr(fp, value);
int element_offset = offset + (index * element_size);
if (element_type && element_type->size == 4) {
fprintf(fp, "mov %%eax, -%d(%%rbp)\n", element_offset);
} else if (element_type && element_type->size == 2) {
fprintf(fp, "mov %%ax, -%d(%%rbp)\n", element_offset);
} else if (element_type && element_type->size == 1) {
fprintf(fp, "mov %%al, -%d(%%rbp)\n", element_offset);
} else {
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", element_offset);
}
}
index++;
current = current->expr.unit_node.next;
}
}
current = current->expr.unit_node.next;
}
} else {
gen_expr(fp, expr->expr.var_decl.value);
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset);
// If assigning a slice value, copy the 16-byte structure
if (var_type && var_type->tag == TYPE_SLICE) {
fprintf(fp, "mov (%%rax), %%rcx\n"); // Load ptr field
fprintf(fp, "mov 8(%%rax), %%rdx\n"); // Load len field
fprintf(fp, "mov %%rcx, -%d(%%rbp)\n", offset); // Store ptr
fprintf(fp, "mov %%rdx, -%d(%%rbp)\n", offset - 8); // Store len
} else {
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset);
}
}
}
break;
@ -598,12 +766,24 @@ void gen_expr(FILE *fp, ast_node *expr)
if (base->type == NODE_IDENTIFIER) {
int base_offset = get_var_offset(base->expr.string.start, base->expr.string.len);
type *base_type = base->expr_type;
type *struct_type = base->expr_type;
if (base_type && base_type->tag == TYPE_SLICE && member_node->type == NODE_IDENTIFIER) {
char *field_name = strndup(member_node->expr.string.start, member_node->expr.string.len);
if (member_node->type == NODE_IDENTIFIER && struct_type &&
struct_type->tag == TYPE_STRUCT) {
member *m = struct_type->data.structure.members;
if (strcmp(field_name, "ptr") == 0) {
fprintf(fp, "mov -%d(%%rbp), %%rax\n", base_offset);
} else if (strcmp(field_name, "len") == 0) {
fprintf(fp, "mov -%d(%%rbp), %%rax\n", base_offset - 8);
} else {
fprintf(fp, "# ERROR: slice field '%s' not found\n", field_name);
}
free(field_name);
}
else if (member_node->type == NODE_IDENTIFIER && base_type &&
base_type->tag == TYPE_STRUCT) {
member *m = base_type->data.structure.members;
int field_offset = -1;
while (m) {
if (m->name_len == member_node->expr.string.len &&
@ -620,17 +800,165 @@ void gen_expr(FILE *fp, ast_node *expr)
fprintf(fp, "# ERROR: field not found\n");
}
} else {
fprintf(fp, "# ERROR: not a struct type\n");
fprintf(fp, "# ERROR: not a struct or slice type\n");
}
} else {
fprintf(fp, "# ERROR: complex struct access not implemented\n");
}
break;
}
case NODE_RANGE: {
    /*
     * Materialise a constant inclusive range `start..end` as a slice in the
     * current stack frame.
     *
     * Emitted layout (all offsets are negative displacements from %rbp):
     *   - `count` elements of 8 bytes each, first element at -data_offset,
     *     following elements at increasing addresses;
     *   - a 16-byte slice record at -slice_offset: data pointer first,
     *     element count 8 bytes above it.
     * On exit %rax holds the address of the slice record.
     *
     * Only integer-literal bounds are supported; anything else produces an
     * error comment in the assembly output.
     */
    ast_node *lower = expr->expr.binary.left;
    ast_node *upper = expr->expr.binary.right;

    if (lower->type != NODE_INTEGER || upper->type != NODE_INTEGER) {
        fprintf(fp, "# ERROR: range expression requires constant bounds\n");
        break;
    }

    i64 start = lower->expr.integer;
    i64 end = upper->expr.integer;
    i64 count = end - start + 1;

    /*
     * A descending range would give a negative count; converting that to the
     * unsigned data size below would wrap and corrupt stack_offset, so reject
     * it up front.
     */
    if (count < 0) {
        fprintf(fp, "# ERROR: range end is less than range start\n");
        break;
    }

    /* A free-standing range has no declared element type: use 8-byte elements. */
    usize element_size = 8;
    usize data_size = (usize)count * element_size;
    usize aligned_data_size = (data_size + 7) & ~7;

    /* Reserve the element storage. */
    stack_offset += aligned_data_size;
    int data_offset = stack_offset;

    for (i64 i = 0; i < count; i++) {
        i64 value = start + i;
        int element_offset = data_offset - (int)((usize)i * element_size);
        fprintf(fp, "mov $%lld, %%rax\n", (long long)value);
        fprintf(fp, "mov %%rax, -%d(%%rbp)\n", element_offset);
    }

    /* Reserve and fill the 16-byte slice record (ptr, len). */
    stack_offset += 16;
    int slice_offset = stack_offset;
    fprintf(fp, "lea -%d(%%rbp), %%rax\n", data_offset);
    fprintf(fp, "mov %%rax, -%d(%%rbp)\n", slice_offset);
    fprintf(fp, "mov $%lld, %%rax\n", (long long)count);
    fprintf(fp, "mov %%rax, -%d(%%rbp)\n", slice_offset - 8);

    /* The expression's value is the address of the slice record. */
    fprintf(fp, "lea -%d(%%rbp), %%rax\n", slice_offset);
    break;
}
case NODE_STRUCT_INIT: {
    /*
     * A struct initializer reaching this case is not being lowered as the
     * value of a variable declaration; no code is generated for it here,
     * only a diagnostic comment in the assembly output.
     */
    fputs("# ERROR: struct init outside of variable declaration\n", fp);
    break;
}
case NODE_ARRAY_SUBSCRIPT: {
    /*
     * Code generation for `base[index]`.
     *
     * Four shapes are handled:
     *   1. index is a range, base is an identifier: build a temporary
     *      16-byte slice record (ptr, len) on the stack and leave its
     *      address in %rax.
     *   2. base is a slice-typed identifier: load the slice's data pointer
     *      from the variable's slot, add the scaled index, load the element.
     *   3. base is any other identifier: the element address is computed
     *      relative to %rbp from the variable's frame offset.
     *   4. base is an arbitrary expression: its value is used as the base
     *      address.
     * For shapes 2-4 the loaded element ends up in %rax/%eax.
     *
     * The base pointer is moved into %rcx only AFTER the index expression(s)
     * have been generated: generating a nested slice subscript (e.g.
     * `a[b[i]]`) goes through this same case and writes %rcx itself, so an
     * earlier load would be clobbered.
     */
    ast_node *base = expr->expr.subscript.expr;
    ast_node *index = expr->expr.subscript.index;
    type *base_type = base->expr_type;

    /* Element size defaults to 8 when the base type gives no child type. */
    usize element_size = 8;
    bool is_slice = false;
    if (base_type) {
        if (base_type->tag == TYPE_PTR && base_type->data.ptr.child) {
            element_size = base_type->data.ptr.child->size;
        } else if (base_type->tag == TYPE_SLICE && base_type->data.slice.child) {
            element_size = base_type->data.slice.child->size;
            is_slice = true;
        }
    }

    if (index->type == NODE_RANGE) {
        if (base->type != NODE_IDENTIFIER) {
            /* Previously this combination silently emitted nothing. */
            fprintf(fp, "# ERROR: range subscript on complex expression not implemented\n");
            break;
        }

        int base_offset = get_var_offset(base->expr.string.start, base->expr.string.len);

        gen_expr(fp, index->expr.binary.left);
        fprintf(fp, "push %%rax\n");
        gen_expr(fp, index->expr.binary.right);
        fprintf(fp, "mov %%rax, %%rdx\n"); // rdx = end
        fprintf(fp, "pop %%rax\n");        // rax = start

        /*
         * NOTE(review): this reads the 8 bytes stored in the variable's slot
         * as the base pointer, which matches the slice layout (ptr first);
         * confirm this is also what is wanted for non-slice bases.
         */
        fprintf(fp, "mov -%d(%%rbp), %%rcx\n", base_offset);

        fprintf(fp, "mov %%rdx, %%r8\n");
        fprintf(fp, "sub %%rax, %%r8\n");
        fprintf(fp, "inc %%r8\n");         // r8 = new length (inclusive range)
        if (element_size != 1) {
            fprintf(fp, "imul $%lu, %%rax\n", (unsigned long)element_size);
        }
        fprintf(fp, "add %%rcx, %%rax\n"); // rax = new ptr

        // Allocate temporary slice struct (16 bytes)
        stack_offset += 16;
        fprintf(fp, "mov %%rax, -%d(%%rbp)\n", stack_offset);     // Store ptr
        fprintf(fp, "mov %%r8, -%d(%%rbp)\n", stack_offset - 8);  // Store len
        fprintf(fp, "lea -%d(%%rbp), %%rax\n", stack_offset);     // Return address of temp slice
        break;
    }

    /* Pick the load instruction once, based on the size of the result type. */
    const char *load_insn = "mov (%rax), %rax\n";
    if (expr->expr_type) {
        if (expr->expr_type->size == 4) {
            load_insn = "movl (%rax), %eax\n";
        } else if (expr->expr_type->size == 2) {
            load_insn = "movzwl (%rax), %eax\n";
        } else if (expr->expr_type->size == 1) {
            load_insn = "movzbl (%rax), %eax\n";
        }
    }

    if (base->type == NODE_IDENTIFIER && is_slice) {
        int base_offset = get_var_offset(base->expr.string.start, base->expr.string.len);
        gen_expr(fp, index);
        if (element_size != 1) {
            fprintf(fp, "imul $%lu, %%rax\n", (unsigned long)element_size);
        }
        /* Load the slice's data pointer after the index is computed (see above). */
        fprintf(fp, "mov -%d(%%rbp), %%rcx\n", base_offset);
        fprintf(fp, "add %%rcx, %%rax\n");
    } else if (base->type == NODE_IDENTIFIER) {
        int base_offset = get_var_offset(base->expr.string.start, base->expr.string.len);
        gen_expr(fp, index);
        if (element_size != 1) {
            fprintf(fp, "imul $%lu, %%rax\n", (unsigned long)element_size);
        }
        /* rax = rbp - (base_offset + index * element_size) */
        fprintf(fp, "add $%d, %%rax\n", base_offset);
        fprintf(fp, "neg %%rax\n");
        fprintf(fp, "add %%rbp, %%rax\n");
    } else {
        /* The base value is kept on the machine stack while the index is evaluated. */
        gen_expr(fp, base);
        fprintf(fp, "push %%rax\n");
        gen_expr(fp, index);
        if (element_size != 1) {
            fprintf(fp, "imul $%lu, %%rax\n", (unsigned long)element_size);
        }
        fprintf(fp, "pop %%rcx\n");
        fprintf(fp, "add %%rcx, %%rax\n");
    }

    fputs(load_insn, fp);
    break;
}
case NODE_CALL: {
const char *arg_regs[] = {"%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9"};
@ -676,6 +1004,10 @@ void gen_expr(FILE *fp, ast_node *expr)
void gen_function(FILE *fp, ast_node *fn)
{
if (fn->expr.function.is_extern || fn->expr.function.body == NULL) {
return;
}
ast_node *current = fn->expr.function.body;
stack_offset = 0;