Compare commits

...
Sign in to create a new pull request.

3 commits
son ... master

16 changed files with 1446 additions and 910 deletions

View file

@ -3,8 +3,8 @@
include config.mk include config.mk
SRC = lc.c utils.c lexer.c parser.c sema.c ir.c SRC = lc.c utils.c lexer.c parser.c sema.c codegen.c
HDR = config.def.h utils.h lexer.h parser.h sema.h ir.h HDR = config.def.h utils.h lexer.h parser.h sema.h codegen.h
OBJ = ${SRC:.c=.o} OBJ = ${SRC:.c=.o}
all: options lc all: options lc

1168
codegen.c Normal file

File diff suppressed because it is too large Load diff

8
codegen.h Normal file
View file

@ -0,0 +1,8 @@
/* Public interface of the code generation pass. */
#ifndef CODEGEN_H
#define CODEGEN_H
#include "parser.h"
/*
 * Run code generation over the AST rooted at `node`.
 * main() calls this with the parser's AST after sema_init() has run.
 */
void generate(ast_node *node);
#endif

View file

812
ir.c
View file

@ -1,812 +0,0 @@
#include "ir.h"
#include <stdlib.h>
#include <stdio.h>
#include "stb_ds.h"
#include "sema.h"
/*
 * stb_ds hash map of every IR node registered so far. The key is a by-value
 * copy of the node struct taken at hmput() time; the value is the live node.
 * NOTE(review): this is the only file-scope object here without `static`.
 */
struct { ir_node key; ir_node *value; } *global_hash = NULL;
/* Root START node of the whole program; created in ir_build(). */
static ir_node *graph;
/* Most recent memory-state node; stores and memory phis replace it. */
static ir_node *current_memory;
/* Control node new nodes attach to; build_return() sets it to NULL. */
static ir_node *current_control;
/* Running offset handed to build_address() for address-taken locals. */
static usize current_stack = 0;
/* OC_SCOPE node holding the stack of symbol tables currently in effect. */
static ir_node *current_scope = NULL;
/* Forward declaration: the expression builders below are mutually recursive. */
static ir_node *build_expression(ast_node *node);
/*
 * Per-function bookkeeping: build_return() appends the control, memory and
 * value of each `return`; finalize_function() merges them.
 */
static struct {
ir_node **return_controls;
ir_node **return_memories;
ir_node **return_values;
} current_func = {0};
/*
 * Write the Graphviz DOT declaration of one IR node to stdout: the node id
 * followed by an attribute list selected by the node's opcode. A NULL node
 * is emitted as a red placeholder named "null".
 */
static void node_name(ir_node *node)
{
	if (node == NULL) {
		fputs("null [label=\"NULL\", style=filled, fillcolor=red]\n", stdout);
		return;
	}

	printf("%ld ", node->id);

	switch (node->code) {
	/* Function / program entry and exit. */
	case OC_START:
		printf("[label=\"%s\", style=filled, color=orange]\n", node->data.start_name);
		return;
	case OC_RETURN:
		fputs("[label=\"return\", style=filled, color=orange]\n", stdout);
		return;

	/* Arithmetic, bitwise and comparison operators. */
	case OC_ADD:
		fputs("[label=\"+\"]\n", stdout);
		return;
	case OC_NEG: /* unary and binary minus share one label */
	case OC_SUB:
		fputs("[label=\"-\"]\n", stdout);
		return;
	case OC_DIV:
		fputs("[label=\"/\"]\n", stdout);
		return;
	case OC_MUL:
		fputs("[label=\"*\"]\n", stdout);
		return;
	case OC_MOD:
		printf("[label=\"%%\"]\n");
		return;
	case OC_BAND:
		fputs("[label=\"&\"]\n", stdout);
		return;
	case OC_BOR:
		fputs("[label=\"|\"]\n", stdout);
		return;
	case OC_BXOR:
		fputs("[label=\"^\"]\n", stdout);
		return;
	case OC_EQ:
		fputs("[label=\"==\"]\n", stdout);
		return;

	/* Constants are labelled with their value. */
	case OC_CONST_INT:
		printf("[label=\"%ld\"]\n", node->data.const_int);
		return;
	case OC_CONST_FLOAT:
		printf("[label=\"%f\"]\n", node->data.const_float);
		return;

	/* Memory-related nodes. */
	case OC_FRAME_PTR:
		fputs("[label=\"frame_ptr\"]\n", stdout);
		return;
	case OC_STORE:
		fputs("[label=\"store\", shape=box]\n", stdout);
		return;
	case OC_LOAD:
		fputs("[label=\"load\", shape=box]\n", stdout);
		return;
	case OC_ADDR:
		fputs("[label=\"addr\"]\n", stdout);
		return;

	/* Control-flow nodes. */
	case OC_REGION:
		fputs("[label=\"region\", shape=diamond, style=filled, color=green]\n", stdout);
		return;
	case OC_PHI:
		fputs("[label=\"phi\", shape=triangle]\n", stdout);
		return;
	case OC_IF:
		fputs("[label=\"if\", shape=diamond, style=filled, color=lightblue]\n", stdout);
		return;
	case OC_PROJ:
		fputs("[label=\"proj\", shape=diamond, style=filled, color=cyan]\n", stdout);
		return;

	/* Anything else is labelled with its numeric opcode. */
	default:
		printf("[label=\"%d\"]\n", node->code);
		return;
	}
}
/*
 * Dump every node registered in global_hash as Graphviz DOT statements on
 * stdout. For each registered node its declaration is printed, followed by
 * the declaration of each non-NULL operand and one edge "operand->node".
 *
 * The `node` parameter is kept for interface compatibility but is not used:
 * the dump is driven by global_hash, not by a traversal from a root. The
 * original code shadowed the parameter with a loop-local of the same name.
 */
static void print_graph(ir_node *node)
{
	(void)node;

	for (int i = 0; i < hmlen(global_hash); i++) {
		ir_node *user = global_hash[i].value;
		node_name(user);

		for (int j = 0; j < arrlen(user->out); j++) {
			ir_node *operand = user->out[j];
			if (!operand) {
				/* e.g. a control slot left NULL after a `return` */
				continue;
			}
			node_name(operand);
			printf("%ld->%ld\n", operand->id, user->id);
		}
	}
}
static void push_scope(void)
{
arrput(current_scope->data.symbol_tables, NULL);
}
/*
 * Look `name` up in the current scope, searching from the innermost symbol
 * table outwards. Returns the first definition found, or NULL if no table
 * contains the name.
 */
static struct symbol_def *get_def(char *name)
{
	int depth = arrlen(current_scope->data.symbol_tables);

	while (depth-- > 0) {
		struct symbol_def *found = shget(current_scope->data.symbol_tables[depth], name);
		if (found != NULL) {
			return found;
		}
	}
	return NULL;
}
/*
 * Bind `name` to `node` in the current scope.
 *
 * If some symbol table already defines `name`, the innermost such table is
 * updated; otherwise the binding goes into the innermost table. A fresh
 * symbol_def is always allocated. The previous one is not freed, since
 * copy_scope() shares symbol_def pointers between scopes.
 */
static void set_def(char *name, ir_node *node, bool lvalue)
{
	symbol_table **tables = current_scope->data.symbol_tables;
	int target = arrlen(tables) - 1;

	/* Prefer the innermost table that already knows this name. */
	for (int level = target; level >= 0; level--) {
		if (shget(tables[level], name)) {
			target = level;
			break;
		}
	}

	struct symbol_def *binding = calloc(1, sizeof(struct symbol_def));
	binding->is_lvalue = lvalue;
	binding->node = node;
	shput(tables[target], name, binding);
}
/*
 * Create a new OC_SCOPE node with the same number of symbol tables as `src`
 * and the same key -> symbol_def entries. The copy is shallow: key strings
 * and symbol_def objects are shared with `src`.
 */
static ir_node *copy_scope(ir_node *src)
{
	ir_node *clone = calloc(1, sizeof *clone);
	clone->code = OC_SCOPE;

	int table_count = arrlen(src->data.symbol_tables);
	for (int level = 0; level < table_count; level++) {
		symbol_table *from = src->data.symbol_tables[level];

		arrput(clone->data.symbol_tables, NULL);
		int entry_count = shlen(from);
		for (int entry = 0; entry < entry_count; entry++) {
			shput(clone->data.symbol_tables[level], from[entry].key, from[entry].value);
		}
	}
	return clone;
}
/*
 * Constant-fold a binary node in place.
 *
 * When both operands are integer constants (or both float constants) and the
 * operator is supported, `binary` is turned into an OC_CONST_INT or
 * OC_CONST_FLOAT node holding the result, its operand arrays are released
 * and its id is recomputed. Otherwise the node is left untouched.
 *
 * Division and modulo by a zero constant are never folded. The original code
 * skipped the computation but still converted the node, so `x / 0` silently
 * became the constant 0. Missing or NULL operands are also left alone.
 */
static void const_fold(ir_node *binary)
{
	if (!binary || arrlen(binary->out) < 2) {
		return;
	}

	ir_node *left = binary->out[0];
	ir_node *right = binary->out[1];
	if (!left || !right) {
		return;
	}

	opcode folded_code;

	if (left->code == OC_CONST_INT && right->code == OC_CONST_INT) {
		i64 a = left->data.const_int;
		i64 b = right->data.const_int;
		i64 result;

		switch (binary->code) {
		case OC_ADD:
			result = a + b;
			break;
		case OC_SUB:
			result = a - b;
			break;
		case OC_MUL:
			result = a * b;
			break;
		case OC_DIV:
			if (b == 0) {
				return; /* keep the division in the graph */
			}
			result = a / b;
			break;
		case OC_MOD:
			if (b == 0) {
				return; /* keep the modulo in the graph */
			}
			result = a % b;
			break;
		case OC_BOR:
			result = a | b;
			break;
		case OC_BAND:
			result = a & b;
			break;
		case OC_BXOR:
			result = a ^ b;
			break;
		case OC_EQ:
			result = (a == b);
			break;
		default:
			return;
		}
		binary->data.const_int = result;
		folded_code = OC_CONST_INT;
	} else if (left->code == OC_CONST_FLOAT && right->code == OC_CONST_FLOAT) {
		f64 a = left->data.const_float;
		f64 b = right->data.const_float;
		f64 result;

		switch (binary->code) {
		case OC_ADD:
			result = a + b;
			break;
		case OC_SUB:
			result = a - b;
			break;
		case OC_MUL:
			result = a * b;
			break;
		case OC_DIV:
			if (b == 0.0f) {
				return; /* keep the division in the graph */
			}
			result = a / b;
			break;
		default:
			return;
		}
		binary->data.const_float = result;
		folded_code = OC_CONST_FLOAT;
	} else {
		return;
	}

	/* The node is now a leaf constant: drop its edges and re-hash it. */
	binary->code = folded_code;
	arrfree(binary->out);
	arrfree(binary->in);
	binary->id = stbds_hash_bytes(binary, sizeof(ir_node), 0xcafebabe);
}
/*
 * Build an OC_ADDR node whose operands are a base node and a constant offset.
 *
 * A `base` of (usize)-1 selects the frame pointer (OC_FRAME_PTR) as the base;
 * any other value becomes an integer constant. If an identical node is
 * already registered in global_hash, that node is returned and everything
 * allocated here is released (the original leaked the operand array and both
 * child nodes on that path). The returned node is not registered here.
 */
static ir_node *build_address(usize base, usize offset) {
	ir_node *base_node = calloc(1, sizeof *base_node);
	if (base == (usize)-1) {
		base_node->code = OC_FRAME_PTR;
	} else {
		base_node->code = OC_CONST_INT;
		base_node->data.const_int = base;
	}
	base_node->id = stbds_hash_bytes(base_node, sizeof(ir_node), 0xcafebabe);

	ir_node *offset_node = calloc(1, sizeof *offset_node);
	offset_node->code = OC_CONST_INT;
	offset_node->data.const_int = offset;
	offset_node->id = stbds_hash_bytes(offset_node, sizeof(ir_node), 0xcafebabe);

	ir_node *addr = calloc(1, sizeof *addr);
	addr->code = OC_ADDR;
	arrput(addr->out, base_node);
	arrput(addr->out, offset_node);
	addr->id = stbds_hash_bytes(addr, sizeof(ir_node), 0xcafebabe);

	ir_node *existing = hmget(global_hash, *addr);
	if (existing) {
		/* Nothing built above has been published yet, so it is safe to drop. */
		arrfree(addr->out);
		free(addr);
		free(base_node);
		free(offset_node);
		return existing;
	}
	return addr;
}
/*
 * Lower an OP_ASSIGN_PTR expression: evaluate the right-hand side and emit an
 * OC_STORE with operands [control, memory, target definition, value]. The
 * store becomes the new current memory state.
 *
 * Returns the value node. If the left-hand name has no definition in scope,
 * no store can be built: a diagnostic is printed to stderr and the value
 * node is returned unchanged. The original dereferenced NULL in that case.
 */
static ir_node *build_assign_ptr(ast_node *binary)
{
	ir_node *val_node = build_expression(binary->expr.binary.right);
	char *var_name = binary->expr.binary.left->expr.string.start;

	struct symbol_def *def = get_def(var_name);
	if (!def) {
		fprintf(stderr, "ir: store through undefined symbol\n");
		return val_node;
	}

	ir_node *store = calloc(1, sizeof *store);
	store->code = OC_STORE;
	arrput(store->out, current_control);
	arrput(store->out, current_memory);
	arrput(store->out, def->node);
	arrput(store->out, val_node);
	store->id = stbds_hash_bytes(store, sizeof(ir_node), 0xcafebabe);
	hmput(global_hash, *store, store);

	current_memory = store;
	return val_node;
}
/*
 * Lower an OP_ASSIGN expression and return the node of the assigned value.
 *
 * If the target is not an lvalue in scope (or is unknown), the name is simply
 * rebound to the value node. Otherwise an OC_STORE with operands
 * [control, memory, target definition, value] is emitted and becomes the
 * current memory state.
 */
static ir_node *build_assign(ast_node *binary)
{
	ir_node *value = build_expression(binary->expr.binary.right);
	char *target_name = binary->expr.binary.left->expr.string.start;
	struct symbol_def *target = get_def(target_name);

	if (!target || !target->is_lvalue) {
		/* Plain value: only the symbol binding changes. */
		set_def(target_name, value, false);
		return value;
	}

	ir_node *store_node = calloc(1, sizeof *store_node);
	store_node->code = OC_STORE;
	arrput(store_node->out, current_control);
	arrput(store_node->out, current_memory);
	arrput(store_node->out, target->node);
	arrput(store_node->out, value);
	store_node->id = stbds_hash_bytes(store_node, sizeof(ir_node), 0xcafebabe);
	hmput(global_hash, *store_node, store_node);

	current_memory = store_node;
	return value;
}
/*
 * Lower a binary AST expression.
 *
 * Assignments are delegated to build_assign() / build_assign_ptr(). Every
 * other operator becomes a node whose operands are the left and right
 * sub-expressions (built in that order), which is then constant-folded and
 * looked up in global_hash. Operators without a matching opcode leave the
 * node's code at its zero-initialised value.
 */
static ir_node *build_binary(ast_node *node)
{
	/* Assignments build no operator node of their own. */
	if (node->expr.binary.operator == OP_ASSIGN) {
		return build_assign(node);
	}
	if (node->expr.binary.operator == OP_ASSIGN_PTR) {
		return build_assign_ptr(node);
	}

	ir_node *result = calloc(1, sizeof *result);

	switch (node->expr.binary.operator) {
	case OP_PLUS:  result->code = OC_ADD;  break;
	case OP_MINUS: result->code = OC_SUB;  break;
	case OP_DIV:   result->code = OC_DIV;  break;
	case OP_MUL:   result->code = OC_MUL;  break;
	case OP_MOD:   result->code = OC_MOD;  break;
	case OP_BOR:   result->code = OC_BOR;  break;
	case OP_BAND:  result->code = OC_BAND; break;
	case OP_BXOR:  result->code = OC_BXOR; break;
	case OP_EQ:    result->code = OC_EQ;   break;
	default:
		break;
	}

	arrput(result->out, build_expression(node->expr.binary.left));
	arrput(result->out, build_expression(node->expr.binary.right));
	result->id = stbds_hash_bytes(result, sizeof(ir_node), 0xcafebabe);
	const_fold(result);

	/* Reuse an identical node that is already registered, if any. */
	ir_node *existing = hmget(global_hash, *result);
	if (existing == NULL) {
		return result;
	}
	free(result);
	return existing;
}
/*
 * Build an OC_LOAD node with operands [current memory, address], where the
 * address is the lowered form of `node`. Returns an identical registered
 * node from global_hash if one exists.
 *
 * The id uses the same 0xcafebabe seed as every other node in this file,
 * including the OC_LOAD built for identifiers in build_expression(). The
 * original used a different seed here only. The operand array is now also
 * released when the new node is discarded in favour of an existing one.
 */
static ir_node *build_load(ast_node *node)
{
	ir_node *n = calloc(1, sizeof *n);
	n->code = OC_LOAD;
	arrput(n->out, current_memory);
	arrput(n->out, build_expression(node));
	n->id = stbds_hash_bytes(n, sizeof(ir_node), 0xcafebabe);

	ir_node *tmp = hmget(global_hash, *n);
	if (tmp) {
		arrfree(n->out);
		free(n);
		return tmp;
	}
	return n;
}
/*
 * Lower a unary AST expression.
 *
 *  - UOP_REF:   returns the definition node of a known identifier operand,
 *               otherwise the lowered operand itself.
 *  - UOP_DEREF: returns a load of the operand (see build_load()).
 *  - UOP_MINUS: builds an OC_NEG node, folding it to a constant when the
 *               operand is an integer or float constant.
 *  - other operators yield a zero-initialised node with no operands.
 *
 * Changes from the original: no node is allocated for REF/DEREF; a NULL
 * operand (build_expression() can return NULL) is no longer dereferenced
 * during folding; the operand array is released when the node is discarded
 * in favour of a registered duplicate.
 */
static ir_node *build_unary(ast_node *node)
{
	ast_node *operand = node->expr.unary.right;

	switch (node->expr.unary.operator) {
	case UOP_REF:
		if (operand->type == NODE_IDENTIFIER) {
			struct symbol_def *def = get_def(operand->expr.string.start);
			if (def) {
				return def->node;
			}
		}
		return build_expression(operand);
	case UOP_DEREF:
		return build_load(operand);
	default:
		break;
	}

	ir_node *n = calloc(1, sizeof *n);
	if (node->expr.unary.operator == UOP_MINUS) {
		n->code = OC_NEG;
		arrput(n->out, build_expression(operand));
	}

	/* Only OC_NEG has an operand, so folding below is always a negation. */
	ir_node *arg = n->out ? n->out[0] : NULL;
	if (arg && arg->code == OC_CONST_INT) {
		n->data.const_int = -(arg->data.const_int);
		n->code = OC_CONST_INT;
		arrfree(n->out);
	} else if (arg && arg->code == OC_CONST_FLOAT) {
		n->data.const_float = -(arg->data.const_float);
		n->code = OC_CONST_FLOAT;
		arrfree(n->out);
	}

	n->id = stbds_hash_bytes(n, sizeof(ir_node), 0xcafebabe);
	ir_node *tmp = hmget(global_hash, *n);
	if (tmp) {
		arrfree(n->out);
		free(n);
		return tmp;
	}
	return n;
}
/*
 * Lower an if/else statement.
 *
 * Builds an OC_IF on [condition, current control] with one OC_PROJ per
 * branch, lowers the `body` and `otherwise` statement lists under their own
 * scope and memory state, then joins them with an OC_REGION. Memory states
 * and symbol bindings that differ between the branches are merged with
 * OC_PHI nodes whose operands are [region, then-value, else-value].
 *
 * Returns the region, which also becomes the current control node.
 *
 * Fix over the original: the merge loop rewrote every symbol with
 * is_lvalue = false. An address-taken variable (whose definition node is
 * its address) was therefore treated as a plain value after any `if`, so
 * later reads skipped the load and later writes skipped the store. A
 * binding that is the same in both branches now keeps its lvalue flag.
 * Lookups in the branch tables are also NULL-checked.
 *
 * NOTE(review): the two projections carry nothing that distinguishes the
 * true edge from the false edge other than their identity.
 */
static ir_node *build_if(ast_node *node)
{
	ir_node *condition = build_expression(node->expr.if_stmt.condition);

	ir_node *if_node = calloc(1, sizeof(ir_node));
	if_node->code = OC_IF;
	arrput(if_node->out, condition);
	arrput(if_node->out, current_control);
	if_node->id = stbds_hash_bytes(if_node, sizeof(ir_node), 0xcafebabe);
	hmput(global_hash, *if_node, if_node);

	ir_node *proj_true = calloc(1, sizeof(ir_node));
	proj_true->code = OC_PROJ;
	arrput(proj_true->out, if_node);
	proj_true->id = stbds_hash_bytes(proj_true, sizeof(ir_node), 0xcafebabe);
	hmput(global_hash, *proj_true, proj_true);

	ir_node *proj_false = calloc(1, sizeof(ir_node));
	proj_false->code = OC_PROJ;
	arrput(proj_false->out, if_node);
	proj_false->id = stbds_hash_bytes(proj_false, sizeof(ir_node), 0xcafebabe);
	hmput(global_hash, *proj_false, proj_false);

	/* Snapshot the state both branches start from. */
	ir_node *base_scope = copy_scope(current_scope);
	ir_node *base_mem = current_memory;

	/* Then branch: continues in the live scope. */
	current_control = proj_true;
	ast_node *current = node->expr.if_stmt.body;
	while (current && current->type == NODE_UNIT) {
		if (current->expr.unit_node.expr) {
			build_expression(current->expr.unit_node.expr);
		}
		current = current->expr.unit_node.next;
	}
	ir_node *then_scope = current_scope;
	ir_node *then_mem = current_memory;
	ir_node *then_control = current_control;

	/* Else branch: restarts from a fresh copy of the snapshot. */
	current_scope = copy_scope(base_scope);
	current_memory = base_mem;
	current_control = proj_false;
	current = node->expr.if_stmt.otherwise;
	while (current && current->type == NODE_UNIT) {
		if (current->expr.unit_node.expr) {
			build_expression(current->expr.unit_node.expr);
		}
		current = current->expr.unit_node.next;
	}
	ir_node *else_scope = current_scope;
	ir_node *else_mem = current_memory;
	ir_node *else_control = current_control;

	/* Join the control flow of both branches. */
	ir_node *region = calloc(1, sizeof(ir_node));
	region->code = OC_REGION;
	arrput(region->out, then_control);
	arrput(region->out, else_control);
	region->id = stbds_hash_bytes(region, sizeof(ir_node), 0xcafebabe);
	hmput(global_hash, *region, region);

	/* Merge memory only if the branches ended in different states. */
	if (then_mem->id != else_mem->id) {
		ir_node *phi = calloc(1, sizeof(ir_node));
		phi->code = OC_PHI;
		arrput(phi->out, region);
		arrput(phi->out, then_mem);
		arrput(phi->out, else_mem);
		phi->id = stbds_hash_bytes(phi, sizeof(ir_node), 0xcafebabe);
		hmput(global_hash, *phi, phi);
		current_memory = phi;
	} else {
		current_memory = then_mem;
	}

	/* Merge every symbol visible before the `if` back into the base scope. */
	current_scope = base_scope;
	for (int i = 0; i < arrlen(current_scope->data.symbol_tables); i++) {
		symbol_table *base_table = current_scope->data.symbol_tables[i];
		for (int j = 0; j < shlen(base_table); j++) {
			char *key = base_table[j].key;
			struct symbol_def *base_def = base_table[j].value;

			/* A branch without a usable binding falls back to the base one. */
			symbol_table *t_table = then_scope->data.symbol_tables[i];
			struct symbol_def *then_def = shget(t_table, key);
			if (!then_def || !then_def->node) {
				then_def = base_def;
			}

			symbol_table *e_table = else_scope->data.symbol_tables[i];
			struct symbol_def *else_def = shget(e_table, key);
			if (!else_def || !else_def->node) {
				else_def = base_def;
			}

			ir_node *found_then = then_def->node;
			ir_node *found_else = else_def->node;

			struct symbol_def *def = calloc(1, sizeof(struct symbol_def));
			if (found_then->id != found_else->id) {
				ir_node *phi = calloc(1, sizeof(ir_node));
				phi->code = OC_PHI;
				arrput(phi->out, region);
				arrput(phi->out, found_then);
				arrput(phi->out, found_else);
				phi->id = stbds_hash_bytes(phi, sizeof(ir_node), 0xcafebabe);
				hmput(global_hash, *phi, phi);
				def->node = phi;
				def->is_lvalue = false; /* a phi is a merged value */
			} else {
				def->node = found_then;
				/* Same node on both paths: keep its lvalue status. */
				def->is_lvalue = then_def->is_lvalue;
			}
			shput(current_scope->data.symbol_tables[i], key, def);
		}
	}

	current_control = region;
	return region;
}
/*
 * Record a `return` statement for the function being built.
 *
 * The returned expression is lowered, or a fresh OC_VOID node is created
 * when there is none. The current control, current memory and the value
 * are appended to current_func for finalize_function(). Control is then
 * cleared, since nothing after the return continues this path.
 */
static void build_return(ast_node *node)
{
	ast_node *value_expr = node->expr.ret.value;
	ir_node *result;

	if (value_expr == NULL) {
		result = calloc(1, sizeof *result);
		result->code = OC_VOID;
		result->id = stbds_hash_bytes(result, sizeof(ir_node), 0xcafebabe);
	} else {
		result = build_expression(value_expr);
	}

	arrput(current_func.return_controls, current_control);
	arrput(current_func.return_memories, current_memory);
	arrput(current_func.return_values, result);

	current_control = NULL;
}
/*
 * Emit the single OC_RETURN node of the function being built from the
 * returns recorded in current_func. Does nothing if no return was recorded.
 *
 * With one recorded return its control, memory and value are used directly.
 * With several, the controls are joined by an OC_REGION, the memories by an
 * OC_PHI on [region, memories...] and the values by an OC_PHI on the values.
 *
 * Fix over the original: `region` and `mem_phi` were inserted into
 * global_hash before their ids were computed, so their stored keys carried
 * id 0, unlike every other node in this file. Ids are now assigned first.
 */
static void finalize_function(void)
{
	int count = arrlen(current_func.return_controls);
	if (count == 0) {
		return;
	}

	ir_node *final_ctrl = NULL;
	ir_node *final_mem = NULL;
	ir_node *final_val = NULL;

	if (count == 1) {
		final_ctrl = current_func.return_controls[0];
		final_mem = current_func.return_memories[0];
		final_val = current_func.return_values[0];
	} else {
		ir_node *region = calloc(1, sizeof *region);
		region->code = OC_REGION;
		for (int i = 0; i < count; i++) {
			arrput(region->out, current_func.return_controls[i]);
		}
		region->id = stbds_hash_bytes(region, sizeof(ir_node), 0xcafebabe);
		hmput(global_hash, *region, region);
		final_ctrl = region;

		ir_node *mem_phi = calloc(1, sizeof *mem_phi);
		mem_phi->code = OC_PHI;
		arrput(mem_phi->out, region);
		for (int i = 0; i < count; i++) {
			arrput(mem_phi->out, current_func.return_memories[i]);
		}
		mem_phi->id = stbds_hash_bytes(mem_phi, sizeof(ir_node), 0xcafebabe);
		hmput(global_hash, *mem_phi, mem_phi);
		final_mem = mem_phi;

		/*
		 * NOTE(review): unlike mem_phi (and the phis in build_if), the
		 * value phi has no region operand; the original had that line
		 * commented out. Confirm whether this is intentional.
		 */
		ir_node *val_phi = calloc(1, sizeof *val_phi);
		val_phi->code = OC_PHI;
		for (int i = 0; i < count; i++) {
			arrput(val_phi->out, current_func.return_values[i]);
		}
		val_phi->id = stbds_hash_bytes(val_phi, sizeof(ir_node), 0xcafebabe);
		hmput(global_hash, *val_phi, val_phi);
		final_val = val_phi;
	}

	ir_node *ret = calloc(1, sizeof *ret);
	ret->code = OC_RETURN;
	arrput(ret->out, final_ctrl);
	arrput(ret->out, final_mem);
	arrput(ret->out, final_val);
	ret->id = stbds_hash_bytes(ret, sizeof(ir_node), 0xcafebabe);
	hmput(global_hash, *ret, ret);
}
/*
 * Lower one function definition and return its OC_START node.
 *
 * Creates the START node, a control projection and a memory projection of
 * it, a fresh scope with one symbol table, and one OC_PROJ per parameter
 * bound to the parameter's name. Every statement of the body is then
 * lowered and finalize_function() emits the return node.
 *
 * Changes from the original:
 *  - the previous function's return arrays are released with arrfree()
 *    (which also resets them to NULL) instead of being zeroed with memset(),
 *    which leaked them;
 *  - current_stack restarts at 0, so frame-pointer-relative offsets of
 *    address-taken locals are per function, not cumulative across the file.
 */
static ir_node *build_function(ast_node *node)
{
	arrfree(current_func.return_controls);
	arrfree(current_func.return_memories);
	arrfree(current_func.return_values);
	current_stack = 0;

	ast_node *current = node->expr.function.body;

	ir_node *func = calloc(1, sizeof *func);
	func->code = OC_START;
	func->id = stbds_hash_bytes(func, sizeof(ir_node), 0xcafebabe);
	func->data.start_name = node->expr.function.name;

	/*
	 * The two start projections are byte-identical at this point, so their
	 * ids are derived from the bytes of the pointer variable, not the node.
	 */
	ir_node *start_ctrl = calloc(1, sizeof *start_ctrl);
	start_ctrl->code = OC_PROJ;
	start_ctrl->id = stbds_hash_bytes(&start_ctrl, sizeof(usize), 0xcafebabe);
	arrput(start_ctrl->out, func);
	hmput(global_hash, *start_ctrl, start_ctrl);
	current_control = start_ctrl;

	ir_node *start_mem = calloc(1, sizeof *start_mem);
	start_mem->code = OC_PROJ;
	start_mem->id = stbds_hash_bytes(&start_mem, sizeof(usize), 0xcafebabe);
	arrput(start_mem->out, func);
	hmput(global_hash, *start_mem, start_mem);
	current_memory = start_mem;

	current_scope = calloc(1, sizeof *current_scope);
	current_scope->code = OC_SCOPE;
	push_scope();

	/* Each parameter is a projection of the START node. */
	member *m = node->expr.function.parameters;
	while (m) {
		ir_node *proj_param = calloc(1, sizeof *proj_param);
		proj_param->code = OC_PROJ;
		arrput(proj_param->out, func);
		proj_param->id = stbds_hash_bytes(proj_param, sizeof(ir_node), 0xcafebabe);
		set_def(m->name, proj_param, false);
		hmput(global_hash, *proj_param, proj_param);
		m = m->next;
	}

	while (current && current->type == NODE_UNIT) {
		if (current->expr.unit_node.expr) {
			build_expression(current->expr.unit_node.expr);
		}
		current = current->expr.unit_node.next;
	}

	/* Re-hash so the id also covers start_name, which was set after the first hash. */
	func->id = stbds_hash_bytes(func, sizeof(ir_node), 0xcafebabe);
	finalize_function();
	return func;
}
/*
 * Lower one AST expression or statement and return the IR node holding its
 * value. Returns NULL for a NULL input, for `return` statements, for node
 * types that are not handled, and for identifiers with no definition.
 *
 *  - NODE_UNARY / NODE_BINARY / NODE_IF: delegated to their builders.
 *  - NODE_INTEGER: an OC_CONST_INT, reused from global_hash when present.
 *  - NODE_VAR_DECL: an address-taken variable gets an OC_STORE of its
 *    initialiser at frame offset current_stack and is bound to that address
 *    as an lvalue; any other variable is bound directly to its initialiser.
 *  - NODE_IDENTIFIER: the bound node, wrapped in an OC_LOAD for lvalues.
 *
 * Every non-NULL result, other than the early returns for integers and
 * declarations, is registered in global_hash before returning.
 *
 * Changes from the original: a NULL `node` and an unknown identifier no
 * longer dereference NULL; NODE_VAR_DECL no longer leaks an allocation on
 * the non-address-taken path; the identifier case is a braced block because
 * a declaration may not directly follow a `case` label before C23; the load
 * discarded on a cache hit also releases its operand array.
 *
 * NOTE(review): the store built for NODE_VAR_DECL has operands
 * [memory, address, value], while build_assign() emits
 * [control, memory, address, value]. Confirm which layout consumers expect.
 */
static ir_node *build_expression(ast_node *node)
{
	ir_node *n = NULL;
	ir_node *tmp = NULL;

	if (!node) {
		return NULL;
	}

	switch (node->type) {
	case NODE_UNARY:
		n = build_unary(node);
		break;
	case NODE_BINARY:
		n = build_binary(node);
		break;
	case NODE_INTEGER:
		n = calloc(1, sizeof *n);
		n->code = OC_CONST_INT;
		n->data.const_int = node->expr.integer;
		n->id = stbds_hash_bytes(n, sizeof(ir_node), 0xcafebabe);
		tmp = hmget(global_hash, *n);
		if (tmp) {
			free(n);
			return tmp;
		}
		break;
	case NODE_VAR_DECL:
		if (node->address_taken) {
			ir_node *store = calloc(1, sizeof *store);
			store->code = OC_STORE;
			arrput(store->out, current_memory);
			arrput(store->out, build_address(-1, current_stack));
			arrput(store->out, build_expression(node->expr.var_decl.value));
			current_memory = store;
			current_stack += node->expr_type->size;
			store->id = stbds_hash_bytes(store, sizeof(ir_node), 0xcafebabe);
			hmput(global_hash, *store, store);
			/* The variable is bound to its address, not to the store. */
			n = store->out[1];
			set_def(node->expr.var_decl.name, n, true);
		} else {
			n = build_expression(node->expr.var_decl.value);
			set_def(node->expr.var_decl.name, n, false);
		}
		return n;
	case NODE_IDENTIFIER: {
		struct symbol_def *def = get_def(node->expr.string.start);
		if (!def) {
			break;
		}
		n = def->node;
		if (n && def->is_lvalue) {
			/* An lvalue is bound to its address: read it through memory. */
			ir_node *load = calloc(1, sizeof *load);
			load->code = OC_LOAD;
			arrput(load->out, current_memory);
			arrput(load->out, n);
			load->id = stbds_hash_bytes(load, sizeof(ir_node), 0xcafebabe);
			tmp = hmget(global_hash, *load);
			if (tmp) {
				arrfree(load->out);
				free(load);
				n = tmp;
			} else {
				n = load; /* registered by the hmput below */
			}
		}
		break;
	}
	case NODE_IF:
		n = build_if(node);
		break;
	case NODE_RETURN:
		build_return(node);
		break;
	default:
		break;
	}

	if (n) {
		hmput(global_hash, *n, n);
	}
	return n;
}
/*
 * Entry point of the IR builder.
 *
 * Sets up the program-level START node, an initial OC_FRAME_PTR memory
 * state and a scope with one symbol table. Every top-level function in the
 * NODE_UNIT list `ast` is then lowered, attached to the program node and
 * registered in global_hash. Finally the graph is printed to stdout as a
 * Graphviz digraph. Top-level items that are not functions are ignored.
 */
void ir_build(ast_node *ast)
{
	graph = calloc(1, sizeof *graph);
	graph->code = OC_START;
	graph->id = stbds_hash_bytes(graph, sizeof *graph, 0xcafebabe);
	graph->data.start_name = "program";

	current_memory = calloc(1, sizeof *current_memory);
	current_memory->code = OC_FRAME_PTR;
	current_memory->id = stbds_hash_bytes(current_memory, sizeof *current_memory, 0xcafebabe);

	current_scope = calloc(1, sizeof *current_scope);
	current_scope->code = OC_SCOPE;
	push_scope();

	for (ast_node *unit = ast; unit && unit->type == NODE_UNIT; unit = unit->expr.unit_node.next) {
		ast_node *item = unit->expr.unit_node.expr;
		if (!item || item->type != NODE_FUNCTION) {
			continue;
		}
		ir_node *fn = build_function(item);
		arrput(graph->out, fn);
		hmput(global_hash, *fn, fn);
	}

	printf("digraph G {\n");
	print_graph(graph);
	printf("}\n");
}

65
ir.h
View file

@ -1,65 +0,0 @@
#ifndef IR_H
#define IR_H
#include "utils.h"
#include "parser.h"
/* Forward declaration so symbol_def can point at IR nodes defined below. */
struct _ir_node;
/* What a name is currently bound to while the IR is being built. */
struct symbol_def {
/* Node the name is bound to; for lvalues this is the address node. */
struct _ir_node *node;
/* True when reads and writes of the name go through memory (load/store). */
bool is_lvalue;
};
/* Entry type of an stb_ds string hash map from a name to its definition. */
typedef struct { char *key; struct symbol_def *value; } symbol_table;
/*
 * Operation performed by an IR node. OC_START is the first enumerator, so a
 * zero-initialised node has this code.
 */
typedef enum {
/* Entry node of the program or of a function; carries data.start_name. */
OC_START,
/* Binary arithmetic and bitwise operators. */
OC_ADD,
OC_SUB,
OC_MUL,
OC_DIV,
OC_MOD,
OC_BAND,
OC_BOR,
OC_BXOR,
/* Unary minus. */
OC_NEG,
/* Equality comparison. */
OC_EQ,
/* Constants; the value lives in data.const_int / data.const_float. */
OC_CONST_INT,
OC_CONST_FLOAT,
/* Value of a `return` that has no expression. */
OC_VOID,
/* Frame pointer: base of frame-relative addresses and initial memory state. */
OC_FRAME_PTR,
/* Address formed from a base operand and a constant offset operand. */
OC_ADDR,
/* Memory write and read. */
OC_STORE,
OC_LOAD,
/* Join point of several control paths. */
OC_REGION,
/* Merge of values or memory states coming from different paths. */
OC_PHI,
/* Conditional branch on [condition, control]. */
OC_IF,
/* Projection of one result out of another node (IF, START). */
OC_PROJ,
/* NOTE(review): not produced anywhere in ir.c; purpose to be confirmed. */
OC_STOP,
/* Function return on [control, memory, value]. */
OC_RETURN,
/* Bookkeeping node that owns the stack of symbol tables. */
OC_SCOPE,
} opcode;
/* One node of the IR graph. */
typedef struct _ir_node {
/* Operation this node performs. */
opcode code;
/* Identifier derived from a hash of the node's bytes; used as the DOT node name. */
usize id;
/* stb_ds array. NOTE(review): never filled in ir.c, only freed on folding. */
struct _ir_node **in;
/* stb_ds array of the nodes this node consumes (its operands), despite the name. */
struct _ir_node **out;
/* Payload; which member is valid depends on `code`. */
union {
/* OC_CONST_INT value. */
i64 const_int;
/* OC_CONST_FLOAT value. */
f64 const_float;
/* OC_SCOPE: stb_ds array of symbol tables, innermost last. */
symbol_table **symbol_tables;
/* OC_START: name shown for the program or function. */
char *start_name;
} data;
} ir_node;
/* Build the IR for the top-level unit list `ast` and print it to stdout as Graphviz DOT. */
void ir_build(ast_node *ast);
#endif

6
lc.c
View file

@ -4,7 +4,7 @@
#include "lexer.h" #include "lexer.h"
#include "parser.h" #include "parser.h"
#include "sema.h" #include "sema.h"
#include "ir.h" #include "codegen.h"
void print_indent(int depth) { void print_indent(int depth) {
for (int i = 0; i < depth; i++) printf(" "); for (int i = 0; i < depth; i++) printf(" ");
@ -230,10 +230,10 @@ int main(void)
arena a = arena_init(0x1000 * 0x1000 * 64); arena a = arena_init(0x1000 * 0x1000 * 64);
lexer *l = lexer_init(src, size, &a); lexer *l = lexer_init(src, size, &a);
parser *p = parser_init(l, &a); parser *p = parser_init(l, &a);
//print_ast(p->ast, 0); print_ast(p->ast, 0);
sema_init(p, &a); sema_init(p, &a);
ir_build(p->ast); generate(p->ast);
arena_deinit(a); arena_deinit(a);

View file

@ -1089,6 +1089,7 @@ static ast_node *parse_function(parser *p)
{ {
ast_node *fn = arena_alloc(p->allocator, sizeof(ast_node)); ast_node *fn = arena_alloc(p->allocator, sizeof(ast_node));
fn->type = NODE_FUNCTION; fn->type = NODE_FUNCTION;
fn->expr.function.is_extern = false;
fn->expr.function.type = parse_type(p); fn->expr.function.type = parse_type(p);
fn->expr.function.name = peek(p)->lexeme; fn->expr.function.name = peek(p)->lexeme;
fn->expr.function.name_len = peek(p)->lexeme_len; fn->expr.function.name_len = peek(p)->lexeme_len;
@ -1097,7 +1098,14 @@ static ast_node *parse_function(parser *p)
advance(p); advance(p);
if (match(p, TOKEN_RPAREN)) { if (match(p, TOKEN_RPAREN)) {
fn->expr.function.body = parse_compound(p);; // Check if this is an extern declaration (semicolon) or definition (body)
if (match_peek(p, TOKEN_SEMICOLON)) {
// Extern function - no body, just consume semicolon
advance(p);
fn->expr.function.body = NULL;
} else {
fn->expr.function.body = parse_compound(p);
}
fn->expr.function.parameters = NULL; fn->expr.function.parameters = NULL;
fn->expr.function.parameters_len = 0; fn->expr.function.parameters_len = 0;
return fn; return fn;
@ -1110,8 +1118,14 @@ static ast_node *parse_function(parser *p)
if (!match(p, TOKEN_RPAREN)) { if (!match(p, TOKEN_RPAREN)) {
error(p, "expected `,`."); error(p, "expected `,`.");
return NULL; return NULL;
} else {
// Check if this is an extern declaration (semicolon) or definition (body)
if (match_peek(p, TOKEN_SEMICOLON)) {
advance(p);
fn->expr.function.body = NULL;
} else { } else {
fn->expr.function.body = parse_compound(p); fn->expr.function.body = parse_compound(p);
}
return fn; return fn;
} }
} }
@ -1132,7 +1146,14 @@ static ast_node *parse_function(parser *p)
prev = current; prev = current;
} }
// Check if this is an extern declaration (semicolon) or definition (body)
if (match_peek(p, TOKEN_SEMICOLON)) {
advance(p);
fn->expr.function.body = NULL;
} else {
fn->expr.function.body = parse_compound(p); fn->expr.function.body = parse_compound(p);
}
return fn; return fn;
} }
@ -1140,6 +1161,13 @@ static ast_node *parse_function(parser *p)
static ast_node *parse_statement(parser *p) static ast_node *parse_statement(parser *p)
{ {
token *cur = peek(p); token *cur = peek(p);
/* Check for extern function declaration */
bool is_extern = false;
if (match(p, TOKEN_EXTERN)) {
is_extern = true;
}
ast_node *type = parse_type(p); ast_node *type = parse_type(p);
if (type && type->type == NODE_STRUCT && type->expr.structure.name_len > 0) { if (type && type->type == NODE_STRUCT && type->expr.structure.name_len > 0) {
goto skip_struct; goto skip_struct;
@ -1148,9 +1176,20 @@ static ast_node *parse_statement(parser *p)
if (p->tokens->next && p->tokens->next->type == TOKEN_LPAREN) { if (p->tokens->next && p->tokens->next->type == TOKEN_LPAREN) {
/* Function definition. */ /* Function definition. */
p->tokens = cur; p->tokens = cur;
return parse_function(p); if (is_extern) {
advance(p); // Skip TOKEN_EXTERN
}
ast_node *fn = parse_function(p);
if (fn && is_extern) {
fn->expr.function.is_extern = true;
fn->expr.function.body = NULL;
}
return fn;
} }
p->tokens = cur; p->tokens = cur;
if (is_extern) {
advance(p); // Skip TOKEN_EXTERN for non-function case
}
/* Variable declaration. */ /* Variable declaration. */
ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); ast_node *node = arena_alloc(p->allocator, sizeof(ast_node));
node->type = NODE_VAR_DECL; node->type = NODE_VAR_DECL;

View file

@ -230,6 +230,7 @@ typedef struct _ast_node {
usize name_len; usize name_len;
struct _ast_node *type; struct _ast_node *type;
struct _ast_node *body; struct _ast_node *body;
bool is_extern;
} function; } function;
struct { struct {
variant *variants; variant *variants;

View file

116
sema.c
View file

@ -26,9 +26,11 @@ static type *const_int = NULL;
static type *const_float = NULL; static type *const_float = NULL;
static bool in_loop = false; static bool in_loop = false;
static bool has_errors = false;
static void error(ast_node *n, char *msg) static void error(ast_node *n, char *msg)
{ {
has_errors = true;
if (n) { if (n) {
printf("\x1b[31m\x1b[1merror\x1b[0m\x1b[1m:%ld:%ld:\x1b[0m %s\n", n->position.row, n->position.column, msg); printf("\x1b[31m\x1b[1merror\x1b[0m\x1b[1m:%ld:%ld:\x1b[0m %s\n", n->position.row, n->position.column, msg);
} else { } else {
@ -133,6 +135,16 @@ static type *get_type(sema *s, ast_node *n)
char *name = NULL; char *name = NULL;
type *t = NULL; type *t = NULL;
switch (n->type) { switch (n->type) {
case NODE_ACCESS:
t = get_type(s, n->expr.access.expr);
name = intern_string(s, n->expr.access.member->expr.string.start, n->expr.access.member->expr.string.len);
if (t->tag != TYPE_STRUCT) {
error(n->expr.access.expr, "expected structure.");
return NULL;
}
t = shget(t->data.structure.member_types, name);
return t;
case NODE_IDENTIFIER: case NODE_IDENTIFIER:
name = intern_string(s, n->expr.string.start, n->expr.string.len); name = intern_string(s, n->expr.string.start, n->expr.string.len);
t = shget(type_reg, name); t = shget(type_reg, name);
@ -140,17 +152,18 @@ static type *get_type(sema *s, ast_node *n)
return t; return t;
case NODE_PTR_TYPE: case NODE_PTR_TYPE:
t = malloc(sizeof(type)); t = malloc(sizeof(type));
t->size = sizeof(usize);
t->alignment = sizeof(usize); t->alignment = sizeof(usize);
if (n->expr.ptr_type.flags & PTR_RAW) { if (n->expr.ptr_type.flags & PTR_RAW) {
t->name = "ptr"; t->name = "ptr";
t->tag = TYPE_PTR; t->tag = TYPE_PTR;
t->size = sizeof(usize);
t->data.ptr.child = get_type(s, n->expr.ptr_type.type); t->data.ptr.child = get_type(s, n->expr.ptr_type.type);
t->data.ptr.is_const = (n->expr.ptr_type.flags & PTR_CONST) != 0; t->data.ptr.is_const = (n->expr.ptr_type.flags & PTR_CONST) != 0;
t->data.ptr.is_volatile = (n->expr.ptr_type.flags & PTR_VOLATILE) != 0; t->data.ptr.is_volatile = (n->expr.ptr_type.flags & PTR_VOLATILE) != 0;
} else { } else {
t->name = "slice"; t->name = "slice";
t->tag = TYPE_SLICE; t->tag = TYPE_SLICE;
t->size = sizeof(usize) * 2; // ptr + len = 16 bytes
t->data.slice.child = get_type(s, n->expr.ptr_type.type); t->data.slice.child = get_type(s, n->expr.ptr_type.type);
t->data.slice.is_const = (n->expr.ptr_type.flags & PTR_CONST) != 0; t->data.slice.is_const = (n->expr.ptr_type.flags & PTR_CONST) != 0;
t->data.slice.is_volatile = (n->expr.ptr_type.flags & PTR_VOLATILE) != 0; t->data.slice.is_volatile = (n->expr.ptr_type.flags & PTR_VOLATILE) != 0;
@ -365,8 +378,8 @@ static ast_node *get_def(sema *s, char *name)
static type *get_string_type(sema *s, ast_node *node) static type *get_string_type(sema *s, ast_node *node)
{ {
type *string_type = arena_alloc(s->allocator, sizeof(type)); type *string_type = arena_alloc(s->allocator, sizeof(type));
string_type->tag = TYPE_PTR; string_type->tag = TYPE_SLICE;
string_type->size = sizeof(usize); string_type->size = sizeof(usize) * 2; // ptr + len = 16 bytes
string_type->alignment = sizeof(usize); string_type->alignment = sizeof(usize);
string_type->name = "slice"; string_type->name = "slice";
string_type->data.slice.child = shget(type_reg, "u8"); string_type->data.slice.child = shget(type_reg, "u8");
@ -397,6 +410,33 @@ static type *get_access_type(sema *s, ast_node *node)
ast_node *member = node->expr.access.member; ast_node *member = node->expr.access.member;
char *name_start = member->expr.string.start; char *name_start = member->expr.string.start;
usize name_len = member->expr.string.len; usize name_len = member->expr.string.len;
// Handle slice field access
if (t && t->tag == TYPE_SLICE) {
char *name = intern_string(s, name_start, name_len);
if (strcmp(name, "ptr") == 0) {
// Return pointer to element type
type *ptr_type = arena_alloc(s->allocator, sizeof(type));
ptr_type->tag = TYPE_PTR;
ptr_type->size = 8;
ptr_type->alignment = 8;
ptr_type->name = "ptr";
ptr_type->data.ptr.child = t->data.slice.child;
ptr_type->data.ptr.is_const = t->data.slice.is_const;
ptr_type->data.ptr.is_volatile = t->data.slice.is_volatile;
free(name);
return ptr_type;
} else if (strcmp(name, "len") == 0) {
// Return usize type
free(name);
return shget(type_reg, "usize");
} else {
error(node, "slice doesn't have that field");
free(name);
return NULL;
}
}
if (!t || (t->tag != TYPE_STRUCT && t->tag != TYPE_UNION)) { if (!t || (t->tag != TYPE_STRUCT && t->tag != TYPE_UNION)) {
error(node, "invalid expression."); error(node, "invalid expression.");
return NULL; return NULL;
@ -433,7 +473,8 @@ static bool can_cast(type *source, type *dest)
switch (dest->tag) { switch (dest->tag) {
case TYPE_INTEGER: case TYPE_INTEGER:
case TYPE_UINTEGER: case TYPE_UINTEGER:
return source->tag == TYPE_INTEGER_CONST; case TYPE_INTEGER_CONST:
return source->tag == TYPE_INTEGER_CONST || source->tag == TYPE_INTEGER || source->tag == TYPE_UINTEGER;
case TYPE_FLOAT: case TYPE_FLOAT:
return source->tag == TYPE_FLOAT_CONST; return source->tag == TYPE_FLOAT_CONST;
default: default:
@ -544,6 +585,36 @@ static type *get_expression_type(sema *s, ast_node *node)
return t; return t;
case NODE_ARRAY_SUBSCRIPT: case NODE_ARRAY_SUBSCRIPT:
t = get_expression_type(s, node->expr.subscript.expr); t = get_expression_type(s, node->expr.subscript.expr);
// Check if this is range subscripting (creates a slice)
if (node->expr.subscript.index && node->expr.subscript.index->type == NODE_RANGE) {
type *element_type = NULL;
switch (t->tag) {
case TYPE_SLICE:
element_type = t->data.slice.child;
break;
case TYPE_PTR:
element_type = t->data.ptr.child;
break;
default:
error(node, "only pointers and slices can be indexed.");
return NULL;
}
// Return a slice type
type *slice_type = arena_alloc(s->allocator, sizeof(type));
slice_type->tag = TYPE_SLICE;
slice_type->size = sizeof(usize) * 2;
slice_type->alignment = sizeof(usize);
slice_type->data.slice.child = element_type;
slice_type->data.slice.is_const = false;
slice_type->data.slice.len = 0;
node->expr_type = slice_type;
return slice_type;
}
// Regular subscript - return element type
switch (t->tag) { switch (t->tag) {
case TYPE_SLICE: case TYPE_SLICE:
t = t->data.slice.child; t = t->data.slice.child;
@ -558,11 +629,20 @@ static type *get_expression_type(sema *s, ast_node *node)
node->expr_type = t; node->expr_type = t;
return t; return t;
case NODE_CALL: case NODE_CALL:
prot = shget(prototypes, intern_string(s, node->expr.call.name, node->expr.call.name_len)); node->expr.call.name = intern_string(s, node->expr.call.name, node->expr.call.name_len);
prot = shget(prototypes, node->expr.call.name);
if (!prot) { if (!prot) {
error(node, "unknown function."); error(node, "unknown function.");
return NULL; return NULL;
} }
// Process call arguments
ast_node *arg = node->expr.call.parameters;
while (arg && arg->type == NODE_UNIT) {
if (arg->expr.unit_node.expr) {
get_expression_type(s, arg->expr.unit_node.expr);
}
arg = arg->expr.unit_node.next;
}
t = prot->type; t = prot->type;
node->expr_type = t; node->expr_type = t;
return t; return t;
@ -709,8 +789,21 @@ static void check_statement(sema *s, ast_node *node)
error(node, "redeclaration of variable."); error(node, "redeclaration of variable.");
break; break;
} }
if (!can_cast(get_expression_type(s, node->expr.var_decl.value), t) && !match(t, get_expression_type(s, node->expr.var_decl.value))) { if (t->tag == TYPE_STRUCT) {
error(node, "type mismatch."); // Struct initialization with NODE_STRUCT_INIT is allowed
} else if (node->expr.var_decl.value && node->expr.var_decl.value->type == NODE_STRUCT_INIT &&
(t->tag == TYPE_SLICE || t->tag == TYPE_PTR)) {
// Array/slice initialization with NODE_STRUCT_INIT is allowed
} else if (node->expr.var_decl.value && node->expr.var_decl.value->type == NODE_RANGE &&
t->tag == TYPE_SLICE) {
// Range initialization for slices is allowed
get_expression_type(s, node->expr.var_decl.value);
} else if (node->expr.var_decl.value && node->expr.var_decl.value->type == NODE_STRING &&
t->tag == TYPE_SLICE) {
// String literal can be assigned to slice
get_expression_type(s, node->expr.var_decl.value);
} else if (!can_cast(get_expression_type(s, node->expr.var_decl.value), t) && !match(t, get_expression_type(s, node->expr.var_decl.value))) {
error(node, "type mismatch (decl).");
} }
shput(current_scope->defs, name, node); shput(current_scope->defs, name, node);
break; break;
@ -740,11 +833,14 @@ static void check_function(sema *s, ast_node *f)
param = param->next; param = param->next;
} }
// Skip body checking for extern functions
if (!f->expr.function.is_extern && f->expr.function.body) {
ast_node *current = f->expr.function.body; ast_node *current = f->expr.function.body;
while (current && current->type == NODE_UNIT) { while (current && current->type == NODE_UNIT) {
check_statement(s, current->expr.unit_node.expr); check_statement(s, current->expr.unit_node.expr);
current = current->expr.unit_node.next; current = current->expr.unit_node.next;
} }
}
pop_scope(s); pop_scope(s);
} }
@ -797,6 +893,7 @@ void sema_init(parser *p, arena *a)
register_type(s, "u16", create_integer(s, "u16", 16, false)); register_type(s, "u16", create_integer(s, "u16", 16, false));
register_type(s, "u32", create_integer(s, "u32", 32, false)); register_type(s, "u32", create_integer(s, "u32", 32, false));
register_type(s, "u64", create_integer(s, "u64", 64, false)); register_type(s, "u64", create_integer(s, "u64", 64, false));
register_type(s, "usize", create_integer(s, "usize", 64, false));
register_type(s, "i8", create_integer(s, "i8", 8, true)); register_type(s, "i8", create_integer(s, "i8", 8, true));
register_type(s, "i16", create_integer(s, "i16", 16, true)); register_type(s, "i16", create_integer(s, "i16", 16, true));
register_type(s, "i32", create_integer(s, "i32", 32, true)); register_type(s, "i32", create_integer(s, "i32", 32, true));
@ -815,4 +912,9 @@ void sema_init(parser *p, arena *a)
const_float->data.flt = 0; const_float->data.flt = 0;
analyze_unit(s, s->ast); analyze_unit(s, s->ast);
if (has_errors) {
printf("Compilation failed.\n");
exit(1);
}
} }

BIN
test Executable file

Binary file not shown.

24
test.l
View file

@ -1,12 +1,20 @@
u32 main(u32 b) extern i64 write(i32 fd, *u8 buf, u64 count);
extern void exit(i32 code);
extern *u8 malloc(usize size);
i32 main()
{ {
u32 a = 4; [u8] message = "Hello world!\n";
//return a; *u8 message_heap = malloc(message.len);
if (b == 3) { [u8] new_message = message_heap[0..13];
return 3; u32 i = 0;
} else {
return 4; loop while i < message.len {
new_message[i] = message[i];
i = i + 1;
} }
return a; write(1, new_message.ptr, new_message.len);
return 0;
} }

90
test.s Normal file
View file

@ -0,0 +1,90 @@
# Assembly for `main`: builds a 13-byte message on the stack, allocates a heap
# buffer with malloc, copies the message into it byte by byte, then passes the
# copy to write() on file descriptor 1 and returns 0.
#
# Stack slots used below (offsets from %rbp):
#   -32 .. -20   message bytes
#   -48 / -40    message   (pointer, length) pair
#   -56          pointer returned by malloc
#   -88 / -80    temporary (pointer, length) pair built from the range
#   -72 / -64    copy of that pair used by the loop and the write call
#   -96          loop counter
.section .text
.global main
main:
# Prologue: set up the frame pointer and reserve 256 bytes of locals.
push %rbp
mov %rsp, %rbp
sub $256, %rsp
# Store the ASCII bytes of "Hello world!\n" one at a time (13 bytes).
movb $72, -32(%rbp)
movb $101, -31(%rbp)
movb $108, -30(%rbp)
movb $108, -29(%rbp)
movb $111, -28(%rbp)
movb $32, -27(%rbp)
movb $119, -26(%rbp)
movb $111, -25(%rbp)
movb $114, -24(%rbp)
movb $108, -23(%rbp)
movb $100, -22(%rbp)
movb $33, -21(%rbp)
movb $10, -20(%rbp)
# Record the (pointer, length) pair for the message.
# NOTE(review): the length stored is 14 although only 13 bytes are written
# above; the byte at -19(%rbp) is never initialized in this listing — confirm
# whether the extra byte is meant to be a terminator.
lea -32(%rbp), %rax
mov %rax, -48(%rbp)
mov $14, %rax
mov %rax, -40(%rbp)
# Call malloc with the message length as its only argument; the argument is
# moved into %rdi through a push/pop pair.
mov -40(%rbp), %rax
push %rax
pop %rdi
call malloc
mov %rax, -56(%rbp)
# Build a (pointer, length) pair from the range 0..13 over the malloc'd
# pointer: pointer = base + start, length = end - start + 1 (the `inc` makes
# the end bound inclusive, giving 14 here).
mov -56(%rbp), %rcx
mov $0, %rax
push %rax
mov $13, %rax
mov %rax, %rdx
pop %rax
mov %rdx, %r8
sub %rax, %r8
inc %r8
add %rcx, %rax
mov %rax, -88(%rbp)
mov %r8, -80(%rbp)
# Copy the temporary pair (two 8-byte words) into the slots at -72 / -64.
lea -88(%rbp), %rax
mov (%rax), %rcx
mov 8(%rax), %rdx
mov %rcx, -72(%rbp)
mov %rdx, -64(%rbp)
# Initialize the loop counter to 0.
mov $0, %rax
mov %rax, -96(%rbp)
# Loop head: continue while counter < message length (-40(%rbp)).
# NOTE(review): `setl` is a signed less-than; confirm this is intended if the
# counter and length are unsigned in the source program.
.L0:
mov -96(%rbp), %rax
mov %rax, %rcx
mov -40(%rbp), %rax
cmp %rax, %rcx
setl %al
movzx %al, %rax
test %rax, %rax
jz .L1
# Loop body: compute the destination address (copy pointer + counter) and
# save it on the stack while the source byte is loaded.
mov -72(%rbp), %rcx
mov -96(%rbp), %rax
add %rcx, %rax
push %rax
# Load one byte from the message at (message pointer + counter),
# zero-extended into %eax.
mov -48(%rbp), %rcx
mov -96(%rbp), %rax
add %rcx, %rax
movzbl (%rax), %eax
# Restore the destination address and store the byte there.
pop %rcx
mov %al, (%rcx)
# counter = counter + 1, then jump back to the loop head.
mov -96(%rbp), %rax
mov %rax, %rcx
mov $1, %rax
add %rcx, %rax
mov %rax, -96(%rbp)
jmp .L0
# Loop exit: call write(1, copy pointer, copy length). The three arguments are
# pushed in order and popped in reverse into %rdi, %rsi, %rdx (presumably
# following the System V AMD64 calling convention — confirm against the
# code generator).
.L1:
mov $1, %rax
push %rax
mov -72(%rbp), %rax
push %rax
mov -64(%rbp), %rax
push %rax
pop %rdx
pop %rsi
pop %rdi
call write
# Return 0: set the return register, tear down the frame, and return.
mov $0, %rax
mov %rbp, %rsp
pop %rbp
ret
# NOTE(review): second epilogue placed after the `ret` above; nothing in this
# listing jumps here, so it appears to be unreachable.
mov %rbp, %rsp
pop %rbp
ret

View file

@ -1,2 +0,0 @@
export TODO_DIR="."
export TODO_FILE="$TODO_DIR/todo.txt"

View file

@ -1 +0,0 @@
implement dominator tree for control flow