starting over

This commit is contained in:
Lorenzo Torres 2025-12-14 16:37:10 +01:00
parent 09d6cf4b46
commit 23126974b5
24 changed files with 0 additions and 6402 deletions

View file

@ -1,59 +0,0 @@
# lc - L compiler
# See LICENSE file for copyright and license details.

include config.mk

SRC = lc.c utils.c lexer.c parser.c sema.c ir.c
HDR = config.def.h utils.h lexer.h parser.h sema.h ir.h
OBJ = ${SRC:.c=.o}

all: options lc

# Echo the effective toolchain settings before building.
options:
	@echo lc build options:
	@echo "CFLAGS = ${CFLAGS}"
	@echo "LDFLAGS = ${LDFLAGS}"
	@echo "CC = ${CC}"

.c.o:
	${CC} -c ${CFLAGS} $<

${OBJ}: config.h config.mk

# config.h is generated from the tracked template on first build.
config.h:
	cp config.def.h $@

# NOTE(review): nothing in this Makefile depends on users.h; kept as-is.
users.h:
	cp users.def.h $@

lc: ${OBJ}
	${CC} -o $@ ${OBJ} ${LDFLAGS}

clean:
	rm -f lc ${OBJ} lc-${VERSION}.tar.gz

dist: clean
	mkdir -p lc-${VERSION}
	cp -R LICENSE Makefile README config.mk \
		lc.1 ${HDR} ${SRC} lc-${VERSION}
	tar -cf lc-${VERSION}.tar lc-${VERSION}
	gzip lc-${VERSION}.tar
	rm -rf lc-${VERSION}

install: all
	mkdir -p ${DESTDIR}${PREFIX}/bin
	cp -f lc ${DESTDIR}${PREFIX}/bin
	chmod 755 ${DESTDIR}${PREFIX}/bin/lc
	mkdir -p ${DESTDIR}${MANPREFIX}/man1
	sed "s/VERSION/${VERSION}/g" < lc.1 > ${DESTDIR}${MANPREFIX}/man1/lc.1
	chmod 644 ${DESTDIR}${MANPREFIX}/man1/lc.1

uninstall:
	rm -f ${DESTDIR}${PREFIX}/bin/lc \
		${DESTDIR}${MANPREFIX}/man1/lc.1

# Rebuild, dump the IR graph as Graphviz, render it to PDF and open it.
graph: clean all
	./lc > graph.dot
	dot -Tpdf graph.dot > graph.pdf
	zathura ./graph.pdf

# graph is listed too: it names an action, not a file to be produced.
.PHONY: all options clean dist install uninstall graph

24
README
View file

@ -1,24 +0,0 @@
lc - L compiler
============================
lc is an L compiler: it compiles L source code.
Requirements
------------
In order to build lc you need make and a C compiler that accepts -std=c23
(see CFLAGS in config.mk).
Installation
------------
Edit config.mk to match your local setup (lc is installed into
the /usr namespace by default; change PREFIX to install elsewhere).
Afterwards enter the following command to build and install lc (if
necessary as root):
make clean install
Usage
-----------
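lc > graph.dot

lc currently takes no arguments: it reads test.l from the current
directory and writes the IR graph in Graphviz format to standard output.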

View file

@ -1,4 +0,0 @@
/* Build-time configuration header. It currently declares nothing beyond
 * its own include guard. */
#ifndef CONFIG_H
#define CONFIG_H
#endif

View file

@ -1,4 +0,0 @@
/* Build-time configuration header. It currently declares nothing beyond
 * its own include guard. */
#ifndef CONFIG_H
#define CONFIG_H
#endif

View file

@ -1,27 +0,0 @@
# lc version (substituted into the man page and passed to the compiler
# as -DVERSION by CPPFLAGS below)
VERSION = 0.1
# Customize below to fit your system
# paths
PREFIX = /usr
MANPREFIX = ${PREFIX}/share/man
# OpenBSD (uncomment)
#MANPREFIX = ${PREFIX}/man
# includes and libs
INCS = -I.
LIBS =
# flags
CPPFLAGS = -DVERSION=\"${VERSION}\"
# Debug-oriented defaults: no optimisation (-O0) plus debug info (-g).
CFLAGS := -std=c23 -pedantic -Wall -O0 ${INCS} ${CPPFLAGS}
CFLAGS := ${CFLAGS} -g
LDFLAGS = ${LIBS}
# Solaris
#CFLAGS = -fast ${INCS} -DVERSION=\"${VERSION}\"
#LDFLAGS = ${LIBS}
# compiler and linker
CC = cc

View file

View file

@ -1,16 +0,0 @@
import std;
i32 main()
{
u32 x = 4;
loop {
u32 b = 3;
}
x == 3;
loop (0.., test) |k, i| {
}
u32 b = 3;
}

View file

@ -1,10 +0,0 @@
struct b {
i32 a,
u32 b,
u32 c,
}
u32 test()
{
f32 a = 5.0;
}

812
ir.c
View file

@ -1,812 +0,0 @@
#include "ir.h"
#include <stdlib.h>
#include <stdio.h>
#include "stb_ds.h"
#include "sema.h"
/*
 * stb_ds hash map of every node registered with hmput() in this file.
 * The key is a by-value copy of the node taken at registration time, the
 * value is the node itself. print_graph() walks this map to emit output.
 */
struct { ir_node key; ir_node *value; } *global_hash = NULL;
/* Root OC_START node created by ir_build(); its `out` array collects the
 * START node of every function. */
static ir_node *graph;
/* Node representing the latest memory state; each store built in this
 * file takes it as an input and then replaces it. */
static ir_node *current_memory;
/* Node representing the current control input; set to NULL by
 * build_return(). */
static ir_node *current_control;
/* Running frame offset handed to build_address() for address-taken
 * variables; advanced by the variable's type size. */
static usize current_stack = 0;
/* OC_SCOPE node whose data.symbol_tables is the stack of symbol tables
 * consulted by get_def()/set_def(). */
static ir_node *current_scope = NULL;
/* Forward declaration: the build_* helpers below recurse through it. */
static ir_node *build_expression(ast_node *node);
/*
 * Per-function accumulators filled by build_return(): entry i of each
 * array is the control, memory and value node of the i-th return.
 * Consumed by finalize_function().
 */
static struct {
ir_node **return_controls;
ir_node **return_memories;
ir_node **return_values;
} current_func = {0};
/*
 * Print the Graphviz statement that declares one node: the node id followed
 * by an attribute list whose label depends on the opcode.
 *
 * node: node to describe; NULL prints a red placeholder named "null".
 */
static void node_name(ir_node *node)
{
	if (!node) {
		printf("null [label=\"NULL\", style=filled, fillcolor=red]\n");
		return;
	}

	printf("%ld ", node->id);
	switch (node->code) {
	case OC_START:
		/*
		 * OC_START is opcode 0, so a calloc'ed node whose opcode was never
		 * assigned (e.g. an unsupported operator) lands here with a NULL
		 * start_name. Passing NULL to %s is undefined, so substitute a
		 * fixed label.
		 */
		printf("[label=\"%s\", style=filled, color=orange]\n",
		       node->data.start_name ? node->data.start_name : "start");
		break;
	case OC_RETURN:
		printf("[label=\"return\", style=filled, color=orange]\n");
		break;
	case OC_ADD:
		printf("[label=\"+\"]\n");
		break;
	case OC_NEG:
	case OC_SUB:
		/* Negation and subtraction share the same label. */
		printf("[label=\"-\"]\n");
		break;
	case OC_DIV:
		printf("[label=\"/\"]\n");
		break;
	case OC_MUL:
		printf("[label=\"*\"]\n");
		break;
	case OC_MOD:
		printf("[label=\"%%\"]\n");
		break;
	case OC_BAND:
		printf("[label=\"&\"]\n");
		break;
	case OC_BOR:
		printf("[label=\"|\"]\n");
		break;
	case OC_BXOR:
		printf("[label=\"^\"]\n");
		break;
	case OC_EQ:
		printf("[label=\"==\"]\n");
		break;
	case OC_CONST_INT:
		printf("[label=\"%ld\"]\n", node->data.const_int);
		break;
	case OC_CONST_FLOAT:
		printf("[label=\"%f\"]\n", node->data.const_float);
		break;
	case OC_FRAME_PTR:
		printf("[label=\"frame_ptr\"]\n");
		break;
	case OC_STORE:
		printf("[label=\"store\", shape=box]\n");
		break;
	case OC_LOAD:
		printf("[label=\"load\", shape=box]\n");
		break;
	case OC_ADDR:
		printf("[label=\"addr\"]\n");
		break;
	case OC_REGION:
		printf("[label=\"region\", shape=diamond, style=filled, color=green]\n");
		break;
	case OC_PHI:
		printf("[label=\"phi\", shape=triangle]\n");
		break;
	case OC_IF:
		printf("[label=\"if\", shape=diamond, style=filled, color=lightblue]\n");
		break;
	case OC_PROJ:
		printf("[label=\"proj\", shape=diamond, style=filled, color=cyan]\n");
		break;
	default:
		/* No dedicated label: fall back to the numeric opcode. */
		printf("[label=\"%d\"]\n", node->code);
		break;
	}
}
/*
 * Emit the body of the Graphviz graph: for every node registered in
 * global_hash, its declaration followed by one declaration and one edge
 * per non-NULL entry of its `out` array. Edges run from the entry to the
 * node that holds it.
 *
 * node: unused; the walk always covers the whole of global_hash.
 */
static void print_graph(ir_node *node)
{
	(void)node;

	int total = hmlen(global_hash);
	for (int slot = 0; slot < total; slot++) {
		ir_node *user = global_hash[slot].value;
		node_name(user);

		int inputs = arrlen(user->out);
		for (int k = 0; k < inputs; k++) {
			ir_node *input = user->out[k];
			if (!input) {
				continue;
			}
			node_name(input);
			printf("%ld->%ld\n", input->id, user->id);
		}
	}
}
static void push_scope(void)
{
arrput(current_scope->data.symbol_tables, NULL);
}
/*
 * Look `name` up in current_scope, innermost table first.
 *
 * Returns the first definition found, or NULL when no table has one.
 */
static struct symbol_def *get_def(char *name)
{
	int level = arrlen(current_scope->data.symbol_tables);

	while (level-- > 0) {
		struct symbol_def *found = shget(current_scope->data.symbol_tables[level], name);
		if (found) {
			return found;
		}
	}
	return NULL;
}
/*
 * Bind `name` to `node`.
 *
 * If a table in current_scope already defines the name, the innermost such
 * table receives the new definition; otherwise the innermost table does.
 * A fresh symbol_def is allocated every time; the previous one, if any,
 * is not freed here.
 *
 * lvalue: stored as the definition's is_lvalue flag.
 */
static void set_def(char *name, ir_node *node, bool lvalue)
{
	int innermost = arrlen(current_scope->data.symbol_tables) - 1;
	int target = innermost;

	for (int level = innermost; level >= 0; level--) {
		if (shget(current_scope->data.symbol_tables[level], name)) {
			target = level;
			break;
		}
	}

	struct symbol_def *entry = calloc(1, sizeof(struct symbol_def));
	entry->is_lvalue = lvalue;
	entry->node = node;
	shput(current_scope->data.symbol_tables[target], name, entry);
}
/*
 * Build a new OC_SCOPE node with the same number of symbol tables as `src`
 * and the same key/value pairs in each. The symbol_def values are shared
 * with `src`, not duplicated. The copy's id is left at zero.
 */
static ir_node *copy_scope(ir_node *src)
{
	ir_node *clone = calloc(1, sizeof(ir_node));
	clone->code = OC_SCOPE;

	int levels = arrlen(src->data.symbol_tables);
	for (int level = 0; level < levels; level++) {
		symbol_table *from = src->data.symbol_tables[level];
		int entries = shlen(from);

		arrput(clone->data.symbol_tables, NULL);
		for (int e = 0; e < entries; e++) {
			shput(clone->data.symbol_tables[level], from[e].key, from[e].value);
		}
	}
	return clone;
}
/*
 * Fold a binary node whose two operands are constants of the same kind.
 *
 * On success the node is rewritten in place into an OC_CONST_INT or
 * OC_CONST_FLOAT, its operand arrays are released and its id is recomputed.
 * The node is left untouched when:
 *   - either operand is missing or the operands are not both int or both
 *     float constants,
 *   - the opcode has no folding rule for that kind,
 *   - the operation is a division or modulo by a constant zero (folding it
 *     would silently invent a result; it is left for later stages).
 *
 * binary: node with its two operands in out[0] and out[1].
 */
static void const_fold(ir_node *binary)
{
	ir_node *left = binary->out[0];
	ir_node *right = binary->out[1];

	/* build_expression() can return NULL for constructs it cannot lower. */
	if (!left || !right) {
		return;
	}

	if (left->code == OC_CONST_INT && right->code == OC_CONST_INT) {
		i64 a = left->data.const_int;
		i64 b = right->data.const_int;
		i64 folded;

		switch (binary->code) {
		case OC_ADD:
			folded = a + b;
			break;
		case OC_SUB:
			folded = a - b;
			break;
		case OC_MUL:
			folded = a * b;
			break;
		case OC_DIV:
			if (b == 0) {
				return;
			}
			/* a / -1 overflows for the minimum value; negate with
			 * wrapping unsigned arithmetic instead. */
			folded = (b == -1) ? (i64)(0ULL - (unsigned long long)a) : a / b;
			break;
		case OC_MOD:
			if (b == 0) {
				return;
			}
			/* a % -1 is always 0 but overflows for the minimum value. */
			folded = (b == -1) ? 0 : a % b;
			break;
		case OC_BOR:
			folded = a | b;
			break;
		case OC_BAND:
			folded = a & b;
			break;
		case OC_BXOR:
			folded = a ^ b;
			break;
		case OC_EQ:
			folded = a == b;
			break;
		default:
			return;
		}

		binary->data.const_int = folded;
		binary->code = OC_CONST_INT;
		arrfree(binary->out);
		arrfree(binary->in);
		/* Hash with a zero id, like a freshly built literal, so a folded
		 * constant is byte-identical to the literal of the same value. */
		binary->id = 0;
		binary->id = stbds_hash_bytes(binary, sizeof(ir_node), 0xcafebabe);
		return;
	}

	if (left->code == OC_CONST_FLOAT && right->code == OC_CONST_FLOAT) {
		f64 a = left->data.const_float;
		f64 b = right->data.const_float;
		f64 folded;

		switch (binary->code) {
		case OC_ADD:
			folded = a + b;
			break;
		case OC_SUB:
			folded = a - b;
			break;
		case OC_MUL:
			folded = a * b;
			break;
		case OC_DIV:
			if (b == 0.0) {
				return;
			}
			folded = a / b;
			break;
		default:
			return;
		}

		binary->data.const_float = folded;
		binary->code = OC_CONST_FLOAT;
		arrfree(binary->out);
		arrfree(binary->in);
		binary->id = 0;
		binary->id = stbds_hash_bytes(binary, sizeof(ir_node), 0xcafebabe);
	}
}
/*
 * Build an OC_ADDR node with two operands: a base and a constant offset.
 *
 * base:   (usize)-1 selects an OC_FRAME_PTR base; any other value becomes
 *         an integer constant base.
 * offset: stored as an integer constant operand.
 *
 * Returns the node already registered in global_hash under identical bytes
 * if there is one, otherwise the new node (which this function does not
 * register).
 */
static ir_node *build_address(usize base, usize offset) {
	ir_node *result = calloc(1, sizeof(ir_node));
	result->code = OC_ADDR;

	ir_node *origin = calloc(1, sizeof(ir_node));
	if (base != -1) {
		origin->code = OC_CONST_INT;
		origin->data.const_int = base;
	} else {
		origin->code = OC_FRAME_PTR;
	}
	origin->id = stbds_hash_bytes(origin, sizeof(ir_node), 0xcafebabe);

	ir_node *displacement = calloc(1, sizeof(ir_node));
	displacement->code = OC_CONST_INT;
	displacement->data.const_int = offset;
	displacement->id = stbds_hash_bytes(displacement, sizeof(ir_node), 0xcafebabe);

	arrput(result->out, origin);
	arrput(result->out, displacement);
	result->id = stbds_hash_bytes(result, sizeof(ir_node), 0xcafebabe);

	ir_node *known = hmget(global_hash, *result);
	if (!known) {
		return result;
	}
	free(result);
	return known;
}
/*
 * Lower an OP_ASSIGN_PTR expression: evaluate the right-hand side and emit
 * an OC_STORE whose operands are, in order, the current control, the
 * current memory, the node bound to the left-hand name, and the value.
 * The store becomes the new current memory.
 *
 * Returns the value node. When the left-hand name has no definition, a
 * diagnostic is written to stderr and no store is emitted (previously this
 * dereferenced a NULL pointer).
 */
static ir_node *build_assign_ptr(ast_node *binary)
{
	ir_node *val_node = build_expression(binary->expr.binary.right);
	char *var_name = binary->expr.binary.left->expr.string.start;

	struct symbol_def *def = get_def(var_name);
	if (!def) {
		fprintf(stderr, "ir: pointer assignment to an undefined symbol\n");
		return val_node;
	}

	ir_node *store = calloc(1, sizeof(ir_node));
	store->code = OC_STORE;
	arrput(store->out, current_control);
	arrput(store->out, current_memory);
	arrput(store->out, def->node);
	arrput(store->out, val_node);
	store->id = stbds_hash_bytes(store, sizeof(ir_node), 0xcafebabe);
	hmput(global_hash, *store, store);

	current_memory = store;
	return val_node;
}
/*
 * Lower an OP_ASSIGN expression.
 *
 * If the left-hand name is bound to an lvalue definition, an OC_STORE
 * (control, memory, bound node, value) is emitted and becomes the current
 * memory. Otherwise the name is simply rebound to the value node.
 *
 * Returns the node built for the right-hand side.
 */
static ir_node *build_assign(ast_node *binary)
{
	ir_node *value = build_expression(binary->expr.binary.right);
	char *target = binary->expr.binary.left->expr.string.start;
	struct symbol_def *binding = get_def(target);

	if (!binding || !binding->is_lvalue) {
		set_def(target, value, false);
		return value;
	}

	ir_node *store = calloc(1, sizeof(ir_node));
	store->code = OC_STORE;
	arrput(store->out, current_control);
	arrput(store->out, current_memory);
	arrput(store->out, binding->node);
	arrput(store->out, value);
	store->id = stbds_hash_bytes(store, sizeof(ir_node), 0xcafebabe);
	hmput(global_hash, *store, store);

	current_memory = store;
	return value;
}
/*
 * Lower a binary AST node.
 *
 * The two assignment operators are delegated to build_assign() and
 * build_assign_ptr(). Every other operator yields a node whose operands
 * are the lowered left and right sides, in that order; the node is then
 * offered to const_fold(). An operator without a mapping below leaves the
 * opcode at its zero value.
 *
 * Returns the node already registered in global_hash under identical bytes
 * if there is one, otherwise the new node.
 */
static ir_node *build_binary(ast_node *node)
{
	opcode code = 0;

	switch (node->expr.binary.operator) {
	case OP_ASSIGN:
		return build_assign(node);
	case OP_ASSIGN_PTR:
		return build_assign_ptr(node);
	case OP_PLUS:  code = OC_ADD;  break;
	case OP_MINUS: code = OC_SUB;  break;
	case OP_DIV:   code = OC_DIV;  break;
	case OP_MUL:   code = OC_MUL;  break;
	case OP_MOD:   code = OC_MOD;  break;
	case OP_BOR:   code = OC_BOR;  break;
	case OP_BAND:  code = OC_BAND; break;
	case OP_BXOR:  code = OC_BXOR; break;
	case OP_EQ:    code = OC_EQ;   break;
	default:
		break;
	}

	ir_node *result = calloc(1, sizeof(ir_node));
	result->code = code;
	arrput(result->out, build_expression(node->expr.binary.left));
	arrput(result->out, build_expression(node->expr.binary.right));
	result->id = stbds_hash_bytes(result, sizeof(ir_node), 0xcafebabe);
	const_fold(result);

	ir_node *known = hmget(global_hash, *result);
	if (!known) {
		return result;
	}
	free(result);
	return known;
}
/*
 * Build an OC_LOAD whose operands are the current memory (captured before
 * the address is lowered) and the node built for the address expression.
 *
 * node: AST expression yielding the address to read.
 *
 * Returns the node already registered in global_hash under identical bytes
 * if there is one, otherwise the new node.
 */
static ir_node *build_load(ast_node *node)
{
	ir_node *load = calloc(1, sizeof(ir_node));
	load->code = OC_LOAD;
	arrput(load->out, current_memory);
	arrput(load->out, build_expression(node));
	/* NOTE(review): this seed differs from the 0xcafebabe used at every
	 * other hashing site in this file - confirm whether that is intended. */
	load->id = stbds_hash_bytes(load, sizeof(ir_node), 0xcafebabebabecafe);

	ir_node *known = hmget(global_hash, *load);
	if (!known) {
		return load;
	}
	free(load);
	return known;
}
/*
 * Lower a unary AST node.
 *
 *  - UOP_REF on an identifier with a definition returns the node bound to
 *    that name; any other operand is lowered as an ordinary expression.
 *  - UOP_DEREF becomes a load through build_load().
 *  - UOP_MINUS builds an OC_NEG, folded immediately when the operand is an
 *    integer or float constant.
 *  - Any other operator yields a node with no operands and the opcode left
 *    at its zero value.
 *
 * Returns the node already registered in global_hash under identical bytes
 * if there is one, otherwise the new node.
 */
static ir_node *build_unary(ast_node *node)
{
	ast_node *operand = node->expr.unary.right;

	/* Handle the operators that never need a node of their own first, so
	 * nothing is allocated only to be freed again. */
	switch (node->expr.unary.operator) {
	case UOP_REF:
		if (operand->type == NODE_IDENTIFIER) {
			struct symbol_def *def = get_def(operand->expr.string.start);
			if (def) {
				return def->node;
			}
		}
		return build_expression(operand);
	case UOP_DEREF:
		return build_load(operand);
	default:
		break;
	}

	ir_node *n = calloc(1, sizeof(ir_node));
	if (node->expr.unary.operator == UOP_MINUS) {
		n->code = OC_NEG;
		arrput(n->out, build_expression(operand));
	}

	/* The operand may be NULL when build_expression() could not lower it;
	 * in that case the node is kept unfolded. */
	ir_node *arg = n->out ? n->out[0] : NULL;
	if (arg && arg->code == OC_CONST_INT) {
		/* Negate through unsigned arithmetic: -x overflows for the minimum
		 * signed value. */
		n->data.const_int = (i64)(0ULL - (unsigned long long)arg->data.const_int);
		n->code = OC_CONST_INT;
		arrfree(n->out);
	} else if (arg && arg->code == OC_CONST_FLOAT) {
		n->data.const_float = -(arg->data.const_float);
		n->code = OC_CONST_FLOAT;
		arrfree(n->out);
	}

	n->id = stbds_hash_bytes(n, sizeof(ir_node), 0xcafebabe);
	ir_node *tmp = hmget(global_hash, *n);
	if (tmp) {
		free(n);
		return tmp;
	}
	return n;
}
/* Lower, in order, the expression of every NODE_UNIT in a branch body. */
static void build_if_body(ast_node *unit)
{
	for (; unit && unit->type == NODE_UNIT; unit = unit->expr.unit_node.next) {
		if (unit->expr.unit_node.expr) {
			build_expression(unit->expr.unit_node.expr);
		}
	}
}

/* True when two merged values must be joined by a phi. NULL-safe: two
 * NULLs agree, a NULL and a node differ. */
static bool if_values_differ(ir_node *a, ir_node *b)
{
	if (!a || !b) {
		return a != b;
	}
	return a->id != b->id;
}

/*
 * Lower an if statement.
 *
 * Builds an OC_IF on (condition, current control) with two OC_PROJ
 * successors, lowers the body under the first projection and the
 * `otherwise` chain under the second, each starting from the same memory
 * and an equivalent scope, then joins them:
 *   - an OC_REGION merges the two final control nodes,
 *   - an OC_PHI merges memory when the branches ended on different nodes,
 *   - every name defined before the `if` is rebound, to a new OC_PHI when
 *     the branches bound it to different nodes, otherwise to the common
 *     node.
 *
 * When both branches agree on a name, its is_lvalue flag is carried over.
 * Previously it was always cleared, so an address-taken variable stopped
 * being loaded/stored through memory after any `if`.
 *
 * Returns the region node, which also becomes the current control.
 */
static ir_node *build_if(ast_node *node)
{
	ir_node *condition = build_expression(node->expr.if_stmt.condition);

	ir_node *if_node = calloc(1, sizeof(ir_node));
	if_node->code = OC_IF;
	arrput(if_node->out, condition);
	arrput(if_node->out, current_control);
	if_node->id = stbds_hash_bytes(if_node, sizeof(ir_node), 0xcafebabe);
	hmput(global_hash, *if_node, if_node);

	ir_node *proj_true = calloc(1, sizeof(ir_node));
	proj_true->code = OC_PROJ;
	arrput(proj_true->out, if_node);
	proj_true->id = stbds_hash_bytes(proj_true, sizeof(ir_node), 0xcafebabe);
	hmput(global_hash, *proj_true, proj_true);

	ir_node *proj_false = calloc(1, sizeof(ir_node));
	proj_false->code = OC_PROJ;
	arrput(proj_false->out, if_node);
	proj_false->id = stbds_hash_bytes(proj_false, sizeof(ir_node), 0xcafebabe);
	hmput(global_hash, *proj_false, proj_false);

	/* Snapshot of the state both branches start from. */
	ir_node *base_scope = copy_scope(current_scope);
	ir_node *base_mem = current_memory;

	/* Then branch: runs on the live scope object. */
	current_control = proj_true;
	build_if_body(node->expr.if_stmt.body);
	ir_node *then_scope = current_scope;
	ir_node *then_mem = current_memory;
	ir_node *then_control = current_control;

	/* Else branch: runs on a private copy of the snapshot. */
	current_scope = copy_scope(base_scope);
	current_memory = base_mem;
	current_control = proj_false;
	build_if_body(node->expr.if_stmt.otherwise);
	ir_node *else_scope = current_scope;
	ir_node *else_mem = current_memory;
	ir_node *else_control = current_control;

	ir_node *region = calloc(1, sizeof(ir_node));
	region->code = OC_REGION;
	arrput(region->out, then_control);
	arrput(region->out, else_control);
	region->id = stbds_hash_bytes(region, sizeof(ir_node), 0xcafebabe);
	hmput(global_hash, *region, region);

	if (then_mem->id != else_mem->id) {
		ir_node *phi = calloc(1, sizeof(ir_node));
		phi->code = OC_PHI;
		arrput(phi->out, region);
		arrput(phi->out, then_mem);
		arrput(phi->out, else_mem);
		phi->id = stbds_hash_bytes(phi, sizeof(ir_node), 0xcafebabe);
		hmput(global_hash, *phi, phi);
		current_memory = phi;
	} else {
		current_memory = then_mem;
	}

	/* Continue in the snapshot and rebind each of its names to the merge
	 * of what the two branches left behind. */
	current_scope = base_scope;
	for (int i = 0; i < arrlen(current_scope->data.symbol_tables); i++) {
		symbol_table *base_table = current_scope->data.symbol_tables[i];
		symbol_table *t_table = then_scope->data.symbol_tables[i];
		symbol_table *e_table = else_scope->data.symbol_tables[i];

		for (int j = 0; j < shlen(base_table); j++) {
			char *key = base_table[j].key;
			struct symbol_def *base_def = base_table[j].value;

			/* Fall back to the pre-if definition when a branch has no
			 * usable entry for the name. */
			struct symbol_def *then_def = shget(t_table, key);
			if (!then_def || !then_def->node) {
				then_def = base_def;
			}
			struct symbol_def *else_def = shget(e_table, key);
			if (!else_def || !else_def->node) {
				else_def = base_def;
			}

			ir_node *found_then = then_def->node;
			ir_node *found_else = else_def->node;

			struct symbol_def *def = calloc(1, sizeof(struct symbol_def));
			if (if_values_differ(found_then, found_else)) {
				ir_node *phi = calloc(1, sizeof(ir_node));
				phi->code = OC_PHI;
				arrput(phi->out, region);
				arrput(phi->out, found_then);
				arrput(phi->out, found_else);
				phi->id = stbds_hash_bytes(phi, sizeof(ir_node), 0xcafebabe);
				hmput(global_hash, *phi, phi);

				def->node = phi;
				def->is_lvalue = false;
			} else {
				def->node = found_then;
				/* Same node on both paths: keep its addressing mode. */
				def->is_lvalue = then_def->is_lvalue;
			}
			/* The key already exists in this table, so this replaces the
			 * value in place without growing the table being iterated. */
			shput(current_scope->data.symbol_tables[i], key, def);
		}
	}

	current_control = region;
	return region;
}
/*
 * Record one return site of the current function.
 *
 * The returned value is the lowered return expression, or a fresh OC_VOID
 * node when the statement has none. The current control, current memory
 * and that value are appended to current_func; the current control is then
 * cleared.
 */
static void build_return(ast_node *node)
{
	ast_node *value_expr = node->expr.ret.value;
	ir_node *result = NULL;

	if (!value_expr) {
		result = calloc(1, sizeof(ir_node));
		result->code = OC_VOID;
		result->id = stbds_hash_bytes(result, sizeof(ir_node), 0xcafebabe);
	} else {
		result = build_expression(value_expr);
	}

	arrput(current_func.return_controls, current_control);
	arrput(current_func.return_memories, current_memory);
	arrput(current_func.return_values, result);

	current_control = NULL;
}
/*
 * Emit the single OC_RETURN node of the current function from the return
 * sites recorded in current_func.
 *
 *  - No return site: nothing is emitted.
 *  - One return site: its control, memory and value feed the return node
 *    directly.
 *  - Several: an OC_REGION merges the controls, one OC_PHI merges the
 *    memories (with the region as first operand) and another OC_PHI merges
 *    the values.
 *
 * Each node's id is now computed before the node is registered, so the key
 * copied into global_hash matches the node. The per-function return arrays
 * are released afterwards; they used to leak.
 */
static void finalize_function(void)
{
	int count = arrlen(current_func.return_controls);
	if (count == 0) {
		return;
	}

	ir_node *final_ctrl = NULL;
	ir_node *final_mem = NULL;
	ir_node *final_val = NULL;

	if (count == 1) {
		final_ctrl = current_func.return_controls[0];
		final_mem = current_func.return_memories[0];
		final_val = current_func.return_values[0];
	} else {
		ir_node *region = calloc(1, sizeof(ir_node));
		region->code = OC_REGION;
		for (int i = 0; i < count; i++) {
			arrput(region->out, current_func.return_controls[i]);
		}
		region->id = stbds_hash_bytes(region, sizeof(ir_node), 0xcafebabe);
		hmput(global_hash, *region, region);
		final_ctrl = region;

		ir_node *mem_phi = calloc(1, sizeof(ir_node));
		mem_phi->code = OC_PHI;
		arrput(mem_phi->out, region);
		for (int i = 0; i < count; i++) {
			arrput(mem_phi->out, current_func.return_memories[i]);
		}
		mem_phi->id = stbds_hash_bytes(mem_phi, sizeof(ir_node), 0xcafebabe);
		hmput(global_hash, *mem_phi, mem_phi);
		final_mem = mem_phi;

		/* NOTE(review): unlike the memory phi, the value phi does not take
		 * the region as its first operand - confirm this is intended. */
		ir_node *val_phi = calloc(1, sizeof(ir_node));
		val_phi->code = OC_PHI;
		for (int i = 0; i < count; i++) {
			arrput(val_phi->out, current_func.return_values[i]);
		}
		val_phi->id = stbds_hash_bytes(val_phi, sizeof(ir_node), 0xcafebabe);
		hmput(global_hash, *val_phi, val_phi);
		final_val = val_phi;
	}

	ir_node *ret = calloc(1, sizeof(ir_node));
	ret->code = OC_RETURN;
	arrput(ret->out, final_ctrl);
	arrput(ret->out, final_mem);
	arrput(ret->out, final_val);
	ret->id = stbds_hash_bytes(ret, sizeof(ir_node), 0xcafebabe);
	hmput(global_hash, *ret, ret);

	/* The collected nodes now live in the graph; drop the bookkeeping. */
	arrfree(current_func.return_controls);
	arrfree(current_func.return_memories);
	arrfree(current_func.return_values);
}
/*
 * Lower one function definition.
 *
 * Creates the function's OC_START node plus two OC_PROJ nodes hanging off
 * it that become the initial control and memory, opens a fresh scope,
 * binds every parameter name to its own OC_PROJ of the start node, lowers
 * the body statement by statement and finally emits the return node via
 * finalize_function().
 *
 * The frame offset counter is reset here so that frame-pointer-relative
 * addresses start at 0 in every function instead of continuing where the
 * previous function stopped.
 *
 * Returns the function's OC_START node (not registered in global_hash by
 * this function).
 */
static ir_node *build_function(ast_node *node)
{
	memset(&current_func, 0x0, sizeof(current_func));
	current_stack = 0;

	ast_node *current = node->expr.function.body;

	ir_node *func = calloc(1, sizeof(ir_node));
	func->code = OC_START;
	func->id = stbds_hash_bytes(func, sizeof(ir_node), 0xcafebabe);
	func->data.start_name = node->expr.function.name;

	/* The two start projections are byte-identical at this point, so
	 * their ids are derived from the pointer values to keep them apart. */
	ir_node *start_ctrl = calloc(1, sizeof(ir_node));
	start_ctrl->code = OC_PROJ;
	start_ctrl->id = stbds_hash_bytes(&start_ctrl, sizeof(usize), 0xcafebabe);
	arrput(start_ctrl->out, func);
	hmput(global_hash, *start_ctrl, start_ctrl);
	current_control = start_ctrl;

	ir_node *start_mem = calloc(1, sizeof(ir_node));
	start_mem->code = OC_PROJ;
	start_mem->id = stbds_hash_bytes(&start_mem, sizeof(usize), 0xcafebabe);
	arrput(start_mem->out, func);
	hmput(global_hash, *start_mem, start_mem);
	current_memory = start_mem;

	current_scope = calloc(1, sizeof(ir_node));
	current_scope->code = OC_SCOPE;
	push_scope();

	for (member *m = node->expr.function.parameters; m; m = m->next) {
		ir_node *proj_param = calloc(1, sizeof(ir_node));
		proj_param->code = OC_PROJ;
		arrput(proj_param->out, func);
		proj_param->id = stbds_hash_bytes(proj_param, sizeof(ir_node), 0xcafebabe);
		set_def(m->name, proj_param, false);
		hmput(global_hash, *proj_param, proj_param);
	}

	while (current && current->type == NODE_UNIT) {
		if (current->expr.unit_node.expr) {
			build_expression(current->expr.unit_node.expr);
		}
		current = current->expr.unit_node.next;
	}

	/* Re-hash now that the name is part of the node's bytes. */
	func->id = stbds_hash_bytes(func, sizeof(ir_node), 0xcafebabe);
	finalize_function();
	return func;
}
/*
 * Lower one AST node and return the IR node that represents its value.
 *
 * Handled node types:
 *  - NODE_UNARY / NODE_BINARY: delegated to build_unary()/build_binary().
 *  - NODE_INTEGER / NODE_FLOAT: constant nodes, shared through global_hash.
 *  - NODE_VAR_DECL: an address-taken variable gets a frame slot and an
 *    OC_STORE of its initial value, and the name is bound to the slot's
 *    address as an lvalue; any other variable is bound directly to its
 *    initial value.
 *  - NODE_IDENTIFIER: the bound node, wrapped in an OC_LOAD from the
 *    current memory when the binding is an lvalue.
 *  - NODE_IF / NODE_RETURN: delegated to build_if()/build_return().
 *
 * Returns NULL for a NULL `node`, for an identifier without a definition,
 * for NODE_RETURN and for every node type not listed above. Any non-NULL
 * result that reaches the end of the function is (re)registered in
 * global_hash.
 */
static ir_node *build_expression(ast_node *node)
{
	ir_node *n = NULL;
	ir_node *tmp = NULL;

	/* E.g. a declaration without an initialiser. */
	if (!node) {
		return NULL;
	}

	switch (node->type) {
	case NODE_UNARY:
		n = build_unary(node);
		break;
	case NODE_BINARY:
		n = build_binary(node);
		break;
	case NODE_INTEGER:
		n = calloc(1, sizeof(ir_node));
		n->code = OC_CONST_INT;
		n->data.const_int = node->expr.integer;
		n->id = stbds_hash_bytes(n, sizeof(ir_node), 0xcafebabe);
		tmp = hmget(global_hash, *n);
		if (tmp) {
			free(n);
			return tmp;
		}
		break;
	case NODE_FLOAT:
		/* Float literals previously fell through to the default case and
		 * produced no node at all. */
		n = calloc(1, sizeof(ir_node));
		n->code = OC_CONST_FLOAT;
		n->data.const_float = node->expr.flt;
		n->id = stbds_hash_bytes(n, sizeof(ir_node), 0xcafebabe);
		tmp = hmget(global_hash, *n);
		if (tmp) {
			free(n);
			return tmp;
		}
		break;
	case NODE_VAR_DECL: {
		if (!node->address_taken) {
			n = build_expression(node->expr.var_decl.value);
			set_def(node->expr.var_decl.name, n, false);
			return n;
		}

		/* Allocated only on this path; it used to be allocated up front
		 * and leaked for plain value bindings. */
		ir_node *store = calloc(1, sizeof(ir_node));
		store->code = OC_STORE;
		arrput(store->out, current_memory);
		arrput(store->out, build_address(-1, current_stack));
		arrput(store->out, build_expression(node->expr.var_decl.value));
		current_memory = store;
		current_stack += node->expr_type->size;
		store->id = stbds_hash_bytes(store, sizeof(ir_node), 0xcafebabe);
		hmput(global_hash, *store, store);

		/* The name denotes the slot's address, not the store itself. */
		n = store->out[1];
		set_def(node->expr.var_decl.name, n, true);
		return n;
	}
	case NODE_IDENTIFIER: {
		struct symbol_def *def = get_def(node->expr.string.start);
		if (!def) {
			/* Unknown name: nothing to refer to. */
			break;
		}
		n = def->node;
		if (n && def->is_lvalue) {
			ir_node *addr_node = n;
			n = calloc(1, sizeof(ir_node));
			n->code = OC_LOAD;
			arrput(n->out, current_memory);
			arrput(n->out, addr_node);
			n->id = stbds_hash_bytes(n, sizeof(ir_node), 0xcafebabe);
			tmp = hmget(global_hash, *n);
			if (tmp) {
				free(n);
				n = tmp;
			} else {
				hmput(global_hash, *n, n);
			}
		}
		break;
	}
	case NODE_IF:
		n = build_if(node);
		break;
	case NODE_RETURN:
		build_return(node);
		break;
	default:
		break;
	}

	if (n) {
		hmput(global_hash, *n, n);
	}
	return n;
}
/*
 * Build the IR for a whole program and print it to stdout as a Graphviz
 * digraph.
 *
 * ast: head of the top-level NODE_UNIT chain. Only units holding a
 *      NODE_FUNCTION are lowered; each resulting start node is attached to
 *      the program's root node and registered in global_hash.
 */
void ir_build(ast_node *ast)
{
	graph = calloc(1, sizeof(ir_node));
	graph->code = OC_START;
	graph->id = stbds_hash_bytes(graph, sizeof(ir_node), 0xcafebabe);
	graph->data.start_name = "program";

	current_memory = calloc(1, sizeof(ir_node));
	current_memory->code = OC_FRAME_PTR;
	current_memory->id = stbds_hash_bytes(current_memory, sizeof(ir_node), 0xcafebabe);

	current_scope = calloc(1, sizeof(ir_node));
	current_scope->code = OC_SCOPE;
	push_scope();

	for (ast_node *unit = ast; unit && unit->type == NODE_UNIT; unit = unit->expr.unit_node.next) {
		ast_node *decl = unit->expr.unit_node.expr;
		if (!decl || decl->type != NODE_FUNCTION) {
			continue;
		}
		ir_node *fn = build_function(decl);
		arrput(graph->out, fn);
		hmput(global_hash, *fn, fn);
	}

	printf("digraph G {\n");
	print_graph(graph);
	printf("}\n");
}

65
ir.h
View file

@ -1,65 +0,0 @@
/* Intermediate representation: node type, opcodes and the entry point
 * that builds the IR from an AST. */
#ifndef IR_H
#define IR_H
#include "utils.h"
#include "parser.h"
struct _ir_node;
/* What a source-level name is currently bound to. */
struct symbol_def {
/* Node standing for the name. */
struct _ir_node *node;
/* When true, ir.c treats `node` as an address: reads of the name become
 * OC_LOAD and assignments become OC_STORE through it. When false, the
 * name is simply rebound to a new node on assignment. */
bool is_lvalue;
};
/* Element type of an stb_ds string-keyed hash map (ir.c accesses it with
 * shget/shput). */
typedef struct { char *key; struct symbol_def *value; } symbol_table;
/*
 * Node opcodes. OC_START has value 0, so a node obtained from calloc whose
 * opcode is never assigned reads as OC_START.
 */
typedef enum {
OC_START,
OC_ADD,
OC_SUB,
OC_MUL,
OC_DIV,
OC_MOD,
OC_BAND,
OC_BOR,
OC_BXOR,
OC_NEG,
OC_EQ,
OC_CONST_INT,
OC_CONST_FLOAT,
OC_VOID,
OC_FRAME_PTR,
OC_ADDR,
OC_STORE,
OC_LOAD,
OC_REGION,
OC_PHI,
OC_IF,
OC_PROJ,
OC_STOP,
OC_RETURN,
OC_SCOPE,
} opcode;
/* One IR node. */
typedef struct _ir_node {
opcode code;
/* Hash of the node's bytes (stbds_hash_bytes in ir.c); also printed as
 * the node's name in the Graphviz output. */
usize id;
/* stb_ds dynamic array. ir.c only ever releases it (arrfree); nothing
 * visible there appends to it. */
struct _ir_node **in;
/* stb_ds dynamic array holding the nodes this node takes as operands;
 * ir.c prints an edge from each entry to this node. */
struct _ir_node **out;
/* Payload; which member is meaningful depends on `code`. */
union {
/* OC_CONST_INT value. */
i64 const_int;
/* OC_CONST_FLOAT value. */
f64 const_float;
/* OC_SCOPE: stb_ds array of symbol tables, innermost scope last. */
symbol_table **symbol_tables;
/* OC_START: label printed for the node. */
char *start_name;
} data;
} ir_node;
/* Build the IR for the given AST and print it as a Graphviz digraph on
 * stdout. */
void ir_build(ast_node *ast);
#endif

241
lc.c
View file

@ -1,241 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include "utils.h"
#include "lexer.h"
#include "parser.h"
#include "sema.h"
#include "ir.h"
/* Write `depth` single spaces to stdout; nothing for depth <= 0. */
void print_indent(int depth) {
	int remaining = depth;

	while (remaining-- > 0) {
		printf(" ");
	}
}
/* Return the source spelling of a binary operator, or "?" for an operator
 * without an entry below. The returned string is a literal; do not free. */
const char* get_op_str(binary_op op) {
	const char *text = "?";

	switch (op) {
	case OP_PLUS:       text = "+";  break;
	case OP_MINUS:      text = "-";  break;
	case OP_DIV:        text = "/";  break;
	case OP_MUL:        text = "*";  break;
	case OP_EQ:         text = "=="; break;
	case OP_ASSIGN:     text = "=";  break;
	case OP_ASSIGN_PTR: text = "<-"; break;
	case OP_AND:        text = "&&"; break;
	case OP_OR:         text = "||"; break;
	case OP_NEQ:        text = "!="; break;
	case OP_GT:         text = ">";  break;
	case OP_LT:         text = "<";  break;
	case OP_GE:         text = ">="; break;
	case OP_LE:         text = "<="; break;
	case OP_BOR:        text = "|";  break;
	case OP_BAND:       text = "&";  break;
	case OP_BXOR:       text = "^";  break;
	case OP_MOD:        text = "%";  break;
	case OP_PLUS_EQ:    text = "+="; break;
	case OP_MINUS_EQ:   text = "-="; break;
	case OP_DIV_EQ:     text = "/="; break;
	case OP_MUL_EQ:     text = "*="; break;
	default:
		break;
	}
	return text;
}
/* Return the source spelling of a unary operator, or "?" for an operator
 * without an entry below. The returned string is a literal; do not free. */
const char *get_uop_str(unary_op op) {
	const char *text = "?";

	switch (op) {
	case UOP_INCR:  text = "++"; break;
	case UOP_MINUS: text = "-";  break;
	case UOP_DECR:  text = "--"; break;
	case UOP_DEREF: text = "*";  break;
	case UOP_REF:   text = "&";  break;
	case UOP_NOT:   text = "!";  break;
	default:
		break;
	}
	return text;
}
void print_ast(ast_node *node, int depth) {
if (!node) return;
print_indent(depth);
switch (node->type) {
case NODE_INTEGER:
printf("Integer: %lu\n", node->expr.integer);
break;
case NODE_FLOAT:
printf("Float: %f\n", node->expr.flt);
break;
case NODE_CHAR:
printf("Char: '%c'\n", node->expr.ch);
break;
case NODE_STRING:
printf("String: \"%.*s\"\n", (int)node->expr.string.len, node->expr.string.start);
break;
case NODE_IDENTIFIER:
printf("Identifier: %.*s\n", (int)node->expr.string.len, node->expr.string.start);
break;
case NODE_CAST:
printf("Cast:\n");
print_ast(node->expr.cast.type, depth);
print_ast(node->expr.cast.value, depth + 1);
break;
case NODE_ACCESS:
printf("Access:\n");
print_ast(node->expr.access.expr, depth + 1);
print_ast(node->expr.access.member, depth + 1);
break;
case NODE_LABEL:
printf("Label: %.*s\n", (int)node->expr.label.name_len, node->expr.label.name);
break;
case NODE_GOTO:
printf("Goto: %.*s\n", (int)node->expr.label.name_len, node->expr.label.name);
break;
case NODE_BINARY:
printf("BinaryOp (%s)\n", get_op_str(node->expr.binary.operator));
print_ast(node->expr.binary.left, depth + 1);
print_ast(node->expr.binary.right, depth + 1);
break;
case NODE_ARRAY_SUBSCRIPT:
printf("Array subscript\n");
print_ast(node->expr.subscript.expr, depth + 1);
print_ast(node->expr.subscript.index, depth + 1);
break;
case NODE_UNARY:
printf("UnaryOp (%s)\n", get_uop_str(node->expr.unary.operator));
print_ast(node->expr.unary.right, depth + 1);
break;
case NODE_POSTFIX:
printf("Postfix (%s)\n", get_uop_str(node->expr.unary.operator));
print_ast(node->expr.unary.right, depth + 1);
break;
case NODE_BREAK:
printf("Break\n");
break;
case NODE_TERNARY:
printf("Ternary (? :)\n");
print_indent(depth + 1); printf("Condition:\n");
print_ast(node->expr.ternary.condition, depth + 2);
print_indent(depth + 1); printf("Then:\n");
print_ast(node->expr.ternary.then, depth + 2);
print_indent(depth + 1); printf("Else:\n");
print_ast(node->expr.ternary.otherwise, depth + 2);
break;
case NODE_UNIT:
printf("Unit\n");
ast_node *current = node;
while (current && current->type == NODE_UNIT) {
print_ast(current->expr.unit_node.expr, depth + 1);
current = current->expr.unit_node.next;
}
break;
case NODE_CALL:
printf("Call: %.*s\n", (int)node->expr.call.name_len, node->expr.call.name);
current = node->expr.call.parameters;
while (current && current->type == NODE_UNIT) {
print_ast(current->expr.unit_node.expr, depth + 1);
current = current->expr.unit_node.next;
}
break;
case NODE_STRUCT_INIT:
printf("Struct init:\n");
current = node->expr.struct_init.members;
while (current && current->type == NODE_UNIT) {
print_ast(current->expr.unit_node.expr, depth + 1);
current = current->expr.unit_node.next;
}
break;
case NODE_STRUCT:
printf("Struct: %.*s\n", (int)node->expr.structure.name_len, node->expr.structure.name);
member *m = node->expr.structure.members;
while (m) {
print_ast(m->type, depth + 1);
m = m->next;
}
break;
case NODE_UNION:
printf("Union: %.*s\n", (int)node->expr.structure.name_len, node->expr.structure.name);
m = node->expr.structure.members;
while (m) {
print_ast(m->type, depth + 1);
m = m->next;
}
break;
case NODE_ENUM:
printf("Enum: %.*s\n", (int)node->expr.enm.name_len, node->expr.enm.name);
variant *v = node->expr.enm.variants;
while (v) {
printf("\t%.*s\n", (int)v->name_len, v->name);
v = v->next;
}
break;
case NODE_IF:
printf("If:\n");
print_ast(node->expr.whle.condition, depth + 1);
print_ast(node->expr.whle.body, depth + 1);
break;
case NODE_VAR_DECL:
printf("VarDecl: ");
print_ast(node->expr.var_decl.type, 0);
print_ast(node->expr.var_decl.value, depth + 1);
break;
case NODE_FUNCTION:
printf("Function: %.*s\n", (int)node->expr.function.name_len, node->expr.function.name);
m = node->expr.function.parameters;
while (m) {
print_ast(m->type, depth + 1);
m = m->next;
}
print_ast(node->expr.function.body, depth + 1);
break;
case NODE_RETURN:
printf("Return:\n");
print_ast(node->expr.ret.value, depth + 1);
break;
case NODE_IMPORT:
printf("Import:\n");
print_ast(node->expr.import.path, depth + 1);
break;
case NODE_WHILE:
printf("While:\n");
print_ast(node->expr.whle.condition, depth + 1);
print_ast(node->expr.whle.body, depth + 1);
break;
case NODE_FOR:
printf("For:\n");
print_ast(node->expr.fr.slices, depth + 1);
print_ast(node->expr.fr.captures, depth + 1);
print_indent(depth + 1);
print_ast(node->expr.fr.body, depth + 1);
break;
case NODE_RANGE:
printf("Range:\n");
print_ast(node->expr.binary.left, depth + 1);
print_ast(node->expr.binary.right, depth + 1);
break;
default:
printf("Unknown Node Type: %d\n", node->type);
break;
}
}
int main(void)
{
FILE *fp = fopen("test.l", "r");
usize size = 0;
fseek(fp, 0, SEEK_END);
size = ftell(fp);
fseek(fp, 0, SEEK_SET);
char *src = malloc(size+1);
fread(src, size, 1, fp);
fclose(fp);
src[size] = '\0';
arena a = arena_init(0x1000 * 0x1000 * 64);
lexer *l = lexer_init(src, size, &a);
parser *p = parser_init(l, &a);
//print_ast(p->ast, 0);
sema_init(p, &a);
ir_build(p->ast);
arena_deinit(a);
return 0;
}

422
lexer.c
View file

@ -1,422 +0,0 @@
#include "lexer.h"
#include <stdbool.h>
#include <ctype.h>
#include <stdio.h>
#include <string.h>
/* Trie consulted by parse_identifier() through trie_get() to tell keywords
 * from plain identifiers. It is not populated in the part of this file
 * shown here. */
trie_node *keywords;
/*
 * Append a token to the lexer's token list.
 *
 * The lexeme starts at the lexer's current index in the source and spans
 * `len` bytes; the token records the lexer's current row and column.
 * The new token's `next` link is cleared explicitly so the list is always
 * terminated, without relying on the allocator handing out zeroed memory.
 */
static void add_token(lexer *l, token_type type, usize len)
{
	token *t = arena_alloc(l->allocator, sizeof(token));

	t->type = type;
	t->lexeme_len = len;
	t->lexeme = l->source + l->index;
	t->position.row = l->row;
	t->position.column = l->column;
	t->next = NULL;

	if (!l->tokens) {
		l->tokens = t;
	} else {
		l->tail->next = t;
	}
	l->tail = t;
}
/*
 * Append a TOKEN_ERROR token to the lexer's token list.
 *
 * msg: NUL-terminated message; the token keeps this pointer as its lexeme
 *      (no copy is made), so it must outlive the token list.
 *
 * The new token's `next` link is cleared explicitly so the list is always
 * terminated, without relying on the allocator handing out zeroed memory.
 */
static void add_error(lexer *l, char *msg)
{
	token *t = arena_alloc(l->allocator, sizeof(token));

	t->type = TOKEN_ERROR;
	t->lexeme_len = strlen(msg);
	t->lexeme = msg;
	t->position.row = l->row;
	t->position.column = l->column;
	t->next = NULL;

	if (!l->tokens) {
		l->tokens = t;
	} else {
		l->tail->next = t;
	}
	l->tail = t;
}
/*
 * Lex a numeric literal starting at the lexer's current index and advance
 * past it.
 *
 * A run of digits is a TOKEN_INTEGER. A single '.' is accepted only when it
 * sits between two digits, turning the literal into a TOKEN_FLOAT; this
 * keeps input such as "0.." lexing as the integer 0 followed by other
 * tokens. At most one decimal point is consumed: "1.56.7" used to be
 * swallowed as one float and now stops after "1.56".
 */
static void parse_number(lexer *l)
{
	/* Is the number a float? */
	bool is_float = false;
	usize len = 0;

	/* <ctype.h> functions need a value representable as unsigned char. */
	while (isdigit((unsigned char)l->source[l->index + len])) {
		/* The second lookahead is only read when the first one is '.', so
		 * it never reads past the terminating NUL. */
		if (!is_float
		    && l->source[l->index + len + 1] == '.'
		    && isdigit((unsigned char)l->source[l->index + len + 2])) {
			is_float = true;
			len += 3;
		} else {
			len += 1;
		}
	}

	/* add_token() takes the lexeme start from l->index, so advance after. */
	add_token(l, is_float ? TOKEN_FLOAT : TOKEN_INTEGER, len);
	l->index += len;
}
/*
 * Lex an identifier or keyword starting at the lexer's current index and
 * advance past it.
 *
 * The lexeme is the longest run of alphanumeric characters and '_'. It is
 * looked up in the keyword trie: a non-zero result is used as the token
 * type, otherwise the token is a TOKEN_IDENTIFIER.
 */
static void parse_identifier(lexer *l)
{
	usize len = 0;

	for (;;) {
		char c = l->source[l->index + len];
		/* <ctype.h> functions need a value representable as unsigned
		 * char; a plain char may be negative for non-ASCII bytes. */
		if (!isalnum((unsigned char)c) && c != '_') {
			break;
		}
		len += 1;
	}

	token_type keyword = trie_get(keywords, l->source + l->index, len);
	/* add_token() takes the lexeme start from l->index, so advance after. */
	add_token(l, keyword ? keyword : TOKEN_IDENTIFIER, len);
	l->index += len;
}
/*
 * Lex the body of a string literal starting at the lexer's current index.
 *
 * On a closing '"' a TOKEN_STRING covering the characters before it is
 * added and the index moves past the quote. If a newline or the end of the
 * source comes first, an error token is added instead and the index is
 * left on that character.
 */
static void parse_string(lexer *l)
{
	usize start = l->index;
	usize len = 0;

	for (;;) {
		char c = l->source[start + len];
		if (c == '"') {
			break;
		}
		if (c == '\0' || c == '\n') {
			add_error(l, "unclosed string literal.");
			l->index = start + len;
			return;
		}
		len += 1;
	}

	/* The index still points at the first character of the literal, which
	 * is where add_token() takes the lexeme from. */
	add_token(l, TOKEN_STRING, len);
	l->index = start + len + 1;
}
static bool parse_special(lexer *l)
{
switch (l->source[l->index]) {
case '+':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_PLUS_EQ, 2);
l->index += 2;
} else if (l->source[l->index+1] == '+') {
add_token(l, TOKEN_PLUS_PLUS, 2);
l->index += 2;
} else {
add_token(l, TOKEN_PLUS, 1);
l->index += 1;
}
return true;
case '-':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_MINUS_EQ, 2);
l->index += 2;
} else if (l->source[l->index+1] == '-') {
add_token(l, TOKEN_MINUS_MINUS, 2);
l->index += 2;
} else {
add_token(l, TOKEN_MINUS, 1);
l->index += 1;
}
return true;
case '/':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_SLASH_EQ, 2);
l->index += 2;
} else {
add_token(l, TOKEN_SLASH, 1);
l->index += 1;
}
return true;
case '*':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_STAR_EQ, 2);
l->index += 2;
} else {
add_token(l, TOKEN_STAR, 1);
l->index += 1;
}
return true;
case '%':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_PERC_EQ, 2);
l->index += 2;
} else {
add_token(l, TOKEN_PERC, 1);
l->index += 1;
}
return true;
case '&':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_AND_EQ, 2);
l->index += 2;
} else if (l->source[l->index+1] == '&') {
add_token(l, TOKEN_DOUBLE_AND, 2);
l->index += 2;
} else {
add_token(l, TOKEN_AND, 1);
l->index += 1;
}
return true;
case '^':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_HAT_EQ, 2);
l->index += 2;
} else {
add_token(l, TOKEN_HAT, 1);
l->index += 1;
}
return true;
case '|':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_PIPE_EQ, 2);
l->index += 2;
} else if (l->source[l->index+1] == '|') {
add_token(l, TOKEN_OR, 2);
l->index += 2;
} else {
add_token(l, TOKEN_PIPE, 1);
l->index += 1;
}
return true;
case '=':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_DOUBLE_EQ, 2);
l->index += 2;
} else {
add_token(l, TOKEN_EQ, 1);
l->index += 1;
}
return true;
case '>':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_GREATER_EQ, 2);
l->index += 2;
} else if (l->source[l->index+1] == '>') {
if (l->source[l->index+2] == '=') {
add_token(l, TOKEN_RSHIFT_EQ, 3);
l->index += 3;
return true;
}
add_token(l, TOKEN_RSHIFT, 2);
l->index += 2;
} else {
add_token(l, TOKEN_GREATER_THAN, 1);
l->index += 1;
}
return true;
case '<':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_LESS_EQ, 2);
l->index += 2;
} else if (l->source[l->index+1] == '-') {
add_token(l, TOKEN_ARROW, 2);
l->index += 2;
} else if (l->source[l->index+1] == '<') {
if (l->source[l->index+2] == '=') {
add_token(l, TOKEN_LSHIFT_EQ, 3);
l->index += 3;
return true;
}
add_token(l, TOKEN_LSHIFT, 2);
l->index += 2;
} else {
add_token(l, TOKEN_LESS_THAN, 1);
l->index += 1;
}
return true;
case '!':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_NOT_EQ, 2);
l->index += 2;
} else {
add_token(l, TOKEN_BANG, 1);
l->index += 1;
}
return true;
case ':':
add_token(l, TOKEN_COLON, 1);
l->index += 1;
return true;
case ';':
add_token(l, TOKEN_SEMICOLON, 1);
l->index += 1;
return true;
case '.':
if (l->source[l->index+1] == '.') {
add_token(l, TOKEN_DOUBLE_DOT, 2);
l->index += 2;
} else {
add_token(l, TOKEN_DOT, 1);
l->index += 1;
}
return true;
case ',':
add_token(l, TOKEN_COMMA, 1);
l->index += 1;
return true;
case '(':
add_token(l, TOKEN_LPAREN, 1);
l->index += 1;
return true;
case ')':
add_token(l, TOKEN_RPAREN, 1);
l->index += 1;
return true;
case '[':
add_token(l, TOKEN_LSQUARE, 1);
l->index += 1;
return true;
case ']':
add_token(l, TOKEN_RSQUARE, 1);
l->index += 1;
return true;
case '{':
add_token(l, TOKEN_LCURLY, 1);
l->index += 1;
return true;
case '}':
add_token(l, TOKEN_RCURLY, 1);
l->index += 1;
return true;
case '\'':
if (l->source[l->index+1] == '\\') {
if (l->source[l->index+3] != '\'') {
add_error(l, "unclosed character literal.");
l->index += 1;
return true;
}
l->index += 1;
add_token(l, TOKEN_CHAR, 2);
l->index += 3;
return true;
} else {
if (l->source[l->index+2] != '\'') {
add_error(l, "unclosed character literal.");
l->index += 1;
return true;
}
l->index += 1;
add_token(l, TOKEN_CHAR, 1);
l->index += 2;
return true;
}
default:
return false;
}
}
/*
 * Main lexer loop: walk l->source and append tokens to the lexer's list.
 *
 * Column bookkeeping: l->column is incremented once at the top of every
 * iteration (so it names the column of the byte being examined) and then
 * advanced by the remaining width of whatever token was consumed. A
 * newline bumps l->row and resets l->column to 0.
 *
 * `//` comments run to the end of the line. Bytes that start no known
 * token are skipped without a diagnostic.
 */
static void parse(lexer *l)
{
	while (l->index < l->size) {
		char c = l->source[l->index];
		/* <ctype.h> functions need an unsigned char value. */
		unsigned char uc = (unsigned char)c;

		l->column += 1;

		if (c == '\n') {
			l->index += 1;
			l->row += 1;
			l->column = 0;
			continue;
		}

		usize head = l->index;

		if (c == '/' && l->source[l->index + 1] == '/') {
			/* Stop at the newline OR the end of the input, and leave
			 * the newline for the branch above so the row counter is
			 * updated. */
			while (l->index < l->size
			       && l->source[l->index] != '\n'
			       && l->source[l->index] != '\0') {
				l->index += 1;
			}
			continue;
		}

		if (isspace(uc)) {
			l->index += 1;
			continue;
		}

		if (parse_special(l)) {
			l->column += (l->index - head - 1);
			continue;
		}

		if (isdigit(uc)) {
			parse_number(l);
			l->column += (l->index - head - 1);
			continue;
		}

		/* parse_identifier() accepts '_' anywhere in a name, so allow
		 * it as the first character as well. */
		if (isalpha(uc) || c == '_') {
			parse_identifier(l);
			l->column += (l->index - head - 1);
			continue;
		}

		if (c == '"') {
			l->index += 1;
			parse_string(l);
			l->column += (l->index - head - 1);
			continue;
		}

		l->index += 1;
	}
}
/*
 * Create a lexer over `source` (`size` bytes), tokenize it immediately
 * and return the lexer; the resulting list starts at lex->tokens.
 *
 * The lexer, its tokens and the keyword trie are all allocated from
 * `arena`. Token lexemes point into `source`, so the buffer must outlive
 * the tokens. The file-scope `keywords` trie is rebuilt on every call.
 */
lexer *lexer_init(char *source, usize size, arena *arena)
{
	static const struct {
		char *word;
		token_type type;
	} keyword_table[] = {
		{ "true", TOKEN_TRUE },
		{ "false", TOKEN_FALSE },
		{ "struct", TOKEN_STRUCT },
		{ "enum", TOKEN_ENUM },
		{ "union", TOKEN_UNION },
		{ "loop", TOKEN_LOOP },
		{ "while", TOKEN_WHILE },
		{ "until", TOKEN_UNTIL },
		{ "goto", TOKEN_GOTO },
		{ "if", TOKEN_IF },
		{ "else", TOKEN_ELSE },
		{ "switch", TOKEN_SWITCH },
		{ "break", TOKEN_BREAK },
		{ "defer", TOKEN_DEFER },
		{ "return", TOKEN_RETURN },
		{ "import", TOKEN_IMPORT },
		{ "const", TOKEN_CONST },
		{ "extern", TOKEN_EXTERN },
		{ "volatile", TOKEN_VOLATILE },
	};

	lexer *lex = arena_alloc(arena, sizeof(lexer));

	/* parse() increments the column before looking at each byte, so the
	 * counter starts at 0 to make the first byte of row 1 column 1, the
	 * same as on every later row (where a newline resets it to 0). */
	lex->column = 0;
	lex->row = 1;
	lex->index = 0;
	lex->size = size;
	lex->tokens = 0;
	lex->tail = 0;
	lex->allocator = arena;
	lex->source = source;

	keywords = arena_alloc(arena, sizeof(trie_node));
	for (usize i = 0; i < sizeof keyword_table / sizeof keyword_table[0]; i++) {
		trie_insert(keywords, lex->allocator, keyword_table[i].word, keyword_table[i].type);
	}

	parse(lex);

	return lex;
}

97
lexer.h
View file

@ -1,97 +0,0 @@
#ifndef LEXER_H
#define LEXER_H
#include "utils.h"
/*
 * Every kind of token the lexer can produce. TOKEN_ERROR is first, so its
 * value is 0.
 */
typedef enum {
TOKEN_ERROR, // lexing error; the lexeme is the error message (see add_error in lexer.c)
TOKEN_END, // presumably an end-of-input marker; lexer.c never emits it -- TODO confirm
TOKEN_PLUS, // +
TOKEN_PLUS_PLUS, // ++
TOKEN_MINUS, // -
TOKEN_MINUS_MINUS, // --
TOKEN_SLASH, // /
TOKEN_PERC, // %
TOKEN_STAR, // *
TOKEN_AND, // &
TOKEN_HAT, // ^
TOKEN_PIPE, // |
TOKEN_LSHIFT, // <<
TOKEN_RSHIFT, // >>
TOKEN_DOUBLE_EQ, // ==
TOKEN_ARROW, // <-
TOKEN_EQ, // =
TOKEN_LESS_THAN, // <
TOKEN_GREATER_THAN, // >
TOKEN_LESS_EQ, // <=
TOKEN_GREATER_EQ, // >=
TOKEN_NOT_EQ, // !=
TOKEN_PLUS_EQ, // +=
TOKEN_MINUS_EQ, // -=
TOKEN_STAR_EQ, // *=
TOKEN_SLASH_EQ, // /=
TOKEN_AND_EQ, // &=
TOKEN_HAT_EQ, // ^=
TOKEN_PIPE_EQ, // |=
TOKEN_PERC_EQ, // %=
TOKEN_LSHIFT_EQ, // <<=
TOKEN_RSHIFT_EQ, // >>=
TOKEN_OR, // ||
TOKEN_DOUBLE_AND, // &&
TOKEN_COLON, // :
TOKEN_SEMICOLON, // ;
TOKEN_DOT, // .
TOKEN_DOUBLE_DOT, // ..
TOKEN_BANG, // !
TOKEN_COMMA, // ,
TOKEN_LPAREN, // (
TOKEN_RPAREN, // )
TOKEN_LSQUARE, // [
TOKEN_RSQUARE, // ]
TOKEN_LCURLY, // {
TOKEN_RCURLY, // }
/* Literals and names. */
TOKEN_INTEGER, // run of digits
TOKEN_FLOAT, // digits with a '.' followed by digits
TOKEN_IDENTIFIER, // letters, digits and '_' that do not spell a keyword
TOKEN_STRING, // "..." (lexeme excludes the quotes)
TOKEN_CHAR, // 'x' or '\x' (lexeme excludes the quotes)
/* Keywords; lexer_init() registers each spelling in the keyword trie. */
TOKEN_TRUE,
TOKEN_FALSE,
TOKEN_GOTO,
TOKEN_LOOP,
TOKEN_WHILE,
TOKEN_UNTIL,
TOKEN_IF,
TOKEN_ELSE,
TOKEN_SWITCH,
TOKEN_BREAK,
TOKEN_DEFER,
TOKEN_RETURN,
TOKEN_IMPORT,
TOKEN_CONST,
TOKEN_EXTERN,
TOKEN_VOLATILE,
TOKEN_STRUCT,
TOKEN_ENUM,
TOKEN_UNION
} token_type;
/* One lexed token; tokens form a singly linked list in source order. */
typedef struct _token {
token_type type;
source_pos position; /* row/column of the lexer when the token was added */
char *lexeme; /* not NUL-terminated: points into the source buffer, or at the message for TOKEN_ERROR */
usize lexeme_len; /* number of bytes in the lexeme */
struct _token *next; /* following token in the list */
} token;
/* Lexer state plus the token list it produced. */
typedef struct {
usize column, row, index, size; /* current column/row, byte offset into source, and source length */
char *source; /* input text; token lexemes point into it */
token *tokens; /* head of the token list (0 until the first token is added) */
token *tail; /* last token, for O(1) append */
arena *allocator; /* arena that tokens are allocated from */
} lexer;
/* Allocate a lexer from `arena`, tokenize `source` (`size` bytes) right away and return it. */
lexer *lexer_init(char *source, usize size, arena *arena);
#endif

1346
parser.c

File diff suppressed because it is too large Load diff

255
parser.h
View file

@ -1,255 +0,0 @@
#ifndef PARSER_H
#define PARSER_H
#include "lexer.h"
#include "utils.h"
#include <stdbool.h>
struct _type;
struct _ast_node;
/*
 * Binary operators. The declaration order matters: sema.c classifies
 * operators by range (OP_ASSIGN..OP_MOD_EQ are assignments, everything
 * from OP_EQ onwards yields a boolean), so do not reorder entries.
 */
typedef enum {
OP_PLUS, // +
OP_MINUS, // -
OP_DIV, // /
OP_MUL, // *
OP_MOD, // %
OP_BOR, // |
OP_BAND, // &
OP_BXOR, // ^
OP_ASSIGN, // =
OP_ASSIGN_PTR, // <-
OP_RSHIFT_EQ, // >>=
OP_LSHIFT_EQ, // <<=
OP_PLUS_EQ, // +=
OP_MINUS_EQ, // -=
OP_DIV_EQ, // /=
OP_MUL_EQ, // *=
OP_BOR_EQ, // |=
OP_BAND_EQ, // &=
OP_BXOR_EQ, // ^=
OP_MOD_EQ, // %=
OP_EQ, // ==
OP_AND, // &&
OP_OR, // ||
OP_NEQ, // !=
OP_GT, // >
OP_LT, // <
OP_GE, // >=
OP_LE, // <=
} binary_op;
/* Unary operators (sema.c also reads them from NODE_POSTFIX nodes). */
typedef enum {
UOP_INCR, // ++
UOP_MINUS, // -
UOP_DECR, // --
UOP_DEREF, // *
UOP_REF, // &
UOP_NOT, // !
} unary_op;
/* Struct layout selector. NOTE(review): not referenced by the lexer or sema code; usage presumably lives in the parser -- confirm. */
typedef enum {
LAYOUT_AUTO,
LAYOUT_PACKED,
LAYOUT_EXTERN
} struct_layout;
/* A named, typed entry in a linked list; used for struct/union members and for function parameters. */
typedef struct _member {
struct _ast_node *type; /* AST node naming the type (identifier or pointer type) */
char *name; /* member name */
usize name_len; /* length of name in bytes */
struct _member *next; /* following member, or NULL */
usize offset; /* byte offset inside the struct; written by register_struct() in sema.c */
} member;
/* A function name with its parameter list. */
typedef struct {
char *name;
usize name_len; /* length of name in bytes */
member *params; /* linked list of parameters */
} function;
/* One entry of an enum's variant list. */
typedef struct _variant {
struct _ast_node *value; /* expression attached to the variant */
char *name;
usize name_len; /* length of name in bytes */
struct _variant *next; /* following variant, or NULL */
} variant;
/* Discriminator for ast_node; it selects which member of ast_node.expr is meaningful. */
typedef enum {
NODE_IDENTIFIER,
NODE_INTEGER,
NODE_FLOAT,
NODE_STRING,
NODE_CHAR,
NODE_BOOL,
NODE_CAST,
NODE_UNARY,
NODE_BINARY,
NODE_RANGE, /* sema.c reads its bounds from expr.binary */
NODE_ARRAY_SUBSCRIPT,
NODE_POSTFIX, /* sema.c reads it through expr.unary */
NODE_CALL,
NODE_ACCESS,
NODE_STRUCT_INIT,
NODE_TERNARY, /* TODO */
NODE_BREAK,
NODE_RETURN,
NODE_IMPORT,
NODE_FOR,
NODE_WHILE,
NODE_IF,
NODE_VAR_DECL,
NODE_LABEL,
NODE_GOTO,
NODE_ENUM,
NODE_STRUCT,
NODE_UNION, /* shares expr.structure with NODE_STRUCT */
NODE_FUNCTION,
NODE_PTR_TYPE,
NODE_SWITCH, /* TODO */
NODE_UNIT, /* list cell: expr.unit_node.expr plus a next link */
} node_type;
/*
 * Bit flags stored in ast_node.expr.ptr_type.flags. PTR_SLICE is 0, so a
 * slice is recognised by the absence of PTR_RAW rather than by a bit.
 */
#define PTR_SLICE 0x0
#define PTR_RAW 0x1
#define PTR_CONST 0x2
#define PTR_VOLATILE 0x4
/* Loop flags; presumably stored in expr.whle.flags -- TODO confirm against the parser. */
#define LOOP_WHILE 0x1
#define LOOP_UNTIL 0x2
#define LOOP_AFTER 0x4
/*
 * A node of the abstract syntax tree. `type` selects which member of the
 * `expr` union is valid.
 */
typedef struct _ast_node {
node_type type;
source_pos position; /* source location, used for diagnostics */
struct _type *expr_type; /* type computed by semantic analysis (sema.c) */
bool address_taken; // used in IR generation.
union {
/* NODE_PTR_TYPE: pointed-to type plus PTR_* flags. */
struct {
struct _ast_node *type;
u8 flags;
} ptr_type;
struct {
char *name;
usize name_len;
} label; // both label and goto
struct {
struct _ast_node *left;
struct _ast_node *right;
binary_op operator;
} binary;
struct {
struct _ast_node *right;
unary_op operator;
} unary;
u8 boolean;
i64 integer;
f64 flt; // float
/* Also used for NODE_IDENTIFIER: start/len hold the identifier text. */
struct {
char *start;
usize len;
} string;
char ch; // char;
struct {
struct _ast_node *condition;
struct _ast_node *then;
struct _ast_node *otherwise;
} ternary;
struct {
struct _ast_node *value;
struct _ast_node *type;
} cast;
struct {
struct _ast_node *expr;
struct _ast_node *index;
} subscript;
struct {
struct _ast_node *expr;
struct _ast_node *member;
} access;
/* NODE_UNIT: one cell of a singly linked list of nodes. */
struct {
struct _ast_node *expr;
struct _ast_node *next;
} unit_node;
struct {
/* This should be a list of unit_node */
struct _ast_node *parameters;
usize param_len;
char *name;
usize name_len;
} call;
struct {
struct _ast_node *value;
} ret;
struct {
/* This should be an access. */
struct _ast_node *path;
} import;
struct {
/* These should be lists of unit_node */
struct _ast_node *slices;
usize slice_len;
struct _ast_node *captures;
usize capture_len;
struct _ast_node* body;
} fr; // for
struct {
struct _ast_node *condition;
struct _ast_node *body;
u8 flags;
} whle; // while
struct {
struct _ast_node *condition;
struct _ast_node *body;
struct _ast_node *otherwise;
u8 flags;
} if_stmt; // if
struct {
struct _ast_node **statements;
usize stmt_len;
} compound;
struct {
struct _ast_node *value;
char *name;
usize name_len;
struct _ast_node *type;
} var_decl;
/* Used for both NODE_STRUCT and NODE_UNION. */
struct {
member *members;
char *name;
usize name_len;
} structure;
struct {
member *parameters;
usize parameters_len;
char *name;
usize name_len;
struct _ast_node *type; /* return type */
struct _ast_node *body; /* list of unit_node statements */
} function;
struct {
variant *variants;
char *name;
usize name_len;
} enm; // enum
struct {
struct _ast_node *members;
usize members_len;
} struct_init;
} expr;
} ast_node;
/* Parser state and result. */
typedef struct {
token *tokens; /* token list being consumed */
token *previous; /* presumably the most recently consumed token -- TODO confirm in parser.c */
ast_node *ast; /* root of the parsed tree; sema_init() reads it */
arena *allocator;
} parser;
/* Create a parser over the tokens of `l`, allocating from `allocator`. */
parser *parser_init(lexer *l, arena *allocator);
#endif

View file

818
sema.c
View file

@ -1,818 +0,0 @@
#define STB_DS_IMPLEMENTATION
#include "sema.h"
#include <string.h>
#include <stdio.h>
/*
 * Node of the type dependency graph used to order struct/union
 * definitions. `in` and `out` are stb_ds dynamic arrays: `in` lists the
 * types this type's members refer to, `out` lists the types that refer
 * to this one (see order_type()).
 */
typedef struct _res_node {
struct _res_node **in;
struct _res_node **out;
type *value;
} res_node;
/* Graph node plus a flag telling whether the type's definition has been seen. */
typedef struct { res_node node; bool complete; } pair;
/* NOTE(review): type_key is not used anywhere in the code below. */
typedef struct { u8 flags; char *name; } type_key;
/* stb_ds string hash maps. */
static struct { char *key; pair *value; } *types; /* type name -> dependency graph node */
static struct { char *key; type *value; } *type_reg; /* type name -> registered (sized) type */
static struct { char *key; prototype *value; } *prototypes; /* function name -> prototype */
/* Analysis state shared by the functions in this file. */
static scope *global_scope = NULL;
static scope *current_scope = NULL; /* innermost scope; see push_scope()/pop_scope() */
static type *current_return = NULL; /* return type of the function being checked */
static type *const_int = NULL; /* type given to integer literals */
static type *const_float = NULL; /* type given to float literals */
static bool in_loop = false; /* true while checking a loop body; validates `break` */
/*
 * Print a diagnostic to stdout.
 *
 * With a node, the message is prefixed with the node's row and column;
 * with n == NULL only the message is printed. The positions are cast to
 * unsigned long so the format specifier matches the argument whatever
 * integer type the position fields use.
 */
static void error(ast_node *n, char *msg)
{
	if (n) {
		printf("\x1b[31m\x1b[1merror\x1b[0m\x1b[1m:%lu:%lu:\x1b[0m %s\n",
		       (unsigned long)n->position.row,
		       (unsigned long)n->position.column,
		       msg);
	} else {
		printf("\x1b[31m\x1b[1merror\x1b[0m\x1b[1m:\x1b[0m %s\n", msg);
	}
}
/*
 * Return a freshly malloc'ed, NUL-terminated copy of the first `len`
 * bytes of `str`. Despite the name no table is consulted: every call
 * allocates, and the caller owns the result. `s` is unused.
 */
static char *intern_string(sema *s, char *str, usize len)
{
	char *copy = malloc(len + 1);

	(void) s;
	memcpy(copy, str, len);
	copy[len] = '\0';

	return copy;
}
/*
 * Create a builtin integer type of `bits` bits (signed when `sign` is
 * true) and add a node for it to the type dependency graph under `name`.
 *
 * The graph node is marked complete so that order_type() rejects a user
 * struct/union that tries to reuse the builtin's name.
 * Size and alignment are filled in later by register_type().
 */
static type *create_integer(sema *s, char *name, u8 bits, bool sign)
{
	type *t = arena_alloc(s->allocator, sizeof(type));
	t->name = name;
	t->tag = sign ? TYPE_INTEGER : TYPE_UINTEGER;
	t->data.integer = bits;

	pair *graph_node = arena_alloc(s->allocator, sizeof(pair));
	graph_node->node.value = t;
	graph_node->node.in = NULL;
	graph_node->node.out = NULL;
	graph_node->complete = true;
	shput(types, name, graph_node);

	return t;
}
/*
 * Create a builtin floating-point type of `bits` bits and add a node for
 * it to the type dependency graph under `name`.
 *
 * The graph node is marked complete so that order_type() rejects a user
 * struct/union that tries to reuse the builtin's name.
 * Size and alignment are filled in later by register_type().
 */
static type *create_float(sema *s, char *name, u8 bits)
{
	type *t = arena_alloc(s->allocator, sizeof(type));
	t->name = name;
	t->tag = TYPE_FLOAT;
	t->data.flt = bits;

	pair *graph_node = arena_alloc(s->allocator, sizeof(pair));
	graph_node->node.value = t;
	graph_node->node.in = NULL;
	graph_node->node.out = NULL;
	graph_node->complete = true;
	shput(types, name, graph_node);

	return t;
}
/*
 * If `node` is a struct or union definition, create its `type` and wire
 * it into the dependency graph: one edge for every member whose type is
 * a plain identifier (pointer-typed members are skipped). Types that are
 * referenced before being defined get a placeholder graph node.
 * Other node kinds are ignored. Defining the same name twice reports an
 * error.
 */
static void order_type(sema *s, ast_node *node)
{
	bool is_struct = node->type == NODE_STRUCT;

	if (!is_struct && node->type != NODE_UNION) {
		return;
	}

	type *t = arena_alloc(s->allocator, sizeof(type));
	t->tag = is_struct ? TYPE_STRUCT : TYPE_UNION;
	t->data.structure.name = node->expr.structure.name;
	t->data.structure.name_len = node->expr.structure.name_len;
	t->data.structure.members = node->expr.structure.members;

	char *k = intern_string(s, node->expr.structure.name, node->expr.structure.name_len);
	t->name = k;

	pair *graph_node = shget(types, k);
	if (graph_node && graph_node->complete) {
		error(node, "type already defined.");
		return;
	}
	if (!graph_node) {
		graph_node = arena_alloc(s->allocator, sizeof(pair));
		graph_node->node.in = NULL;
		graph_node->node.out = NULL;
	}
	graph_node->node.value = t;

	for (member *m = t->data.structure.members; m; m = m->next) {
		if (m->type->type != NODE_IDENTIFIER) {
			continue;
		}

		char *name = intern_string(s, m->type->expr.string.start, m->type->expr.string.len);
		pair *p = shget(types, name);
		if (!p) {
			/* Placeholder for a type that has not been defined yet. */
			p = arena_alloc(s->allocator, sizeof(pair));
			p->node.out = NULL;
			p->node.in = NULL;
			p->node.value = NULL;
			p->complete = false;
			shput(types, name, p);
		}

		arrput(graph_node->node.in, &p->node);
		arrput(p->node.out, &graph_node->node);
	}

	shput(types, k, graph_node);
	graph_node->complete = true;
}
/*
 * Resolve a type expression to a `type`.
 *
 * - NODE_IDENTIFIER: looked up by name in the type registry; NULL when
 *   the name is not registered (no diagnostic is printed here).
 * - NODE_PTR_TYPE: a new heap-allocated pointer or slice type (chosen by
 *   the PTR_RAW flag) wrapping the resolved child type.
 * - anything else, including a NULL node: "expected type." is reported
 *   and NULL is returned.
 */
static type *get_type(sema *s, ast_node *n)
{
	if (!n) {
		error(NULL, "expected type.");
		return NULL;
	}

	switch (n->type) {
	case NODE_IDENTIFIER: {
		char *name = intern_string(s, n->expr.string.start, n->expr.string.len);
		type *found = shget(type_reg, name);
		free(name);
		return found;
	}
	case NODE_PTR_TYPE: {
		u8 flags = n->expr.ptr_type.flags;
		/* calloc so that fields not assigned below (notably
		 * data.slice.len, which match() compares) start out as zero. */
		type *t = calloc(1, sizeof(type));
		if (!t) {
			error(n, "out of memory.");
			return NULL;
		}
		t->size = sizeof(usize);
		t->alignment = sizeof(usize);
		if (flags & PTR_RAW) {
			t->name = "ptr";
			t->tag = TYPE_PTR;
			t->data.ptr.child = get_type(s, n->expr.ptr_type.type);
			t->data.ptr.is_const = (flags & PTR_CONST) != 0;
			t->data.ptr.is_volatile = (flags & PTR_VOLATILE) != 0;
		} else {
			t->name = "slice";
			t->tag = TYPE_SLICE;
			t->data.slice.child = get_type(s, n->expr.ptr_type.type);
			t->data.slice.is_const = (flags & PTR_CONST) != 0;
			t->data.slice.is_volatile = (flags & PTR_VOLATILE) != 0;
		}
		return t;
	}
	default:
		error(n, "expected type.");
		return NULL;
	}
}
/*
 * Lay out a struct: resolve every member's type, record it in the
 * struct's member_types map, assign each member an offset aligned to its
 * own alignment, and set the struct's alignment (largest member
 * alignment) and size (rounded up to that alignment).
 *
 * Stops at the first unknown or zero-sized member type after reporting
 * it; in that case the struct's size and alignment are not written.
 * `name` is unused.
 */
static void register_struct(sema *s, char *name, type *t)
{
	usize max_align = 0;
	usize cursor = 0;

	(void) name;

	for (member *field = t->data.structure.members; field; field = field->next) {
		type *field_type = get_type(s, field->type);
		if (!field_type) {
			error(field->type, "unknown type.");
			return;
		}

		char *key = intern_string(s, field->name, field->name_len);
		shput(t->data.structure.member_types, key, field_type);

		if (field_type->size == 0) {
			error(field->type, "a struct member can't be of type `void`.");
			return;
		}

		if (field_type->alignment > max_align) {
			max_align = field_type->alignment;
		}

		/* Pad up to the member's alignment. */
		usize misalign = cursor % field_type->alignment;
		if (misalign != 0) {
			cursor += field_type->alignment - misalign;
		}
		field->offset = cursor;
		cursor += field_type->size;
	}

	t->alignment = max_align;
	if (max_align > 0) {
		/* Trailing padding so the size is a multiple of the alignment. */
		usize rem = cursor % max_align;
		if (rem != 0) {
			cursor += max_align - rem;
		}
	}
	t->size = cursor;
}
/*
 * Lay out a union: resolve every member's type, record it in the
 * member_types map, and set the union's alignment to the largest member
 * alignment and its size to the largest member size rounded up to that
 * alignment (so consecutive unions stay aligned).
 *
 * Stops at the first unknown member type after reporting it; in that
 * case size and alignment are not written. `name` is unused.
 */
static void register_union(sema *s, char *name, type *t)
{
	usize alignment = 0;
	usize size = 0;

	(void) name;

	for (member *m = t->data.structure.members; m; m = m->next) {
		type *m_type = get_type(s, m->type);
		if (!m_type) {
			error(m->type, "unknown type.");
			return;
		}

		char *n = intern_string(s, m->name, m->name_len);
		shput(t->data.structure.member_types, n, m_type);

		if (alignment < m_type->alignment) {
			alignment = m_type->alignment;
		}
		if (size < m_type->size) {
			size = m_type->size;
		}
	}

	/* The largest member need not be a multiple of the strictest
	 * alignment (e.g. a 3-byte struct next to a u16), so pad the tail. */
	if (alignment > 0 && size % alignment != 0) {
		size += alignment - (size % alignment);
	}

	t->alignment = alignment;
	t->size = size;
}
/*
 * Compute size and alignment for `t` and publish it in the type registry
 * under `name`.
 *
 * Integers and floats take their size from their bit width, pointers are
 * 8 bytes, and structs/unions are laid out by register_struct() /
 * register_union(). Any other tag is reported and not registered.
 */
static void register_type(sema *s, char *name, type *t)
{
	usize bytes;

	switch (t->tag) {
	case TYPE_STRUCT:
		register_struct(s, name, t);
		break;
	case TYPE_UNION:
		register_union(s, name, t);
		break;
	case TYPE_PTR:
		t->size = 8;
		t->alignment = 8;
		break;
	case TYPE_FLOAT:
		bytes = t->data.flt / 8;
		t->size = bytes;
		t->alignment = bytes;
		break;
	case TYPE_INTEGER:
	case TYPE_UINTEGER:
		bytes = t->data.integer / 8;
		t->size = bytes;
		t->alignment = bytes;
		break;
	default:
		error(NULL, "registering an invalid type.");
		return;
	}

	shput(type_reg, name, t);
}
/*
 * Register every struct/union in dependency order.
 *
 * Runs a topological sort over the graph built by order_type(): nodes
 * with no remaining dependencies are moved to `ordered`, and each of
 * their outgoing edges is removed from the dependent's `in` list. Nodes
 * left over belong to a dependency cycle, which is reported. The sort
 * consumes the graph's edge lists.
 */
static void create_types(sema *s)
{
	res_node **ready = NULL;
	res_node **ordered = NULL;
	usize node_count = shlen(types);

	for (usize i = 0; i < node_count; i++) {
		if (arrlen(types[i].value->node.in) == 0) {
			arrput(ready, &types[i].value->node);
		}
	}

	while (arrlen(ready) > 0) {
		res_node *n = ready[0];
		arrdel(ready, 0);
		arrput(ordered, n);

		while (arrlen(n->out) > 0) {
			res_node *dep = n->out[0];
			arrdel(n->out, 0);

			/* A struct with several members of the same type has one
			 * edge per member. Remove exactly one matching in-edge per
			 * out-edge, so the dependent becomes ready exactly once,
			 * when its last edge goes. */
			for (int j = 0; j < arrlen(dep->in); j++) {
				if (dep->in[j] == n) {
					arrdel(dep->in, j);
					break;
				}
			}

			if (arrlen(dep->in) == 0) {
				arrput(ready, dep);
			}
		}
	}

	if ((usize)arrlen(ordered) < node_count) {
		error(NULL, "cycling struct definition.");
	}

	for (int i = 0; i < arrlen(ordered); i++) {
		type *t = ordered[i]->value;
		/* Builtins are registered by sema_init(); placeholders for
		 * undefined types have no value. */
		if (t && (t->tag == TYPE_STRUCT || t->tag == TYPE_UNION)) {
			register_type(s, t->name, t);
		}
	}

	arrfree(ready);
	arrfree(ordered);
}
/*
 * Record the prototype (name, parameter types, return type) of a function
 * definition so calls can be resolved before the body is checked.
 *
 * The node's name is replaced by a NUL-terminated copy. A second
 * definition with the same name is reported and then replaces the first.
 * An unknown parameter type is reported and the prototype is not
 * registered; an unknown return type is reported but the prototype is
 * still registered (with a NULL return type).
 */
static void create_prototype(sema *s, ast_node *node)
{
	prototype *p = arena_alloc(s->allocator, sizeof(prototype));
	/* arrput() needs a NULL (empty) array to start from. */
	p->parameters = NULL;
	p->name = intern_string(s, node->expr.function.name, node->expr.function.name_len);
	node->expr.function.name = p->name;

	if (shget(prototypes, p->name)) {
		error(node, "function already defined.");
	}

	for (member *m = node->expr.function.parameters; m; m = m->next) {
		type *t = get_type(s, m->type);
		if (!t) {
			error(m->type, "unknown type.");
			return;
		}
		arrput(p->parameters, t);
	}

	p->type = get_type(s, node->expr.function.type);
	if (!p->type) {
		error(node->expr.function.type, "unknown type.");
	}

	shput(prototypes, p->name, p);
}
/* Open a new, empty scope nested inside the current one and make it current. */
static void push_scope(sema *s)
{
	scope *scp = arena_alloc(s->allocator, sizeof(scope));

	scp->parent = current_scope;
	/* stb_ds maps must start out NULL; set it explicitly, as sema_init()
	 * does for the global scope. */
	scp->defs = NULL;
	current_scope = scp;
}
/* Leave the current scope and return to its parent. `s` is unused. */
static void pop_scope(sema *s)
{
	scope *leaving = current_scope;

	(void) s;
	current_scope = leaving->parent;
}
/*
 * Look `name` up in the current scope and then in each enclosing scope.
 * Returns the defining node of the innermost match, or NULL when the
 * name is not defined anywhere. `s` is unused.
 */
static ast_node *get_def(sema *s, char *name)
{
	(void) s;

	for (scope *scp = current_scope; scp != NULL; scp = scp->parent) {
		ast_node *found = shget(scp->defs, name);
		if (found) {
			return found;
		}
	}

	return NULL;
}
/*
 * Build the type of a string literal: a const, non-volatile slice of u8
 * whose length is the literal's byte length.
 *
 * The tag is TYPE_SLICE so that it agrees with the `slice` union member
 * filled in below; tagging it TYPE_PTR would make readers of `data.ptr`
 * interpret the slice fields through the wrong layout.
 */
static type *get_string_type(sema *s, ast_node *node)
{
	type *string_type = arena_alloc(s->allocator, sizeof(type));

	string_type->tag = TYPE_SLICE;
	string_type->name = "slice";
	string_type->size = sizeof(usize);
	string_type->alignment = sizeof(usize);
	string_type->data.slice.child = shget(type_reg, "u8");
	string_type->data.slice.is_const = true;
	string_type->data.slice.is_volatile = false;
	string_type->data.slice.len = node->expr.string.len;

	return string_type;
}
/*
 * Build the type of a range expression: a const, non-volatile slice of
 * usize.
 *
 * The length is `right - left` when both bounds are integer literals;
 * otherwise the bounds are not known here and the length is 0.
 * The tag is TYPE_SLICE so that it agrees with the `slice` union member
 * filled in below.
 */
static type *get_range_type(sema *s, ast_node *node)
{
	ast_node *low = node->expr.binary.left;
	ast_node *high = node->expr.binary.right;
	type *range_type = arena_alloc(s->allocator, sizeof(type));

	range_type->tag = TYPE_SLICE;
	range_type->name = "slice";
	range_type->size = sizeof(usize);
	range_type->alignment = sizeof(usize);
	range_type->data.slice.child = shget(type_reg, "usize");
	range_type->data.slice.is_const = true;
	range_type->data.slice.is_volatile = false;
	/* Only literal bounds carry a value in expr.integer; for any other
	 * node kind that union member is not the active one. */
	if (low && high && low->type == NODE_INTEGER && high->type == NODE_INTEGER) {
		range_type->data.slice.len = high->expr.integer - low->expr.integer;
	} else {
		range_type->data.slice.len = 0;
	}

	return range_type;
}
static type *get_expression_type(sema *s, ast_node *node);
/*
 * Type of a member access `expr.member`.
 *
 * The left-hand side must be a struct or union; the result is the type
 * recorded for the named member. Reports an error and returns NULL when
 * the left-hand side is not a struct/union or the member does not exist.
 */
static type *get_access_type(sema *s, ast_node *node)
{
	type *t = get_expression_type(s, node->expr.access.expr);
	ast_node *member = node->expr.access.member;

	if (!member || !t || (t->tag != TYPE_STRUCT && t->tag != TYPE_UNION)) {
		error(node, "invalid expression.");
		return NULL;
	}

	/* The copy is only needed as a NUL-terminated lookup key. */
	char *name = intern_string(s, member->expr.string.start, member->expr.string.len);
	type *res = shget(t->data.structure.member_types, name);
	free(name);

	if (!res) {
		error(node, "struct doesn't have that member");
		return NULL;
	}

	return res;
}
/*
 * Type of an identifier: the expr_type of its definition, found through
 * the scope chain.
 *
 * Side effect: the node's text pointer is replaced by a NUL-terminated
 * copy so later lookups can use it as a C string. Reports an error and
 * returns NULL when the identifier is not defined.
 */
static type *get_identifier_type(sema *s, ast_node *node)
{
	char *name = intern_string(s, node->expr.string.start, node->expr.string.len);
	node->expr.string.start = name;

	ast_node *def = get_def(s, name);
	if (!def) {
		error(node, "unknown identifier.");
		return NULL;
	}

	return def->expr_type;
}
static bool match(type *t1, type *t2);
/*
 * Whether a value of type `source` converts implicitly to `dest`.
 * Only literals convert: an integer constant to any integer type and a
 * float constant to a float type. NULL on either side yields false.
 */
static bool can_cast(type *source, type *dest)
{
	if (!source || !dest) {
		return false;
	}

	if (dest->tag == TYPE_INTEGER || dest->tag == TYPE_UINTEGER) {
		return source->tag == TYPE_INTEGER_CONST;
	}
	if (dest->tag == TYPE_FLOAT) {
		return source->tag == TYPE_FLOAT_CONST;
	}

	return false;
}
/*
 * Compute the type of an expression, store it in node->expr_type and
 * return it.
 *
 * A NULL node has type `void`. On a type error a diagnostic is printed
 * and NULL is returned; a NULL coming back from a sub-expression is
 * propagated without a second diagnostic. Taking the address of a
 * variable marks both the operand and its definition as address_taken.
 */
static type *get_expression_type(sema *s, ast_node *node)
{
	if (!node) {
		return shget(type_reg, "void");
	}

	type *t = NULL;
	type *rhs = NULL;
	prototype *prot = NULL;
	char *name = NULL;

	switch (node->type) {
	case NODE_IDENTIFIER:
		t = get_identifier_type(s, node);
		break;
	case NODE_INTEGER:
		t = const_int;
		break;
	case NODE_FLOAT:
		t = const_float;
		break;
	case NODE_STRING:
		t = get_string_type(s, node);
		break;
	case NODE_CHAR:
		t = shget(type_reg, "u8");
		break;
	case NODE_BOOL:
		t = shget(type_reg, "bool");
		break;
	case NODE_CAST:
		t = get_type(s, node->expr.cast.type);
		break;
	case NODE_POSTFIX:
	case NODE_UNARY:
		t = get_expression_type(s, node->expr.unary.right);
		if (!t) {
			/* The operand already failed; do not dereference NULL. */
			return NULL;
		}
		if (node->expr.unary.operator == UOP_REF) {
			/* Walk `a.b.c` down to the variable whose address is taken. */
			ast_node *target = node->expr.unary.right;
			while (target->type == NODE_ACCESS) {
				target = target->expr.access.expr;
			}
			if (target->type != NODE_IDENTIFIER) {
				error(node, "expected identifier.");
				return NULL;
			}
			ast_node *def = get_def(s, target->expr.string.start);
			if (def) {
				def->address_taken = true;
				target->address_taken = true;
			}
			type *pointee = t;
			t = arena_alloc(s->allocator, sizeof(type));
			t->tag = TYPE_PTR;
			t->size = sizeof(usize);
			t->alignment = sizeof(usize);
			t->name = "ptr";
			t->data.ptr.is_const = false;
			t->data.ptr.is_volatile = false;
			t->data.ptr.child = pointee;
		} else if (node->expr.unary.operator == UOP_DEREF) {
			if (t->tag != TYPE_PTR) {
				error(node, "only pointers can be dereferenced.");
				return NULL;
			}
			t = t->data.ptr.child;
		}
		break;
	case NODE_BINARY:
		t = get_expression_type(s, node->expr.binary.left);
		if (!t) {
			return NULL;
		}
		if (node->expr.binary.operator == OP_ASSIGN_PTR) {
			if (t->tag != TYPE_PTR) {
				error(node, "expected pointer.");
				return NULL;
			}
			t = t->data.ptr.child;
		}
		/* Evaluate the right operand once, so its diagnostics are not
		 * printed twice. */
		rhs = get_expression_type(s, node->expr.binary.right);
		if (!can_cast(rhs, t) && !match(t, rhs)) {
			error(node, "type mismatch.");
			node->expr_type = NULL;
			return NULL;
		}
		if (node->expr.binary.operator >= OP_EQ) {
			/* Comparisons and logical operators. */
			t = shget(type_reg, "bool");
		} else if (node->expr.binary.operator >= OP_ASSIGN && node->expr.binary.operator <= OP_MOD_EQ) {
			/* Assignments produce no value. */
			t = shget(type_reg, "void");
		}
		break;
	case NODE_RANGE:
		t = get_range_type(s, node);
		break;
	case NODE_ARRAY_SUBSCRIPT:
		t = get_expression_type(s, node->expr.subscript.expr);
		if (!t) {
			/* The indexed expression already failed. */
			return NULL;
		}
		switch (t->tag) {
		case TYPE_SLICE:
			t = t->data.slice.child;
			break;
		case TYPE_PTR:
			t = t->data.ptr.child;
			break;
		default:
			error(node, "only pointers and slices can be indexed.");
			return NULL;
		}
		break;
	case NODE_CALL:
		/* The copy is only needed as a NUL-terminated lookup key. */
		name = intern_string(s, node->expr.call.name, node->expr.call.name_len);
		prot = shget(prototypes, name);
		free(name);
		if (!prot) {
			error(node, "unknown function.");
			return NULL;
		}
		t = prot->type;
		break;
	case NODE_ACCESS:
		t = get_access_type(s, node);
		break;
	default:
		t = shget(type_reg, "void");
		break;
	}

	node->expr_type = t;
	return t;
}
/*
 * Structural type equality.
 *
 * Types match when their tags agree and: integers/floats have the same
 * bit width; pointers agree on const/volatile and pointee; slices
 * additionally agree on length; structs/unions are the very same object.
 * Literal constant types, enums and generics never match, and neither
 * does a NULL type.
 */
static bool match(type *t1, type *t2)
{
	if (t1 == NULL || t2 == NULL || t1->tag != t2->tag) {
		return false;
	}

	switch (t1->tag) {
	case TYPE_VOID:
	case TYPE_BOOL:
		return true;
	case TYPE_INTEGER:
	case TYPE_UINTEGER:
		return t1->data.integer == t2->data.integer;
	case TYPE_FLOAT:
		return t1->data.flt == t2->data.flt;
	case TYPE_STRUCT:
	case TYPE_UNION:
		return t1 == t2;
	case TYPE_PTR:
		if (t1->data.ptr.is_const != t2->data.ptr.is_const) {
			return false;
		}
		if (t1->data.ptr.is_volatile != t2->data.ptr.is_volatile) {
			return false;
		}
		return match(t1->data.ptr.child, t2->data.ptr.child);
	case TYPE_SLICE:
		if (t1->data.slice.len != t2->data.slice.len) {
			return false;
		}
		if (t1->data.slice.is_const != t2->data.slice.is_const) {
			return false;
		}
		if (t1->data.slice.is_volatile != t2->data.slice.is_volatile) {
			return false;
		}
		return match(t1->data.slice.child, t2->data.slice.child);
	default:
		/* TYPE_ENUM and TYPE_GENERIC are TODO; constant types never match. */
		return false;
	}
}
static void check_statement(sema *s, ast_node *node);
/* Check a statement list (a chain of NODE_UNIT cells) inside its own scope. */
static void check_body(sema *s, ast_node *node)
{
	push_scope(s);

	for (ast_node *cell = node; cell && cell->type == NODE_UNIT; cell = cell->expr.unit_node.next) {
		check_statement(s, cell->expr.unit_node.expr);
	}

	pop_scope(s);
}
/*
 * Check a `for` loop.
 *
 * Opens a scope, defines one variable per capture (typed after the slice
 * expression in the same position) and checks the body with in_loop set.
 * The previous in_loop value is restored afterwards, so a `break` that
 * follows a nested loop inside an outer loop is still accepted.
 */
static void check_for(sema *s, ast_node *node)
{
	ast_node *current_capture = node->expr.fr.captures;
	ast_node *current_slice = node->expr.fr.slices;

	push_scope(s);

	while (current_capture) {
		if (!current_slice) {
			/* More captures than slices: there is nothing to type
			 * the remaining captures from. */
			error(node, "expected a slice for each capture.");
			break;
		}

		type *c_type = get_expression_type(s, current_slice->expr.unit_node.expr);
		char *c_name = intern_string(s,
			current_capture->expr.unit_node.expr->expr.string.start,
			current_capture->expr.unit_node.expr->expr.string.len);

		/* Synthesize a declaration so the capture resolves like a variable. */
		ast_node *cap_node = arena_alloc(s->allocator, sizeof(ast_node));
		cap_node->type = NODE_VAR_DECL;
		cap_node->expr_type = c_type;
		cap_node->address_taken = false;
		cap_node->expr.var_decl.name = c_name;
		shput(current_scope->defs, c_name, cap_node);

		current_capture = current_capture->expr.unit_node.next;
		current_slice = current_slice->expr.unit_node.next;
	}

	bool was_in_loop = in_loop;
	in_loop = true;
	for (ast_node *current = node->expr.fr.body;
	     current && current->type == NODE_UNIT;
	     current = current->expr.unit_node.next) {
		check_statement(s, current->expr.unit_node.expr);
	}
	in_loop = was_in_loop;

	pop_scope(s);
}
/*
 * Type-check one statement; a NULL node is ignored.
 *
 * - return: the value must match, or be a literal convertible to, the
 *   current function's return type.
 * - break: only allowed while in_loop is set.
 * - while / if: the condition must match `bool`; bodies get their own
 *   scope. When the condition is wrong the body is not checked.
 * - for: see check_for().
 * - variable declaration: the name must not be visible in any enclosing
 *   scope, and the initializer must match the declared type.
 * - anything else is treated as an expression statement.
 */
static void check_statement(sema *s, ast_node *node)
{
	if (!node) {
		return;
	}

	switch (node->type) {
	case NODE_RETURN: {
		/* Evaluate once, so diagnostics from the expression are not
		 * printed twice. */
		type *value_type = get_expression_type(s, node->expr.ret.value);
		if (!can_cast(value_type, current_return) && !match(value_type, current_return)) {
			error(node, "return type doesn't match function's one.");
		}
		break;
	}
	case NODE_BREAK:
		if (!in_loop) {
			error(node, "`break` isn't in a loop.");
		}
		break;
	case NODE_WHILE: {
		if (!match(get_expression_type(s, node->expr.whle.condition), shget(type_reg, "bool"))) {
			error(node, "expected boolean value.");
			return;
		}
		/* Restore the previous value rather than forcing false, so
		 * that nested loops do not clear the flag of the outer loop. */
		bool was_in_loop = in_loop;
		in_loop = true;
		check_body(s, node->expr.whle.body);
		in_loop = was_in_loop;
		break;
	}
	case NODE_IF:
		if (!match(get_expression_type(s, node->expr.if_stmt.condition), shget(type_reg, "bool"))) {
			error(node, "expected boolean value.");
			return;
		}
		check_body(s, node->expr.if_stmt.body);
		if (node->expr.if_stmt.otherwise) {
			check_body(s, node->expr.if_stmt.otherwise);
		}
		break;
	case NODE_FOR:
		check_for(s, node);
		break;
	case NODE_VAR_DECL: {
		type *declared = get_type(s, node->expr.var_decl.type);
		node->expr_type = declared;

		/* Replace the name with a NUL-terminated copy usable as a map key. */
		char *name = intern_string(s, node->expr.var_decl.name, node->expr.var_decl.name_len);
		node->expr.var_decl.name = name;

		if (get_def(s, name)) {
			error(node, "redeclaration of variable.");
			break;
		}

		type *value_type = get_expression_type(s, node->expr.var_decl.value);
		if (!can_cast(value_type, declared) && !match(declared, value_type)) {
			error(node, "type mismatch.");
		}
		shput(current_scope->defs, name, node);
		break;
	}
	default:
		get_expression_type(s, node);
		break;
	}
}
/*
 * Check a function definition: open a scope, record the return type in
 * current_return, define every parameter as a variable (replacing its
 * name with a NUL-terminated copy), then check each body statement in
 * that same scope.
 */
static void check_function(sema *s, ast_node *f)
{
	push_scope(s);
	current_return = get_type(s, f->expr.function.type);

	for (member *param = f->expr.function.parameters; param; param = param->next) {
		type *param_type = get_type(s, param->type);
		char *param_name = intern_string(s, param->name, param->name_len);
		param->name = param_name;

		/* Synthesize a declaration so the parameter resolves like a variable. */
		ast_node *decl = arena_alloc(s->allocator, sizeof(ast_node));
		decl->type = NODE_VAR_DECL;
		decl->expr_type = param_type;
		decl->address_taken = false;
		decl->expr.var_decl.name = param_name;
		shput(current_scope->defs, param_name, decl);
	}

	for (ast_node *stmt = f->expr.function.body;
	     stmt && stmt->type == NODE_UNIT;
	     stmt = stmt->expr.unit_node.next) {
		check_statement(s, stmt->expr.unit_node.expr);
	}

	pop_scope(s);
}
/*
 * Analyze a translation unit (a chain of NODE_UNIT cells) in three
 * passes over the top-level declarations:
 *   1. collect struct/union definitions and register them in dependency
 *      order;
 *   2. record a prototype for every function;
 *   3. check function bodies and any other top-level statement.
 */
static void analyze_unit(sema *s, ast_node *node)
{
	ast_node *cell;

	for (cell = node; cell && cell->type == NODE_UNIT; cell = cell->expr.unit_node.next) {
		if (cell->expr.unit_node.expr) {
			order_type(s, cell->expr.unit_node.expr);
		}
	}
	create_types(s);

	for (cell = node; cell && cell->type == NODE_UNIT; cell = cell->expr.unit_node.next) {
		ast_node *decl = cell->expr.unit_node.expr;
		if (decl && decl->type == NODE_FUNCTION) {
			create_prototype(s, decl);
		}
	}

	for (cell = node; cell && cell->type == NODE_UNIT; cell = cell->expr.unit_node.next) {
		ast_node *decl = cell->expr.unit_node.expr;
		if (decl && decl->type == NODE_FUNCTION) {
			check_function(s, decl);
		} else {
			check_statement(s, decl);
		}
	}
}
/*
 * Run semantic analysis over the AST produced by `p`.
 *
 * Sets up the global scope, registers the builtin types (void, bool, the
 * fixed-width integers, usize, f32 and f64), creates the types given to
 * integer and float literals, and then analyzes the whole unit.
 * Diagnostics are printed as they are found; nothing is returned.
 */
void sema_init(parser *p, arena *a)
{
	/* Builtin integer types, in registration order. void and bool are
	 * modelled as unsigned integers of 0 and 8 bits. */
	static const struct {
		char *name;
		u8 bits;
		bool sign;
	} integers[] = {
		{ "void", 0, false },
		{ "bool", 8, false },
		{ "u8", 8, false },
		{ "u16", 16, false },
		{ "u32", 32, false },
		{ "u64", 64, false },
		{ "i8", 8, true },
		{ "i16", 16, true },
		{ "i32", 32, true },
		{ "i64", 64, true },
		/* get_range_type() looks "usize" up as the element type of a
		 * range, so it has to exist in the registry. */
		{ "usize", (u8)(sizeof(usize) * 8), false },
	};

	sema *s = arena_alloc(a, sizeof(sema));
	s->allocator = a;
	types = NULL;
	s->ast = p->ast;

	global_scope = arena_alloc(a, sizeof(scope));
	global_scope->parent = NULL;
	global_scope->defs = NULL;
	current_scope = global_scope;

	for (usize i = 0; i < sizeof integers / sizeof integers[0]; i++) {
		register_type(s, integers[i].name,
			create_integer(s, integers[i].name, integers[i].bits, integers[i].sign));
	}
	register_type(s, "f32", create_float(s, "f32", 32));
	register_type(s, "f64", create_float(s, "f64", 64));

	/* Types of untyped literals; can_cast() lets them convert to any
	 * integer or float type respectively. */
	const_int = arena_alloc(s->allocator, sizeof(type));
	const_int->name = "const_int";
	const_int->tag = TYPE_INTEGER_CONST;
	const_int->data.integer = 0;

	const_float = arena_alloc(s->allocator, sizeof(type));
	const_float->name = "const_float";
	const_float->tag = TYPE_FLOAT_CONST;
	const_float->data.flt = 0;

	analyze_unit(s, s->ast);
}

76
sema.h
View file

@ -1,76 +0,0 @@
#ifndef SEMA_H
#define SEMA_H
#include <stdbool.h>
#include "parser.h"
#include "stb_ds.h"
#include "utils.h"
/* Discriminator stored in `type.tag`: says which kind of type a `type` describes. */
typedef enum {
TYPE_VOID,
TYPE_BOOL,
TYPE_PTR,
TYPE_SLICE,
TYPE_FLOAT,
/* Tag of the `const_float` type created in sema_init(). */
TYPE_FLOAT_CONST,
TYPE_INTEGER,
/* Tag of the `const_int` type created in sema_init(). */
TYPE_INTEGER_CONST,
/* NOTE(review): presumably unsigned integers, with TYPE_INTEGER being signed - confirm. */
TYPE_UINTEGER,
TYPE_STRUCT,
TYPE_UNION,
TYPE_ENUM, /* TODO */
TYPE_GENERIC, /* TODO */
} type_tag;
/* Description of one type known to semantic analysis. */
typedef struct _type {
/* Kind of type; presumably selects which member of `data` is meaningful. */
type_tag tag;
/* NOTE(review): presumably size and alignment in bytes - confirm where they are set. */
usize size;
usize alignment;
/* Name of the type, e.g. "const_int" for the integer-literal type. */
char *name;
/* Kind-specific payload. */
union {
/*
 * Payload for integer and float types; sema_init() sets it to 0 for the
 * literal types. NOTE(review): presumably the bit width for sized types -
 * confirm in create_integer()/create_float().
 */
u8 integer;
u8 flt; // float
/* Pointer payload: qualifier flags and the `child` type (presumably the pointee). */
struct {
bool is_const;
bool is_volatile;
struct _type *child;
} ptr;
/* Slice payload: a length, qualifier flags and the `child` type (presumably the element type). */
struct {
usize len;
bool is_const;
bool is_volatile;
struct _type *child;
} slice;
/*
 * Struct payload: name with explicit length, the member list, and a
 * key/value table from member name to member type.
 * NOTE(review): `member_types` looks like an stb_ds string hash map - confirm.
 */
struct {
char *name;
usize name_len;
member *members;
struct { char *key; struct _type *value; } *member_types;
} structure;
/* Enum payload: name with explicit length and the variant list. */
struct {
char *name;
usize name_len;
variant *variants;
} enm; /* TODO */
} data;
} type;
/*
 * Signature of a function.
 * NOTE(review): `type` is presumably the return type and `parameters` the
 * list of parameter types; how the list length is tracked is not visible
 * here - confirm against create_prototype().
 */
typedef struct {
char *name;
type *type;
type **parameters;
} prototype;
/* One lexical scope: a table of definitions plus a link to the enclosing scope. */
typedef struct _scope {
/* Enclosing scope; sema_init() sets this to NULL for the global scope. */
struct _scope *parent;
/*
 * Key/value table from a name to the AST node stored for it.
 * NOTE(review): looks like an stb_ds string hash map - confirm.
 */
struct { char *key; ast_node *value; } *defs;
} scope;
/* Context of one semantic-analysis run. */
typedef struct {
/* Arena the analysis allocates from. */
arena *allocator;
/* Root of the AST being analyzed (taken from the parser in sema_init()). */
ast_node *ast;
} sema;
/*
 * Run semantic analysis on the AST held by `p`, allocating from `a`.
 * Registers the builtin types and then analyzes the whole unit.
 */
void sema_init(parser *p, arena *a);
#endif

1895
stb_ds.h

File diff suppressed because it is too large Load diff

12
test.l
View file

@ -1,12 +0,0 @@
// Compiler test input exercising branching control flow: both arms of the
// if/else return, so the final `return a;` can never be reached.
u32 main(u32 b)
{
u32 a = 4;
//return a;
if (b == 3) {
return 3;
} else {
return 4;
}
return a;
}

View file

@ -1,2 +0,0 @@
# Keep the todo list in a todo.txt file in the current directory.
# NOTE(review): these look like settings for the todo.txt CLI - confirm.
export TODO_DIR="."
export TODO_FILE="$TODO_DIR/todo.txt"

View file

@ -1 +0,0 @@
implement dominator tree for control flow

152
utils.c
View file

@ -1,152 +0,0 @@
#include "utils.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdbool.h>
/*
 * Parse the first `len` bytes of `s` as a base-10 integer with an optional
 * leading '-'. `s` does not need to be NUL-terminated.
 *
 * An empty slice (`len == 0`) yields 0 without reading `s`.
 * The bytes after the sign are not validated: every byte is treated as a
 * decimal digit. The value is accumulated in a u64, so overly long inputs
 * wrap around instead of being reported.
 */
i64 parse_int(char *s, usize len)
{
	bool negative = false;

	/*
	 * Only inspect *s when there is input: an empty slice may point one past
	 * the end of its buffer, and `len -= 1` on zero would wrap to SIZE_MAX
	 * and send the digit loop far out of bounds.
	 */
	if (len > 0 && *s == '-') {
		s += 1;
		len -= 1;
		negative = true;
	}

	u64 int_part = 0;
	for (usize i = 0; i < len; i++) {
		int_part = (int_part * 10) + (u64)(s[i] - '0');
	}

	if (negative) {
		/* Unsigned negation: well defined, same result as multiplying by -1. */
		int_part = 0 - int_part;
	}
	return (i64)int_part;
}
/*
 * Parse the first `len` bytes of `s` as a decimal floating-point literal of
 * the form [-]digits[.digits]. `s` does not need to be NUL-terminated.
 *
 * Input without a '.' is parsed as a whole number and an empty slice yields
 * 0.0. Digits are not validated (see parse_int).
 *
 * NOTE: all digits are first combined into one i64, so literals with more
 * than 18 significant digits can overflow it.
 */
f64 parse_float(char *s, usize len)
{
	bool negative = false;
	if (len > 0 && *s == '-') {
		s += 1;
		len -= 1;
		negative = true;
	}

	/* Default to "no decimal point": the whole slice is the integer part. */
	usize point_pos = len;
	for (usize i = 0; i < len; i++) {
		if (s[i] == '.') {
			point_pos = i;
			break;
		}
	}

	/* Number of digits after the point; 0 when there is no point at all. */
	usize frac_len = (point_pos < len) ? len - point_pos - 1 : 0;

	/* Never hand parse_int an empty slice, so it never reads past the input. */
	i64 int_part = (point_pos > 0) ? parse_int(s, point_pos) : 0;
	i64 dec_part = (frac_len > 0) ? parse_int(s + point_pos + 1, frac_len) : 0;

	/* Shift the integer digits left to make room for the fraction digits... */
	for (usize i = 0; i < frac_len; i++) {
		int_part *= 10;
	}
	int_part += dec_part;

	/* ...then scale the combined digits back down by the same power of ten. */
	f64 f = (f64)int_part;
	for (usize i = 0; i < frac_len; i++) {
		f /= 10.0;
	}

	if (negative) {
		f *= -1;
	}
	return f;
}
/*
 * Store `value` under the NUL-terminated `key` in the trie rooted at `root`.
 * Missing nodes along the path are allocated from arena `a` and zeroed.
 *
 * If the arena runs out of memory the insertion is abandoned: `value` is not
 * stored, and nodes already created for the key's prefix stay in place.
 */
void trie_insert(trie_node *root, arena *a, char *key, uint16_t value)
{
	trie_node *node = root;

	for (; *key; key++) {
		/*
		 * Index by byte value (0..255). Going through unsigned char matters:
		 * where plain char is signed, a byte >= 0x80 would otherwise turn
		 * into a huge index far outside children[].
		 */
		usize slot = (unsigned char)*key;

		if (!node->children[slot]) {
			trie_node *child = arena_alloc(a, sizeof(trie_node));
			if (!child) {
				return;
			}
			memset(child, 0x0, sizeof(trie_node));
			node->children[slot] = child;
		}
		node = node->children[slot];
	}
	node->value = value;
}
/*
 * Look up the first `len` bytes of `key` in the trie rooted at `root`.
 * `key` does not need to be NUL-terminated.
 *
 * Returns the value stored at the node reached by the key, or 0 when the
 * path does not exist. A stored value of 0 is therefore indistinguishable
 * from "not found".
 */
uint16_t trie_get(trie_node *root, char *key, usize len)
{
	trie_node *node = root;

	for (usize i = 0; i < len; i++) {
		/* Index by byte value; a signed char >= 0x80 must not become a huge index. */
		trie_node *next = node->children[(unsigned char)key[i]];
		if (!next) {
			return 0;
		}
		node = next;
	}
	return node->value;
}
/* Alignment applied to every arena allocation unless overridden at build time. */
#ifndef DEFAULT_ALIGNMENT
#define DEFAULT_ALIGNMENT (2 * sizeof(void *))
#endif

/*
 * Round `ptr` up to the next multiple of `align`; a value that is already
 * aligned is returned unchanged. The mask arithmetic is only meaningful
 * when `align` is a power of two.
 */
static usize align_forward(usize ptr, usize align)
{
	uintptr_t addr = ptr;
	uintptr_t step = (uintptr_t)align;
	uintptr_t rem = addr & (step - 1);

	return (usize)(rem == 0 ? addr : addr + (step - rem));
}
/*
 * Create an arena backed by `size` bytes of zero-initialized heap memory.
 *
 * On allocation failure the returned arena has `memory == NULL` and
 * `capacity == 0`, so the caller can detect the failure and a failed arena
 * can never hand out memory.
 */
arena arena_init(usize size)
{
	/* calloc zeroes the block itself, so a failed allocation is never written to. */
	void *memory = calloc(1, size);

	return (arena){
		.capacity = memory ? size : 0,
		.position = 0,
		.memory = memory,
	};
}
/*
 * Carve `size` bytes, aligned to DEFAULT_ALIGNMENT, out of arena `a`.
 *
 * Returns a pointer to the new region and advances the arena position past
 * it (including any alignment padding). Returns NULL and leaves the arena
 * untouched when the arena has no backing memory or the request does not
 * fit in the remaining space.
 */
void *arena_alloc(arena *a, usize size)
{
	if (a->memory == NULL || a->position > a->capacity) {
		return NULL;
	}

	uintptr_t current_addr = (uintptr_t)a->memory + a->position;
	usize padding = align_forward(current_addr, DEFAULT_ALIGNMENT) - current_addr;

	/*
	 * Compare against the remaining space using subtraction only:
	 * `position + padding + size` could wrap around for a huge `size`
	 * and slip past a naive bounds check.
	 */
	usize available = a->capacity - a->position;
	if (padding > available || size > available - padding) {
		return NULL;
	}

	void *ret = (unsigned char *)a->memory + a->position + padding;
	a->position += padding + size;
	return ret;
}
/* Capture the arena's current position so it can be restored later. */
snapshot arena_snapshot(arena *a)
{
	snapshot mark = a->position;

	return mark;
}
/*
 * Move the arena position back to snapshot `s`. No memory is freed or
 * cleared; the position is simply overwritten with `s`.
 */
void arena_reset_to_snapshot(arena *a, snapshot s)
{
	a->position = s;
}
/* Rewind the arena to its start; the backing memory is kept and not cleared. */
void arena_reset(arena *a)
{
	a->position = 0;
}
/*
 * Release the arena's backing memory. The arena is passed by value, so the
 * caller's copy still holds the now-dangling `memory` pointer afterwards.
 */
void arena_deinit(arena a)
{
	void *block = a.memory;

	free(block);
}

64
utils.h
View file

@ -1,64 +0,0 @@
#ifndef UTILS_H
#define UTILS_H
#include <stdint.h>
#include <stdint.h>
#include <stddef.h>
/* Short aliases for the fixed-width integer types, size_t and the floating-point types. */
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef int8_t i8;
typedef int16_t i16;
typedef int32_t i32;
typedef int64_t i64;
typedef size_t usize;
typedef float f32;
typedef double f64;
/*
 * Parse the first `len` bytes of `s` as a decimal integer with an optional
 * leading '-'. The digits are not validated.
 */
i64 parse_int(char *s, usize len);
/*
 * Parse the first `len` bytes of `s` as a decimal literal of the form
 * [-]digits.digits. The digits are not validated.
 */
f64 parse_float(char *s, usize len);
/* State of an arena allocator that hands out memory from one contiguous block. */
typedef struct {
/* Total size of `memory` in bytes. */
usize capacity;
/* Offset in bytes from `memory` at which the next allocation starts (before alignment). */
usize position;
/* Backing block obtained by arena_init(). */
void* memory;
} arena;
/* A saved arena position, produced by arena_snapshot() and consumed by arena_reset_to_snapshot(). */
typedef usize snapshot;
/*
 * NOTE(ernesto): faulty initialization is signaled by arena.memory
 * being null. It is the responsibility of the caller to check for faulty
 * initialization.
 */
arena arena_init(usize size);
/*
 * Returns null on unsuccessful allocation.
 * In this implementation an allocation is only unsuccessful if the arena
 * does not have enough memory to allocate the requested space.
 */
void *arena_alloc(arena *a, usize size);
/* Returns the arena's current position. */
snapshot arena_snapshot(arena *a);
/* Sets the arena's position back to `s`; memory is neither freed nor cleared. */
void arena_reset_to_snapshot(arena *a, snapshot s);
/* Sets the arena's position back to 0; memory is neither freed nor cleared. */
void arena_reset(arena *a);
/*
 * Frees the arena's backing memory.
 * This call should never fail, also, do we even care if it does?
 */
void arena_deinit(arena a);
/* Node of a trie keyed by bytes: one child slot for each possible byte value. */
typedef struct _trie_node {
/* Value stored for the key that ends at this node. */
uint16_t value;
/* Child for each next byte of the key; NULL where no key continues. */
struct _trie_node *children[256];
} trie_node;
/*
 * Stores `value` under the NUL-terminated `key`, allocating any missing
 * nodes from arena `a`.
 */
void trie_insert(trie_node *root, arena *a, char *key, uint16_t value);
/*
 * Looks up the first `len` bytes of `key`. Returns the stored value, or 0
 * when the key's path does not exist in the trie.
 */
uint16_t trie_get(trie_node *root, char *key, usize len);
/*
 * A position in source text, as a row and a column.
 * NOTE(review): whether counting starts at 0 or 1 is not visible here - confirm in the lexer.
 */
typedef struct {
usize row, column;
} source_pos;
#endif