starting over
This commit is contained in:
parent
09d6cf4b46
commit
23126974b5
24 changed files with 0 additions and 6402 deletions
59
Makefile
59
Makefile
|
|
@ -1,59 +0,0 @@
|
|||
# lc - L compiler
|
||||
# See LICENSE file for copyright and license details.
|
||||
|
||||
include config.mk
|
||||
|
||||
SRC = lc.c utils.c lexer.c parser.c sema.c ir.c
|
||||
HDR = config.def.h utils.h lexer.h parser.h sema.h ir.h
|
||||
OBJ = ${SRC:.c=.o}
|
||||
|
||||
all: options lc
|
||||
|
||||
options:
|
||||
@echo lc build options:
|
||||
@echo "CFLAGS = ${CFLAGS}"
|
||||
@echo "LDFLAGS = ${LDFLAGS}"
|
||||
@echo "CC = ${CC}"
|
||||
|
||||
.c.o:
|
||||
${CC} -c ${CFLAGS} $<
|
||||
|
||||
${OBJ}: config.h config.mk
|
||||
|
||||
config.h:
|
||||
cp config.def.h $@
|
||||
|
||||
users.h:
|
||||
cp users.def.h $@
|
||||
|
||||
lc: ${OBJ}
|
||||
${CC} -o $@ ${OBJ} ${LDFLAGS}
|
||||
|
||||
clean:
|
||||
rm -f lc ${OBJ} lc-${VERSION}.tar.gz
|
||||
|
||||
dist: clean
|
||||
mkdir -p lc-${VERSION}
|
||||
cp -R LICENSE Makefile README config.mk\
|
||||
lc.1 ${HDR} ${SRC} lc-${VERSION}
|
||||
tar -cf lc-${VERSION}.tar lc-${VERSION}
|
||||
gzip lc-${VERSION}.tar
|
||||
rm -rf lc-${VERSION}
|
||||
|
||||
install: all
|
||||
mkdir -p ${DESTDIR}${PREFIX}/bin
|
||||
cp -f lc ${DESTDIR}${PREFIX}/bin
|
||||
chmod 755 ${DESTDIR}${PREFIX}/bin/lc
|
||||
mkdir -p ${DESTDIR}${MANPREFIX}/man1
|
||||
sed "s/VERSION/${VERSION}/g" < lc.1 > ${DESTDIR}${MANPREFIX}/man1/lc.1
|
||||
chmod 644 ${DESTDIR}${MANPREFIX}/man1/lc.1
|
||||
|
||||
uninstall:
|
||||
rm -f ${DESTDIR}${PREFIX}/bin/lc\
|
||||
${DESTDIR}${MANPREFIX}/man1/lc.1
|
||||
graph: clean all
|
||||
./lc > graph.dot
|
||||
dot -Tpdf graph.dot > graph.pdf
|
||||
zathura ./graph.pdf
|
||||
|
||||
# None of these targets produce a file of the same name; `graph` was missing.
.PHONY: all options clean dist install uninstall graph
|
||||
24
README
24
README
|
|
@ -1,24 +0,0 @@
|
|||
lc - L compiler
|
||||
============================
|
||||
lc is an L compiler. It can compile L code.
|
||||
|
||||
|
||||
Requirements
|
||||
------------
|
||||
In order to build lc you need... a computer
|
||||
|
||||
|
||||
Installation
|
||||
------------
|
||||
Edit config.mk to match your local setup (lc is installed into
|
||||
the /usr namespace by default).
|
||||
|
||||
Afterwards enter the following command to build and install lc (if
|
||||
necessary as root):
|
||||
|
||||
make clean install
|
||||
|
||||
|
||||
Usage
|
||||
-----------
|
||||
lc file
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
#ifndef CONFIG_H
|
||||
#define CONFIG_H
|
||||
|
||||
#endif
|
||||
4
config.h
4
config.h
|
|
@ -1,4 +0,0 @@
|
|||
#ifndef CONFIG_H
|
||||
#define CONFIG_H
|
||||
|
||||
#endif
|
||||
27
config.mk
27
config.mk
|
|
@ -1,27 +0,0 @@
|
|||
# lc version
|
||||
VERSION = 0.1
|
||||
|
||||
# Customize below to fit your system
|
||||
|
||||
# paths
|
||||
PREFIX = /usr
|
||||
MANPREFIX = ${PREFIX}/share/man
|
||||
|
||||
# OpenBSD (uncomment)
|
||||
#MANPREFIX = ${PREFIX}/man
|
||||
|
||||
# includes and libs
|
||||
INCS = -I.
|
||||
LIBS =
|
||||
# flags
|
||||
CPPFLAGS = -DVERSION=\"${VERSION}\"
|
||||
CFLAGS := -std=c23 -pedantic -Wall -O0 ${INCS} ${CPPFLAGS}
|
||||
CFLAGS := ${CFLAGS} -g
|
||||
LDFLAGS = ${LIBS}
|
||||
|
||||
# Solaris
|
||||
#CFLAGS = -fast ${INCS} -DVERSION=\"${VERSION}\"
|
||||
#LDFLAGS = ${LIBS}
|
||||
|
||||
# compiler and linker
|
||||
CC = cc
|
||||
0
done.txt
0
done.txt
|
|
@ -1,16 +0,0 @@
|
|||
import std;
|
||||
|
||||
i32 main()
|
||||
{
|
||||
u32 x = 4;
|
||||
loop {
|
||||
u32 b = 3;
|
||||
}
|
||||
x == 3;
|
||||
|
||||
loop (0.., test) |k, i| {
|
||||
|
||||
}
|
||||
u32 b = 3;
|
||||
|
||||
}
|
||||
|
|
@ -1,10 +0,0 @@
|
|||
struct b {
|
||||
i32 a,
|
||||
u32 b,
|
||||
u32 c,
|
||||
}
|
||||
|
||||
u32 test()
|
||||
{
|
||||
f32 a = 5.0;
|
||||
}
|
||||
812
ir.c
812
ir.c
|
|
@ -1,812 +0,0 @@
|
|||
#include "ir.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "stb_ds.h"
|
||||
#include "sema.h"
|
||||
|
||||
struct { ir_node key; ir_node *value; } *global_hash = NULL;
|
||||
static ir_node *graph;
|
||||
static ir_node *current_memory;
|
||||
static ir_node *current_control;
|
||||
static usize current_stack = 0;
|
||||
|
||||
static ir_node *current_scope = NULL;
|
||||
|
||||
static ir_node *build_expression(ast_node *node);
|
||||
|
||||
static struct {
|
||||
ir_node **return_controls;
|
||||
ir_node **return_memories;
|
||||
ir_node **return_values;
|
||||
} current_func = {0};
|
||||
|
||||
/*
 * Print the Graphviz declaration of one IR node on stdout: the node id
 * followed by an attribute list chosen from the opcode.  A NULL node is
 * rendered as a red placeholder called "null".
 *
 * The id is cast to size_t and integer constants to long long so the
 * conversion specifiers are correct whatever usize/i64 are typedef'd to.
 * print_graph() must print ids with the same specifier, otherwise edges
 * would not refer to the declared node names.
 */
static void node_name(ir_node *node)
{
	if (!node) {
		printf("null [label=\"NULL\", style=filled, fillcolor=red]\n");
		return;
	}
	printf("%zu ", (size_t)node->id);
	switch (node->code) {
	case OC_START:
		printf("[label=\"%s\", style=filled, color=orange]\n", node->data.start_name);
		break;
	case OC_RETURN:
		printf("[label=\"return\", style=filled, color=orange]\n");
		break;
	case OC_ADD:
		printf("[label=\"+\"]\n");
		break;
	case OC_NEG: /* fallthrough: negation shares the "-" label */
	case OC_SUB:
		printf("[label=\"-\"]\n");
		break;
	case OC_DIV:
		printf("[label=\"/\"]\n");
		break;
	case OC_MUL:
		printf("[label=\"*\"]\n");
		break;
	case OC_MOD:
		printf("[label=\"%%\"]\n");
		break;
	case OC_BAND:
		printf("[label=\"&\"]\n");
		break;
	case OC_BOR:
		printf("[label=\"|\"]\n");
		break;
	case OC_BXOR:
		printf("[label=\"^\"]\n");
		break;
	case OC_EQ:
		printf("[label=\"==\"]\n");
		break;
	case OC_CONST_INT:
		printf("[label=\"%lld\"]\n", (long long)node->data.const_int);
		break;
	case OC_CONST_FLOAT:
		printf("[label=\"%f\"]\n", node->data.const_float);
		break;
	case OC_FRAME_PTR:
		printf("[label=\"frame_ptr\"]\n");
		break;
	case OC_STORE:
		printf("[label=\"store\", shape=box]\n");
		break;
	case OC_LOAD:
		printf("[label=\"load\", shape=box]\n");
		break;
	case OC_ADDR:
		printf("[label=\"addr\"]\n");
		break;
	case OC_REGION:
		printf("[label=\"region\", shape=diamond, style=filled, color=green]\n");
		break;
	case OC_PHI:
		printf("[label=\"phi\", shape=triangle]\n");
		break;
	case OC_IF:
		printf("[label=\"if\", shape=diamond, style=filled, color=lightblue]\n");
		break;
	case OC_PROJ:
		printf("[label=\"proj\", shape=diamond, style=filled, color=cyan]\n");
		break;
	default:
		/* no dedicated label: show the raw opcode number */
		printf("[label=\"%d\"]\n", (int)node->code);
		break;
	}
}

/*
 * Dump every node stored in global_hash, plus one edge per non-NULL entry
 * of its `out` array, as Graphviz statements on stdout.  Edges are drawn
 * from the referenced node to the node that references it.
 *
 * The `node` parameter is kept for the existing call site but is not used:
 * the walk covers global_hash, not the nodes reachable from `node`.
 */
static void print_graph(ir_node *node)
{
	(void)node;

	for (int i = 0; i < hmlen(global_hash); i++) {
		ir_node *cur = global_hash[i].value;

		node_name(cur);
		for (int j = 0; j < arrlen(cur->out); j++) {
			ir_node *input = cur->out[j];

			if (!input)
				continue;
			/* inputs may not be in global_hash themselves, so declare them here */
			node_name(input);
			printf("%zu->%zu\n", (size_t)input->id, (size_t)cur->id);
		}
	}
}
|
||||
|
||||
static void push_scope(void)
|
||||
{
|
||||
arrput(current_scope->data.symbol_tables, NULL);
|
||||
}
|
||||
|
||||
static struct symbol_def *get_def(char *name)
|
||||
{
|
||||
for (int i = arrlen(current_scope->data.symbol_tables) - 1; i >= 0; i--) {
|
||||
struct symbol_def *def = shget(current_scope->data.symbol_tables[i], name);
|
||||
if (def) return def;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Bind `name` to `node` in current_scope.
 *
 * If some table already defines the name, the innermost such table is
 * updated; otherwise the binding goes into the innermost table.  `lvalue`
 * records whether `node` is an address that must be loaded from / stored
 * to rather than the value itself.
 *
 * A previous definition is replaced but deliberately not freed:
 * copy_scope() copies definition pointers, so another scope may still
 * reference the old one.
 */
static void set_def(char *name, ir_node *node, bool lvalue)
{
	symbol_table **tables = current_scope->data.symbol_tables;
	int depth = arrlen(tables);
	int target;
	struct symbol_def *def;

	if (depth == 0) {
		/* previously this indexed tables[-1] */
		fprintf(stderr, "set_def: no open scope\n");
		abort();
	}

	target = depth - 1;
	for (int i = depth - 1; i >= 0; i--) {
		if (shget(tables[i], name)) {
			target = i;
			break;
		}
	}

	def = calloc(1, sizeof *def);
	if (!def) {
		perror("calloc");
		exit(EXIT_FAILURE);
	}
	def->is_lvalue = lvalue;
	def->node = node;
	shput(tables[target], name, def);
}
|
||||
|
||||
/*
 * Return a newly allocated OC_SCOPE node with the same number of symbol
 * tables as `src` and the same key/value entries in each.  The copy is
 * shallow: keys and definition pointers are shared with `src`.
 */
static ir_node *copy_scope(ir_node *src)
{
	ir_node *clone = calloc(1, sizeof(ir_node));
	int levels = arrlen(src->data.symbol_tables);

	clone->code = OC_SCOPE;
	for (int level = 0; level < levels; level++) {
		symbol_table *from = src->data.symbol_tables[level];
		int entries = shlen(from);

		arrput(clone->data.symbol_tables, NULL);
		for (int k = 0; k < entries; k++)
			shput(clone->data.symbol_tables[level], from[k].key, from[k].value);
	}
	return clone;
}
|
||||
|
||||
/*
 * Constant-fold a two-operand node in place.
 *
 * When both operands (out[0], out[1]) are integer constants, or both are
 * float constants, and the opcode is one this function knows how to
 * evaluate, `binary` is rewritten into an OC_CONST_INT / OC_CONST_FLOAT
 * node holding the result, its operand arrays are released and its id is
 * recomputed.  Otherwise the node is left untouched.
 *
 * Division and modulo by zero are not folded (previously the node was
 * turned into a constant holding whatever was already in `data`).
 * Division/modulo by -1 is computed without the INT_MIN / -1 overflow.
 * NOTE(review): signed overflow in +, -, * is still evaluated with plain
 * i64 arithmetic; decide on the language's overflow semantics.
 */
static void const_fold(ir_node *binary)
{
	ir_node *left = binary->out[0];
	ir_node *right = binary->out[1];

	if (left->code == OC_CONST_INT && right->code == OC_CONST_INT) {
		i64 l = left->data.const_int;
		i64 r = right->data.const_int;
		i64 folded;

		switch (binary->code) {
		case OC_ADD:
			folded = l + r;
			break;
		case OC_SUB:
			folded = l - r;
			break;
		case OC_MUL:
			folded = l * r;
			break;
		case OC_DIV:
			if (r == 0)
				return;
			/* l / -1 is -l; negate in unsigned space so INT_MIN wraps */
			folded = (r == -1) ? (i64)(0ULL - (unsigned long long)l) : l / r;
			break;
		case OC_MOD:
			if (r == 0)
				return;
			folded = (r == -1) ? 0 : l % r;
			break;
		case OC_BOR:
			folded = l | r;
			break;
		case OC_BAND:
			folded = l & r;
			break;
		case OC_BXOR:
			folded = l ^ r;
			break;
		case OC_EQ:
			folded = (l == r);
			break;
		default:
			return;
		}

		binary->data.const_int = folded;
		binary->code = OC_CONST_INT;
		arrfree(binary->out);
		arrfree(binary->in);
		/* hash with a zero id, like freshly built constants, so equal
		 * constants get equal ids and can be found in global_hash */
		binary->id = 0;
		binary->id = stbds_hash_bytes(binary, sizeof(ir_node), 0xcafebabe);
		return;
	}

	if (left->code == OC_CONST_FLOAT && right->code == OC_CONST_FLOAT) {
		f64 l = left->data.const_float;
		f64 r = right->data.const_float;
		f64 folded;

		switch (binary->code) {
		case OC_ADD:
			folded = l + r;
			break;
		case OC_SUB:
			folded = l - r;
			break;
		case OC_MUL:
			folded = l * r;
			break;
		case OC_DIV:
			if (r == 0.0)
				return;
			folded = l / r;
			break;
		default:
			return;
		}

		binary->data.const_float = folded;
		binary->code = OC_CONST_FLOAT;
		arrfree(binary->out);
		arrfree(binary->in);
		binary->id = 0;
		binary->id = stbds_hash_bytes(binary, sizeof(ir_node), 0xcafebabe);
	}
}
|
||||
|
||||
/*
 * Build an OC_ADDR node whose operands are a base and a constant offset.
 *
 * base   - (usize)-1 selects the frame pointer (OC_FRAME_PTR) as base;
 *          any other value becomes an integer constant base.
 * offset - emitted as an integer constant.
 *
 * If an identical node is already registered in global_hash that node is
 * returned and everything allocated here is released (previously the
 * operand array and both operand nodes leaked on that path).  The new
 * node is not inserted into global_hash by this function.
 */
static ir_node *build_address(usize base, usize offset)
{
	ir_node *addr = calloc(1, sizeof(ir_node));
	addr->code = OC_ADDR;

	ir_node *base_node = calloc(1, sizeof(ir_node));
	if (base == (usize)-1) {
		base_node->code = OC_FRAME_PTR;
	} else {
		base_node->code = OC_CONST_INT;
		base_node->data.const_int = base;
	}
	base_node->id = stbds_hash_bytes(base_node, sizeof(ir_node), 0xcafebabe);

	ir_node *offset_node = calloc(1, sizeof(ir_node));
	offset_node->code = OC_CONST_INT;
	offset_node->data.const_int = offset;
	offset_node->id = stbds_hash_bytes(offset_node, sizeof(ir_node), 0xcafebabe);

	arrput(addr->out, base_node);
	arrput(addr->out, offset_node);
	addr->id = stbds_hash_bytes(addr, sizeof(ir_node), 0xcafebabe);

	ir_node *existing = hmget(global_hash, *addr);
	if (existing) {
		/* the operands are referenced only from addr->out */
		arrfree(addr->out);
		free(base_node);
		free(offset_node);
		free(addr);
		return existing;
	}

	return addr;
}
|
||||
|
||||
/*
 * Emit an OC_STORE of `value` to `target` with operands
 * (control, memory, target, value), register it in global_hash and make
 * it the current memory state.
 */
static void build_store(ir_node *target, ir_node *value)
{
	ir_node *store = calloc(1, sizeof(ir_node));

	store->code = OC_STORE;
	arrput(store->out, current_control);
	arrput(store->out, current_memory);
	arrput(store->out, target);
	arrput(store->out, value);
	store->id = stbds_hash_bytes(store, sizeof(ir_node), 0xcafebabe);
	hmput(global_hash, *store, store);

	current_memory = store;
}

/*
 * Build `left <- right`: evaluate the right-hand side, then store it
 * through the node bound to the identifier on the left.  Returns the
 * node of the stored value.
 *
 * An unbound name used to dereference NULL; it is now reported and no
 * store is emitted.
 * NOTE(review): assumes the left operand is an identifier whose
 * expr.string.start is usable as a hash key - confirm against the parser.
 */
static ir_node *build_assign_ptr(ast_node *binary)
{
	ir_node *val_node = build_expression(binary->expr.binary.right);
	char *var_name = binary->expr.binary.left->expr.string.start;
	struct symbol_def *def = get_def(var_name);

	if (!def) {
		fprintf(stderr, "ir: store through undefined name\n");
		return val_node;
	}

	build_store(def->node, val_node);
	return val_node;
}

/*
 * Build `left = right`.  The right-hand side is evaluated first.  If the
 * name is bound to an lvalue (an address), a store is emitted; otherwise
 * the name is simply rebound to the new value node.  Returns the value
 * node in both cases.
 */
static ir_node *build_assign(ast_node *binary)
{
	ir_node *val_node = build_expression(binary->expr.binary.right);
	char *var_name = binary->expr.binary.left->expr.string.start;
	struct symbol_def *def = get_def(var_name);

	if (def && def->is_lvalue) {
		build_store(def->node, val_node);
		return val_node;
	}

	set_def(var_name, val_node, false);
	return val_node;
}
|
||||
|
||||
/*
 * Build the IR for a binary AST node.
 *
 * Assignments are delegated to build_assign()/build_assign_ptr().  For
 * the arithmetic, bitwise and equality operators a two-operand node is
 * created (left operand built first), constant-folded, and looked up in
 * global_hash; an existing identical node is returned in preference to
 * the new one.  The new node itself is not inserted here.
 *
 * NOTE(review): operators without a case below produce a node whose
 * opcode stays 0 (OC_START), exactly as before; they need opcodes of
 * their own.
 */
static ir_node *build_binary(ast_node *node)
{
	opcode code = OC_START; /* value a zeroed node would have */

	switch (node->expr.binary.operator) {
	case OP_ASSIGN:
		return build_assign(node);
	case OP_ASSIGN_PTR:
		return build_assign_ptr(node);
	case OP_PLUS:
		code = OC_ADD;
		break;
	case OP_MINUS:
		code = OC_SUB;
		break;
	case OP_DIV:
		code = OC_DIV;
		break;
	case OP_MUL:
		code = OC_MUL;
		break;
	case OP_MOD:
		code = OC_MOD;
		break;
	case OP_BOR:
		code = OC_BOR;
		break;
	case OP_BAND:
		code = OC_BAND;
		break;
	case OP_BXOR:
		code = OC_BXOR;
		break;
	case OP_EQ:
		code = OC_EQ;
		break;
	default:
		break;
	}

	/* allocate only once we know this is not an assignment */
	ir_node *n = calloc(1, sizeof(ir_node));
	n->code = code;
	arrput(n->out, build_expression(node->expr.binary.left));
	arrput(n->out, build_expression(node->expr.binary.right));
	n->id = stbds_hash_bytes(n, sizeof(ir_node), 0xcafebabe);
	const_fold(n);

	ir_node *existing = hmget(global_hash, *n);
	if (existing) {
		arrfree(n->out); /* was leaked; no-op when folding already freed it */
		free(n);
		return existing;
	}

	return n;
}
|
||||
|
||||
/*
 * Build an OC_LOAD whose operands are the current memory state and the
 * node produced for the address expression `node`.  The memory state is
 * captured before the address expression is built.  If an identical
 * load is already in global_hash it is returned instead; the new node is
 * not inserted here.
 *
 * NOTE(review): the hash seed differs from the 0xcafebabe used by every
 * other builder in this file; kept as is so ids do not change.
 */
static ir_node *build_load(ast_node *node)
{
	ir_node *load = calloc(1, sizeof(ir_node));

	load->code = OC_LOAD;
	arrput(load->out, current_memory);
	arrput(load->out, build_expression(node));
	load->id = stbds_hash_bytes(load, sizeof(ir_node), 0xcafebabebabecafe);

	ir_node *existing = hmget(global_hash, *load);
	if (existing) {
		arrfree(load->out); /* operand array was leaked on this path */
		free(load);
		return existing;
	}

	return load;
}
|
||||
|
||||
/*
 * Build the IR for a unary AST node.
 *
 *  - UOP_REF:   returns the node bound to the identifier operand if it
 *               has a definition, otherwise the operand's expression.
 *  - UOP_DEREF: returns a load of the operand (build_load).
 *  - UOP_MINUS: builds OC_NEG; a constant operand is folded into a new
 *               integer/float constant.
 *
 * The result is looked up in global_hash and an existing identical node
 * is preferred.
 * NOTE(review): any other operator yields a zeroed node (opcode 0,
 * OC_START) with no operands, exactly as before.
 */
static ir_node *build_unary(ast_node *node)
{
	ast_node *operand = node->expr.unary.right;

	switch (node->expr.unary.operator) {
	case UOP_REF:
		if (operand->type == NODE_IDENTIFIER) {
			struct symbol_def *def = get_def(operand->expr.string.start);
			if (def)
				return def->node;
		}
		return build_expression(operand);
	case UOP_DEREF:
		return build_load(operand);
	default:
		break;
	}

	/* only operators that really produce a node reach the allocation */
	ir_node *n = calloc(1, sizeof(ir_node));
	if (node->expr.unary.operator == UOP_MINUS) {
		n->code = OC_NEG;
		arrput(n->out, build_expression(operand));
	}

	/* n->out is only set for OC_NEG, so folding means negating */
	if (n->out && n->out[0]->code == OC_CONST_INT) {
		n->data.const_int = -(n->out[0]->data.const_int);
		n->code = OC_CONST_INT;
		arrfree(n->out);
	} else if (n->out && n->out[0]->code == OC_CONST_FLOAT) {
		n->data.const_float = -(n->out[0]->data.const_float);
		n->code = OC_CONST_FLOAT;
		arrfree(n->out);
	}

	n->id = stbds_hash_bytes(n, sizeof(ir_node), 0xcafebabe);

	ir_node *existing = hmget(global_hash, *n);
	if (existing) {
		arrfree(n->out); /* was leaked for unfolded negations */
		free(n);
		return existing;
	}

	return n;
}
|
||||
|
||||
static ir_node *build_if(ast_node *node)
|
||||
{
|
||||
ir_node *condition = build_expression(node->expr.if_stmt.condition);
|
||||
|
||||
ir_node *if_node = calloc(1, sizeof(ir_node));
|
||||
if_node->code = OC_IF;
|
||||
arrput(if_node->out, condition);
|
||||
arrput(if_node->out, current_control);
|
||||
if_node->id = stbds_hash_bytes(if_node, sizeof(ir_node), 0xcafebabe);
|
||||
hmput(global_hash, *if_node, if_node);
|
||||
|
||||
ir_node *proj_true = calloc(1, sizeof(ir_node));
|
||||
proj_true->code = OC_PROJ;
|
||||
arrput(proj_true->out, if_node);
|
||||
proj_true->id = stbds_hash_bytes(proj_true, sizeof(ir_node), 0xcafebabe);
|
||||
hmput(global_hash, *proj_true, proj_true);
|
||||
|
||||
ir_node *proj_false = calloc(1, sizeof(ir_node));
|
||||
proj_false->code = OC_PROJ;
|
||||
arrput(proj_false->out, if_node);
|
||||
proj_false->id = stbds_hash_bytes(proj_false, sizeof(ir_node), 0xcafebabe);
|
||||
hmput(global_hash, *proj_false, proj_false);
|
||||
|
||||
ir_node *base_scope = copy_scope(current_scope);
|
||||
ir_node *base_mem = current_memory;
|
||||
|
||||
current_control = proj_true;
|
||||
|
||||
ast_node *current = node->expr.if_stmt.body;
|
||||
while (current && current->type == NODE_UNIT) {
|
||||
if (current->expr.unit_node.expr) {
|
||||
build_expression(current->expr.unit_node.expr);
|
||||
}
|
||||
current = current->expr.unit_node.next;
|
||||
}
|
||||
ir_node *then_scope = current_scope;
|
||||
ir_node *then_mem = current_memory;
|
||||
ir_node *then_control = current_control;
|
||||
|
||||
current_scope = copy_scope(base_scope);
|
||||
current_memory = base_mem;
|
||||
|
||||
current_control = proj_false;
|
||||
current = node->expr.if_stmt.otherwise;
|
||||
while (current && current->type == NODE_UNIT) {
|
||||
if (current->expr.unit_node.expr) {
|
||||
build_expression(current->expr.unit_node.expr);
|
||||
}
|
||||
current = current->expr.unit_node.next;
|
||||
}
|
||||
ir_node *else_scope = current_scope;
|
||||
ir_node *else_mem = current_memory;
|
||||
ir_node *else_control = current_control;
|
||||
|
||||
ir_node *region = calloc(1, sizeof(ir_node));
|
||||
region->code = OC_REGION;
|
||||
arrput(region->out, then_control);
|
||||
arrput(region->out, else_control);
|
||||
region->id = stbds_hash_bytes(region, sizeof(ir_node), 0xcafebabe);
|
||||
hmput(global_hash, *region, region);
|
||||
|
||||
if (then_mem->id != else_mem->id) {
|
||||
ir_node *phi = calloc(1, sizeof(ir_node));
|
||||
phi->code = OC_PHI;
|
||||
arrput(phi->out, region);
|
||||
arrput(phi->out, then_mem);
|
||||
arrput(phi->out, else_mem);
|
||||
phi->id = stbds_hash_bytes(phi, sizeof(ir_node), 0xcafebabe);
|
||||
|
||||
hmput(global_hash, *phi, phi);
|
||||
|
||||
current_memory = phi;
|
||||
} else {
|
||||
current_memory = then_mem;
|
||||
}
|
||||
|
||||
current_scope = base_scope;
|
||||
|
||||
for (int i = 0; i < arrlen(current_scope->data.symbol_tables); i++) {
|
||||
symbol_table *base_table = current_scope->data.symbol_tables[i];
|
||||
for (int j = 0; j < shlen(base_table); j++) {
|
||||
char *key = base_table[j].key;
|
||||
|
||||
ir_node *found_then = NULL;
|
||||
symbol_table *t_table = then_scope->data.symbol_tables[i];
|
||||
if (shget(t_table, key)->node) found_then = shget(t_table, key)->node;
|
||||
else found_then = base_table[j].value->node;
|
||||
|
||||
ir_node *found_else = NULL;
|
||||
symbol_table *e_table = else_scope->data.symbol_tables[i];
|
||||
if (shget(e_table, key)->node) found_else = shget(e_table, key)->node;
|
||||
else found_else = base_table[j].value->node;
|
||||
|
||||
if (found_then->id != found_else->id) {
|
||||
ir_node *phi = calloc(1, sizeof(ir_node));
|
||||
phi->code = OC_PHI;
|
||||
arrput(phi->out, region);
|
||||
arrput(phi->out, found_then);
|
||||
arrput(phi->out, found_else);
|
||||
phi->id = stbds_hash_bytes(phi, sizeof(ir_node), 0xcafebabe);
|
||||
struct symbol_def *def = calloc(1, sizeof(struct symbol_def));
|
||||
def->node = phi;
|
||||
def->is_lvalue = false;
|
||||
shput(current_scope->data.symbol_tables[i], key, def);
|
||||
hmput(global_hash, *phi, phi);
|
||||
} else {
|
||||
struct symbol_def *def = calloc(1, sizeof(struct symbol_def));
|
||||
def->node = found_then;
|
||||
def->is_lvalue = false;
|
||||
shput(current_scope->data.symbol_tables[i], key, def);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
current_control = region;
|
||||
|
||||
return region;
|
||||
}
|
||||
|
||||
/*
 * Record a return statement for the function being built: the returned
 * value (a fresh OC_VOID node when there is no value expression) is
 * queued together with the control and memory state current after the
 * value has been built.  Control is then cleared to NULL.
 */
static void build_return(ast_node *node)
{
	ast_node *value_expr = node->expr.ret.value;
	ir_node *result;

	if (!value_expr) {
		result = calloc(1, sizeof(ir_node));
		result->code = OC_VOID;
		result->id = stbds_hash_bytes(result, sizeof(ir_node), 0xcafebabe);
	} else {
		result = build_expression(value_expr);
	}

	arrput(current_func.return_controls, current_control);
	arrput(current_func.return_memories, current_memory);
	arrput(current_func.return_values, result);

	current_control = NULL;
}
|
||||
|
||||
static void finalize_function(void)
|
||||
{
|
||||
int count = arrlen(current_func.return_controls);
|
||||
|
||||
if (count == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
ir_node *final_ctrl = NULL;
|
||||
ir_node *final_mem = NULL;
|
||||
ir_node *final_val = NULL;
|
||||
|
||||
if (count == 1) {
|
||||
final_ctrl = current_func.return_controls[0];
|
||||
final_mem = current_func.return_memories[0];
|
||||
final_val = current_func.return_values[0];
|
||||
}
|
||||
else {
|
||||
ir_node *region = calloc(1, sizeof(ir_node));
|
||||
region->code = OC_REGION;
|
||||
for (int i=0; i<count; i++) {
|
||||
arrput(region->out, current_func.return_controls[i]);
|
||||
}
|
||||
hmput(global_hash, *region, region);
|
||||
final_ctrl = region;
|
||||
|
||||
ir_node *mem_phi = calloc(1, sizeof(ir_node));
|
||||
mem_phi->code = OC_PHI;
|
||||
arrput(mem_phi->out, region);
|
||||
for (int i=0; i<count; i++) {
|
||||
arrput(mem_phi->out, current_func.return_memories[i]);
|
||||
}
|
||||
hmput(global_hash, *mem_phi, mem_phi);
|
||||
mem_phi->id = stbds_hash_bytes(mem_phi, sizeof(ir_node), 0xcafebabe);
|
||||
final_mem = mem_phi;
|
||||
|
||||
ir_node *val_phi = calloc(1, sizeof(ir_node));
|
||||
val_phi->code = OC_PHI;
|
||||
//arrput(val_phi->out, region);
|
||||
for (int i=0; i<count; i++) {
|
||||
arrput(val_phi->out, current_func.return_values[i]);
|
||||
}
|
||||
val_phi->id = stbds_hash_bytes(val_phi, sizeof(ir_node), 0xcafebabe);
|
||||
hmput(global_hash, *val_phi, val_phi);
|
||||
final_val = val_phi;
|
||||
|
||||
region->id = stbds_hash_bytes(region, sizeof(ir_node), 0xcafebabe);
|
||||
}
|
||||
|
||||
ir_node *ret = calloc(1, sizeof(ir_node));
|
||||
ret->code = OC_RETURN;
|
||||
arrput(ret->out, final_ctrl);
|
||||
arrput(ret->out, final_mem);
|
||||
arrput(ret->out, final_val);
|
||||
ret->id = stbds_hash_bytes(ret, sizeof(ir_node), 0xcafebabe);
|
||||
|
||||
hmput(global_hash, *ret, ret);
|
||||
}
|
||||
|
||||
/*
 * Build the IR for one function definition and return its OC_START node.
 *
 * Creates the start node, a control projection and a memory projection,
 * opens a fresh scope, binds every parameter to its own projection of
 * the start node, builds the body statements in order and finally emits
 * the return node via finalize_function().
 *
 * Fixes: the per-function return lists are released instead of being
 * overwritten (the source had a garbled `&current_func` memset that
 * leaked them), and the frame offset counter restarts at 0.
 * NOTE(review): the reset assumes current_stack is a per-function,
 * frame-pointer-relative offset, as its use with build_address(-1, ...)
 * suggests - confirm.
 */
static ir_node *build_function(ast_node *node)
{
	/* arrfree() also resets each pointer to NULL */
	arrfree(current_func.return_controls);
	arrfree(current_func.return_memories);
	arrfree(current_func.return_values);
	current_stack = 0;

	ast_node *current = node->expr.function.body;

	ir_node *func = calloc(1, sizeof(ir_node));
	func->code = OC_START;
	func->id = stbds_hash_bytes(func, sizeof(ir_node), 0xcafebabe);
	func->data.start_name = node->expr.function.name;

	/* The two start projections have identical contents at this point,
	 * so their ids are derived from the node addresses instead. */
	ir_node *start_ctrl = calloc(1, sizeof(ir_node));
	start_ctrl->code = OC_PROJ;
	start_ctrl->id = stbds_hash_bytes(&start_ctrl, sizeof(usize), 0xcafebabe);
	arrput(start_ctrl->out, func);
	hmput(global_hash, *start_ctrl, start_ctrl);

	current_control = start_ctrl;

	ir_node *start_mem = calloc(1, sizeof(ir_node));
	start_mem->code = OC_PROJ;
	start_mem->id = stbds_hash_bytes(&start_mem, sizeof(usize), 0xcafebabe);
	arrput(start_mem->out, func);
	hmput(global_hash, *start_mem, start_mem);

	current_memory = start_mem;

	current_scope = calloc(1, sizeof(ir_node));
	current_scope->code = OC_SCOPE;
	push_scope();

	for (member *m = node->expr.function.parameters; m; m = m->next) {
		ir_node *proj_param = calloc(1, sizeof(ir_node));

		proj_param->code = OC_PROJ;
		arrput(proj_param->out, func);
		proj_param->id = stbds_hash_bytes(proj_param, sizeof(ir_node), 0xcafebabe);
		set_def(m->name, proj_param, false);
		hmput(global_hash, *proj_param, proj_param);
	}

	while (current && current->type == NODE_UNIT) {
		if (current->expr.unit_node.expr)
			build_expression(current->expr.unit_node.expr);
		current = current->expr.unit_node.next;
	}

	/* recomputed now that start_name is part of the node contents */
	func->id = stbds_hash_bytes(func, sizeof(ir_node), 0xcafebabe);

	finalize_function();

	return func;
}
|
||||
|
||||
/*
 * Build the IR for one AST expression or statement and return the node
 * that represents its value (NULL when it has none, e.g. a return).
 *
 *  - unary / binary / if / return: delegated to the matching builder.
 *  - integer literal: an OC_CONST_INT, reusing an identical constant
 *    already present in global_hash.
 *  - variable declaration: an address-taken variable gets a frame slot
 *    and a store of its initial value, and is bound to the slot address
 *    as an lvalue; any other variable is bound directly to its value.
 *  - identifier: the bound node, wrapped in a load when the binding is
 *    an lvalue.
 *
 * Except on the early-return paths, a non-NULL result is inserted into
 * global_hash before returning.
 *
 * Fixes: no node is allocated (and leaked) for non-address-taken
 * declarations; an undefined identifier is reported instead of
 * dereferencing NULL; a duplicate load frees its operand array; case
 * bodies that declare variables are braced.
 */
static ir_node *build_expression(ast_node *node)
{
	ir_node *n = NULL;

	switch (node->type) {
	case NODE_UNARY:
		n = build_unary(node);
		break;
	case NODE_BINARY:
		n = build_binary(node);
		break;
	case NODE_INTEGER: {
		n = calloc(1, sizeof(ir_node));
		n->code = OC_CONST_INT;
		n->data.const_int = node->expr.integer;
		n->id = stbds_hash_bytes(n, sizeof(ir_node), 0xcafebabe);

		ir_node *existing = hmget(global_hash, *n);
		if (existing) {
			free(n);
			return existing;
		}
		break;
	}
	case NODE_VAR_DECL: {
		if (!node->address_taken) {
			n = build_expression(node->expr.var_decl.value);
			set_def(node->expr.var_decl.name, n, false);
			return n;
		}

		ir_node *store = calloc(1, sizeof(ir_node));
		store->code = OC_STORE;
		/* NOTE(review): this store has no control operand, unlike the
		 * stores emitted for assignments */
		arrput(store->out, current_memory);
		arrput(store->out, build_address((usize)-1, current_stack));
		arrput(store->out, build_expression(node->expr.var_decl.value));
		current_memory = store;
		current_stack += node->expr_type->size;
		store->id = stbds_hash_bytes(store, sizeof(ir_node), 0xcafebabe);
		hmput(global_hash, *store, store);

		/* the variable is bound to its address, operand 1 of the store */
		n = store->out[1];
		set_def(node->expr.var_decl.name, n, true);
		return n;
	}
	case NODE_IDENTIFIER: {
		struct symbol_def *def = get_def(node->expr.string.start);

		if (!def) {
			/* presumably rejected earlier by sema; do not crash here */
			fprintf(stderr, "ir: undefined identifier %.*s\n",
			        (int)node->expr.string.len, node->expr.string.start);
			break;
		}
		n = def->node;

		if (n && def->is_lvalue) {
			ir_node *load = calloc(1, sizeof(ir_node));

			load->code = OC_LOAD;
			arrput(load->out, current_memory);
			arrput(load->out, n);
			load->id = stbds_hash_bytes(load, sizeof(ir_node), 0xcafebabe);

			ir_node *existing = hmget(global_hash, *load);
			if (existing) {
				arrfree(load->out);
				free(load);
				n = existing;
			} else {
				n = load; /* registered by the hmput below */
			}
		}
		break;
	}
	case NODE_IF:
		n = build_if(node);
		break;
	case NODE_RETURN:
		build_return(node);
		break;
	default:
		break;
	}

	if (n)
		hmput(global_hash, *n, n);
	return n;
}
|
||||
|
||||
/*
 * Entry point of IR construction: builds the IR of every top-level
 * function in the NODE_UNIT list starting at `ast`, attaches each to the
 * "program" start node, registers it in global_hash, and finally prints
 * the whole graph on stdout as a Graphviz digraph.
 */
void ir_build(ast_node *ast)
{
	graph = calloc(1, sizeof(ir_node));
	graph->code = OC_START;
	graph->id = stbds_hash_bytes(graph, sizeof(ir_node), 0xcafebabe);
	graph->data.start_name = "program";

	current_memory = calloc(1, sizeof(ir_node));
	current_memory->code = OC_FRAME_PTR;
	current_memory->id = stbds_hash_bytes(current_memory, sizeof(ir_node), 0xcafebabe);

	current_scope = calloc(1, sizeof(ir_node));
	current_scope->code = OC_SCOPE;
	push_scope();

	for (ast_node *unit = ast; unit && unit->type == NODE_UNIT;
	     unit = unit->expr.unit_node.next) {
		ast_node *decl = unit->expr.unit_node.expr;

		/* only function definitions are lowered at top level */
		if (!decl || decl->type != NODE_FUNCTION)
			continue;

		ir_node *fn = build_function(decl);
		arrput(graph->out, fn);
		hmput(global_hash, *fn, fn);
	}

	printf("digraph G {\n");
	print_graph(graph);
	printf("}\n");
}
|
||||
65
ir.h
65
ir.h
|
|
@ -1,65 +0,0 @@
|
|||
#ifndef IR_H
|
||||
#define IR_H
|
||||
|
||||
#include "utils.h"
|
||||
#include "parser.h"
|
||||
|
||||
struct _ir_node;
|
||||
/* What a name is currently bound to while the IR is being built. */
struct symbol_def {
	struct _ir_node *node; /* node the name refers to */
	bool is_lvalue;        /* true: `node` is an address to load from / store to */
};

/* One entry of a string-keyed symbol table: name -> definition. */
typedef struct {
	char *key;
	struct symbol_def *value;
} symbol_table;
|
||||
|
||||
/* Operation performed by an IR node. */
typedef enum {
	OC_START = 0, /* first enumerator: the opcode of a zero-initialised node */

	/* arithmetic, bitwise and comparison */
	OC_ADD,
	OC_SUB,
	OC_MUL,
	OC_DIV,
	OC_MOD,
	OC_BAND,
	OC_BOR,
	OC_BXOR,
	OC_NEG,
	OC_EQ,

	/* constants */
	OC_CONST_INT,
	OC_CONST_FLOAT,
	OC_VOID,

	/* addressing */
	OC_FRAME_PTR,
	OC_ADDR,

	/* memory */
	OC_STORE,
	OC_LOAD,

	/* merge points */
	OC_REGION,
	OC_PHI,

	/* branching */
	OC_IF,
	OC_PROJ,

	OC_STOP,
	OC_RETURN,

	/* bookkeeping node carrying the symbol tables */
	OC_SCOPE,
} opcode;
|
||||
|
||||
typedef struct _ir_node {
|
||||
opcode code;
|
||||
usize id;
|
||||
struct _ir_node **in;
|
||||
struct _ir_node **out;
|
||||
union {
|
||||
i64 const_int;
|
||||
f64 const_float;
|
||||
symbol_table **symbol_tables;
|
||||
char *start_name;
|
||||
} data;
|
||||
} ir_node;
|
||||
|
||||
void ir_build(ast_node *ast);
|
||||
|
||||
#endif
|
||||
241
lc.c
241
lc.c
|
|
@ -1,241 +0,0 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "utils.h"
|
||||
#include "lexer.h"
|
||||
#include "parser.h"
|
||||
#include "sema.h"
|
||||
#include "ir.h"
|
||||
|
||||
/* Write `depth` copies of the indentation string to stdout (nothing for depth <= 0). */
void print_indent(int depth) {
	int remaining = depth;

	while (remaining-- > 0) {
		printf(" ");
	}
}
|
||||
|
||||
/* Source spelling of a binary operator, or "?" for an operator not listed. */
const char* get_op_str(binary_op op) {
	const char *text = "?";

	switch (op) {
	case OP_PLUS:       text = "+";  break;
	case OP_MINUS:      text = "-";  break;
	case OP_DIV:        text = "/";  break;
	case OP_MUL:        text = "*";  break;
	case OP_EQ:         text = "=="; break;
	case OP_ASSIGN:     text = "=";  break;
	case OP_ASSIGN_PTR: text = "<-"; break;
	case OP_AND:        text = "&&"; break;
	case OP_OR:         text = "||"; break;
	case OP_NEQ:        text = "!="; break;
	case OP_GT:         text = ">";  break;
	case OP_LT:         text = "<";  break;
	case OP_GE:         text = ">="; break;
	case OP_LE:         text = "<="; break;
	case OP_BOR:        text = "|";  break;
	case OP_BAND:       text = "&";  break;
	case OP_BXOR:       text = "^";  break;
	case OP_MOD:        text = "%";  break;
	case OP_PLUS_EQ:    text = "+="; break;
	case OP_MINUS_EQ:   text = "-="; break;
	case OP_DIV_EQ:     text = "/="; break;
	case OP_MUL_EQ:     text = "*="; break;
	default:
		break;
	}
	return text;
}
|
||||
|
||||
/* Source spelling of a unary operator, or "?" for an operator not listed. */
const char *get_uop_str(unary_op op) {
	const char *text = "?";

	switch (op) {
	case UOP_INCR:  text = "++"; break;
	case UOP_MINUS: text = "-";  break;
	case UOP_DECR:  text = "--"; break;
	case UOP_DEREF: text = "*";  break;
	case UOP_REF:   text = "&";  break;
	case UOP_NOT:   text = "!";  break;
	default:
		break;
	}
	return text;
}
|
||||
|
||||
void print_ast(ast_node *node, int depth) {
|
||||
if (!node) return;
|
||||
|
||||
print_indent(depth);
|
||||
|
||||
switch (node->type) {
|
||||
case NODE_INTEGER:
|
||||
printf("Integer: %lu\n", node->expr.integer);
|
||||
break;
|
||||
case NODE_FLOAT:
|
||||
printf("Float: %f\n", node->expr.flt);
|
||||
break;
|
||||
case NODE_CHAR:
|
||||
printf("Char: '%c'\n", node->expr.ch);
|
||||
break;
|
||||
case NODE_STRING:
|
||||
printf("String: \"%.*s\"\n", (int)node->expr.string.len, node->expr.string.start);
|
||||
break;
|
||||
case NODE_IDENTIFIER:
|
||||
printf("Identifier: %.*s\n", (int)node->expr.string.len, node->expr.string.start);
|
||||
break;
|
||||
case NODE_CAST:
|
||||
printf("Cast:\n");
|
||||
print_ast(node->expr.cast.type, depth);
|
||||
print_ast(node->expr.cast.value, depth + 1);
|
||||
break;
|
||||
case NODE_ACCESS:
|
||||
printf("Access:\n");
|
||||
print_ast(node->expr.access.expr, depth + 1);
|
||||
print_ast(node->expr.access.member, depth + 1);
|
||||
break;
|
||||
case NODE_LABEL:
|
||||
printf("Label: %.*s\n", (int)node->expr.label.name_len, node->expr.label.name);
|
||||
break;
|
||||
case NODE_GOTO:
|
||||
printf("Goto: %.*s\n", (int)node->expr.label.name_len, node->expr.label.name);
|
||||
break;
|
||||
case NODE_BINARY:
|
||||
printf("BinaryOp (%s)\n", get_op_str(node->expr.binary.operator));
|
||||
print_ast(node->expr.binary.left, depth + 1);
|
||||
print_ast(node->expr.binary.right, depth + 1);
|
||||
break;
|
||||
case NODE_ARRAY_SUBSCRIPT:
|
||||
printf("Array subscript\n");
|
||||
print_ast(node->expr.subscript.expr, depth + 1);
|
||||
print_ast(node->expr.subscript.index, depth + 1);
|
||||
break;
|
||||
case NODE_UNARY:
|
||||
printf("UnaryOp (%s)\n", get_uop_str(node->expr.unary.operator));
|
||||
print_ast(node->expr.unary.right, depth + 1);
|
||||
break;
|
||||
case NODE_POSTFIX:
|
||||
printf("Postfix (%s)\n", get_uop_str(node->expr.unary.operator));
|
||||
print_ast(node->expr.unary.right, depth + 1);
|
||||
break;
|
||||
case NODE_BREAK:
|
||||
printf("Break\n");
|
||||
break;
|
||||
case NODE_TERNARY:
|
||||
printf("Ternary (? :)\n");
|
||||
print_indent(depth + 1); printf("Condition:\n");
|
||||
print_ast(node->expr.ternary.condition, depth + 2);
|
||||
print_indent(depth + 1); printf("Then:\n");
|
||||
print_ast(node->expr.ternary.then, depth + 2);
|
||||
print_indent(depth + 1); printf("Else:\n");
|
||||
print_ast(node->expr.ternary.otherwise, depth + 2);
|
||||
break;
|
||||
case NODE_UNIT:
|
||||
printf("Unit\n");
|
||||
ast_node *current = node;
|
||||
while (current && current->type == NODE_UNIT) {
|
||||
print_ast(current->expr.unit_node.expr, depth + 1);
|
||||
current = current->expr.unit_node.next;
|
||||
}
|
||||
break;
|
||||
case NODE_CALL:
|
||||
printf("Call: %.*s\n", (int)node->expr.call.name_len, node->expr.call.name);
|
||||
current = node->expr.call.parameters;
|
||||
while (current && current->type == NODE_UNIT) {
|
||||
print_ast(current->expr.unit_node.expr, depth + 1);
|
||||
current = current->expr.unit_node.next;
|
||||
}
|
||||
break;
|
||||
case NODE_STRUCT_INIT:
|
||||
printf("Struct init:\n");
|
||||
current = node->expr.struct_init.members;
|
||||
while (current && current->type == NODE_UNIT) {
|
||||
print_ast(current->expr.unit_node.expr, depth + 1);
|
||||
current = current->expr.unit_node.next;
|
||||
}
|
||||
break;
|
||||
case NODE_STRUCT:
|
||||
printf("Struct: %.*s\n", (int)node->expr.structure.name_len, node->expr.structure.name);
|
||||
member *m = node->expr.structure.members;
|
||||
while (m) {
|
||||
print_ast(m->type, depth + 1);
|
||||
m = m->next;
|
||||
}
|
||||
break;
|
||||
case NODE_UNION:
|
||||
printf("Union: %.*s\n", (int)node->expr.structure.name_len, node->expr.structure.name);
|
||||
m = node->expr.structure.members;
|
||||
while (m) {
|
||||
print_ast(m->type, depth + 1);
|
||||
m = m->next;
|
||||
}
|
||||
break;
|
||||
case NODE_ENUM:
|
||||
printf("Enum: %.*s\n", (int)node->expr.enm.name_len, node->expr.enm.name);
|
||||
variant *v = node->expr.enm.variants;
|
||||
while (v) {
|
||||
printf("\t%.*s\n", (int)v->name_len, v->name);
|
||||
v = v->next;
|
||||
}
|
||||
break;
|
||||
case NODE_IF:
|
||||
printf("If:\n");
|
||||
print_ast(node->expr.whle.condition, depth + 1);
|
||||
print_ast(node->expr.whle.body, depth + 1);
|
||||
break;
|
||||
case NODE_VAR_DECL:
|
||||
printf("VarDecl: ");
|
||||
print_ast(node->expr.var_decl.type, 0);
|
||||
print_ast(node->expr.var_decl.value, depth + 1);
|
||||
break;
|
||||
case NODE_FUNCTION:
|
||||
printf("Function: %.*s\n", (int)node->expr.function.name_len, node->expr.function.name);
|
||||
m = node->expr.function.parameters;
|
||||
while (m) {
|
||||
print_ast(m->type, depth + 1);
|
||||
m = m->next;
|
||||
}
|
||||
print_ast(node->expr.function.body, depth + 1);
|
||||
break;
|
||||
case NODE_RETURN:
|
||||
printf("Return:\n");
|
||||
print_ast(node->expr.ret.value, depth + 1);
|
||||
break;
|
||||
case NODE_IMPORT:
|
||||
printf("Import:\n");
|
||||
print_ast(node->expr.import.path, depth + 1);
|
||||
break;
|
||||
case NODE_WHILE:
|
||||
printf("While:\n");
|
||||
print_ast(node->expr.whle.condition, depth + 1);
|
||||
print_ast(node->expr.whle.body, depth + 1);
|
||||
break;
|
||||
case NODE_FOR:
|
||||
printf("For:\n");
|
||||
print_ast(node->expr.fr.slices, depth + 1);
|
||||
print_ast(node->expr.fr.captures, depth + 1);
|
||||
print_indent(depth + 1);
|
||||
print_ast(node->expr.fr.body, depth + 1);
|
||||
break;
|
||||
case NODE_RANGE:
|
||||
printf("Range:\n");
|
||||
print_ast(node->expr.binary.left, depth + 1);
|
||||
print_ast(node->expr.binary.right, depth + 1);
|
||||
break;
|
||||
default:
|
||||
printf("Unknown Node Type: %d\n", node->type);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
FILE *fp = fopen("test.l", "r");
|
||||
usize size = 0;
|
||||
fseek(fp, 0, SEEK_END);
|
||||
size = ftell(fp);
|
||||
fseek(fp, 0, SEEK_SET);
|
||||
char *src = malloc(size+1);
|
||||
fread(src, size, 1, fp);
|
||||
fclose(fp);
|
||||
src[size] = '\0';
|
||||
|
||||
arena a = arena_init(0x1000 * 0x1000 * 64);
|
||||
lexer *l = lexer_init(src, size, &a);
|
||||
parser *p = parser_init(l, &a);
|
||||
//print_ast(p->ast, 0);
|
||||
sema_init(p, &a);
|
||||
|
||||
ir_build(p->ast);
|
||||
|
||||
arena_deinit(a);
|
||||
|
||||
return 0;
|
||||
}
|
||||
422
lexer.c
422
lexer.c
|
|
@ -1,422 +0,0 @@
|
|||
#include "lexer.h"
|
||||
#include <stdbool.h>
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Keyword trie: allocated and filled by lexer_init, queried by parse_identifier. */
trie_node *keywords;
|
||||
|
||||
/*
 * Appends a token of `type` to the lexer's token list.
 *
 * The lexeme is not copied: it points at the current position
 * (`l->source + l->index`) and spans `len` bytes, so the caller must invoke
 * this before advancing `l->index` past the token. The position is taken
 * from the lexer's current row/column.
 */
static void add_token(lexer *l, token_type type, usize len)
{
    token *t = arena_alloc(l->allocator, sizeof(token));
    t->type = type;
    t->lexeme = l->source + l->index;
    t->lexeme_len = len;
    t->position.row = l->row;
    t->position.column = l->column;
    /* Terminate the list explicitly instead of relying on the allocator
     * handing out zeroed memory. */
    t->next = NULL;

    if (l->tokens) {
        l->tail->next = t;
    } else {
        l->tokens = t;
    }
    l->tail = t;
}
|
||||
|
||||
/*
 * Appends a TOKEN_ERROR token whose lexeme is the diagnostic text `msg`.
 *
 * `msg` is stored by pointer (not copied), so it must outlive the token
 * list; callers in this file pass string literals. The position is the
 * lexer's current row/column.
 */
static void add_error(lexer *l, char *msg)
{
    token *t = arena_alloc(l->allocator, sizeof(token));
    t->type = TOKEN_ERROR;
    t->lexeme = msg;
    t->lexeme_len = strlen(msg);
    t->position.row = l->row;
    t->position.column = l->column;
    /* Terminate the list explicitly instead of relying on the allocator
     * handing out zeroed memory. */
    t->next = NULL;

    if (l->tokens) {
        l->tail->next = t;
    } else {
        l->tokens = t;
    }
    l->tail = t;
}
|
||||
|
||||
/*
 * Lexes an integer or float literal starting at `l->index` and advances the
 * index past it.
 *
 * A literal becomes a float when a digit is followed by '.' and another
 * digit. Only one such fractional point is accepted, so "1.2.3" lexes as the
 * float "1.2" followed by further tokens; a digit followed by ".." is left
 * alone, so the range operator is not swallowed.
 *
 * Lookahead of up to two bytes relies on the source being NUL-terminated.
 */
static void parse_number(lexer *l)
{
    const char *start = l->source + l->index;
    usize len = 0;
    bool is_float = false;

    /* ctype functions require an unsigned char value (or EOF). */
    while (isdigit((unsigned char)start[len])) {
        if (!is_float && start[len + 1] == '.' && isdigit((unsigned char)start[len + 2])) {
            /* consume "<digit>.<digit>" in one step */
            is_float = true;
            len += 3;
        } else {
            len += 1;
        }
    }

    /* add_token reads l->index as the lexeme start, so advance afterwards. */
    add_token(l, is_float ? TOKEN_FLOAT : TOKEN_INTEGER, len);
    l->index += len;
}
|
||||
|
||||
/*
 * Lexes a run of alphanumeric characters and underscores starting at
 * `l->index`, emits either the matching keyword token or TOKEN_IDENTIFIER,
 * and advances the index past the run.
 *
 * A zero result from trie_get (the value of TOKEN_ERROR) is treated as
 * "not a keyword".
 */
static void parse_identifier(lexer *l)
{
    const char *start = l->source + l->index;
    usize len = 0;

    /* ctype functions require an unsigned char value (or EOF). */
    while (isalnum((unsigned char)start[len]) || start[len] == '_') {
        len += 1;
    }

    token_type keyword = trie_get(keywords, l->source + l->index, len);

    /* add_token reads l->index as the lexeme start, so advance afterwards. */
    add_token(l, keyword ? keyword : TOKEN_IDENTIFIER, len);
    l->index += len;
}
|
||||
|
||||
/*
 * Lexes the body of a string literal. On entry `l->index` is the first byte
 * after the opening quote.
 *
 * On success a TOKEN_STRING covering the bytes between the quotes is emitted
 * and the index moves past the closing quote. If a newline or the end of the
 * source is reached first, an error token is emitted and the index is left on
 * the offending byte. Backslash escapes are not interpreted here.
 */
static void parse_string(lexer *l)
{
    const usize start = l->index;
    usize end = start;

    for (;;) {
        const char c = l->source[end];

        if (c == '"') {
            break;
        }
        if (c == '\0' || c == '\n') {
            add_error(l, "unclosed string literal.");
            l->index = end;
            return;
        }
        end += 1;
    }

    /* l->index still equals `start`, which is what add_token uses as lexeme. */
    add_token(l, TOKEN_STRING, end - start);
    l->index = end + 1;
}
|
||||
|
||||
static bool parse_special(lexer *l)
|
||||
{
|
||||
switch (l->source[l->index]) {
|
||||
case '+':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_PLUS_EQ, 2);
|
||||
l->index += 2;
|
||||
} else if (l->source[l->index+1] == '+') {
|
||||
add_token(l, TOKEN_PLUS_PLUS, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_PLUS, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case '-':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_MINUS_EQ, 2);
|
||||
l->index += 2;
|
||||
} else if (l->source[l->index+1] == '-') {
|
||||
add_token(l, TOKEN_MINUS_MINUS, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_MINUS, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case '/':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_SLASH_EQ, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_SLASH, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case '*':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_STAR_EQ, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_STAR, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case '%':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_PERC_EQ, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_PERC, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case '&':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_AND_EQ, 2);
|
||||
l->index += 2;
|
||||
} else if (l->source[l->index+1] == '&') {
|
||||
add_token(l, TOKEN_DOUBLE_AND, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_AND, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case '^':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_HAT_EQ, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_HAT, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case '|':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_PIPE_EQ, 2);
|
||||
l->index += 2;
|
||||
} else if (l->source[l->index+1] == '|') {
|
||||
add_token(l, TOKEN_OR, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_PIPE, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case '=':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_DOUBLE_EQ, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_EQ, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case '>':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_GREATER_EQ, 2);
|
||||
l->index += 2;
|
||||
} else if (l->source[l->index+1] == '>') {
|
||||
if (l->source[l->index+2] == '=') {
|
||||
add_token(l, TOKEN_RSHIFT_EQ, 3);
|
||||
l->index += 3;
|
||||
return true;
|
||||
}
|
||||
add_token(l, TOKEN_RSHIFT, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_GREATER_THAN, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case '<':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_LESS_EQ, 2);
|
||||
l->index += 2;
|
||||
} else if (l->source[l->index+1] == '-') {
|
||||
add_token(l, TOKEN_ARROW, 2);
|
||||
l->index += 2;
|
||||
} else if (l->source[l->index+1] == '<') {
|
||||
if (l->source[l->index+2] == '=') {
|
||||
add_token(l, TOKEN_LSHIFT_EQ, 3);
|
||||
l->index += 3;
|
||||
return true;
|
||||
}
|
||||
add_token(l, TOKEN_LSHIFT, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_LESS_THAN, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case '!':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_NOT_EQ, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_BANG, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case ':':
|
||||
add_token(l, TOKEN_COLON, 1);
|
||||
l->index += 1;
|
||||
return true;
|
||||
case ';':
|
||||
add_token(l, TOKEN_SEMICOLON, 1);
|
||||
l->index += 1;
|
||||
return true;
|
||||
case '.':
|
||||
if (l->source[l->index+1] == '.') {
|
||||
add_token(l, TOKEN_DOUBLE_DOT, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_DOT, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case ',':
|
||||
add_token(l, TOKEN_COMMA, 1);
|
||||
l->index += 1;
|
||||
return true;
|
||||
case '(':
|
||||
add_token(l, TOKEN_LPAREN, 1);
|
||||
l->index += 1;
|
||||
return true;
|
||||
case ')':
|
||||
add_token(l, TOKEN_RPAREN, 1);
|
||||
l->index += 1;
|
||||
return true;
|
||||
case '[':
|
||||
add_token(l, TOKEN_LSQUARE, 1);
|
||||
l->index += 1;
|
||||
return true;
|
||||
case ']':
|
||||
add_token(l, TOKEN_RSQUARE, 1);
|
||||
l->index += 1;
|
||||
return true;
|
||||
case '{':
|
||||
add_token(l, TOKEN_LCURLY, 1);
|
||||
l->index += 1;
|
||||
return true;
|
||||
case '}':
|
||||
add_token(l, TOKEN_RCURLY, 1);
|
||||
l->index += 1;
|
||||
return true;
|
||||
case '\'':
|
||||
if (l->source[l->index+1] == '\\') {
|
||||
if (l->source[l->index+3] != '\'') {
|
||||
add_error(l, "unclosed character literal.");
|
||||
l->index += 1;
|
||||
return true;
|
||||
}
|
||||
l->index += 1;
|
||||
add_token(l, TOKEN_CHAR, 2);
|
||||
l->index += 3;
|
||||
return true;
|
||||
} else {
|
||||
if (l->source[l->index+2] != '\'') {
|
||||
add_error(l, "unclosed character literal.");
|
||||
l->index += 1;
|
||||
return true;
|
||||
}
|
||||
l->index += 1;
|
||||
add_token(l, TOKEN_CHAR, 1);
|
||||
l->index += 2;
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Main lexing loop: walks the whole source and appends tokens to the lexer.
 *
 * Newlines bump `row` and reset `column`; `//` comments and whitespace are
 * skipped; everything else is dispatched to the specialised parse_* helpers.
 * Bytes that start no known token are skipped silently.
 *
 * The loop also visits index == size, i.e. the NUL terminator the caller is
 * expected to have stored there; that byte matches nothing and ends the loop.
 */
static void parse(lexer *l)
{
    while (l->index <= l->size) {
        const char c = l->source[l->index];
        l->column += 1;

        if (c == '\n') {
            l->index += 1;
            l->row += 1;
            l->column = 0;
            continue;
        }

        const usize head = l->index;

        if (c == '/' && l->source[l->index + 1] == '/') {
            /* Stop at the end of the source as well as at a newline, so a
             * comment on the last line cannot run off the buffer. */
            while (l->index < l->size && l->source[l->index] != '\n') {
                l->index += 1;
            }
            l->column += (l->index - head - 1);
            /* Restart the loop so the newline goes through the branch above
             * and the row counter stays correct. */
            continue;
        }

        /* ctype functions require an unsigned char value (or EOF). */
        if (isspace((unsigned char)c)) {
            l->index += 1;
            continue;
        }

        if (parse_special(l)) {
            l->column += (l->index - head - 1);
            continue;
        }

        if (isdigit((unsigned char)c)) {
            parse_number(l);
            l->column += (l->index - head - 1);
            continue;
        }

        if (isalpha((unsigned char)c)) {
            parse_identifier(l);
            l->column += (l->index - head - 1);
            continue;
        }

        if (c == '"') {
            l->index += 1; /* parse_string expects to start after the quote */
            parse_string(l);
            l->column += (l->index - head - 1);
            continue;
        }

        l->index += 1;
    }
}
|
||||
|
||||
/*
 * Creates a lexer over `source` (`size` bytes), builds the keyword trie and
 * tokenises the whole input before returning.
 *
 * The lexer, the trie and all tokens are allocated from `arena`. The global
 * `keywords` pointer is replaced on every call.
 */
lexer *lexer_init(char *source, usize size, arena *arena)
{
    /* Reserved words, inserted in this order. */
    static const struct {
        char *word;
        token_type type;
    } reserved[] = {
        { "true",     TOKEN_TRUE     },
        { "false",    TOKEN_FALSE    },
        { "struct",   TOKEN_STRUCT   },
        { "enum",     TOKEN_ENUM     },
        { "union",    TOKEN_UNION    },
        { "loop",     TOKEN_LOOP     },
        { "while",    TOKEN_WHILE    },
        { "until",    TOKEN_UNTIL    },
        { "goto",     TOKEN_GOTO     },
        { "if",       TOKEN_IF       },
        { "else",     TOKEN_ELSE     },
        { "switch",   TOKEN_SWITCH   },
        { "break",    TOKEN_BREAK    },
        { "defer",    TOKEN_DEFER    },
        { "return",   TOKEN_RETURN   },
        { "import",   TOKEN_IMPORT   },
        { "const",    TOKEN_CONST    },
        { "extern",   TOKEN_EXTERN   },
        { "volatile", TOKEN_VOLATILE },
    };

    lexer *lex = arena_alloc(arena, sizeof(lexer));
    lex->source = source;
    lex->size = size;
    lex->index = 0;
    lex->row = 1;
    lex->column = 1;
    lex->tokens = 0;
    lex->tail = 0;
    lex->allocator = arena;

    keywords = arena_alloc(arena, sizeof(trie_node));
    for (size_t i = 0; i < sizeof reserved / sizeof reserved[0]; i++) {
        trie_insert(keywords, lex->allocator, reserved[i].word, reserved[i].type);
    }

    parse(lex);

    return lex;
}
|
||||
97
lexer.h
97
lexer.h
|
|
@ -1,97 +0,0 @@
|
|||
#ifndef LEXER_H
|
||||
#define LEXER_H
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
/*
 * Kinds of token produced by the lexer.
 *
 * TOKEN_ERROR must stay the first enumerator (value 0): the lexer treats a
 * zero keyword lookup result as "not a keyword".
 */
typedef enum {
    /* bookkeeping */
    TOKEN_ERROR,
    TOKEN_END,

    /* arithmetic, bitwise and shift operators */
    TOKEN_PLUS,          /* +  */
    TOKEN_PLUS_PLUS,     /* ++ */
    TOKEN_MINUS,         /* -  */
    TOKEN_MINUS_MINUS,   /* -- */
    TOKEN_SLASH,         /* /  */
    TOKEN_PERC,          /* %  */
    TOKEN_STAR,          /* *  */
    TOKEN_AND,           /* &  */
    TOKEN_HAT,           /* ^  */
    TOKEN_PIPE,          /* |  */
    TOKEN_LSHIFT,        /* << */
    TOKEN_RSHIFT,        /* >> */

    /* comparison and assignment */
    TOKEN_DOUBLE_EQ,     /* == */
    TOKEN_ARROW,         /* <- */
    TOKEN_EQ,            /* =  */
    TOKEN_LESS_THAN,     /* <  */
    TOKEN_GREATER_THAN,  /* >  */
    TOKEN_LESS_EQ,       /* <= */
    TOKEN_GREATER_EQ,    /* >= */
    TOKEN_NOT_EQ,        /* != */
    TOKEN_PLUS_EQ,       /* += */
    TOKEN_MINUS_EQ,      /* -= */
    TOKEN_STAR_EQ,       /* *= */
    TOKEN_SLASH_EQ,      /* /= */
    TOKEN_AND_EQ,        /* &= */
    TOKEN_HAT_EQ,        /* ^= */
    TOKEN_PIPE_EQ,       /* |= */
    TOKEN_PERC_EQ,       /* %= */
    TOKEN_LSHIFT_EQ,     /* <<= */
    TOKEN_RSHIFT_EQ,     /* >>= */

    /* logical operators and punctuation */
    TOKEN_OR,            /* || */
    TOKEN_DOUBLE_AND,    /* && */
    TOKEN_COLON,         /* :  */
    TOKEN_SEMICOLON,     /* ;  */
    TOKEN_DOT,           /* .  */
    TOKEN_DOUBLE_DOT,    /* .. */
    TOKEN_BANG,          /* !  */
    TOKEN_COMMA,         /* ,  */
    TOKEN_LPAREN,        /* (  */
    TOKEN_RPAREN,        /* )  */
    TOKEN_LSQUARE,       /* [  */
    TOKEN_RSQUARE,       /* ]  */
    TOKEN_LCURLY,        /* {  */
    TOKEN_RCURLY,        /* }  */

    /* literals and names */
    TOKEN_INTEGER,
    TOKEN_FLOAT,
    TOKEN_IDENTIFIER,
    TOKEN_STRING,
    TOKEN_CHAR,

    /* keywords */
    TOKEN_TRUE,
    TOKEN_FALSE,
    TOKEN_GOTO,
    TOKEN_LOOP,
    TOKEN_WHILE,
    TOKEN_UNTIL,
    TOKEN_IF,
    TOKEN_ELSE,
    TOKEN_SWITCH,
    TOKEN_BREAK,
    TOKEN_DEFER,
    TOKEN_RETURN,
    TOKEN_IMPORT,
    TOKEN_CONST,
    TOKEN_EXTERN,
    TOKEN_VOLATILE,
    TOKEN_STRUCT,
    TOKEN_ENUM,
    TOKEN_UNION
} token_type;
|
||||
|
||||
typedef struct _token {
|
||||
token_type type;
|
||||
source_pos position;
|
||||
char *lexeme;
|
||||
usize lexeme_len;
|
||||
struct _token *next;
|
||||
} token;
|
||||
|
||||
typedef struct {
|
||||
usize column, row, index, size;
|
||||
char *source;
|
||||
token *tokens;
|
||||
token *tail;
|
||||
arena *allocator;
|
||||
} lexer;
|
||||
|
||||
/* Allocates a lexer from `arena` and tokenises `source` (`size` bytes) before returning it. */
lexer *lexer_init(char *source, usize size, arena *arena);
|
||||
|
||||
#endif
|
||||
255
parser.h
255
parser.h
|
|
@ -1,255 +0,0 @@
|
|||
#ifndef PARSER_H
|
||||
#define PARSER_H
|
||||
|
||||
#include "lexer.h"
|
||||
#include "utils.h"
|
||||
#include <stdbool.h>
|
||||
|
||||
struct _type;
|
||||
struct _ast_node;
|
||||
|
||||
/* Binary operators that can appear in a NODE_BINARY expression. */
typedef enum {
    /* arithmetic and bitwise */
    OP_PLUS,        /* +  */
    OP_MINUS,       /* -  */
    OP_DIV,         /* /  */
    OP_MUL,         /* *  */
    OP_MOD,         /* %  */
    OP_BOR,         /* |  */
    OP_BAND,        /* &  */
    OP_BXOR,        /* ^  */

    /* assignment forms */
    OP_ASSIGN,      /* =   */
    OP_ASSIGN_PTR,  /* <-  */
    OP_RSHIFT_EQ,   /* >>= */
    OP_LSHIFT_EQ,   /* <<= */
    OP_PLUS_EQ,     /* +=  */
    OP_MINUS_EQ,    /* -=  */
    OP_DIV_EQ,      /* /=  */
    OP_MUL_EQ,      /* *=  */
    OP_BOR_EQ,      /* |=  */
    OP_BAND_EQ,     /* &=  */
    OP_BXOR_EQ,     /* ^=  */
    OP_MOD_EQ,      /* %=  */

    /* comparison and logical */
    OP_EQ,          /* == */
    OP_AND,         /* && */
    OP_OR,          /* || */
    OP_NEQ,         /* != */
    OP_GT,          /* >  */
    OP_LT,          /* <  */
    OP_GE,          /* >= */
    OP_LE,          /* <= */
} binary_op;
|
||||
|
||||
/* Operators carried by NODE_UNARY and NODE_POSTFIX nodes. */
typedef enum {
    UOP_INCR,   /* ++ */
    UOP_MINUS,  /* -  */
    UOP_DECR,   /* -- */
    UOP_DEREF,  /* *  */
    UOP_REF,    /* &  */
    UOP_NOT,    /* !  */
} unary_op;
|
||||
|
||||
/* Layout choices for a struct (names only; not used within this header). */
typedef enum {
    LAYOUT_AUTO,
    LAYOUT_PACKED,
    LAYOUT_EXTERN
} struct_layout;
|
||||
|
||||
typedef struct _member {
|
||||
struct _ast_node *type;
|
||||
char *name;
|
||||
usize name_len;
|
||||
struct _member *next;
|
||||
usize offset;
|
||||
} member;
|
||||
|
||||
typedef struct {
|
||||
char *name;
|
||||
usize name_len;
|
||||
member *params;
|
||||
} function;
|
||||
|
||||
typedef struct _variant {
|
||||
struct _ast_node *value;
|
||||
char *name;
|
||||
usize name_len;
|
||||
struct _variant *next;
|
||||
} variant;
|
||||
|
||||
/* Discriminator for `ast_node`: selects which member of its union is in use. */
typedef enum {
    /* expressions */
    NODE_IDENTIFIER,
    NODE_INTEGER,
    NODE_FLOAT,
    NODE_STRING,
    NODE_CHAR,
    NODE_BOOL,
    NODE_CAST,
    NODE_UNARY,
    NODE_BINARY,
    NODE_RANGE,
    NODE_ARRAY_SUBSCRIPT,
    NODE_POSTFIX,
    NODE_CALL,
    NODE_ACCESS,
    NODE_STRUCT_INIT,
    NODE_TERNARY, /* TODO */

    /* statements */
    NODE_BREAK,
    NODE_RETURN,
    NODE_IMPORT,
    NODE_FOR,
    NODE_WHILE,
    NODE_IF,
    NODE_VAR_DECL,
    NODE_LABEL,
    NODE_GOTO,

    /* declarations and structural nodes */
    NODE_ENUM,
    NODE_STRUCT,
    NODE_UNION,
    NODE_FUNCTION,
    NODE_PTR_TYPE,
    NODE_SWITCH, /* TODO */
    NODE_UNIT,
} node_type;
|
||||
|
||||
/*
 * Bit flags stored in `ptr_type.flags`.
 * PTR_SLICE is zero, i.e. "PTR_RAW not set": a pointer type without PTR_RAW
 * is a slice.
 */
#define PTR_SLICE    0x0
#define PTR_RAW      0x1
#define PTR_CONST    0x2
#define PTR_VOLATILE 0x4

/* Bit flags for loop nodes. */
#define LOOP_WHILE 0x1
#define LOOP_UNTIL 0x2
#define LOOP_AFTER 0x4
|
||||
|
||||
typedef struct _ast_node {
|
||||
node_type type;
|
||||
source_pos position;
|
||||
struct _type *expr_type;
|
||||
bool address_taken; // used in IR generation.
|
||||
union {
|
||||
struct {
|
||||
struct _ast_node *type;
|
||||
u8 flags;
|
||||
} ptr_type;
|
||||
struct {
|
||||
char *name;
|
||||
usize name_len;
|
||||
} label; // both label and goto
|
||||
struct {
|
||||
struct _ast_node *left;
|
||||
struct _ast_node *right;
|
||||
binary_op operator;
|
||||
} binary;
|
||||
struct {
|
||||
struct _ast_node *right;
|
||||
unary_op operator;
|
||||
} unary;
|
||||
u8 boolean;
|
||||
i64 integer;
|
||||
f64 flt; // float
|
||||
struct {
|
||||
char *start;
|
||||
usize len;
|
||||
} string;
|
||||
char ch; // char;
|
||||
struct {
|
||||
struct _ast_node *condition;
|
||||
struct _ast_node *then;
|
||||
struct _ast_node *otherwise;
|
||||
} ternary;
|
||||
struct {
|
||||
struct _ast_node *value;
|
||||
struct _ast_node *type;
|
||||
} cast;
|
||||
struct {
|
||||
struct _ast_node *expr;
|
||||
struct _ast_node *index;
|
||||
} subscript;
|
||||
struct {
|
||||
struct _ast_node *expr;
|
||||
struct _ast_node *member;
|
||||
} access;
|
||||
struct {
|
||||
struct _ast_node *expr;
|
||||
struct _ast_node *next;
|
||||
} unit_node;
|
||||
struct {
|
||||
/* This should be a list of unit_node */
|
||||
struct _ast_node *parameters;
|
||||
usize param_len;
|
||||
char *name;
|
||||
usize name_len;
|
||||
} call;
|
||||
struct {
|
||||
struct _ast_node *value;
|
||||
} ret;
|
||||
struct {
|
||||
/* This should be an access. */
|
||||
struct _ast_node *path;
|
||||
} import;
|
||||
struct {
|
||||
/* These should be lists of unit_node */
|
||||
struct _ast_node *slices;
|
||||
usize slice_len;
|
||||
struct _ast_node *captures;
|
||||
usize capture_len;
|
||||
struct _ast_node* body;
|
||||
} fr; // for
|
||||
struct {
|
||||
struct _ast_node *condition;
|
||||
struct _ast_node *body;
|
||||
u8 flags;
|
||||
} whle; // while
|
||||
struct {
|
||||
struct _ast_node *condition;
|
||||
struct _ast_node *body;
|
||||
struct _ast_node *otherwise;
|
||||
u8 flags;
|
||||
} if_stmt; // while
|
||||
struct {
|
||||
struct _ast_node **statements;
|
||||
usize stmt_len;
|
||||
} compound;
|
||||
struct {
|
||||
struct _ast_node *value;
|
||||
char *name;
|
||||
usize name_len;
|
||||
struct _ast_node *type;
|
||||
} var_decl;
|
||||
struct {
|
||||
member *members;
|
||||
char *name;
|
||||
usize name_len;
|
||||
} structure;
|
||||
struct {
|
||||
member *parameters;
|
||||
usize parameters_len;
|
||||
char *name;
|
||||
usize name_len;
|
||||
struct _ast_node *type;
|
||||
struct _ast_node *body;
|
||||
} function;
|
||||
struct {
|
||||
variant *variants;
|
||||
char *name;
|
||||
usize name_len;
|
||||
} enm; // enum
|
||||
struct {
|
||||
struct _ast_node *members;
|
||||
usize members_len;
|
||||
} struct_init;
|
||||
} expr;
|
||||
} ast_node;
|
||||
|
||||
typedef struct {
|
||||
token *tokens;
|
||||
token *previous;
|
||||
ast_node *ast;
|
||||
arena *allocator;
|
||||
} parser;
|
||||
|
||||
/* Creates a parser for the lexer `l`; main reads the resulting tree from the `ast` field. */
parser *parser_init(lexer *l, arena *allocator);
|
||||
|
||||
#endif
|
||||
818
sema.c
818
sema.c
|
|
@ -1,818 +0,0 @@
|
|||
#define STB_DS_IMPLEMENTATION
|
||||
#include "sema.h"
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
typedef struct _res_node {
|
||||
struct _res_node **in;
|
||||
struct _res_node **out;
|
||||
type *value;
|
||||
} res_node;
|
||||
|
||||
typedef struct { res_node node; bool complete; } pair;
|
||||
|
||||
typedef struct { u8 flags; char *name; } type_key;
|
||||
|
||||
static struct { char *key; pair *value; } *types;
|
||||
static struct { char *key; type *value; } *type_reg;
|
||||
|
||||
static struct { char *key; prototype *value; } *prototypes;
|
||||
|
||||
static scope *global_scope = NULL;
|
||||
static scope *current_scope = NULL;
|
||||
static type *current_return = NULL;
|
||||
|
||||
static type *const_int = NULL;
|
||||
static type *const_float = NULL;
|
||||
|
||||
static bool in_loop = false;
|
||||
|
||||
/*
 * Prints a coloured diagnostic to stdout.
 *
 * When `n` is non-NULL the message is prefixed with the node's row and
 * column; otherwise only the message is printed.
 */
static void error(ast_node *n, char *msg)
{
    if (n) {
        /* The position fields are converted explicitly so the arguments match
         * the conversion specifiers whatever integer type source_pos uses. */
        printf("\x1b[31m\x1b[1merror\x1b[0m\x1b[1m:%lu:%lu:\x1b[0m %s\n",
               (unsigned long)n->position.row,
               (unsigned long)n->position.column,
               msg);
    } else {
        printf("\x1b[31m\x1b[1merror\x1b[0m\x1b[1m:\x1b[0m %s\n", msg);
    }
}
|
||||
|
||||
/*
 * Returns a freshly malloc'ed, NUL-terminated copy of the first `len` bytes
 * of `str`.
 *
 * Despite the name no deduplication takes place: every call allocates. The
 * caller owns the result and must either free it or hand it to a container
 * that keeps the pointer. `s` is unused.
 *
 * Terminates the process if the allocation fails.
 */
static char *intern_string(sema *s, char *str, usize len)
{
    (void) s;

    char *copy = malloc(len + 1);
    if (!copy) {
        error(NULL, "out of memory.");
        exit(EXIT_FAILURE);
    }

    memcpy(copy, str, len);
    copy[len] = '\0';
    return copy;
}
|
||||
|
||||
/*
 * Creates a built-in integer type of `bits` width (signed when `sign` is
 * true) and enters it into the type dependency graph under `name`.
 *
 * `name` is stored by pointer, both in the type and as the map key, so it
 * must outlive the maps. Size and alignment are not set here.
 */
static type *create_integer(sema *s, char *name, u8 bits, bool sign)
{
    type *t = arena_alloc(s->allocator, sizeof(type));
    t->name = name;
    t->tag = sign ? TYPE_INTEGER : TYPE_UINTEGER;
    t->data.integer = bits;

    pair *graph_node = arena_alloc(s->allocator, sizeof(pair));
    graph_node->node.value = t;
    graph_node->node.in = NULL;
    graph_node->node.out = NULL;
    /* A built-in is fully defined from the start. Setting the flag avoids
     * reading an uninitialised bool when order_type later checks `complete`,
     * and makes redefining a built-in name an error. */
    graph_node->complete = true;

    shput(types, name, graph_node);
    return t;
}
|
||||
|
||||
/*
 * Creates a built-in floating-point type of `bits` width and enters it into
 * the type dependency graph under `name`.
 *
 * `name` is stored by pointer, both in the type and as the map key, so it
 * must outlive the maps. Size and alignment are not set here.
 */
static type *create_float(sema *s, char *name, u8 bits)
{
    type *t = arena_alloc(s->allocator, sizeof(type));
    t->name = name;
    t->tag = TYPE_FLOAT;
    t->data.flt = bits;

    pair *graph_node = arena_alloc(s->allocator, sizeof(pair));
    graph_node->node.value = t;
    graph_node->node.in = NULL;
    graph_node->node.out = NULL;
    /* A built-in is fully defined from the start. Setting the flag avoids
     * reading an uninitialised bool when order_type later checks `complete`,
     * and makes redefining a built-in name an error. */
    graph_node->complete = true;

    shput(types, name, graph_node);
    return t;
}
|
||||
|
||||
/*
 * Enters a struct or union declaration into the type dependency graph.
 *
 * For every member whose type is a plain identifier an edge is added between
 * this type and the member's type, creating an incomplete placeholder for
 * types that have not been declared yet. Members of any other kind (for
 * example pointer types) add no dependency. Nodes that are neither structs
 * nor unions are ignored.
 *
 * Reports "type already defined." when a complete type with the same name
 * already exists.
 */
static void order_type(sema *s, ast_node *node)
{
    if (node->type != NODE_STRUCT && node->type != NODE_UNION) {
        return;
    }

    char *k = intern_string(s, node->expr.structure.name, node->expr.structure.name_len);
    pair *graph_node = shget(types, k);

    if (graph_node && graph_node->complete) {
        error(node, "type already defined.");
        free(k); /* nothing keeps the key on this path */
        return;
    }

    if (!graph_node) {
        graph_node = arena_alloc(s->allocator, sizeof(pair));
        graph_node->node.in = NULL;
        graph_node->node.out = NULL;
    }

    type *t = arena_alloc(s->allocator, sizeof(type));
    t->tag = node->type == NODE_STRUCT ? TYPE_STRUCT : TYPE_UNION;
    t->data.structure.name = node->expr.structure.name;
    t->data.structure.name_len = node->expr.structure.name_len;
    t->data.structure.members = node->expr.structure.members;
    t->name = k;
    graph_node->node.value = t;

    for (member *m = t->data.structure.members; m; m = m->next) {
        if (m->type->type != NODE_IDENTIFIER) {
            continue;
        }

        char *name = intern_string(s, m->type->expr.string.start, m->type->expr.string.len);
        pair *p = shget(types, name);
        if (p) {
            /* The lookup does not retain the key; release the temporary copy. */
            free(name);
        } else {
            /* First mention of this type: add an incomplete placeholder.
             * The map keeps `name` as its key from here on. */
            p = arena_alloc(s->allocator, sizeof(pair));
            p->node.out = NULL;
            p->node.in = NULL;
            p->node.value = NULL;
            p->complete = false;
            shput(types, name, p);
        }

        arrput(graph_node->node.in, &p->node);
        arrput(p->node.out, &graph_node->node);
    }

    shput(types, k, graph_node);
    graph_node->complete = true;
}
|
||||
|
||||
/*
 * Resolves a type expression to a `type`.
 *
 * - NODE_IDENTIFIER: looks the name up in the registry of registered types
 *   and returns the stored value.
 * - NODE_PTR_TYPE: builds a new pointer type (PTR_RAW set) or slice type
 *   (PTR_RAW clear) around the resolved child type. The child may be NULL
 *   when the pointee is not registered yet; that is not treated as an error
 *   here.
 * - anything else: reports "expected type." and returns NULL.
 *
 * Also returns NULL, after reporting, when the pointer type cannot be
 * allocated. Pointer types are heap-allocated and never freed in this file.
 */
static type *get_type(sema *s, ast_node *n)
{
    char *name = NULL;
    type *t = NULL;

    switch (n->type) {
    case NODE_IDENTIFIER:
        name = intern_string(s, n->expr.string.start, n->expr.string.len);
        t = shget(type_reg, name);
        free(name);
        return t;
    case NODE_PTR_TYPE:
        t = malloc(sizeof(type));
        if (!t) {
            error(n, "out of memory.");
            return NULL;
        }
        t->size = sizeof(usize);
        t->alignment = sizeof(usize);
        if (n->expr.ptr_type.flags & PTR_RAW) {
            t->name = "ptr";
            t->tag = TYPE_PTR;
            t->data.ptr.child = get_type(s, n->expr.ptr_type.type);
            t->data.ptr.is_const = (n->expr.ptr_type.flags & PTR_CONST) != 0;
            t->data.ptr.is_volatile = (n->expr.ptr_type.flags & PTR_VOLATILE) != 0;
        } else {
            t->name = "slice";
            t->tag = TYPE_SLICE;
            t->data.slice.child = get_type(s, n->expr.ptr_type.type);
            t->data.slice.is_const = (n->expr.ptr_type.flags & PTR_CONST) != 0;
            t->data.slice.is_volatile = (n->expr.ptr_type.flags & PTR_VOLATILE) != 0;
        }
        return t;
    default:
        error(n, "expected type.");
        return NULL;
    }
}
|
||||
|
||||
/*
 * Computes the layout of a struct type: assigns each member its offset and
 * sets the struct's alignment and size.
 *
 * Members are placed in declaration order, each padded up to its own
 * alignment; the struct's alignment is the largest member alignment and its
 * size is rounded up to that alignment. Each member's resolved type is also
 * recorded in the struct's `member_types` map under the member name.
 *
 * Stops early, leaving size and alignment untouched, when a member has an
 * unknown type or a zero-sized type. `name` is unused.
 */
static void register_struct(sema *s, char *name, type *t)
{
    (void) name;

    usize max_align = 0;
    usize cursor = 0;

    for (member *field = t->data.structure.members; field; field = field->next) {
        type *field_type = get_type(s, field->type);
        if (field_type == NULL) {
            error(field->type, "unknown type.");
            return;
        }

        char *key = intern_string(s, field->name, field->name_len);
        shput(t->data.structure.member_types, key, field_type);

        if (field_type->size == 0) {
            error(field->type, "a struct member can't be of type `void`.");
            return;
        }

        if (field_type->alignment > max_align) {
            max_align = field_type->alignment;
        }

        /* Round the running offset up to the member's alignment. */
        usize misalign = cursor % field_type->alignment;
        if (misalign != 0) {
            cursor += field_type->alignment - misalign;
        }
        field->offset = cursor;
        cursor += field_type->size;
    }

    t->alignment = max_align;

    /* Trailing padding so consecutive structs stay aligned. */
    if (max_align > 0) {
        usize misalign = cursor % max_align;
        if (misalign != 0) {
            cursor += max_align - misalign;
        }
    }

    t->size = cursor;
}
|
||||
|
||||
/*
 * Computes the layout of union type `t`.
 *
 * Every member is resolved and recorded in the member_types map under its
 * interned name. The union's alignment is the largest member alignment and
 * its size is the largest member size, rounded up to that alignment (the
 * same trailing-padding rule register_struct applies).
 *
 * On an unknown or zero-sized (`void`) member type an error is reported and
 * the function returns without setting t->size / t->alignment.
 * `name` is not used here.
 */
static void register_union(sema *s, char *name, type *t)
{
	usize alignment = 0;
	usize size = 0;

	for (member *m = t->data.structure.members; m; m = m->next) {
		type *m_type = get_type(s, m->type);
		if (!m_type) {
			error(m->type, "unknown type.");
			return;
		}

		char *n = intern_string(s, m->name, m->name_len);
		shput(t->data.structure.member_types, n, m_type);

		/* Mirror register_struct: a zero-sized member is meaningless. */
		if (m_type->size == 0) {
			error(m->type, "a union member can't be of type `void`.");
			return;
		}

		if (alignment < m_type->alignment) {
			alignment = m_type->alignment;
		}

		if (size < m_type->size) {
			size = m_type->size;
		}
	}

	/*
	 * The largest member is not necessarily the most aligned one, so pad
	 * the size up to a multiple of the alignment.
	 */
	if (alignment > 0 && size % alignment != 0) {
		size += alignment - (size % alignment);
	}

	t->alignment = alignment;
	t->size = size;
}
|
||||
|
||||
/*
 * Fills in size and alignment for `t` according to its tag and stores it
 * in the global type registry under `name`.
 *
 * Integer and float types take their byte size from the bit width held in
 * the type; pointers are 8 bytes; structs and unions are laid out by
 * register_struct / register_union. Any other tag is reported as an error
 * and the type is not registered.
 */
static void register_type(sema *s, char *name, type *t)
{
	usize bytes;

	switch (t->tag) {
	case TYPE_INTEGER:
	case TYPE_UINTEGER:
		bytes = t->data.integer / 8;
		t->size = bytes;
		t->alignment = bytes;
		break;
	case TYPE_FLOAT:
		bytes = t->data.flt / 8;
		t->size = bytes;
		t->alignment = bytes;
		break;
	case TYPE_PTR:
		t->size = 8;
		t->alignment = 8;
		break;
	case TYPE_STRUCT:
		register_struct(s, name, t);
		break;
	case TYPE_UNION:
		register_union(s, name, t);
		break;
	default:
		error(NULL, "registering an invalid type.");
		return;
	}

	shput(type_reg, name, t);
}
|
||||
|
||||
/*
 * Registers all user-defined struct/union types in dependency order.
 *
 * The entries of the global `types` map form a graph through their
 * `in` / `out` edge arrays. Nodes are ordered with Kahn's algorithm:
 * nodes without incoming edges are queued, and consuming a node's outgoing
 * edges releases the nodes that depended on it. The out-edge arrays are
 * emptied in the process. If some nodes can never be released, a cyclic
 * definition is reported; the nodes that were ordered are still registered.
 */
static void create_types(sema *s)
{
	/* Work queue and final order in one array: nodes are appended when
	 * released and consumed front to back. */
	res_node **ordered = NULL;
	usize node_count = shlen(types);

	for (usize i = 0; i < node_count; i++) {
		if (arrlen(types[i].value->node.in) == 0) {
			arrput(ordered, &types[i].value->node);
		}
	}

	for (usize next = 0; next < (usize)arrlen(ordered); next++) {
		res_node *n = ordered[next];

		while (arrlen(n->out) > 0) {
			res_node *dep = n->out[0];
			arrdel(n->out, 0);

			/*
			 * Remove exactly one matching incoming edge per outgoing
			 * edge. Continuing the scan after arrdel would skip the
			 * element shifted into slot j and could drop several edges
			 * at once, releasing `dep` more than once.
			 */
			for (usize j = 0; j < (usize)arrlen(dep->in); j++) {
				if (dep->in[j] == n) {
					arrdel(dep->in, j);
					break;
				}
			}

			if (arrlen(dep->in) == 0) {
				arrput(ordered, dep);
			}
		}
	}

	if ((usize)arrlen(ordered) < node_count) {
		error(NULL, "cycling struct definition.");
	}

	for (usize i = 0; i < (usize)arrlen(ordered); i++) {
		type *t = ordered[i]->value;
		if (t && (t->tag == TYPE_STRUCT || t->tag == TYPE_UNION)) {
			register_type(s, t->name, t);
		}
	}

	/* The order array is scratch storage owned by this function. */
	arrfree(ordered);
}
|
||||
|
||||
/*
 * Builds the prototype (name, parameter types, return type) of function
 * node `node` and stores it in the global `prototypes` map.
 *
 * The function name is interned and written back into the AST node.
 * A duplicate name is reported but the prototype is still (re)stored.
 * If a parameter type cannot be resolved, an error is reported and no
 * prototype is stored.
 */
static void create_prototype(sema *s, ast_node *node)
{
	prototype *p = arena_alloc(s->allocator, sizeof(prototype));
	p->name = intern_string(s, node->expr.function.name, node->expr.function.name_len);
	/*
	 * arena_alloc does not clear memory and the arena may hand out
	 * previously used bytes after a snapshot reset, so the stb_ds array
	 * head must be NULL before the first arrput.
	 */
	p->parameters = NULL;
	p->type = NULL;
	node->expr.function.name = p->name;
	if (shget(prototypes, p->name)) {
		error(node, "function already defined.");
	}

	for (member *m = node->expr.function.parameters; m; m = m->next) {
		type *t = get_type(s, m->type);
		if (!t) {
			error(m->type, "unknown type.");
			/* The prototype is dropped; release the heap-backed array. */
			arrfree(p->parameters);
			return;
		}

		arrput(p->parameters, t);
	}

	p->type = get_type(s, node->expr.function.type);
	shput(prototypes, p->name, p);
}
|
||||
|
||||
/*
 * Opens a new, empty lexical scope nested inside the current one and makes
 * it the current scope.
 */
static void push_scope(sema *s)
{
	scope *scp = arena_alloc(s->allocator, sizeof(scope));
	scp->parent = current_scope;
	/*
	 * arena_alloc does not clear memory; the stb_ds map must start out
	 * NULL (as sema_init does for the global scope) before shget/shput.
	 */
	scp->defs = NULL;
	current_scope = scp;
}
|
||||
|
||||
/*
 * Leaves the current scope, making its parent the current scope again.
 * Must be paired with an earlier push_scope; `s` is not used.
 */
static void pop_scope(sema *s)
{
	scope *leaving = current_scope;

	current_scope = leaving->parent;
}
|
||||
|
||||
/*
 * Looks `name` up in the current scope and then in each enclosing scope.
 * Returns the defining node of the innermost match, or NULL when no scope
 * defines the name. `name` is used as a key of the scope's `defs` map.
 */
static ast_node *get_def(sema *s, char *name)
{
	for (scope *scp = current_scope; scp != NULL; scp = scp->parent) {
		ast_node *found = shget(scp->defs, name);
		if (found != NULL) {
			return found;
		}
	}

	return NULL;
}
|
||||
|
||||
/*
 * Builds the type of a string literal: a const, non-volatile slice of `u8`
 * whose length is the literal's length.
 */
static type *get_string_type(sema *s, ast_node *node)
{
	type *string_type = arena_alloc(s->allocator, sizeof(type));
	/*
	 * The slice arm of the union is filled below, so the tag must be
	 * TYPE_SLICE; with TYPE_PTR, readers would interpret the same bytes
	 * through the differently laid out `ptr` arm.
	 */
	string_type->tag = TYPE_SLICE;
	string_type->size = sizeof(usize);
	string_type->alignment = sizeof(usize);
	string_type->name = "slice";
	string_type->data.slice.child = shget(type_reg, "u8");
	string_type->data.slice.is_const = true;
	string_type->data.slice.is_volatile = false;
	string_type->data.slice.len = node->expr.string.len;
	return string_type;
}
|
||||
|
||||
/*
 * Builds the type of a range expression: a const, non-volatile slice of
 * `usize` whose length is the difference between the right and left
 * integer operands.
 *
 * NOTE(review): both operands are read as integer literals without
 * checking their node type, and right < left wraps around — confirm the
 * parser guarantees literal, ordered bounds.
 */
static type *get_range_type(sema *s, ast_node *node)
{
	type *range_type = arena_alloc(s->allocator, sizeof(type));
	/*
	 * The slice arm of the union is filled below, so the tag must be
	 * TYPE_SLICE rather than TYPE_PTR.
	 */
	range_type->tag = TYPE_SLICE;
	range_type->size = sizeof(usize);
	range_type->alignment = sizeof(usize);
	range_type->name = "slice";
	range_type->data.slice.child = shget(type_reg, "usize");
	range_type->data.slice.is_const = true;
	range_type->data.slice.is_volatile = false;
	range_type->data.slice.len = node->expr.binary.right->expr.integer - node->expr.binary.left->expr.integer;
	return range_type;
}
|
||||
|
||||
static type *get_expression_type(sema *s, ast_node *node);
|
||||
/*
 * Resolves the type of a member access expression.
 *
 * The accessed expression must have struct or union type; the member name
 * is interned and looked up in that type's member_types map. Reports an
 * error and returns NULL if the base is not an aggregate or the member
 * does not exist.
 */
static type *get_access_type(sema *s, ast_node *node)
{
	type *base = get_expression_type(s, node->expr.access.expr);
	ast_node *field = node->expr.access.member;
	char *field_start = field->expr.string.start;
	usize field_len = field->expr.string.len;

	bool is_aggregate = base && (base->tag == TYPE_STRUCT || base->tag == TYPE_UNION);
	if (!is_aggregate) {
		error(node, "invalid expression.");
		return NULL;
	}

	char *key = intern_string(s, field_start, field_len);
	type *field_type = shget(base->data.structure.member_types, key);
	if (field_type == NULL) {
		error(node, "struct doesn't have that member");
	}

	/* NULL here when the member was not found. */
	return field_type;
}
|
||||
|
||||
/*
 * Resolves the type of an identifier expression.
 *
 * The identifier text is interned and written back into the node, then
 * looked up through the scope chain. Returns the type recorded on the
 * defining node, or NULL (after reporting an error) when the identifier
 * is not defined in any enclosing scope.
 */
static type *get_identifier_type(sema *s, ast_node *node)
{
	char *name_start = node->expr.string.start;
	usize name_len = node->expr.string.len;
	char *name = intern_string(s, name_start, name_len);
	node->expr.string.start = name;

	ast_node *def = get_def(s, name);
	if (!def) {
		error(node, "unknown identifier.");
		/* Nothing to read the type from; do not dereference `def`. */
		return NULL;
	}

	return def->expr_type;
}
|
||||
|
||||
static bool match(type *t1, type *t2);
|
||||
|
||||
/*
 * Tells whether a value of type `source` may be implicitly converted to
 * `dest`: an integer constant to any integer type, or a float constant to
 * a float type. Returns false for every other combination, including when
 * either type is NULL.
 */
static bool can_cast(type *source, type *dest)
{
	if (source == NULL || dest == NULL) {
		return false;
	}

	if (dest->tag == TYPE_INTEGER || dest->tag == TYPE_UINTEGER) {
		return source->tag == TYPE_INTEGER_CONST;
	}

	if (dest->tag == TYPE_FLOAT) {
		return source->tag == TYPE_FLOAT_CONST;
	}

	return false;
}
|
||||
|
||||
/*
 * Computes the type of expression `node`, stores it in node->expr_type and
 * returns it.
 *
 * A NULL node yields the registered `void` type. On a type error the
 * problem is reported through error() and NULL is returned; callers must
 * be prepared for a NULL result.
 *
 * NOTE(review): call arguments and subscript indices are not checked
 * here — confirm whether that happens elsewhere.
 */
static type *get_expression_type(sema *s, ast_node *node)
{
	if (!node) {
		return shget(type_reg, "void");
	}

	type *t = NULL;
	type *rhs = NULL;
	prototype *prot = NULL;

	switch (node->type) {
	case NODE_IDENTIFIER:
		t = get_identifier_type(s, node);
		break;
	case NODE_INTEGER:
		t = const_int;
		break;
	case NODE_FLOAT:
		t = const_float;
		break;
	case NODE_STRING:
		t = get_string_type(s, node);
		break;
	case NODE_CHAR:
		t = shget(type_reg, "u8");
		break;
	case NODE_BOOL:
		t = shget(type_reg, "bool");
		break;
	case NODE_CAST:
		t = get_type(s, node->expr.cast.type);
		break;
	case NODE_POSTFIX:
	case NODE_UNARY:
		t = get_expression_type(s, node->expr.unary.right);
		if (node->expr.unary.operator == UOP_REF) {
			/* Walk member accesses down to the variable being referenced. */
			ast_node *target = node->expr.unary.right;
			while (target && target->type == NODE_ACCESS) {
				target = target->expr.access.expr;
			}

			if (!target || target->type != NODE_IDENTIFIER) {
				error(node, "expected identifier.");
				return NULL;
			}

			char *name = target->expr.string.start;
			ast_node *def = get_def(s, name);
			if (def) {
				def->address_taken = true;
				target->address_taken = true;
			}

			/* Only build a pointer type when the operand has a type;
			 * otherwise the NULL result is propagated. */
			if (t) {
				type *pointee = t;
				t = arena_alloc(s->allocator, sizeof(type));
				t->tag = TYPE_PTR;
				t->size = sizeof(usize);
				t->alignment = sizeof(usize);
				t->name = "ptr";
				t->data.ptr.is_const = false;
				t->data.ptr.is_volatile = false;
				t->data.ptr.child = pointee;
			}
		} else if (node->expr.unary.operator == UOP_DEREF) {
			/* The operand may have failed to type-check (t == NULL). */
			if (!t || t->tag != TYPE_PTR) {
				error(node, "only pointers can be dereferenced.");
				return NULL;
			}
			t = t->data.ptr.child;
		}
		break;
	case NODE_BINARY:
		t = get_expression_type(s, node->expr.binary.left);
		if (!t) return NULL;
		if (node->expr.binary.operator == OP_ASSIGN_PTR) {
			if (t->tag != TYPE_PTR) {
				error(node, "expected pointer.");
				return NULL;
			}
			t = t->data.ptr.child;
		}
		/* Type the right operand once: typing it again would repeat its
		 * diagnostics and allocations. */
		rhs = get_expression_type(s, node->expr.binary.right);
		if (!can_cast(rhs, t) && !match(t, rhs)) {
			error(node, "type mismatch.");
			node->expr_type = NULL;
			return NULL;
		}
		if (node->expr.binary.operator >= OP_EQ) {
			t = shget(type_reg, "bool");
		} else if (node->expr.binary.operator >= OP_ASSIGN && node->expr.binary.operator <= OP_MOD_EQ) {
			t = shget(type_reg, "void");
		}
		break;
	case NODE_RANGE:
		t = get_range_type(s, node);
		break;
	case NODE_ARRAY_SUBSCRIPT:
		t = get_expression_type(s, node->expr.subscript.expr);
		/* The indexed expression may have failed to type-check. */
		if (!t) return NULL;
		switch (t->tag) {
		case TYPE_SLICE:
			t = t->data.slice.child;
			break;
		case TYPE_PTR:
			t = t->data.ptr.child;
			break;
		default:
			error(node, "only pointers and slices can be indexed.");
			return NULL;
		}
		break;
	case NODE_CALL:
		prot = shget(prototypes, intern_string(s, node->expr.call.name, node->expr.call.name_len));
		if (!prot) {
			error(node, "unknown function.");
			return NULL;
		}
		t = prot->type;
		break;
	case NODE_ACCESS:
		t = get_access_type(s, node);
		break;
	default:
		t = shget(type_reg, "void");
		break;
	}

	node->expr_type = t;
	return t;
}
|
||||
|
||||
/*
 * Structural type equality.
 *
 * Two types match when they carry the same tag and:
 *  - void / bool: always;
 *  - pointers: same const/volatile qualifiers and matching pointees;
 *  - slices: as pointers, plus equal length;
 *  - structs / unions: they are the very same type object;
 *  - integers / floats: equal bit width.
 * Enum, generic and constant types never match. NULL never matches.
 */
static bool match(type *t1, type *t2)
{
	if (t1 == NULL || t2 == NULL) {
		return false;
	}
	if (t1->tag != t2->tag) {
		return false;
	}

	switch (t1->tag) {
	case TYPE_VOID:
	case TYPE_BOOL:
		return true;
	case TYPE_STRUCT:
	case TYPE_UNION:
		return t1 == t2;
	case TYPE_INTEGER:
	case TYPE_UINTEGER:
		return t1->data.integer == t2->data.integer;
	case TYPE_FLOAT:
		return t1->data.flt == t2->data.flt;
	case TYPE_PTR:
		if (t1->data.ptr.is_const != t2->data.ptr.is_const) {
			return false;
		}
		if (t1->data.ptr.is_volatile != t2->data.ptr.is_volatile) {
			return false;
		}
		return match(t1->data.ptr.child, t2->data.ptr.child);
	case TYPE_SLICE:
		if (t1->data.slice.is_const != t2->data.slice.is_const) {
			return false;
		}
		if (t1->data.slice.is_volatile != t2->data.slice.is_volatile) {
			return false;
		}
		if (!match(t1->data.slice.child, t2->data.slice.child)) {
			return false;
		}
		return t1->data.slice.len == t2->data.slice.len;
	case TYPE_ENUM:
	case TYPE_GENERIC:
		/* TODO */
		return false;
	case TYPE_INTEGER_CONST:
	case TYPE_FLOAT_CONST:
		return false;
	}

	return false;
}
|
||||
|
||||
static void check_statement(sema *s, ast_node *node);
|
||||
/*
 * Checks every statement of a statement list inside a fresh scope.
 * `node` is the head of a chain of NODE_UNIT nodes; iteration stops at the
 * first node that is NULL or not a unit.
 */
static void check_body(sema *s, ast_node *node)
{
	push_scope(s);

	for (ast_node *unit = node;
	     unit != NULL && unit->type == NODE_UNIT;
	     unit = unit->expr.unit_node.next) {
		check_statement(s, unit->expr.unit_node.expr);
	}

	pop_scope(s);
}
|
||||
|
||||
/*
 * Checks a `for` loop.
 *
 * A new scope is opened; each capture is paired with the slice expression
 * at the same position and defined in that scope as a synthetic variable
 * declaration carrying the slice expression's type. The body is then
 * checked with `in_loop` set.
 *
 * NOTE(review): the capture receives the type of the whole slice
 * expression, not of its element — confirm this is intended.
 */
static void check_for(sema *s, ast_node *node)
{
	ast_node *current_capture = node->expr.fr.captures;
	ast_node *current_slice = node->expr.fr.slices;

	push_scope(s);

	while (current_capture) {
		/* Captures and slices are walked in lockstep; running out of
		 * slices first would otherwise dereference NULL. */
		if (!current_slice) {
			error(node, "more captures than slices.");
			break;
		}

		type *c_type = get_expression_type(s, current_slice->expr.unit_node.expr);
		char *c_name = intern_string(s, current_capture->expr.unit_node.expr->expr.string.start, current_capture->expr.unit_node.expr->expr.string.len);

		ast_node *cap_node = arena_alloc(s->allocator, sizeof(ast_node));
		cap_node->type = NODE_VAR_DECL;
		cap_node->expr_type = c_type;
		cap_node->address_taken = false;
		cap_node->expr.var_decl.name = c_name;

		shput(current_scope->defs, c_name, cap_node);
		current_capture = current_capture->expr.unit_node.next;
		current_slice = current_slice->expr.unit_node.next;
	}

	/*
	 * Restore the previous value instead of forcing false so that a
	 * `break` following this loop inside an enclosing loop stays valid.
	 */
	bool was_in_loop = in_loop;
	in_loop = true;
	for (ast_node *current = node->expr.fr.body;
	     current && current->type == NODE_UNIT;
	     current = current->expr.unit_node.next) {
		check_statement(s, current->expr.unit_node.expr);
	}
	in_loop = was_in_loop;

	pop_scope(s);
}
|
||||
|
||||
/*
 * Type-checks a single statement.
 *
 *  - return: the value must be castable to, or match, the current
 *    function's return type;
 *  - break: only valid while `in_loop` is set;
 *  - while / if: the condition must match `bool`; bodies are checked in
 *    their own scope;
 *  - for: delegated to check_for;
 *  - variable declaration: the declared type is resolved, the name is
 *    interned and must not already be visible, the initializer must be
 *    castable to or match the declared type, and the variable is added to
 *    the current scope;
 *  - anything else is typed as an expression.
 * A NULL node is ignored.
 */
static void check_statement(sema *s, ast_node *node)
{
	if (!node) return;

	type *t = NULL;
	type *value = NULL;
	char *name = NULL;
	bool was_in_loop = false;

	switch (node->type) {
	case NODE_RETURN:
		/* Type the returned expression once so its diagnostics and
		 * allocations are not repeated. */
		value = get_expression_type(s, node->expr.ret.value);
		if (!can_cast(value, current_return) && !match(value, current_return)) {
			error(node, "return type doesn't match function's one.");
		}
		break;
	case NODE_BREAK:
		if (!in_loop) {
			error(node, "`break` isn't in a loop.");
		}
		break;
	case NODE_WHILE:
		if (!match(get_expression_type(s, node->expr.whle.condition), shget(type_reg, "bool"))) {
			error(node, "expected boolean value.");
			return;
		}

		/*
		 * Restore the previous value instead of forcing false so that a
		 * `break` after a nested loop is still recognised as being
		 * inside the enclosing loop.
		 */
		was_in_loop = in_loop;
		in_loop = true;
		check_body(s, node->expr.whle.body);
		in_loop = was_in_loop;
		break;
	case NODE_IF:
		if (!match(get_expression_type(s, node->expr.if_stmt.condition), shget(type_reg, "bool"))) {
			error(node, "expected boolean value.");
			return;
		}

		check_body(s, node->expr.if_stmt.body);
		if (node->expr.if_stmt.otherwise) check_body(s, node->expr.if_stmt.otherwise);
		break;
	case NODE_FOR:
		check_for(s, node);
		break;
	case NODE_VAR_DECL:
		t = get_type(s, node->expr.var_decl.type);
		node->expr_type = t;
		name = intern_string(s, node->expr.var_decl.name, node->expr.var_decl.name_len);
		node->expr.var_decl.name = name;
		if (get_def(s, name)) {
			error(node, "redeclaration of variable.");
			break;
		}
		/* Type the initializer once. */
		value = get_expression_type(s, node->expr.var_decl.value);
		if (!can_cast(value, t) && !match(t, value)) {
			error(node, "type mismatch.");
		}
		shput(current_scope->defs, name, node);
		break;
	default:
		get_expression_type(s, node);
		break;
	}
}
|
||||
|
||||
/*
 * Checks one function definition.
 *
 * Opens a scope, records the function's return type in `current_return`,
 * defines each parameter in that scope as a synthetic variable declaration
 * (interning its name back into the parameter list), then checks every
 * statement of the body in the same scope.
 */
static void check_function(sema *s, ast_node *f)
{
	push_scope(s);
	current_return = get_type(s, f->expr.function.type);

	for (member *arg = f->expr.function.parameters; arg != NULL; arg = arg->next) {
		type *arg_type = get_type(s, arg->type);
		char *arg_name = intern_string(s, arg->name, arg->name_len);
		arg->name = arg_name;

		ast_node *decl = arena_alloc(s->allocator, sizeof(ast_node));
		decl->type = NODE_VAR_DECL;
		decl->expr_type = arg_type;
		decl->address_taken = false;
		decl->expr.var_decl.name = arg_name;

		shput(current_scope->defs, arg_name, decl);
	}

	for (ast_node *unit = f->expr.function.body;
	     unit != NULL && unit->type == NODE_UNIT;
	     unit = unit->expr.unit_node.next) {
		check_statement(s, unit->expr.unit_node.expr);
	}

	pop_scope(s);
}
|
||||
|
||||
/*
 * Runs semantic analysis over a translation unit (a chain of NODE_UNIT
 * nodes) in three passes:
 *  1. order_type on every top-level node, followed by create_types;
 *  2. create_prototype for every function;
 *  3. check_function for functions and check_statement for everything
 *     else.
 */
static void analyze_unit(sema *s, ast_node *node)
{
	ast_node *unit;

	for (unit = node; unit != NULL && unit->type == NODE_UNIT; unit = unit->expr.unit_node.next) {
		if (unit->expr.unit_node.expr) {
			order_type(s, unit->expr.unit_node.expr);
		}
	}

	create_types(s);

	for (unit = node; unit != NULL && unit->type == NODE_UNIT; unit = unit->expr.unit_node.next) {
		ast_node *decl = unit->expr.unit_node.expr;
		if (decl != NULL && decl->type == NODE_FUNCTION) {
			create_prototype(s, decl);
		}
	}

	for (unit = node; unit != NULL && unit->type == NODE_UNIT; unit = unit->expr.unit_node.next) {
		ast_node *decl = unit->expr.unit_node.expr;
		if (decl != NULL && decl->type == NODE_FUNCTION) {
			check_function(s, decl);
		} else {
			check_statement(s, decl);
		}
	}
}
|
||||
|
||||
/*
 * Entry point of semantic analysis.
 *
 * Allocates the analysis state from arena `a`, resets the global type
 * graph and scope chain, registers the built-in types (void, bool,
 * u8..u64, i8..i64, f32, f64), creates the types used for integer and
 * float constants, and analyzes the AST held by parser `p`.
 */
void sema_init(parser *p, arena *a)
{
	/* Built-in integer-like types, in registration order. */
	static const struct {
		char *name;
		int bits;
		bool is_signed;
	} integer_types[] = {
		{ "void", 0, false },
		{ "bool", 8, false },
		{ "u8", 8, false },
		{ "u16", 16, false },
		{ "u32", 32, false },
		{ "u64", 64, false },
		{ "i8", 8, true },
		{ "i16", 16, true },
		{ "i32", 32, true },
		{ "i64", 64, true },
	};
	/* Built-in floating point types, in registration order. */
	static const struct {
		char *name;
		int bits;
	} float_types[] = {
		{ "f32", 32 },
		{ "f64", 64 },
	};

	sema *s = arena_alloc(a, sizeof(sema));
	s->allocator = a;
	types = NULL;
	s->ast = p->ast;

	global_scope = arena_alloc(a, sizeof(scope));
	global_scope->parent = NULL;
	global_scope->defs = NULL;
	current_scope = global_scope;

	for (usize i = 0; i < sizeof(integer_types) / sizeof(integer_types[0]); i++) {
		register_type(s, integer_types[i].name,
		              create_integer(s, integer_types[i].name, integer_types[i].bits, integer_types[i].is_signed));
	}
	for (usize i = 0; i < sizeof(float_types) / sizeof(float_types[0]); i++) {
		register_type(s, float_types[i].name,
		              create_float(s, float_types[i].name, float_types[i].bits));
	}

	const_int = arena_alloc(s->allocator, sizeof(type));
	const_int->name = "const_int";
	const_int->tag = TYPE_INTEGER_CONST;
	const_int->data.integer = 0;

	const_float = arena_alloc(s->allocator, sizeof(type));
	const_float->name = "const_float";
	const_float->tag = TYPE_FLOAT_CONST;
	const_float->data.flt = 0;

	analyze_unit(s, s->ast);
}
|
||||
76
sema.h
76
sema.h
|
|
@ -1,76 +0,0 @@
|
|||
#ifndef SEMA_H
|
||||
#define SEMA_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "parser.h"
|
||||
#include "stb_ds.h"
|
||||
#include "utils.h"
|
||||
|
||||
/* Discriminator selecting which arm of `type.data` is meaningful. */
typedef enum {
	TYPE_VOID,
	TYPE_BOOL,
	TYPE_PTR,
	TYPE_SLICE,
	TYPE_FLOAT,
	TYPE_FLOAT_CONST,   /* type of a float constant */
	TYPE_INTEGER,
	TYPE_INTEGER_CONST, /* type of an integer constant */
	TYPE_UINTEGER,
	TYPE_STRUCT,
	TYPE_UNION,
	TYPE_ENUM,          /* TODO */
	TYPE_GENERIC,       /* TODO */
} type_tag;
|
||||
|
||||
typedef struct _type {
|
||||
type_tag tag;
|
||||
usize size;
|
||||
usize alignment;
|
||||
char *name;
|
||||
union {
|
||||
u8 integer;
|
||||
u8 flt; // float
|
||||
struct {
|
||||
bool is_const;
|
||||
bool is_volatile;
|
||||
struct _type *child;
|
||||
} ptr;
|
||||
struct {
|
||||
usize len;
|
||||
bool is_const;
|
||||
bool is_volatile;
|
||||
struct _type *child;
|
||||
} slice;
|
||||
struct {
|
||||
char *name;
|
||||
usize name_len;
|
||||
member *members;
|
||||
struct { char *key; struct _type *value; } *member_types;
|
||||
} structure;
|
||||
struct {
|
||||
char *name;
|
||||
usize name_len;
|
||||
variant *variants;
|
||||
} enm; /* TODO */
|
||||
} data;
|
||||
} type;
|
||||
|
||||
typedef struct {
|
||||
char *name;
|
||||
type *type;
|
||||
type **parameters;
|
||||
} prototype;
|
||||
|
||||
typedef struct _scope {
|
||||
struct _scope *parent;
|
||||
struct { char *key; ast_node *value; } *defs;
|
||||
} scope;
|
||||
|
||||
typedef struct {
|
||||
arena *allocator;
|
||||
ast_node *ast;
|
||||
} sema;
|
||||
|
||||
void sema_init(parser *p, arena *a);
|
||||
|
||||
#endif
|
||||
12
test.l
12
test.l
|
|
@ -1,12 +0,0 @@
|
|||
u32 main(u32 b)
|
||||
{
|
||||
u32 a = 4;
|
||||
//return a;
|
||||
if (b == 3) {
|
||||
return 3;
|
||||
} else {
|
||||
return 4;
|
||||
}
|
||||
|
||||
return a;
|
||||
}
|
||||
2
todo.cfg
2
todo.cfg
|
|
@ -1,2 +0,0 @@
|
|||
export TODO_DIR="."
|
||||
export TODO_FILE="$TODO_DIR/todo.txt"
|
||||
1
todo.txt
1
todo.txt
|
|
@ -1 +0,0 @@
|
|||
implement dominator tree for control flow
|
||||
152
utils.c
152
utils.c
|
|
@ -1,152 +0,0 @@
|
|||
#include "utils.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
/*
 * Parses the first `len` characters of `s` as a decimal integer with an
 * optional leading '-'.
 *
 * The characters are assumed to be ASCII digits; no validation or overflow
 * detection is performed. An empty input (len == 0) yields 0 without
 * reading from `s`.
 */
i64 parse_int(char *s, usize len)
{
	bool negative = false;
	/* Only look at the sign when there is at least one character;
	 * otherwise `len -= 1` would wrap around. */
	if (len > 0 && *s == '-') {
		s += 1;
		len -= 1;
		negative = true;
	}

	u64 int_part = 0;
	for (usize i = 0; i < len; i++) {
		int_part = (int_part * 10) + (s[i] - '0');
	}

	if (negative) {
		/* Unsigned negation: wraps to the two's complement value. */
		int_part = 0 - int_part;
	}

	return (i64)int_part;
}
|
||||
|
||||
/*
 * Parses the first `len` characters of `s` as a decimal number of the form
 * [-]digits[.digits].
 *
 * The digits on both sides of the point are combined into one integer,
 * which is then divided by 10 once per fractional digit. Input without a
 * decimal point is parsed as a whole number. No validation or overflow
 * detection is performed.
 */
f64 parse_float(char *s, usize len)
{
	bool negative = false;
	if (len > 0 && *s == '-') {
		s += 1;
		len -= 1;
		negative = true;
	}

	/* `len` means "no decimal point found". Using 0 as the default would
	 * be indistinguishable from a point at index 0 and would drop the
	 * first digit of inputs such as "123". */
	usize point_pos = len;
	for (usize i = 0; i < len; i++) {
		if (s[i] == '.') {
			point_pos = i;
			break;
		}
	}

	usize frac_len = (point_pos < len) ? len - point_pos - 1 : 0;

	i64 int_part = parse_int(s, point_pos);
	i64 dec_part = 0;
	if (frac_len > 0) {
		dec_part = parse_int(s + point_pos + 1, frac_len);
	}

	/* Shift the integer part left by one decimal place per fractional
	 * digit, then append the fractional digits. */
	for (usize i = 0; i < frac_len; i++) {
		int_part *= 10;
	}
	int_part += dec_part;

	f64 f = (f64) int_part;
	for (usize i = 0; i < frac_len; i++) {
		f /= 10.0;
	}

	if (negative) {
		f *= -1;
	}

	return f;
}
|
||||
|
||||
|
||||
/*
 * Associates `value` with the NUL-terminated string `key` in the trie
 * rooted at `root`, allocating missing nodes from arena `a`.
 *
 * If the arena runs out of memory the insertion is abandoned and the key
 * is left unregistered.
 */
void trie_insert(trie_node *root, arena *a, char *key, uint16_t value)
{
	trie_node *node = root;

	for (; *key; key++) {
		/* Index by unsigned byte value: a plain `char` may be signed,
		 * and a negative value would index far outside children[]. */
		usize slot = (unsigned char)*key;

		if (!node->children[slot]) {
			trie_node *child = arena_alloc(a, sizeof(trie_node));
			if (!child) {
				return;
			}
			/* The arena does not guarantee zeroed memory. */
			memset(child, 0x0, sizeof(trie_node));
			node->children[slot] = child;
		}
		node = node->children[slot];
	}

	node->value = value;
}
|
||||
|
||||
/*
 * Looks up the first `len` bytes of `key` in the trie rooted at `root`.
 * Returns the value stored for that key, or 0 when the path does not
 * exist (a node on the path without a stored value also yields 0, since
 * nodes are zero-initialised on insertion).
 */
uint16_t trie_get(trie_node *root, char *key, usize len)
{
	trie_node *node = root;

	for (usize i = 0; i < len; i++) {
		/* Index by unsigned byte value: a plain `char` may be signed,
		 * and a negative value would index far outside children[]. */
		usize slot = (unsigned char)key[i];

		if (!node->children[slot]) {
			return 0;
		}
		node = node->children[slot];
	}

	return node->value;
}
|
||||
|
||||
#ifndef DEFAULT_ALIGNMENT
|
||||
#define DEFAULT_ALIGNMENT (2 * sizeof(void *))
|
||||
#endif
|
||||
|
||||
/*
 * Rounds address `ptr` up to the next multiple of `align`.
 * The remainder is computed with a bit mask, which is only meaningful when
 * `align` is a power of two.
 */
static usize align_forward(usize ptr, usize align) {
	const uintptr_t addr = ptr;
	const uintptr_t step = (uintptr_t)align;
	const uintptr_t remainder = addr & (step - 1);

	if (remainder == 0) {
		return (usize)addr;
	}
	return (usize)(addr + (step - remainder));
}
|
||||
|
||||
/*
 * Creates an arena backed by `size` bytes of zero-initialised heap memory.
 *
 * On allocation failure the returned arena has `memory == NULL` and a
 * capacity of 0, so callers can detect the failure and any later
 * arena_alloc on it fails cleanly.
 */
arena arena_init(usize size)
{
	/* calloc zeroes the block and, unlike malloc + memset, does not
	 * write through a NULL pointer when the allocation fails. */
	void *memory = calloc(1, size);

	return (arena){
		.capacity = memory ? size : 0,
		.position = 0,
		.memory = memory,
	};
}
|
||||
|
||||
/*
 * Allocates `size` bytes from arena `a`, aligned to DEFAULT_ALIGNMENT.
 *
 * Returns NULL when the arena has no backing memory or not enough space
 * left. The returned memory is not cleared: after a reset it may contain
 * data from earlier allocations.
 */
void *arena_alloc(arena *a, usize size) {
	if (!a->memory || a->position > a->capacity) return NULL;

	uintptr_t current_addr = (uintptr_t)a->memory + a->position;
	uintptr_t padding = align_forward(current_addr, DEFAULT_ALIGNMENT) - current_addr;

	/* Compare against the remaining space with subtractions only:
	 * `position + padding + size` could wrap around for a huge `size`
	 * and wrongly pass the bounds check. */
	usize available = a->capacity - a->position;
	if (padding > available || size > available - padding) return NULL;

	void *ret = (unsigned char *)a->memory + a->position + padding;
	a->position += (size + padding);

	return ret;
}
|
||||
|
||||
/*
 * Captures the arena's current allocation position so it can later be
 * restored with arena_reset_to_snapshot.
 */
snapshot arena_snapshot(arena *a)
{
	snapshot mark = a->position;

	return mark;
}
|
||||
|
||||
/*
 * Moves the arena's allocation position to snapshot `s`. The bytes beyond
 * that position are handed out again by later allocations; they are not
 * cleared here.
 */
void arena_reset_to_snapshot(arena *a, snapshot s)
{
	(*a).position = s;
}
|
||||
|
||||
/*
 * Makes the whole arena available again by moving the allocation position
 * back to the start. The backing memory is kept and not cleared.
 */
void arena_reset(arena *a)
{
	a->position = 0;
}
|
||||
|
||||
/*
 * Releases the arena's backing memory. The arena is passed by value, so
 * the caller's copy still holds the stale pointer and must not be used
 * afterwards.
 */
void arena_deinit(arena a)
{
	void *backing = a.memory;

	free(backing);
}
|
||||
64
utils.h
64
utils.h
|
|
@ -1,64 +0,0 @@
|
|||
#ifndef UTILS_H
|
||||
#define UTILS_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
typedef uint8_t u8;
|
||||
typedef uint16_t u16;
|
||||
typedef uint32_t u32;
|
||||
typedef uint64_t u64;
|
||||
|
||||
typedef int8_t i8;
|
||||
typedef int16_t i16;
|
||||
typedef int32_t i32;
|
||||
typedef int64_t i64;
|
||||
|
||||
typedef size_t usize;
|
||||
|
||||
typedef float f32;
|
||||
typedef double f64;
|
||||
|
||||
i64 parse_int(char *s, usize len);
|
||||
f64 parse_float(char *s, usize len);
|
||||
|
||||
typedef struct {
|
||||
usize capacity;
|
||||
usize position;
|
||||
void* memory;
|
||||
} arena;
|
||||
|
||||
typedef usize snapshot;
|
||||
|
||||
/*
|
||||
* NOTE(ernesto): faulty initialization is signaled by the arena.memory
|
||||
* being null. It is the responsibility of the caller to check for faulty
|
||||
* initialization.
|
||||
*/
|
||||
arena arena_init(usize size);
|
||||
/*
|
||||
* Returns null on unsuccessful allocation.
|
||||
* In this implementation an allocation is only unsuccessful if the arena
|
||||
* does not have enough memory to allocate the requested space
|
||||
*/
|
||||
void *arena_alloc(arena *a, usize size);
|
||||
snapshot arena_snapshot(arena *a);
|
||||
void arena_reset_to_snapshot(arena *a, snapshot s);
|
||||
void arena_reset(arena *a);
|
||||
/* This call should never fail, also, do we even care if it does? */
|
||||
void arena_deinit(arena a);
|
||||
|
||||
/* Node of a byte-indexed trie mapping strings to 16-bit values. */
typedef struct _trie_node {
	uint16_t value;                   /* value stored for the key ending here */
	struct _trie_node *children[256]; /* one slot per possible byte value */
} trie_node;
|
||||
|
||||
void trie_insert(trie_node *root, arena *a, char *key, uint16_t value);
|
||||
uint16_t trie_get(trie_node *root, char *key, usize len);
|
||||
|
||||
typedef struct {
|
||||
usize row, column;
|
||||
} source_pos;
|
||||
|
||||
#endif
|
||||
Loading…
Add table
Add a link
Reference in a new issue