diff --git a/Makefile b/Makefile deleted file mode 100644 index 203633a..0000000 --- a/Makefile +++ /dev/null @@ -1,59 +0,0 @@ -# cc - C compiler -# See LICENSE file for copyright and license details. - -include config.mk - -SRC = lc.c utils.c lexer.c parser.c sema.c ir.c -HDR = config.def.h utils.h lexer.h parser.h sema.h ir.h -OBJ = ${SRC:.c=.o} - -all: options lc - -options: - @echo lc build options: - @echo "CFLAGS = ${CFLAGS}" - @echo "LDFLAGS = ${LDFLAGS}" - @echo "CC = ${CC}" - -.c.o: - ${CC} -c ${CFLAGS} $< - -${OBJ}: config.h config.mk - -config.h: - cp config.def.h $@ - -users.h: - cp users.def.h $@ - -lc: ${OBJ} - ${CC} -o $@ ${OBJ} ${LDFLAGS} - -clean: - rm -f lc ${OBJ} lc-${VERSION}.tar.gz - -dist: clean - mkdir -p lc-${VERSION} - cp -R LICENSE Makefile README config.mk\ - lc.1 ${HDR} ${SRC} lc-${VERSION} - tar -cf lc-${VERSION}.tar lc-${VERSION} - gzip lc-${VERSION}.tar - rm -rf lc-${VERSION} - -install: all - mkdir -p ${DESTDIR}${PREFIX}/bin - cp -f lc ${DESTDIR}${PREFIX}/bin - chmod 755 ${DESTDIR}${PREFIX}/bin/lc - mkdir -p ${DESTDIR}${MANPREFIX}/man1 - sed "s/VERSION/${VERSION}/g" < lc.1 > ${DESTDIR}${MANPREFIX}/man1/lc.1 - chmod 644 ${DESTDIR}${MANPREFIX}/man1/lc.1 - -uninstall: - rm -f ${DESTDIR}${PREFIX}/bin/lc\ - ${DESTDIR}${MANPREFIX}/man1/lc.1 -graph: clean all - ./lc > graph.dot - dot -Tpdf graph.dot > graph.pdf - zathura ./graph.pdf - -.PHONY: all options clean dist install uninstall diff --git a/README b/README deleted file mode 100644 index 6c8fea1..0000000 --- a/README +++ /dev/null @@ -1,24 +0,0 @@ -lc - L compiler -============================ -lc is a L compiler. It can compile L code. - - -Requirements ------------- -In order to build lc you need... a computer - - -Installation ------------- -Edit config.mk to match your local setup (lc is installed into -the /usr/local namespace by default). - -Afterwards enter the following command to build and install lc (if -necessary as root): - - make clean install - - -Usage ------------ -lc file diff --git a/config.def.h b/config.def.h deleted file mode 100644 index 184290d..0000000 --- a/config.def.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef CONFIG_H -#define CONFIG_H - -#endif diff --git a/config.h b/config.h deleted file mode 100644 index 184290d..0000000 --- a/config.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef CONFIG_H -#define CONFIG_H - -#endif diff --git a/config.mk b/config.mk deleted file mode 100644 index d6cbc51..0000000 --- a/config.mk +++ /dev/null @@ -1,27 +0,0 @@ -# cc version -VERSION = 0.1 - -# Customize below to fit your system - -# paths -PREFIX = /usr -MANPREFIX = ${PREFIX}/share/man - -# OpenBSD (uncomment) -#MANPREFIX = ${PREFIX}/man - -# includes and libs -INCS = -I. -LIBS = -# flags -CPPFLAGS = -DVERSION=\"${VERSION}\" -CFLAGS := -std=c23 -pedantic -Wall -O0 ${INCS} ${CPPFLAGS} -CFLAGS := ${CFLAGS} -g -LDFLAGS = ${LIBS} - -# Solaris -#CFLAGS = -fast ${INCS} -DVERSION=\"${VERSION}\" -#LDFLAGS = ${LIBS} - -# compiler and linker -CC = cc diff --git a/done.txt b/done.txt deleted file mode 100644 index e69de29..0000000 diff --git a/examples/for.l b/examples/for.l deleted file mode 100644 index 24181ba..0000000 --- a/examples/for.l +++ /dev/null @@ -1,16 +0,0 @@ -import std; - -i32 main() -{ - u32 x = 4; - loop { - u32 b = 3; - } - x == 3; - - loop (0.., test) |k, i| { - - } - u32 b = 3; - -} diff --git a/examples/hello_world.l b/examples/hello_world.l deleted file mode 100644 index 68b4c08..0000000 --- a/examples/hello_world.l +++ /dev/null @@ -1,10 +0,0 @@ -struct b { - i32 a, - u32 b, - u32 c, -} - -u32 test() -{ - f32 a = 5.0; -} diff --git a/ir.c b/ir.c deleted file mode 100644 index 61da14a..0000000 --- a/ir.c +++ /dev/null @@ -1,812 +0,0 @@ -#include "ir.h" -#include -#include -#include "stb_ds.h" -#include "sema.h" - -struct { ir_node key; ir_node *value; } *global_hash = NULL; -static ir_node *graph; -static ir_node *current_memory; -static ir_node *current_control; -static usize current_stack = 0; - -static ir_node *current_scope = NULL; - -static ir_node *build_expression(ast_node *node); - -static struct { - ir_node **return_controls; - ir_node **return_memories; - ir_node **return_values; -} current_func = {0}; - -static void node_name(ir_node *node) -{ - if (!node) { - printf("null [label=\"NULL\", style=filled, fillcolor=red]\n"); - return; - } - printf("%ld ", node->id); - switch (node->code) { - case OC_START: - printf("[label=\"%s\", style=filled, color=orange]\n", node->data.start_name); - break; - case OC_RETURN: - printf("[label=\"return\", style=filled, color=orange]\n"); - break; - case OC_ADD: - printf("[label=\"+\"]\n"); - break; - case OC_NEG: - case OC_SUB: - printf("[label=\"-\"]\n"); - break; - case OC_DIV: - printf("[label=\"/\"]\n"); - break; - case OC_MUL: - printf("[label=\"*\"]\n"); - break; - case OC_MOD: - printf("[label=\"%%\"]\n"); - break; - case OC_BAND: - printf("[label=\"&\"]\n"); - break; - case OC_BOR: - printf("[label=\"|\"]\n"); - break; - case OC_BXOR: - printf("[label=\"^\"]\n"); - break; - case OC_EQ: - printf("[label=\"==\"]\n"); - break; - case OC_CONST_INT: - printf("[label=\"%ld\"]\n", node->data.const_int); - break; - case OC_CONST_FLOAT: - printf("[label=\"%f\"]\n", node->data.const_float); - break; - case OC_FRAME_PTR: - printf("[label=\"frame_ptr\"]\n"); - break; - case OC_STORE: - printf("[label=\"store\", shape=box]\n"); - break; - case OC_LOAD: - printf("[label=\"load\", shape=box]\n"); - break; - case OC_ADDR: - printf("[label=\"addr\"]\n"); - break; - case OC_REGION: - printf("[label=\"region\", shape=diamond, style=filled, color=green]\n"); - break; - case OC_PHI: - printf("[label=\"phi\", shape=triangle]\n"); - break; - case OC_IF: - printf("[label=\"if\", shape=diamond, style=filled, color=lightblue]\n"); - break; - case OC_PROJ: - printf("[label=\"proj\", shape=diamond, style=filled, color=cyan]\n"); - break; - default: - printf("[label=\"%d\"]\n", node->code); - break; - } -} - -static void print_graph(ir_node *node) -{ - for (int i = 0; i < hmlen(global_hash); i++) { - ir_node *node = global_hash[i].value; - node_name(node); - - for (int j = 0; j < arrlen(node->out); j++) { - if (node->out[j]) { - node_name(node->out[j]); - printf("%ld->%ld\n", node->out[j]->id, node->id); - } - } - } -} - -static void push_scope(void) -{ - arrput(current_scope->data.symbol_tables, NULL); -} - -static struct symbol_def *get_def(char *name) -{ - for (int i = arrlen(current_scope->data.symbol_tables) - 1; i >= 0; i--) { - struct symbol_def *def = shget(current_scope->data.symbol_tables[i], name); - if (def) return def; - } - return NULL; -} - -static void set_def(char *name, ir_node *node, bool lvalue) -{ - for (int i = arrlen(current_scope->data.symbol_tables) - 1; i >= 0; i--) { - if (shget(current_scope->data.symbol_tables[i], name)) { - struct symbol_def *def = calloc(1, sizeof(struct symbol_def)); - def->is_lvalue = lvalue; - def->node = node; - shput(current_scope->data.symbol_tables[i], name, def); - return; - } - } - int index = arrlen(current_scope->data.symbol_tables) - 1; - struct symbol_def *def = calloc(1, sizeof(struct symbol_def)); - def->is_lvalue = lvalue; - def->node = node; - shput(current_scope->data.symbol_tables[index], name, def); -} - -static ir_node *copy_scope(ir_node *src) -{ - ir_node *dst = calloc(1, sizeof(ir_node)); - dst->code = OC_SCOPE; - - for (int i=0; i < arrlen(src->data.symbol_tables); i++) { - arrput(dst->data.symbol_tables, NULL); - symbol_table *src_table = src->data.symbol_tables[i]; - for (int j=0; j < shlen(src_table); j++) { - shput(dst->data.symbol_tables[i], src_table[j].key, src_table[j].value); - } - } - return dst; -} - -static void const_fold(ir_node *binary) -{ - ir_node *left = binary->out[0]; - ir_node *right = binary->out[1]; - - if (left->code == OC_CONST_INT && right->code == OC_CONST_INT) { - switch (binary->code) { - case OC_ADD: - binary->data.const_int = left->data.const_int + right->data.const_int; - break; - case OC_SUB: - binary->data.const_int = left->data.const_int - right->data.const_int; - break; - case OC_MUL: - binary->data.const_int = left->data.const_int * right->data.const_int; - break; - case OC_DIV: - if (right->data.const_int != 0) - binary->data.const_int = left->data.const_int / right->data.const_int; - break; - case OC_MOD: - if (right->data.const_int != 0) - binary->data.const_int = left->data.const_int % right->data.const_int; - break; - case OC_BOR: - binary->data.const_int = left->data.const_int | right->data.const_int; - break; - case OC_BAND: - binary->data.const_int = left->data.const_int & right->data.const_int; - break; - case OC_BXOR: - binary->data.const_int = left->data.const_int ^ right->data.const_int; - break; - case OC_EQ: - binary->data.const_int = left->data.const_int == right->data.const_int; - break; - default: - return; - } - binary->code = OC_CONST_INT; - arrfree(binary->out); binary->out = NULL; - arrfree(binary->in); binary->in = NULL; - binary->id = stbds_hash_bytes(binary, sizeof(ir_node), 0xcafebabe); - } - - if (left->code == OC_CONST_FLOAT && right->code == OC_CONST_FLOAT) { - switch (binary->code) { - case OC_ADD: - binary->data.const_float = left->data.const_float + right->data.const_float; - break; - case OC_SUB: - binary->data.const_float = left->data.const_float - right->data.const_float; - break; - case OC_MUL: - binary->data.const_float = left->data.const_float * right->data.const_float; - break; - case OC_DIV: - if (right->data.const_float != 0.0f) - binary->data.const_float = left->data.const_float / right->data.const_float; - break; - default: - return; - } - binary->code = OC_CONST_FLOAT; - arrfree(binary->out); binary->out = NULL; - arrfree(binary->in); binary->in = NULL; - binary->id = stbds_hash_bytes(binary, sizeof(ir_node), 0xcafebabe); - } -} - -static ir_node *build_address(usize base, usize offset) { - ir_node *addr = calloc(1, sizeof(ir_node)); - addr->code = OC_ADDR; - - ir_node *base_node = calloc(1, sizeof(ir_node)); - if (base == -1) { - base_node->code = OC_FRAME_PTR; - base_node->id = stbds_hash_bytes(base_node, sizeof(ir_node), 0xcafebabe); - } else { - base_node->code = OC_CONST_INT; - base_node->data.const_int = base; - base_node->id = stbds_hash_bytes(base_node, sizeof(ir_node), 0xcafebabe); - } - - ir_node *offset_node = calloc(1, sizeof(ir_node)); - offset_node->code = OC_CONST_INT; - offset_node->data.const_int = offset; - offset_node->id = stbds_hash_bytes(offset_node, sizeof(ir_node), 0xcafebabe); - - arrput(addr->out, base_node); - arrput(addr->out, offset_node); - - addr->id = stbds_hash_bytes(addr, sizeof(ir_node), 0xcafebabe); - ir_node *tmp = hmget(global_hash, *addr); - if (tmp) { - free(addr); - return tmp; - } - - return addr; -} - -static ir_node *build_assign_ptr(ast_node *binary) -{ - ir_node *val_node = build_expression(binary->expr.binary.right); - - char *var_name = binary->expr.binary.left->expr.string.start; - - ir_node *existing_def = get_def(var_name)->node; - - ir_node *store = calloc(1, sizeof(ir_node)); - store->code = OC_STORE; - - arrput(store->out, current_control); - - arrput(store->out, current_memory); - arrput(store->out, existing_def); - arrput(store->out, val_node); - - store->id = stbds_hash_bytes(store, sizeof(ir_node), 0xcafebabe); - hmput(global_hash, *store, store); - - current_memory = store; - - return val_node; -} - -static ir_node *build_assign(ast_node *binary) -{ - ir_node *val_node = build_expression(binary->expr.binary.right); - - char *var_name = binary->expr.binary.left->expr.string.start; - - struct symbol_def *def = get_def(var_name); - - if (def && def->is_lvalue) { - ir_node *existing_def = def->node; - ir_node *store = calloc(1, sizeof(ir_node)); - store->code = OC_STORE; - - arrput(store->out, current_control); - - arrput(store->out, current_memory); - arrput(store->out, existing_def); - arrput(store->out, val_node); - - store->id = stbds_hash_bytes(store, sizeof(ir_node), 0xcafebabe); - hmput(global_hash, *store, store); - - current_memory = store; - - return val_node; - } - - set_def(var_name, val_node, false); - return val_node; -} - -static ir_node *build_binary(ast_node *node) -{ - ir_node *n = calloc(1, sizeof(ir_node)); - switch (node->expr.binary.operator) { - case OP_ASSIGN: - free(n); - return build_assign(node); - case OP_ASSIGN_PTR: - free(n); - return build_assign_ptr(node); - case OP_PLUS: - n->code = OC_ADD; - break; - case OP_MINUS: - n->code = OC_SUB; - break; - case OP_DIV: - n->code = OC_DIV; - break; - case OP_MUL: - n->code = OC_MUL; - break; - case OP_MOD: - n->code = OC_MOD; - break; - case OP_BOR: - n->code = OC_BOR; - break; - case OP_BAND: - n->code = OC_BAND; - break; - case OP_BXOR: - n->code = OC_BXOR; - break; - case OP_EQ: - n->code = OC_EQ; - break; - default: - break; - } - arrput(n->out, build_expression(node->expr.binary.left)); - arrput(n->out, build_expression(node->expr.binary.right)); - n->id = stbds_hash_bytes(n, sizeof(ir_node), 0xcafebabe); - const_fold(n); - ir_node *tmp = hmget(global_hash, *n); - if (tmp) { - free(n); - return tmp; - } - - return n; -} - -static ir_node *build_load(ast_node *node) -{ - ir_node *n = calloc(1, sizeof(ir_node)); - n->code = OC_LOAD; - - arrput(n->out, current_memory); - arrput(n->out, build_expression(node)); - n->id = stbds_hash_bytes(n, sizeof(ir_node), 0xcafebabebabecafe); - - ir_node *tmp = hmget(global_hash, *n); - if (tmp) { - free(n); - return tmp; - } - - return n; -} - -static ir_node *build_unary(ast_node *node) -{ - ir_node *n = calloc(1, sizeof(ir_node)); - switch (node->expr.unary.operator) { - case UOP_MINUS: - n->code = OC_NEG; - arrput(n->out, build_expression(node->expr.unary.right)); - break; - case UOP_REF: - free(n); - - if (node->expr.unary.right->type == NODE_IDENTIFIER) { - struct symbol_def *def = get_def(node->expr.unary.right->expr.string.start); - if (def) { - return def->node; - } - } - - return build_expression(node->expr.unary.right); - case UOP_DEREF: - free(n); - return build_load(node->expr.unary.right); - default: - break; - } - - if (n->out && n->out[0]->code == OC_CONST_INT) { - switch (n->code) { - case OC_NEG: - n->data.const_int = -(n->out[0]->data.const_int); - break; - default: - break; - } - n->code = OC_CONST_INT; - arrfree(n->out); n->out = NULL; - } else if (n->out && n->out[0]->code == OC_CONST_FLOAT) { - switch (n->code) { - case OC_NEG: - n->data.const_float = -(n->out[0]->data.const_float); - break; - default: - break; - } - n->code = OC_CONST_FLOAT; - arrfree(n->out); n->out = NULL; - } - - n->id = stbds_hash_bytes(n, sizeof(ir_node), 0xcafebabe); - ir_node *tmp = hmget(global_hash, *n); - if (tmp) { - free(n); - return tmp; - } - - return n; -} - -static ir_node *build_if(ast_node *node) -{ - ir_node *condition = build_expression(node->expr.if_stmt.condition); - - ir_node *if_node = calloc(1, sizeof(ir_node)); - if_node->code = OC_IF; - arrput(if_node->out, condition); - arrput(if_node->out, current_control); - if_node->id = stbds_hash_bytes(if_node, sizeof(ir_node), 0xcafebabe); - hmput(global_hash, *if_node, if_node); - - ir_node *proj_true = calloc(1, sizeof(ir_node)); - proj_true->code = OC_PROJ; - arrput(proj_true->out, if_node); - proj_true->id = stbds_hash_bytes(proj_true, sizeof(ir_node), 0xcafebabe); - hmput(global_hash, *proj_true, proj_true); - - ir_node *proj_false = calloc(1, sizeof(ir_node)); - proj_false->code = OC_PROJ; - arrput(proj_false->out, if_node); - proj_false->id = stbds_hash_bytes(proj_false, sizeof(ir_node), 0xcafebabe); - hmput(global_hash, *proj_false, proj_false); - - ir_node *base_scope = copy_scope(current_scope); - ir_node *base_mem = current_memory; - - current_control = proj_true; - - ast_node *current = node->expr.if_stmt.body; - while (current && current->type == NODE_UNIT) { - if (current->expr.unit_node.expr) { - build_expression(current->expr.unit_node.expr); - } - current = current->expr.unit_node.next; - } - ir_node *then_scope = current_scope; - ir_node *then_mem = current_memory; - ir_node *then_control = current_control; - - current_scope = copy_scope(base_scope); - current_memory = base_mem; - - current_control = proj_false; - current = node->expr.if_stmt.otherwise; - while (current && current->type == NODE_UNIT) { - if (current->expr.unit_node.expr) { - build_expression(current->expr.unit_node.expr); - } - current = current->expr.unit_node.next; - } - ir_node *else_scope = current_scope; - ir_node *else_mem = current_memory; - ir_node *else_control = current_control; - - ir_node *region = calloc(1, sizeof(ir_node)); - region->code = OC_REGION; - arrput(region->out, then_control); - arrput(region->out, else_control); - region->id = stbds_hash_bytes(region, sizeof(ir_node), 0xcafebabe); - hmput(global_hash, *region, region); - - if (then_mem->id != else_mem->id) { - ir_node *phi = calloc(1, sizeof(ir_node)); - phi->code = OC_PHI; - arrput(phi->out, region); - arrput(phi->out, then_mem); - arrput(phi->out, else_mem); - phi->id = stbds_hash_bytes(phi, sizeof(ir_node), 0xcafebabe); - - hmput(global_hash, *phi, phi); - - current_memory = phi; - } else { - current_memory = then_mem; - } - - current_scope = base_scope; - - for (int i = 0; i < arrlen(current_scope->data.symbol_tables); i++) { - symbol_table *base_table = current_scope->data.symbol_tables[i]; - for (int j = 0; j < shlen(base_table); j++) { - char *key = base_table[j].key; - - ir_node *found_then = NULL; - symbol_table *t_table = then_scope->data.symbol_tables[i]; - if (shget(t_table, key)->node) found_then = shget(t_table, key)->node; - else found_then = base_table[j].value->node; - - ir_node *found_else = NULL; - symbol_table *e_table = else_scope->data.symbol_tables[i]; - if (shget(e_table, key)->node) found_else = shget(e_table, key)->node; - else found_else = base_table[j].value->node; - - if (found_then->id != found_else->id) { - ir_node *phi = calloc(1, sizeof(ir_node)); - phi->code = OC_PHI; - arrput(phi->out, region); - arrput(phi->out, found_then); - arrput(phi->out, found_else); - phi->id = stbds_hash_bytes(phi, sizeof(ir_node), 0xcafebabe); - struct symbol_def *def = calloc(1, sizeof(struct symbol_def)); - def->node = phi; - def->is_lvalue = false; - shput(current_scope->data.symbol_tables[i], key, def); - hmput(global_hash, *phi, phi); - } else { - struct symbol_def *def = calloc(1, sizeof(struct symbol_def)); - def->node = found_then; - def->is_lvalue = false; - shput(current_scope->data.symbol_tables[i], key, def); - } - } - } - - current_control = region; - - return region; -} - -static void build_return(ast_node *node) -{ - ir_node *val = NULL; - - if (node->expr.ret.value) { - val = build_expression(node->expr.ret.value); - } else { - val = calloc(1, sizeof(ir_node)); - val->code = OC_VOID; - val->id = stbds_hash_bytes(val, sizeof(ir_node), 0xcafebabe); - } - - arrput(current_func.return_controls, current_control); - arrput(current_func.return_memories, current_memory); - arrput(current_func.return_values, val); - - current_control = NULL; -} - -static void finalize_function(void) -{ - int count = arrlen(current_func.return_controls); - - if (count == 0) { - return; - } - - ir_node *final_ctrl = NULL; - ir_node *final_mem = NULL; - ir_node *final_val = NULL; - - if (count == 1) { - final_ctrl = current_func.return_controls[0]; - final_mem = current_func.return_memories[0]; - final_val = current_func.return_values[0]; - } - else { - ir_node *region = calloc(1, sizeof(ir_node)); - region->code = OC_REGION; - for (int i=0; iout, current_func.return_controls[i]); - } - hmput(global_hash, *region, region); - final_ctrl = region; - - ir_node *mem_phi = calloc(1, sizeof(ir_node)); - mem_phi->code = OC_PHI; - arrput(mem_phi->out, region); - for (int i=0; iout, current_func.return_memories[i]); - } - hmput(global_hash, *mem_phi, mem_phi); - mem_phi->id = stbds_hash_bytes(mem_phi, sizeof(ir_node), 0xcafebabe); - final_mem = mem_phi; - - ir_node *val_phi = calloc(1, sizeof(ir_node)); - val_phi->code = OC_PHI; - //arrput(val_phi->out, region); - for (int i=0; iout, current_func.return_values[i]); - } - val_phi->id = stbds_hash_bytes(val_phi, sizeof(ir_node), 0xcafebabe); - hmput(global_hash, *val_phi, val_phi); - final_val = val_phi; - - region->id = stbds_hash_bytes(region, sizeof(ir_node), 0xcafebabe); - } - - ir_node *ret = calloc(1, sizeof(ir_node)); - ret->code = OC_RETURN; - arrput(ret->out, final_ctrl); - arrput(ret->out, final_mem); - arrput(ret->out, final_val); - ret->id = stbds_hash_bytes(ret, sizeof(ir_node), 0xcafebabe); - - hmput(global_hash, *ret, ret); -} - -static ir_node *build_function(ast_node *node) -{ - memset(¤t_func, 0x0, sizeof(current_func)); - ast_node *current = node->expr.function.body; - - ir_node *func = calloc(1, sizeof(ir_node)); - func->code = OC_START; - func->id = stbds_hash_bytes(func, sizeof(ir_node), 0xcafebabe); - func->data.start_name = node->expr.function.name; - - ir_node *start_ctrl = calloc(1, sizeof(ir_node)); - start_ctrl->code = OC_PROJ; - start_ctrl->id = stbds_hash_bytes(&start_ctrl, sizeof(usize), 0xcafebabe); - arrput(start_ctrl->out, func); - hmput(global_hash, *start_ctrl, start_ctrl); - - current_control = start_ctrl; - - ir_node *start_mem = calloc(1, sizeof(ir_node)); - start_mem->code = OC_PROJ; - start_mem->id = stbds_hash_bytes(&start_mem, sizeof(usize), 0xcafebabe); - arrput(start_mem->out, func); - hmput(global_hash, *start_mem, start_mem); - - current_memory = start_mem; - - current_scope = calloc(1, sizeof(ir_node)); - current_scope->code = OC_SCOPE; - - push_scope(); - - member *m = node->expr.function.parameters; - while (m) { - ir_node *proj_param = calloc(1, sizeof(ir_node)); - proj_param->code = OC_PROJ; - arrput(proj_param->out, func); - proj_param->id = stbds_hash_bytes(proj_param, sizeof(ir_node), 0xcafebabe); - set_def(m->name, proj_param, false); - hmput(global_hash, *proj_param, proj_param); - - m = m->next; - } - - while (current && current->type == NODE_UNIT) { - if (current->expr.unit_node.expr) { - build_expression(current->expr.unit_node.expr); - } - current = current->expr.unit_node.next; - } - - func->id = stbds_hash_bytes(func, sizeof(ir_node), 0xcafebabe); - - finalize_function(); - - return func; -} - -static ir_node *build_expression(ast_node *node) -{ - ir_node *n = NULL; - ir_node *tmp = NULL; - switch (node->type) { - case NODE_UNARY: - n = build_unary(node); - break; - case NODE_BINARY: - n = build_binary(node); - break; - case NODE_INTEGER: - n = calloc(1, sizeof(ir_node)); - n->code = OC_CONST_INT; - n->data.const_int = node->expr.integer; - n->id = stbds_hash_bytes(n, sizeof(ir_node), 0xcafebabe); - tmp = hmget(global_hash, *n); - if (tmp) { - free(n); - return tmp; - } - break; - case NODE_VAR_DECL: - n = calloc(1, sizeof(ir_node)); - if (node->address_taken) { - n->code = OC_STORE; - - arrput(n->out, current_memory); - arrput(n->out, build_address(-1, current_stack)); - arrput(n->out, build_expression(node->expr.var_decl.value)); - current_memory = n; - current_stack += node->expr_type->size; - n->id = stbds_hash_bytes(n, sizeof(ir_node), 0xcafebabe); - hmput(global_hash, *n, n); - n = n->out[1]; - set_def(node->expr.var_decl.name, n, true); - } else { - n = build_expression(node->expr.var_decl.value); - set_def(node->expr.var_decl.name, n, false); - } - - return n; - case NODE_IDENTIFIER: - struct symbol_def *def = get_def(node->expr.string.start); - n = def->node; - - if (n && def->is_lvalue) { - ir_node *addr_node = n; - - n = calloc(1, sizeof(ir_node)); - n->code = OC_LOAD; - - arrput(n->out, current_memory); - arrput(n->out, addr_node); - - n->id = stbds_hash_bytes(n, sizeof(ir_node), 0xcafebabe); - - ir_node *tmp = hmget(global_hash, *n); - if (tmp) { - free(n); - n = tmp; - } else { - hmput(global_hash, *n, n); - } - } - break; - case NODE_IF: - n = build_if(node); - break; - case NODE_RETURN: - build_return(node); - break; - default: - break; - } - - if (n) hmput(global_hash, *n, n); - return n; -} - -void ir_build(ast_node *ast) -{ - ast_node *current = ast; - - graph = calloc(1, sizeof(ir_node)); - graph->code = OC_START; - graph->id = stbds_hash_bytes(graph, sizeof(ir_node), 0xcafebabe); - graph->data.start_name = "program"; - - current_memory = calloc(1, sizeof(ir_node)); - current_memory->code = OC_FRAME_PTR; - current_memory->id = stbds_hash_bytes(current_memory, sizeof(ir_node), 0xcafebabe); - - current_scope = calloc(1, sizeof(ir_node)); - current_scope->code = OC_SCOPE; - push_scope(); - - while (current && current->type == NODE_UNIT) { - if (current->expr.unit_node.expr && current->expr.unit_node.expr->type == NODE_FUNCTION) { - ir_node *expr = build_function(current->expr.unit_node.expr); - arrput(graph->out, expr); - hmput(global_hash, *expr, expr); - } - current = current->expr.unit_node.next; - } - printf("digraph G {\n"); - print_graph(graph); - printf("}\n"); -} diff --git a/ir.h b/ir.h deleted file mode 100644 index bfd684f..0000000 --- a/ir.h +++ /dev/null @@ -1,65 +0,0 @@ -#ifndef IR_H -#define IR_H - -#include "utils.h" -#include "parser.h" - -struct _ir_node; -struct symbol_def { - struct _ir_node *node; - bool is_lvalue; -}; - -typedef struct { char *key; struct symbol_def *value; } symbol_table; - -typedef enum { - OC_START, - OC_ADD, - OC_SUB, - OC_MUL, - OC_DIV, - OC_MOD, - OC_BAND, - OC_BOR, - OC_BXOR, - OC_NEG, - OC_EQ, - - OC_CONST_INT, - OC_CONST_FLOAT, - OC_VOID, - - OC_FRAME_PTR, - OC_ADDR, - - OC_STORE, - OC_LOAD, - - OC_REGION, - OC_PHI, - - OC_IF, - OC_PROJ, - - OC_STOP, - OC_RETURN, - - OC_SCOPE, -} opcode; - -typedef struct _ir_node { - opcode code; - usize id; - struct _ir_node **in; - struct _ir_node **out; - union { - i64 const_int; - f64 const_float; - symbol_table **symbol_tables; - char *start_name; - } data; -} ir_node; - -void ir_build(ast_node *ast); - -#endif diff --git a/lc.c b/lc.c deleted file mode 100644 index 0d0ddbb..0000000 --- a/lc.c +++ /dev/null @@ -1,241 +0,0 @@ -#include -#include -#include "utils.h" -#include "lexer.h" -#include "parser.h" -#include "sema.h" -#include "ir.h" - -void print_indent(int depth) { - for (int i = 0; i < depth; i++) printf(" "); -} - -const char* get_op_str(binary_op op) { - switch(op) { - case OP_PLUS: return "+"; - case OP_MINUS: return "-"; - case OP_DIV: return "/"; - case OP_MUL: return "*"; - case OP_EQ: return "=="; - case OP_ASSIGN: return "="; - case OP_ASSIGN_PTR: return "<-"; - case OP_AND: return "&&"; - case OP_OR: return "||"; - case OP_NEQ: return "!="; - case OP_GT: return ">"; - case OP_LT: return "<"; - case OP_GE: return ">="; - case OP_LE: return "<="; - case OP_BOR: return "|"; - case OP_BAND: return "&"; - case OP_BXOR: return "^"; - case OP_MOD: return "%"; - case OP_PLUS_EQ: return "+="; - case OP_MINUS_EQ: return "-="; - case OP_DIV_EQ: return "/="; - case OP_MUL_EQ: return "*="; - default: return "?"; - } -} - -const char *get_uop_str(unary_op op) { - switch (op) { - case UOP_INCR: return "++"; - case UOP_MINUS: return "-"; - case UOP_DECR: return "--"; - case UOP_DEREF: return "*"; - case UOP_REF: return "&"; - case UOP_NOT: return "!"; - default: return "?"; - } -} - -void print_ast(ast_node *node, int depth) { - if (!node) return; - - print_indent(depth); - - switch (node->type) { - case NODE_INTEGER: - printf("Integer: %lu\n", node->expr.integer); - break; - case NODE_FLOAT: - printf("Float: %f\n", node->expr.flt); - break; - case NODE_CHAR: - printf("Char: '%c'\n", node->expr.ch); - break; - case NODE_STRING: - printf("String: \"%.*s\"\n", (int)node->expr.string.len, node->expr.string.start); - break; - case NODE_IDENTIFIER: - printf("Identifier: %.*s\n", (int)node->expr.string.len, node->expr.string.start); - break; - case NODE_CAST: - printf("Cast:\n"); - print_ast(node->expr.cast.type, depth); - print_ast(node->expr.cast.value, depth + 1); - break; - case NODE_ACCESS: - printf("Access:\n"); - print_ast(node->expr.access.expr, depth + 1); - print_ast(node->expr.access.member, depth + 1); - break; - case NODE_LABEL: - printf("Label: %.*s\n", (int)node->expr.label.name_len, node->expr.label.name); - break; - case NODE_GOTO: - printf("Goto: %.*s\n", (int)node->expr.label.name_len, node->expr.label.name); - break; - case NODE_BINARY: - printf("BinaryOp (%s)\n", get_op_str(node->expr.binary.operator)); - print_ast(node->expr.binary.left, depth + 1); - print_ast(node->expr.binary.right, depth + 1); - break; - case NODE_ARRAY_SUBSCRIPT: - printf("Array subscript\n"); - print_ast(node->expr.subscript.expr, depth + 1); - print_ast(node->expr.subscript.index, depth + 1); - break; - case NODE_UNARY: - printf("UnaryOp (%s)\n", get_uop_str(node->expr.unary.operator)); - print_ast(node->expr.unary.right, depth + 1); - break; - case NODE_POSTFIX: - printf("Postfix (%s)\n", get_uop_str(node->expr.unary.operator)); - print_ast(node->expr.unary.right, depth + 1); - break; - case NODE_BREAK: - printf("Break\n"); - break; - case NODE_TERNARY: - printf("Ternary (? :)\n"); - print_indent(depth + 1); printf("Condition:\n"); - print_ast(node->expr.ternary.condition, depth + 2); - print_indent(depth + 1); printf("Then:\n"); - print_ast(node->expr.ternary.then, depth + 2); - print_indent(depth + 1); printf("Else:\n"); - print_ast(node->expr.ternary.otherwise, depth + 2); - break; - case NODE_UNIT: - printf("Unit\n"); - ast_node *current = node; - while (current && current->type == NODE_UNIT) { - print_ast(current->expr.unit_node.expr, depth + 1); - current = current->expr.unit_node.next; - } - break; - case NODE_CALL: - printf("Call: %.*s\n", (int)node->expr.call.name_len, node->expr.call.name); - current = node->expr.call.parameters; - while (current && current->type == NODE_UNIT) { - print_ast(current->expr.unit_node.expr, depth + 1); - current = current->expr.unit_node.next; - } - break; - case NODE_STRUCT_INIT: - printf("Struct init:\n"); - current = node->expr.struct_init.members; - while (current && current->type == NODE_UNIT) { - print_ast(current->expr.unit_node.expr, depth + 1); - current = current->expr.unit_node.next; - } - break; - case NODE_STRUCT: - printf("Struct: %.*s\n", (int)node->expr.structure.name_len, node->expr.structure.name); - member *m = node->expr.structure.members; - while (m) { - print_ast(m->type, depth + 1); - m = m->next; - } - break; - case NODE_UNION: - printf("Union: %.*s\n", (int)node->expr.structure.name_len, node->expr.structure.name); - m = node->expr.structure.members; - while (m) { - print_ast(m->type, depth + 1); - m = m->next; - } - break; - case NODE_ENUM: - printf("Enum: %.*s\n", (int)node->expr.enm.name_len, node->expr.enm.name); - variant *v = node->expr.enm.variants; - while (v) { - printf("\t%.*s\n", (int)v->name_len, v->name); - v = v->next; - } - break; - case NODE_IF: - printf("If:\n"); - print_ast(node->expr.whle.condition, depth + 1); - print_ast(node->expr.whle.body, depth + 1); - break; - case NODE_VAR_DECL: - printf("VarDecl: "); - print_ast(node->expr.var_decl.type, 0); - print_ast(node->expr.var_decl.value, depth + 1); - break; - case NODE_FUNCTION: - printf("Function: %.*s\n", (int)node->expr.function.name_len, node->expr.function.name); - m = node->expr.function.parameters; - while (m) { - print_ast(m->type, depth + 1); - m = m->next; - } - print_ast(node->expr.function.body, depth + 1); - break; - case NODE_RETURN: - printf("Return:\n"); - print_ast(node->expr.ret.value, depth + 1); - break; - case NODE_IMPORT: - printf("Import:\n"); - print_ast(node->expr.import.path, depth + 1); - break; - case NODE_WHILE: - printf("While:\n"); - print_ast(node->expr.whle.condition, depth + 1); - print_ast(node->expr.whle.body, depth + 1); - break; - case NODE_FOR: - printf("For:\n"); - print_ast(node->expr.fr.slices, depth + 1); - print_ast(node->expr.fr.captures, depth + 1); - print_indent(depth + 1); - print_ast(node->expr.fr.body, depth + 1); - break; - case NODE_RANGE: - printf("Range:\n"); - print_ast(node->expr.binary.left, depth + 1); - print_ast(node->expr.binary.right, depth + 1); - break; - default: - printf("Unknown Node Type: %d\n", node->type); - break; - } -} - -int main(void) -{ - FILE *fp = fopen("test.l", "r"); - usize size = 0; - fseek(fp, 0, SEEK_END); - size = ftell(fp); - fseek(fp, 0, SEEK_SET); - char *src = malloc(size+1); - fread(src, size, 1, fp); - fclose(fp); - src[size] = '\0'; - - arena a = arena_init(0x1000 * 0x1000 * 64); - lexer *l = lexer_init(src, size, &a); - parser *p = parser_init(l, &a); - //print_ast(p->ast, 0); - sema_init(p, &a); - - ir_build(p->ast); - - arena_deinit(a); - - return 0; -} diff --git a/lexer.c b/lexer.c deleted file mode 100644 index 22063fd..0000000 --- a/lexer.c +++ /dev/null @@ -1,422 +0,0 @@ -#include "lexer.h" -#include -#include -#include -#include - -trie_node *keywords; - -static void add_token(lexer *l, token_type type, usize len) -{ - token *t = arena_alloc(l->allocator, sizeof(token)); - t->type = type; - t->lexeme_len = len; - t->lexeme = l->source + l->index; - t->position.row = l->row; - t->position.column = l->column; - - if (!l->tokens) { - l->tokens = t; - l->tail = t; - } else { - l->tail->next = t; - l->tail = t; - } -} - -static void add_error(lexer *l, char *msg) -{ - token *t = arena_alloc(l->allocator, sizeof(token)); - t->type = TOKEN_ERROR; - t->lexeme_len = strlen(msg); - t->lexeme = msg; - t->position.row = l->row; - t->position.column = l->column; - - if (!l->tokens) { - l->tokens = t; - l->tail = t; - } else { - l->tail->next = t; - l->tail = t; - } -} - -static void parse_number(lexer *l) -{ - char c = l->source[l->index]; - /* Is the number a float? */ - bool f = false; - usize len = 0; - - while (isdigit(c)) { - /* If a dot is found, and the character after it is a digit, this is a float. */ - if (l->source[l->index+1] == '.' && isdigit(l->source[l->index+2])) { - f = true; - len += 3; - l->index += 3; - } else { - len += 1; - l->index += 1; - } - c = l->source[l->index]; - } - l->index -= len; - if (f) { - add_token(l, TOKEN_FLOAT, len); - } else { - add_token(l, TOKEN_INTEGER, len); - } - l->index += len; -} - -static void parse_identifier(lexer *l) -{ - char c = l->source[l->index]; - usize len = 0; - - while (isalnum(c) || c == '_') { - len += 1; - l->index += 1; - c = l->source[l->index]; - } - l->index -= len; - token_type keyword = trie_get(keywords, l->source + l->index, len); - if (keyword) { - add_token(l, keyword, len); - } else { - add_token(l, TOKEN_IDENTIFIER, len); - } - l->index += len; -} - -static void parse_string(lexer *l) -{ - char c = l->source[l->index]; - usize len = 0; - - while (c != '"') { - if (c == '\0' || c == '\n') { - l->index -= len; - add_error(l, "unclosed string literal."); - l->index += len; - return; - } - len += 1; - l->index += 1; - c = l->source[l->index]; - } - l->index -= len; - add_token(l, TOKEN_STRING, len); - l->index += len + 1; -} - -static bool parse_special(lexer *l) -{ - switch (l->source[l->index]) { - case '+': - if (l->source[l->index+1] == '=') { - add_token(l, TOKEN_PLUS_EQ, 2); - l->index += 2; - } else if (l->source[l->index+1] == '+') { - add_token(l, TOKEN_PLUS_PLUS, 2); - l->index += 2; - } else { - add_token(l, TOKEN_PLUS, 1); - l->index += 1; - } - return true; - case '-': - if (l->source[l->index+1] == '=') { - add_token(l, TOKEN_MINUS_EQ, 2); - l->index += 2; - } else if (l->source[l->index+1] == '-') { - add_token(l, TOKEN_MINUS_MINUS, 2); - l->index += 2; - } else { - add_token(l, TOKEN_MINUS, 1); - l->index += 1; - } - return true; - case '/': - if (l->source[l->index+1] == '=') { - add_token(l, TOKEN_SLASH_EQ, 2); - l->index += 2; - } else { - add_token(l, TOKEN_SLASH, 1); - l->index += 1; - } - return true; - case '*': - if (l->source[l->index+1] == '=') { - add_token(l, TOKEN_STAR_EQ, 2); - l->index += 2; - } else { - add_token(l, TOKEN_STAR, 1); - l->index += 1; - } - return true; - case '%': - if (l->source[l->index+1] == '=') { - add_token(l, TOKEN_PERC_EQ, 2); - l->index += 2; - } else { - add_token(l, TOKEN_PERC, 1); - l->index += 1; - } - return true; - case '&': - if (l->source[l->index+1] == '=') { - add_token(l, TOKEN_AND_EQ, 2); - l->index += 2; - } else if (l->source[l->index+1] == '&') { - add_token(l, TOKEN_DOUBLE_AND, 2); - l->index += 2; - } else { - add_token(l, TOKEN_AND, 1); - l->index += 1; - } - return true; - case '^': - if (l->source[l->index+1] == '=') { - add_token(l, TOKEN_HAT_EQ, 2); - l->index += 2; - } else { - add_token(l, TOKEN_HAT, 1); - l->index += 1; - } - return true; - case '|': - if (l->source[l->index+1] == '=') { - add_token(l, TOKEN_PIPE_EQ, 2); - l->index += 2; - } else if (l->source[l->index+1] == '|') { - add_token(l, TOKEN_OR, 2); - l->index += 2; - } else { - add_token(l, TOKEN_PIPE, 1); - l->index += 1; - } - return true; - case '=': - if (l->source[l->index+1] == '=') { - add_token(l, TOKEN_DOUBLE_EQ, 2); - l->index += 2; - } else { - add_token(l, TOKEN_EQ, 1); - l->index += 1; - } - return true; - case '>': - if (l->source[l->index+1] == '=') { - add_token(l, TOKEN_GREATER_EQ, 2); - l->index += 2; - } else if (l->source[l->index+1] == '>') { - if (l->source[l->index+2] == '=') { - add_token(l, TOKEN_RSHIFT_EQ, 3); - l->index += 3; - return true; - } - add_token(l, TOKEN_RSHIFT, 2); - l->index += 2; - } else { - add_token(l, TOKEN_GREATER_THAN, 1); - l->index += 1; - } - return true; - case '<': - if (l->source[l->index+1] == '=') { - add_token(l, TOKEN_LESS_EQ, 2); - l->index += 2; - } else if (l->source[l->index+1] == '-') { - add_token(l, TOKEN_ARROW, 2); - l->index += 2; - } else if (l->source[l->index+1] == '<') { - if (l->source[l->index+2] == '=') { - add_token(l, TOKEN_LSHIFT_EQ, 3); - l->index += 3; - return true; - } - add_token(l, TOKEN_LSHIFT, 2); - l->index += 2; - } else { - add_token(l, TOKEN_LESS_THAN, 1); - l->index += 1; - } - return true; - case '!': - if (l->source[l->index+1] == '=') { - add_token(l, TOKEN_NOT_EQ, 2); - l->index += 2; - } else { - add_token(l, TOKEN_BANG, 1); - l->index += 1; - } - return true; - case ':': - add_token(l, TOKEN_COLON, 1); - l->index += 1; - return true; - case ';': - add_token(l, TOKEN_SEMICOLON, 1); - l->index += 1; - return true; - case '.': - if (l->source[l->index+1] == '.') { - add_token(l, TOKEN_DOUBLE_DOT, 2); - l->index += 2; - } else { - add_token(l, TOKEN_DOT, 1); - l->index += 1; - } - return true; - case ',': - add_token(l, TOKEN_COMMA, 1); - l->index += 1; - return true; - case '(': - add_token(l, TOKEN_LPAREN, 1); - l->index += 1; - return true; - case ')': - add_token(l, TOKEN_RPAREN, 1); - l->index += 1; - return true; - case '[': - add_token(l, TOKEN_LSQUARE, 1); - l->index += 1; - return true; - case ']': - add_token(l, TOKEN_RSQUARE, 1); - l->index += 1; - return true; - case '{': - add_token(l, TOKEN_LCURLY, 1); - l->index += 1; - return true; - case '}': - add_token(l, TOKEN_RCURLY, 1); - l->index += 1; - return true; - case '\'': - if (l->source[l->index+1] == '\\') { - if (l->source[l->index+3] != '\'') { - add_error(l, "unclosed character literal."); - l->index += 1; - return true; - } - l->index += 1; - add_token(l, TOKEN_CHAR, 2); - l->index += 3; - return true; - } else { - if (l->source[l->index+2] != '\'') { - add_error(l, "unclosed character literal."); - l->index += 1; - return true; - } - l->index += 1; - add_token(l, TOKEN_CHAR, 1); - l->index += 2; - return true; - } - default: - return false; - } -} - -static void parse(lexer *l) -{ - char c; - - while (l->index <= l->size) { - c = l->source[l->index]; - l->column += 1; - - if (c == '\n') { - l->index += 1; - l->row += 1; - l->column = 0; - continue; - } - - usize head = l->index; - - if (c == '/' && l->source[l->index+1] == '/') { - while (l->source[l->index] != '\n') { - l->index += 1; - } - l->column += (l->index - head - 1); - } - - if (isspace(c)) { - l->index += 1; - continue; - } - - - if (parse_special(l)) { - l->column += (l->index - head - 1); - continue; - } - - if (isdigit(c)) { - parse_number(l); - l->column += (l->index - head - 1); - continue; - } - - if (isalpha(c)) { - parse_identifier(l); - l->column += (l->index - head - 1); - continue; - } - - if (c == '"') { - l->index += 1; - parse_string(l); - l->column += (l->index - head - 1); - continue; - } - - l->index += 1; - } -} - -lexer *lexer_init(char *source, usize size, arena *arena) -{ - lexer *lex = arena_alloc(arena, sizeof(lexer)); - lex->column = 1; - lex->row = 1; - lex->index = 0; - lex->size = size; - lex->tokens = 0; - lex->tail = 0; - lex->allocator = arena; - lex->source = source; - - keywords = arena_alloc(arena, sizeof(trie_node)); - trie_insert(keywords, lex->allocator, "true", TOKEN_TRUE); - trie_insert(keywords, lex->allocator, "false", TOKEN_FALSE); - trie_insert(keywords, lex->allocator, "struct", TOKEN_STRUCT); - trie_insert(keywords, lex->allocator, "enum", TOKEN_ENUM); - trie_insert(keywords, lex->allocator, "union", TOKEN_UNION); - trie_insert(keywords, lex->allocator, "loop", TOKEN_LOOP); - trie_insert(keywords, lex->allocator, "while", TOKEN_WHILE); - trie_insert(keywords, lex->allocator, "until", TOKEN_UNTIL); - trie_insert(keywords, lex->allocator, "goto", TOKEN_GOTO); - trie_insert(keywords, lex->allocator, "if", TOKEN_IF); - trie_insert(keywords, lex->allocator, "else", TOKEN_ELSE); - trie_insert(keywords, lex->allocator, "switch", TOKEN_SWITCH); - trie_insert(keywords, lex->allocator, "break", TOKEN_BREAK); - trie_insert(keywords, lex->allocator, "defer", TOKEN_DEFER); - trie_insert(keywords, lex->allocator, "return", TOKEN_RETURN); - trie_insert(keywords, lex->allocator, "import", TOKEN_IMPORT); - trie_insert(keywords, lex->allocator, "const", TOKEN_CONST); - trie_insert(keywords, lex->allocator, "extern", TOKEN_EXTERN); - trie_insert(keywords, lex->allocator, "volatile", TOKEN_VOLATILE); - - parse(lex); - - return lex; -} diff --git a/lexer.h b/lexer.h deleted file mode 100644 index 72277df..0000000 --- a/lexer.h +++ /dev/null @@ -1,97 +0,0 @@ -#ifndef LEXER_H -#define LEXER_H - -#include "utils.h" - -typedef enum { - TOKEN_ERROR, - TOKEN_END, - TOKEN_PLUS, // + - TOKEN_PLUS_PLUS, // ++ - TOKEN_MINUS, // - - TOKEN_MINUS_MINUS, // -- - TOKEN_SLASH, // / - TOKEN_PERC, // % - TOKEN_STAR, // * - TOKEN_AND, // & - TOKEN_HAT, // ^ - TOKEN_PIPE, // | - TOKEN_LSHIFT, // << - TOKEN_RSHIFT, // >> - TOKEN_DOUBLE_EQ, // == - TOKEN_ARROW, // <- - TOKEN_EQ, // = - TOKEN_LESS_THAN, // < - TOKEN_GREATER_THAN, // > - TOKEN_LESS_EQ, // <= - TOKEN_GREATER_EQ, // >= - TOKEN_NOT_EQ, // != - TOKEN_PLUS_EQ, // += - TOKEN_MINUS_EQ, // -= - TOKEN_STAR_EQ, // *= - TOKEN_SLASH_EQ, // /= - TOKEN_AND_EQ, // &= - TOKEN_HAT_EQ, // ^= - TOKEN_PIPE_EQ, // |= - TOKEN_PERC_EQ, // %= - TOKEN_LSHIFT_EQ, // <<= - TOKEN_RSHIFT_EQ, // >>= - TOKEN_OR, // || - TOKEN_DOUBLE_AND, // && - TOKEN_COLON, // : - TOKEN_SEMICOLON, // ; - TOKEN_DOT, // . - TOKEN_DOUBLE_DOT, // .. - TOKEN_BANG, // ! - TOKEN_COMMA, // , - TOKEN_LPAREN, // ( - TOKEN_RPAREN, // ) - TOKEN_LSQUARE, // [ - TOKEN_RSQUARE, // ] - TOKEN_LCURLY, // { - TOKEN_RCURLY, // } - TOKEN_INTEGER, - TOKEN_FLOAT, - TOKEN_IDENTIFIER, - TOKEN_STRING, - TOKEN_CHAR, - TOKEN_TRUE, - TOKEN_FALSE, - TOKEN_GOTO, - TOKEN_LOOP, - TOKEN_WHILE, - TOKEN_UNTIL, - TOKEN_IF, - TOKEN_ELSE, - TOKEN_SWITCH, - TOKEN_BREAK, - TOKEN_DEFER, - TOKEN_RETURN, - TOKEN_IMPORT, - TOKEN_CONST, - TOKEN_EXTERN, - TOKEN_VOLATILE, - TOKEN_STRUCT, - TOKEN_ENUM, - TOKEN_UNION -} token_type; - -typedef struct _token { - token_type type; - source_pos position; - char *lexeme; - usize lexeme_len; - struct _token *next; -} token; - -typedef struct { - usize column, row, index, size; - char *source; - token *tokens; - token *tail; - arena *allocator; -} lexer; - -lexer *lexer_init(char *source, usize size, arena *arena); - -#endif diff --git a/parser.c b/parser.c deleted file mode 100644 index 8061fc1..0000000 --- a/parser.c +++ /dev/null @@ -1,1346 +0,0 @@ -#include "parser.h" -#include -#include -#include -#include - -bool has_errors = false; - -ast_node *parse_expression(parser *p); -static ast_node *parse_statement(parser *p); -static ast_node *parse_type(parser *p); - -/* Consume a token in the list. */ -static void advance(parser *p) -{ - p->previous = p->tokens; - if (p->tokens) - p->tokens = p->tokens->next; -} - -/* Get the current token in the list, without consuming */ -static token *peek(parser *p) -{ - return p->tokens; -} - -/* - * Check if the current token type is the same as `type`, - * without consuming it. - */ -static bool match_peek(parser *p, token_type type) -{ - if (p->tokens) - { - return p->tokens->type == type; - } - else - { - return false; - } -} - -/* Same as `match_peek()` but it consumes the token. */ -static bool match(parser *p, token_type type) -{ - if (p->tokens) - { - if (p->tokens->type == type) - { - advance(p); - return true; - } - } - return false; -} - -/* - * When an error is encountered, try to find a - * token that could define a part of the code - * which doesn't depend on the one giving the - * error. This is needed to print multiple errors - * instead of just failing at the first one. - */ -static void parser_sync(parser *p) -{ - advance(p); - - while (p->tokens) - { - if (p->previous->type == TOKEN_SEMICOLON || p->previous->type == TOKEN_RCURLY) - { - return; - } - - switch (p->tokens->type) - { - case TOKEN_STRUCT: - case TOKEN_ENUM: - case TOKEN_UNION: - case TOKEN_IF: - case TOKEN_LOOP: - case TOKEN_RETURN: - case TOKEN_SWITCH: - return; - default: - advance(p); - } - } -} - -/* Print the error message and sync the parser. */ -static void error(parser *p, char *msg) -{ - printf("\x1b[31m\x1b[1merror\x1b[0m\x1b[1m:%ld:%ld:\x1b[0m %s\n", p->previous->position.row, p->previous->position.column, msg); - has_errors = true; - parser_sync(p); -} - -static ast_node *parse_call(parser *p) -{ - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_CALL; - node->position = p->previous->position; - node->expr.call.name = peek(p)->lexeme; - node->expr.call.name_len = peek(p)->lexeme_len; - advance(p); - /* Skip also the opening `(` */ - advance(p); - /* Call without parameters */ - if (match(p, TOKEN_RPAREN)) - { - node->expr.call.parameters = NULL; - return node; - } - - snapshot arena_start = arena_snapshot(p->allocator); - node->expr.call.parameters = arena_alloc(p->allocator, sizeof(ast_node)); - node->expr.call.parameters->type = NODE_UNIT; - node->expr.call.parameters->expr.unit_node.expr = parse_expression(p); - ast_node *tail = node->expr.call.parameters; - node->expr.call.param_len = 1; - - /* In this case, there is only one parameter */ - if (match(p, TOKEN_RPAREN)) - { - return node; - } - - if (match(p, TOKEN_COMMA)) - { - ast_node *expr = parse_expression(p); - if (expr) - { - while (!match(p, TOKEN_RPAREN)) - { - if (!match(p, TOKEN_COMMA)) - { - error(p, "expected `)`."); - arena_reset_to_snapshot(p->allocator, arena_start); - return NULL; - } - tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); - tail->expr.unit_node.next->expr.unit_node.expr = expr; - tail = tail->expr.unit_node.next; - tail->type = NODE_UNIT; - expr = parse_expression(p); - if (!expr) - { - error(p, "expected `)`."); - arena_reset_to_snapshot(p->allocator, arena_start); - return NULL; - } - node->expr.call.param_len += 1; - } - - tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); - tail->expr.unit_node.next->expr.unit_node.expr = expr; - tail = tail->expr.unit_node.next; - tail->type = NODE_UNIT; - } - else - { - error(p, "expected expression."); - arena_reset_to_snapshot(p->allocator, arena_start); - return NULL; - } - } - else - { - error(p, "expected `)`."); - arena_reset_to_snapshot(p->allocator, arena_start); - return NULL; - } - - return node; -} - -/* Parse expressions with the highest precedence. */ -static ast_node *parse_factor(parser *p) -{ - token *t = peek(p); - if (match(p, TOKEN_INTEGER)) - { - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_INTEGER; - node->position = p->previous->position; - node->expr.integer = parse_int(t->lexeme, t->lexeme_len); - if (match(p, TOKEN_DOUBLE_DOT)) { - ast_node *range = arena_alloc(p->allocator, sizeof(ast_node)); - range->type = NODE_RANGE; - range->expr.binary.left = node; - range->expr.binary.operator = OP_PLUS; - snapshot snap = arena_snapshot(p->allocator); - ast_node *end = parse_factor(p); - if (!end) { - range->expr.binary.right = NULL; - } else if (end->type != NODE_INTEGER) { - arena_reset_to_snapshot(p->allocator, snap); - error(p, "expected integer."); - return NULL; - } else { - range->expr.binary.right = end; - } - return range; - } - return node; - } - else if (match(p, TOKEN_FLOAT)) - { - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_FLOAT; - node->position = p->previous->position; - node->expr.flt = parse_float(t->lexeme, t->lexeme_len); - return node; - } - else if (match(p, TOKEN_TRUE)) { - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_BOOL; - node->position = p->previous->position; - node->expr.boolean = 1; - return node; - } - else if (match(p, TOKEN_FALSE)) { - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_BOOL; - node->position = p->previous->position; - node->expr.boolean = 0; - return node; - } - else if (match_peek(p, TOKEN_IDENTIFIER)) - { - /* If a `(` is found after an identifier, it should be a call. */ - if (p->tokens->next && p->tokens->next->type == TOKEN_LPAREN) - { - return parse_call(p); - } - advance(p); - - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_IDENTIFIER; - node->position = p->previous->position; - node->expr.string.start = t->lexeme; - node->expr.string.len = t->lexeme_len; - return node; - } - else if (match(p, TOKEN_STRING)) - { - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_STRING; - node->position = p->previous->position; - node->expr.string.start = t->lexeme; - node->expr.string.len = t->lexeme_len; - return node; - } - else if (match(p, TOKEN_CHAR)) - { - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_CHAR; - node->position = p->previous->position; - if (t->lexeme_len == 2) - { - char c; - switch (t->lexeme[1]) - { - case 'n': - c = '\n'; - break; - case 't': - c = '\t'; - break; - case 'r': - c = '\r'; - break; - case '0': - c = '\0'; - break; - case '\\': - c = '\\'; - break; - case '\'': - c = '\''; - break; - default: - error(p, "invalid escape code."); - return NULL; - } - node->expr.ch = c; - } - else - { - node->expr.ch = *(t->lexeme); - } - return node; - } - else if (match(p, TOKEN_LPAREN)) - { - ast_node *node = parse_expression(p); - if (!match(p, TOKEN_RPAREN)) - { - error(p, "unclosed parenthesis"); - return NULL; - } - - return node; - } - - return NULL; -} - -ast_node *parse_unary(parser *p) -{ - if (match(p, TOKEN_PLUS_PLUS) || match(p, TOKEN_MINUS) || match(p, TOKEN_MINUS_MINUS) || match(p, TOKEN_STAR) || match(p, TOKEN_AND) || match(p, TOKEN_BANG)) - { - unary_op op; - switch (p->previous->type) - { - case TOKEN_PLUS_PLUS: - op = UOP_INCR; - break; - case TOKEN_MINUS: - op = UOP_MINUS; - break; - case TOKEN_MINUS_MINUS: - op = UOP_DECR; - break; - case TOKEN_STAR: - op = UOP_DEREF; - break; - case TOKEN_AND: - op = UOP_REF; - break; - case TOKEN_BANG: - op = UOP_NOT; - break; - default: - goto end; - } - - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_UNARY; - node->position = p->previous->position; - node->expr.unary.operator = op; - node->expr.unary.right = parse_expression(p); - - return node; - } - - /* Type cast. */ - if (match_peek(p, TOKEN_LPAREN) && p->tokens->next && p->tokens->next->type == TOKEN_IDENTIFIER && p->tokens->next->next && p->tokens->next->next->type == TOKEN_RPAREN) - { - advance(p); - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_CAST; - node->position = p->previous->position; - node->expr.cast.type = parse_type(p); - advance(p); - advance(p); - node->expr.cast.value = parse_expression(p); - return node; - } - -end: - return parse_factor(p); -} - -ast_node *parse_term(parser *p) -{ - ast_node *left = parse_unary(p); - - while (match_peek(p, TOKEN_STAR) || match_peek(p, TOKEN_SLASH) || match_peek(p, TOKEN_PERC)) { - binary_op op; - switch (peek(p)->type) { - case TOKEN_STAR: - op = OP_MUL; - break; - case TOKEN_SLASH: - op = OP_DIV; - break; - case TOKEN_PERC: - op = OP_MOD; - break; - default: - continue; - } - advance(p); - ast_node *right = parse_factor(p); - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_BINARY; - node->position = p->previous->position; - node->expr.binary.left = left; - node->expr.binary.right = right; - node->expr.binary.operator = op; - left = node; - } - - return left; -} - -/* - * Following the recursive descent parser algorithm, this - * parses all the arithmetic expressions. - */ -ast_node *parse_expression(parser *p) -{ - ast_node *left = parse_term(p); - - while (match_peek(p, TOKEN_PLUS) || match_peek(p, TOKEN_MINUS)) - { - binary_op op = peek(p)->type == TOKEN_PLUS ? OP_PLUS : OP_MINUS; - advance(p); - ast_node *right = parse_term(p); - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_BINARY; - node->position = p->previous->position; - node->expr.binary.left = left; - node->expr.binary.right = right; - node->expr.binary.operator = op; - left = node; - } - - /* - * If after parsing an expression a `[` character - * is found, it should be an array subscript expression. - */ - if (match_peek(p, TOKEN_LSQUARE)) { - while (match(p, TOKEN_LSQUARE)) { - ast_node *index = parse_expression(p); - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_ARRAY_SUBSCRIPT; - node->position = p->previous->position; - node->expr.subscript.expr = left; - node->expr.subscript.index = index; - - if (!match(p, TOKEN_RSQUARE)) - { - error(p, "expected `]`."); - return NULL; - } - - left = node; - - } - } - - /* - * If after parsing an expression a `.` character - * is found, it should be a member access expression. - */ - if (match_peek(p, TOKEN_DOT) && p->tokens->next && p->tokens->next->type != TOKEN_LCURLY) { - while (match(p, TOKEN_DOT)) { - if (!match_peek(p, TOKEN_IDENTIFIER)) { - error(p, "expected identifier after member access."); - return NULL; - } - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_ACCESS; - node->position = p->previous->position; - node->expr.access.expr = left; - node->expr.access.member = parse_factor(p); - - left = node; - } - } - - /* - * If after parsing an expression a `++` or a `--` - * token is found, it should be a postfix expression. - */ - if (match(p, TOKEN_PLUS_PLUS) || match(p, TOKEN_MINUS_MINUS)) - { - unary_op op; - switch (p->previous->type) - { - case TOKEN_PLUS_PLUS: - op = UOP_INCR; - break; - case TOKEN_MINUS_MINUS: - op = UOP_DECR; - break; - default: - break; - } - - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_POSTFIX; - node->position = p->previous->position; - node->expr.unary.operator = op; - node->expr.unary.right = left; - - return node; - } - - if (match(p, TOKEN_DOT)) { - if (match(p, TOKEN_LCURLY)) { - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_STRUCT_INIT; - node->position = p->previous->position; - - if (match(p, TOKEN_RCURLY)) - { - node->expr.struct_init.members = NULL; - return node; - } - - snapshot arena_start = arena_snapshot(p->allocator); - node->expr.struct_init.members = arena_alloc(p->allocator, sizeof(ast_node)); - node->expr.struct_init.members->type = NODE_UNIT; - node->expr.struct_init.members->expr.unit_node.expr = parse_expression(p); - ast_node *tail = node->expr.struct_init.members; - node->expr.struct_init.members_len = 1; - - /* In this case, there is only one parameter */ - if (match(p, TOKEN_RCURLY)) - { - return node; - } - - if (match(p, TOKEN_COMMA)) - { - ast_node *expr = parse_expression(p); - if (expr) - { - while (!match(p, TOKEN_RCURLY)) - { - if (!match(p, TOKEN_COMMA)) - { - error(p, "expected `}`."); - arena_reset_to_snapshot(p->allocator, arena_start); - return NULL; - } - tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); - tail->expr.unit_node.next->expr.unit_node.expr = expr; - tail = tail->expr.unit_node.next; - tail->type = NODE_UNIT; - expr = parse_expression(p); - if (!expr) - { - error(p, "expected `}`."); - arena_reset_to_snapshot(p->allocator, arena_start); - return NULL; - } - node->expr.struct_init.members_len += 1; - } - - tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); - tail->expr.unit_node.next->expr.unit_node.expr = expr; - tail = tail->expr.unit_node.next; - tail->type = NODE_UNIT; - } - else - { - error(p, "expected member initialization."); - arena_reset_to_snapshot(p->allocator, arena_start); - return NULL; - } - } - else - { - error(p, "expected `}`."); - arena_reset_to_snapshot(p->allocator, arena_start); - return NULL; - } - - return node; - } else { - error(p, "unexpected `.`"); - return NULL; - } - } - - if (p->tokens && ((p->tokens->type >= TOKEN_DOUBLE_EQ && p->tokens->type <= TOKEN_NOT_EQ) || (p->tokens->type >= TOKEN_LSHIFT_EQ && p->tokens->type <= TOKEN_DOUBLE_AND))) - { - binary_op op; - switch (p->tokens->type) - { - case TOKEN_ARROW: - op = OP_ASSIGN_PTR; - break; - case TOKEN_EQ: - op = OP_ASSIGN; - break; - case TOKEN_DOUBLE_EQ: - op = OP_EQ; - break; - case TOKEN_LESS_THAN: - op = OP_LT; - break; - case TOKEN_GREATER_THAN: - op = OP_GT; - break; - case TOKEN_LESS_EQ: - op = OP_LE; - break; - case TOKEN_GREATER_EQ: - op = OP_GE; - break; - case TOKEN_NOT_EQ: - op = OP_NEQ; - break; - case TOKEN_LSHIFT_EQ: - op = OP_LSHIFT_EQ; - break; - case TOKEN_RSHIFT_EQ: - op = OP_RSHIFT_EQ; - break; - case TOKEN_OR: - op = OP_OR; - break; - case TOKEN_DOUBLE_AND: - op = OP_AND; - break; - default: - break; - } - advance(p); - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_BINARY; - node->position = p->previous->position; - node->expr.binary.left = left; - node->expr.binary.operator = op; - node->expr.binary.right = parse_expression(p); - if (!node->expr.binary.right) { - error(p, "expected expression."); - return NULL; - } - - return node; - } - - return left; -} - -static ast_node *parse_compound(parser *p) -{ - if (!match(p, TOKEN_LCURLY)) { - error(p, "expected `{`."); - return NULL; - } - - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_UNIT; - node->position = p->previous->position; - - if (match(p, TOKEN_RCURLY)) - { - node->expr.unit_node.expr = NULL; - node->expr.unit_node.next = NULL; - return node; - } - - snapshot arena_start = arena_snapshot(p->allocator); - node->expr.unit_node.expr = parse_statement(p); - ast_node *tail = node; - - /* In this case, there is only one parameter */ - if (match(p, TOKEN_RCURLY)) - { - return node; - } - - ast_node *expr = parse_statement(p); - if (expr) - { - while (!match(p, TOKEN_RCURLY)) - { - tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); - tail->expr.unit_node.next->expr.unit_node.expr = expr; - tail = tail->expr.unit_node.next; - tail->type = NODE_UNIT; - expr = parse_statement(p); - if (!expr) - { - error(p, "expected `}`."); - arena_reset_to_snapshot(p->allocator, arena_start); - return NULL; - } - } - - tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); - tail->expr.unit_node.next->expr.unit_node.expr = expr; - tail = tail->expr.unit_node.next; - tail->type = NODE_UNIT; - } - - return node; -} - -static ast_node *parse_for(parser *p) -{ - advance(p); - ast_node* node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_FOR; - node->position = p->previous->position; - - snapshot arena_start = arena_snapshot(p->allocator); - node->expr.fr.slices = arena_alloc(p->allocator, sizeof(ast_node)); - node->expr.fr.slices->type = NODE_UNIT; - node->expr.fr.slices->expr.unit_node.expr = parse_expression(p); - ast_node *tail = node->expr.fr.slices; - node->expr.fr.slice_len = 1; - - /* In this case, there is only one slice. */ - if (match(p, TOKEN_RPAREN)) - { - goto parse_captures; - } - - if (match(p, TOKEN_COMMA)) - { - ast_node *expr = parse_expression(p); - if (expr) - { - while (!match(p, TOKEN_RPAREN)) - { - if (!match(p, TOKEN_COMMA)) - { - error(p, "expected `)`."); - arena_reset_to_snapshot(p->allocator, arena_start); - return NULL; - } - tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); - tail->expr.unit_node.next->expr.unit_node.expr = expr; - tail = tail->expr.unit_node.next; - tail->type = NODE_UNIT; - expr = parse_expression(p); - if (!expr) - { - error(p, "expected `)`."); - arena_reset_to_snapshot(p->allocator, arena_start); - return NULL; - } - } - - tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); - tail->expr.unit_node.next->expr.unit_node.expr = expr; - tail = tail->expr.unit_node.next; - node->expr.fr.slice_len += 1; - tail->type = NODE_UNIT; - } - else - { - error(p, "expected expression."); - arena_reset_to_snapshot(p->allocator, arena_start); - return NULL; - } - } - else - { - error(p, "expected `)`."); - arena_reset_to_snapshot(p->allocator, arena_start); - return NULL; - } - -parse_captures: - - if (!match(p, TOKEN_PIPE)) { - error(p, "expected capture."); - return NULL; - } - - arena_start = arena_snapshot(p->allocator); - node->expr.fr.captures = arena_alloc(p->allocator, sizeof(ast_node)); - node->expr.fr.captures->type = NODE_UNIT; - node->expr.fr.captures->expr.unit_node.expr = parse_factor(p); - if (node->expr.fr.captures->expr.unit_node.expr && node->expr.fr.captures->expr.unit_node.expr->type != NODE_IDENTIFIER) { - error(p, "captures must be identifiers."); - arena_reset_to_snapshot(p->allocator, arena_start); - return NULL; - } - tail = node->expr.fr.captures; - node->expr.fr.capture_len = 1; - - /* In this case, there is only one capture */ - if (match(p, TOKEN_PIPE)) { - goto parse_body; - } - - if (match(p, TOKEN_COMMA)) { - ast_node *expr = parse_expression(p); - if (expr) { - while (!match(p, TOKEN_PIPE)) { - if (!match(p, TOKEN_COMMA)) { - error(p, "expected `)`."); - arena_reset_to_snapshot(p->allocator, arena_start); - return NULL; - } - tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); - tail->expr.unit_node.next->expr.unit_node.expr = expr; - tail = tail->expr.unit_node.next; - tail->type = NODE_UNIT; - expr = parse_factor(p); - if (!expr) { - error(p, "expected `|`."); - arena_reset_to_snapshot(p->allocator, arena_start); - return NULL; - } - } - - tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); - tail->expr.unit_node.next->expr.unit_node.expr = expr; - tail = tail->expr.unit_node.next; - tail->type = NODE_UNIT; - node->expr.fr.capture_len += 1; - } else { - error(p, "expected identifier."); - arena_reset_to_snapshot(p->allocator, arena_start); - return NULL; - } - } else { - error(p, "expected `|`."); - arena_reset_to_snapshot(p->allocator, arena_start); - return NULL; - } - -parse_body:; - if (node->expr.fr.capture_len != node->expr.fr.slice_len) { - error(p, "invalid number of captures."); - return NULL; - } - - ast_node* body = parse_compound(p); - node->expr.fr.body = body; - return node; -} - -static ast_node *parse_while(parser *p) -{ - u8 flags = 0x0; - - if (match(p, TOKEN_WHILE)) { - flags |= LOOP_WHILE; - } else if (match(p, TOKEN_UNTIL)) { - flags |= LOOP_UNTIL; - } else if (!match_peek(p, TOKEN_LCURLY)) { - error(p, "expected `while`, `until` or `{`."); - return NULL; - } - ast_node *condition = parse_expression(p); - if (!condition) { - flags |= LOOP_AFTER; - } - ast_node *body = parse_compound(p); - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_WHILE; - node->position = p->previous->position; - node->expr.whle.body = body; - - if (flags & LOOP_AFTER) { - if (match(p, TOKEN_WHILE)) { - flags |= LOOP_WHILE; - condition = parse_expression(p); - } else if (match(p, TOKEN_UNTIL)) { - flags |= LOOP_UNTIL; - condition = parse_expression(p); - } else { - node->expr.whle.condition = NULL; - } - } - - node->expr.whle.condition = condition; - - return node; -} - -static ast_node *parse_if(parser *p) -{ - ast_node *condition = parse_expression(p); - ast_node *body = parse_compound(p); - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_IF; - node->position = p->previous->position; - node->expr.if_stmt.body = body; - node->expr.if_stmt.condition = condition; - if (match(p, TOKEN_ELSE)) { - body = parse_compound(p); - node->expr.if_stmt.otherwise = body; - } - return node; -} - -static ast_node *parse_struct(parser *p); -static ast_node *parse_type(parser *p) -{ - ast_node *type = NULL; - - if (match(p, TOKEN_STRUCT)) { - type = parse_struct(p); - } else if (match(p, TOKEN_UNION)) { - type = parse_struct(p); - type->type = NODE_UNION; - } else if (match(p, TOKEN_LSQUARE)) { - /* Array/slice type */ - type = arena_alloc(p->allocator, sizeof(ast_node)); - type->type = NODE_PTR_TYPE; - if (match(p, TOKEN_CONST)) type->expr.ptr_type.flags |= PTR_CONST; - if (match(p, TOKEN_VOLATILE)) type->expr.ptr_type.flags |= PTR_VOLATILE; - type->expr.ptr_type.flags |= PTR_SLICE; - type->expr.ptr_type.type = parse_type(p); - if (!type->expr.ptr_type.type) { - error(p, "expected type."); - return NULL; - } - if (!match(p, TOKEN_RSQUARE)) { - error(p, "expected `]`."); - return NULL; - } - } else if (match(p, TOKEN_STAR)) { - type = arena_alloc(p->allocator, sizeof(ast_node)); - type->type = NODE_PTR_TYPE; - if (match(p, TOKEN_CONST)) type->expr.ptr_type.flags |= PTR_CONST; - if (match(p, TOKEN_VOLATILE)) type->expr.ptr_type.flags |= PTR_VOLATILE; - type->expr.ptr_type.flags |= PTR_RAW; - type->expr.ptr_type.type = parse_type(p); - if (!type->expr.ptr_type.type) { - error(p, "expected type."); - return NULL; - } - } else if (match_peek(p, TOKEN_IDENTIFIER)) { - type = parse_factor(p); - } - - return type; -} - -static member *parse_member(parser *p) -{ - ast_node *type = parse_type(p); - - if (!match_peek(p, TOKEN_IDENTIFIER)) { - error(p, "expected identifier."); - return NULL; - } - - member *m = arena_alloc(p->allocator, sizeof(member)); - m->type = type; - m->name = peek(p)->lexeme; - m->name_len = peek(p)->lexeme_len; - advance(p); - - - return m; -} - -static variant *parse_variant(parser *p) -{ - if (!match_peek(p, TOKEN_IDENTIFIER)) { - error(p, "expected identifier."); - return NULL; - } - - variant *v = arena_alloc(p->allocator, sizeof(variant)); - v->name = peek(p)->lexeme; - v->name_len = peek(p)->lexeme_len; - advance(p); - - if (match(p, TOKEN_EQ)) { - v->value = parse_factor(p); - if (!v->value) { - error(p, "expected integer."); - return NULL; - } - - if (v->value->type != NODE_INTEGER) { - error(p, "expected integer."); - return NULL; - } - } - - return v; -} - -static ast_node *parse_enum(parser *p) -{ - ast_node *enm = arena_alloc(p->allocator, sizeof(ast_node)); - enm->type = NODE_ENUM; - enm->position = p->previous->position; - if (match_peek(p, TOKEN_IDENTIFIER)) { - /* Named enum */ - enm->expr.enm.name = peek(p)->lexeme; - enm->expr.enm.name_len = peek(p)->lexeme_len; - advance(p); - } else if (!match_peek(p, TOKEN_LCURLY)) { - error(p, "expected identifier or `{`."); - return NULL; - } else { - enm->expr.enm.name = NULL; - enm->expr.enm.name_len = 0; - } - - if (!match(p, TOKEN_LCURLY)) { - error(p, "expected `{`."); - return NULL; - } - - variant *prev = parse_variant(p); - variant *head = prev; - enm->expr.enm.variants = head; - if (!prev) { - error(p, "invalid enum definition. Enums should contain at least 1 variant."); - return NULL; - } - if (!match(p, TOKEN_COMMA)) { - if (!match(p, TOKEN_RCURLY)) { - error(p, "expected `,`."); - return NULL; - } else { - return enm; - } - } - while (!match(p, TOKEN_RCURLY)) { - variant *current = parse_variant(p); - if (!current) { - error(p, "expected variant definition."); - return NULL; - } - prev->next = current; - if (!match(p, TOKEN_COMMA)) { - if (!match_peek(p, TOKEN_RCURLY)) { - error(p, "expected `,`."); - return NULL; - } - } - - prev = current; - } - - return enm; -} - -static ast_node *parse_struct(parser *p) -{ - ast_node *structure = arena_alloc(p->allocator, sizeof(ast_node)); - structure->type = NODE_STRUCT; - structure->position = p->previous->position; - if (match_peek(p, TOKEN_IDENTIFIER)) { - /* Named structure */ - structure->expr.structure.name = peek(p)->lexeme; - structure->expr.structure.name_len = peek(p)->lexeme_len; - advance(p); - } else if (!match_peek(p, TOKEN_LCURLY)) { - error(p, "expected identifier or `{`."); - return NULL; - } else { - structure->expr.structure.name = NULL; - structure->expr.structure.name_len = 0; - } - - if (!match(p, TOKEN_LCURLY)) { - error(p, "expected `{`."); - return NULL; - } - - member *prev = parse_member(p); - member *head = prev; - structure->expr.structure.members = head; - if (!prev) { - error(p, "invalid struct definition. Structs should contain at least 1 member."); - return NULL; - } - if (!match(p, TOKEN_COMMA)) { - if (!match(p, TOKEN_RCURLY)) { - error(p, "expected `,`."); - return NULL; - } else { - return structure; - } - } - while (!match(p, TOKEN_RCURLY)) { - member *current = parse_member(p); - if (!current) { - error(p, "expected member definition."); - return NULL; - } - prev->next = current; - if (!match(p, TOKEN_COMMA)) { - if (!match_peek(p, TOKEN_RCURLY)) { - error(p, "expected `,`."); - return NULL; - } - } - - prev = current; - } - - return structure; -} - -static ast_node *parse_function(parser *p) -{ - ast_node *fn = arena_alloc(p->allocator, sizeof(ast_node)); - fn->type = NODE_FUNCTION; - fn->expr.function.type = parse_type(p); - fn->expr.function.name = peek(p)->lexeme; - fn->expr.function.name_len = peek(p)->lexeme_len; - advance(p); - /* Consume `(` */ - advance(p); - - if (match(p, TOKEN_RPAREN)) { - fn->expr.function.body = parse_compound(p);; - fn->expr.function.parameters = NULL; - fn->expr.function.parameters_len = 0; - return fn; - } - member *prev = parse_member(p); - member *head = prev; - fn->expr.function.parameters = head; - fn->expr.function.parameters_len = 1; - if (!match(p, TOKEN_COMMA)) { - if (!match(p, TOKEN_RPAREN)) { - error(p, "expected `,`."); - return NULL; - } else { - fn->expr.function.body = parse_compound(p); - return fn; - } - } - while (!match(p, TOKEN_RPAREN)) { - member *current = parse_member(p); - if (!current) { - error(p, "expected parameter."); - return NULL; - } - prev->next = current; - if (!match(p, TOKEN_COMMA)) { - if (!match_peek(p, TOKEN_RPAREN)) { - error(p, "expected `,`."); - return NULL; - } - } - fn->expr.function.parameters_len += 1; - - prev = current; - } - fn->expr.function.body = parse_compound(p); - - return fn; -} - -static ast_node *parse_statement(parser *p) -{ - token *cur = peek(p); - ast_node *type = parse_type(p); - if (type && type->type == NODE_STRUCT && type->expr.structure.name_len > 0) { - goto skip_struct; - } - if (type && match_peek(p, TOKEN_IDENTIFIER)) { - if (p->tokens->next && p->tokens->next->type == TOKEN_LPAREN) { - /* Function definition. */ - p->tokens = cur; - return parse_function(p); - } - p->tokens = cur; - /* Variable declaration. */ - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_VAR_DECL; - node->position = p->previous->position; - node->expr.var_decl.type = parse_type(p); - node->expr.var_decl.name = p->tokens->lexeme; - node->expr.var_decl.name_len = p->tokens->lexeme_len; - advance(p); - if (match(p, TOKEN_EQ)) { - node->expr.var_decl.value = parse_expression(p); - } else { - node->expr.var_decl.value = NULL; - } - - if (!match(p, TOKEN_SEMICOLON)) { - error(p, "expected `;` after statement."); - return NULL; - } - - return node; - } -skip_struct: - p->tokens = cur; - - if (match(p, TOKEN_BREAK)) - { - if (!match(p, TOKEN_SEMICOLON)) - { - error(p, "expected `;` after `break`."); - return NULL; - } - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_BREAK; - node->position = p->previous->position; - return node; - } - else if (match(p, TOKEN_RETURN)) - { - ast_node *expr = parse_expression(p); - - if (!expr) - { - error(p, "expected expression after `return`."); - return NULL; - } - if (!match(p, TOKEN_SEMICOLON)) - { - error(p, "expected `;`."); - return NULL; - } - - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_RETURN; - node->position = p->previous->position; - node->expr.ret.value = expr; - return node; - } - else if (match_peek(p, TOKEN_IDENTIFIER) && p->tokens->next && p->tokens->next->type == TOKEN_COLON) - { - /* In this case, this is a label. */ - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_LABEL; - node->position = p->previous->position; - node->expr.label.name = p->tokens->lexeme; - node->expr.label.name_len = p->tokens->lexeme_len; - advance(p); - /* Consume `:` */ - advance(p); - return node; - } - else if (match(p, TOKEN_GOTO)) - { - if (!match_peek(p, TOKEN_IDENTIFIER)) - { - error(p, "expected label identifier after `goto`."); - return NULL; - } - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_GOTO; - node->position = p->previous->position; - node->expr.label.name = p->tokens->lexeme; - node->expr.label.name_len = p->tokens->lexeme_len; - advance(p); - if (!match(p, TOKEN_SEMICOLON)) - { - error(p, "expected `;` after `goto`."); - return NULL; - } - return node; - } - else if (match(p, TOKEN_IMPORT)) - { - ast_node *expr = parse_expression(p); - if (!expr) - { - error(p, "expected module path after `import`."); - return NULL; - } - if (expr->type != NODE_ACCESS && expr->type != NODE_IDENTIFIER) - { - error(p, "expected module path after `import`."); - return NULL; - } - - ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); - node->type = NODE_IMPORT; - node->position = p->previous->position; - node->expr.import.path = expr; - - if (!match(p, TOKEN_SEMICOLON)) - { - error(p, "expected `;` after `import`."); - return NULL; - } - - return node; - } - else if (match(p, TOKEN_LOOP)) - { - if (p->tokens->type == TOKEN_LPAREN) - { - return parse_for(p); - } - else - { - return parse_while(p); - } - } - else if (match(p, TOKEN_IF)) { - return parse_if(p); - } - else if (match(p, TOKEN_STRUCT)) - { - return parse_struct(p); - } - else if (match(p, TOKEN_ENUM)) - { - return parse_enum(p); - } - else if (match(p, TOKEN_UNION)) - { - ast_node *u = parse_struct(p); - u->type = NODE_UNION; - return u; - } - else - { - ast_node *expr = parse_expression(p); - if (!expr) - { - return NULL; - } - if (!match(p, TOKEN_SEMICOLON)) - { - error(p, "expected `;` after expression."); - return NULL; - } - return expr; - } -} - -/* Get a list of expressions to form a full AST. */ -static void parse(parser *p) -{ - p->ast = arena_alloc(p->allocator, sizeof(ast_node)); - p->ast->type = NODE_UNIT; - p->ast->expr.unit_node.expr = parse_statement(p); - ast_node *tail = p->ast; - ast_node *expr = parse_statement(p); - while (expr) { - tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); - tail->expr.unit_node.next->expr.unit_node.expr = expr; - tail = tail->expr.unit_node.next; - tail->type = NODE_UNIT; - expr = parse_statement(p); - } -} - -parser *parser_init(lexer *l, arena *allocator) -{ - parser *p = arena_alloc(allocator, sizeof(parser)); - p->tokens = l->tokens; - p->allocator= allocator; - - parse(p); - - if (has_errors) { - printf("Compilation failed.\n"); - exit(1); - } - - return p; -} diff --git a/parser.h b/parser.h deleted file mode 100644 index dced7ec..0000000 --- a/parser.h +++ /dev/null @@ -1,255 +0,0 @@ -#ifndef PARSER_H -#define PARSER_H - -#include "lexer.h" -#include "utils.h" -#include - -struct _type; -struct _ast_node; - -typedef enum { - OP_PLUS, // + - OP_MINUS, // - - OP_DIV, // / - OP_MUL, // * - OP_MOD, // % - OP_BOR, // | - OP_BAND, // & - OP_BXOR, // ^ - - OP_ASSIGN, // = - OP_ASSIGN_PTR, // <- - OP_RSHIFT_EQ, // >>= - OP_LSHIFT_EQ, // <<= - OP_PLUS_EQ, // += - OP_MINUS_EQ, // -= - OP_DIV_EQ, // /= - OP_MUL_EQ, // *= - OP_BOR_EQ, // |= - OP_BAND_EQ, // &= - OP_BXOR_EQ, // ^= - OP_MOD_EQ, // %= - - OP_EQ, // == - OP_AND, // && - OP_OR, // || - OP_NEQ, // != - OP_GT, // > - OP_LT, // < - OP_GE, // >= - OP_LE, // <= -} binary_op; - -typedef enum { - UOP_INCR, // ++ - UOP_MINUS, // - - UOP_DECR, // -- - UOP_DEREF, // * - UOP_REF, // & - UOP_NOT, // ! -} unary_op; - -typedef enum { - LAYOUT_AUTO, - LAYOUT_PACKED, - LAYOUT_EXTERN -} struct_layout; - -typedef struct _member { - struct _ast_node *type; - char *name; - usize name_len; - struct _member *next; - usize offset; -} member; - -typedef struct { - char *name; - usize name_len; - member *params; -} function; - -typedef struct _variant { - struct _ast_node *value; - char *name; - usize name_len; - struct _variant *next; -} variant; - -typedef enum { - NODE_IDENTIFIER, - NODE_INTEGER, - NODE_FLOAT, - NODE_STRING, - NODE_CHAR, - NODE_BOOL, - NODE_CAST, - NODE_UNARY, - NODE_BINARY, - NODE_RANGE, - NODE_ARRAY_SUBSCRIPT, - NODE_POSTFIX, - NODE_CALL, - NODE_ACCESS, - NODE_STRUCT_INIT, - NODE_TERNARY, /* TODO */ - - NODE_BREAK, - NODE_RETURN, - NODE_IMPORT, - NODE_FOR, - NODE_WHILE, - NODE_IF, - NODE_VAR_DECL, - NODE_LABEL, - NODE_GOTO, - - NODE_ENUM, - NODE_STRUCT, - NODE_UNION, - NODE_FUNCTION, - NODE_PTR_TYPE, - NODE_SWITCH, /* TODO */ - NODE_UNIT, -} node_type; - -#define PTR_SLICE 0x0 -#define PTR_RAW 0x1 -#define PTR_CONST 0x2 -#define PTR_VOLATILE 0x4 - -#define LOOP_WHILE 0x1 -#define LOOP_UNTIL 0x2 -#define LOOP_AFTER 0x4 - -typedef struct _ast_node { - node_type type; - source_pos position; - struct _type *expr_type; - bool address_taken; // used in IR generation. - union { - struct { - struct _ast_node *type; - u8 flags; - } ptr_type; - struct { - char *name; - usize name_len; - } label; // both label and goto - struct { - struct _ast_node *left; - struct _ast_node *right; - binary_op operator; - } binary; - struct { - struct _ast_node *right; - unary_op operator; - } unary; - u8 boolean; - i64 integer; - f64 flt; // float - struct { - char *start; - usize len; - } string; - char ch; // char; - struct { - struct _ast_node *condition; - struct _ast_node *then; - struct _ast_node *otherwise; - } ternary; - struct { - struct _ast_node *value; - struct _ast_node *type; - } cast; - struct { - struct _ast_node *expr; - struct _ast_node *index; - } subscript; - struct { - struct _ast_node *expr; - struct _ast_node *member; - } access; - struct { - struct _ast_node *expr; - struct _ast_node *next; - } unit_node; - struct { - /* This should be a list of unit_node */ - struct _ast_node *parameters; - usize param_len; - char *name; - usize name_len; - } call; - struct { - struct _ast_node *value; - } ret; - struct { - /* This should be an access. */ - struct _ast_node *path; - } import; - struct { - /* These should be lists of unit_node */ - struct _ast_node *slices; - usize slice_len; - struct _ast_node *captures; - usize capture_len; - struct _ast_node* body; - } fr; // for - struct { - struct _ast_node *condition; - struct _ast_node *body; - u8 flags; - } whle; // while - struct { - struct _ast_node *condition; - struct _ast_node *body; - struct _ast_node *otherwise; - u8 flags; - } if_stmt; // while - struct { - struct _ast_node **statements; - usize stmt_len; - } compound; - struct { - struct _ast_node *value; - char *name; - usize name_len; - struct _ast_node *type; - } var_decl; - struct { - member *members; - char *name; - usize name_len; - } structure; - struct { - member *parameters; - usize parameters_len; - char *name; - usize name_len; - struct _ast_node *type; - struct _ast_node *body; - } function; - struct { - variant *variants; - char *name; - usize name_len; - } enm; // enum - struct { - struct _ast_node *members; - usize members_len; - } struct_init; - } expr; -} ast_node; - -typedef struct { - token *tokens; - token *previous; - ast_node *ast; - arena *allocator; -} parser; - -parser *parser_init(lexer *l, arena *allocator); - -#endif diff --git a/report.txt b/report.txt deleted file mode 100644 index e69de29..0000000 diff --git a/sema.c b/sema.c deleted file mode 100644 index 9f2033d..0000000 --- a/sema.c +++ /dev/null @@ -1,818 +0,0 @@ -#define STB_DS_IMPLEMENTATION -#include "sema.h" -#include -#include - -typedef struct _res_node { - struct _res_node **in; - struct _res_node **out; - type *value; -} res_node; - -typedef struct { res_node node; bool complete; } pair; - -typedef struct { u8 flags; char *name; } type_key; - -static struct { char *key; pair *value; } *types; -static struct { char *key; type *value; } *type_reg; - -static struct { char *key; prototype *value; } *prototypes; - -static scope *global_scope = NULL; -static scope *current_scope = NULL; -static type *current_return = NULL; - -static type *const_int = NULL; -static type *const_float = NULL; - -static bool in_loop = false; - -static void error(ast_node *n, char *msg) -{ - if (n) { - printf("\x1b[31m\x1b[1merror\x1b[0m\x1b[1m:%ld:%ld:\x1b[0m %s\n", n->position.row, n->position.column, msg); - } else { - printf("\x1b[31m\x1b[1merror\x1b[0m\x1b[1m:\x1b[0m %s\n", msg); - } -} - -static char *intern_string(sema *s, char *str, usize len) -{ - (void) s; - char *ptr = malloc(len + 1); - memcpy(ptr, str, len); - ptr[len] = '\0'; - return ptr; -} - -static type *create_integer(sema *s, char *name, u8 bits, bool sign) -{ - type *t = arena_alloc(s->allocator, sizeof(type)); - t->name = name; - t->tag = sign ? TYPE_INTEGER : TYPE_UINTEGER; - t->data.integer = bits; - - pair *graph_node = arena_alloc(s->allocator, sizeof(pair)); - graph_node->node.value = t; - graph_node->node.in = NULL; - graph_node->node.out = NULL; - - shput(types, name, graph_node); - return t; -} - -static type *create_float(sema *s, char *name, u8 bits) -{ - type *t = arena_alloc(s->allocator, sizeof(type)); - t->name = name; - t->tag = TYPE_FLOAT; - t->data.flt = bits; - - pair *graph_node = arena_alloc(s->allocator, sizeof(pair)); - graph_node->node.value = t; - graph_node->node.in = NULL; - graph_node->node.out = NULL; - - shput(types, name, graph_node); - return t; -} - -static void order_type(sema *s, ast_node *node) -{ - if (node->type == NODE_STRUCT || node->type == NODE_UNION) { - type *t = arena_alloc(s->allocator, sizeof(type)); - t->tag = node->type == NODE_STRUCT ? TYPE_STRUCT : TYPE_UNION; - t->data.structure.name = node->expr.structure.name; - t->data.structure.name_len = node->expr.structure.name_len; - t->data.structure.members = node->expr.structure.members; - - char *k = intern_string(s, node->expr.structure.name, node->expr.structure.name_len); - t->name = k; - pair *graph_node = shget(types, k); - - if (!graph_node) { - graph_node = arena_alloc(s->allocator, sizeof(pair)); - graph_node->node.in = NULL; - graph_node->node.out = NULL; - } else if (graph_node->complete) { - error(node, "type already defined."); - return; - } - graph_node->node.value = t; - - member *m = t->data.structure.members; - while (m) { - if (m->type->type != NODE_IDENTIFIER) { - m = m->next; - continue; - } - char *name = intern_string(s, m->type->expr.string.start, m->type->expr.string.len); - pair *p = shget(types, name); - if (!p) { - p = arena_alloc(s->allocator, sizeof(pair)); - p->node.out = NULL; - p->node.in = NULL; - p->node.value = NULL; - p->complete = false; - shput(types, name, p); - } - - arrput(graph_node->node.in, &p->node); - arrput(p->node.out, &graph_node->node); - - m = m->next; - } - - shput(types, k, graph_node); - graph_node->complete = true; - } -} - -static type *get_type(sema *s, ast_node *n) -{ - char *name = NULL; - type *t = NULL; - switch (n->type) { - case NODE_IDENTIFIER: - name = intern_string(s, n->expr.string.start, n->expr.string.len); - t = shget(type_reg, name); - free(name); - return t; - case NODE_PTR_TYPE: - t = malloc(sizeof(type)); - t->size = sizeof(usize); - t->alignment = sizeof(usize); - if (n->expr.ptr_type.flags & PTR_RAW) { - t->name = "ptr"; - t->tag = TYPE_PTR; - t->data.ptr.child = get_type(s, n->expr.ptr_type.type); - t->data.ptr.is_const = (n->expr.ptr_type.flags & PTR_CONST) != 0; - t->data.ptr.is_volatile = (n->expr.ptr_type.flags & PTR_VOLATILE) != 0; - } else { - t->name = "slice"; - t->tag = TYPE_SLICE; - t->data.slice.child = get_type(s, n->expr.ptr_type.type); - t->data.slice.is_const = (n->expr.ptr_type.flags & PTR_CONST) != 0; - t->data.slice.is_volatile = (n->expr.ptr_type.flags & PTR_VOLATILE) != 0; - } - return t; - default: - error(n, "expected type."); - return NULL; - } -} - -static void register_struct(sema *s, char *name, type *t) -{ - usize alignment = 0; - member *m = t->data.structure.members; - - usize offset = 0; - type *m_type = NULL; - while (m) { - m_type = get_type(s, m->type); - - if (!m_type) { - error(m->type, "unknown type."); - return; - } - - char *n = intern_string(s, m->name, m->name_len); - shput(t->data.structure.member_types, n, m_type); - - if (m_type->size == 0) { - error(m->type, "a struct member can't be of type `void`."); - return; - } - - if (alignment < m_type->alignment) { - alignment = m_type->alignment; - } - - usize padding = (m_type->alignment - (offset % m_type->alignment)) % m_type->alignment; - offset += padding; - m->offset = offset; - offset += m_type->size; - - m = m->next; - } - - t->alignment = alignment; - - if (t->alignment > 0) { - usize trailing_padding = (t->alignment - (offset % t->alignment)) % t->alignment; - offset += trailing_padding; - } - - t->size = offset; -} - -static void register_union(sema *s, char *name, type *t) -{ - usize alignment = 0; - usize size = 0; - member *m = t->data.structure.members; - while (m) { - type *m_type = get_type(s, m->type); - - if (!m_type) { - error(m->type, "unknown type."); - return; - } - - char *n = intern_string(s, m->name, m->name_len); - shput(t->data.structure.member_types, n, m_type); - - if (alignment < m_type->alignment) { - alignment = m_type->alignment; - } - - if (size < m_type->size) { - size = m_type->size; - } - - m = m->next; - } - - t->alignment = alignment; - t->size = size; -} - -static void register_type(sema *s, char *name, type *t) -{ - switch (t->tag) { - case TYPE_INTEGER: - case TYPE_UINTEGER: - t->size = t->data.integer / 8; - t->alignment = t->data.integer / 8; - break; - case TYPE_PTR: - t->size = 8; - t->alignment = 8; - break; - case TYPE_FLOAT: - t->size = t->data.flt / 8; - t->alignment = t->data.flt / 8; - break; - case TYPE_STRUCT: - register_struct(s, name, t); - break; - case TYPE_UNION: - register_union(s, name, t); - break; - default: - error(NULL, "registering an invalid type."); - return; - } - - shput(type_reg, name, t); -} - -static void create_types(sema *s) -{ - res_node **nodes = NULL; - res_node **ordered = NULL; - usize node_count = shlen(types); - for (int i=0; i < node_count; i++) { - if (arrlen(types[i].value->node.in) == 0) { - arrput(nodes, &types[i].value->node); - } - } - - while (arrlen(nodes) > 0) { - res_node *n = nodes[0]; - arrdel(nodes, 0); - arrput(ordered, n); - while (arrlen(n->out) > 0) { - res_node *dep = n->out[0]; - arrdel(n->out, 0); - - for (int j=0; j < arrlen(dep->in); j++) { - if (dep->in[j] == n) { - arrdel(dep->in, j); - } - } - - if (arrlen(dep->in) == 0) { - arrput(nodes, dep); - } - } - } - - if (arrlen(ordered) < node_count) { - error(NULL, "cycling struct definition."); - } - - for (int i=0; i < arrlen(ordered); i++) { - type *t = ordered[i]->value; - if (t && (t->tag == TYPE_STRUCT || t->tag == TYPE_UNION)) { - char *name = t->name; - register_type(s, name, t); - } - } -} - -static void create_prototype(sema *s, ast_node *node) -{ - prototype *p = arena_alloc(s->allocator, sizeof(prototype)); - p->name = intern_string(s, node->expr.function.name, node->expr.function.name_len); - node->expr.function.name = p->name; - if (shget(prototypes, p->name)) { - error(node, "function already defined."); - } - - member *m = node->expr.function.parameters; - while (m) { - type *t = get_type(s, m->type); - if (!t) { - error(m->type, "unknown type."); - return; - } - - arrput(p->parameters, t); - m = m->next; - } - - p->type = get_type(s, node->expr.function.type); - shput(prototypes, p->name, p); -} - -static void push_scope(sema *s) -{ - scope *scp = arena_alloc(s->allocator, sizeof(scope)); - scp->parent = current_scope; - current_scope = scp; -} - -static void pop_scope(sema *s) -{ - current_scope = current_scope->parent; -} - -static ast_node *get_def(sema *s, char *name) -{ - scope *current = current_scope; - while (current) { - ast_node *def = shget(current->defs, name); - if (def) return def; - - current = current->parent; - } - - return NULL; -} - -static type *get_string_type(sema *s, ast_node *node) -{ - type *string_type = arena_alloc(s->allocator, sizeof(type)); - string_type->tag = TYPE_PTR; - string_type->size = sizeof(usize); - string_type->alignment = sizeof(usize); - string_type->name = "slice"; - string_type->data.slice.child = shget(type_reg, "u8"); - string_type->data.slice.is_const = true; - string_type->data.slice.is_volatile = false; - string_type->data.slice.len = node->expr.string.len; - return string_type; -} - -static type *get_range_type(sema *s, ast_node *node) -{ - type *range_type = arena_alloc(s->allocator, sizeof(type)); - range_type->tag = TYPE_PTR; - range_type->size = sizeof(usize); - range_type->alignment = sizeof(usize); - range_type->name = "slice"; - range_type->data.slice.child = shget(type_reg, "usize"); - range_type->data.slice.is_const = true; - range_type->data.slice.is_volatile = false; - range_type->data.slice.len = node->expr.binary.right->expr.integer - node->expr.binary.left->expr.integer; - return range_type; -} - -static type *get_expression_type(sema *s, ast_node *node); -static type *get_access_type(sema *s, ast_node *node) -{ - type *t = get_expression_type(s, node->expr.access.expr); - ast_node *member = node->expr.access.member; - char *name_start = member->expr.string.start; - usize name_len = member->expr.string.len; - if (!t || (t->tag != TYPE_STRUCT && t->tag != TYPE_UNION)) { - error(node, "invalid expression."); - return NULL; - } - char *name = intern_string(s, name_start, name_len); - type *res = shget(t->data.structure.member_types, name); - if (!res) { - error(node, "struct doesn't have that member"); - return NULL; - } - - return res; -} - -static type *get_identifier_type(sema *s, ast_node *node) -{ - char *name_start = node->expr.string.start; - usize name_len = node->expr.string.len; - char *name = intern_string(s, name_start, name_len); - node->expr.string.start = name; - ast_node *def = get_def(s, name); - if (!def) { - error(node, "unknown identifier."); - } - return def->expr_type; -} - -static bool match(type *t1, type *t2); - -static bool can_cast(type *source, type *dest) -{ - if (!dest || !source) return false; - - switch (dest->tag) { - case TYPE_INTEGER: - case TYPE_UINTEGER: - return source->tag == TYPE_INTEGER_CONST; - case TYPE_FLOAT: - return source->tag == TYPE_FLOAT_CONST; - default: - return false; - } -} - -static type *get_expression_type(sema *s, ast_node *node) -{ - if (!node) { - return shget(type_reg, "void"); - } - - type *t = NULL; - prototype *prot = NULL; - switch (node->type) { - case NODE_IDENTIFIER: - t = get_identifier_type(s, node); - node->expr_type = t; - return t; - case NODE_INTEGER: - node->expr_type = const_int; - return const_int; - case NODE_FLOAT: - node->expr_type = const_float; - return const_float; - case NODE_STRING: - t = get_string_type(s, node); - node->expr_type = t; - return t; - case NODE_CHAR: - t = shget(type_reg, "u8"); - node->expr_type = t; - return t; - case NODE_BOOL: - t = shget(type_reg, "bool"); - node->expr_type = t; - return t; - case NODE_CAST: - t = get_type(s, node->expr.cast.type); - node->expr_type = t; - return t; - case NODE_POSTFIX: - case NODE_UNARY: - t = get_expression_type(s, node->expr.unary.right); - if (node->expr.unary.operator == UOP_REF) { - ast_node *target = node->expr.unary.right; - while (target->type == NODE_ACCESS) { - target = target->expr.access.expr; - } - - if (target->type != NODE_IDENTIFIER) { - error(node, "expected identifier."); - return NULL; - } - - char *name = target->expr.string.start; - ast_node *def = get_def(s, name); - - if (def) { - def->address_taken = true; - target->address_taken = true; - } - - type *tmp = t; - t = arena_alloc(s->allocator, sizeof(type)); - t->tag = TYPE_PTR; - t->size = sizeof(usize); - t->alignment = sizeof(usize); - t->name = "ptr"; - t->data.ptr.is_const = false; - t->data.ptr.is_volatile = false; - t->data.ptr.child = tmp; - } else if (node->expr.unary.operator == UOP_DEREF) { - if (t->tag != TYPE_PTR) { - error(node, "only pointers can be dereferenced."); - return NULL; - } - t = t->data.ptr.child; - } - node->expr_type = t; - return t; - case NODE_BINARY: - t = get_expression_type(s, node->expr.binary.left); - if (!t) return NULL; - if (node->expr.binary.operator == OP_ASSIGN_PTR) { - if (t->tag != TYPE_PTR) { - error(node, "expected pointer."); - return NULL; - } - t = t->data.ptr.child; - } - if (!can_cast(get_expression_type(s, node->expr.binary.right), t) && !match(t, get_expression_type(s, node->expr.binary.right))) { - error(node, "type mismatch."); - node->expr_type = NULL; - return NULL; - } - if (node->expr.binary.operator >= OP_EQ) { - t = shget(type_reg, "bool"); - } else if (node->expr.binary.operator >= OP_ASSIGN && node->expr.binary.operator <= OP_MOD_EQ) { - t = shget(type_reg, "void"); - } - node->expr_type = t; - return t; - case NODE_RANGE: - t = get_range_type(s, node); - node->expr_type = t; - return t; - case NODE_ARRAY_SUBSCRIPT: - t = get_expression_type(s, node->expr.subscript.expr); - switch (t->tag) { - case TYPE_SLICE: - t = t->data.slice.child; - break; - case TYPE_PTR: - t = t->data.ptr.child; - break; - default: - error(node, "only pointers and slices can be indexed."); - return NULL; - } - node->expr_type = t; - return t; - case NODE_CALL: - prot = shget(prototypes, intern_string(s, node->expr.call.name, node->expr.call.name_len)); - if (!prot) { - error(node, "unknown function."); - return NULL; - } - t = prot->type; - node->expr_type = t; - return t; - case NODE_ACCESS: - t = get_access_type(s, node); - node->expr_type = t; - return t; - default: - t = shget(type_reg, "void"); - node->expr_type = t; - return t; - } -} - -static bool match(type *t1, type *t2) -{ - if (!t1 || !t2) return false; - if (t1->tag != t2->tag) return false; - - switch(t1->tag) { - case TYPE_VOID: - case TYPE_BOOL: - return true; - case TYPE_PTR: - return (t1->data.ptr.is_const == t2->data.ptr.is_const) && (t1->data.ptr.is_volatile == t2->data.ptr.is_volatile) && match(t1->data.ptr.child, t2->data.ptr.child); - case TYPE_SLICE: - return (t1->data.slice.is_const == t2->data.slice.is_const) && (t1->data.slice.is_volatile == t2->data.slice.is_volatile) && match(t1->data.slice.child, t2->data.slice.child) && t1->data.slice.len == t2->data.slice.len; - case TYPE_STRUCT: - case TYPE_UNION: - return t1 == t2; - case TYPE_INTEGER: - case TYPE_UINTEGER: - return t1->data.integer == t2->data.integer; - case TYPE_FLOAT: - return t1->data.flt == t2->data.flt; - case TYPE_ENUM: - case TYPE_GENERIC: - /* TODO */ - return false; - case TYPE_INTEGER_CONST: - case TYPE_FLOAT_CONST: - return false; - } - - return false; -} - -static void check_statement(sema *s, ast_node *node); -static void check_body(sema *s, ast_node *node) -{ - push_scope(s); - - ast_node *current = node; - while (current && current->type == NODE_UNIT) { - check_statement(s, current->expr.unit_node.expr); - current = current->expr.unit_node.next; - } - - pop_scope(s); -} - -static void check_for(sema *s, ast_node *node) -{ - ast_node *slices = node->expr.fr.slices; - ast_node *captures = node->expr.fr.captures; - - push_scope(s); - - ast_node *current_capture = captures; - ast_node *current_slice = slices; - - while (current_capture) { - type *c_type = get_expression_type(s, current_slice->expr.unit_node.expr); - char *c_name = intern_string(s, current_capture->expr.unit_node.expr->expr.string.start, current_capture->expr.unit_node.expr->expr.string.len); - - ast_node *cap_node = arena_alloc(s->allocator, sizeof(ast_node)); - cap_node->type = NODE_VAR_DECL; - cap_node->expr_type = c_type; - cap_node->address_taken = false; - cap_node->expr.var_decl.name = c_name; - - shput(current_scope->defs, c_name, cap_node); - current_capture = current_capture->expr.unit_node.next; - current_slice = current_slice->expr.unit_node.next; - } - - ast_node *current = node->expr.fr.body; - - in_loop = true; - while (current && current->type == NODE_UNIT) { - check_statement(s, current->expr.unit_node.expr); - current = current->expr.unit_node.next; - } - in_loop = false; - - pop_scope(s); -} - -static void check_statement(sema *s, ast_node *node) -{ - if (!node) return; - - type *t = NULL; - char *name = NULL; - switch(node->type) { - case NODE_RETURN: - if (!can_cast(get_expression_type(s, node->expr.ret.value), current_return) && !match(get_expression_type(s, node->expr.ret.value), current_return)) { - error(node, "return type doesn't match function's one."); - } - break; - case NODE_BREAK: - if (!in_loop) { - error(node, "`break` isn't in a loop."); - } - break; - case NODE_WHILE: - if (!match(get_expression_type(s, node->expr.whle.condition), shget(type_reg, "bool"))) { - error(node, "expected boolean value."); - return; - } - - in_loop = true; - check_body(s, node->expr.whle.body); - in_loop = false; - break; - case NODE_IF: - if (!match(get_expression_type(s, node->expr.if_stmt.condition), shget(type_reg, "bool"))) { - error(node, "expected boolean value."); - return; - } - - check_body(s, node->expr.if_stmt.body); - if (node->expr.if_stmt.otherwise) check_body(s, node->expr.if_stmt.otherwise); - break; - case NODE_FOR: - check_for(s, node); - break; - case NODE_VAR_DECL: - t = get_type(s, node->expr.var_decl.type); - node->expr_type = t; - name = intern_string(s, node->expr.var_decl.name, node->expr.var_decl.name_len); - node->expr.var_decl.name = name; - if (get_def(s, name)) { - error(node, "redeclaration of variable."); - break; - } - if (!can_cast(get_expression_type(s, node->expr.var_decl.value), t) && !match(t, get_expression_type(s, node->expr.var_decl.value))) { - error(node, "type mismatch."); - } - shput(current_scope->defs, name, node); - break; - default: - get_expression_type(s, node); - break; - } -} - -static void check_function(sema *s, ast_node *f) -{ - push_scope(s); - current_return = get_type(s, f->expr.function.type); - - member *param = f->expr.function.parameters; - while (param) { - type *p_type = get_type(s, param->type); - char *t_name = intern_string(s, param->name, param->name_len); - param->name = t_name; - ast_node *param_node = arena_alloc(s->allocator, sizeof(ast_node)); - param_node->type = NODE_VAR_DECL; - param_node->expr_type = p_type; - param_node->address_taken = false; - param_node->expr.var_decl.name = t_name; - - shput(current_scope->defs, t_name, param_node); - param = param->next; - } - - ast_node *current = f->expr.function.body; - while (current && current->type == NODE_UNIT) { - check_statement(s, current->expr.unit_node.expr); - current = current->expr.unit_node.next; - } - - pop_scope(s); -} - -static void analyze_unit(sema *s, ast_node *node) -{ - ast_node *current = node; - while (current && current->type == NODE_UNIT) { - if (current->expr.unit_node.expr) - order_type(s, current->expr.unit_node.expr); - current = current->expr.unit_node.next; - } - - create_types(s); - - current = node; - while (current && current->type == NODE_UNIT) { - if (current->expr.unit_node.expr && current->expr.unit_node.expr->type == NODE_FUNCTION) { - create_prototype(s, current->expr.unit_node.expr); - } - current = current->expr.unit_node.next; - } - - current = node; - while (current && current->type == NODE_UNIT) { - if (current->expr.unit_node.expr && current->expr.unit_node.expr->type == NODE_FUNCTION) { - check_function(s, current->expr.unit_node.expr); - } else { - check_statement(s, current->expr.unit_node.expr); - } - current = current->expr.unit_node.next; - } -} - -void sema_init(parser *p, arena *a) -{ - sema *s = arena_alloc(a, sizeof(sema)); - s->allocator = a; - types = NULL; - s->ast = p->ast; - - global_scope = arena_alloc(a, sizeof(scope)); - global_scope->parent = NULL; - global_scope->defs = NULL; - current_scope = global_scope; - - register_type(s, "void", create_integer(s, "void", 0, false)); - register_type(s, "bool", create_integer(s, "bool", 8, false)); - register_type(s, "u8", create_integer(s, "u8", 8, false)); - register_type(s, "u16", create_integer(s, "u16", 16, false)); - register_type(s, "u32", create_integer(s, "u32", 32, false)); - register_type(s, "u64", create_integer(s, "u64", 64, false)); - register_type(s, "i8", create_integer(s, "i8", 8, true)); - register_type(s, "i16", create_integer(s, "i16", 16, true)); - register_type(s, "i32", create_integer(s, "i32", 32, true)); - register_type(s, "i64", create_integer(s, "i64", 64, true)); - register_type(s, "f32", create_float(s, "f32", 32)); - register_type(s, "f64", create_float(s, "f64", 64)); - - const_int = arena_alloc(s->allocator, sizeof(type)); - const_int->name = "const_int"; - const_int->tag = TYPE_INTEGER_CONST; - const_int->data.integer = 0; - - const_float = arena_alloc(s->allocator, sizeof(type)); - const_float->name = "const_float"; - const_float->tag = TYPE_FLOAT_CONST; - const_float->data.flt = 0; - - analyze_unit(s, s->ast); -} diff --git a/sema.h b/sema.h deleted file mode 100644 index 4813675..0000000 --- a/sema.h +++ /dev/null @@ -1,76 +0,0 @@ -#ifndef SEMA_H -#define SEMA_H - -#include -#include "parser.h" -#include "stb_ds.h" -#include "utils.h" - -typedef enum { - TYPE_VOID, - TYPE_BOOL, - TYPE_PTR, - TYPE_SLICE, - TYPE_FLOAT, - TYPE_FLOAT_CONST, - TYPE_INTEGER, - TYPE_INTEGER_CONST, - TYPE_UINTEGER, - TYPE_STRUCT, - TYPE_UNION, - TYPE_ENUM, /* TODO */ - TYPE_GENERIC, /* TODO */ -} type_tag; - -typedef struct _type { - type_tag tag; - usize size; - usize alignment; - char *name; - union { - u8 integer; - u8 flt; // float - struct { - bool is_const; - bool is_volatile; - struct _type *child; - } ptr; - struct { - usize len; - bool is_const; - bool is_volatile; - struct _type *child; - } slice; - struct { - char *name; - usize name_len; - member *members; - struct { char *key; struct _type *value; } *member_types; - } structure; - struct { - char *name; - usize name_len; - variant *variants; - } enm; /* TODO */ - } data; -} type; - -typedef struct { - char *name; - type *type; - type **parameters; -} prototype; - -typedef struct _scope { - struct _scope *parent; - struct { char *key; ast_node *value; } *defs; -} scope; - -typedef struct { - arena *allocator; - ast_node *ast; -} sema; - -void sema_init(parser *p, arena *a); - -#endif diff --git a/stb_ds.h b/stb_ds.h deleted file mode 100644 index e84c82d..0000000 --- a/stb_ds.h +++ /dev/null @@ -1,1895 +0,0 @@ -/* stb_ds.h - v0.67 - public domain data structures - Sean Barrett 2019 - - This is a single-header-file library that provides easy-to-use - dynamic arrays and hash tables for C (also works in C++). - - For a gentle introduction: - http://nothings.org/stb_ds - - To use this library, do this in *one* C or C++ file: - #define STB_DS_IMPLEMENTATION - #include "stb_ds.h" - -TABLE OF CONTENTS - - Table of Contents - Compile-time options - License - Documentation - Notes - Notes - Dynamic arrays - Notes - Hash maps - Credits - -COMPILE-TIME OPTIONS - - #define STBDS_NO_SHORT_NAMES - - This flag needs to be set globally. - - By default stb_ds exposes shorter function names that are not qualified - with the "stbds_" prefix. If these names conflict with the names in your - code, define this flag. - - #define STBDS_SIPHASH_2_4 - - This flag only needs to be set in the file containing #define STB_DS_IMPLEMENTATION. - - By default stb_ds.h hashes using a weaker variant of SipHash and a custom hash for - 4- and 8-byte keys. On 64-bit platforms, you can define the above flag to force - stb_ds.h to use specification-compliant SipHash-2-4 for all keys. Doing so makes - hash table insertion about 20% slower on 4- and 8-byte keys, 5% slower on - 64-byte keys, and 10% slower on 256-byte keys on my test computer. - - #define STBDS_REALLOC(context,ptr,size) better_realloc - #define STBDS_FREE(context,ptr) better_free - - These defines only need to be set in the file containing #define STB_DS_IMPLEMENTATION. - - By default stb_ds uses stdlib realloc() and free() for memory management. You can - substitute your own functions instead by defining these symbols. You must either - define both, or neither. Note that at the moment, 'context' will always be NULL. - @TODO add an array/hash initialization function that takes a memory context pointer. - - #define STBDS_UNIT_TESTS - - Defines a function stbds_unit_tests() that checks the functioning of the data structures. - - Note that on older versions of gcc (e.g. 5.x.x) you may need to build with '-std=c++0x' - (or equivalentally '-std=c++11') when using anonymous structures as seen on the web - page or in STBDS_UNIT_TESTS. - -LICENSE - - Placed in the public domain and also MIT licensed. - See end of file for detailed license information. - -DOCUMENTATION - - Dynamic Arrays - - Non-function interface: - - Declare an empty dynamic array of type T - T* foo = NULL; - - Access the i'th item of a dynamic array 'foo' of type T, T* foo: - foo[i] - - Functions (actually macros) - - arrfree: - void arrfree(T*); - Frees the array. - - arrlen: - ptrdiff_t arrlen(T*); - Returns the number of elements in the array. - - arrlenu: - size_t arrlenu(T*); - Returns the number of elements in the array as an unsigned type. - - arrpop: - T arrpop(T* a) - Removes the final element of the array and returns it. - - arrput: - T arrput(T* a, T b); - Appends the item b to the end of array a. Returns b. - - arrins: - T arrins(T* a, int p, T b); - Inserts the item b into the middle of array a, into a[p], - moving the rest of the array over. Returns b. - - arrinsn: - void arrinsn(T* a, int p, int n); - Inserts n uninitialized items into array a starting at a[p], - moving the rest of the array over. - - arraddnptr: - T* arraddnptr(T* a, int n) - Appends n uninitialized items onto array at the end. - Returns a pointer to the first uninitialized item added. - - arraddnindex: - size_t arraddnindex(T* a, int n) - Appends n uninitialized items onto array at the end. - Returns the index of the first uninitialized item added. - - arrdel: - void arrdel(T* a, int p); - Deletes the element at a[p], moving the rest of the array over. - - arrdeln: - void arrdeln(T* a, int p, int n); - Deletes n elements starting at a[p], moving the rest of the array over. - - arrdelswap: - void arrdelswap(T* a, int p); - Deletes the element at a[p], replacing it with the element from - the end of the array. O(1) performance. - - arrsetlen: - void arrsetlen(T* a, int n); - Changes the length of the array to n. Allocates uninitialized - slots at the end if necessary. - - arrsetcap: - size_t arrsetcap(T* a, int n); - Sets the length of allocated storage to at least n. It will not - change the length of the array. - - arrcap: - size_t arrcap(T* a); - Returns the number of total elements the array can contain without - needing to be reallocated. - - Hash maps & String hash maps - - Given T is a structure type: struct { TK key; TV value; }. Note that some - functions do not require TV value and can have other fields. For string - hash maps, TK must be 'char *'. - - Special interface: - - stbds_rand_seed: - void stbds_rand_seed(size_t seed); - For security against adversarially chosen data, you should seed the - library with a strong random number. Or at least seed it with time(). - - stbds_hash_string: - size_t stbds_hash_string(char *str, size_t seed); - Returns a hash value for a string. - - stbds_hash_bytes: - size_t stbds_hash_bytes(void *p, size_t len, size_t seed); - These functions hash an arbitrary number of bytes. The function - uses a custom hash for 4- and 8-byte data, and a weakened version - of SipHash for everything else. On 64-bit platforms you can get - specification-compliant SipHash-2-4 on all data by defining - STBDS_SIPHASH_2_4, at a significant cost in speed. - - Non-function interface: - - Declare an empty hash map of type T - T* foo = NULL; - - Access the i'th entry in a hash table T* foo: - foo[i] - - Function interface (actually macros): - - hmfree - shfree - void hmfree(T*); - void shfree(T*); - Frees the hashmap and sets the pointer to NULL. - - hmlen - shlen - ptrdiff_t hmlen(T*) - ptrdiff_t shlen(T*) - Returns the number of elements in the hashmap. - - hmlenu - shlenu - size_t hmlenu(T*) - size_t shlenu(T*) - Returns the number of elements in the hashmap. - - hmgeti - shgeti - hmgeti_ts - ptrdiff_t hmgeti(T*, TK key) - ptrdiff_t shgeti(T*, char* key) - ptrdiff_t hmgeti_ts(T*, TK key, ptrdiff_t tempvar) - Returns the index in the hashmap which has the key 'key', or -1 - if the key is not present. - - hmget - hmget_ts - shget - TV hmget(T*, TK key) - TV shget(T*, char* key) - TV hmget_ts(T*, TK key, ptrdiff_t tempvar) - Returns the value corresponding to 'key' in the hashmap. - The structure must have a 'value' field - - hmgets - shgets - T hmgets(T*, TK key) - T shgets(T*, char* key) - Returns the structure corresponding to 'key' in the hashmap. - - hmgetp - shgetp - hmgetp_ts - hmgetp_null - shgetp_null - T* hmgetp(T*, TK key) - T* shgetp(T*, char* key) - T* hmgetp_ts(T*, TK key, ptrdiff_t tempvar) - T* hmgetp_null(T*, TK key) - T* shgetp_null(T*, char *key) - Returns a pointer to the structure corresponding to 'key' in - the hashmap. Functions ending in "_null" return NULL if the key - is not present in the hashmap; the others return a pointer to a - structure holding the default value (but not the searched-for key). - - hmdefault - shdefault - TV hmdefault(T*, TV value) - TV shdefault(T*, TV value) - Sets the default value for the hashmap, the value which will be - returned by hmget/shget if the key is not present. - - hmdefaults - shdefaults - TV hmdefaults(T*, T item) - TV shdefaults(T*, T item) - Sets the default struct for the hashmap, the contents which will be - returned by hmgets/shgets if the key is not present. - - hmput - shput - TV hmput(T*, TK key, TV value) - TV shput(T*, char* key, TV value) - Inserts a pair into the hashmap. If the key is already - present in the hashmap, updates its value. - - hmputs - shputs - T hmputs(T*, T item) - T shputs(T*, T item) - Inserts a struct with T.key into the hashmap. If the struct is already - present in the hashmap, updates it. - - hmdel - shdel - int hmdel(T*, TK key) - int shdel(T*, char* key) - If 'key' is in the hashmap, deletes its entry and returns 1. - Otherwise returns 0. - - Function interface (actually macros) for strings only: - - sh_new_strdup - void sh_new_strdup(T*); - Overwrites the existing pointer with a newly allocated - string hashmap which will automatically allocate and free - each string key using realloc/free - - sh_new_arena - void sh_new_arena(T*); - Overwrites the existing pointer with a newly allocated - string hashmap which will automatically allocate each string - key to a string arena. Every string key ever used by this - hash table remains in the arena until the arena is freed. - Additionally, any key which is deleted and reinserted will - be allocated multiple times in the string arena. - -NOTES - - * These data structures are realloc'd when they grow, and the macro - "functions" write to the provided pointer. This means: (a) the pointer - must be an lvalue, and (b) the pointer to the data structure is not - stable, and you must maintain it the same as you would a realloc'd - pointer. For example, if you pass a pointer to a dynamic array to a - function which updates it, the function must return back the new - pointer to the caller. This is the price of trying to do this in C. - - * The following are the only functions that are thread-safe on a single data - structure, i.e. can be run in multiple threads simultaneously on the same - data structure - hmlen shlen - hmlenu shlenu - hmget_ts shget_ts - hmgeti_ts shgeti_ts - hmgets_ts shgets_ts - - * You iterate over the contents of a dynamic array and a hashmap in exactly - the same way, using arrlen/hmlen/shlen: - - for (i=0; i < arrlen(foo); ++i) - ... foo[i] ... - - * All operations except arrins/arrdel are O(1) amortized, but individual - operations can be slow, so these data structures may not be suitable - for real time use. Dynamic arrays double in capacity as needed, so - elements are copied an average of once. Hash tables double/halve - their size as needed, with appropriate hysteresis to maintain O(1) - performance. - -NOTES - DYNAMIC ARRAY - - * If you know how long a dynamic array is going to be in advance, you can avoid - extra memory allocations by using arrsetlen to allocate it to that length in - advance and use foo[n] while filling it out, or arrsetcap to allocate the memory - for that length and use arrput/arrpush as normal. - - * Unlike some other versions of the dynamic array, this version should - be safe to use with strict-aliasing optimizations. - -NOTES - HASH MAP - - * For compilers other than GCC and clang (e.g. Visual Studio), for hmput/hmget/hmdel - and variants, the key must be an lvalue (so the macro can take the address of it). - Extensions are used that eliminate this requirement if you're using C99 and later - in GCC or clang, or if you're using C++ in GCC. But note that this can make your - code less portable. - - * To test for presence of a key in a hashmap, just do 'hmgeti(foo,key) >= 0'. - - * The iteration order of your data in the hashmap is determined solely by the - order of insertions and deletions. In particular, if you never delete, new - keys are always added at the end of the array. This will be consistent - across all platforms and versions of the library. However, you should not - attempt to serialize the internal hash table, as the hash is not consistent - between different platforms, and may change with future versions of the library. - - * Use sh_new_arena() for string hashmaps that you never delete from. Initialize - with NULL if you're managing the memory for your strings, or your strings are - never freed (at least until the hashmap is freed). Otherwise, use sh_new_strdup(). - @TODO: make an arena variant that garbage collects the strings with a trivial - copy collector into a new arena whenever the table shrinks / rebuilds. Since - current arena recommendation is to only use arena if it never deletes, then - this can just replace current arena implementation. - - * If adversarial input is a serious concern and you're on a 64-bit platform, - enable STBDS_SIPHASH_2_4 (see the 'Compile-time options' section), and pass - a strong random number to stbds_rand_seed. - - * The default value for the hash table is stored in foo[-1], so if you - use code like 'hmget(T,k)->value = 5' you can accidentally overwrite - the value stored by hmdefault if 'k' is not present. - -CREDITS - - Sean Barrett -- library, idea for dynamic array API/implementation - Per Vognsen -- idea for hash table API/implementation - Rafael Sachetto -- arrpop() - github:HeroicKatora -- arraddn() reworking - - Bugfixes: - Andy Durdin - Shane Liesegang - Vinh Truong - Andreas Molzer - github:hashitaku - github:srdjanstipic - Macoy Madson - Andreas Vennstrom - Tobias Mansfield-Williams -*/ - -#ifdef STBDS_UNIT_TESTS -#define _CRT_SECURE_NO_WARNINGS -#endif - -#ifndef INCLUDE_STB_DS_H -#define INCLUDE_STB_DS_H - -#include -#include - -#ifndef STBDS_NO_SHORT_NAMES -#define arrlen stbds_arrlen -#define arrlenu stbds_arrlenu -#define arrput stbds_arrput -#define arrpush stbds_arrput -#define arrpop stbds_arrpop -#define arrfree stbds_arrfree -#define arraddn stbds_arraddn // deprecated, use one of the following instead: -#define arraddnptr stbds_arraddnptr -#define arraddnindex stbds_arraddnindex -#define arrsetlen stbds_arrsetlen -#define arrlast stbds_arrlast -#define arrins stbds_arrins -#define arrinsn stbds_arrinsn -#define arrdel stbds_arrdel -#define arrdeln stbds_arrdeln -#define arrdelswap stbds_arrdelswap -#define arrcap stbds_arrcap -#define arrsetcap stbds_arrsetcap - -#define hmput stbds_hmput -#define hmputs stbds_hmputs -#define hmget stbds_hmget -#define hmget_ts stbds_hmget_ts -#define hmgets stbds_hmgets -#define hmgetp stbds_hmgetp -#define hmgetp_ts stbds_hmgetp_ts -#define hmgetp_null stbds_hmgetp_null -#define hmgeti stbds_hmgeti -#define hmgeti_ts stbds_hmgeti_ts -#define hmdel stbds_hmdel -#define hmlen stbds_hmlen -#define hmlenu stbds_hmlenu -#define hmfree stbds_hmfree -#define hmdefault stbds_hmdefault -#define hmdefaults stbds_hmdefaults - -#define shput stbds_shput -#define shputi stbds_shputi -#define shputs stbds_shputs -#define shget stbds_shget -#define shgeti stbds_shgeti -#define shgets stbds_shgets -#define shgetp stbds_shgetp -#define shgetp_null stbds_shgetp_null -#define shdel stbds_shdel -#define shlen stbds_shlen -#define shlenu stbds_shlenu -#define shfree stbds_shfree -#define shdefault stbds_shdefault -#define shdefaults stbds_shdefaults -#define sh_new_arena stbds_sh_new_arena -#define sh_new_strdup stbds_sh_new_strdup - -#define stralloc stbds_stralloc -#define strreset stbds_strreset -#endif - -#if defined(STBDS_REALLOC) && !defined(STBDS_FREE) || !defined(STBDS_REALLOC) && defined(STBDS_FREE) -#error "You must define both STBDS_REALLOC and STBDS_FREE, or neither." -#endif -#if !defined(STBDS_REALLOC) && !defined(STBDS_FREE) -#include -#define STBDS_REALLOC(c,p,s) realloc(p,s) -#define STBDS_FREE(c,p) free(p) -#endif - -#ifdef _MSC_VER -#define STBDS_NOTUSED(v) (void)(v) -#else -#define STBDS_NOTUSED(v) (void)sizeof(v) -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -// for security against attackers, seed the library with a random number, at least time() but stronger is better -extern void stbds_rand_seed(size_t seed); - -// these are the hash functions used internally if you want to test them or use them for other purposes -extern size_t stbds_hash_bytes(void *p, size_t len, size_t seed); -extern size_t stbds_hash_string(char *str, size_t seed); - -// this is a simple string arena allocator, initialize with e.g. 'stbds_string_arena my_arena={0}'. -typedef struct stbds_string_arena stbds_string_arena; -extern char * stbds_stralloc(stbds_string_arena *a, char *str); -extern void stbds_strreset(stbds_string_arena *a); - -// have to #define STBDS_UNIT_TESTS to call this -extern void stbds_unit_tests(void); - -/////////////// -// -// Everything below here is implementation details -// - -extern void * stbds_arrgrowf(void *a, size_t elemsize, size_t addlen, size_t min_cap); -extern void stbds_arrfreef(void *a); -extern void stbds_hmfree_func(void *p, size_t elemsize); -extern void * stbds_hmget_key(void *a, size_t elemsize, void *key, size_t keysize, int mode); -extern void * stbds_hmget_key_ts(void *a, size_t elemsize, void *key, size_t keysize, ptrdiff_t *temp, int mode); -extern void * stbds_hmput_default(void *a, size_t elemsize); -extern void * stbds_hmput_key(void *a, size_t elemsize, void *key, size_t keysize, int mode); -extern void * stbds_hmdel_key(void *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode); -extern void * stbds_shmode_func(size_t elemsize, int mode); - -#ifdef __cplusplus -} -#endif - -#if defined(__GNUC__) || defined(__clang__) -#define STBDS_HAS_TYPEOF -#ifdef __cplusplus -//#define STBDS_HAS_LITERAL_ARRAY // this is currently broken for clang -#endif -#endif - -#if !defined(__cplusplus) -#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L -#define STBDS_HAS_LITERAL_ARRAY -#endif -#endif - -// this macro takes the address of the argument, but on gcc/clang can accept rvalues -#if defined(STBDS_HAS_LITERAL_ARRAY) && defined(STBDS_HAS_TYPEOF) - #if __clang__ - #define STBDS_ADDRESSOF(typevar, value) ((__typeof__(typevar)[1]){value}) // literal array decays to pointer to value - #else - #define STBDS_ADDRESSOF(typevar, value) ((typeof(typevar)[1]){value}) // literal array decays to pointer to value - #endif -#else -#define STBDS_ADDRESSOF(typevar, value) &(value) -#endif - -#define STBDS_OFFSETOF(var,field) ((char *) &(var)->field - (char *) (var)) - -#define stbds_header(t) ((stbds_array_header *) (t) - 1) -#define stbds_temp(t) stbds_header(t)->temp -#define stbds_temp_key(t) (*(char **) stbds_header(t)->hash_table) - -#define stbds_arrsetcap(a,n) (stbds_arrgrow(a,0,n)) -#define stbds_arrsetlen(a,n) ((stbds_arrcap(a) < (size_t) (n) ? stbds_arrsetcap((a),(size_t)(n)),0 : 0), (a) ? stbds_header(a)->length = (size_t) (n) : 0) -#define stbds_arrcap(a) ((a) ? stbds_header(a)->capacity : 0) -#define stbds_arrlen(a) ((a) ? (ptrdiff_t) stbds_header(a)->length : 0) -#define stbds_arrlenu(a) ((a) ? stbds_header(a)->length : 0) -#define stbds_arrput(a,v) (stbds_arrmaybegrow(a,1), (a)[stbds_header(a)->length++] = (v)) -#define stbds_arrpush stbds_arrput // synonym -#define stbds_arrpop(a) (stbds_header(a)->length--, (a)[stbds_header(a)->length]) -#define stbds_arraddn(a,n) ((void)(stbds_arraddnindex(a, n))) // deprecated, use one of the following instead: -#define stbds_arraddnptr(a,n) (stbds_arrmaybegrow(a,n), (n) ? (stbds_header(a)->length += (n), &(a)[stbds_header(a)->length-(n)]) : (a)) -#define stbds_arraddnindex(a,n)(stbds_arrmaybegrow(a,n), (n) ? (stbds_header(a)->length += (n), stbds_header(a)->length-(n)) : stbds_arrlen(a)) -#define stbds_arraddnoff stbds_arraddnindex -#define stbds_arrlast(a) ((a)[stbds_header(a)->length-1]) -#define stbds_arrfree(a) ((void) ((a) ? STBDS_FREE(NULL,stbds_header(a)) : (void)0), (a)=NULL) -#define stbds_arrdel(a,i) stbds_arrdeln(a,i,1) -#define stbds_arrdeln(a,i,n) (memmove(&(a)[i], &(a)[(i)+(n)], sizeof *(a) * (stbds_header(a)->length-(n)-(i))), stbds_header(a)->length -= (n)) -#define stbds_arrdelswap(a,i) ((a)[i] = stbds_arrlast(a), stbds_header(a)->length -= 1) -#define stbds_arrinsn(a,i,n) (stbds_arraddn((a),(n)), memmove(&(a)[(i)+(n)], &(a)[i], sizeof *(a) * (stbds_header(a)->length-(n)-(i)))) -#define stbds_arrins(a,i,v) (stbds_arrinsn((a),(i),1), (a)[i]=(v)) - -#define stbds_arrmaybegrow(a,n) ((!(a) || stbds_header(a)->length + (n) > stbds_header(a)->capacity) \ - ? (stbds_arrgrow(a,n,0),0) : 0) - -#define stbds_arrgrow(a,b,c) ((a) = stbds_arrgrowf_wrapper((a), sizeof *(a), (b), (c))) - -#define stbds_hmput(t, k, v) \ - ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, 0), \ - (t)[stbds_temp((t)-1)].key = (k), \ - (t)[stbds_temp((t)-1)].value = (v)) - -#define stbds_hmputs(t, s) \ - ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), &(s).key, sizeof (s).key, STBDS_HM_BINARY), \ - (t)[stbds_temp((t)-1)] = (s)) - -#define stbds_hmgeti(t,k) \ - ((t) = stbds_hmget_key_wrapper((t), sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, STBDS_HM_BINARY), \ - stbds_temp((t)-1)) - -#define stbds_hmgeti_ts(t,k,temp) \ - ((t) = stbds_hmget_key_ts_wrapper((t), sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, &(temp), STBDS_HM_BINARY), \ - (temp)) - -#define stbds_hmgetp(t, k) \ - ((void) stbds_hmgeti(t,k), &(t)[stbds_temp((t)-1)]) - -#define stbds_hmgetp_ts(t, k, temp) \ - ((void) stbds_hmgeti_ts(t,k,temp), &(t)[temp]) - -#define stbds_hmdel(t,k) \ - (((t) = stbds_hmdel_key_wrapper((t),sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, STBDS_OFFSETOF((t),key), STBDS_HM_BINARY)),(t)?stbds_temp((t)-1):0) - -#define stbds_hmdefault(t, v) \ - ((t) = stbds_hmput_default_wrapper((t), sizeof *(t)), (t)[-1].value = (v)) - -#define stbds_hmdefaults(t, s) \ - ((t) = stbds_hmput_default_wrapper((t), sizeof *(t)), (t)[-1] = (s)) - -#define stbds_hmfree(p) \ - ((void) ((p) != NULL ? stbds_hmfree_func((p)-1,sizeof*(p)),0 : 0),(p)=NULL) - -#define stbds_hmgets(t, k) (*stbds_hmgetp(t,k)) -#define stbds_hmget(t, k) (stbds_hmgetp(t,k)->value) -#define stbds_hmget_ts(t, k, temp) (stbds_hmgetp_ts(t,k,temp)->value) -#define stbds_hmlen(t) ((t) ? (ptrdiff_t) stbds_header((t)-1)->length-1 : 0) -#define stbds_hmlenu(t) ((t) ? stbds_header((t)-1)->length-1 : 0) -#define stbds_hmgetp_null(t,k) (stbds_hmgeti(t,k) == -1 ? NULL : &(t)[stbds_temp((t)-1)]) - -#define stbds_shput(t, k, v) \ - ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_HM_STRING), \ - (t)[stbds_temp((t)-1)].value = (v)) - -#define stbds_shputi(t, k, v) \ - ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_HM_STRING), \ - (t)[stbds_temp((t)-1)].value = (v), stbds_temp((t)-1)) - -#define stbds_shputs(t, s) \ - ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (s).key, sizeof (s).key, STBDS_HM_STRING), \ - (t)[stbds_temp((t)-1)] = (s), \ - (t)[stbds_temp((t)-1)].key = stbds_temp_key((t)-1)) // above line overwrites whole structure, so must rewrite key here if it was allocated internally - -#define stbds_pshput(t, p) \ - ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (p)->key, sizeof (p)->key, STBDS_HM_PTR_TO_STRING), \ - (t)[stbds_temp((t)-1)] = (p)) - -#define stbds_shgeti(t,k) \ - ((t) = stbds_hmget_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_HM_STRING), \ - stbds_temp((t)-1)) - -#define stbds_pshgeti(t,k) \ - ((t) = stbds_hmget_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (*(t))->key, STBDS_HM_PTR_TO_STRING), \ - stbds_temp((t)-1)) - -#define stbds_shgetp(t, k) \ - ((void) stbds_shgeti(t,k), &(t)[stbds_temp((t)-1)]) - -#define stbds_pshget(t, k) \ - ((void) stbds_pshgeti(t,k), (t)[stbds_temp((t)-1)]) - -#define stbds_shdel(t,k) \ - (((t) = stbds_hmdel_key_wrapper((t),sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_OFFSETOF((t),key), STBDS_HM_STRING)),(t)?stbds_temp((t)-1):0) -#define stbds_pshdel(t,k) \ - (((t) = stbds_hmdel_key_wrapper((t),sizeof *(t), (void*) (k), sizeof (*(t))->key, STBDS_OFFSETOF(*(t),key), STBDS_HM_PTR_TO_STRING)),(t)?stbds_temp((t)-1):0) - -#define stbds_sh_new_arena(t) \ - ((t) = stbds_shmode_func_wrapper(t, sizeof *(t), STBDS_SH_ARENA)) -#define stbds_sh_new_strdup(t) \ - ((t) = stbds_shmode_func_wrapper(t, sizeof *(t), STBDS_SH_STRDUP)) - -#define stbds_shdefault(t, v) stbds_hmdefault(t,v) -#define stbds_shdefaults(t, s) stbds_hmdefaults(t,s) - -#define stbds_shfree stbds_hmfree -#define stbds_shlenu stbds_hmlenu - -#define stbds_shgets(t, k) (*stbds_shgetp(t,k)) -#define stbds_shget(t, k) (stbds_shgetp(t,k)->value) -#define stbds_shgetp_null(t,k) (stbds_shgeti(t,k) == -1 ? NULL : &(t)[stbds_temp((t)-1)]) -#define stbds_shlen stbds_hmlen - -typedef struct -{ - size_t length; - size_t capacity; - void * hash_table; - ptrdiff_t temp; -} stbds_array_header; - -typedef struct stbds_string_block -{ - struct stbds_string_block *next; - char storage[8]; -} stbds_string_block; - -struct stbds_string_arena -{ - stbds_string_block *storage; - size_t remaining; - unsigned char block; - unsigned char mode; // this isn't used by the string arena itself -}; - -#define STBDS_HM_BINARY 0 -#define STBDS_HM_STRING 1 - -enum -{ - STBDS_SH_NONE, - STBDS_SH_DEFAULT, - STBDS_SH_STRDUP, - STBDS_SH_ARENA -}; - -#ifdef __cplusplus -// in C we use implicit assignment from these void*-returning functions to T*. -// in C++ these templates make the same code work -template static T * stbds_arrgrowf_wrapper(T *a, size_t elemsize, size_t addlen, size_t min_cap) { - return (T*)stbds_arrgrowf((void *)a, elemsize, addlen, min_cap); -} -template static T * stbds_hmget_key_wrapper(T *a, size_t elemsize, void *key, size_t keysize, int mode) { - return (T*)stbds_hmget_key((void*)a, elemsize, key, keysize, mode); -} -template static T * stbds_hmget_key_ts_wrapper(T *a, size_t elemsize, void *key, size_t keysize, ptrdiff_t *temp, int mode) { - return (T*)stbds_hmget_key_ts((void*)a, elemsize, key, keysize, temp, mode); -} -template static T * stbds_hmput_default_wrapper(T *a, size_t elemsize) { - return (T*)stbds_hmput_default((void *)a, elemsize); -} -template static T * stbds_hmput_key_wrapper(T *a, size_t elemsize, void *key, size_t keysize, int mode) { - return (T*)stbds_hmput_key((void*)a, elemsize, key, keysize, mode); -} -template static T * stbds_hmdel_key_wrapper(T *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode){ - return (T*)stbds_hmdel_key((void*)a, elemsize, key, keysize, keyoffset, mode); -} -template static T * stbds_shmode_func_wrapper(T *, size_t elemsize, int mode) { - return (T*)stbds_shmode_func(elemsize, mode); -} -#else -#define stbds_arrgrowf_wrapper stbds_arrgrowf -#define stbds_hmget_key_wrapper stbds_hmget_key -#define stbds_hmget_key_ts_wrapper stbds_hmget_key_ts -#define stbds_hmput_default_wrapper stbds_hmput_default -#define stbds_hmput_key_wrapper stbds_hmput_key -#define stbds_hmdel_key_wrapper stbds_hmdel_key -#define stbds_shmode_func_wrapper(t,e,m) stbds_shmode_func(e,m) -#endif - -#endif // INCLUDE_STB_DS_H - - -////////////////////////////////////////////////////////////////////////////// -// -// IMPLEMENTATION -// - -#ifdef STB_DS_IMPLEMENTATION -#include -#include - -#ifndef STBDS_ASSERT -#define STBDS_ASSERT_WAS_UNDEFINED -#define STBDS_ASSERT(x) ((void) 0) -#endif - -#ifdef STBDS_STATISTICS -#define STBDS_STATS(x) x -size_t stbds_array_grow; -size_t stbds_hash_grow; -size_t stbds_hash_shrink; -size_t stbds_hash_rebuild; -size_t stbds_hash_probes; -size_t stbds_hash_alloc; -size_t stbds_rehash_probes; -size_t stbds_rehash_items; -#else -#define STBDS_STATS(x) -#endif - -// -// stbds_arr implementation -// - -//int *prev_allocs[65536]; -//int num_prev; - -void *stbds_arrgrowf(void *a, size_t elemsize, size_t addlen, size_t min_cap) -{ - stbds_array_header temp={0}; // force debugging - void *b; - size_t min_len = stbds_arrlen(a) + addlen; - (void) sizeof(temp); - - // compute the minimum capacity needed - if (min_len > min_cap) - min_cap = min_len; - - if (min_cap <= stbds_arrcap(a)) - return a; - - // increase needed capacity to guarantee O(1) amortized - if (min_cap < 2 * stbds_arrcap(a)) - min_cap = 2 * stbds_arrcap(a); - else if (min_cap < 4) - min_cap = 4; - - //if (num_prev < 65536) if (a) prev_allocs[num_prev++] = (int *) ((char *) a+1); - //if (num_prev == 2201) - // num_prev = num_prev; - b = STBDS_REALLOC(NULL, (a) ? stbds_header(a) : 0, elemsize * min_cap + sizeof(stbds_array_header)); - //if (num_prev < 65536) prev_allocs[num_prev++] = (int *) (char *) b; - b = (char *) b + sizeof(stbds_array_header); - if (a == NULL) { - stbds_header(b)->length = 0; - stbds_header(b)->hash_table = 0; - stbds_header(b)->temp = 0; - } else { - STBDS_STATS(++stbds_array_grow); - } - stbds_header(b)->capacity = min_cap; - - return b; -} - -void stbds_arrfreef(void *a) -{ - STBDS_FREE(NULL, stbds_header(a)); -} - -// -// stbds_hm hash table implementation -// - -#ifdef STBDS_INTERNAL_SMALL_BUCKET -#define STBDS_BUCKET_LENGTH 4 -#else -#define STBDS_BUCKET_LENGTH 8 -#endif - -#define STBDS_BUCKET_SHIFT (STBDS_BUCKET_LENGTH == 8 ? 3 : 2) -#define STBDS_BUCKET_MASK (STBDS_BUCKET_LENGTH-1) -#define STBDS_CACHE_LINE_SIZE 64 - -#define STBDS_ALIGN_FWD(n,a) (((n) + (a) - 1) & ~((a)-1)) - -typedef struct -{ - size_t hash [STBDS_BUCKET_LENGTH]; - ptrdiff_t index[STBDS_BUCKET_LENGTH]; -} stbds_hash_bucket; // in 32-bit, this is one 64-byte cache line; in 64-bit, each array is one 64-byte cache line - -typedef struct -{ - char * temp_key; // this MUST be the first field of the hash table - size_t slot_count; - size_t used_count; - size_t used_count_threshold; - size_t used_count_shrink_threshold; - size_t tombstone_count; - size_t tombstone_count_threshold; - size_t seed; - size_t slot_count_log2; - stbds_string_arena string; - stbds_hash_bucket *storage; // not a separate allocation, just 64-byte aligned storage after this struct -} stbds_hash_index; - -#define STBDS_INDEX_EMPTY -1 -#define STBDS_INDEX_DELETED -2 -#define STBDS_INDEX_IN_USE(x) ((x) >= 0) - -#define STBDS_HASH_EMPTY 0 -#define STBDS_HASH_DELETED 1 - -static size_t stbds_hash_seed=0x31415926; - -void stbds_rand_seed(size_t seed) -{ - stbds_hash_seed = seed; -} - -#define stbds_load_32_or_64(var, temp, v32, v64_hi, v64_lo) \ - temp = v64_lo ^ v32, temp <<= 16, temp <<= 16, temp >>= 16, temp >>= 16, /* discard if 32-bit */ \ - var = v64_hi, var <<= 16, var <<= 16, /* discard if 32-bit */ \ - var ^= temp ^ v32 - -#define STBDS_SIZE_T_BITS ((sizeof (size_t)) * 8) - -static size_t stbds_probe_position(size_t hash, size_t slot_count, size_t slot_log2) -{ - size_t pos; - STBDS_NOTUSED(slot_log2); - pos = hash & (slot_count-1); - #ifdef STBDS_INTERNAL_BUCKET_START - pos &= ~STBDS_BUCKET_MASK; - #endif - return pos; -} - -static size_t stbds_log2(size_t slot_count) -{ - size_t n=0; - while (slot_count > 1) { - slot_count >>= 1; - ++n; - } - return n; -} - -static stbds_hash_index *stbds_make_hash_index(size_t slot_count, stbds_hash_index *ot) -{ - stbds_hash_index *t; - t = (stbds_hash_index *) STBDS_REALLOC(NULL,0,(slot_count >> STBDS_BUCKET_SHIFT) * sizeof(stbds_hash_bucket) + sizeof(stbds_hash_index) + STBDS_CACHE_LINE_SIZE-1); - t->storage = (stbds_hash_bucket *) STBDS_ALIGN_FWD((size_t) (t+1), STBDS_CACHE_LINE_SIZE); - t->slot_count = slot_count; - t->slot_count_log2 = stbds_log2(slot_count); - t->tombstone_count = 0; - t->used_count = 0; - - #if 0 // A1 - t->used_count_threshold = slot_count*12/16; // if 12/16th of table is occupied, grow - t->tombstone_count_threshold = slot_count* 2/16; // if tombstones are 2/16th of table, rebuild - t->used_count_shrink_threshold = slot_count* 4/16; // if table is only 4/16th full, shrink - #elif 1 // A2 - //t->used_count_threshold = slot_count*12/16; // if 12/16th of table is occupied, grow - //t->tombstone_count_threshold = slot_count* 3/16; // if tombstones are 3/16th of table, rebuild - //t->used_count_shrink_threshold = slot_count* 4/16; // if table is only 4/16th full, shrink - - // compute without overflowing - t->used_count_threshold = slot_count - (slot_count>>2); - t->tombstone_count_threshold = (slot_count>>3) + (slot_count>>4); - t->used_count_shrink_threshold = slot_count >> 2; - - #elif 0 // B1 - t->used_count_threshold = slot_count*13/16; // if 13/16th of table is occupied, grow - t->tombstone_count_threshold = slot_count* 2/16; // if tombstones are 2/16th of table, rebuild - t->used_count_shrink_threshold = slot_count* 5/16; // if table is only 5/16th full, shrink - #else // C1 - t->used_count_threshold = slot_count*14/16; // if 14/16th of table is occupied, grow - t->tombstone_count_threshold = slot_count* 2/16; // if tombstones are 2/16th of table, rebuild - t->used_count_shrink_threshold = slot_count* 6/16; // if table is only 6/16th full, shrink - #endif - // Following statistics were measured on a Core i7-6700 @ 4.00Ghz, compiled with clang 7.0.1 -O2 - // Note that the larger tables have high variance as they were run fewer times - // A1 A2 B1 C1 - // 0.10ms : 0.10ms : 0.10ms : 0.11ms : 2,000 inserts creating 2K table - // 0.96ms : 0.95ms : 0.97ms : 1.04ms : 20,000 inserts creating 20K table - // 14.48ms : 14.46ms : 10.63ms : 11.00ms : 200,000 inserts creating 200K table - // 195.74ms : 196.35ms : 203.69ms : 214.92ms : 2,000,000 inserts creating 2M table - // 2193.88ms : 2209.22ms : 2285.54ms : 2437.17ms : 20,000,000 inserts creating 20M table - // 65.27ms : 53.77ms : 65.33ms : 65.47ms : 500,000 inserts & deletes in 2K table - // 72.78ms : 62.45ms : 71.95ms : 72.85ms : 500,000 inserts & deletes in 20K table - // 89.47ms : 77.72ms : 96.49ms : 96.75ms : 500,000 inserts & deletes in 200K table - // 97.58ms : 98.14ms : 97.18ms : 97.53ms : 500,000 inserts & deletes in 2M table - // 118.61ms : 119.62ms : 120.16ms : 118.86ms : 500,000 inserts & deletes in 20M table - // 192.11ms : 194.39ms : 196.38ms : 195.73ms : 500,000 inserts & deletes in 200M table - - if (slot_count <= STBDS_BUCKET_LENGTH) - t->used_count_shrink_threshold = 0; - // to avoid infinite loop, we need to guarantee that at least one slot is empty and will terminate probes - STBDS_ASSERT(t->used_count_threshold + t->tombstone_count_threshold < t->slot_count); - STBDS_STATS(++stbds_hash_alloc); - if (ot) { - t->string = ot->string; - // reuse old seed so we can reuse old hashes so below "copy out old data" doesn't do any hashing - t->seed = ot->seed; - } else { - size_t a,b,temp; - memset(&t->string, 0, sizeof(t->string)); - t->seed = stbds_hash_seed; - // LCG - // in 32-bit, a = 2147001325 b = 715136305 - // in 64-bit, a = 2862933555777941757 b = 3037000493 - stbds_load_32_or_64(a,temp, 2147001325, 0x27bb2ee6, 0x87b0b0fd); - stbds_load_32_or_64(b,temp, 715136305, 0, 0xb504f32d); - stbds_hash_seed = stbds_hash_seed * a + b; - } - - { - size_t i,j; - for (i=0; i < slot_count >> STBDS_BUCKET_SHIFT; ++i) { - stbds_hash_bucket *b = &t->storage[i]; - for (j=0; j < STBDS_BUCKET_LENGTH; ++j) - b->hash[j] = STBDS_HASH_EMPTY; - for (j=0; j < STBDS_BUCKET_LENGTH; ++j) - b->index[j] = STBDS_INDEX_EMPTY; - } - } - - // copy out the old data, if any - if (ot) { - size_t i,j; - t->used_count = ot->used_count; - for (i=0; i < ot->slot_count >> STBDS_BUCKET_SHIFT; ++i) { - stbds_hash_bucket *ob = &ot->storage[i]; - for (j=0; j < STBDS_BUCKET_LENGTH; ++j) { - if (STBDS_INDEX_IN_USE(ob->index[j])) { - size_t hash = ob->hash[j]; - size_t pos = stbds_probe_position(hash, t->slot_count, t->slot_count_log2); - size_t step = STBDS_BUCKET_LENGTH; - STBDS_STATS(++stbds_rehash_items); - for (;;) { - size_t limit,z; - stbds_hash_bucket *bucket; - bucket = &t->storage[pos >> STBDS_BUCKET_SHIFT]; - STBDS_STATS(++stbds_rehash_probes); - - for (z=pos & STBDS_BUCKET_MASK; z < STBDS_BUCKET_LENGTH; ++z) { - if (bucket->hash[z] == 0) { - bucket->hash[z] = hash; - bucket->index[z] = ob->index[j]; - goto done; - } - } - - limit = pos & STBDS_BUCKET_MASK; - for (z = 0; z < limit; ++z) { - if (bucket->hash[z] == 0) { - bucket->hash[z] = hash; - bucket->index[z] = ob->index[j]; - goto done; - } - } - - pos += step; // quadratic probing - step += STBDS_BUCKET_LENGTH; - pos &= (t->slot_count-1); - } - } - done: - ; - } - } - } - - return t; -} - -#define STBDS_ROTATE_LEFT(val, n) (((val) << (n)) | ((val) >> (STBDS_SIZE_T_BITS - (n)))) -#define STBDS_ROTATE_RIGHT(val, n) (((val) >> (n)) | ((val) << (STBDS_SIZE_T_BITS - (n)))) - -size_t stbds_hash_string(char *str, size_t seed) -{ - size_t hash = seed; - while (*str) - hash = STBDS_ROTATE_LEFT(hash, 9) + (unsigned char) *str++; - - // Thomas Wang 64-to-32 bit mix function, hopefully also works in 32 bits - hash ^= seed; - hash = (~hash) + (hash << 18); - hash ^= hash ^ STBDS_ROTATE_RIGHT(hash,31); - hash = hash * 21; - hash ^= hash ^ STBDS_ROTATE_RIGHT(hash,11); - hash += (hash << 6); - hash ^= STBDS_ROTATE_RIGHT(hash,22); - return hash+seed; -} - -#ifdef STBDS_SIPHASH_2_4 -#define STBDS_SIPHASH_C_ROUNDS 2 -#define STBDS_SIPHASH_D_ROUNDS 4 -typedef int STBDS_SIPHASH_2_4_can_only_be_used_in_64_bit_builds[sizeof(size_t) == 8 ? 1 : -1]; -#endif - -#ifndef STBDS_SIPHASH_C_ROUNDS -#define STBDS_SIPHASH_C_ROUNDS 1 -#endif -#ifndef STBDS_SIPHASH_D_ROUNDS -#define STBDS_SIPHASH_D_ROUNDS 1 -#endif - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable:4127) // conditional expression is constant, for do..while(0) and sizeof()== -#endif - -static size_t stbds_siphash_bytes(void *p, size_t len, size_t seed) -{ - unsigned char *d = (unsigned char *) p; - size_t i,j; - size_t v0,v1,v2,v3, data; - - // hash that works on 32- or 64-bit registers without knowing which we have - // (computes different results on 32-bit and 64-bit platform) - // derived from siphash, but on 32-bit platforms very different as it uses 4 32-bit state not 4 64-bit - v0 = ((((size_t) 0x736f6d65 << 16) << 16) + 0x70736575) ^ seed; - v1 = ((((size_t) 0x646f7261 << 16) << 16) + 0x6e646f6d) ^ ~seed; - v2 = ((((size_t) 0x6c796765 << 16) << 16) + 0x6e657261) ^ seed; - v3 = ((((size_t) 0x74656462 << 16) << 16) + 0x79746573) ^ ~seed; - - #ifdef STBDS_TEST_SIPHASH_2_4 - // hardcoded with key material in the siphash test vectors - v0 ^= 0x0706050403020100ull ^ seed; - v1 ^= 0x0f0e0d0c0b0a0908ull ^ ~seed; - v2 ^= 0x0706050403020100ull ^ seed; - v3 ^= 0x0f0e0d0c0b0a0908ull ^ ~seed; - #endif - - #define STBDS_SIPROUND() \ - do { \ - v0 += v1; v1 = STBDS_ROTATE_LEFT(v1, 13); v1 ^= v0; v0 = STBDS_ROTATE_LEFT(v0,STBDS_SIZE_T_BITS/2); \ - v2 += v3; v3 = STBDS_ROTATE_LEFT(v3, 16); v3 ^= v2; \ - v2 += v1; v1 = STBDS_ROTATE_LEFT(v1, 17); v1 ^= v2; v2 = STBDS_ROTATE_LEFT(v2,STBDS_SIZE_T_BITS/2); \ - v0 += v3; v3 = STBDS_ROTATE_LEFT(v3, 21); v3 ^= v0; \ - } while (0) - - for (i=0; i+sizeof(size_t) <= len; i += sizeof(size_t), d += sizeof(size_t)) { - data = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24); - data |= (size_t) (d[4] | (d[5] << 8) | (d[6] << 16) | (d[7] << 24)) << 16 << 16; // discarded if size_t == 4 - - v3 ^= data; - for (j=0; j < STBDS_SIPHASH_C_ROUNDS; ++j) - STBDS_SIPROUND(); - v0 ^= data; - } - data = len << (STBDS_SIZE_T_BITS-8); - switch (len - i) { - case 7: data |= ((size_t) d[6] << 24) << 24; // fall through - case 6: data |= ((size_t) d[5] << 20) << 20; // fall through - case 5: data |= ((size_t) d[4] << 16) << 16; // fall through - case 4: data |= (d[3] << 24); // fall through - case 3: data |= (d[2] << 16); // fall through - case 2: data |= (d[1] << 8); // fall through - case 1: data |= d[0]; // fall through - case 0: break; - } - v3 ^= data; - for (j=0; j < STBDS_SIPHASH_C_ROUNDS; ++j) - STBDS_SIPROUND(); - v0 ^= data; - v2 ^= 0xff; - for (j=0; j < STBDS_SIPHASH_D_ROUNDS; ++j) - STBDS_SIPROUND(); - -#ifdef STBDS_SIPHASH_2_4 - return v0^v1^v2^v3; -#else - return v1^v2^v3; // slightly stronger since v0^v3 in above cancels out final round operation? I tweeted at the authors of SipHash about this but they didn't reply -#endif -} - -size_t stbds_hash_bytes(void *p, size_t len, size_t seed) -{ -#ifdef STBDS_SIPHASH_2_4 - return stbds_siphash_bytes(p,len,seed); -#else - unsigned char *d = (unsigned char *) p; - - if (len == 4) { - unsigned int hash = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24); - #if 0 - // HASH32-A Bob Jenkin's hash function w/o large constants - hash ^= seed; - hash -= (hash<<6); - hash ^= (hash>>17); - hash -= (hash<<9); - hash ^= seed; - hash ^= (hash<<4); - hash -= (hash<<3); - hash ^= (hash<<10); - hash ^= (hash>>15); - #elif 1 - // HASH32-BB Bob Jenkin's presumably-accidental version of Thomas Wang hash with rotates turned into shifts. - // Note that converting these back to rotates makes it run a lot slower, presumably due to collisions, so I'm - // not really sure what's going on. - hash ^= seed; - hash = (hash ^ 61) ^ (hash >> 16); - hash = hash + (hash << 3); - hash = hash ^ (hash >> 4); - hash = hash * 0x27d4eb2d; - hash ^= seed; - hash = hash ^ (hash >> 15); - #else // HASH32-C - Murmur3 - hash ^= seed; - hash *= 0xcc9e2d51; - hash = (hash << 17) | (hash >> 15); - hash *= 0x1b873593; - hash ^= seed; - hash = (hash << 19) | (hash >> 13); - hash = hash*5 + 0xe6546b64; - hash ^= hash >> 16; - hash *= 0x85ebca6b; - hash ^= seed; - hash ^= hash >> 13; - hash *= 0xc2b2ae35; - hash ^= hash >> 16; - #endif - // Following statistics were measured on a Core i7-6700 @ 4.00Ghz, compiled with clang 7.0.1 -O2 - // Note that the larger tables have high variance as they were run fewer times - // HASH32-A // HASH32-BB // HASH32-C - // 0.10ms // 0.10ms // 0.10ms : 2,000 inserts creating 2K table - // 0.96ms // 0.95ms // 0.99ms : 20,000 inserts creating 20K table - // 14.69ms // 14.43ms // 14.97ms : 200,000 inserts creating 200K table - // 199.99ms // 195.36ms // 202.05ms : 2,000,000 inserts creating 2M table - // 2234.84ms // 2187.74ms // 2240.38ms : 20,000,000 inserts creating 20M table - // 55.68ms // 53.72ms // 57.31ms : 500,000 inserts & deletes in 2K table - // 63.43ms // 61.99ms // 65.73ms : 500,000 inserts & deletes in 20K table - // 80.04ms // 77.96ms // 81.83ms : 500,000 inserts & deletes in 200K table - // 100.42ms // 97.40ms // 102.39ms : 500,000 inserts & deletes in 2M table - // 119.71ms // 120.59ms // 121.63ms : 500,000 inserts & deletes in 20M table - // 185.28ms // 195.15ms // 187.74ms : 500,000 inserts & deletes in 200M table - // 15.58ms // 14.79ms // 15.52ms : 200,000 inserts creating 200K table with varying key spacing - - return (((size_t) hash << 16 << 16) | hash) ^ seed; - } else if (len == 8 && sizeof(size_t) == 8) { - size_t hash = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24); - hash |= (size_t) (d[4] | (d[5] << 8) | (d[6] << 16) | (d[7] << 24)) << 16 << 16; // avoid warning if size_t == 4 - hash ^= seed; - hash = (~hash) + (hash << 21); - hash ^= STBDS_ROTATE_RIGHT(hash,24); - hash *= 265; - hash ^= STBDS_ROTATE_RIGHT(hash,14); - hash ^= seed; - hash *= 21; - hash ^= STBDS_ROTATE_RIGHT(hash,28); - hash += (hash << 31); - hash = (~hash) + (hash << 18); - return hash; - } else { - return stbds_siphash_bytes(p,len,seed); - } -#endif -} -#ifdef _MSC_VER -#pragma warning(pop) -#endif - - -static int stbds_is_key_equal(void *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode, size_t i) -{ - if (mode >= STBDS_HM_STRING) - return 0==strcmp((char *) key, * (char **) ((char *) a + elemsize*i + keyoffset)); - else - return 0==memcmp(key, (char *) a + elemsize*i + keyoffset, keysize); -} - -#define STBDS_HASH_TO_ARR(x,elemsize) ((char*) (x) - (elemsize)) -#define STBDS_ARR_TO_HASH(x,elemsize) ((char*) (x) + (elemsize)) - -#define stbds_hash_table(a) ((stbds_hash_index *) stbds_header(a)->hash_table) - -void stbds_hmfree_func(void *a, size_t elemsize) -{ - if (a == NULL) return; - if (stbds_hash_table(a) != NULL) { - if (stbds_hash_table(a)->string.mode == STBDS_SH_STRDUP) { - size_t i; - // skip 0th element, which is default - for (i=1; i < stbds_header(a)->length; ++i) - STBDS_FREE(NULL, *(char**) ((char *) a + elemsize*i)); - } - stbds_strreset(&stbds_hash_table(a)->string); - } - STBDS_FREE(NULL, stbds_header(a)->hash_table); - STBDS_FREE(NULL, stbds_header(a)); -} - -static ptrdiff_t stbds_hm_find_slot(void *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode) -{ - void *raw_a = STBDS_HASH_TO_ARR(a,elemsize); - stbds_hash_index *table = stbds_hash_table(raw_a); - size_t hash = mode >= STBDS_HM_STRING ? stbds_hash_string((char*)key,table->seed) : stbds_hash_bytes(key, keysize,table->seed); - size_t step = STBDS_BUCKET_LENGTH; - size_t limit,i; - size_t pos; - stbds_hash_bucket *bucket; - - if (hash < 2) hash += 2; // stored hash values are forbidden from being 0, so we can detect empty slots - - pos = stbds_probe_position(hash, table->slot_count, table->slot_count_log2); - - for (;;) { - STBDS_STATS(++stbds_hash_probes); - bucket = &table->storage[pos >> STBDS_BUCKET_SHIFT]; - - // start searching from pos to end of bucket, this should help performance on small hash tables that fit in cache - for (i=pos & STBDS_BUCKET_MASK; i < STBDS_BUCKET_LENGTH; ++i) { - if (bucket->hash[i] == hash) { - if (stbds_is_key_equal(a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) { - return (pos & ~STBDS_BUCKET_MASK)+i; - } - } else if (bucket->hash[i] == STBDS_HASH_EMPTY) { - return -1; - } - } - - // search from beginning of bucket to pos - limit = pos & STBDS_BUCKET_MASK; - for (i = 0; i < limit; ++i) { - if (bucket->hash[i] == hash) { - if (stbds_is_key_equal(a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) { - return (pos & ~STBDS_BUCKET_MASK)+i; - } - } else if (bucket->hash[i] == STBDS_HASH_EMPTY) { - return -1; - } - } - - // quadratic probing - pos += step; - step += STBDS_BUCKET_LENGTH; - pos &= (table->slot_count-1); - } - /* NOTREACHED */ -} - -void * stbds_hmget_key_ts(void *a, size_t elemsize, void *key, size_t keysize, ptrdiff_t *temp, int mode) -{ - size_t keyoffset = 0; - if (a == NULL) { - // make it non-empty so we can return a temp - a = stbds_arrgrowf(0, elemsize, 0, 1); - stbds_header(a)->length += 1; - memset(a, 0, elemsize); - *temp = STBDS_INDEX_EMPTY; - // adjust a to point after the default element - return STBDS_ARR_TO_HASH(a,elemsize); - } else { - stbds_hash_index *table; - void *raw_a = STBDS_HASH_TO_ARR(a,elemsize); - // adjust a to point to the default element - table = (stbds_hash_index *) stbds_header(raw_a)->hash_table; - if (table == 0) { - *temp = -1; - } else { - ptrdiff_t slot = stbds_hm_find_slot(a, elemsize, key, keysize, keyoffset, mode); - if (slot < 0) { - *temp = STBDS_INDEX_EMPTY; - } else { - stbds_hash_bucket *b = &table->storage[slot >> STBDS_BUCKET_SHIFT]; - *temp = b->index[slot & STBDS_BUCKET_MASK]; - } - } - return a; - } -} - -void * stbds_hmget_key(void *a, size_t elemsize, void *key, size_t keysize, int mode) -{ - ptrdiff_t temp; - void *p = stbds_hmget_key_ts(a, elemsize, key, keysize, &temp, mode); - stbds_temp(STBDS_HASH_TO_ARR(p,elemsize)) = temp; - return p; -} - -void * stbds_hmput_default(void *a, size_t elemsize) -{ - // three cases: - // a is NULL <- allocate - // a has a hash table but no entries, because of shmode <- grow - // a has entries <- do nothing - if (a == NULL || stbds_header(STBDS_HASH_TO_ARR(a,elemsize))->length == 0) { - a = stbds_arrgrowf(a ? STBDS_HASH_TO_ARR(a,elemsize) : NULL, elemsize, 0, 1); - stbds_header(a)->length += 1; - memset(a, 0, elemsize); - a=STBDS_ARR_TO_HASH(a,elemsize); - } - return a; -} - -static char *stbds_strdup(char *str); - -void *stbds_hmput_key(void *a, size_t elemsize, void *key, size_t keysize, int mode) -{ - size_t keyoffset=0; - void *raw_a; - stbds_hash_index *table; - - if (a == NULL) { - a = stbds_arrgrowf(0, elemsize, 0, 1); - memset(a, 0, elemsize); - stbds_header(a)->length += 1; - // adjust a to point AFTER the default element - a = STBDS_ARR_TO_HASH(a,elemsize); - } - - // adjust a to point to the default element - raw_a = a; - a = STBDS_HASH_TO_ARR(a,elemsize); - - table = (stbds_hash_index *) stbds_header(a)->hash_table; - - if (table == NULL || table->used_count >= table->used_count_threshold) { - stbds_hash_index *nt; - size_t slot_count; - - slot_count = (table == NULL) ? STBDS_BUCKET_LENGTH : table->slot_count*2; - nt = stbds_make_hash_index(slot_count, table); - if (table) - STBDS_FREE(NULL, table); - else - nt->string.mode = mode >= STBDS_HM_STRING ? STBDS_SH_DEFAULT : 0; - stbds_header(a)->hash_table = table = nt; - STBDS_STATS(++stbds_hash_grow); - } - - // we iterate hash table explicitly because we want to track if we saw a tombstone - { - size_t hash = mode >= STBDS_HM_STRING ? stbds_hash_string((char*)key,table->seed) : stbds_hash_bytes(key, keysize,table->seed); - size_t step = STBDS_BUCKET_LENGTH; - size_t pos; - ptrdiff_t tombstone = -1; - stbds_hash_bucket *bucket; - - // stored hash values are forbidden from being 0, so we can detect empty slots to early out quickly - if (hash < 2) hash += 2; - - pos = stbds_probe_position(hash, table->slot_count, table->slot_count_log2); - - for (;;) { - size_t limit, i; - STBDS_STATS(++stbds_hash_probes); - bucket = &table->storage[pos >> STBDS_BUCKET_SHIFT]; - - // start searching from pos to end of bucket - for (i=pos & STBDS_BUCKET_MASK; i < STBDS_BUCKET_LENGTH; ++i) { - if (bucket->hash[i] == hash) { - if (stbds_is_key_equal(raw_a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) { - stbds_temp(a) = bucket->index[i]; - if (mode >= STBDS_HM_STRING) - stbds_temp_key(a) = * (char **) ((char *) raw_a + elemsize*bucket->index[i] + keyoffset); - return STBDS_ARR_TO_HASH(a,elemsize); - } - } else if (bucket->hash[i] == 0) { - pos = (pos & ~STBDS_BUCKET_MASK) + i; - goto found_empty_slot; - } else if (tombstone < 0) { - if (bucket->index[i] == STBDS_INDEX_DELETED) - tombstone = (ptrdiff_t) ((pos & ~STBDS_BUCKET_MASK) + i); - } - } - - // search from beginning of bucket to pos - limit = pos & STBDS_BUCKET_MASK; - for (i = 0; i < limit; ++i) { - if (bucket->hash[i] == hash) { - if (stbds_is_key_equal(raw_a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) { - stbds_temp(a) = bucket->index[i]; - return STBDS_ARR_TO_HASH(a,elemsize); - } - } else if (bucket->hash[i] == 0) { - pos = (pos & ~STBDS_BUCKET_MASK) + i; - goto found_empty_slot; - } else if (tombstone < 0) { - if (bucket->index[i] == STBDS_INDEX_DELETED) - tombstone = (ptrdiff_t) ((pos & ~STBDS_BUCKET_MASK) + i); - } - } - - // quadratic probing - pos += step; - step += STBDS_BUCKET_LENGTH; - pos &= (table->slot_count-1); - } - found_empty_slot: - if (tombstone >= 0) { - pos = tombstone; - --table->tombstone_count; - } - ++table->used_count; - - { - ptrdiff_t i = (ptrdiff_t) stbds_arrlen(a); - // we want to do stbds_arraddn(1), but we can't use the macros since we don't have something of the right type - if ((size_t) i+1 > stbds_arrcap(a)) - *(void **) &a = stbds_arrgrowf(a, elemsize, 1, 0); - raw_a = STBDS_ARR_TO_HASH(a,elemsize); - - STBDS_ASSERT((size_t) i+1 <= stbds_arrcap(a)); - stbds_header(a)->length = i+1; - bucket = &table->storage[pos >> STBDS_BUCKET_SHIFT]; - bucket->hash[pos & STBDS_BUCKET_MASK] = hash; - bucket->index[pos & STBDS_BUCKET_MASK] = i-1; - stbds_temp(a) = i-1; - - switch (table->string.mode) { - case STBDS_SH_STRDUP: stbds_temp_key(a) = *(char **) ((char *) a + elemsize*i) = stbds_strdup((char*) key); break; - case STBDS_SH_ARENA: stbds_temp_key(a) = *(char **) ((char *) a + elemsize*i) = stbds_stralloc(&table->string, (char*)key); break; - case STBDS_SH_DEFAULT: stbds_temp_key(a) = *(char **) ((char *) a + elemsize*i) = (char *) key; break; - default: memcpy((char *) a + elemsize*i, key, keysize); break; - } - } - return STBDS_ARR_TO_HASH(a,elemsize); - } -} - -void * stbds_shmode_func(size_t elemsize, int mode) -{ - void *a = stbds_arrgrowf(0, elemsize, 0, 1); - stbds_hash_index *h; - memset(a, 0, elemsize); - stbds_header(a)->length = 1; - stbds_header(a)->hash_table = h = (stbds_hash_index *) stbds_make_hash_index(STBDS_BUCKET_LENGTH, NULL); - h->string.mode = (unsigned char) mode; - return STBDS_ARR_TO_HASH(a,elemsize); -} - -void * stbds_hmdel_key(void *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode) -{ - if (a == NULL) { - return 0; - } else { - stbds_hash_index *table; - void *raw_a = STBDS_HASH_TO_ARR(a,elemsize); - table = (stbds_hash_index *) stbds_header(raw_a)->hash_table; - stbds_temp(raw_a) = 0; - if (table == 0) { - return a; - } else { - ptrdiff_t slot; - slot = stbds_hm_find_slot(a, elemsize, key, keysize, keyoffset, mode); - if (slot < 0) - return a; - else { - stbds_hash_bucket *b = &table->storage[slot >> STBDS_BUCKET_SHIFT]; - int i = slot & STBDS_BUCKET_MASK; - ptrdiff_t old_index = b->index[i]; - ptrdiff_t final_index = (ptrdiff_t) stbds_arrlen(raw_a)-1-1; // minus one for the raw_a vs a, and minus one for 'last' - STBDS_ASSERT(slot < (ptrdiff_t) table->slot_count); - --table->used_count; - ++table->tombstone_count; - stbds_temp(raw_a) = 1; - STBDS_ASSERT(table->used_count >= 0); - //STBDS_ASSERT(table->tombstone_count < table->slot_count/4); - b->hash[i] = STBDS_HASH_DELETED; - b->index[i] = STBDS_INDEX_DELETED; - - if (mode == STBDS_HM_STRING && table->string.mode == STBDS_SH_STRDUP) - STBDS_FREE(NULL, *(char**) ((char *) a+elemsize*old_index)); - - // if indices are the same, memcpy is a no-op, but back-pointer-fixup will fail, so skip - if (old_index != final_index) { - // swap delete - memmove((char*) a + elemsize*old_index, (char*) a + elemsize*final_index, elemsize); - - // now find the slot for the last element - if (mode == STBDS_HM_STRING) - slot = stbds_hm_find_slot(a, elemsize, *(char**) ((char *) a+elemsize*old_index + keyoffset), keysize, keyoffset, mode); - else - slot = stbds_hm_find_slot(a, elemsize, (char* ) a+elemsize*old_index + keyoffset, keysize, keyoffset, mode); - STBDS_ASSERT(slot >= 0); - b = &table->storage[slot >> STBDS_BUCKET_SHIFT]; - i = slot & STBDS_BUCKET_MASK; - STBDS_ASSERT(b->index[i] == final_index); - b->index[i] = old_index; - } - stbds_header(raw_a)->length -= 1; - - if (table->used_count < table->used_count_shrink_threshold && table->slot_count > STBDS_BUCKET_LENGTH) { - stbds_header(raw_a)->hash_table = stbds_make_hash_index(table->slot_count>>1, table); - STBDS_FREE(NULL, table); - STBDS_STATS(++stbds_hash_shrink); - } else if (table->tombstone_count > table->tombstone_count_threshold) { - stbds_header(raw_a)->hash_table = stbds_make_hash_index(table->slot_count , table); - STBDS_FREE(NULL, table); - STBDS_STATS(++stbds_hash_rebuild); - } - - return a; - } - } - } - /* NOTREACHED */ -} - -static char *stbds_strdup(char *str) -{ - // to keep replaceable allocator simple, we don't want to use strdup. - // rolling our own also avoids problem of strdup vs _strdup - size_t len = strlen(str)+1; - char *p = (char*) STBDS_REALLOC(NULL, 0, len); - memmove(p, str, len); - return p; -} - -#ifndef STBDS_STRING_ARENA_BLOCKSIZE_MIN -#define STBDS_STRING_ARENA_BLOCKSIZE_MIN 512u -#endif -#ifndef STBDS_STRING_ARENA_BLOCKSIZE_MAX -#define STBDS_STRING_ARENA_BLOCKSIZE_MAX (1u<<20) -#endif - -char *stbds_stralloc(stbds_string_arena *a, char *str) -{ - char *p; - size_t len = strlen(str)+1; - if (len > a->remaining) { - // compute the next blocksize - size_t blocksize = a->block; - - // size is 512, 512, 1024, 1024, 2048, 2048, 4096, 4096, etc., so that - // there are log(SIZE) allocations to free when we destroy the table - blocksize = (size_t) (STBDS_STRING_ARENA_BLOCKSIZE_MIN) << (blocksize>>1); - - // if size is under 1M, advance to next blocktype - if (blocksize < (size_t)(STBDS_STRING_ARENA_BLOCKSIZE_MAX)) - ++a->block; - - if (len > blocksize) { - // if string is larger than blocksize, then just allocate the full size. - // note that we still advance string_block so block size will continue - // increasing, so e.g. if somebody only calls this with 1000-long strings, - // eventually the arena will start doubling and handling those as well - stbds_string_block *sb = (stbds_string_block *) STBDS_REALLOC(NULL, 0, sizeof(*sb)-8 + len); - memmove(sb->storage, str, len); - if (a->storage) { - // insert it after the first element, so that we don't waste the space there - sb->next = a->storage->next; - a->storage->next = sb; - } else { - sb->next = 0; - a->storage = sb; - a->remaining = 0; // this is redundant, but good for clarity - } - return sb->storage; - } else { - stbds_string_block *sb = (stbds_string_block *) STBDS_REALLOC(NULL, 0, sizeof(*sb)-8 + blocksize); - sb->next = a->storage; - a->storage = sb; - a->remaining = blocksize; - } - } - - STBDS_ASSERT(len <= a->remaining); - p = a->storage->storage + a->remaining - len; - a->remaining -= len; - memmove(p, str, len); - return p; -} - -void stbds_strreset(stbds_string_arena *a) -{ - stbds_string_block *x,*y; - x = a->storage; - while (x) { - y = x->next; - STBDS_FREE(NULL, x); - x = y; - } - memset(a, 0, sizeof(*a)); -} - -#endif - -////////////////////////////////////////////////////////////////////////////// -// -// UNIT TESTS -// - -#ifdef STBDS_UNIT_TESTS -#include -#ifdef STBDS_ASSERT_WAS_UNDEFINED -#undef STBDS_ASSERT -#endif -#ifndef STBDS_ASSERT -#define STBDS_ASSERT assert -#include -#endif - -typedef struct { int key,b,c,d; } stbds_struct; -typedef struct { int key[2],b,c,d; } stbds_struct2; - -static char buffer[256]; -char *strkey(int n) -{ -#if defined(_WIN32) && defined(__STDC_WANT_SECURE_LIB__) - sprintf_s(buffer, sizeof(buffer), "test_%d", n); -#else - sprintf(buffer, "test_%d", n); -#endif - return buffer; -} - -void stbds_unit_tests(void) -{ -#if defined(_MSC_VER) && _MSC_VER <= 1200 && defined(__cplusplus) - // VC6 C++ doesn't like the template<> trick on unnamed structures, so do nothing! - STBDS_ASSERT(0); -#else - const int testsize = 100000; - const int testsize2 = testsize/20; - int *arr=NULL; - struct { int key; int value; } *intmap = NULL; - struct { char *key; int value; } *strmap = NULL, s; - struct { stbds_struct key; int value; } *map = NULL; - stbds_struct *map2 = NULL; - stbds_struct2 *map3 = NULL; - stbds_string_arena sa = { 0 }; - int key3[2] = { 1,2 }; - ptrdiff_t temp; - - int i,j; - - STBDS_ASSERT(arrlen(arr)==0); - for (i=0; i < 20000; i += 50) { - for (j=0; j < i; ++j) - arrpush(arr,j); - arrfree(arr); - } - - for (i=0; i < 4; ++i) { - arrpush(arr,1); arrpush(arr,2); arrpush(arr,3); arrpush(arr,4); - arrdel(arr,i); - arrfree(arr); - arrpush(arr,1); arrpush(arr,2); arrpush(arr,3); arrpush(arr,4); - arrdelswap(arr,i); - arrfree(arr); - } - - for (i=0; i < 5; ++i) { - arrpush(arr,1); arrpush(arr,2); arrpush(arr,3); arrpush(arr,4); - stbds_arrins(arr,i,5); - STBDS_ASSERT(arr[i] == 5); - if (i < 4) - STBDS_ASSERT(arr[4] == 4); - arrfree(arr); - } - - i = 1; - STBDS_ASSERT(hmgeti(intmap,i) == -1); - hmdefault(intmap, -2); - STBDS_ASSERT(hmgeti(intmap, i) == -1); - STBDS_ASSERT(hmget (intmap, i) == -2); - for (i=0; i < testsize; i+=2) - hmput(intmap, i, i*5); - for (i=0; i < testsize; i+=1) { - if (i & 1) STBDS_ASSERT(hmget(intmap, i) == -2 ); - else STBDS_ASSERT(hmget(intmap, i) == i*5); - if (i & 1) STBDS_ASSERT(hmget_ts(intmap, i, temp) == -2 ); - else STBDS_ASSERT(hmget_ts(intmap, i, temp) == i*5); - } - for (i=0; i < testsize; i+=2) - hmput(intmap, i, i*3); - for (i=0; i < testsize; i+=1) - if (i & 1) STBDS_ASSERT(hmget(intmap, i) == -2 ); - else STBDS_ASSERT(hmget(intmap, i) == i*3); - for (i=2; i < testsize; i+=4) - hmdel(intmap, i); // delete half the entries - for (i=0; i < testsize; i+=1) - if (i & 3) STBDS_ASSERT(hmget(intmap, i) == -2 ); - else STBDS_ASSERT(hmget(intmap, i) == i*3); - for (i=0; i < testsize; i+=1) - hmdel(intmap, i); // delete the rest of the entries - for (i=0; i < testsize; i+=1) - STBDS_ASSERT(hmget(intmap, i) == -2 ); - hmfree(intmap); - for (i=0; i < testsize; i+=2) - hmput(intmap, i, i*3); - hmfree(intmap); - - #if defined(__clang__) || defined(__GNUC__) - #ifndef __cplusplus - intmap = NULL; - hmput(intmap, 15, 7); - hmput(intmap, 11, 3); - hmput(intmap, 9, 5); - STBDS_ASSERT(hmget(intmap, 9) == 5); - STBDS_ASSERT(hmget(intmap, 11) == 3); - STBDS_ASSERT(hmget(intmap, 15) == 7); - #endif - #endif - - for (i=0; i < testsize; ++i) - stralloc(&sa, strkey(i)); - strreset(&sa); - - { - s.key = "a", s.value = 1; - shputs(strmap, s); - STBDS_ASSERT(*strmap[0].key == 'a'); - STBDS_ASSERT(strmap[0].key == s.key); - STBDS_ASSERT(strmap[0].value == s.value); - shfree(strmap); - } - - { - s.key = "a", s.value = 1; - sh_new_strdup(strmap); - shputs(strmap, s); - STBDS_ASSERT(*strmap[0].key == 'a'); - STBDS_ASSERT(strmap[0].key != s.key); - STBDS_ASSERT(strmap[0].value == s.value); - shfree(strmap); - } - - { - s.key = "a", s.value = 1; - sh_new_arena(strmap); - shputs(strmap, s); - STBDS_ASSERT(*strmap[0].key == 'a'); - STBDS_ASSERT(strmap[0].key != s.key); - STBDS_ASSERT(strmap[0].value == s.value); - shfree(strmap); - } - - for (j=0; j < 2; ++j) { - STBDS_ASSERT(shgeti(strmap,"foo") == -1); - if (j == 0) - sh_new_strdup(strmap); - else - sh_new_arena(strmap); - STBDS_ASSERT(shgeti(strmap,"foo") == -1); - shdefault(strmap, -2); - STBDS_ASSERT(shgeti(strmap,"foo") == -1); - for (i=0; i < testsize; i+=2) - shput(strmap, strkey(i), i*3); - for (i=0; i < testsize; i+=1) - if (i & 1) STBDS_ASSERT(shget(strmap, strkey(i)) == -2 ); - else STBDS_ASSERT(shget(strmap, strkey(i)) == i*3); - for (i=2; i < testsize; i+=4) - shdel(strmap, strkey(i)); // delete half the entries - for (i=0; i < testsize; i+=1) - if (i & 3) STBDS_ASSERT(shget(strmap, strkey(i)) == -2 ); - else STBDS_ASSERT(shget(strmap, strkey(i)) == i*3); - for (i=0; i < testsize; i+=1) - shdel(strmap, strkey(i)); // delete the rest of the entries - for (i=0; i < testsize; i+=1) - STBDS_ASSERT(shget(strmap, strkey(i)) == -2 ); - shfree(strmap); - } - - { - struct { char *key; char value; } *hash = NULL; - char name[4] = "jen"; - shput(hash, "bob" , 'h'); - shput(hash, "sally" , 'e'); - shput(hash, "fred" , 'l'); - shput(hash, "jen" , 'x'); - shput(hash, "doug" , 'o'); - - shput(hash, name , 'l'); - shfree(hash); - } - - for (i=0; i < testsize; i += 2) { - stbds_struct s = { i,i*2,i*3,i*4 }; - hmput(map, s, i*5); - } - - for (i=0; i < testsize; i += 1) { - stbds_struct s = { i,i*2,i*3 ,i*4 }; - stbds_struct t = { i,i*2,i*3+1,i*4 }; - if (i & 1) STBDS_ASSERT(hmget(map, s) == 0); - else STBDS_ASSERT(hmget(map, s) == i*5); - if (i & 1) STBDS_ASSERT(hmget_ts(map, s, temp) == 0); - else STBDS_ASSERT(hmget_ts(map, s, temp) == i*5); - //STBDS_ASSERT(hmget(map, t.key) == 0); - } - - for (i=0; i < testsize; i += 2) { - stbds_struct s = { i,i*2,i*3,i*4 }; - hmputs(map2, s); - } - hmfree(map); - - for (i=0; i < testsize; i += 1) { - stbds_struct s = { i,i*2,i*3,i*4 }; - stbds_struct t = { i,i*2,i*3+1,i*4 }; - if (i & 1) STBDS_ASSERT(hmgets(map2, s.key).d == 0); - else STBDS_ASSERT(hmgets(map2, s.key).d == i*4); - //STBDS_ASSERT(hmgetp(map2, t.key) == 0); - } - hmfree(map2); - - for (i=0; i < testsize; i += 2) { - stbds_struct2 s = { { i,i*2 }, i*3,i*4, i*5 }; - hmputs(map3, s); - } - for (i=0; i < testsize; i += 1) { - stbds_struct2 s = { { i,i*2}, i*3, i*4, i*5 }; - stbds_struct2 t = { { i,i*2}, i*3+1, i*4, i*5 }; - if (i & 1) STBDS_ASSERT(hmgets(map3, s.key).d == 0); - else STBDS_ASSERT(hmgets(map3, s.key).d == i*5); - //STBDS_ASSERT(hmgetp(map3, t.key) == 0); - } -#endif -} -#endif - - -/* ------------------------------------------------------------------------------- -This software is available under 2 licenses -- choose whichever you prefer. ------------------------------------------------------------------------------- -ALTERNATIVE A - MIT License -Copyright (c) 2019 Sean Barrett -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. ------------------------------------------------------------------------------- -ALTERNATIVE B - Public Domain (www.unlicense.org) -This is free and unencumbered software released into the public domain. -Anyone is free to copy, modify, publish, use, compile, sell, or distribute this -software, either in source code form or as a compiled binary, for any purpose, -commercial or non-commercial, and by any means. -In jurisdictions that recognize copyright laws, the author or authors of this -software dedicate any and all copyright interest in the software to the public -domain. We make this dedication for the benefit of the public at large and to -the detriment of our heirs and successors. We intend this dedication to be an -overt act of relinquishment in perpetuity of all present and future rights to -this software under copyright law. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ------------------------------------------------------------------------------- -*/ diff --git a/test.l b/test.l deleted file mode 100644 index 6c3aec2..0000000 --- a/test.l +++ /dev/null @@ -1,12 +0,0 @@ -u32 main(u32 b) -{ - u32 a = 4; - //return a; - if (b == 3) { - return 3; - } else { - return 4; - } - - return a; -} diff --git a/todo.cfg b/todo.cfg deleted file mode 100644 index 5e35825..0000000 --- a/todo.cfg +++ /dev/null @@ -1,2 +0,0 @@ -export TODO_DIR="." -export TODO_FILE="$TODO_DIR/todo.txt" diff --git a/todo.txt b/todo.txt deleted file mode 100644 index c023562..0000000 --- a/todo.txt +++ /dev/null @@ -1 +0,0 @@ -implement dominator tree for control flow diff --git a/utils.c b/utils.c deleted file mode 100644 index c6f0781..0000000 --- a/utils.c +++ /dev/null @@ -1,152 +0,0 @@ -#include "utils.h" -#include -#include -#include -#include - -i64 parse_int(char *s, usize len) -{ - bool negative = false; - if (*s == '-') { - s += 1; - len -= 1; - negative = true; - } - - u64 int_part = 0; - for (usize i=0; i < len; i++) { - int_part = (int_part * 10) + (s[i] - '0'); - } - - if (negative) { - int_part *= -1; - } - - return int_part; -} - -f64 parse_float(char *s, usize len) -{ - bool negative = false; - if (*s == '-') { - s += 1; - len -= 1; - negative = true; - } - - usize point_pos = 0; - for (usize i=0; i < len; i++) { - if (s[i] == '.') { - point_pos = i; - break; - } - } - - i64 int_part = parse_int(s, point_pos); - i64 dec_part = parse_int(s+point_pos+1, len-point_pos-1); - for (usize i=0; i < len-point_pos-1; i++) { - int_part *= 10; - } - - int_part += dec_part; - - f64 f = (f64) int_part; - - point_pos += 1; - - for (usize i=0; i < len - point_pos; i++) { - f /= 10.0; - } - - if (negative) { - f *= -1; - } - - return f; -} - - -void trie_insert(trie_node *root, arena *a, char *key, uint16_t value) -{ - trie_node *node = root; - while (*key) { - if (!node->children[(usize)*key]) { - node->children[(usize)*key] = arena_alloc(a, sizeof(trie_node)); - memset(node->children[(usize)*key], 0x0, sizeof(trie_node)); - } - node = node->children[(usize)*key]; - - key++; - } - - node->value = value; -} - -uint16_t trie_get(trie_node *root, char *key, usize len) -{ - trie_node *node = root; - for (usize i=0; i < len; i++) { - if (!node->children[(usize)(key[i])]) { - return 0; - } - node = node->children[(usize)(key[i])]; - } - - return node->value; -} - -#ifndef DEFAULT_ALIGNMENT -#define DEFAULT_ALIGNMENT (2 * sizeof(void *)) -#endif - -static usize align_forward(usize ptr, usize align) { - uintptr_t p = ptr; - uintptr_t a = (uintptr_t)align; - uintptr_t modulo = p & (a - 1); - - if (modulo != 0) { - p += a - modulo; - } - return (usize)p; -} - -arena arena_init(usize size) -{ - void *memory = malloc(size); - memset(memory, 0x0, size); - return (arena){ - .capacity = size, - .position = 0, - .memory = memory, - }; -} - -void *arena_alloc(arena *a, usize size) { - uintptr_t current_addr = (uintptr_t)a->memory + a->position; - uintptr_t padding = align_forward(current_addr, DEFAULT_ALIGNMENT) - current_addr; - if (a->position + padding + size > a->capacity) return NULL; - void *ret = (unsigned char *)a->memory + a->position + padding; - a->position += (size + padding); - - return ret; -} - -snapshot arena_snapshot(arena *a) -{ - return a->position; -} - -void arena_reset_to_snapshot(arena *a, snapshot s) -{ - a->position = s; -} - -void arena_reset(arena *a) -{ - arena_reset_to_snapshot(a, 0); -} - -void arena_deinit(arena a) -{ - free(a.memory); -} diff --git a/utils.h b/utils.h deleted file mode 100644 index edc5c55..0000000 --- a/utils.h +++ /dev/null @@ -1,64 +0,0 @@ -#ifndef UTILS_H -#define UTILS_H - -#include -#include -#include - -typedef uint8_t u8; -typedef uint16_t u16; -typedef uint32_t u32; -typedef uint64_t u64; - -typedef int8_t i8; -typedef int16_t i16; -typedef int32_t i32; -typedef int64_t i64; - -typedef size_t usize; - -typedef float f32; -typedef double f64; - -i64 parse_int(char *s, usize len); -f64 parse_float(char *s, usize len); - -typedef struct { - usize capacity; - usize position; - void* memory; -} arena; - -typedef usize snapshot; - -/* - * NOTE(ernesto): faulty initialization is signalided by the arena.memory - * being null. It is the responsability of the caller to check for fulty - * initialization. - */ -arena arena_init(usize size); -/* - * Returns null on unsuccessfull allocation. - * In this implemention an allocation is only unsuccessfull if the arena - * does not have enough memory to allocate the requested space - */ -void *arena_alloc(arena *a, usize size); -snapshot arena_snapshot(arena *a); -void arena_reset_to_snapshot(arena *a, snapshot s); -void arena_reset(arena *a); -/* This call should never fail, also, do we even care if it does? */ -void arena_deinit(arena a); - -typedef struct _trie_node { - uint16_t value; - struct _trie_node *children[256]; -} trie_node; - -void trie_insert(trie_node *root, arena *a, char *key, uint16_t value); -uint16_t trie_get(trie_node *root, char *key, usize len); - -typedef struct { - usize row, column; -} source_pos; - -#endif