diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..7b960e9 --- /dev/null +++ b/Makefile @@ -0,0 +1,59 @@ +# lc - L compiler +# See LICENSE file for copyright and license details. + +include config.mk + +SRC = lc.c utils.c lexer.c parser.c sema.c codegen.c +HDR = config.def.h utils.h lexer.h parser.h sema.h codegen.h +OBJ = ${SRC:.c=.o} + +all: options lc + +options: + @echo lc build options: + @echo "CFLAGS = ${CFLAGS}" + @echo "LDFLAGS = ${LDFLAGS}" + @echo "CC = ${CC}" + +.c.o: + ${CC} -c ${CFLAGS} $< + +${OBJ}: config.h config.mk + +config.h: + cp config.def.h $@ + +users.h: + cp users.def.h $@ + +lc: ${OBJ} + ${CC} -o $@ ${OBJ} ${LDFLAGS} + +clean: + rm -f lc ${OBJ} lc-${VERSION}.tar.gz + +dist: clean + mkdir -p lc-${VERSION} + cp -R LICENSE Makefile README config.mk\ + lc.1 ${HDR} ${SRC} lc-${VERSION} + tar -cf lc-${VERSION}.tar lc-${VERSION} + gzip lc-${VERSION}.tar + rm -rf lc-${VERSION} + +install: all + mkdir -p ${DESTDIR}${PREFIX}/bin + cp -f lc ${DESTDIR}${PREFIX}/bin + chmod 755 ${DESTDIR}${PREFIX}/bin/lc + mkdir -p ${DESTDIR}${MANPREFIX}/man1 + sed "s/VERSION/${VERSION}/g" < lc.1 > ${DESTDIR}${MANPREFIX}/man1/lc.1 + chmod 644 ${DESTDIR}${MANPREFIX}/man1/lc.1 + +uninstall: + rm -f ${DESTDIR}${PREFIX}/bin/lc\ + ${DESTDIR}${MANPREFIX}/man1/lc.1 +graph: clean all + ./lc > graph.dot + dot -Tpdf graph.dot > graph.pdf + zathura ./graph.pdf + +.PHONY: all options clean dist install uninstall graph diff --git a/README b/README new file mode 100644 index 0000000..6c8fea1 --- /dev/null +++ b/README @@ -0,0 +1,24 @@ +lc - L compiler +============================ +lc is an L compiler. It can compile L code. + + +Requirements +------------ +In order to build lc you need... a computer + + +Installation +------------ +Edit config.mk to match your local setup (lc is installed into +the /usr/local namespace by default). 
+ +Afterwards enter the following command to build and install lc (if +necessary as root): + + make clean install + + +Usage +----------- +lc file diff --git a/codegen.c b/codegen.c new file mode 100644 index 0000000..d1c58fd --- /dev/null +++ b/codegen.c @@ -0,0 +1,1168 @@ +#include +#include +#include +#include "codegen.h" +#include "sema.h" + +#include "stb_ds.h" + +typedef struct { + char *key; + int value; +} var_map; + +static var_map *variables = NULL; +static int stack_offset = 0; +static int label_counter = 0; +static int *break_stack = NULL; + +void gen_expr(FILE *fp, ast_node *expr); +void gen_unary(FILE *fp, ast_node *expr); +void gen_statement_list(FILE *fp, ast_node *stmt); +int get_var_offset(char *name, usize name_len); +int get_var_offset_sized(char *name, usize name_len, usize size); + +void gen_binary(FILE *fp, ast_node *expr) +{ + switch (expr->expr.binary.operator) { + case OP_PLUS: + gen_expr(fp, expr->expr.binary.left); + fprintf(fp, "mov %%rax, %%rcx\n"); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "add %%rcx, %%rax\n"); + break; + case OP_MINUS: + gen_expr(fp, expr->expr.binary.left); + fprintf(fp, "mov %%rax, %%rcx\n"); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "sub %%rax, %%rcx\n"); + fprintf(fp, "mov %%rcx, %%rax\n"); + break; + case OP_MUL: + gen_expr(fp, expr->expr.binary.left); + fprintf(fp, "mov %%rax, %%rcx\n"); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "imul %%rcx, %%rax\n"); + break; + case OP_DIV: + gen_expr(fp, expr->expr.binary.left); + fprintf(fp, "mov %%rax, %%rcx\n"); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "mov %%rax, %%rbx\n"); + fprintf(fp, "mov %%rcx, %%rax\n"); + fprintf(fp, "cqo\n"); + fprintf(fp, "idiv %%rbx\n"); + break; + case OP_MOD: + gen_expr(fp, expr->expr.binary.left); + fprintf(fp, "mov %%rax, %%rcx\n"); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "mov %%rax, %%rbx\n"); + fprintf(fp, "mov %%rcx, %%rax\n"); + fprintf(fp, "cqo\n"); + fprintf(fp, 
"idiv %%rbx\n"); + fprintf(fp, "mov %%rdx, %%rax\n"); + break; + case OP_BOR: + gen_expr(fp, expr->expr.binary.left); + fprintf(fp, "mov %%rax, %%rcx\n"); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "or %%rcx, %%rax\n"); + break; + case OP_BAND: + gen_expr(fp, expr->expr.binary.left); + fprintf(fp, "mov %%rax, %%rcx\n"); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "and %%rcx, %%rax\n"); + break; + case OP_BXOR: + gen_expr(fp, expr->expr.binary.left); + fprintf(fp, "mov %%rax, %%rcx\n"); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "xor %%rcx, %%rax\n"); + break; + case OP_EQ: + gen_expr(fp, expr->expr.binary.left); + fprintf(fp, "mov %%rax, %%rcx\n"); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "cmp %%rax, %%rcx\n"); + fprintf(fp, "sete %%al\n"); + fprintf(fp, "movzx %%al, %%rax\n"); + break; + case OP_NEQ: + gen_expr(fp, expr->expr.binary.left); + fprintf(fp, "mov %%rax, %%rcx\n"); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "cmp %%rax, %%rcx\n"); + fprintf(fp, "setne %%al\n"); + fprintf(fp, "movzx %%al, %%rax\n"); + break; + case OP_LT: + gen_expr(fp, expr->expr.binary.left); + fprintf(fp, "mov %%rax, %%rcx\n"); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "cmp %%rax, %%rcx\n"); + fprintf(fp, "setl %%al\n"); + fprintf(fp, "movzx %%al, %%rax\n"); + break; + case OP_GT: + gen_expr(fp, expr->expr.binary.left); + fprintf(fp, "mov %%rax, %%rcx\n"); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "cmp %%rax, %%rcx\n"); + fprintf(fp, "setg %%al\n"); + fprintf(fp, "movzx %%al, %%rax\n"); + break; + case OP_LE: + gen_expr(fp, expr->expr.binary.left); + fprintf(fp, "mov %%rax, %%rcx\n"); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "cmp %%rax, %%rcx\n"); + fprintf(fp, "setle %%al\n"); + fprintf(fp, "movzx %%al, %%rax\n"); + break; + case OP_GE: + gen_expr(fp, expr->expr.binary.left); + fprintf(fp, "mov %%rax, %%rcx\n"); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "cmp %%rax, 
%%rcx\n"); + fprintf(fp, "setge %%al\n"); + fprintf(fp, "movzx %%al, %%rax\n"); + break; + case OP_AND: + gen_expr(fp, expr->expr.binary.left); + fprintf(fp, "test %%rax, %%rax\n"); + fprintf(fp, "setne %%al\n"); + fprintf(fp, "movzx %%al, %%rcx\n"); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "test %%rax, %%rax\n"); + fprintf(fp, "setne %%al\n"); + fprintf(fp, "movzx %%al, %%rax\n"); + fprintf(fp, "and %%rcx, %%rax\n"); + break; + case OP_OR: + gen_expr(fp, expr->expr.binary.left); + fprintf(fp, "test %%rax, %%rax\n"); + fprintf(fp, "setne %%al\n"); + fprintf(fp, "movzx %%al, %%rcx\n"); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "test %%rax, %%rax\n"); + fprintf(fp, "setne %%al\n"); + fprintf(fp, "movzx %%al, %%rax\n"); + fprintf(fp, "or %%rcx, %%rax\n"); + break; + case OP_ASSIGN: { + if (expr->expr.binary.left->type == NODE_IDENTIFIER) { + gen_expr(fp, expr->expr.binary.right); + int offset = get_var_offset(expr->expr.binary.left->expr.string.start, + expr->expr.binary.left->expr.string.len); + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset); + } else if (expr->expr.binary.left->type == NODE_ARRAY_SUBSCRIPT) { + ast_node *subscript = expr->expr.binary.left; + usize element_size = 8; + type *base_type = subscript->expr.subscript.expr->expr_type; + bool is_slice = false; + + if (base_type) { + if (base_type->tag == TYPE_PTR && base_type->data.ptr.child) { + element_size = base_type->data.ptr.child->size; + } else if (base_type->tag == TYPE_SLICE && base_type->data.slice.child) { + element_size = base_type->data.slice.child->size; + is_slice = true; + } + } + + if (subscript->expr.subscript.expr->type == NODE_IDENTIFIER && is_slice) { + int base_offset = get_var_offset(subscript->expr.subscript.expr->expr.string.start, + subscript->expr.subscript.expr->expr.string.len); + + fprintf(fp, "mov -%d(%%rbp), %%rcx\n", base_offset); + gen_expr(fp, subscript->expr.subscript.index); + + if (element_size != 1) { + fprintf(fp, "imul $%lu, %%rax\n", 
element_size); + } + + fprintf(fp, "add %%rcx, %%rax\n"); + fprintf(fp, "push %%rax\n"); + + gen_expr(fp, expr->expr.binary.right); + + fprintf(fp, "pop %%rcx\n"); + + if (subscript->expr_type && subscript->expr_type->size == 4) { + fprintf(fp, "mov %%eax, (%%rcx)\n"); + } else if (subscript->expr_type && subscript->expr_type->size == 2) { + fprintf(fp, "mov %%ax, (%%rcx)\n"); + } else if (subscript->expr_type && subscript->expr_type->size == 1) { + fprintf(fp, "mov %%al, (%%rcx)\n"); + } else { + fprintf(fp, "mov %%rax, (%%rcx)\n"); + } + } else if (subscript->expr.subscript.expr->type == NODE_IDENTIFIER) { + int base_offset = get_var_offset(subscript->expr.subscript.expr->expr.string.start, + subscript->expr.subscript.expr->expr.string.len); + + gen_expr(fp, subscript->expr.subscript.index); + + if (element_size != 1) { + fprintf(fp, "imul $%lu, %%rax\n", element_size); + } + + fprintf(fp, "add $%d, %%rax\n", base_offset); + fprintf(fp, "neg %%rax\n"); + fprintf(fp, "add %%rbp, %%rax\n"); + fprintf(fp, "push %%rax\n"); + + gen_expr(fp, expr->expr.binary.right); + + fprintf(fp, "pop %%rcx\n"); + + if (subscript->expr_type && subscript->expr_type->size == 4) { + fprintf(fp, "mov %%eax, (%%rcx)\n"); + } else if (subscript->expr_type && subscript->expr_type->size == 2) { + fprintf(fp, "mov %%ax, (%%rcx)\n"); + } else if (subscript->expr_type && subscript->expr_type->size == 1) { + fprintf(fp, "mov %%al, (%%rcx)\n"); + } else { + fprintf(fp, "mov %%rax, (%%rcx)\n"); + } + } else { + gen_expr(fp, subscript->expr.subscript.expr); + fprintf(fp, "push %%rax\n"); + + gen_expr(fp, subscript->expr.subscript.index); + + if (element_size != 1) { + fprintf(fp, "imul $%lu, %%rax\n", element_size); + } + + fprintf(fp, "pop %%rcx\n"); + fprintf(fp, "add %%rcx, %%rax\n"); + fprintf(fp, "push %%rax\n"); + + gen_expr(fp, expr->expr.binary.right); + + fprintf(fp, "pop %%rcx\n"); + + if (subscript->expr_type && subscript->expr_type->size == 4) { + fprintf(fp, "mov %%eax, (%%rcx)\n"); + 
} else if (subscript->expr_type && subscript->expr_type->size == 2) { + fprintf(fp, "mov %%ax, (%%rcx)\n"); + } else if (subscript->expr_type && subscript->expr_type->size == 1) { + fprintf(fp, "mov %%al, (%%rcx)\n"); + } else { + fprintf(fp, "mov %%rax, (%%rcx)\n"); + } + } + } else { + fprintf(fp, "# ERROR: left side of assignment must be identifier\n"); + break; + } + + break; + } + case OP_ASSIGN_PTR: { + gen_expr(fp, expr->expr.binary.left); + fprintf(fp, "push %%rax\n"); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "pop %%rcx\n"); + fprintf(fp, "mov %%rax, (%%rcx)\n"); + break; + } + case OP_PLUS_EQ: { + if (expr->expr.binary.left->type != NODE_IDENTIFIER) { + fprintf(fp, "# ERROR: left side of assignment must be identifier\n"); + break; + } + int offset = get_var_offset(expr->expr.binary.left->expr.string.start, + expr->expr.binary.left->expr.string.len); + fprintf(fp, "mov -%d(%%rbp), %%rcx\n", offset); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "add %%rcx, %%rax\n"); + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset); + break; + } + case OP_MINUS_EQ: { + if (expr->expr.binary.left->type != NODE_IDENTIFIER) { + fprintf(fp, "# ERROR: left side of assignment must be identifier\n"); + break; + } + int offset = get_var_offset(expr->expr.binary.left->expr.string.start, + expr->expr.binary.left->expr.string.len); + fprintf(fp, "mov -%d(%%rbp), %%rcx\n", offset); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "sub %%rax, %%rcx\n"); + fprintf(fp, "mov %%rcx, %%rax\n"); + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset); + break; + } + case OP_MUL_EQ: { + if (expr->expr.binary.left->type != NODE_IDENTIFIER) { + fprintf(fp, "# ERROR: left side of assignment must be identifier\n"); + break; + } + int offset = get_var_offset(expr->expr.binary.left->expr.string.start, + expr->expr.binary.left->expr.string.len); + fprintf(fp, "mov -%d(%%rbp), %%rcx\n", offset); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "imul %%rcx, %%rax\n"); + 
fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset); + break; + } + case OP_DIV_EQ: { + if (expr->expr.binary.left->type != NODE_IDENTIFIER) { + fprintf(fp, "# ERROR: left side of assignment must be identifier\n"); + break; + } + int offset = get_var_offset(expr->expr.binary.left->expr.string.start, + expr->expr.binary.left->expr.string.len); + fprintf(fp, "mov -%d(%%rbp), %%rcx\n", offset); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "mov %%rax, %%rbx\n"); + fprintf(fp, "mov %%rcx, %%rax\n"); + fprintf(fp, "cqo\n"); + fprintf(fp, "idiv %%rbx\n"); + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset); + break; + } + case OP_MOD_EQ: { + if (expr->expr.binary.left->type != NODE_IDENTIFIER) { + fprintf(fp, "# ERROR: left side of assignment must be identifier\n"); + break; + } + int offset = get_var_offset(expr->expr.binary.left->expr.string.start, + expr->expr.binary.left->expr.string.len); + fprintf(fp, "mov -%d(%%rbp), %%rcx\n", offset); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "mov %%rax, %%rbx\n"); + fprintf(fp, "mov %%rcx, %%rax\n"); + fprintf(fp, "cqo\n"); + fprintf(fp, "idiv %%rbx\n"); + fprintf(fp, "mov %%rdx, -%d(%%rbp)\n", offset); + break; + } + case OP_BOR_EQ: { + if (expr->expr.binary.left->type != NODE_IDENTIFIER) { + fprintf(fp, "# ERROR: left side of assignment must be identifier\n"); + break; + } + int offset = get_var_offset(expr->expr.binary.left->expr.string.start, + expr->expr.binary.left->expr.string.len); + fprintf(fp, "mov -%d(%%rbp), %%rcx\n", offset); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "or %%rcx, %%rax\n"); + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset); + break; + } + case OP_BAND_EQ: { + if (expr->expr.binary.left->type != NODE_IDENTIFIER) { + fprintf(fp, "# ERROR: left side of assignment must be identifier\n"); + break; + } + int offset = get_var_offset(expr->expr.binary.left->expr.string.start, + expr->expr.binary.left->expr.string.len); + fprintf(fp, "mov -%d(%%rbp), %%rcx\n", offset); + gen_expr(fp, 
expr->expr.binary.right); + fprintf(fp, "and %%rcx, %%rax\n"); + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset); + break; + } + case OP_BXOR_EQ: { + if (expr->expr.binary.left->type != NODE_IDENTIFIER) { + fprintf(fp, "# ERROR: left side of assignment must be identifier\n"); + break; + } + int offset = get_var_offset(expr->expr.binary.left->expr.string.start, + expr->expr.binary.left->expr.string.len); + fprintf(fp, "mov -%d(%%rbp), %%rcx\n", offset); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "xor %%rcx, %%rax\n"); + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset); + break; + } + case OP_RSHIFT_EQ: { + if (expr->expr.binary.left->type != NODE_IDENTIFIER) { + fprintf(fp, "# ERROR: left side of assignment must be identifier\n"); + break; + } + int offset = get_var_offset(expr->expr.binary.left->expr.string.start, + expr->expr.binary.left->expr.string.len); + fprintf(fp, "mov -%d(%%rbp), %%rax\n", offset); + fprintf(fp, "push %%rax\n"); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "mov %%rax, %%rcx\n"); + fprintf(fp, "pop %%rax\n"); + fprintf(fp, "sar %%cl, %%rax\n"); + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset); + break; + } + case OP_LSHIFT_EQ: { + if (expr->expr.binary.left->type != NODE_IDENTIFIER) { + break; + } + int offset = get_var_offset(expr->expr.binary.left->expr.string.start, + expr->expr.binary.left->expr.string.len); + fprintf(fp, "mov -%d(%%rbp), %%rax\n", offset); + fprintf(fp, "push %%rax\n"); + gen_expr(fp, expr->expr.binary.right); + fprintf(fp, "mov %%rax, %%rcx\n"); + fprintf(fp, "pop %%rax\n"); + fprintf(fp, "shl %%cl, %%rax\n"); + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset); + break; + } + } +} + +int get_var_offset(char *name, usize name_len) +{ + char *var_name = strndup(name, name_len); + ptrdiff_t idx = shgeti(variables, var_name); + + if (idx >= 0) { + free(var_name); + return variables[idx].value; + } + + stack_offset += 8; + shput(variables, var_name, stack_offset); + return stack_offset; +} + +int 
get_var_offset_sized(char *name, usize name_len, usize size) +{ + char *var_name = strndup(name, name_len); + ptrdiff_t idx = shgeti(variables, var_name); + + if (idx >= 0) { + free(var_name); + return variables[idx].value; + } + + usize aligned_size = (size + 7) & ~7; + stack_offset += aligned_size; + shput(variables, var_name, stack_offset); + return stack_offset; +} + +void gen_statement_list(FILE *fp, ast_node *stmt) +{ + if (!stmt) return; + + if (stmt->type == NODE_UNIT) { + ast_node *current = stmt; + while (current && current->type == NODE_UNIT) { + if (current->expr.unit_node.expr) { + gen_expr(fp, current->expr.unit_node.expr); + } + current = current->expr.unit_node.next; + } + } else { + gen_expr(fp, stmt); + } +} + +void gen_unary(FILE *fp, ast_node *expr) +{ + switch (expr->expr.unary.operator) { + case UOP_MINUS: + gen_expr(fp, expr->expr.unary.right); + fprintf(fp, "neg %%rax\n"); + break; + case UOP_NOT: + gen_expr(fp, expr->expr.unary.right); + fprintf(fp, "test %%rax, %%rax\n"); + fprintf(fp, "sete %%al\n"); + fprintf(fp, "movzx %%al, %%rax\n"); + break; + case UOP_INCR: + if (expr->expr.unary.right->type != NODE_IDENTIFIER) { + fprintf(fp, "# ERROR: increment requires identifier\n"); + break; + } + int offset_incr = get_var_offset(expr->expr.unary.right->expr.string.start, + expr->expr.unary.right->expr.string.len); + fprintf(fp, "mov -%d(%%rbp), %%rax\n", offset_incr); + fprintf(fp, "inc %%rax\n"); + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset_incr); + break; + case UOP_DECR: + if (expr->expr.unary.right->type != NODE_IDENTIFIER) { + fprintf(fp, "# ERROR: decrement requires identifier\n"); + break; + } + int offset_decr = get_var_offset(expr->expr.unary.right->expr.string.start, + expr->expr.unary.right->expr.string.len); + fprintf(fp, "mov -%d(%%rbp), %%rax\n", offset_decr); + fprintf(fp, "dec %%rax\n"); + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset_decr); + break; + case UOP_REF: + if (expr->expr.unary.right->type != NODE_IDENTIFIER) { + 
fprintf(fp, "# ERROR: address-of requires identifier\n"); + break; + } + int offset_ref = get_var_offset(expr->expr.unary.right->expr.string.start, + expr->expr.unary.right->expr.string.len); + fprintf(fp, "lea -%d(%%rbp), %%rax\n", offset_ref); + break; + case UOP_DEREF: + gen_expr(fp, expr->expr.unary.right); + fprintf(fp, "mov (%%rax), %%rax\n"); + break; + } +} + +void gen_expr(FILE *fp, ast_node *expr) +{ + switch (expr->type) { + case NODE_INTEGER: + fprintf(fp, "mov $%lu, %%rax\n", expr->expr.integer); + break; + case NODE_FLOAT: { + // TODO: do not truncate + i64 int_val = (i64)expr->expr.flt; + fprintf(fp, "mov $%ld, %%rax\n", int_val); + break; + } + case NODE_CHAR: + fprintf(fp, "mov $%d, %%rax\n", (int)(unsigned char)expr->expr.ch); + break; + case NODE_BOOL: + fprintf(fp, "mov $%d, %%rax\n", expr->expr.boolean ? 1 : 0); + break; + case NODE_IDENTIFIER: { + int offset = get_var_offset(expr->expr.string.start, expr->expr.string.len); + fprintf(fp, "mov -%d(%%rbp), %%rax\n", offset); + break; + } + case NODE_BINARY: + gen_binary(fp, expr); + break; + case NODE_UNARY: + gen_unary(fp, expr); + break; + case NODE_CAST: + gen_expr(fp, expr->expr.cast.value); + break; + case NODE_VAR_DECL: { + int offset = 0; + type *var_type = expr->expr_type; + if (!var_type && expr->expr.var_decl.type) { + var_type = expr->expr.var_decl.type->expr_type; + } + + bool is_inline_slice = false; + if (var_type && var_type->tag == TYPE_SLICE && expr->expr.var_decl.value && + (expr->expr.var_decl.value->type == NODE_STRUCT_INIT || + expr->expr.var_decl.value->type == NODE_RANGE)) { + is_inline_slice = true; + } + + if (!is_inline_slice) { + if (var_type && var_type->size > 0) { + offset = get_var_offset_sized(expr->expr.var_decl.name, + expr->expr.var_decl.name_len, var_type->size); + } else { + offset = get_var_offset(expr->expr.var_decl.name, expr->expr.var_decl.name_len); + } + } + + if (expr->expr.var_decl.value) { + if (expr->expr.var_decl.value->type == NODE_RANGE && 
var_type && var_type->tag == TYPE_SLICE) { + ast_node *range = expr->expr.var_decl.value; + if (range->expr.binary.left->type == NODE_INTEGER && + range->expr.binary.right->type == NODE_INTEGER) { + i64 start = range->expr.binary.left->expr.integer; + i64 end = range->expr.binary.right->expr.integer; + i64 count = end - start + 1; + + type *element_type = var_type->data.slice.child; + usize element_size = element_type ? element_type->size : 8; + + usize data_size = count * element_size; + usize aligned_data_size = (data_size + 7) & ~7; + stack_offset += aligned_data_size; + int data_offset = stack_offset; + + stack_offset += 16; + offset = stack_offset; + + char *var_name = strndup(expr->expr.var_decl.name, expr->expr.var_decl.name_len); + shput(variables, var_name, offset); + + for (i64 i = 0; i < count; i++) { + i64 value = start + i; + int element_offset = data_offset - (i * element_size); + fprintf(fp, "mov $%ld, %%rax\n", value); + if (element_size == 4) { + fprintf(fp, "mov %%eax, -%d(%%rbp)\n", element_offset); + } else if (element_size == 2) { + fprintf(fp, "mov %%ax, -%d(%%rbp)\n", element_offset); + } else if (element_size == 1) { + fprintf(fp, "mov %%al, -%d(%%rbp)\n", element_offset); + } else { + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", element_offset); + } + } + + fprintf(fp, "lea -%d(%%rbp), %%rax\n", data_offset); + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset); + fprintf(fp, "mov $%ld, %%rax\n", count); + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset - 8); + } + } else if (expr->expr.var_decl.value->type == NODE_STRING && var_type && var_type->tag == TYPE_SLICE) { + ast_node *str = expr->expr.var_decl.value; + usize str_len = str->expr.string.len; + char *str_data = str->expr.string.start; + + usize aligned_data_size = (str_len + 7) & ~7; + stack_offset += aligned_data_size; + int data_offset = stack_offset; + + stack_offset += 16; + offset = stack_offset; + + char *var_name = strndup(expr->expr.var_decl.name, expr->expr.var_decl.name_len); + 
shput(variables, var_name, offset); + + usize out_len = 0; /* bytes emitted so far; falls behind i once a "\n" escape is collapsed */ + for (usize i = 0; i < str_len; i++, out_len++) { + int byte_offset = data_offset - (int)out_len; + /* only look at str_data[i+1] when it is still inside the literal */ + if ((unsigned char)str_data[i] == '\\' && i + 1 < str_len && (unsigned char)str_data[i+1] == 'n') { + fprintf(fp, "movb $%d, -%d(%%rbp)\n", (unsigned char)'\n', byte_offset); + i += 1; + } else { + fprintf(fp, "movb $%d, -%d(%%rbp)\n", (unsigned char)str_data[i], byte_offset); + } + } + + fprintf(fp, "lea -%d(%%rbp), %%rax\n", data_offset); + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset); + /* the slice length is the number of bytes actually stored, not the source length */ + fprintf(fp, "mov $%lu, %%rax\n", out_len); + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset - 8); + } else if (expr->expr.var_decl.value->type == NODE_STRUCT_INIT) { + ast_node *member_list = expr->expr.var_decl.value->expr.struct_init.members; + ast_node *current = member_list; + + if (var_type && var_type->tag == TYPE_STRUCT) { + while (current && current->type == NODE_UNIT) { + ast_node *assignment = current->expr.unit_node.expr; + if (assignment && assignment->type == NODE_BINARY && + assignment->expr.binary.operator == OP_ASSIGN) { + ast_node *field = assignment->expr.binary.left; + ast_node *value = assignment->expr.binary.right; + + if (field->type == NODE_IDENTIFIER) { + char *field_name = strndup(field->expr.string.start, + field->expr.string.len); + + member *m = var_type->data.structure.members; + int field_offset = -1; + while (m) { + if (m->name_len == field->expr.string.len && + strncmp(m->name, field->expr.string.start, m->name_len) == 0) { + field_offset = m->offset; + break; + } + m = m->next; + } + + if (field_offset >= 0) { + gen_expr(fp, value); + + type *field_type = shget(var_type->data.structure.member_types, field_name); + + if (field_type && field_type->size == 4) { + fprintf(fp, "mov %%eax, -%d(%%rbp)\n", offset + field_offset); + } else if (field_type && field_type->size == 2) { + fprintf(fp, "mov %%ax, -%d(%%rbp)\n", offset + field_offset); + } else if (field_type && field_type->size == 1) { + fprintf(fp, "mov %%al, -%d(%%rbp)\n", offset + field_offset); 
+ } else { + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset + field_offset); + } + } + + free(field_name); + } + } + current = current->expr.unit_node.next; + } + } + else if (var_type && (var_type->tag == TYPE_PTR || var_type->tag == TYPE_SLICE)) { + usize element_size = 8; + type *element_type = NULL; + + if (var_type->tag == TYPE_PTR && var_type->data.ptr.child) { + element_type = var_type->data.ptr.child; + element_size = element_type->size; + } else if (var_type->tag == TYPE_SLICE && var_type->data.slice.child) { + element_type = var_type->data.slice.child; + element_size = element_type->size; + } + + int element_count = 0; + ast_node *count_node = current; + while (count_node && count_node->type == NODE_UNIT) { + element_count++; + count_node = count_node->expr.unit_node.next; + } + if (var_type->tag == TYPE_SLICE) { + usize data_size = element_count * element_size; + usize aligned_data_size = (data_size + 7) & ~7; + stack_offset += aligned_data_size; + int data_offset = stack_offset; + stack_offset += 16; + offset = stack_offset; + + char *var_name = strndup(expr->expr.var_decl.name, expr->expr.var_decl.name_len); + shput(variables, var_name, offset); + + int index = 0; + while (current && current->type == NODE_UNIT) { + ast_node *value = current->expr.unit_node.expr; + if (value) { + gen_expr(fp, value); + + int element_offset = data_offset - (index * element_size); + + if (element_type && element_type->size == 4) { + fprintf(fp, "mov %%eax, -%d(%%rbp)\n", element_offset); + } else if (element_type && element_type->size == 2) { + fprintf(fp, "mov %%ax, -%d(%%rbp)\n", element_offset); + } else if (element_type && element_type->size == 1) { + fprintf(fp, "mov %%al, -%d(%%rbp)\n", element_offset); + } else { + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", element_offset); + } + } + index++; + current = current->expr.unit_node.next; + } + + fprintf(fp, "lea -%d(%%rbp), %%rax\n", data_offset); + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset); + fprintf(fp, "mov $%d, 
%%rax\n", element_count); + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset - 8); + } else { + int index = 0; + while (current && current->type == NODE_UNIT) { + ast_node *value = current->expr.unit_node.expr; + if (value) { + gen_expr(fp, value); + + int element_offset = offset + (index * element_size); + + if (element_type && element_type->size == 4) { + fprintf(fp, "mov %%eax, -%d(%%rbp)\n", element_offset); + } else if (element_type && element_type->size == 2) { + fprintf(fp, "mov %%ax, -%d(%%rbp)\n", element_offset); + } else if (element_type && element_type->size == 1) { + fprintf(fp, "mov %%al, -%d(%%rbp)\n", element_offset); + } else { + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", element_offset); + } + } + index++; + current = current->expr.unit_node.next; + } + } + } + } else { + gen_expr(fp, expr->expr.var_decl.value); + + // If assigning a slice value, copy the 16-byte structure + if (var_type && var_type->tag == TYPE_SLICE) { + fprintf(fp, "mov (%%rax), %%rcx\n"); // Load ptr field + fprintf(fp, "mov 8(%%rax), %%rdx\n"); // Load len field + fprintf(fp, "mov %%rcx, -%d(%%rbp)\n", offset); // Store ptr + fprintf(fp, "mov %%rdx, -%d(%%rbp)\n", offset - 8); // Store len + } else { + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", offset); + } + } + } + break; + } + case NODE_RETURN: { + if (expr->expr.ret.value) { + gen_expr(fp, expr->expr.ret.value); + } + fprintf(fp, "mov %%rbp, %%rsp\n"); + fprintf(fp, "pop %%rbp\n"); + fprintf(fp, "ret\n"); + break; + } + case NODE_IF: { + int label_else = label_counter++; + int label_end = label_counter++; + + gen_expr(fp, expr->expr.if_stmt.condition); + fprintf(fp, "test %%rax, %%rax\n"); + + if (expr->expr.if_stmt.otherwise) { + fprintf(fp, "jz .L%d\n", label_else); + } else { + fprintf(fp, "jz .L%d\n", label_end); + } + + gen_statement_list(fp, expr->expr.if_stmt.body); + + if (expr->expr.if_stmt.otherwise) { + fprintf(fp, "jmp .L%d\n", label_end); + fprintf(fp, ".L%d:\n", label_else); + gen_statement_list(fp, 
expr->expr.if_stmt.otherwise); + } + + fprintf(fp, ".L%d:\n", label_end); + break; + } + case NODE_WHILE: { + int label_start = label_counter++; + int label_end = label_counter++; + + fprintf(fp, ".L%d:\n", label_start); + + gen_expr(fp, expr->expr.whle.condition); + fprintf(fp, "test %%rax, %%rax\n"); + fprintf(fp, "jz .L%d\n", label_end); + + arrput(break_stack, label_end); + + gen_statement_list(fp, expr->expr.whle.body); + + arrpop(break_stack); + + fprintf(fp, "jmp .L%d\n", label_start); + + fprintf(fp, ".L%d:\n", label_end); + break; + } + case NODE_LABEL: { + char *label_name = strndup(expr->expr.label.name, expr->expr.label.name_len); + fprintf(fp, ".L_%s:\n", label_name); + free(label_name); + break; + } + case NODE_GOTO: { + char *label_name = strndup(expr->expr.label.name, expr->expr.label.name_len); + fprintf(fp, "jmp .L_%s\n", label_name); + free(label_name); + break; + } + case NODE_BREAK: { + if (arrlen(break_stack) > 0) { + int loop_end = break_stack[arrlen(break_stack) - 1]; + fprintf(fp, "jmp .L%d\n", loop_end); + } else { + fprintf(fp, "# ERROR: break outside of loop\n"); + } + break; + } + case NODE_ACCESS: { + ast_node *base = expr->expr.access.expr; + ast_node *member_node = expr->expr.access.member; + + if (base->type == NODE_IDENTIFIER) { + int base_offset = get_var_offset(base->expr.string.start, base->expr.string.len); + type *base_type = base->expr_type; + + if (base_type && base_type->tag == TYPE_SLICE && member_node->type == NODE_IDENTIFIER) { + char *field_name = strndup(member_node->expr.string.start, member_node->expr.string.len); + + if (strcmp(field_name, "ptr") == 0) { + fprintf(fp, "mov -%d(%%rbp), %%rax\n", base_offset); + } else if (strcmp(field_name, "len") == 0) { + fprintf(fp, "mov -%d(%%rbp), %%rax\n", base_offset - 8); + } else { + fprintf(fp, "# ERROR: slice field '%s' not found\n", field_name); + } + + free(field_name); + } + else if (member_node->type == NODE_IDENTIFIER && base_type && + base_type->tag == TYPE_STRUCT) { 
+ member *m = base_type->data.structure.members; + int field_offset = -1; + while (m) { + if (m->name_len == member_node->expr.string.len && + strncmp(m->name, member_node->expr.string.start, m->name_len) == 0) { + field_offset = m->offset; + break; + } + m = m->next; + } + + if (field_offset >= 0) { + fprintf(fp, "mov -%d(%%rbp), %%rax\n", base_offset + field_offset); + } else { + fprintf(fp, "# ERROR: field not found\n"); + } + } else { + fprintf(fp, "# ERROR: not a struct or slice type\n"); + } + } else { + fprintf(fp, "# ERROR: complex struct access not implemented\n"); + } + break; + } + case NODE_RANGE: { + if (expr->expr.binary.left->type == NODE_INTEGER && + expr->expr.binary.right->type == NODE_INTEGER) { + i64 start = expr->expr.binary.left->expr.integer; + i64 end = expr->expr.binary.right->expr.integer; + i64 count = end - start + 1; + + usize element_size = 8; + usize data_size = count * element_size; + usize aligned_data_size = (data_size + 7) & ~7; + stack_offset += aligned_data_size; + int data_offset = stack_offset; + + for (i64 i = 0; i < count; i++) { + i64 value = start + i; + int element_offset = data_offset - (i * element_size); + fprintf(fp, "mov $%ld, %%rax\n", value); + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", element_offset); + } + + stack_offset += 16; + int slice_offset = stack_offset; + + fprintf(fp, "lea -%d(%%rbp), %%rax\n", data_offset); + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", slice_offset); + fprintf(fp, "mov $%ld, %%rax\n", count); + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", slice_offset - 8); + + fprintf(fp, "lea -%d(%%rbp), %%rax\n", slice_offset); + } else { + fprintf(fp, "# ERROR: range expression requires constant bounds\n"); + } + break; + } + case NODE_STRUCT_INIT: { + fprintf(fp, "# ERROR: struct init outside of variable declaration\n"); + break; + } + case NODE_ARRAY_SUBSCRIPT: { + usize element_size = 8; + type *base_type = expr->expr.subscript.expr->expr_type; + bool is_slice = false; + + if (base_type) { + if 
(base_type->tag == TYPE_PTR && base_type->data.ptr.child) { + element_size = base_type->data.ptr.child->size; + } else if (base_type->tag == TYPE_SLICE && base_type->data.slice.child) { + element_size = base_type->data.slice.child->size; + is_slice = true; + } + } + + if (expr->expr.subscript.index->type == NODE_RANGE) { + if (expr->expr.subscript.expr->type == NODE_IDENTIFIER) { + int base_offset = get_var_offset(expr->expr.subscript.expr->expr.string.start, + expr->expr.subscript.expr->expr.string.len); + + fprintf(fp, "mov -%d(%%rbp), %%rcx\n", base_offset); + + gen_expr(fp, expr->expr.subscript.index->expr.binary.left); + fprintf(fp, "push %%rax\n"); + + gen_expr(fp, expr->expr.subscript.index->expr.binary.right); + fprintf(fp, "mov %%rax, %%rdx\n"); // rdx = end + fprintf(fp, "pop %%rax\n"); // rax = start + fprintf(fp, "mov %%rdx, %%r8\n"); + fprintf(fp, "sub %%rax, %%r8\n"); + fprintf(fp, "inc %%r8\n"); // r8 = new length + + if (element_size != 1) { + fprintf(fp, "imul $%lu, %%rax\n", element_size); + } + fprintf(fp, "add %%rcx, %%rax\n"); // rax = new ptr + + // Allocate temporary slice struct (16 bytes) + stack_offset += 16; + fprintf(fp, "mov %%rax, -%d(%%rbp)\n", stack_offset); // Store ptr + fprintf(fp, "mov %%r8, -%d(%%rbp)\n", stack_offset - 8); // Store len + fprintf(fp, "lea -%d(%%rbp), %%rax\n", stack_offset); // Return address of temp slice + } + } + else if (expr->expr.subscript.expr->type == NODE_IDENTIFIER && is_slice) { + int base_offset = get_var_offset(expr->expr.subscript.expr->expr.string.start, + expr->expr.subscript.expr->expr.string.len); + + fprintf(fp, "mov -%d(%%rbp), %%rcx\n", base_offset); + + gen_expr(fp, expr->expr.subscript.index); + + if (element_size != 1) { + fprintf(fp, "imul $%lu, %%rax\n", element_size); + } + + fprintf(fp, "add %%rcx, %%rax\n"); + + if (expr->expr_type && expr->expr_type->size == 4) { + fprintf(fp, "movl (%%rax), %%eax\n"); + } else if (expr->expr_type && expr->expr_type->size == 2) { + fprintf(fp, 
"movzwl (%%rax), %%eax\n"); + } else if (expr->expr_type && expr->expr_type->size == 1) { + fprintf(fp, "movzbl (%%rax), %%eax\n"); + } else { + fprintf(fp, "mov (%%rax), %%rax\n"); + } + } else if (expr->expr.subscript.expr->type == NODE_IDENTIFIER) { + int base_offset = get_var_offset(expr->expr.subscript.expr->expr.string.start, + expr->expr.subscript.expr->expr.string.len); + + gen_expr(fp, expr->expr.subscript.index); + + if (element_size != 1) { + fprintf(fp, "imul $%lu, %%rax\n", element_size); + } + + fprintf(fp, "add $%d, %%rax\n", base_offset); + fprintf(fp, "neg %%rax\n"); + fprintf(fp, "add %%rbp, %%rax\n"); + + if (expr->expr_type && expr->expr_type->size == 4) { + fprintf(fp, "movl (%%rax), %%eax\n"); + } else if (expr->expr_type && expr->expr_type->size == 2) { + fprintf(fp, "movzwl (%%rax), %%eax\n"); + } else if (expr->expr_type && expr->expr_type->size == 1) { + fprintf(fp, "movzbl (%%rax), %%eax\n"); + } else { + fprintf(fp, "mov (%%rax), %%rax\n"); + } + } else { + gen_expr(fp, expr->expr.subscript.expr); + fprintf(fp, "push %%rax\n"); + + gen_expr(fp, expr->expr.subscript.index); + + if (element_size != 1) { + fprintf(fp, "imul $%lu, %%rax\n", element_size); + } + + fprintf(fp, "pop %%rcx\n"); + fprintf(fp, "add %%rcx, %%rax\n"); + + if (expr->expr_type && expr->expr_type->size == 4) { + fprintf(fp, "movl (%%rax), %%eax\n"); + } else if (expr->expr_type && expr->expr_type->size == 2) { + fprintf(fp, "movzwl (%%rax), %%eax\n"); + } else if (expr->expr_type && expr->expr_type->size == 1) { + fprintf(fp, "movzbl (%%rax), %%eax\n"); + } else { + fprintf(fp, "mov (%%rax), %%rax\n"); + } + } + break; + } + case NODE_CALL: { + const char *arg_regs[] = {"%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9"}; + + int param_count = 0; + ast_node *param = expr->expr.call.parameters; + while (param && param->type == NODE_UNIT) { + param_count++; + param = param->expr.unit_node.next; + } + + param = expr->expr.call.parameters; + int i = 0; + while (param && 
param->type == NODE_UNIT) { + if (param->expr.unit_node.expr) { + gen_expr(fp, param->expr.unit_node.expr); + fprintf(fp, "push %%rax\n"); + } + param = param->expr.unit_node.next; + i++; + } + + for (int j = param_count - 1; j >= 0; j--) { + if (j < 6) { + fprintf(fp, "pop %s\n", arg_regs[j]); + } else { + // TODO: handle more than 6 arguments properly + fprintf(fp, "pop %%rax\n"); + fprintf(fp, "push %%rax\n"); + } + } + + fprintf(fp, "call %.*s\n", (int)expr->expr.call.name_len, expr->expr.call.name); + + if (param_count > 6) { + int stack_args = param_count - 6; + fprintf(fp, "add $%d, %%rsp\n", stack_args * 8); + } + + break; + } + } +} + +void gen_function(FILE *fp, ast_node *fn) +{ + if (fn->expr.function.is_extern || fn->expr.function.body == NULL) { + return; + } + + ast_node *current = fn->expr.function.body; + + stack_offset = 0; + label_counter = 0; + shfree(variables); + variables = NULL; + arrfree(break_stack); + break_stack = NULL; + + fprintf(fp, ".global %s\n%s:\n", fn->expr.function.name, fn->expr.function.name); + + fprintf(fp, "push %%rbp\n"); + fprintf(fp, "mov %%rsp, %%rbp\n"); + fprintf(fp, "sub $256, %%rsp\n"); + + const char *param_regs[] = {"%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9"}; + member *param = fn->expr.function.parameters; + int param_idx = 0; + while (param && param_idx < 6) { + int offset = get_var_offset(param->name, param->name_len); + fprintf(fp, "mov %s, -%d(%%rbp)\n", param_regs[param_idx], offset); + param = param->next; + param_idx++; + } + + while (current && current->type == NODE_UNIT) { + if (current->expr.unit_node.expr) { + gen_expr(fp, current->expr.unit_node.expr); + } + current = current->expr.unit_node.next; + } + + fprintf(fp, "mov %%rbp, %%rsp\n"); + fprintf(fp, "pop %%rbp\n"); + fprintf(fp, "ret\n"); +} + +void generate(ast_node *node) +{ + FILE *fp = fopen("test.s", "w"); + + ast_node *current = node; + + fprintf(fp, ".section .text\n"); + while (current && current->type == NODE_UNIT) { + if 
(current->expr.unit_node.expr && current->expr.unit_node.expr->type == NODE_FUNCTION) { + gen_function(fp, current->expr.unit_node.expr); + } + current = current->expr.unit_node.next; + } + + fclose(fp); + + shfree(variables); + variables = NULL; + arrfree(break_stack); + break_stack = NULL; +} diff --git a/codegen.h b/codegen.h new file mode 100644 index 0000000..8e87114 --- /dev/null +++ b/codegen.h @@ -0,0 +1,8 @@ +#ifndef CODEGEN_H +#define CODEGEN_H + +#include "parser.h" + +void generate(ast_node *node); + +#endif diff --git a/config.def.h b/config.def.h new file mode 100644 index 0000000..184290d --- /dev/null +++ b/config.def.h @@ -0,0 +1,4 @@ +#ifndef CONFIG_H +#define CONFIG_H + +#endif diff --git a/config.h b/config.h new file mode 100644 index 0000000..184290d --- /dev/null +++ b/config.h @@ -0,0 +1,4 @@ +#ifndef CONFIG_H +#define CONFIG_H + +#endif diff --git a/config.mk b/config.mk new file mode 100644 index 0000000..d6cbc51 --- /dev/null +++ b/config.mk @@ -0,0 +1,27 @@ +# cc version +VERSION = 0.1 + +# Customize below to fit your system + +# paths +PREFIX = /usr +MANPREFIX = ${PREFIX}/share/man + +# OpenBSD (uncomment) +#MANPREFIX = ${PREFIX}/man + +# includes and libs +INCS = -I. 
+LIBS = +# flags +CPPFLAGS = -DVERSION=\"${VERSION}\" +CFLAGS := -std=c23 -pedantic -Wall -O0 ${INCS} ${CPPFLAGS} +CFLAGS := ${CFLAGS} -g +LDFLAGS = ${LIBS} + +# Solaris +#CFLAGS = -fast ${INCS} -DVERSION=\"${VERSION}\" +#LDFLAGS = ${LIBS} + +# compiler and linker +CC = cc diff --git a/examples/for.l b/examples/for.l new file mode 100644 index 0000000..24181ba --- /dev/null +++ b/examples/for.l @@ -0,0 +1,16 @@ +import std; + +i32 main() +{ + u32 x = 4; + loop { + u32 b = 3; + } + x == 3; + + loop (0.., test) |k, i| { + + } + u32 b = 3; + +} diff --git a/examples/hello_world.l b/examples/hello_world.l new file mode 100644 index 0000000..68b4c08 --- /dev/null +++ b/examples/hello_world.l @@ -0,0 +1,10 @@ +struct b { + i32 a, + u32 b, + u32 c, +} + +u32 test() +{ + f32 a = 5.0; +} diff --git a/lc.c b/lc.c new file mode 100644 index 0000000..ad01157 --- /dev/null +++ b/lc.c @@ -0,0 +1,241 @@ +#include +#include +#include "utils.h" +#include "lexer.h" +#include "parser.h" +#include "sema.h" +#include "codegen.h" + +void print_indent(int depth) { + for (int i = 0; i < depth; i++) printf(" "); +} + +const char* get_op_str(binary_op op) { + switch(op) { + case OP_PLUS: return "+"; + case OP_MINUS: return "-"; + case OP_DIV: return "/"; + case OP_MUL: return "*"; + case OP_EQ: return "=="; + case OP_ASSIGN: return "="; + case OP_ASSIGN_PTR: return "<-"; + case OP_AND: return "&&"; + case OP_OR: return "||"; + case OP_NEQ: return "!="; + case OP_GT: return ">"; + case OP_LT: return "<"; + case OP_GE: return ">="; + case OP_LE: return "<="; + case OP_BOR: return "|"; + case OP_BAND: return "&"; + case OP_BXOR: return "^"; + case OP_MOD: return "%"; + case OP_PLUS_EQ: return "+="; + case OP_MINUS_EQ: return "-="; + case OP_DIV_EQ: return "/="; + case OP_MUL_EQ: return "*="; + default: return "?"; + } +} + +const char *get_uop_str(unary_op op) { + switch (op) { + case UOP_INCR: return "++"; + case UOP_MINUS: return "-"; + case UOP_DECR: return "--"; + case UOP_DEREF: return 
"*"; + case UOP_REF: return "&"; + case UOP_NOT: return "!"; + default: return "?"; + } +} + +void print_ast(ast_node *node, int depth) { + if (!node) return; + + print_indent(depth); + + switch (node->type) { + case NODE_INTEGER: + printf("Integer: %lu\n", node->expr.integer); + break; + case NODE_FLOAT: + printf("Float: %f\n", node->expr.flt); + break; + case NODE_CHAR: + printf("Char: '%c'\n", node->expr.ch); + break; + case NODE_STRING: + printf("String: \"%.*s\"\n", (int)node->expr.string.len, node->expr.string.start); + break; + case NODE_IDENTIFIER: + printf("Identifier: %.*s\n", (int)node->expr.string.len, node->expr.string.start); + break; + case NODE_CAST: + printf("Cast:\n"); + print_ast(node->expr.cast.type, depth); + print_ast(node->expr.cast.value, depth + 1); + break; + case NODE_ACCESS: + printf("Access:\n"); + print_ast(node->expr.access.expr, depth + 1); + print_ast(node->expr.access.member, depth + 1); + break; + case NODE_LABEL: + printf("Label: %.*s\n", (int)node->expr.label.name_len, node->expr.label.name); + break; + case NODE_GOTO: + printf("Goto: %.*s\n", (int)node->expr.label.name_len, node->expr.label.name); + break; + case NODE_BINARY: + printf("BinaryOp (%s)\n", get_op_str(node->expr.binary.operator)); + print_ast(node->expr.binary.left, depth + 1); + print_ast(node->expr.binary.right, depth + 1); + break; + case NODE_ARRAY_SUBSCRIPT: + printf("Array subscript\n"); + print_ast(node->expr.subscript.expr, depth + 1); + print_ast(node->expr.subscript.index, depth + 1); + break; + case NODE_UNARY: + printf("UnaryOp (%s)\n", get_uop_str(node->expr.unary.operator)); + print_ast(node->expr.unary.right, depth + 1); + break; + case NODE_POSTFIX: + printf("Postfix (%s)\n", get_uop_str(node->expr.unary.operator)); + print_ast(node->expr.unary.right, depth + 1); + break; + case NODE_BREAK: + printf("Break\n"); + break; + case NODE_TERNARY: + printf("Ternary (? 
:)\n"); + print_indent(depth + 1); printf("Condition:\n"); + print_ast(node->expr.ternary.condition, depth + 2); + print_indent(depth + 1); printf("Then:\n"); + print_ast(node->expr.ternary.then, depth + 2); + print_indent(depth + 1); printf("Else:\n"); + print_ast(node->expr.ternary.otherwise, depth + 2); + break; + case NODE_UNIT: + printf("Unit\n"); + ast_node *current = node; + while (current && current->type == NODE_UNIT) { + print_ast(current->expr.unit_node.expr, depth + 1); + current = current->expr.unit_node.next; + } + break; + case NODE_CALL: + printf("Call: %.*s\n", (int)node->expr.call.name_len, node->expr.call.name); + current = node->expr.call.parameters; + while (current && current->type == NODE_UNIT) { + print_ast(current->expr.unit_node.expr, depth + 1); + current = current->expr.unit_node.next; + } + break; + case NODE_STRUCT_INIT: + printf("Struct init:\n"); + current = node->expr.struct_init.members; + while (current && current->type == NODE_UNIT) { + print_ast(current->expr.unit_node.expr, depth + 1); + current = current->expr.unit_node.next; + } + break; + case NODE_STRUCT: + printf("Struct: %.*s\n", (int)node->expr.structure.name_len, node->expr.structure.name); + member *m = node->expr.structure.members; + while (m) { + print_ast(m->type, depth + 1); + m = m->next; + } + break; + case NODE_UNION: + printf("Union: %.*s\n", (int)node->expr.structure.name_len, node->expr.structure.name); + m = node->expr.structure.members; + while (m) { + print_ast(m->type, depth + 1); + m = m->next; + } + break; + case NODE_ENUM: + printf("Enum: %.*s\n", (int)node->expr.enm.name_len, node->expr.enm.name); + variant *v = node->expr.enm.variants; + while (v) { + printf("\t%.*s\n", (int)v->name_len, v->name); + v = v->next; + } + break; + case NODE_IF: + printf("If:\n"); + print_ast(node->expr.whle.condition, depth + 1); + print_ast(node->expr.whle.body, depth + 1); + break; + case NODE_VAR_DECL: + printf("VarDecl: "); + print_ast(node->expr.var_decl.type, 0); + 
print_ast(node->expr.var_decl.value, depth + 1); + break; + case NODE_FUNCTION: + printf("Function: %.*s\n", (int)node->expr.function.name_len, node->expr.function.name); + m = node->expr.function.parameters; + while (m) { + print_ast(m->type, depth + 1); + m = m->next; + } + print_ast(node->expr.function.body, depth + 1); + break; + case NODE_RETURN: + printf("Return:\n"); + print_ast(node->expr.ret.value, depth + 1); + break; + case NODE_IMPORT: + printf("Import:\n"); + print_ast(node->expr.import.path, depth + 1); + break; + case NODE_WHILE: + printf("While:\n"); + print_ast(node->expr.whle.condition, depth + 1); + print_ast(node->expr.whle.body, depth + 1); + break; + case NODE_FOR: + printf("For:\n"); + print_ast(node->expr.fr.slices, depth + 1); + print_ast(node->expr.fr.captures, depth + 1); + print_indent(depth + 1); + print_ast(node->expr.fr.body, depth + 1); + break; + case NODE_RANGE: + printf("Range:\n"); + print_ast(node->expr.binary.left, depth + 1); + print_ast(node->expr.binary.right, depth + 1); + break; + default: + printf("Unknown Node Type: %d\n", node->type); + break; + } +} + +int main(void) +{ + FILE *fp = fopen("test.l", "r"); + usize size = 0; + fseek(fp, 0, SEEK_END); + size = ftell(fp); + fseek(fp, 0, SEEK_SET); + char *src = malloc(size+1); + fread(src, size, 1, fp); + fclose(fp); + src[size] = '\0'; + + arena a = arena_init(0x1000 * 0x1000 * 64); + lexer *l = lexer_init(src, size, &a); + parser *p = parser_init(l, &a); + print_ast(p->ast, 0); + sema_init(p, &a); + + generate(p->ast); + + arena_deinit(a); + + return 0; +} diff --git a/lexer.c b/lexer.c new file mode 100644 index 0000000..22063fd --- /dev/null +++ b/lexer.c @@ -0,0 +1,422 @@ +#include "lexer.h" +#include +#include +#include +#include + +trie_node *keywords; + +static void add_token(lexer *l, token_type type, usize len) +{ + token *t = arena_alloc(l->allocator, sizeof(token)); + t->type = type; + t->lexeme_len = len; + t->lexeme = l->source + l->index; + t->position.row = 
l->row; + t->position.column = l->column; + + if (!l->tokens) { + l->tokens = t; + l->tail = t; + } else { + l->tail->next = t; + l->tail = t; + } +} + +static void add_error(lexer *l, char *msg) +{ + token *t = arena_alloc(l->allocator, sizeof(token)); + t->type = TOKEN_ERROR; + t->lexeme_len = strlen(msg); + t->lexeme = msg; + t->position.row = l->row; + t->position.column = l->column; + + if (!l->tokens) { + l->tokens = t; + l->tail = t; + } else { + l->tail->next = t; + l->tail = t; + } +} + +static void parse_number(lexer *l) +{ + char c = l->source[l->index]; + /* Is the number a float? */ + bool f = false; + usize len = 0; + + while (isdigit(c)) { + /* If a dot is found, and the character after it is a digit, this is a float. */ + if (l->source[l->index+1] == '.' && isdigit(l->source[l->index+2])) { + f = true; + len += 3; + l->index += 3; + } else { + len += 1; + l->index += 1; + } + c = l->source[l->index]; + } + l->index -= len; + if (f) { + add_token(l, TOKEN_FLOAT, len); + } else { + add_token(l, TOKEN_INTEGER, len); + } + l->index += len; +} + +static void parse_identifier(lexer *l) +{ + char c = l->source[l->index]; + usize len = 0; + + while (isalnum(c) || c == '_') { + len += 1; + l->index += 1; + c = l->source[l->index]; + } + l->index -= len; + token_type keyword = trie_get(keywords, l->source + l->index, len); + if (keyword) { + add_token(l, keyword, len); + } else { + add_token(l, TOKEN_IDENTIFIER, len); + } + l->index += len; +} + +static void parse_string(lexer *l) +{ + char c = l->source[l->index]; + usize len = 0; + + while (c != '"') { + if (c == '\0' || c == '\n') { + l->index -= len; + add_error(l, "unclosed string literal."); + l->index += len; + return; + } + len += 1; + l->index += 1; + c = l->source[l->index]; + } + l->index -= len; + add_token(l, TOKEN_STRING, len); + l->index += len + 1; +} + +static bool parse_special(lexer *l) +{ + switch (l->source[l->index]) { + case '+': + if (l->source[l->index+1] == '=') { + add_token(l, 
TOKEN_PLUS_EQ, 2); + l->index += 2; + } else if (l->source[l->index+1] == '+') { + add_token(l, TOKEN_PLUS_PLUS, 2); + l->index += 2; + } else { + add_token(l, TOKEN_PLUS, 1); + l->index += 1; + } + return true; + case '-': + if (l->source[l->index+1] == '=') { + add_token(l, TOKEN_MINUS_EQ, 2); + l->index += 2; + } else if (l->source[l->index+1] == '-') { + add_token(l, TOKEN_MINUS_MINUS, 2); + l->index += 2; + } else { + add_token(l, TOKEN_MINUS, 1); + l->index += 1; + } + return true; + case '/': + if (l->source[l->index+1] == '=') { + add_token(l, TOKEN_SLASH_EQ, 2); + l->index += 2; + } else { + add_token(l, TOKEN_SLASH, 1); + l->index += 1; + } + return true; + case '*': + if (l->source[l->index+1] == '=') { + add_token(l, TOKEN_STAR_EQ, 2); + l->index += 2; + } else { + add_token(l, TOKEN_STAR, 1); + l->index += 1; + } + return true; + case '%': + if (l->source[l->index+1] == '=') { + add_token(l, TOKEN_PERC_EQ, 2); + l->index += 2; + } else { + add_token(l, TOKEN_PERC, 1); + l->index += 1; + } + return true; + case '&': + if (l->source[l->index+1] == '=') { + add_token(l, TOKEN_AND_EQ, 2); + l->index += 2; + } else if (l->source[l->index+1] == '&') { + add_token(l, TOKEN_DOUBLE_AND, 2); + l->index += 2; + } else { + add_token(l, TOKEN_AND, 1); + l->index += 1; + } + return true; + case '^': + if (l->source[l->index+1] == '=') { + add_token(l, TOKEN_HAT_EQ, 2); + l->index += 2; + } else { + add_token(l, TOKEN_HAT, 1); + l->index += 1; + } + return true; + case '|': + if (l->source[l->index+1] == '=') { + add_token(l, TOKEN_PIPE_EQ, 2); + l->index += 2; + } else if (l->source[l->index+1] == '|') { + add_token(l, TOKEN_OR, 2); + l->index += 2; + } else { + add_token(l, TOKEN_PIPE, 1); + l->index += 1; + } + return true; + case '=': + if (l->source[l->index+1] == '=') { + add_token(l, TOKEN_DOUBLE_EQ, 2); + l->index += 2; + } else { + add_token(l, TOKEN_EQ, 1); + l->index += 1; + } + return true; + case '>': + if (l->source[l->index+1] == '=') { + add_token(l, 
TOKEN_GREATER_EQ, 2); + l->index += 2; + } else if (l->source[l->index+1] == '>') { + if (l->source[l->index+2] == '=') { + add_token(l, TOKEN_RSHIFT_EQ, 3); + l->index += 3; + return true; + } + add_token(l, TOKEN_RSHIFT, 2); + l->index += 2; + } else { + add_token(l, TOKEN_GREATER_THAN, 1); + l->index += 1; + } + return true; + case '<': + if (l->source[l->index+1] == '=') { + add_token(l, TOKEN_LESS_EQ, 2); + l->index += 2; + } else if (l->source[l->index+1] == '-') { + add_token(l, TOKEN_ARROW, 2); + l->index += 2; + } else if (l->source[l->index+1] == '<') { + if (l->source[l->index+2] == '=') { + add_token(l, TOKEN_LSHIFT_EQ, 3); + l->index += 3; + return true; + } + add_token(l, TOKEN_LSHIFT, 2); + l->index += 2; + } else { + add_token(l, TOKEN_LESS_THAN, 1); + l->index += 1; + } + return true; + case '!': + if (l->source[l->index+1] == '=') { + add_token(l, TOKEN_NOT_EQ, 2); + l->index += 2; + } else { + add_token(l, TOKEN_BANG, 1); + l->index += 1; + } + return true; + case ':': + add_token(l, TOKEN_COLON, 1); + l->index += 1; + return true; + case ';': + add_token(l, TOKEN_SEMICOLON, 1); + l->index += 1; + return true; + case '.': + if (l->source[l->index+1] == '.') { + add_token(l, TOKEN_DOUBLE_DOT, 2); + l->index += 2; + } else { + add_token(l, TOKEN_DOT, 1); + l->index += 1; + } + return true; + case ',': + add_token(l, TOKEN_COMMA, 1); + l->index += 1; + return true; + case '(': + add_token(l, TOKEN_LPAREN, 1); + l->index += 1; + return true; + case ')': + add_token(l, TOKEN_RPAREN, 1); + l->index += 1; + return true; + case '[': + add_token(l, TOKEN_LSQUARE, 1); + l->index += 1; + return true; + case ']': + add_token(l, TOKEN_RSQUARE, 1); + l->index += 1; + return true; + case '{': + add_token(l, TOKEN_LCURLY, 1); + l->index += 1; + return true; + case '}': + add_token(l, TOKEN_RCURLY, 1); + l->index += 1; + return true; + case '\'': + if (l->source[l->index+1] == '\\') { + if (l->source[l->index+3] != '\'') { + add_error(l, "unclosed character 
literal."); + l->index += 1; + return true; + } + l->index += 1; + add_token(l, TOKEN_CHAR, 2); + l->index += 3; + return true; + } else { + if (l->source[l->index+2] != '\'') { + add_error(l, "unclosed character literal."); + l->index += 1; + return true; + } + l->index += 1; + add_token(l, TOKEN_CHAR, 1); + l->index += 2; + return true; + } + default: + return false; + } +} + +static void parse(lexer *l) +{ + char c; + + while (l->index <= l->size) { + c = l->source[l->index]; + l->column += 1; + + if (c == '\n') { + l->index += 1; + l->row += 1; + l->column = 0; + continue; + } + + usize head = l->index; + + if (c == '/' && l->source[l->index+1] == '/') { + while (l->source[l->index] != '\n') { + l->index += 1; + } + l->column += (l->index - head - 1); + } + + if (isspace(c)) { + l->index += 1; + continue; + } + + + if (parse_special(l)) { + l->column += (l->index - head - 1); + continue; + } + + if (isdigit(c)) { + parse_number(l); + l->column += (l->index - head - 1); + continue; + } + + if (isalpha(c)) { + parse_identifier(l); + l->column += (l->index - head - 1); + continue; + } + + if (c == '"') { + l->index += 1; + parse_string(l); + l->column += (l->index - head - 1); + continue; + } + + l->index += 1; + } +} + +lexer *lexer_init(char *source, usize size, arena *arena) +{ + lexer *lex = arena_alloc(arena, sizeof(lexer)); + lex->column = 1; + lex->row = 1; + lex->index = 0; + lex->size = size; + lex->tokens = 0; + lex->tail = 0; + lex->allocator = arena; + lex->source = source; + + keywords = arena_alloc(arena, sizeof(trie_node)); + trie_insert(keywords, lex->allocator, "true", TOKEN_TRUE); + trie_insert(keywords, lex->allocator, "false", TOKEN_FALSE); + trie_insert(keywords, lex->allocator, "struct", TOKEN_STRUCT); + trie_insert(keywords, lex->allocator, "enum", TOKEN_ENUM); + trie_insert(keywords, lex->allocator, "union", TOKEN_UNION); + trie_insert(keywords, lex->allocator, "loop", TOKEN_LOOP); + trie_insert(keywords, lex->allocator, "while", 
TOKEN_WHILE); + trie_insert(keywords, lex->allocator, "until", TOKEN_UNTIL); + trie_insert(keywords, lex->allocator, "goto", TOKEN_GOTO); + trie_insert(keywords, lex->allocator, "if", TOKEN_IF); + trie_insert(keywords, lex->allocator, "else", TOKEN_ELSE); + trie_insert(keywords, lex->allocator, "switch", TOKEN_SWITCH); + trie_insert(keywords, lex->allocator, "break", TOKEN_BREAK); + trie_insert(keywords, lex->allocator, "defer", TOKEN_DEFER); + trie_insert(keywords, lex->allocator, "return", TOKEN_RETURN); + trie_insert(keywords, lex->allocator, "import", TOKEN_IMPORT); + trie_insert(keywords, lex->allocator, "const", TOKEN_CONST); + trie_insert(keywords, lex->allocator, "extern", TOKEN_EXTERN); + trie_insert(keywords, lex->allocator, "volatile", TOKEN_VOLATILE); + + parse(lex); + + return lex; +} diff --git a/lexer.h b/lexer.h new file mode 100644 index 0000000..72277df --- /dev/null +++ b/lexer.h @@ -0,0 +1,97 @@ +#ifndef LEXER_H +#define LEXER_H + +#include "utils.h" + +typedef enum { + TOKEN_ERROR, + TOKEN_END, + TOKEN_PLUS, // + + TOKEN_PLUS_PLUS, // ++ + TOKEN_MINUS, // - + TOKEN_MINUS_MINUS, // -- + TOKEN_SLASH, // / + TOKEN_PERC, // % + TOKEN_STAR, // * + TOKEN_AND, // & + TOKEN_HAT, // ^ + TOKEN_PIPE, // | + TOKEN_LSHIFT, // << + TOKEN_RSHIFT, // >> + TOKEN_DOUBLE_EQ, // == + TOKEN_ARROW, // <- + TOKEN_EQ, // = + TOKEN_LESS_THAN, // < + TOKEN_GREATER_THAN, // > + TOKEN_LESS_EQ, // <= + TOKEN_GREATER_EQ, // >= + TOKEN_NOT_EQ, // != + TOKEN_PLUS_EQ, // += + TOKEN_MINUS_EQ, // -= + TOKEN_STAR_EQ, // *= + TOKEN_SLASH_EQ, // /= + TOKEN_AND_EQ, // &= + TOKEN_HAT_EQ, // ^= + TOKEN_PIPE_EQ, // |= + TOKEN_PERC_EQ, // %= + TOKEN_LSHIFT_EQ, // <<= + TOKEN_RSHIFT_EQ, // >>= + TOKEN_OR, // || + TOKEN_DOUBLE_AND, // && + TOKEN_COLON, // : + TOKEN_SEMICOLON, // ; + TOKEN_DOT, // . + TOKEN_DOUBLE_DOT, // .. + TOKEN_BANG, // ! 
+ TOKEN_COMMA, // , + TOKEN_LPAREN, // ( + TOKEN_RPAREN, // ) + TOKEN_LSQUARE, // [ + TOKEN_RSQUARE, // ] + TOKEN_LCURLY, // { + TOKEN_RCURLY, // } + TOKEN_INTEGER, + TOKEN_FLOAT, + TOKEN_IDENTIFIER, + TOKEN_STRING, + TOKEN_CHAR, + TOKEN_TRUE, + TOKEN_FALSE, + TOKEN_GOTO, + TOKEN_LOOP, + TOKEN_WHILE, + TOKEN_UNTIL, + TOKEN_IF, + TOKEN_ELSE, + TOKEN_SWITCH, + TOKEN_BREAK, + TOKEN_DEFER, + TOKEN_RETURN, + TOKEN_IMPORT, + TOKEN_CONST, + TOKEN_EXTERN, + TOKEN_VOLATILE, + TOKEN_STRUCT, + TOKEN_ENUM, + TOKEN_UNION +} token_type; + +typedef struct _token { + token_type type; + source_pos position; + char *lexeme; + usize lexeme_len; + struct _token *next; +} token; + +typedef struct { + usize column, row, index, size; + char *source; + token *tokens; + token *tail; + arena *allocator; +} lexer; + +lexer *lexer_init(char *source, usize size, arena *arena); + +#endif diff --git a/parser.c b/parser.c new file mode 100644 index 0000000..4fd7c65 --- /dev/null +++ b/parser.c @@ -0,0 +1,1385 @@ +#include "parser.h" +#include +#include +#include +#include + +bool has_errors = false; + +ast_node *parse_expression(parser *p); +static ast_node *parse_statement(parser *p); +static ast_node *parse_type(parser *p); + +/* Consume a token in the list. */ +static void advance(parser *p) +{ + p->previous = p->tokens; + if (p->tokens) + p->tokens = p->tokens->next; +} + +/* Get the current token in the list, without consuming */ +static token *peek(parser *p) +{ + return p->tokens; +} + +/* + * Check if the current token type is the same as `type`, + * without consuming it. + */ +static bool match_peek(parser *p, token_type type) +{ + if (p->tokens) + { + return p->tokens->type == type; + } + else + { + return false; + } +} + +/* Same as `match_peek()` but it consumes the token. 
*/ +static bool match(parser *p, token_type type) +{ + if (p->tokens) + { + if (p->tokens->type == type) + { + advance(p); + return true; + } + } + return false; +} + +/* + * When an error is encountered, try to find a + * token that could define a part of the code + * which doesn't depend on the one giving the + * error. This is needed to print multiple errors + * instead of just failing at the first one. + */ +static void parser_sync(parser *p) +{ + advance(p); + + while (p->tokens) + { + if (p->previous->type == TOKEN_SEMICOLON || p->previous->type == TOKEN_RCURLY) + { + return; + } + + switch (p->tokens->type) + { + case TOKEN_STRUCT: + case TOKEN_ENUM: + case TOKEN_UNION: + case TOKEN_IF: + case TOKEN_LOOP: + case TOKEN_RETURN: + case TOKEN_SWITCH: + return; + default: + advance(p); + } + } +} + +/* Print the error message and sync the parser. */ +static void error(parser *p, char *msg) +{ + printf("\x1b[31m\x1b[1merror\x1b[0m\x1b[1m:%ld:%ld:\x1b[0m %s\n", p->previous->position.row, p->previous->position.column, msg); + has_errors = true; + parser_sync(p); +} + +static ast_node *parse_call(parser *p) +{ + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_CALL; + node->position = p->previous->position; + node->expr.call.name = peek(p)->lexeme; + node->expr.call.name_len = peek(p)->lexeme_len; + advance(p); + /* Skip also the opening `(` */ + advance(p); + /* Call without parameters */ + if (match(p, TOKEN_RPAREN)) + { + node->expr.call.parameters = NULL; + return node; + } + + snapshot arena_start = arena_snapshot(p->allocator); + node->expr.call.parameters = arena_alloc(p->allocator, sizeof(ast_node)); + node->expr.call.parameters->type = NODE_UNIT; + node->expr.call.parameters->expr.unit_node.expr = parse_expression(p); + ast_node *tail = node->expr.call.parameters; + node->expr.call.param_len = 1; + + /* In this case, there is only one parameter */ + if (match(p, TOKEN_RPAREN)) + { + return node; + } + + if (match(p, 
TOKEN_COMMA)) + { + ast_node *expr = parse_expression(p); + if (expr) + { + while (!match(p, TOKEN_RPAREN)) + { + if (!match(p, TOKEN_COMMA)) + { + error(p, "expected `)`."); + arena_reset_to_snapshot(p->allocator, arena_start); + return NULL; + } + tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); + tail->expr.unit_node.next->expr.unit_node.expr = expr; + tail = tail->expr.unit_node.next; + tail->type = NODE_UNIT; + expr = parse_expression(p); + if (!expr) + { + error(p, "expected `)`."); + arena_reset_to_snapshot(p->allocator, arena_start); + return NULL; + } + node->expr.call.param_len += 1; + } + + tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); + tail->expr.unit_node.next->expr.unit_node.expr = expr; + tail = tail->expr.unit_node.next; + tail->type = NODE_UNIT; + } + else + { + error(p, "expected expression."); + arena_reset_to_snapshot(p->allocator, arena_start); + return NULL; + } + } + else + { + error(p, "expected `)`."); + arena_reset_to_snapshot(p->allocator, arena_start); + return NULL; + } + + return node; +} + +/* Parse expressions with the highest precedence. 
*/ +static ast_node *parse_factor(parser *p) +{ + token *t = peek(p); + if (match(p, TOKEN_INTEGER)) + { + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_INTEGER; + node->position = p->previous->position; + node->expr.integer = parse_int(t->lexeme, t->lexeme_len); + if (match(p, TOKEN_DOUBLE_DOT)) { + ast_node *range = arena_alloc(p->allocator, sizeof(ast_node)); + range->type = NODE_RANGE; + range->expr.binary.left = node; + range->expr.binary.operator = OP_PLUS; + snapshot snap = arena_snapshot(p->allocator); + ast_node *end = parse_factor(p); + if (!end) { + range->expr.binary.right = NULL; + } else if (end->type != NODE_INTEGER) { + arena_reset_to_snapshot(p->allocator, snap); + error(p, "expected integer."); + return NULL; + } else { + range->expr.binary.right = end; + } + return range; + } + return node; + } + else if (match(p, TOKEN_FLOAT)) + { + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_FLOAT; + node->position = p->previous->position; + node->expr.flt = parse_float(t->lexeme, t->lexeme_len); + return node; + } + else if (match(p, TOKEN_TRUE)) { + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_BOOL; + node->position = p->previous->position; + node->expr.boolean = 1; + return node; + } + else if (match(p, TOKEN_FALSE)) { + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_BOOL; + node->position = p->previous->position; + node->expr.boolean = 0; + return node; + } + else if (match_peek(p, TOKEN_IDENTIFIER)) + { + /* If a `(` is found after an identifier, it should be a call. 
*/ + if (p->tokens->next && p->tokens->next->type == TOKEN_LPAREN) + { + return parse_call(p); + } + advance(p); + + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_IDENTIFIER; + node->position = p->previous->position; + node->expr.string.start = t->lexeme; + node->expr.string.len = t->lexeme_len; + return node; + } + else if (match(p, TOKEN_STRING)) + { + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_STRING; + node->position = p->previous->position; + node->expr.string.start = t->lexeme; + node->expr.string.len = t->lexeme_len; + return node; + } + else if (match(p, TOKEN_CHAR)) + { + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_CHAR; + node->position = p->previous->position; + if (t->lexeme_len == 2) + { + char c; + switch (t->lexeme[1]) + { + case 'n': + c = '\n'; + break; + case 't': + c = '\t'; + break; + case 'r': + c = '\r'; + break; + case '0': + c = '\0'; + break; + case '\\': + c = '\\'; + break; + case '\'': + c = '\''; + break; + default: + error(p, "invalid escape code."); + return NULL; + } + node->expr.ch = c; + } + else + { + node->expr.ch = *(t->lexeme); + } + return node; + } + else if (match(p, TOKEN_LPAREN)) + { + ast_node *node = parse_expression(p); + if (!match(p, TOKEN_RPAREN)) + { + error(p, "unclosed parenthesis"); + return NULL; + } + + return node; + } + + return NULL; +} + +ast_node *parse_unary(parser *p) +{ + if (match(p, TOKEN_PLUS_PLUS) || match(p, TOKEN_MINUS) || match(p, TOKEN_MINUS_MINUS) || match(p, TOKEN_STAR) || match(p, TOKEN_AND) || match(p, TOKEN_BANG)) + { + unary_op op; + switch (p->previous->type) + { + case TOKEN_PLUS_PLUS: + op = UOP_INCR; + break; + case TOKEN_MINUS: + op = UOP_MINUS; + break; + case TOKEN_MINUS_MINUS: + op = UOP_DECR; + break; + case TOKEN_STAR: + op = UOP_DEREF; + break; + case TOKEN_AND: + op = UOP_REF; + break; + case TOKEN_BANG: + op = UOP_NOT; + break; + default: + goto end; + } + + 
ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_UNARY; + node->position = p->previous->position; + node->expr.unary.operator = op; + node->expr.unary.right = parse_expression(p); + + return node; + } + + /* Type cast. */ + if (match_peek(p, TOKEN_LPAREN) && p->tokens->next && p->tokens->next->type == TOKEN_IDENTIFIER && p->tokens->next->next && p->tokens->next->next->type == TOKEN_RPAREN) + { + advance(p); + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_CAST; + node->position = p->previous->position; + node->expr.cast.type = parse_type(p); + advance(p); + advance(p); + node->expr.cast.value = parse_expression(p); + return node; + } + +end: + return parse_factor(p); +} + +ast_node *parse_term(parser *p) +{ + ast_node *left = parse_unary(p); + + while (match_peek(p, TOKEN_STAR) || match_peek(p, TOKEN_SLASH) || match_peek(p, TOKEN_PERC)) { + binary_op op; + switch (peek(p)->type) { + case TOKEN_STAR: + op = OP_MUL; + break; + case TOKEN_SLASH: + op = OP_DIV; + break; + case TOKEN_PERC: + op = OP_MOD; + break; + default: + continue; + } + advance(p); + ast_node *right = parse_factor(p); + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_BINARY; + node->position = p->previous->position; + node->expr.binary.left = left; + node->expr.binary.right = right; + node->expr.binary.operator = op; + left = node; + } + + return left; +} + +/* + * Following the recursive descent parser algorithm, this + * parses all the arithmetic expressions. + */ +ast_node *parse_expression(parser *p) +{ + ast_node *left = parse_term(p); + + while (match_peek(p, TOKEN_PLUS) || match_peek(p, TOKEN_MINUS)) + { + binary_op op = peek(p)->type == TOKEN_PLUS ? 
OP_PLUS : OP_MINUS; + advance(p); + ast_node *right = parse_term(p); + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_BINARY; + node->position = p->previous->position; + node->expr.binary.left = left; + node->expr.binary.right = right; + node->expr.binary.operator = op; + left = node; + } + + /* + * If after parsing an expression a `[` character + * is found, it should be an array subscript expression. + */ + if (match_peek(p, TOKEN_LSQUARE)) { + while (match(p, TOKEN_LSQUARE)) { + ast_node *index = parse_expression(p); + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_ARRAY_SUBSCRIPT; + node->position = p->previous->position; + node->expr.subscript.expr = left; + node->expr.subscript.index = index; + + if (!match(p, TOKEN_RSQUARE)) + { + error(p, "expected `]`."); + return NULL; + } + + left = node; + + } + } + + /* + * If after parsing an expression a `.` character + * is found, it should be a member access expression. + */ + if (match_peek(p, TOKEN_DOT) && p->tokens->next && p->tokens->next->type != TOKEN_LCURLY) { + while (match(p, TOKEN_DOT)) { + if (!match_peek(p, TOKEN_IDENTIFIER)) { + error(p, "expected identifier after member access."); + return NULL; + } + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_ACCESS; + node->position = p->previous->position; + node->expr.access.expr = left; + node->expr.access.member = parse_factor(p); + + left = node; + } + } + + /* + * If after parsing an expression a `++` or a `--` + * token is found, it should be a postfix expression. 
+ */ + if (match(p, TOKEN_PLUS_PLUS) || match(p, TOKEN_MINUS_MINUS)) + { + unary_op op; + switch (p->previous->type) + { + case TOKEN_PLUS_PLUS: + op = UOP_INCR; + break; + case TOKEN_MINUS_MINUS: + op = UOP_DECR; + break; + default: + break; + } + + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_POSTFIX; + node->position = p->previous->position; + node->expr.unary.operator = op; + node->expr.unary.right = left; + + return node; + } + + if (match(p, TOKEN_DOT)) { + if (match(p, TOKEN_LCURLY)) { + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_STRUCT_INIT; + node->position = p->previous->position; + + if (match(p, TOKEN_RCURLY)) + { + node->expr.struct_init.members = NULL; + return node; + } + + snapshot arena_start = arena_snapshot(p->allocator); + node->expr.struct_init.members = arena_alloc(p->allocator, sizeof(ast_node)); + node->expr.struct_init.members->type = NODE_UNIT; + node->expr.struct_init.members->expr.unit_node.expr = parse_expression(p); + ast_node *tail = node->expr.struct_init.members; + node->expr.struct_init.members_len = 1; + + /* In this case, there is only one parameter */ + if (match(p, TOKEN_RCURLY)) + { + return node; + } + + if (match(p, TOKEN_COMMA)) + { + ast_node *expr = parse_expression(p); + if (expr) + { + while (!match(p, TOKEN_RCURLY)) + { + if (!match(p, TOKEN_COMMA)) + { + error(p, "expected `}`."); + arena_reset_to_snapshot(p->allocator, arena_start); + return NULL; + } + tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); + tail->expr.unit_node.next->expr.unit_node.expr = expr; + tail = tail->expr.unit_node.next; + tail->type = NODE_UNIT; + expr = parse_expression(p); + if (!expr) + { + error(p, "expected `}`."); + arena_reset_to_snapshot(p->allocator, arena_start); + return NULL; + } + node->expr.struct_init.members_len += 1; + } + + tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); + 
tail->expr.unit_node.next->expr.unit_node.expr = expr; + tail = tail->expr.unit_node.next; + tail->type = NODE_UNIT; + } + else + { + error(p, "expected member initialization."); + arena_reset_to_snapshot(p->allocator, arena_start); + return NULL; + } + } + else + { + error(p, "expected `}`."); + arena_reset_to_snapshot(p->allocator, arena_start); + return NULL; + } + + return node; + } else { + error(p, "unexpected `.`"); + return NULL; + } + } + + if (p->tokens && ((p->tokens->type >= TOKEN_DOUBLE_EQ && p->tokens->type <= TOKEN_NOT_EQ) || (p->tokens->type >= TOKEN_LSHIFT_EQ && p->tokens->type <= TOKEN_DOUBLE_AND))) + { + binary_op op; + switch (p->tokens->type) + { + case TOKEN_ARROW: + op = OP_ASSIGN_PTR; + break; + case TOKEN_EQ: + op = OP_ASSIGN; + break; + case TOKEN_DOUBLE_EQ: + op = OP_EQ; + break; + case TOKEN_LESS_THAN: + op = OP_LT; + break; + case TOKEN_GREATER_THAN: + op = OP_GT; + break; + case TOKEN_LESS_EQ: + op = OP_LE; + break; + case TOKEN_GREATER_EQ: + op = OP_GE; + break; + case TOKEN_NOT_EQ: + op = OP_NEQ; + break; + case TOKEN_LSHIFT_EQ: + op = OP_LSHIFT_EQ; + break; + case TOKEN_RSHIFT_EQ: + op = OP_RSHIFT_EQ; + break; + case TOKEN_OR: + op = OP_OR; + break; + case TOKEN_DOUBLE_AND: + op = OP_AND; + break; + default: + break; + } + advance(p); + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_BINARY; + node->position = p->previous->position; + node->expr.binary.left = left; + node->expr.binary.operator = op; + node->expr.binary.right = parse_expression(p); + if (!node->expr.binary.right) { + error(p, "expected expression."); + return NULL; + } + + return node; + } + + return left; +} + +static ast_node *parse_compound(parser *p) +{ + if (!match(p, TOKEN_LCURLY)) { + error(p, "expected `{`."); + return NULL; + } + + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_UNIT; + node->position = p->previous->position; + + if (match(p, TOKEN_RCURLY)) + { + node->expr.unit_node.expr = 
NULL; + node->expr.unit_node.next = NULL; + return node; + } + + snapshot arena_start = arena_snapshot(p->allocator); + node->expr.unit_node.expr = parse_statement(p); + ast_node *tail = node; + + /* In this case, there is only one parameter */ + if (match(p, TOKEN_RCURLY)) + { + return node; + } + + ast_node *expr = parse_statement(p); + if (expr) + { + while (!match(p, TOKEN_RCURLY)) + { + tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); + tail->expr.unit_node.next->expr.unit_node.expr = expr; + tail = tail->expr.unit_node.next; + tail->type = NODE_UNIT; + expr = parse_statement(p); + if (!expr) + { + error(p, "expected `}`."); + arena_reset_to_snapshot(p->allocator, arena_start); + return NULL; + } + } + + tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); + tail->expr.unit_node.next->expr.unit_node.expr = expr; + tail = tail->expr.unit_node.next; + tail->type = NODE_UNIT; + } + + return node; +} + +static ast_node *parse_for(parser *p) +{ + advance(p); + ast_node* node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_FOR; + node->position = p->previous->position; + + snapshot arena_start = arena_snapshot(p->allocator); + node->expr.fr.slices = arena_alloc(p->allocator, sizeof(ast_node)); + node->expr.fr.slices->type = NODE_UNIT; + node->expr.fr.slices->expr.unit_node.expr = parse_expression(p); + ast_node *tail = node->expr.fr.slices; + node->expr.fr.slice_len = 1; + + /* In this case, there is only one slice. 
*/ + if (match(p, TOKEN_RPAREN)) + { + goto parse_captures; + } + + if (match(p, TOKEN_COMMA)) + { + ast_node *expr = parse_expression(p); + if (expr) + { + while (!match(p, TOKEN_RPAREN)) + { + if (!match(p, TOKEN_COMMA)) + { + error(p, "expected `)`."); + arena_reset_to_snapshot(p->allocator, arena_start); + return NULL; + } + tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); + tail->expr.unit_node.next->expr.unit_node.expr = expr; + tail = tail->expr.unit_node.next; + tail->type = NODE_UNIT; + expr = parse_expression(p); + if (!expr) + { + error(p, "expected `)`."); + arena_reset_to_snapshot(p->allocator, arena_start); + return NULL; + } + } + + tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); + tail->expr.unit_node.next->expr.unit_node.expr = expr; + tail = tail->expr.unit_node.next; + node->expr.fr.slice_len += 1; + tail->type = NODE_UNIT; + } + else + { + error(p, "expected expression."); + arena_reset_to_snapshot(p->allocator, arena_start); + return NULL; + } + } + else + { + error(p, "expected `)`."); + arena_reset_to_snapshot(p->allocator, arena_start); + return NULL; + } + +parse_captures: + + if (!match(p, TOKEN_PIPE)) { + error(p, "expected capture."); + return NULL; + } + + arena_start = arena_snapshot(p->allocator); + node->expr.fr.captures = arena_alloc(p->allocator, sizeof(ast_node)); + node->expr.fr.captures->type = NODE_UNIT; + node->expr.fr.captures->expr.unit_node.expr = parse_factor(p); + if (node->expr.fr.captures->expr.unit_node.expr && node->expr.fr.captures->expr.unit_node.expr->type != NODE_IDENTIFIER) { + error(p, "captures must be identifiers."); + arena_reset_to_snapshot(p->allocator, arena_start); + return NULL; + } + tail = node->expr.fr.captures; + node->expr.fr.capture_len = 1; + + /* In this case, there is only one capture */ + if (match(p, TOKEN_PIPE)) { + goto parse_body; + } + + if (match(p, TOKEN_COMMA)) { + ast_node *expr = parse_expression(p); + if (expr) { + while (!match(p, 
TOKEN_PIPE)) { + if (!match(p, TOKEN_COMMA)) { + error(p, "expected `)`."); + arena_reset_to_snapshot(p->allocator, arena_start); + return NULL; + } + tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); + tail->expr.unit_node.next->expr.unit_node.expr = expr; + tail = tail->expr.unit_node.next; + tail->type = NODE_UNIT; + expr = parse_factor(p); + if (!expr) { + error(p, "expected `|`."); + arena_reset_to_snapshot(p->allocator, arena_start); + return NULL; + } + } + + tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); + tail->expr.unit_node.next->expr.unit_node.expr = expr; + tail = tail->expr.unit_node.next; + tail->type = NODE_UNIT; + node->expr.fr.capture_len += 1; + } else { + error(p, "expected identifier."); + arena_reset_to_snapshot(p->allocator, arena_start); + return NULL; + } + } else { + error(p, "expected `|`."); + arena_reset_to_snapshot(p->allocator, arena_start); + return NULL; + } + +parse_body:; + if (node->expr.fr.capture_len != node->expr.fr.slice_len) { + error(p, "invalid number of captures."); + return NULL; + } + + ast_node* body = parse_compound(p); + node->expr.fr.body = body; + return node; +} + +static ast_node *parse_while(parser *p) +{ + u8 flags = 0x0; + + if (match(p, TOKEN_WHILE)) { + flags |= LOOP_WHILE; + } else if (match(p, TOKEN_UNTIL)) { + flags |= LOOP_UNTIL; + } else if (!match_peek(p, TOKEN_LCURLY)) { + error(p, "expected `while`, `until` or `{`."); + return NULL; + } + ast_node *condition = parse_expression(p); + if (!condition) { + flags |= LOOP_AFTER; + } + ast_node *body = parse_compound(p); + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_WHILE; + node->position = p->previous->position; + node->expr.whle.body = body; + + if (flags & LOOP_AFTER) { + if (match(p, TOKEN_WHILE)) { + flags |= LOOP_WHILE; + condition = parse_expression(p); + } else if (match(p, TOKEN_UNTIL)) { + flags |= LOOP_UNTIL; + condition = parse_expression(p); + } else { + 
node->expr.whle.condition = NULL; + } + } + + node->expr.whle.condition = condition; + + return node; +} + +static ast_node *parse_if(parser *p) +{ + ast_node *condition = parse_expression(p); + ast_node *body = parse_compound(p); + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_IF; + node->position = p->previous->position; + node->expr.if_stmt.body = body; + node->expr.if_stmt.condition = condition; + if (match(p, TOKEN_ELSE)) { + body = parse_compound(p); + node->expr.if_stmt.otherwise = body; + } + return node; +} + +static ast_node *parse_struct(parser *p); +static ast_node *parse_type(parser *p) +{ + ast_node *type = NULL; + + if (match(p, TOKEN_STRUCT)) { + type = parse_struct(p); + } else if (match(p, TOKEN_UNION)) { + type = parse_struct(p); + type->type = NODE_UNION; + } else if (match(p, TOKEN_LSQUARE)) { + /* Array/slice type */ + type = arena_alloc(p->allocator, sizeof(ast_node)); + type->type = NODE_PTR_TYPE; + if (match(p, TOKEN_CONST)) type->expr.ptr_type.flags |= PTR_CONST; + if (match(p, TOKEN_VOLATILE)) type->expr.ptr_type.flags |= PTR_VOLATILE; + type->expr.ptr_type.flags |= PTR_SLICE; + type->expr.ptr_type.type = parse_type(p); + if (!type->expr.ptr_type.type) { + error(p, "expected type."); + return NULL; + } + if (!match(p, TOKEN_RSQUARE)) { + error(p, "expected `]`."); + return NULL; + } + } else if (match(p, TOKEN_STAR)) { + type = arena_alloc(p->allocator, sizeof(ast_node)); + type->type = NODE_PTR_TYPE; + if (match(p, TOKEN_CONST)) type->expr.ptr_type.flags |= PTR_CONST; + if (match(p, TOKEN_VOLATILE)) type->expr.ptr_type.flags |= PTR_VOLATILE; + type->expr.ptr_type.flags |= PTR_RAW; + type->expr.ptr_type.type = parse_type(p); + if (!type->expr.ptr_type.type) { + error(p, "expected type."); + return NULL; + } + } else if (match_peek(p, TOKEN_IDENTIFIER)) { + type = parse_factor(p); + } + + return type; +} + +static member *parse_member(parser *p) +{ + ast_node *type = parse_type(p); + + if (!match_peek(p, 
TOKEN_IDENTIFIER)) { + error(p, "expected identifier."); + return NULL; + } + + member *m = arena_alloc(p->allocator, sizeof(member)); + m->type = type; + m->name = peek(p)->lexeme; + m->name_len = peek(p)->lexeme_len; + advance(p); + + + return m; +} + +static variant *parse_variant(parser *p) +{ + if (!match_peek(p, TOKEN_IDENTIFIER)) { + error(p, "expected identifier."); + return NULL; + } + + variant *v = arena_alloc(p->allocator, sizeof(variant)); + v->name = peek(p)->lexeme; + v->name_len = peek(p)->lexeme_len; + advance(p); + + if (match(p, TOKEN_EQ)) { + v->value = parse_factor(p); + if (!v->value) { + error(p, "expected integer."); + return NULL; + } + + if (v->value->type != NODE_INTEGER) { + error(p, "expected integer."); + return NULL; + } + } + + return v; +} + +static ast_node *parse_enum(parser *p) +{ + ast_node *enm = arena_alloc(p->allocator, sizeof(ast_node)); + enm->type = NODE_ENUM; + enm->position = p->previous->position; + if (match_peek(p, TOKEN_IDENTIFIER)) { + /* Named enum */ + enm->expr.enm.name = peek(p)->lexeme; + enm->expr.enm.name_len = peek(p)->lexeme_len; + advance(p); + } else if (!match_peek(p, TOKEN_LCURLY)) { + error(p, "expected identifier or `{`."); + return NULL; + } else { + enm->expr.enm.name = NULL; + enm->expr.enm.name_len = 0; + } + + if (!match(p, TOKEN_LCURLY)) { + error(p, "expected `{`."); + return NULL; + } + + variant *prev = parse_variant(p); + variant *head = prev; + enm->expr.enm.variants = head; + if (!prev) { + error(p, "invalid enum definition. 
Enums should contain at least 1 variant."); + return NULL; + } + if (!match(p, TOKEN_COMMA)) { + if (!match(p, TOKEN_RCURLY)) { + error(p, "expected `,`."); + return NULL; + } else { + return enm; + } + } + while (!match(p, TOKEN_RCURLY)) { + variant *current = parse_variant(p); + if (!current) { + error(p, "expected variant definition."); + return NULL; + } + prev->next = current; + if (!match(p, TOKEN_COMMA)) { + if (!match_peek(p, TOKEN_RCURLY)) { + error(p, "expected `,`."); + return NULL; + } + } + + prev = current; + } + + return enm; +} + +static ast_node *parse_struct(parser *p) +{ + ast_node *structure = arena_alloc(p->allocator, sizeof(ast_node)); + structure->type = NODE_STRUCT; + structure->position = p->previous->position; + if (match_peek(p, TOKEN_IDENTIFIER)) { + /* Named structure */ + structure->expr.structure.name = peek(p)->lexeme; + structure->expr.structure.name_len = peek(p)->lexeme_len; + advance(p); + } else if (!match_peek(p, TOKEN_LCURLY)) { + error(p, "expected identifier or `{`."); + return NULL; + } else { + structure->expr.structure.name = NULL; + structure->expr.structure.name_len = 0; + } + + if (!match(p, TOKEN_LCURLY)) { + error(p, "expected `{`."); + return NULL; + } + + member *prev = parse_member(p); + member *head = prev; + structure->expr.structure.members = head; + if (!prev) { + error(p, "invalid struct definition. 
Structs should contain at least 1 member."); + return NULL; + } + if (!match(p, TOKEN_COMMA)) { + if (!match(p, TOKEN_RCURLY)) { + error(p, "expected `,`."); + return NULL; + } else { + return structure; + } + } + while (!match(p, TOKEN_RCURLY)) { + member *current = parse_member(p); + if (!current) { + error(p, "expected member definition."); + return NULL; + } + prev->next = current; + if (!match(p, TOKEN_COMMA)) { + if (!match_peek(p, TOKEN_RCURLY)) { + error(p, "expected `,`."); + return NULL; + } + } + + prev = current; + } + + return structure; +} + +static ast_node *parse_function(parser *p) +{ + ast_node *fn = arena_alloc(p->allocator, sizeof(ast_node)); + fn->type = NODE_FUNCTION; + fn->expr.function.is_extern = false; + fn->expr.function.type = parse_type(p); + fn->expr.function.name = peek(p)->lexeme; + fn->expr.function.name_len = peek(p)->lexeme_len; + advance(p); + /* Consume `(` */ + advance(p); + + if (match(p, TOKEN_RPAREN)) { + // Check if this is an extern declaration (semicolon) or definition (body) + if (match_peek(p, TOKEN_SEMICOLON)) { + // Extern function - no body, just consume semicolon + advance(p); + fn->expr.function.body = NULL; + } else { + fn->expr.function.body = parse_compound(p); + } + fn->expr.function.parameters = NULL; + fn->expr.function.parameters_len = 0; + return fn; + } + member *prev = parse_member(p); + member *head = prev; + fn->expr.function.parameters = head; + fn->expr.function.parameters_len = 1; + if (!match(p, TOKEN_COMMA)) { + if (!match(p, TOKEN_RPAREN)) { + error(p, "expected `,`."); + return NULL; + } else { + // Check if this is an extern declaration (semicolon) or definition (body) + if (match_peek(p, TOKEN_SEMICOLON)) { + advance(p); + fn->expr.function.body = NULL; + } else { + fn->expr.function.body = parse_compound(p); + } + return fn; + } + } + while (!match(p, TOKEN_RPAREN)) { + member *current = parse_member(p); + if (!current) { + error(p, "expected parameter."); + return NULL; + } + prev->next = 
current; + if (!match(p, TOKEN_COMMA)) { + if (!match_peek(p, TOKEN_RPAREN)) { + error(p, "expected `,`."); + return NULL; + } + } + fn->expr.function.parameters_len += 1; + + prev = current; + } + + // Check if this is an extern declaration (semicolon) or definition (body) + if (match_peek(p, TOKEN_SEMICOLON)) { + advance(p); + fn->expr.function.body = NULL; + } else { + fn->expr.function.body = parse_compound(p); + } + + return fn; +} + +static ast_node *parse_statement(parser *p) +{ + token *cur = peek(p); + + /* Check for extern function declaration */ + bool is_extern = false; + if (match(p, TOKEN_EXTERN)) { + is_extern = true; + } + + ast_node *type = parse_type(p); + if (type && type->type == NODE_STRUCT && type->expr.structure.name_len > 0) { + goto skip_struct; + } + if (type && match_peek(p, TOKEN_IDENTIFIER)) { + if (p->tokens->next && p->tokens->next->type == TOKEN_LPAREN) { + /* Function definition. */ + p->tokens = cur; + if (is_extern) { + advance(p); // Skip TOKEN_EXTERN + } + ast_node *fn = parse_function(p); + if (fn && is_extern) { + fn->expr.function.is_extern = true; + fn->expr.function.body = NULL; + } + return fn; + } + p->tokens = cur; + if (is_extern) { + advance(p); // Skip TOKEN_EXTERN for non-function case + } + /* Variable declaration. 
*/ + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_VAR_DECL; + node->position = p->previous->position; + node->expr.var_decl.type = parse_type(p); + node->expr.var_decl.name = p->tokens->lexeme; + node->expr.var_decl.name_len = p->tokens->lexeme_len; + advance(p); + if (match(p, TOKEN_EQ)) { + node->expr.var_decl.value = parse_expression(p); + } else { + node->expr.var_decl.value = NULL; + } + + if (!match(p, TOKEN_SEMICOLON)) { + error(p, "expected `;` after statement."); + return NULL; + } + + return node; + } +skip_struct: + p->tokens = cur; + + if (match(p, TOKEN_BREAK)) + { + if (!match(p, TOKEN_SEMICOLON)) + { + error(p, "expected `;` after `break`."); + return NULL; + } + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_BREAK; + node->position = p->previous->position; + return node; + } + else if (match(p, TOKEN_RETURN)) + { + ast_node *expr = parse_expression(p); + + if (!expr) + { + error(p, "expected expression after `return`."); + return NULL; + } + if (!match(p, TOKEN_SEMICOLON)) + { + error(p, "expected `;`."); + return NULL; + } + + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_RETURN; + node->position = p->previous->position; + node->expr.ret.value = expr; + return node; + } + else if (match_peek(p, TOKEN_IDENTIFIER) && p->tokens->next && p->tokens->next->type == TOKEN_COLON) + { + /* In this case, this is a label. 
*/ + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_LABEL; + node->position = p->previous->position; + node->expr.label.name = p->tokens->lexeme; + node->expr.label.name_len = p->tokens->lexeme_len; + advance(p); + /* Consume `:` */ + advance(p); + return node; + } + else if (match(p, TOKEN_GOTO)) + { + if (!match_peek(p, TOKEN_IDENTIFIER)) + { + error(p, "expected label identifier after `goto`."); + return NULL; + } + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_GOTO; + node->position = p->previous->position; + node->expr.label.name = p->tokens->lexeme; + node->expr.label.name_len = p->tokens->lexeme_len; + advance(p); + if (!match(p, TOKEN_SEMICOLON)) + { + error(p, "expected `;` after `goto`."); + return NULL; + } + return node; + } + else if (match(p, TOKEN_IMPORT)) + { + ast_node *expr = parse_expression(p); + if (!expr) + { + error(p, "expected module path after `import`."); + return NULL; + } + if (expr->type != NODE_ACCESS && expr->type != NODE_IDENTIFIER) + { + error(p, "expected module path after `import`."); + return NULL; + } + + ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); + node->type = NODE_IMPORT; + node->position = p->previous->position; + node->expr.import.path = expr; + + if (!match(p, TOKEN_SEMICOLON)) + { + error(p, "expected `;` after `import`."); + return NULL; + } + + return node; + } + else if (match(p, TOKEN_LOOP)) + { + if (p->tokens->type == TOKEN_LPAREN) + { + return parse_for(p); + } + else + { + return parse_while(p); + } + } + else if (match(p, TOKEN_IF)) { + return parse_if(p); + } + else if (match(p, TOKEN_STRUCT)) + { + return parse_struct(p); + } + else if (match(p, TOKEN_ENUM)) + { + return parse_enum(p); + } + else if (match(p, TOKEN_UNION)) + { + ast_node *u = parse_struct(p); + u->type = NODE_UNION; + return u; + } + else + { + ast_node *expr = parse_expression(p); + if (!expr) + { + return NULL; + } + if (!match(p, TOKEN_SEMICOLON)) + { 
+ error(p, "expected `;` after expression."); + return NULL; + } + return expr; + } +} + +/* Get a list of expressions to form a full AST. */ +static void parse(parser *p) +{ + p->ast = arena_alloc(p->allocator, sizeof(ast_node)); + p->ast->type = NODE_UNIT; + p->ast->expr.unit_node.expr = parse_statement(p); + ast_node *tail = p->ast; + ast_node *expr = parse_statement(p); + while (expr) { + tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); + tail->expr.unit_node.next->expr.unit_node.expr = expr; + tail = tail->expr.unit_node.next; + tail->type = NODE_UNIT; + expr = parse_statement(p); + } +} + +parser *parser_init(lexer *l, arena *allocator) +{ + parser *p = arena_alloc(allocator, sizeof(parser)); + p->tokens = l->tokens; + p->allocator= allocator; + + parse(p); + + if (has_errors) { + printf("Compilation failed.\n"); + exit(1); + } + + return p; +} diff --git a/parser.h b/parser.h new file mode 100644 index 0000000..bbe3d0c --- /dev/null +++ b/parser.h @@ -0,0 +1,256 @@ +#ifndef PARSER_H +#define PARSER_H + +#include "lexer.h" +#include "utils.h" +#include + +struct _type; +struct _ast_node; + +typedef enum { + OP_PLUS, // + + OP_MINUS, // - + OP_DIV, // / + OP_MUL, // * + OP_MOD, // % + OP_BOR, // | + OP_BAND, // & + OP_BXOR, // ^ + + OP_ASSIGN, // = + OP_ASSIGN_PTR, // <- + OP_RSHIFT_EQ, // >>= + OP_LSHIFT_EQ, // <<= + OP_PLUS_EQ, // += + OP_MINUS_EQ, // -= + OP_DIV_EQ, // /= + OP_MUL_EQ, // *= + OP_BOR_EQ, // |= + OP_BAND_EQ, // &= + OP_BXOR_EQ, // ^= + OP_MOD_EQ, // %= + + OP_EQ, // == + OP_AND, // && + OP_OR, // || + OP_NEQ, // != + OP_GT, // > + OP_LT, // < + OP_GE, // >= + OP_LE, // <= +} binary_op; + +typedef enum { + UOP_INCR, // ++ + UOP_MINUS, // - + UOP_DECR, // -- + UOP_DEREF, // * + UOP_REF, // & + UOP_NOT, // ! 
+} unary_op; + +typedef enum { + LAYOUT_AUTO, + LAYOUT_PACKED, + LAYOUT_EXTERN +} struct_layout; + +typedef struct _member { + struct _ast_node *type; + char *name; + usize name_len; + struct _member *next; + usize offset; +} member; + +typedef struct { + char *name; + usize name_len; + member *params; +} function; + +typedef struct _variant { + struct _ast_node *value; + char *name; + usize name_len; + struct _variant *next; +} variant; + +typedef enum { + NODE_IDENTIFIER, + NODE_INTEGER, + NODE_FLOAT, + NODE_STRING, + NODE_CHAR, + NODE_BOOL, + NODE_CAST, + NODE_UNARY, + NODE_BINARY, + NODE_RANGE, + NODE_ARRAY_SUBSCRIPT, + NODE_POSTFIX, + NODE_CALL, + NODE_ACCESS, + NODE_STRUCT_INIT, + NODE_TERNARY, /* TODO */ + + NODE_BREAK, + NODE_RETURN, + NODE_IMPORT, + NODE_FOR, + NODE_WHILE, + NODE_IF, + NODE_VAR_DECL, + NODE_LABEL, + NODE_GOTO, + + NODE_ENUM, + NODE_STRUCT, + NODE_UNION, + NODE_FUNCTION, + NODE_PTR_TYPE, + NODE_SWITCH, /* TODO */ + NODE_UNIT, +} node_type; + +#define PTR_SLICE 0x0 +#define PTR_RAW 0x1 +#define PTR_CONST 0x2 +#define PTR_VOLATILE 0x4 + +#define LOOP_WHILE 0x1 +#define LOOP_UNTIL 0x2 +#define LOOP_AFTER 0x4 + +typedef struct _ast_node { + node_type type; + source_pos position; + struct _type *expr_type; + bool address_taken; // used in IR generation. 
+ union { + struct { + struct _ast_node *type; + u8 flags; + } ptr_type; + struct { + char *name; + usize name_len; + } label; // both label and goto + struct { + struct _ast_node *left; + struct _ast_node *right; + binary_op operator; + } binary; + struct { + struct _ast_node *right; + unary_op operator; + } unary; + u8 boolean; + i64 integer; + f64 flt; // float + struct { + char *start; + usize len; + } string; + char ch; // char; + struct { + struct _ast_node *condition; + struct _ast_node *then; + struct _ast_node *otherwise; + } ternary; + struct { + struct _ast_node *value; + struct _ast_node *type; + } cast; + struct { + struct _ast_node *expr; + struct _ast_node *index; + } subscript; + struct { + struct _ast_node *expr; + struct _ast_node *member; + } access; + struct { + struct _ast_node *expr; + struct _ast_node *next; + } unit_node; + struct { + /* This should be a list of unit_node */ + struct _ast_node *parameters; + usize param_len; + char *name; + usize name_len; + } call; + struct { + struct _ast_node *value; + } ret; + struct { + /* This should be an access. 
*/ + struct _ast_node *path; + } import; + struct { + /* These should be lists of unit_node */ + struct _ast_node *slices; + usize slice_len; + struct _ast_node *captures; + usize capture_len; + struct _ast_node* body; + } fr; // for + struct { + struct _ast_node *condition; + struct _ast_node *body; + u8 flags; + } whle; // while + struct { + struct _ast_node *condition; + struct _ast_node *body; + struct _ast_node *otherwise; + u8 flags; + } if_stmt; // while + struct { + struct _ast_node **statements; + usize stmt_len; + } compound; + struct { + struct _ast_node *value; + char *name; + usize name_len; + struct _ast_node *type; + } var_decl; + struct { + member *members; + char *name; + usize name_len; + } structure; + struct { + member *parameters; + usize parameters_len; + char *name; + usize name_len; + struct _ast_node *type; + struct _ast_node *body; + bool is_extern; + } function; + struct { + variant *variants; + char *name; + usize name_len; + } enm; // enum + struct { + struct _ast_node *members; + usize members_len; + } struct_init; + } expr; +} ast_node; + +typedef struct { + token *tokens; + token *previous; + ast_node *ast; + arena *allocator; +} parser; + +parser *parser_init(lexer *l, arena *allocator); + +#endif diff --git a/sema.c b/sema.c new file mode 100644 index 0000000..4e87625 --- /dev/null +++ b/sema.c @@ -0,0 +1,920 @@ +#define STB_DS_IMPLEMENTATION +#include "sema.h" +#include +#include + +typedef struct _res_node { + struct _res_node **in; + struct _res_node **out; + type *value; +} res_node; + +typedef struct { res_node node; bool complete; } pair; + +typedef struct { u8 flags; char *name; } type_key; + +static struct { char *key; pair *value; } *types; +static struct { char *key; type *value; } *type_reg; + +static struct { char *key; prototype *value; } *prototypes; + +static scope *global_scope = NULL; +static scope *current_scope = NULL; +static type *current_return = NULL; + +static type *const_int = NULL; +static type 
*const_float = NULL; + +static bool in_loop = false; +static bool has_errors = false; + +static void error(ast_node *n, char *msg) +{ + has_errors = true; + if (n) { + printf("\x1b[31m\x1b[1merror\x1b[0m\x1b[1m:%ld:%ld:\x1b[0m %s\n", n->position.row, n->position.column, msg); + } else { + printf("\x1b[31m\x1b[1merror\x1b[0m\x1b[1m:\x1b[0m %s\n", msg); + } +} + +static char *intern_string(sema *s, char *str, usize len) +{ + (void) s; + char *ptr = malloc(len + 1); + memcpy(ptr, str, len); + ptr[len] = '\0'; + return ptr; +} + +static type *create_integer(sema *s, char *name, u8 bits, bool sign) +{ + type *t = arena_alloc(s->allocator, sizeof(type)); + t->name = name; + t->tag = sign ? TYPE_INTEGER : TYPE_UINTEGER; + t->data.integer = bits; + + pair *graph_node = arena_alloc(s->allocator, sizeof(pair)); + graph_node->node.value = t; + graph_node->node.in = NULL; + graph_node->node.out = NULL; + + shput(types, name, graph_node); + return t; +} + +static type *create_float(sema *s, char *name, u8 bits) +{ + type *t = arena_alloc(s->allocator, sizeof(type)); + t->name = name; + t->tag = TYPE_FLOAT; + t->data.flt = bits; + + pair *graph_node = arena_alloc(s->allocator, sizeof(pair)); + graph_node->node.value = t; + graph_node->node.in = NULL; + graph_node->node.out = NULL; + + shput(types, name, graph_node); + return t; +} + +static void order_type(sema *s, ast_node *node) +{ + if (node->type == NODE_STRUCT || node->type == NODE_UNION) { + type *t = arena_alloc(s->allocator, sizeof(type)); + t->tag = node->type == NODE_STRUCT ? 
TYPE_STRUCT : TYPE_UNION; + t->data.structure.name = node->expr.structure.name; + t->data.structure.name_len = node->expr.structure.name_len; + t->data.structure.members = node->expr.structure.members; + + char *k = intern_string(s, node->expr.structure.name, node->expr.structure.name_len); + t->name = k; + pair *graph_node = shget(types, k); + + if (!graph_node) { + graph_node = arena_alloc(s->allocator, sizeof(pair)); + graph_node->node.in = NULL; + graph_node->node.out = NULL; + } else if (graph_node->complete) { + error(node, "type already defined."); + return; + } + graph_node->node.value = t; + + member *m = t->data.structure.members; + while (m) { + if (m->type->type != NODE_IDENTIFIER) { + m = m->next; + continue; + } + char *name = intern_string(s, m->type->expr.string.start, m->type->expr.string.len); + pair *p = shget(types, name); + if (!p) { + p = arena_alloc(s->allocator, sizeof(pair)); + p->node.out = NULL; + p->node.in = NULL; + p->node.value = NULL; + p->complete = false; + shput(types, name, p); + } + + arrput(graph_node->node.in, &p->node); + arrput(p->node.out, &graph_node->node); + + m = m->next; + } + + shput(types, k, graph_node); + graph_node->complete = true; + } +} + +static type *get_type(sema *s, ast_node *n) +{ + char *name = NULL; + type *t = NULL; + switch (n->type) { + case NODE_ACCESS: + t = get_type(s, n->expr.access.expr); + name = intern_string(s, n->expr.access.member->expr.string.start, n->expr.access.member->expr.string.len); + if (t->tag != TYPE_STRUCT) { + error(n->expr.access.expr, "expected structure."); + return NULL; + } + t = shget(t->data.structure.member_types, name); + + return t; + case NODE_IDENTIFIER: + name = intern_string(s, n->expr.string.start, n->expr.string.len); + t = shget(type_reg, name); + free(name); + return t; + case NODE_PTR_TYPE: + t = malloc(sizeof(type)); + t->alignment = sizeof(usize); + if (n->expr.ptr_type.flags & PTR_RAW) { + t->name = "ptr"; + t->tag = TYPE_PTR; + t->size = sizeof(usize); + 
t->data.ptr.child = get_type(s, n->expr.ptr_type.type); + t->data.ptr.is_const = (n->expr.ptr_type.flags & PTR_CONST) != 0; + t->data.ptr.is_volatile = (n->expr.ptr_type.flags & PTR_VOLATILE) != 0; + } else { + t->name = "slice"; + t->tag = TYPE_SLICE; + t->size = sizeof(usize) * 2; // ptr + len = 16 bytes + t->data.slice.child = get_type(s, n->expr.ptr_type.type); + t->data.slice.is_const = (n->expr.ptr_type.flags & PTR_CONST) != 0; + t->data.slice.is_volatile = (n->expr.ptr_type.flags & PTR_VOLATILE) != 0; + } + return t; + default: + error(n, "expected type."); + return NULL; + } +} + +static void register_struct(sema *s, char *name, type *t) +{ + usize alignment = 0; + member *m = t->data.structure.members; + + usize offset = 0; + type *m_type = NULL; + while (m) { + m_type = get_type(s, m->type); + + if (!m_type) { + error(m->type, "unknown type."); + return; + } + + char *n = intern_string(s, m->name, m->name_len); + shput(t->data.structure.member_types, n, m_type); + + if (m_type->size == 0) { + error(m->type, "a struct member can't be of type `void`."); + return; + } + + if (alignment < m_type->alignment) { + alignment = m_type->alignment; + } + + usize padding = (m_type->alignment - (offset % m_type->alignment)) % m_type->alignment; + offset += padding; + m->offset = offset; + offset += m_type->size; + + m = m->next; + } + + t->alignment = alignment; + + if (t->alignment > 0) { + usize trailing_padding = (t->alignment - (offset % t->alignment)) % t->alignment; + offset += trailing_padding; + } + + t->size = offset; +} + +static void register_union(sema *s, char *name, type *t) +{ + usize alignment = 0; + usize size = 0; + member *m = t->data.structure.members; + while (m) { + type *m_type = get_type(s, m->type); + + if (!m_type) { + error(m->type, "unknown type."); + return; + } + + char *n = intern_string(s, m->name, m->name_len); + shput(t->data.structure.member_types, n, m_type); + + if (alignment < m_type->alignment) { + alignment = m_type->alignment; 
+ } + + if (size < m_type->size) { + size = m_type->size; + } + + m = m->next; + } + + t->alignment = alignment; + t->size = size; +} + +static void register_type(sema *s, char *name, type *t) +{ + switch (t->tag) { + case TYPE_INTEGER: + case TYPE_UINTEGER: + t->size = t->data.integer / 8; + t->alignment = t->data.integer / 8; + break; + case TYPE_PTR: + t->size = 8; + t->alignment = 8; + break; + case TYPE_FLOAT: + t->size = t->data.flt / 8; + t->alignment = t->data.flt / 8; + break; + case TYPE_STRUCT: + register_struct(s, name, t); + break; + case TYPE_UNION: + register_union(s, name, t); + break; + default: + error(NULL, "registering an invalid type."); + return; + } + + shput(type_reg, name, t); +} + +static void create_types(sema *s) +{ + res_node **nodes = NULL; + res_node **ordered = NULL; + usize node_count = shlen(types); + for (int i=0; i < node_count; i++) { + if (arrlen(types[i].value->node.in) == 0) { + arrput(nodes, &types[i].value->node); + } + } + + while (arrlen(nodes) > 0) { + res_node *n = nodes[0]; + arrdel(nodes, 0); + arrput(ordered, n); + while (arrlen(n->out) > 0) { + res_node *dep = n->out[0]; + arrdel(n->out, 0); + + for (int j=0; j < arrlen(dep->in); j++) { + if (dep->in[j] == n) { + arrdel(dep->in, j); + } + } + + if (arrlen(dep->in) == 0) { + arrput(nodes, dep); + } + } + } + + if (arrlen(ordered) < node_count) { + error(NULL, "cycling struct definition."); + } + + for (int i=0; i < arrlen(ordered); i++) { + type *t = ordered[i]->value; + if (t && (t->tag == TYPE_STRUCT || t->tag == TYPE_UNION)) { + char *name = t->name; + register_type(s, name, t); + } + } +} + +static void create_prototype(sema *s, ast_node *node) +{ + prototype *p = arena_alloc(s->allocator, sizeof(prototype)); + p->name = intern_string(s, node->expr.function.name, node->expr.function.name_len); + node->expr.function.name = p->name; + if (shget(prototypes, p->name)) { + error(node, "function already defined."); + } + + member *m = node->expr.function.parameters; + 
while (m) { + type *t = get_type(s, m->type); + if (!t) { + error(m->type, "unknown type."); + return; + } + + arrput(p->parameters, t); + m = m->next; + } + + p->type = get_type(s, node->expr.function.type); + shput(prototypes, p->name, p); +} + +static void push_scope(sema *s) +{ + scope *scp = arena_alloc(s->allocator, sizeof(scope)); + scp->parent = current_scope; + current_scope = scp; +} + +static void pop_scope(sema *s) +{ + current_scope = current_scope->parent; +} + +static ast_node *get_def(sema *s, char *name) +{ + scope *current = current_scope; + while (current) { + ast_node *def = shget(current->defs, name); + if (def) return def; + + current = current->parent; + } + + return NULL; +} + +static type *get_string_type(sema *s, ast_node *node) +{ + type *string_type = arena_alloc(s->allocator, sizeof(type)); + string_type->tag = TYPE_SLICE; + string_type->size = sizeof(usize) * 2; // ptr + len = 16 bytes + string_type->alignment = sizeof(usize); + string_type->name = "slice"; + string_type->data.slice.child = shget(type_reg, "u8"); + string_type->data.slice.is_const = true; + string_type->data.slice.is_volatile = false; + string_type->data.slice.len = node->expr.string.len; + return string_type; +} + +static type *get_range_type(sema *s, ast_node *node) +{ + type *range_type = arena_alloc(s->allocator, sizeof(type)); + range_type->tag = TYPE_PTR; + range_type->size = sizeof(usize); + range_type->alignment = sizeof(usize); + range_type->name = "slice"; + range_type->data.slice.child = shget(type_reg, "usize"); + range_type->data.slice.is_const = true; + range_type->data.slice.is_volatile = false; + range_type->data.slice.len = node->expr.binary.right->expr.integer - node->expr.binary.left->expr.integer; + return range_type; +} + +static type *get_expression_type(sema *s, ast_node *node); +static type *get_access_type(sema *s, ast_node *node) +{ + type *t = get_expression_type(s, node->expr.access.expr); + ast_node *member = node->expr.access.member; + 
char *name_start = member->expr.string.start; + usize name_len = member->expr.string.len; + + // Handle slice field access + if (t && t->tag == TYPE_SLICE) { + char *name = intern_string(s, name_start, name_len); + if (strcmp(name, "ptr") == 0) { + // Return pointer to element type + type *ptr_type = arena_alloc(s->allocator, sizeof(type)); + ptr_type->tag = TYPE_PTR; + ptr_type->size = 8; + ptr_type->alignment = 8; + ptr_type->name = "ptr"; + ptr_type->data.ptr.child = t->data.slice.child; + ptr_type->data.ptr.is_const = t->data.slice.is_const; + ptr_type->data.ptr.is_volatile = t->data.slice.is_volatile; + free(name); + return ptr_type; + } else if (strcmp(name, "len") == 0) { + // Return usize type + free(name); + return shget(type_reg, "usize"); + } else { + error(node, "slice doesn't have that field"); + free(name); + return NULL; + } + } + + if (!t || (t->tag != TYPE_STRUCT && t->tag != TYPE_UNION)) { + error(node, "invalid expression."); + return NULL; + } + char *name = intern_string(s, name_start, name_len); + type *res = shget(t->data.structure.member_types, name); + if (!res) { + error(node, "struct doesn't have that member"); + return NULL; + } + + return res; +} + +static type *get_identifier_type(sema *s, ast_node *node) +{ + char *name_start = node->expr.string.start; + usize name_len = node->expr.string.len; + char *name = intern_string(s, name_start, name_len); + node->expr.string.start = name; + ast_node *def = get_def(s, name); + if (!def) { + error(node, "unknown identifier."); + } + return def->expr_type; +} + +static bool match(type *t1, type *t2); + +static bool can_cast(type *source, type *dest) +{ + if (!dest || !source) return false; + + switch (dest->tag) { + case TYPE_INTEGER: + case TYPE_UINTEGER: + case TYPE_INTEGER_CONST: + return source->tag == TYPE_INTEGER_CONST || source->tag == TYPE_INTEGER || source->tag == TYPE_UINTEGER; + case TYPE_FLOAT: + return source->tag == TYPE_FLOAT_CONST; + default: + return false; + } +} + +static type 
*get_expression_type(sema *s, ast_node *node) +{ + if (!node) { + return shget(type_reg, "void"); + } + + type *t = NULL; + prototype *prot = NULL; + switch (node->type) { + case NODE_IDENTIFIER: + t = get_identifier_type(s, node); + node->expr_type = t; + return t; + case NODE_INTEGER: + node->expr_type = const_int; + return const_int; + case NODE_FLOAT: + node->expr_type = const_float; + return const_float; + case NODE_STRING: + t = get_string_type(s, node); + node->expr_type = t; + return t; + case NODE_CHAR: + t = shget(type_reg, "u8"); + node->expr_type = t; + return t; + case NODE_BOOL: + t = shget(type_reg, "bool"); + node->expr_type = t; + return t; + case NODE_CAST: + t = get_type(s, node->expr.cast.type); + node->expr_type = t; + return t; + case NODE_POSTFIX: + case NODE_UNARY: + t = get_expression_type(s, node->expr.unary.right); + if (node->expr.unary.operator == UOP_REF) { + ast_node *target = node->expr.unary.right; + while (target->type == NODE_ACCESS) { + target = target->expr.access.expr; + } + + if (target->type != NODE_IDENTIFIER) { + error(node, "expected identifier."); + return NULL; + } + + char *name = target->expr.string.start; + ast_node *def = get_def(s, name); + + if (def) { + def->address_taken = true; + target->address_taken = true; + } + + type *tmp = t; + t = arena_alloc(s->allocator, sizeof(type)); + t->tag = TYPE_PTR; + t->size = sizeof(usize); + t->alignment = sizeof(usize); + t->name = "ptr"; + t->data.ptr.is_const = false; + t->data.ptr.is_volatile = false; + t->data.ptr.child = tmp; + } else if (node->expr.unary.operator == UOP_DEREF) { + if (t->tag != TYPE_PTR) { + error(node, "only pointers can be dereferenced."); + return NULL; + } + t = t->data.ptr.child; + } + node->expr_type = t; + return t; + case NODE_BINARY: + t = get_expression_type(s, node->expr.binary.left); + if (!t) return NULL; + if (node->expr.binary.operator == OP_ASSIGN_PTR) { + if (t->tag != TYPE_PTR) { + error(node, "expected pointer."); + return NULL; + } + 
t = t->data.ptr.child; + } + if (!can_cast(get_expression_type(s, node->expr.binary.right), t) && !match(t, get_expression_type(s, node->expr.binary.right))) { + error(node, "type mismatch."); + node->expr_type = NULL; + return NULL; + } + if (node->expr.binary.operator >= OP_EQ) { + t = shget(type_reg, "bool"); + } else if (node->expr.binary.operator >= OP_ASSIGN && node->expr.binary.operator <= OP_MOD_EQ) { + t = shget(type_reg, "void"); + } + node->expr_type = t; + return t; + case NODE_RANGE: + t = get_range_type(s, node); + node->expr_type = t; + return t; + case NODE_ARRAY_SUBSCRIPT: + t = get_expression_type(s, node->expr.subscript.expr); + + // Check if this is range subscripting (creates a slice) + if (node->expr.subscript.index && node->expr.subscript.index->type == NODE_RANGE) { + type *element_type = NULL; + switch (t->tag) { + case TYPE_SLICE: + element_type = t->data.slice.child; + break; + case TYPE_PTR: + element_type = t->data.ptr.child; + break; + default: + error(node, "only pointers and slices can be indexed."); + return NULL; + } + + // Return a slice type + type *slice_type = arena_alloc(s->allocator, sizeof(type)); + slice_type->tag = TYPE_SLICE; + slice_type->size = sizeof(usize) * 2; + slice_type->alignment = sizeof(usize); + slice_type->data.slice.child = element_type; + slice_type->data.slice.is_const = false; + slice_type->data.slice.len = 0; + + node->expr_type = slice_type; + return slice_type; + } + + // Regular subscript - return element type + switch (t->tag) { + case TYPE_SLICE: + t = t->data.slice.child; + break; + case TYPE_PTR: + t = t->data.ptr.child; + break; + default: + error(node, "only pointers and slices can be indexed."); + return NULL; + } + node->expr_type = t; + return t; + case NODE_CALL: + node->expr.call.name = intern_string(s, node->expr.call.name, node->expr.call.name_len); + prot = shget(prototypes, node->expr.call.name); + if (!prot) { + error(node, "unknown function."); + return NULL; + } + // Process call 
arguments + ast_node *arg = node->expr.call.parameters; + while (arg && arg->type == NODE_UNIT) { + if (arg->expr.unit_node.expr) { + get_expression_type(s, arg->expr.unit_node.expr); + } + arg = arg->expr.unit_node.next; + } + t = prot->type; /* a call has the return type recorded in the prototype */ + node->expr_type = t; + return t; + case NODE_ACCESS: + t = get_access_type(s, node); + node->expr_type = t; + return t; + default: + t = shget(type_reg, "void"); + node->expr_type = t; + return t; + } +} +/* Type equality. False when either type is NULL or the tags differ. Pointers and slices compare qualifiers and child types recursively (slices also their length), structs and unions compare by identity, integers and floats by bit width. Enum, generic and literal (const) kinds never match. */ +static bool match(type *t1, type *t2) +{ + if (!t1 || !t2) return false; + if (t1->tag != t2->tag) return false; + + switch(t1->tag) { + case TYPE_VOID: + case TYPE_BOOL: + return true; + case TYPE_PTR: + return (t1->data.ptr.is_const == t2->data.ptr.is_const) && (t1->data.ptr.is_volatile == t2->data.ptr.is_volatile) && match(t1->data.ptr.child, t2->data.ptr.child); + case TYPE_SLICE: + return (t1->data.slice.is_const == t2->data.slice.is_const) && (t1->data.slice.is_volatile == t2->data.slice.is_volatile) && match(t1->data.slice.child, t2->data.slice.child) && t1->data.slice.len == t2->data.slice.len; + case TYPE_STRUCT: + case TYPE_UNION: + return t1 == t2; + case TYPE_INTEGER: + case TYPE_UINTEGER: + return t1->data.integer == t2->data.integer; + case TYPE_FLOAT: + return t1->data.flt == t2->data.flt; + case TYPE_ENUM: + case TYPE_GENERIC: + /* TODO */ + return false; + case TYPE_INTEGER_CONST: + case TYPE_FLOAT_CONST: + return false; + } + + return false; +} + +static void check_statement(sema *s, ast_node *node); +/* Check every statement of a NODE_UNIT list inside a scope of its own. */ static void check_body(sema *s, ast_node *node) +{ + push_scope(s); + + ast_node *current = node; + while (current && current->type == NODE_UNIT) { + check_statement(s, current->expr.unit_node.expr); + current = current->expr.unit_node.next; + } + + pop_scope(s); +} +/* Check a `for` loop: captures are declared and the body is checked inside a scope of their own. */ +static void check_for(sema *s, ast_node *node) +{ + ast_node *slices = node->expr.fr.slices; + ast_node *captures = node->expr.fr.captures; + + push_scope(s); + + ast_node *current_capture = captures; + ast_node *current_slice = slices; + + 
while (current_capture) { + type *c_type = get_expression_type(s, current_slice->expr.unit_node.expr); + char *c_name = intern_string(s, current_capture->expr.unit_node.expr->expr.string.start, current_capture->expr.unit_node.expr->expr.string.len); + + ast_node *cap_node = arena_alloc(s->allocator, sizeof(ast_node)); + cap_node->type = NODE_VAR_DECL; + cap_node->expr_type = c_type; + cap_node->address_taken = false; + cap_node->expr.var_decl.name = c_name; + + shput(current_scope->defs, c_name, cap_node); + current_capture = current_capture->expr.unit_node.next; + current_slice = current_slice->expr.unit_node.next; + } + + ast_node *current = node->expr.fr.body; + + in_loop = true; + while (current && current->type == NODE_UNIT) { + check_statement(s, current->expr.unit_node.expr); + current = current->expr.unit_node.next; + } + in_loop = false; + + pop_scope(s); +} + +static void check_statement(sema *s, ast_node *node) +{ + if (!node) return; + + type *t = NULL; + char *name = NULL; + switch(node->type) { + case NODE_RETURN: + if (!can_cast(get_expression_type(s, node->expr.ret.value), current_return) && !match(get_expression_type(s, node->expr.ret.value), current_return)) { + error(node, "return type doesn't match function's one."); + } + break; + case NODE_BREAK: + if (!in_loop) { + error(node, "`break` isn't in a loop."); + } + break; + case NODE_WHILE: + if (!match(get_expression_type(s, node->expr.whle.condition), shget(type_reg, "bool"))) { + error(node, "expected boolean value."); + return; + } + + in_loop = true; + check_body(s, node->expr.whle.body); + in_loop = false; + break; + case NODE_IF: + if (!match(get_expression_type(s, node->expr.if_stmt.condition), shget(type_reg, "bool"))) { + error(node, "expected boolean value."); + return; + } + + check_body(s, node->expr.if_stmt.body); + if (node->expr.if_stmt.otherwise) check_body(s, node->expr.if_stmt.otherwise); + break; + case NODE_FOR: + check_for(s, node); + break; + case NODE_VAR_DECL: + t = 
get_type(s, node->expr.var_decl.type); + node->expr_type = t; + name = intern_string(s, node->expr.var_decl.name, node->expr.var_decl.name_len); + node->expr.var_decl.name = name; + if (get_def(s, name)) { + error(node, "redeclaration of variable."); + break; + } + if (t->tag == TYPE_STRUCT) { + // Struct initialization with NODE_STRUCT_INIT is allowed + } else if (node->expr.var_decl.value && node->expr.var_decl.value->type == NODE_STRUCT_INIT && + (t->tag == TYPE_SLICE || t->tag == TYPE_PTR)) { + // Array/slice initialization with NODE_STRUCT_INIT is allowed + } else if (node->expr.var_decl.value && node->expr.var_decl.value->type == NODE_RANGE && + t->tag == TYPE_SLICE) { + // Range initialization for slices is allowed + get_expression_type(s, node->expr.var_decl.value); + } else if (node->expr.var_decl.value && node->expr.var_decl.value->type == NODE_STRING && + t->tag == TYPE_SLICE) { + // String literal can be assigned to slice + get_expression_type(s, node->expr.var_decl.value); + } else if (!can_cast(get_expression_type(s, node->expr.var_decl.value), t) && !match(t, get_expression_type(s, node->expr.var_decl.value))) { + error(node, "type mismatch (decl)."); + } + shput(current_scope->defs, name, node); + break; + default: + get_expression_type(s, node); + break; + } +} + +static void check_function(sema *s, ast_node *f) +{ + push_scope(s); + current_return = get_type(s, f->expr.function.type); + + member *param = f->expr.function.parameters; + while (param) { + type *p_type = get_type(s, param->type); + char *t_name = intern_string(s, param->name, param->name_len); + param->name = t_name; + ast_node *param_node = arena_alloc(s->allocator, sizeof(ast_node)); + param_node->type = NODE_VAR_DECL; + param_node->expr_type = p_type; + param_node->address_taken = false; + param_node->expr.var_decl.name = t_name; + + shput(current_scope->defs, t_name, param_node); + param = param->next; + } + + // Skip body checking for extern functions + if 
(!f->expr.function.is_extern && f->expr.function.body) { + ast_node *current = f->expr.function.body; + while (current && current->type == NODE_UNIT) { + check_statement(s, current->expr.unit_node.expr); + current = current->expr.unit_node.next; + } + } + + pop_scope(s); +} +/* Analyse a translation unit in three passes over the top-level NODE_UNIT list: (1) order_type() on every node, followed by create_types(); (2) create_prototype() for every function; (3) check_function() for functions and check_statement() for everything else. */ +static void analyze_unit(sema *s, ast_node *node) +{ + ast_node *current = node; + while (current && current->type == NODE_UNIT) { + if (current->expr.unit_node.expr) + order_type(s, current->expr.unit_node.expr); + current = current->expr.unit_node.next; + } + + create_types(s); + + current = node; + while (current && current->type == NODE_UNIT) { + if (current->expr.unit_node.expr && current->expr.unit_node.expr->type == NODE_FUNCTION) { + create_prototype(s, current->expr.unit_node.expr); + } + current = current->expr.unit_node.next; + } + + current = node; + while (current && current->type == NODE_UNIT) { + if (current->expr.unit_node.expr && current->expr.unit_node.expr->type == NODE_FUNCTION) { + check_function(s, current->expr.unit_node.expr); + } else { + check_statement(s, current->expr.unit_node.expr); + } + current = current->expr.unit_node.next; + } +} +/* Entry point of semantic analysis: allocates the sema state from arena `a`, resets the type graph, creates the global scope, registers the built-in types and analyses p->ast. */ +void sema_init(parser *p, arena *a) +{ + sema *s = arena_alloc(a, sizeof(sema)); + s->allocator = a; + types = NULL; + s->ast = p->ast; + + global_scope = arena_alloc(a, sizeof(scope)); + global_scope->parent = NULL; + global_scope->defs = NULL; + current_scope = global_scope; +/* built-ins: name, width in bits, signedness ("void" and "bool" are registered as unsigned integers of 0 and 8 bits) */ + register_type(s, "void", create_integer(s, "void", 0, false)); + register_type(s, "bool", create_integer(s, "bool", 8, false)); + register_type(s, "u8", create_integer(s, "u8", 8, false)); + register_type(s, "u16", create_integer(s, "u16", 16, false)); + register_type(s, "u32", create_integer(s, "u32", 32, false)); + register_type(s, "u64", create_integer(s, "u64", 64, false)); + register_type(s, "usize", create_integer(s, "usize", 64, false)); + register_type(s, "i8", create_integer(s, "i8", 8, true)); + register_type(s, "i16", create_integer(s, "i16", 
16, true)); + register_type(s, "i32", create_integer(s, "i32", 32, true)); + register_type(s, "i64", create_integer(s, "i64", 64, true)); + register_type(s, "f32", create_float(s, "f32", 32)); + register_type(s, "f64", create_float(s, "f64", 64)); +/* singleton types given to integer and float literals */ + const_int = arena_alloc(s->allocator, sizeof(type)); + const_int->name = "const_int"; + const_int->tag = TYPE_INTEGER_CONST; + const_int->data.integer = 0; + + const_float = arena_alloc(s->allocator, sizeof(type)); + const_float->name = "const_float"; + const_float->tag = TYPE_FLOAT_CONST; + const_float->data.flt = 0; + + analyze_unit(s, s->ast); +/* any reported error aborts the compilation with exit status 1 */ + if (has_errors) { + printf("Compilation failed.\n"); + exit(1); + } +} diff --git a/sema.h b/sema.h new file mode 100644 index 0000000..4813675 --- /dev/null +++ b/sema.h @@ -0,0 +1,76 @@ +#ifndef SEMA_H +#define SEMA_H + +#include +#include "parser.h" +#include "stb_ds.h" +#include "utils.h" +/* Kind of a semantic type; selects which member of type.data is meaningful. */ +typedef enum { + TYPE_VOID, + TYPE_BOOL, + TYPE_PTR, + TYPE_SLICE, + TYPE_FLOAT, + TYPE_FLOAT_CONST, + TYPE_INTEGER, + TYPE_INTEGER_CONST, + TYPE_UINTEGER, + TYPE_STRUCT, + TYPE_UNION, + TYPE_ENUM, /* TODO */ + TYPE_GENERIC, /* TODO */ +} type_tag; +/* A semantic type: tag, size and alignment, a name, and a tag-specific payload in `data`. */ +typedef struct _type { + type_tag tag; + usize size; + usize alignment; + char *name; + union { + u8 integer; + u8 flt; // float + struct { + bool is_const; + bool is_volatile; + struct _type *child; + } ptr; + struct { + usize len; + bool is_const; + bool is_volatile; + struct _type *child; + } slice; + struct { + char *name; + usize name_len; + member *members; + struct { char *key; struct _type *value; } *member_types; + } structure; + struct { + char *name; + usize name_len; + variant *variants; + } enm; /* TODO */ + } data; +} type; +/* Signature of a function: its name, return type and parameter types (a stb_ds dynamic array). */ +typedef struct { + char *name; + type *type; + type **parameters; +} prototype; +/* A lexical scope: a string map from names to their declaring nodes, linked to the enclosing scope. */ +typedef struct _scope { + struct _scope *parent; + struct { char *key; ast_node *value; } *defs; +} scope; +/* State of the semantic analyser: the arena it allocates from and the AST root. */ +typedef struct { + arena *allocator; + ast_node *ast; +} sema; +/* Run semantic analysis on p->ast, allocating from `a`; prints "Compilation failed." and exits with status 1 if any error was reported. */ +void sema_init(parser *p, arena *a); + 
+#endif diff --git a/stb_ds.h b/stb_ds.h new file mode 100644 index 0000000..e84c82d --- /dev/null +++ b/stb_ds.h @@ -0,0 +1,1895 @@ +/* stb_ds.h - v0.67 - public domain data structures - Sean Barrett 2019 + + This is a single-header-file library that provides easy-to-use + dynamic arrays and hash tables for C (also works in C++). + + For a gentle introduction: + http://nothings.org/stb_ds + + To use this library, do this in *one* C or C++ file: + #define STB_DS_IMPLEMENTATION + #include "stb_ds.h" + +TABLE OF CONTENTS + + Table of Contents + Compile-time options + License + Documentation + Notes + Notes - Dynamic arrays + Notes - Hash maps + Credits + +COMPILE-TIME OPTIONS + + #define STBDS_NO_SHORT_NAMES + + This flag needs to be set globally. + + By default stb_ds exposes shorter function names that are not qualified + with the "stbds_" prefix. If these names conflict with the names in your + code, define this flag. + + #define STBDS_SIPHASH_2_4 + + This flag only needs to be set in the file containing #define STB_DS_IMPLEMENTATION. + + By default stb_ds.h hashes using a weaker variant of SipHash and a custom hash for + 4- and 8-byte keys. On 64-bit platforms, you can define the above flag to force + stb_ds.h to use specification-compliant SipHash-2-4 for all keys. Doing so makes + hash table insertion about 20% slower on 4- and 8-byte keys, 5% slower on + 64-byte keys, and 10% slower on 256-byte keys on my test computer. + + #define STBDS_REALLOC(context,ptr,size) better_realloc + #define STBDS_FREE(context,ptr) better_free + + These defines only need to be set in the file containing #define STB_DS_IMPLEMENTATION. + + By default stb_ds uses stdlib realloc() and free() for memory management. You can + substitute your own functions instead by defining these symbols. You must either + define both, or neither. Note that at the moment, 'context' will always be NULL. + @TODO add an array/hash initialization function that takes a memory context pointer. 
+ + #define STBDS_UNIT_TESTS + + Defines a function stbds_unit_tests() that checks the functioning of the data structures. + + Note that on older versions of gcc (e.g. 5.x.x) you may need to build with '-std=c++0x' + (or equivalently '-std=c++11') when using anonymous structures as seen on the web + page or in STBDS_UNIT_TESTS. + +LICENSE + + Placed in the public domain and also MIT licensed. + See end of file for detailed license information. + +DOCUMENTATION + + Dynamic Arrays + + Non-function interface: + + Declare an empty dynamic array of type T + T* foo = NULL; + + Access the i'th item of a dynamic array 'foo' of type T, T* foo: + foo[i] + + Functions (actually macros) + + arrfree: + void arrfree(T*); + Frees the array. + + arrlen: + ptrdiff_t arrlen(T*); + Returns the number of elements in the array. + + arrlenu: + size_t arrlenu(T*); + Returns the number of elements in the array as an unsigned type. + + arrpop: + T arrpop(T* a) + Removes the final element of the array and returns it. + + arrput: + T arrput(T* a, T b); + Appends the item b to the end of array a. Returns b. + + arrins: + T arrins(T* a, int p, T b); + Inserts the item b into the middle of array a, into a[p], + moving the rest of the array over. Returns b. + + arrinsn: + void arrinsn(T* a, int p, int n); + Inserts n uninitialized items into array a starting at a[p], + moving the rest of the array over. + + arraddnptr: + T* arraddnptr(T* a, int n) + Appends n uninitialized items onto array at the end. + Returns a pointer to the first uninitialized item added. + + arraddnindex: + size_t arraddnindex(T* a, int n) + Appends n uninitialized items onto array at the end. + Returns the index of the first uninitialized item added. + + arrdel: + void arrdel(T* a, int p); + Deletes the element at a[p], moving the rest of the array over. + + arrdeln: + void arrdeln(T* a, int p, int n); + Deletes n elements starting at a[p], moving the rest of the array over. 
+ + arrdelswap: + void arrdelswap(T* a, int p); + Deletes the element at a[p], replacing it with the element from + the end of the array. O(1) performance. + + arrsetlen: + void arrsetlen(T* a, int n); + Changes the length of the array to n. Allocates uninitialized + slots at the end if necessary. + + arrsetcap: + size_t arrsetcap(T* a, int n); + Sets the length of allocated storage to at least n. It will not + change the length of the array. + + arrcap: + size_t arrcap(T* a); + Returns the number of total elements the array can contain without + needing to be reallocated. + + Hash maps & String hash maps + + Given T is a structure type: struct { TK key; TV value; }. Note that some + functions do not require TV value and can have other fields. For string + hash maps, TK must be 'char *'. + + Special interface: + + stbds_rand_seed: + void stbds_rand_seed(size_t seed); + For security against adversarially chosen data, you should seed the + library with a strong random number. Or at least seed it with time(). + + stbds_hash_string: + size_t stbds_hash_string(char *str, size_t seed); + Returns a hash value for a string. + + stbds_hash_bytes: + size_t stbds_hash_bytes(void *p, size_t len, size_t seed); + These functions hash an arbitrary number of bytes. The function + uses a custom hash for 4- and 8-byte data, and a weakened version + of SipHash for everything else. On 64-bit platforms you can get + specification-compliant SipHash-2-4 on all data by defining + STBDS_SIPHASH_2_4, at a significant cost in speed. + + Non-function interface: + + Declare an empty hash map of type T + T* foo = NULL; + + Access the i'th entry in a hash table T* foo: + foo[i] + + Function interface (actually macros): + + hmfree + shfree + void hmfree(T*); + void shfree(T*); + Frees the hashmap and sets the pointer to NULL. + + hmlen + shlen + ptrdiff_t hmlen(T*) + ptrdiff_t shlen(T*) + Returns the number of elements in the hashmap. 
+ + hmlenu + shlenu + size_t hmlenu(T*) + size_t shlenu(T*) + Returns the number of elements in the hashmap. + + hmgeti + shgeti + hmgeti_ts + ptrdiff_t hmgeti(T*, TK key) + ptrdiff_t shgeti(T*, char* key) + ptrdiff_t hmgeti_ts(T*, TK key, ptrdiff_t tempvar) + Returns the index in the hashmap which has the key 'key', or -1 + if the key is not present. + + hmget + hmget_ts + shget + TV hmget(T*, TK key) + TV shget(T*, char* key) + TV hmget_ts(T*, TK key, ptrdiff_t tempvar) + Returns the value corresponding to 'key' in the hashmap. + The structure must have a 'value' field + + hmgets + shgets + T hmgets(T*, TK key) + T shgets(T*, char* key) + Returns the structure corresponding to 'key' in the hashmap. + + hmgetp + shgetp + hmgetp_ts + hmgetp_null + shgetp_null + T* hmgetp(T*, TK key) + T* shgetp(T*, char* key) + T* hmgetp_ts(T*, TK key, ptrdiff_t tempvar) + T* hmgetp_null(T*, TK key) + T* shgetp_null(T*, char *key) + Returns a pointer to the structure corresponding to 'key' in + the hashmap. Functions ending in "_null" return NULL if the key + is not present in the hashmap; the others return a pointer to a + structure holding the default value (but not the searched-for key). + + hmdefault + shdefault + TV hmdefault(T*, TV value) + TV shdefault(T*, TV value) + Sets the default value for the hashmap, the value which will be + returned by hmget/shget if the key is not present. + + hmdefaults + shdefaults + TV hmdefaults(T*, T item) + TV shdefaults(T*, T item) + Sets the default struct for the hashmap, the contents which will be + returned by hmgets/shgets if the key is not present. + + hmput + shput + TV hmput(T*, TK key, TV value) + TV shput(T*, char* key, TV value) + Inserts a pair into the hashmap. If the key is already + present in the hashmap, updates its value. + + hmputs + shputs + T hmputs(T*, T item) + T shputs(T*, T item) + Inserts a struct with T.key into the hashmap. If the struct is already + present in the hashmap, updates it. 
+ + hmdel + shdel + int hmdel(T*, TK key) + int shdel(T*, char* key) + If 'key' is in the hashmap, deletes its entry and returns 1. + Otherwise returns 0. + + Function interface (actually macros) for strings only: + + sh_new_strdup + void sh_new_strdup(T*); + Overwrites the existing pointer with a newly allocated + string hashmap which will automatically allocate and free + each string key using realloc/free + + sh_new_arena + void sh_new_arena(T*); + Overwrites the existing pointer with a newly allocated + string hashmap which will automatically allocate each string + key to a string arena. Every string key ever used by this + hash table remains in the arena until the arena is freed. + Additionally, any key which is deleted and reinserted will + be allocated multiple times in the string arena. + +NOTES + + * These data structures are realloc'd when they grow, and the macro + "functions" write to the provided pointer. This means: (a) the pointer + must be an lvalue, and (b) the pointer to the data structure is not + stable, and you must maintain it the same as you would a realloc'd + pointer. For example, if you pass a pointer to a dynamic array to a + function which updates it, the function must return back the new + pointer to the caller. This is the price of trying to do this in C. + + * The following are the only functions that are thread-safe on a single data + structure, i.e. can be run in multiple threads simultaneously on the same + data structure + hmlen shlen + hmlenu shlenu + hmget_ts shget_ts + hmgeti_ts shgeti_ts + hmgets_ts shgets_ts + + * You iterate over the contents of a dynamic array and a hashmap in exactly + the same way, using arrlen/hmlen/shlen: + + for (i=0; i < arrlen(foo); ++i) + ... foo[i] ... + + * All operations except arrins/arrdel are O(1) amortized, but individual + operations can be slow, so these data structures may not be suitable + for real time use. 
Dynamic arrays double in capacity as needed, so + elements are copied an average of once. Hash tables double/halve + their size as needed, with appropriate hysteresis to maintain O(1) + performance. + +NOTES - DYNAMIC ARRAY + + * If you know how long a dynamic array is going to be in advance, you can avoid + extra memory allocations by using arrsetlen to allocate it to that length in + advance and use foo[n] while filling it out, or arrsetcap to allocate the memory + for that length and use arrput/arrpush as normal. + + * Unlike some other versions of the dynamic array, this version should + be safe to use with strict-aliasing optimizations. + +NOTES - HASH MAP + + * For compilers other than GCC and clang (e.g. Visual Studio), for hmput/hmget/hmdel + and variants, the key must be an lvalue (so the macro can take the address of it). + Extensions are used that eliminate this requirement if you're using C99 and later + in GCC or clang, or if you're using C++ in GCC. But note that this can make your + code less portable. + + * To test for presence of a key in a hashmap, just do 'hmgeti(foo,key) >= 0'. + + * The iteration order of your data in the hashmap is determined solely by the + order of insertions and deletions. In particular, if you never delete, new + keys are always added at the end of the array. This will be consistent + across all platforms and versions of the library. However, you should not + attempt to serialize the internal hash table, as the hash is not consistent + between different platforms, and may change with future versions of the library. + + * Use sh_new_arena() for string hashmaps that you never delete from. Initialize + with NULL if you're managing the memory for your strings, or your strings are + never freed (at least until the hashmap is freed). Otherwise, use sh_new_strdup(). + @TODO: make an arena variant that garbage collects the strings with a trivial + copy collector into a new arena whenever the table shrinks / rebuilds. 
Since + current arena recommendation is to only use arena if it never deletes, then + this can just replace current arena implementation. + + * If adversarial input is a serious concern and you're on a 64-bit platform, + enable STBDS_SIPHASH_2_4 (see the 'Compile-time options' section), and pass + a strong random number to stbds_rand_seed. + + * The default value for the hash table is stored in foo[-1], so if you + use code like 'hmget(T,k)->value = 5' you can accidentally overwrite + the value stored by hmdefault if 'k' is not present. + +CREDITS + + Sean Barrett -- library, idea for dynamic array API/implementation + Per Vognsen -- idea for hash table API/implementation + Rafael Sachetto -- arrpop() + github:HeroicKatora -- arraddn() reworking + + Bugfixes: + Andy Durdin + Shane Liesegang + Vinh Truong + Andreas Molzer + github:hashitaku + github:srdjanstipic + Macoy Madson + Andreas Vennstrom + Tobias Mansfield-Williams +*/ + +#ifdef STBDS_UNIT_TESTS +#define _CRT_SECURE_NO_WARNINGS +#endif + +#ifndef INCLUDE_STB_DS_H +#define INCLUDE_STB_DS_H + +#include +#include + +#ifndef STBDS_NO_SHORT_NAMES +#define arrlen stbds_arrlen +#define arrlenu stbds_arrlenu +#define arrput stbds_arrput +#define arrpush stbds_arrput +#define arrpop stbds_arrpop +#define arrfree stbds_arrfree +#define arraddn stbds_arraddn // deprecated, use one of the following instead: +#define arraddnptr stbds_arraddnptr +#define arraddnindex stbds_arraddnindex +#define arrsetlen stbds_arrsetlen +#define arrlast stbds_arrlast +#define arrins stbds_arrins +#define arrinsn stbds_arrinsn +#define arrdel stbds_arrdel +#define arrdeln stbds_arrdeln +#define arrdelswap stbds_arrdelswap +#define arrcap stbds_arrcap +#define arrsetcap stbds_arrsetcap + +#define hmput stbds_hmput +#define hmputs stbds_hmputs +#define hmget stbds_hmget +#define hmget_ts stbds_hmget_ts +#define hmgets stbds_hmgets +#define hmgetp stbds_hmgetp +#define hmgetp_ts stbds_hmgetp_ts +#define hmgetp_null stbds_hmgetp_null +#define 
hmgeti stbds_hmgeti +#define hmgeti_ts stbds_hmgeti_ts +#define hmdel stbds_hmdel +#define hmlen stbds_hmlen +#define hmlenu stbds_hmlenu +#define hmfree stbds_hmfree +#define hmdefault stbds_hmdefault +#define hmdefaults stbds_hmdefaults + +#define shput stbds_shput +#define shputi stbds_shputi +#define shputs stbds_shputs +#define shget stbds_shget +#define shgeti stbds_shgeti +#define shgets stbds_shgets +#define shgetp stbds_shgetp +#define shgetp_null stbds_shgetp_null +#define shdel stbds_shdel +#define shlen stbds_shlen +#define shlenu stbds_shlenu +#define shfree stbds_shfree +#define shdefault stbds_shdefault +#define shdefaults stbds_shdefaults +#define sh_new_arena stbds_sh_new_arena +#define sh_new_strdup stbds_sh_new_strdup + +#define stralloc stbds_stralloc +#define strreset stbds_strreset +#endif + +#if defined(STBDS_REALLOC) && !defined(STBDS_FREE) || !defined(STBDS_REALLOC) && defined(STBDS_FREE) +#error "You must define both STBDS_REALLOC and STBDS_FREE, or neither." +#endif +#if !defined(STBDS_REALLOC) && !defined(STBDS_FREE) +#include +#define STBDS_REALLOC(c,p,s) realloc(p,s) +#define STBDS_FREE(c,p) free(p) +#endif + +#ifdef _MSC_VER +#define STBDS_NOTUSED(v) (void)(v) +#else +#define STBDS_NOTUSED(v) (void)sizeof(v) +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// for security against attackers, seed the library with a random number, at least time() but stronger is better +extern void stbds_rand_seed(size_t seed); + +// these are the hash functions used internally if you want to test them or use them for other purposes +extern size_t stbds_hash_bytes(void *p, size_t len, size_t seed); +extern size_t stbds_hash_string(char *str, size_t seed); + +// this is a simple string arena allocator, initialize with e.g. 'stbds_string_arena my_arena={0}'. 
typedef struct stbds_string_arena stbds_string_arena;
extern char * stbds_stralloc(stbds_string_arena *a, char *str);
extern void   stbds_strreset(stbds_string_arena *a);

// have to #define STBDS_UNIT_TESTS to call this
extern void stbds_unit_tests(void);

///////////////
//
// Everything below here is implementation details
//

// Type-erased entry points that the public macros expand to. Array and map
// pointers are passed as void* together with the element size; key-taking
// functions also receive the key's address and size, plus a mode
// (STBDS_HM_BINARY or STBDS_HM_STRING).
extern void * stbds_arrgrowf(void *a, size_t elemsize, size_t addlen, size_t min_cap);
extern void   stbds_arrfreef(void *a);
extern void   stbds_hmfree_func(void *p, size_t elemsize);
extern void * stbds_hmget_key(void *a, size_t elemsize, void *key, size_t keysize, int mode);
extern void * stbds_hmget_key_ts(void *a, size_t elemsize, void *key, size_t keysize, ptrdiff_t *temp, int mode);
extern void * stbds_hmput_default(void *a, size_t elemsize);
extern void * stbds_hmput_key(void *a, size_t elemsize, void *key, size_t keysize, int mode);
extern void * stbds_hmdel_key(void *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode);
extern void * stbds_shmode_func(size_t elemsize, int mode);

#ifdef __cplusplus
}
#endif

#if defined(__GNUC__) || defined(__clang__)
#define STBDS_HAS_TYPEOF
#ifdef __cplusplus
//#define STBDS_HAS_LITERAL_ARRAY  // this is currently broken for clang
#endif
#endif

#if !defined(__cplusplus)
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define STBDS_HAS_LITERAL_ARRAY
#endif
#endif

// this macro takes the address of the argument, but on gcc/clang can accept rvalues
//
// The GNU spelling __typeof__ is used for both compilers: plain `typeof` is
// not a keyword when GCC runs in a strict ISO mode (-std=c99 / -std=c11), so
// the rvalue-capable form would otherwise fail to compile there.
#if defined(STBDS_HAS_LITERAL_ARRAY) && defined(STBDS_HAS_TYPEOF)
#define STBDS_ADDRESSOF(typevar, value)     ((__typeof__(typevar)[1]){value}) // literal array decays to pointer to value
#else
#define STBDS_ADDRESSOF(typevar, value)     &(value)
#endif

// Byte offset of `field` inside the struct that pointer `var` points to.
#define STBDS_OFFSETOF(var,field)           ((char *) &(var)->field - (char *) (var))

+#define stbds_header(t) ((stbds_array_header *) (t) - 1) +#define stbds_temp(t) stbds_header(t)->temp +#define stbds_temp_key(t) (*(char **) stbds_header(t)->hash_table) + +#define stbds_arrsetcap(a,n) (stbds_arrgrow(a,0,n)) +#define stbds_arrsetlen(a,n) ((stbds_arrcap(a) < (size_t) (n) ? stbds_arrsetcap((a),(size_t)(n)),0 : 0), (a) ? stbds_header(a)->length = (size_t) (n) : 0) +#define stbds_arrcap(a) ((a) ? stbds_header(a)->capacity : 0) +#define stbds_arrlen(a) ((a) ? (ptrdiff_t) stbds_header(a)->length : 0) +#define stbds_arrlenu(a) ((a) ? stbds_header(a)->length : 0) +#define stbds_arrput(a,v) (stbds_arrmaybegrow(a,1), (a)[stbds_header(a)->length++] = (v)) +#define stbds_arrpush stbds_arrput // synonym +#define stbds_arrpop(a) (stbds_header(a)->length--, (a)[stbds_header(a)->length]) +#define stbds_arraddn(a,n) ((void)(stbds_arraddnindex(a, n))) // deprecated, use one of the following instead: +#define stbds_arraddnptr(a,n) (stbds_arrmaybegrow(a,n), (n) ? (stbds_header(a)->length += (n), &(a)[stbds_header(a)->length-(n)]) : (a)) +#define stbds_arraddnindex(a,n)(stbds_arrmaybegrow(a,n), (n) ? (stbds_header(a)->length += (n), stbds_header(a)->length-(n)) : stbds_arrlen(a)) +#define stbds_arraddnoff stbds_arraddnindex +#define stbds_arrlast(a) ((a)[stbds_header(a)->length-1]) +#define stbds_arrfree(a) ((void) ((a) ? STBDS_FREE(NULL,stbds_header(a)) : (void)0), (a)=NULL) +#define stbds_arrdel(a,i) stbds_arrdeln(a,i,1) +#define stbds_arrdeln(a,i,n) (memmove(&(a)[i], &(a)[(i)+(n)], sizeof *(a) * (stbds_header(a)->length-(n)-(i))), stbds_header(a)->length -= (n)) +#define stbds_arrdelswap(a,i) ((a)[i] = stbds_arrlast(a), stbds_header(a)->length -= 1) +#define stbds_arrinsn(a,i,n) (stbds_arraddn((a),(n)), memmove(&(a)[(i)+(n)], &(a)[i], sizeof *(a) * (stbds_header(a)->length-(n)-(i)))) +#define stbds_arrins(a,i,v) (stbds_arrinsn((a),(i),1), (a)[i]=(v)) + +#define stbds_arrmaybegrow(a,n) ((!(a) || stbds_header(a)->length + (n) > stbds_header(a)->capacity) \ + ? 
(stbds_arrgrow(a,n,0),0) : 0) + +#define stbds_arrgrow(a,b,c) ((a) = stbds_arrgrowf_wrapper((a), sizeof *(a), (b), (c))) + +#define stbds_hmput(t, k, v) \ + ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, 0), \ + (t)[stbds_temp((t)-1)].key = (k), \ + (t)[stbds_temp((t)-1)].value = (v)) + +#define stbds_hmputs(t, s) \ + ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), &(s).key, sizeof (s).key, STBDS_HM_BINARY), \ + (t)[stbds_temp((t)-1)] = (s)) + +#define stbds_hmgeti(t,k) \ + ((t) = stbds_hmget_key_wrapper((t), sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, STBDS_HM_BINARY), \ + stbds_temp((t)-1)) + +#define stbds_hmgeti_ts(t,k,temp) \ + ((t) = stbds_hmget_key_ts_wrapper((t), sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, &(temp), STBDS_HM_BINARY), \ + (temp)) + +#define stbds_hmgetp(t, k) \ + ((void) stbds_hmgeti(t,k), &(t)[stbds_temp((t)-1)]) + +#define stbds_hmgetp_ts(t, k, temp) \ + ((void) stbds_hmgeti_ts(t,k,temp), &(t)[temp]) + +#define stbds_hmdel(t,k) \ + (((t) = stbds_hmdel_key_wrapper((t),sizeof *(t), (void*) STBDS_ADDRESSOF((t)->key, (k)), sizeof (t)->key, STBDS_OFFSETOF((t),key), STBDS_HM_BINARY)),(t)?stbds_temp((t)-1):0) + +#define stbds_hmdefault(t, v) \ + ((t) = stbds_hmput_default_wrapper((t), sizeof *(t)), (t)[-1].value = (v)) + +#define stbds_hmdefaults(t, s) \ + ((t) = stbds_hmput_default_wrapper((t), sizeof *(t)), (t)[-1] = (s)) + +#define stbds_hmfree(p) \ + ((void) ((p) != NULL ? stbds_hmfree_func((p)-1,sizeof*(p)),0 : 0),(p)=NULL) + +#define stbds_hmgets(t, k) (*stbds_hmgetp(t,k)) +#define stbds_hmget(t, k) (stbds_hmgetp(t,k)->value) +#define stbds_hmget_ts(t, k, temp) (stbds_hmgetp_ts(t,k,temp)->value) +#define stbds_hmlen(t) ((t) ? (ptrdiff_t) stbds_header((t)-1)->length-1 : 0) +#define stbds_hmlenu(t) ((t) ? stbds_header((t)-1)->length-1 : 0) +#define stbds_hmgetp_null(t,k) (stbds_hmgeti(t,k) == -1 ? 
NULL : &(t)[stbds_temp((t)-1)]) + +#define stbds_shput(t, k, v) \ + ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_HM_STRING), \ + (t)[stbds_temp((t)-1)].value = (v)) + +#define stbds_shputi(t, k, v) \ + ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_HM_STRING), \ + (t)[stbds_temp((t)-1)].value = (v), stbds_temp((t)-1)) + +#define stbds_shputs(t, s) \ + ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (s).key, sizeof (s).key, STBDS_HM_STRING), \ + (t)[stbds_temp((t)-1)] = (s), \ + (t)[stbds_temp((t)-1)].key = stbds_temp_key((t)-1)) // above line overwrites whole structure, so must rewrite key here if it was allocated internally + +#define stbds_pshput(t, p) \ + ((t) = stbds_hmput_key_wrapper((t), sizeof *(t), (void*) (p)->key, sizeof (p)->key, STBDS_HM_PTR_TO_STRING), \ + (t)[stbds_temp((t)-1)] = (p)) + +#define stbds_shgeti(t,k) \ + ((t) = stbds_hmget_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_HM_STRING), \ + stbds_temp((t)-1)) + +#define stbds_pshgeti(t,k) \ + ((t) = stbds_hmget_key_wrapper((t), sizeof *(t), (void*) (k), sizeof (*(t))->key, STBDS_HM_PTR_TO_STRING), \ + stbds_temp((t)-1)) + +#define stbds_shgetp(t, k) \ + ((void) stbds_shgeti(t,k), &(t)[stbds_temp((t)-1)]) + +#define stbds_pshget(t, k) \ + ((void) stbds_pshgeti(t,k), (t)[stbds_temp((t)-1)]) + +#define stbds_shdel(t,k) \ + (((t) = stbds_hmdel_key_wrapper((t),sizeof *(t), (void*) (k), sizeof (t)->key, STBDS_OFFSETOF((t),key), STBDS_HM_STRING)),(t)?stbds_temp((t)-1):0) +#define stbds_pshdel(t,k) \ + (((t) = stbds_hmdel_key_wrapper((t),sizeof *(t), (void*) (k), sizeof (*(t))->key, STBDS_OFFSETOF(*(t),key), STBDS_HM_PTR_TO_STRING)),(t)?stbds_temp((t)-1):0) + +#define stbds_sh_new_arena(t) \ + ((t) = stbds_shmode_func_wrapper(t, sizeof *(t), STBDS_SH_ARENA)) +#define stbds_sh_new_strdup(t) \ + ((t) = stbds_shmode_func_wrapper(t, sizeof *(t), STBDS_SH_STRDUP)) + +#define stbds_shdefault(t, v) 
stbds_hmdefault(t,v) +#define stbds_shdefaults(t, s) stbds_hmdefaults(t,s) + +#define stbds_shfree stbds_hmfree +#define stbds_shlenu stbds_hmlenu + +#define stbds_shgets(t, k) (*stbds_shgetp(t,k)) +#define stbds_shget(t, k) (stbds_shgetp(t,k)->value) +#define stbds_shgetp_null(t,k) (stbds_shgeti(t,k) == -1 ? NULL : &(t)[stbds_temp((t)-1)]) +#define stbds_shlen stbds_hmlen + +typedef struct +{ + size_t length; + size_t capacity; + void * hash_table; + ptrdiff_t temp; +} stbds_array_header; + +typedef struct stbds_string_block +{ + struct stbds_string_block *next; + char storage[8]; +} stbds_string_block; + +struct stbds_string_arena +{ + stbds_string_block *storage; + size_t remaining; + unsigned char block; + unsigned char mode; // this isn't used by the string arena itself +}; + +#define STBDS_HM_BINARY 0 +#define STBDS_HM_STRING 1 + +enum +{ + STBDS_SH_NONE, + STBDS_SH_DEFAULT, + STBDS_SH_STRDUP, + STBDS_SH_ARENA +}; + +#ifdef __cplusplus +// in C we use implicit assignment from these void*-returning functions to T*. 
+// in C++ these templates make the same code work +template static T * stbds_arrgrowf_wrapper(T *a, size_t elemsize, size_t addlen, size_t min_cap) { + return (T*)stbds_arrgrowf((void *)a, elemsize, addlen, min_cap); +} +template static T * stbds_hmget_key_wrapper(T *a, size_t elemsize, void *key, size_t keysize, int mode) { + return (T*)stbds_hmget_key((void*)a, elemsize, key, keysize, mode); +} +template static T * stbds_hmget_key_ts_wrapper(T *a, size_t elemsize, void *key, size_t keysize, ptrdiff_t *temp, int mode) { + return (T*)stbds_hmget_key_ts((void*)a, elemsize, key, keysize, temp, mode); +} +template static T * stbds_hmput_default_wrapper(T *a, size_t elemsize) { + return (T*)stbds_hmput_default((void *)a, elemsize); +} +template static T * stbds_hmput_key_wrapper(T *a, size_t elemsize, void *key, size_t keysize, int mode) { + return (T*)stbds_hmput_key((void*)a, elemsize, key, keysize, mode); +} +template static T * stbds_hmdel_key_wrapper(T *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode){ + return (T*)stbds_hmdel_key((void*)a, elemsize, key, keysize, keyoffset, mode); +} +template static T * stbds_shmode_func_wrapper(T *, size_t elemsize, int mode) { + return (T*)stbds_shmode_func(elemsize, mode); +} +#else +#define stbds_arrgrowf_wrapper stbds_arrgrowf +#define stbds_hmget_key_wrapper stbds_hmget_key +#define stbds_hmget_key_ts_wrapper stbds_hmget_key_ts +#define stbds_hmput_default_wrapper stbds_hmput_default +#define stbds_hmput_key_wrapper stbds_hmput_key +#define stbds_hmdel_key_wrapper stbds_hmdel_key +#define stbds_shmode_func_wrapper(t,e,m) stbds_shmode_func(e,m) +#endif + +#endif // INCLUDE_STB_DS_H + + +////////////////////////////////////////////////////////////////////////////// +// +// IMPLEMENTATION +// + +#ifdef STB_DS_IMPLEMENTATION +#include +#include + +#ifndef STBDS_ASSERT +#define STBDS_ASSERT_WAS_UNDEFINED +#define STBDS_ASSERT(x) ((void) 0) +#endif + +#ifdef STBDS_STATISTICS +#define STBDS_STATS(x) x 
+size_t stbds_array_grow; +size_t stbds_hash_grow; +size_t stbds_hash_shrink; +size_t stbds_hash_rebuild; +size_t stbds_hash_probes; +size_t stbds_hash_alloc; +size_t stbds_rehash_probes; +size_t stbds_rehash_items; +#else +#define STBDS_STATS(x) +#endif + +// +// stbds_arr implementation +// + +//int *prev_allocs[65536]; +//int num_prev; + +void *stbds_arrgrowf(void *a, size_t elemsize, size_t addlen, size_t min_cap) +{ + stbds_array_header temp={0}; // force debugging + void *b; + size_t min_len = stbds_arrlen(a) + addlen; + (void) sizeof(temp); + + // compute the minimum capacity needed + if (min_len > min_cap) + min_cap = min_len; + + if (min_cap <= stbds_arrcap(a)) + return a; + + // increase needed capacity to guarantee O(1) amortized + if (min_cap < 2 * stbds_arrcap(a)) + min_cap = 2 * stbds_arrcap(a); + else if (min_cap < 4) + min_cap = 4; + + //if (num_prev < 65536) if (a) prev_allocs[num_prev++] = (int *) ((char *) a+1); + //if (num_prev == 2201) + // num_prev = num_prev; + b = STBDS_REALLOC(NULL, (a) ? stbds_header(a) : 0, elemsize * min_cap + sizeof(stbds_array_header)); + //if (num_prev < 65536) prev_allocs[num_prev++] = (int *) (char *) b; + b = (char *) b + sizeof(stbds_array_header); + if (a == NULL) { + stbds_header(b)->length = 0; + stbds_header(b)->hash_table = 0; + stbds_header(b)->temp = 0; + } else { + STBDS_STATS(++stbds_array_grow); + } + stbds_header(b)->capacity = min_cap; + + return b; +} + +void stbds_arrfreef(void *a) +{ + STBDS_FREE(NULL, stbds_header(a)); +} + +// +// stbds_hm hash table implementation +// + +#ifdef STBDS_INTERNAL_SMALL_BUCKET +#define STBDS_BUCKET_LENGTH 4 +#else +#define STBDS_BUCKET_LENGTH 8 +#endif + +#define STBDS_BUCKET_SHIFT (STBDS_BUCKET_LENGTH == 8 ? 
3 : 2) +#define STBDS_BUCKET_MASK (STBDS_BUCKET_LENGTH-1) +#define STBDS_CACHE_LINE_SIZE 64 + +#define STBDS_ALIGN_FWD(n,a) (((n) + (a) - 1) & ~((a)-1)) + +typedef struct +{ + size_t hash [STBDS_BUCKET_LENGTH]; + ptrdiff_t index[STBDS_BUCKET_LENGTH]; +} stbds_hash_bucket; // in 32-bit, this is one 64-byte cache line; in 64-bit, each array is one 64-byte cache line + +typedef struct +{ + char * temp_key; // this MUST be the first field of the hash table + size_t slot_count; + size_t used_count; + size_t used_count_threshold; + size_t used_count_shrink_threshold; + size_t tombstone_count; + size_t tombstone_count_threshold; + size_t seed; + size_t slot_count_log2; + stbds_string_arena string; + stbds_hash_bucket *storage; // not a separate allocation, just 64-byte aligned storage after this struct +} stbds_hash_index; + +#define STBDS_INDEX_EMPTY -1 +#define STBDS_INDEX_DELETED -2 +#define STBDS_INDEX_IN_USE(x) ((x) >= 0) + +#define STBDS_HASH_EMPTY 0 +#define STBDS_HASH_DELETED 1 + +static size_t stbds_hash_seed=0x31415926; + +void stbds_rand_seed(size_t seed) +{ + stbds_hash_seed = seed; +} + +#define stbds_load_32_or_64(var, temp, v32, v64_hi, v64_lo) \ + temp = v64_lo ^ v32, temp <<= 16, temp <<= 16, temp >>= 16, temp >>= 16, /* discard if 32-bit */ \ + var = v64_hi, var <<= 16, var <<= 16, /* discard if 32-bit */ \ + var ^= temp ^ v32 + +#define STBDS_SIZE_T_BITS ((sizeof (size_t)) * 8) + +static size_t stbds_probe_position(size_t hash, size_t slot_count, size_t slot_log2) +{ + size_t pos; + STBDS_NOTUSED(slot_log2); + pos = hash & (slot_count-1); + #ifdef STBDS_INTERNAL_BUCKET_START + pos &= ~STBDS_BUCKET_MASK; + #endif + return pos; +} + +static size_t stbds_log2(size_t slot_count) +{ + size_t n=0; + while (slot_count > 1) { + slot_count >>= 1; + ++n; + } + return n; +} + +static stbds_hash_index *stbds_make_hash_index(size_t slot_count, stbds_hash_index *ot) +{ + stbds_hash_index *t; + t = (stbds_hash_index *) STBDS_REALLOC(NULL,0,(slot_count >> 
STBDS_BUCKET_SHIFT) * sizeof(stbds_hash_bucket) + sizeof(stbds_hash_index) + STBDS_CACHE_LINE_SIZE-1); + t->storage = (stbds_hash_bucket *) STBDS_ALIGN_FWD((size_t) (t+1), STBDS_CACHE_LINE_SIZE); + t->slot_count = slot_count; + t->slot_count_log2 = stbds_log2(slot_count); + t->tombstone_count = 0; + t->used_count = 0; + + #if 0 // A1 + t->used_count_threshold = slot_count*12/16; // if 12/16th of table is occupied, grow + t->tombstone_count_threshold = slot_count* 2/16; // if tombstones are 2/16th of table, rebuild + t->used_count_shrink_threshold = slot_count* 4/16; // if table is only 4/16th full, shrink + #elif 1 // A2 + //t->used_count_threshold = slot_count*12/16; // if 12/16th of table is occupied, grow + //t->tombstone_count_threshold = slot_count* 3/16; // if tombstones are 3/16th of table, rebuild + //t->used_count_shrink_threshold = slot_count* 4/16; // if table is only 4/16th full, shrink + + // compute without overflowing + t->used_count_threshold = slot_count - (slot_count>>2); + t->tombstone_count_threshold = (slot_count>>3) + (slot_count>>4); + t->used_count_shrink_threshold = slot_count >> 2; + + #elif 0 // B1 + t->used_count_threshold = slot_count*13/16; // if 13/16th of table is occupied, grow + t->tombstone_count_threshold = slot_count* 2/16; // if tombstones are 2/16th of table, rebuild + t->used_count_shrink_threshold = slot_count* 5/16; // if table is only 5/16th full, shrink + #else // C1 + t->used_count_threshold = slot_count*14/16; // if 14/16th of table is occupied, grow + t->tombstone_count_threshold = slot_count* 2/16; // if tombstones are 2/16th of table, rebuild + t->used_count_shrink_threshold = slot_count* 6/16; // if table is only 6/16th full, shrink + #endif + // Following statistics were measured on a Core i7-6700 @ 4.00Ghz, compiled with clang 7.0.1 -O2 + // Note that the larger tables have high variance as they were run fewer times + // A1 A2 B1 C1 + // 0.10ms : 0.10ms : 0.10ms : 0.11ms : 2,000 inserts creating 2K table + // 
0.96ms : 0.95ms : 0.97ms : 1.04ms : 20,000 inserts creating 20K table + // 14.48ms : 14.46ms : 10.63ms : 11.00ms : 200,000 inserts creating 200K table + // 195.74ms : 196.35ms : 203.69ms : 214.92ms : 2,000,000 inserts creating 2M table + // 2193.88ms : 2209.22ms : 2285.54ms : 2437.17ms : 20,000,000 inserts creating 20M table + // 65.27ms : 53.77ms : 65.33ms : 65.47ms : 500,000 inserts & deletes in 2K table + // 72.78ms : 62.45ms : 71.95ms : 72.85ms : 500,000 inserts & deletes in 20K table + // 89.47ms : 77.72ms : 96.49ms : 96.75ms : 500,000 inserts & deletes in 200K table + // 97.58ms : 98.14ms : 97.18ms : 97.53ms : 500,000 inserts & deletes in 2M table + // 118.61ms : 119.62ms : 120.16ms : 118.86ms : 500,000 inserts & deletes in 20M table + // 192.11ms : 194.39ms : 196.38ms : 195.73ms : 500,000 inserts & deletes in 200M table + + if (slot_count <= STBDS_BUCKET_LENGTH) + t->used_count_shrink_threshold = 0; + // to avoid infinite loop, we need to guarantee that at least one slot is empty and will terminate probes + STBDS_ASSERT(t->used_count_threshold + t->tombstone_count_threshold < t->slot_count); + STBDS_STATS(++stbds_hash_alloc); + if (ot) { + t->string = ot->string; + // reuse old seed so we can reuse old hashes so below "copy out old data" doesn't do any hashing + t->seed = ot->seed; + } else { + size_t a,b,temp; + memset(&t->string, 0, sizeof(t->string)); + t->seed = stbds_hash_seed; + // LCG + // in 32-bit, a = 2147001325 b = 715136305 + // in 64-bit, a = 2862933555777941757 b = 3037000493 + stbds_load_32_or_64(a,temp, 2147001325, 0x27bb2ee6, 0x87b0b0fd); + stbds_load_32_or_64(b,temp, 715136305, 0, 0xb504f32d); + stbds_hash_seed = stbds_hash_seed * a + b; + } + + { + size_t i,j; + for (i=0; i < slot_count >> STBDS_BUCKET_SHIFT; ++i) { + stbds_hash_bucket *b = &t->storage[i]; + for (j=0; j < STBDS_BUCKET_LENGTH; ++j) + b->hash[j] = STBDS_HASH_EMPTY; + for (j=0; j < STBDS_BUCKET_LENGTH; ++j) + b->index[j] = STBDS_INDEX_EMPTY; + } + } + + // copy out the old 
data, if any + if (ot) { + size_t i,j; + t->used_count = ot->used_count; + for (i=0; i < ot->slot_count >> STBDS_BUCKET_SHIFT; ++i) { + stbds_hash_bucket *ob = &ot->storage[i]; + for (j=0; j < STBDS_BUCKET_LENGTH; ++j) { + if (STBDS_INDEX_IN_USE(ob->index[j])) { + size_t hash = ob->hash[j]; + size_t pos = stbds_probe_position(hash, t->slot_count, t->slot_count_log2); + size_t step = STBDS_BUCKET_LENGTH; + STBDS_STATS(++stbds_rehash_items); + for (;;) { + size_t limit,z; + stbds_hash_bucket *bucket; + bucket = &t->storage[pos >> STBDS_BUCKET_SHIFT]; + STBDS_STATS(++stbds_rehash_probes); + + for (z=pos & STBDS_BUCKET_MASK; z < STBDS_BUCKET_LENGTH; ++z) { + if (bucket->hash[z] == 0) { + bucket->hash[z] = hash; + bucket->index[z] = ob->index[j]; + goto done; + } + } + + limit = pos & STBDS_BUCKET_MASK; + for (z = 0; z < limit; ++z) { + if (bucket->hash[z] == 0) { + bucket->hash[z] = hash; + bucket->index[z] = ob->index[j]; + goto done; + } + } + + pos += step; // quadratic probing + step += STBDS_BUCKET_LENGTH; + pos &= (t->slot_count-1); + } + } + done: + ; + } + } + } + + return t; +} + +#define STBDS_ROTATE_LEFT(val, n) (((val) << (n)) | ((val) >> (STBDS_SIZE_T_BITS - (n)))) +#define STBDS_ROTATE_RIGHT(val, n) (((val) >> (n)) | ((val) << (STBDS_SIZE_T_BITS - (n)))) + +size_t stbds_hash_string(char *str, size_t seed) +{ + size_t hash = seed; + while (*str) + hash = STBDS_ROTATE_LEFT(hash, 9) + (unsigned char) *str++; + + // Thomas Wang 64-to-32 bit mix function, hopefully also works in 32 bits + hash ^= seed; + hash = (~hash) + (hash << 18); + hash ^= hash ^ STBDS_ROTATE_RIGHT(hash,31); + hash = hash * 21; + hash ^= hash ^ STBDS_ROTATE_RIGHT(hash,11); + hash += (hash << 6); + hash ^= STBDS_ROTATE_RIGHT(hash,22); + return hash+seed; +} + +#ifdef STBDS_SIPHASH_2_4 +#define STBDS_SIPHASH_C_ROUNDS 2 +#define STBDS_SIPHASH_D_ROUNDS 4 +typedef int STBDS_SIPHASH_2_4_can_only_be_used_in_64_bit_builds[sizeof(size_t) == 8 ? 
1 : -1]; +#endif + +#ifndef STBDS_SIPHASH_C_ROUNDS +#define STBDS_SIPHASH_C_ROUNDS 1 +#endif +#ifndef STBDS_SIPHASH_D_ROUNDS +#define STBDS_SIPHASH_D_ROUNDS 1 +#endif + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4127) // conditional expression is constant, for do..while(0) and sizeof()== +#endif + +static size_t stbds_siphash_bytes(void *p, size_t len, size_t seed) +{ + unsigned char *d = (unsigned char *) p; + size_t i,j; + size_t v0,v1,v2,v3, data; + + // hash that works on 32- or 64-bit registers without knowing which we have + // (computes different results on 32-bit and 64-bit platform) + // derived from siphash, but on 32-bit platforms very different as it uses 4 32-bit state not 4 64-bit + v0 = ((((size_t) 0x736f6d65 << 16) << 16) + 0x70736575) ^ seed; + v1 = ((((size_t) 0x646f7261 << 16) << 16) + 0x6e646f6d) ^ ~seed; + v2 = ((((size_t) 0x6c796765 << 16) << 16) + 0x6e657261) ^ seed; + v3 = ((((size_t) 0x74656462 << 16) << 16) + 0x79746573) ^ ~seed; + + #ifdef STBDS_TEST_SIPHASH_2_4 + // hardcoded with key material in the siphash test vectors + v0 ^= 0x0706050403020100ull ^ seed; + v1 ^= 0x0f0e0d0c0b0a0908ull ^ ~seed; + v2 ^= 0x0706050403020100ull ^ seed; + v3 ^= 0x0f0e0d0c0b0a0908ull ^ ~seed; + #endif + + #define STBDS_SIPROUND() \ + do { \ + v0 += v1; v1 = STBDS_ROTATE_LEFT(v1, 13); v1 ^= v0; v0 = STBDS_ROTATE_LEFT(v0,STBDS_SIZE_T_BITS/2); \ + v2 += v3; v3 = STBDS_ROTATE_LEFT(v3, 16); v3 ^= v2; \ + v2 += v1; v1 = STBDS_ROTATE_LEFT(v1, 17); v1 ^= v2; v2 = STBDS_ROTATE_LEFT(v2,STBDS_SIZE_T_BITS/2); \ + v0 += v3; v3 = STBDS_ROTATE_LEFT(v3, 21); v3 ^= v0; \ + } while (0) + + for (i=0; i+sizeof(size_t) <= len; i += sizeof(size_t), d += sizeof(size_t)) { + data = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24); + data |= (size_t) (d[4] | (d[5] << 8) | (d[6] << 16) | (d[7] << 24)) << 16 << 16; // discarded if size_t == 4 + + v3 ^= data; + for (j=0; j < STBDS_SIPHASH_C_ROUNDS; ++j) + STBDS_SIPROUND(); + v0 ^= data; + } + data = len << 
(STBDS_SIZE_T_BITS-8); + switch (len - i) { + case 7: data |= ((size_t) d[6] << 24) << 24; // fall through + case 6: data |= ((size_t) d[5] << 20) << 20; // fall through + case 5: data |= ((size_t) d[4] << 16) << 16; // fall through + case 4: data |= (d[3] << 24); // fall through + case 3: data |= (d[2] << 16); // fall through + case 2: data |= (d[1] << 8); // fall through + case 1: data |= d[0]; // fall through + case 0: break; + } + v3 ^= data; + for (j=0; j < STBDS_SIPHASH_C_ROUNDS; ++j) + STBDS_SIPROUND(); + v0 ^= data; + v2 ^= 0xff; + for (j=0; j < STBDS_SIPHASH_D_ROUNDS; ++j) + STBDS_SIPROUND(); + +#ifdef STBDS_SIPHASH_2_4 + return v0^v1^v2^v3; +#else + return v1^v2^v3; // slightly stronger since v0^v3 in above cancels out final round operation? I tweeted at the authors of SipHash about this but they didn't reply +#endif +} + +size_t stbds_hash_bytes(void *p, size_t len, size_t seed) +{ +#ifdef STBDS_SIPHASH_2_4 + return stbds_siphash_bytes(p,len,seed); +#else + unsigned char *d = (unsigned char *) p; + + if (len == 4) { + unsigned int hash = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24); + #if 0 + // HASH32-A Bob Jenkin's hash function w/o large constants + hash ^= seed; + hash -= (hash<<6); + hash ^= (hash>>17); + hash -= (hash<<9); + hash ^= seed; + hash ^= (hash<<4); + hash -= (hash<<3); + hash ^= (hash<<10); + hash ^= (hash>>15); + #elif 1 + // HASH32-BB Bob Jenkin's presumably-accidental version of Thomas Wang hash with rotates turned into shifts. + // Note that converting these back to rotates makes it run a lot slower, presumably due to collisions, so I'm + // not really sure what's going on. 
+ hash ^= seed; + hash = (hash ^ 61) ^ (hash >> 16); + hash = hash + (hash << 3); + hash = hash ^ (hash >> 4); + hash = hash * 0x27d4eb2d; + hash ^= seed; + hash = hash ^ (hash >> 15); + #else // HASH32-C - Murmur3 + hash ^= seed; + hash *= 0xcc9e2d51; + hash = (hash << 17) | (hash >> 15); + hash *= 0x1b873593; + hash ^= seed; + hash = (hash << 19) | (hash >> 13); + hash = hash*5 + 0xe6546b64; + hash ^= hash >> 16; + hash *= 0x85ebca6b; + hash ^= seed; + hash ^= hash >> 13; + hash *= 0xc2b2ae35; + hash ^= hash >> 16; + #endif + // Following statistics were measured on a Core i7-6700 @ 4.00Ghz, compiled with clang 7.0.1 -O2 + // Note that the larger tables have high variance as they were run fewer times + // HASH32-A // HASH32-BB // HASH32-C + // 0.10ms // 0.10ms // 0.10ms : 2,000 inserts creating 2K table + // 0.96ms // 0.95ms // 0.99ms : 20,000 inserts creating 20K table + // 14.69ms // 14.43ms // 14.97ms : 200,000 inserts creating 200K table + // 199.99ms // 195.36ms // 202.05ms : 2,000,000 inserts creating 2M table + // 2234.84ms // 2187.74ms // 2240.38ms : 20,000,000 inserts creating 20M table + // 55.68ms // 53.72ms // 57.31ms : 500,000 inserts & deletes in 2K table + // 63.43ms // 61.99ms // 65.73ms : 500,000 inserts & deletes in 20K table + // 80.04ms // 77.96ms // 81.83ms : 500,000 inserts & deletes in 200K table + // 100.42ms // 97.40ms // 102.39ms : 500,000 inserts & deletes in 2M table + // 119.71ms // 120.59ms // 121.63ms : 500,000 inserts & deletes in 20M table + // 185.28ms // 195.15ms // 187.74ms : 500,000 inserts & deletes in 200M table + // 15.58ms // 14.79ms // 15.52ms : 200,000 inserts creating 200K table with varying key spacing + + return (((size_t) hash << 16 << 16) | hash) ^ seed; + } else if (len == 8 && sizeof(size_t) == 8) { + size_t hash = d[0] | (d[1] << 8) | (d[2] << 16) | (d[3] << 24); + hash |= (size_t) (d[4] | (d[5] << 8) | (d[6] << 16) | (d[7] << 24)) << 16 << 16; // avoid warning if size_t == 4 + hash ^= seed; + hash = (~hash) + 
(hash << 21); + hash ^= STBDS_ROTATE_RIGHT(hash,24); + hash *= 265; + hash ^= STBDS_ROTATE_RIGHT(hash,14); + hash ^= seed; + hash *= 21; + hash ^= STBDS_ROTATE_RIGHT(hash,28); + hash += (hash << 31); + hash = (~hash) + (hash << 18); + return hash; + } else { + return stbds_siphash_bytes(p,len,seed); + } +#endif +} +#ifdef _MSC_VER +#pragma warning(pop) +#endif + + +static int stbds_is_key_equal(void *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode, size_t i) +{ + if (mode >= STBDS_HM_STRING) + return 0==strcmp((char *) key, * (char **) ((char *) a + elemsize*i + keyoffset)); + else + return 0==memcmp(key, (char *) a + elemsize*i + keyoffset, keysize); +} + +#define STBDS_HASH_TO_ARR(x,elemsize) ((char*) (x) - (elemsize)) +#define STBDS_ARR_TO_HASH(x,elemsize) ((char*) (x) + (elemsize)) + +#define stbds_hash_table(a) ((stbds_hash_index *) stbds_header(a)->hash_table) + +void stbds_hmfree_func(void *a, size_t elemsize) +{ + if (a == NULL) return; + if (stbds_hash_table(a) != NULL) { + if (stbds_hash_table(a)->string.mode == STBDS_SH_STRDUP) { + size_t i; + // skip 0th element, which is default + for (i=1; i < stbds_header(a)->length; ++i) + STBDS_FREE(NULL, *(char**) ((char *) a + elemsize*i)); + } + stbds_strreset(&stbds_hash_table(a)->string); + } + STBDS_FREE(NULL, stbds_header(a)->hash_table); + STBDS_FREE(NULL, stbds_header(a)); +} + +static ptrdiff_t stbds_hm_find_slot(void *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode) +{ + void *raw_a = STBDS_HASH_TO_ARR(a,elemsize); + stbds_hash_index *table = stbds_hash_table(raw_a); + size_t hash = mode >= STBDS_HM_STRING ? 
stbds_hash_string((char*)key,table->seed) : stbds_hash_bytes(key, keysize,table->seed); + size_t step = STBDS_BUCKET_LENGTH; + size_t limit,i; + size_t pos; + stbds_hash_bucket *bucket; + + if (hash < 2) hash += 2; // stored hash values are forbidden from being 0, so we can detect empty slots + + pos = stbds_probe_position(hash, table->slot_count, table->slot_count_log2); + + for (;;) { + STBDS_STATS(++stbds_hash_probes); + bucket = &table->storage[pos >> STBDS_BUCKET_SHIFT]; + + // start searching from pos to end of bucket, this should help performance on small hash tables that fit in cache + for (i=pos & STBDS_BUCKET_MASK; i < STBDS_BUCKET_LENGTH; ++i) { + if (bucket->hash[i] == hash) { + if (stbds_is_key_equal(a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) { + return (pos & ~STBDS_BUCKET_MASK)+i; + } + } else if (bucket->hash[i] == STBDS_HASH_EMPTY) { + return -1; + } + } + + // search from beginning of bucket to pos + limit = pos & STBDS_BUCKET_MASK; + for (i = 0; i < limit; ++i) { + if (bucket->hash[i] == hash) { + if (stbds_is_key_equal(a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) { + return (pos & ~STBDS_BUCKET_MASK)+i; + } + } else if (bucket->hash[i] == STBDS_HASH_EMPTY) { + return -1; + } + } + + // quadratic probing + pos += step; + step += STBDS_BUCKET_LENGTH; + pos &= (table->slot_count-1); + } + /* NOTREACHED */ +} + +void * stbds_hmget_key_ts(void *a, size_t elemsize, void *key, size_t keysize, ptrdiff_t *temp, int mode) +{ + size_t keyoffset = 0; + if (a == NULL) { + // make it non-empty so we can return a temp + a = stbds_arrgrowf(0, elemsize, 0, 1); + stbds_header(a)->length += 1; + memset(a, 0, elemsize); + *temp = STBDS_INDEX_EMPTY; + // adjust a to point after the default element + return STBDS_ARR_TO_HASH(a,elemsize); + } else { + stbds_hash_index *table; + void *raw_a = STBDS_HASH_TO_ARR(a,elemsize); + // adjust a to point to the default element + table = (stbds_hash_index *) stbds_header(raw_a)->hash_table; 
+ if (table == 0) { + *temp = -1; + } else { + ptrdiff_t slot = stbds_hm_find_slot(a, elemsize, key, keysize, keyoffset, mode); + if (slot < 0) { + *temp = STBDS_INDEX_EMPTY; + } else { + stbds_hash_bucket *b = &table->storage[slot >> STBDS_BUCKET_SHIFT]; + *temp = b->index[slot & STBDS_BUCKET_MASK]; + } + } + return a; + } +} + +void * stbds_hmget_key(void *a, size_t elemsize, void *key, size_t keysize, int mode) +{ + ptrdiff_t temp; + void *p = stbds_hmget_key_ts(a, elemsize, key, keysize, &temp, mode); + stbds_temp(STBDS_HASH_TO_ARR(p,elemsize)) = temp; + return p; +} + +void * stbds_hmput_default(void *a, size_t elemsize) +{ + // three cases: + // a is NULL <- allocate + // a has a hash table but no entries, because of shmode <- grow + // a has entries <- do nothing + if (a == NULL || stbds_header(STBDS_HASH_TO_ARR(a,elemsize))->length == 0) { + a = stbds_arrgrowf(a ? STBDS_HASH_TO_ARR(a,elemsize) : NULL, elemsize, 0, 1); + stbds_header(a)->length += 1; + memset(a, 0, elemsize); + a=STBDS_ARR_TO_HASH(a,elemsize); + } + return a; +} + +static char *stbds_strdup(char *str); + +void *stbds_hmput_key(void *a, size_t elemsize, void *key, size_t keysize, int mode) +{ + size_t keyoffset=0; + void *raw_a; + stbds_hash_index *table; + + if (a == NULL) { + a = stbds_arrgrowf(0, elemsize, 0, 1); + memset(a, 0, elemsize); + stbds_header(a)->length += 1; + // adjust a to point AFTER the default element + a = STBDS_ARR_TO_HASH(a,elemsize); + } + + // adjust a to point to the default element + raw_a = a; + a = STBDS_HASH_TO_ARR(a,elemsize); + + table = (stbds_hash_index *) stbds_header(a)->hash_table; + + if (table == NULL || table->used_count >= table->used_count_threshold) { + stbds_hash_index *nt; + size_t slot_count; + + slot_count = (table == NULL) ? STBDS_BUCKET_LENGTH : table->slot_count*2; + nt = stbds_make_hash_index(slot_count, table); + if (table) + STBDS_FREE(NULL, table); + else + nt->string.mode = mode >= STBDS_HM_STRING ? 
STBDS_SH_DEFAULT : 0; + stbds_header(a)->hash_table = table = nt; + STBDS_STATS(++stbds_hash_grow); + } + + // we iterate hash table explicitly because we want to track if we saw a tombstone + { + size_t hash = mode >= STBDS_HM_STRING ? stbds_hash_string((char*)key,table->seed) : stbds_hash_bytes(key, keysize,table->seed); + size_t step = STBDS_BUCKET_LENGTH; + size_t pos; + ptrdiff_t tombstone = -1; + stbds_hash_bucket *bucket; + + // stored hash values are forbidden from being 0, so we can detect empty slots to early out quickly + if (hash < 2) hash += 2; + + pos = stbds_probe_position(hash, table->slot_count, table->slot_count_log2); + + for (;;) { + size_t limit, i; + STBDS_STATS(++stbds_hash_probes); + bucket = &table->storage[pos >> STBDS_BUCKET_SHIFT]; + + // start searching from pos to end of bucket + for (i=pos & STBDS_BUCKET_MASK; i < STBDS_BUCKET_LENGTH; ++i) { + if (bucket->hash[i] == hash) { + if (stbds_is_key_equal(raw_a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) { + stbds_temp(a) = bucket->index[i]; + if (mode >= STBDS_HM_STRING) + stbds_temp_key(a) = * (char **) ((char *) raw_a + elemsize*bucket->index[i] + keyoffset); + return STBDS_ARR_TO_HASH(a,elemsize); + } + } else if (bucket->hash[i] == 0) { + pos = (pos & ~STBDS_BUCKET_MASK) + i; + goto found_empty_slot; + } else if (tombstone < 0) { + if (bucket->index[i] == STBDS_INDEX_DELETED) + tombstone = (ptrdiff_t) ((pos & ~STBDS_BUCKET_MASK) + i); + } + } + + // search from beginning of bucket to pos + limit = pos & STBDS_BUCKET_MASK; + for (i = 0; i < limit; ++i) { + if (bucket->hash[i] == hash) { + if (stbds_is_key_equal(raw_a, elemsize, key, keysize, keyoffset, mode, bucket->index[i])) { + stbds_temp(a) = bucket->index[i]; + return STBDS_ARR_TO_HASH(a,elemsize); + } + } else if (bucket->hash[i] == 0) { + pos = (pos & ~STBDS_BUCKET_MASK) + i; + goto found_empty_slot; + } else if (tombstone < 0) { + if (bucket->index[i] == STBDS_INDEX_DELETED) + tombstone = (ptrdiff_t) ((pos & 
~STBDS_BUCKET_MASK) + i); + } + } + + // quadratic probing + pos += step; + step += STBDS_BUCKET_LENGTH; + pos &= (table->slot_count-1); + } + found_empty_slot: + if (tombstone >= 0) { + pos = tombstone; + --table->tombstone_count; + } + ++table->used_count; + + { + ptrdiff_t i = (ptrdiff_t) stbds_arrlen(a); + // we want to do stbds_arraddn(1), but we can't use the macros since we don't have something of the right type + if ((size_t) i+1 > stbds_arrcap(a)) + *(void **) &a = stbds_arrgrowf(a, elemsize, 1, 0); + raw_a = STBDS_ARR_TO_HASH(a,elemsize); + + STBDS_ASSERT((size_t) i+1 <= stbds_arrcap(a)); + stbds_header(a)->length = i+1; + bucket = &table->storage[pos >> STBDS_BUCKET_SHIFT]; + bucket->hash[pos & STBDS_BUCKET_MASK] = hash; + bucket->index[pos & STBDS_BUCKET_MASK] = i-1; + stbds_temp(a) = i-1; + + switch (table->string.mode) { + case STBDS_SH_STRDUP: stbds_temp_key(a) = *(char **) ((char *) a + elemsize*i) = stbds_strdup((char*) key); break; + case STBDS_SH_ARENA: stbds_temp_key(a) = *(char **) ((char *) a + elemsize*i) = stbds_stralloc(&table->string, (char*)key); break; + case STBDS_SH_DEFAULT: stbds_temp_key(a) = *(char **) ((char *) a + elemsize*i) = (char *) key; break; + default: memcpy((char *) a + elemsize*i, key, keysize); break; + } + } + return STBDS_ARR_TO_HASH(a,elemsize); + } +} + +void * stbds_shmode_func(size_t elemsize, int mode) +{ + void *a = stbds_arrgrowf(0, elemsize, 0, 1); + stbds_hash_index *h; + memset(a, 0, elemsize); + stbds_header(a)->length = 1; + stbds_header(a)->hash_table = h = (stbds_hash_index *) stbds_make_hash_index(STBDS_BUCKET_LENGTH, NULL); + h->string.mode = (unsigned char) mode; + return STBDS_ARR_TO_HASH(a,elemsize); +} + +void * stbds_hmdel_key(void *a, size_t elemsize, void *key, size_t keysize, size_t keyoffset, int mode) +{ + if (a == NULL) { + return 0; + } else { + stbds_hash_index *table; + void *raw_a = STBDS_HASH_TO_ARR(a,elemsize); + table = (stbds_hash_index *) stbds_header(raw_a)->hash_table; + 
stbds_temp(raw_a) = 0; + if (table == 0) { + return a; + } else { + ptrdiff_t slot; + slot = stbds_hm_find_slot(a, elemsize, key, keysize, keyoffset, mode); + if (slot < 0) + return a; + else { + stbds_hash_bucket *b = &table->storage[slot >> STBDS_BUCKET_SHIFT]; + int i = slot & STBDS_BUCKET_MASK; + ptrdiff_t old_index = b->index[i]; + ptrdiff_t final_index = (ptrdiff_t) stbds_arrlen(raw_a)-1-1; // minus one for the raw_a vs a, and minus one for 'last' + STBDS_ASSERT(slot < (ptrdiff_t) table->slot_count); + --table->used_count; + ++table->tombstone_count; + stbds_temp(raw_a) = 1; + STBDS_ASSERT(table->used_count >= 0); + //STBDS_ASSERT(table->tombstone_count < table->slot_count/4); + b->hash[i] = STBDS_HASH_DELETED; + b->index[i] = STBDS_INDEX_DELETED; + + if (mode == STBDS_HM_STRING && table->string.mode == STBDS_SH_STRDUP) + STBDS_FREE(NULL, *(char**) ((char *) a+elemsize*old_index)); + + // if indices are the same, memcpy is a no-op, but back-pointer-fixup will fail, so skip + if (old_index != final_index) { + // swap delete + memmove((char*) a + elemsize*old_index, (char*) a + elemsize*final_index, elemsize); + + // now find the slot for the last element + if (mode == STBDS_HM_STRING) + slot = stbds_hm_find_slot(a, elemsize, *(char**) ((char *) a+elemsize*old_index + keyoffset), keysize, keyoffset, mode); + else + slot = stbds_hm_find_slot(a, elemsize, (char* ) a+elemsize*old_index + keyoffset, keysize, keyoffset, mode); + STBDS_ASSERT(slot >= 0); + b = &table->storage[slot >> STBDS_BUCKET_SHIFT]; + i = slot & STBDS_BUCKET_MASK; + STBDS_ASSERT(b->index[i] == final_index); + b->index[i] = old_index; + } + stbds_header(raw_a)->length -= 1; + + if (table->used_count < table->used_count_shrink_threshold && table->slot_count > STBDS_BUCKET_LENGTH) { + stbds_header(raw_a)->hash_table = stbds_make_hash_index(table->slot_count>>1, table); + STBDS_FREE(NULL, table); + STBDS_STATS(++stbds_hash_shrink); + } else if (table->tombstone_count > 
table->tombstone_count_threshold) { + stbds_header(raw_a)->hash_table = stbds_make_hash_index(table->slot_count , table); + STBDS_FREE(NULL, table); + STBDS_STATS(++stbds_hash_rebuild); + } + + return a; + } + } + } + /* NOTREACHED */ +} + +static char *stbds_strdup(char *str) +{ + // to keep replaceable allocator simple, we don't want to use strdup. + // rolling our own also avoids problem of strdup vs _strdup + size_t len = strlen(str)+1; + char *p = (char*) STBDS_REALLOC(NULL, 0, len); + memmove(p, str, len); + return p; +} + +#ifndef STBDS_STRING_ARENA_BLOCKSIZE_MIN +#define STBDS_STRING_ARENA_BLOCKSIZE_MIN 512u +#endif +#ifndef STBDS_STRING_ARENA_BLOCKSIZE_MAX +#define STBDS_STRING_ARENA_BLOCKSIZE_MAX (1u<<20) +#endif + +char *stbds_stralloc(stbds_string_arena *a, char *str) +{ + char *p; + size_t len = strlen(str)+1; + if (len > a->remaining) { + // compute the next blocksize + size_t blocksize = a->block; + + // size is 512, 512, 1024, 1024, 2048, 2048, 4096, 4096, etc., so that + // there are log(SIZE) allocations to free when we destroy the table + blocksize = (size_t) (STBDS_STRING_ARENA_BLOCKSIZE_MIN) << (blocksize>>1); + + // if size is under 1M, advance to next blocktype + if (blocksize < (size_t)(STBDS_STRING_ARENA_BLOCKSIZE_MAX)) + ++a->block; + + if (len > blocksize) { + // if string is larger than blocksize, then just allocate the full size. + // note that we still advance string_block so block size will continue + // increasing, so e.g. 
if somebody only calls this with 1000-long strings, + // eventually the arena will start doubling and handling those as well + stbds_string_block *sb = (stbds_string_block *) STBDS_REALLOC(NULL, 0, sizeof(*sb)-8 + len); + memmove(sb->storage, str, len); + if (a->storage) { + // insert it after the first element, so that we don't waste the space there + sb->next = a->storage->next; + a->storage->next = sb; + } else { + sb->next = 0; + a->storage = sb; + a->remaining = 0; // this is redundant, but good for clarity + } + return sb->storage; + } else { + stbds_string_block *sb = (stbds_string_block *) STBDS_REALLOC(NULL, 0, sizeof(*sb)-8 + blocksize); + sb->next = a->storage; + a->storage = sb; + a->remaining = blocksize; + } + } + + STBDS_ASSERT(len <= a->remaining); + p = a->storage->storage + a->remaining - len; + a->remaining -= len; + memmove(p, str, len); + return p; +} + +void stbds_strreset(stbds_string_arena *a) +{ + stbds_string_block *x,*y; + x = a->storage; + while (x) { + y = x->next; + STBDS_FREE(NULL, x); + x = y; + } + memset(a, 0, sizeof(*a)); +} + +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// UNIT TESTS +// + +#ifdef STBDS_UNIT_TESTS +#include +#ifdef STBDS_ASSERT_WAS_UNDEFINED +#undef STBDS_ASSERT +#endif +#ifndef STBDS_ASSERT +#define STBDS_ASSERT assert +#include +#endif + +typedef struct { int key,b,c,d; } stbds_struct; +typedef struct { int key[2],b,c,d; } stbds_struct2; + +static char buffer[256]; +char *strkey(int n) +{ +#if defined(_WIN32) && defined(__STDC_WANT_SECURE_LIB__) + sprintf_s(buffer, sizeof(buffer), "test_%d", n); +#else + sprintf(buffer, "test_%d", n); +#endif + return buffer; +} + +void stbds_unit_tests(void) +{ +#if defined(_MSC_VER) && _MSC_VER <= 1200 && defined(__cplusplus) + // VC6 C++ doesn't like the template<> trick on unnamed structures, so do nothing! 
+ STBDS_ASSERT(0); +#else + const int testsize = 100000; + const int testsize2 = testsize/20; + int *arr=NULL; + struct { int key; int value; } *intmap = NULL; + struct { char *key; int value; } *strmap = NULL, s; + struct { stbds_struct key; int value; } *map = NULL; + stbds_struct *map2 = NULL; + stbds_struct2 *map3 = NULL; + stbds_string_arena sa = { 0 }; + int key3[2] = { 1,2 }; + ptrdiff_t temp; + + int i,j; + + STBDS_ASSERT(arrlen(arr)==0); + for (i=0; i < 20000; i += 50) { + for (j=0; j < i; ++j) + arrpush(arr,j); + arrfree(arr); + } + + for (i=0; i < 4; ++i) { + arrpush(arr,1); arrpush(arr,2); arrpush(arr,3); arrpush(arr,4); + arrdel(arr,i); + arrfree(arr); + arrpush(arr,1); arrpush(arr,2); arrpush(arr,3); arrpush(arr,4); + arrdelswap(arr,i); + arrfree(arr); + } + + for (i=0; i < 5; ++i) { + arrpush(arr,1); arrpush(arr,2); arrpush(arr,3); arrpush(arr,4); + stbds_arrins(arr,i,5); + STBDS_ASSERT(arr[i] == 5); + if (i < 4) + STBDS_ASSERT(arr[4] == 4); + arrfree(arr); + } + + i = 1; + STBDS_ASSERT(hmgeti(intmap,i) == -1); + hmdefault(intmap, -2); + STBDS_ASSERT(hmgeti(intmap, i) == -1); + STBDS_ASSERT(hmget (intmap, i) == -2); + for (i=0; i < testsize; i+=2) + hmput(intmap, i, i*5); + for (i=0; i < testsize; i+=1) { + if (i & 1) STBDS_ASSERT(hmget(intmap, i) == -2 ); + else STBDS_ASSERT(hmget(intmap, i) == i*5); + if (i & 1) STBDS_ASSERT(hmget_ts(intmap, i, temp) == -2 ); + else STBDS_ASSERT(hmget_ts(intmap, i, temp) == i*5); + } + for (i=0; i < testsize; i+=2) + hmput(intmap, i, i*3); + for (i=0; i < testsize; i+=1) + if (i & 1) STBDS_ASSERT(hmget(intmap, i) == -2 ); + else STBDS_ASSERT(hmget(intmap, i) == i*3); + for (i=2; i < testsize; i+=4) + hmdel(intmap, i); // delete half the entries + for (i=0; i < testsize; i+=1) + if (i & 3) STBDS_ASSERT(hmget(intmap, i) == -2 ); + else STBDS_ASSERT(hmget(intmap, i) == i*3); + for (i=0; i < testsize; i+=1) + hmdel(intmap, i); // delete the rest of the entries + for (i=0; i < testsize; i+=1) + 
STBDS_ASSERT(hmget(intmap, i) == -2 ); + hmfree(intmap); + for (i=0; i < testsize; i+=2) + hmput(intmap, i, i*3); + hmfree(intmap); + + #if defined(__clang__) || defined(__GNUC__) + #ifndef __cplusplus + intmap = NULL; + hmput(intmap, 15, 7); + hmput(intmap, 11, 3); + hmput(intmap, 9, 5); + STBDS_ASSERT(hmget(intmap, 9) == 5); + STBDS_ASSERT(hmget(intmap, 11) == 3); + STBDS_ASSERT(hmget(intmap, 15) == 7); + #endif + #endif + + for (i=0; i < testsize; ++i) + stralloc(&sa, strkey(i)); + strreset(&sa); + + { + s.key = "a", s.value = 1; + shputs(strmap, s); + STBDS_ASSERT(*strmap[0].key == 'a'); + STBDS_ASSERT(strmap[0].key == s.key); + STBDS_ASSERT(strmap[0].value == s.value); + shfree(strmap); + } + + { + s.key = "a", s.value = 1; + sh_new_strdup(strmap); + shputs(strmap, s); + STBDS_ASSERT(*strmap[0].key == 'a'); + STBDS_ASSERT(strmap[0].key != s.key); + STBDS_ASSERT(strmap[0].value == s.value); + shfree(strmap); + } + + { + s.key = "a", s.value = 1; + sh_new_arena(strmap); + shputs(strmap, s); + STBDS_ASSERT(*strmap[0].key == 'a'); + STBDS_ASSERT(strmap[0].key != s.key); + STBDS_ASSERT(strmap[0].value == s.value); + shfree(strmap); + } + + for (j=0; j < 2; ++j) { + STBDS_ASSERT(shgeti(strmap,"foo") == -1); + if (j == 0) + sh_new_strdup(strmap); + else + sh_new_arena(strmap); + STBDS_ASSERT(shgeti(strmap,"foo") == -1); + shdefault(strmap, -2); + STBDS_ASSERT(shgeti(strmap,"foo") == -1); + for (i=0; i < testsize; i+=2) + shput(strmap, strkey(i), i*3); + for (i=0; i < testsize; i+=1) + if (i & 1) STBDS_ASSERT(shget(strmap, strkey(i)) == -2 ); + else STBDS_ASSERT(shget(strmap, strkey(i)) == i*3); + for (i=2; i < testsize; i+=4) + shdel(strmap, strkey(i)); // delete half the entries + for (i=0; i < testsize; i+=1) + if (i & 3) STBDS_ASSERT(shget(strmap, strkey(i)) == -2 ); + else STBDS_ASSERT(shget(strmap, strkey(i)) == i*3); + for (i=0; i < testsize; i+=1) + shdel(strmap, strkey(i)); // delete the rest of the entries + for (i=0; i < testsize; i+=1) + 
STBDS_ASSERT(shget(strmap, strkey(i)) == -2 ); + shfree(strmap); + } + + { + struct { char *key; char value; } *hash = NULL; + char name[4] = "jen"; + shput(hash, "bob" , 'h'); + shput(hash, "sally" , 'e'); + shput(hash, "fred" , 'l'); + shput(hash, "jen" , 'x'); + shput(hash, "doug" , 'o'); + + shput(hash, name , 'l'); + shfree(hash); + } + + for (i=0; i < testsize; i += 2) { + stbds_struct s = { i,i*2,i*3,i*4 }; + hmput(map, s, i*5); + } + + for (i=0; i < testsize; i += 1) { + stbds_struct s = { i,i*2,i*3 ,i*4 }; + stbds_struct t = { i,i*2,i*3+1,i*4 }; + if (i & 1) STBDS_ASSERT(hmget(map, s) == 0); + else STBDS_ASSERT(hmget(map, s) == i*5); + if (i & 1) STBDS_ASSERT(hmget_ts(map, s, temp) == 0); + else STBDS_ASSERT(hmget_ts(map, s, temp) == i*5); + //STBDS_ASSERT(hmget(map, t.key) == 0); + } + + for (i=0; i < testsize; i += 2) { + stbds_struct s = { i,i*2,i*3,i*4 }; + hmputs(map2, s); + } + hmfree(map); + + for (i=0; i < testsize; i += 1) { + stbds_struct s = { i,i*2,i*3,i*4 }; + stbds_struct t = { i,i*2,i*3+1,i*4 }; + if (i & 1) STBDS_ASSERT(hmgets(map2, s.key).d == 0); + else STBDS_ASSERT(hmgets(map2, s.key).d == i*4); + //STBDS_ASSERT(hmgetp(map2, t.key) == 0); + } + hmfree(map2); + + for (i=0; i < testsize; i += 2) { + stbds_struct2 s = { { i,i*2 }, i*3,i*4, i*5 }; + hmputs(map3, s); + } + for (i=0; i < testsize; i += 1) { + stbds_struct2 s = { { i,i*2}, i*3, i*4, i*5 }; + stbds_struct2 t = { { i,i*2}, i*3+1, i*4, i*5 }; + if (i & 1) STBDS_ASSERT(hmgets(map3, s.key).d == 0); + else STBDS_ASSERT(hmgets(map3, s.key).d == i*5); + //STBDS_ASSERT(hmgetp(map3, t.key) == 0); + } +#endif +} +#endif + + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2019 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/test b/test new file mode 100755 index 0000000..1b89d35 Binary files /dev/null and b/test differ diff --git a/test.l b/test.l new file mode 100644 index 0000000..2763f97 --- /dev/null +++ b/test.l @@ -0,0 +1,20 @@ +extern i64 write(i32 fd, *u8 buf, u64 count); +extern void exit(i32 code); +extern *u8 malloc(usize size); + +i32 main() +{ + [u8] message = "Hello world!\n"; + *u8 message_heap = malloc(message.len); + [u8] new_message = message_heap[0..13]; + u32 i = 0; + + loop while i < message.len { + new_message[i] = message[i]; + i = i + 1; + } + + write(1, new_message.ptr, new_message.len); + + return 0; +} diff --git a/test.s b/test.s new file mode 100644 index 0000000..b86c66e --- /dev/null +++ b/test.s @@ -0,0 +1,90 @@ +.section .text +.global main +main: +push %rbp +mov %rsp, %rbp +sub $256, %rsp +movb $72, -32(%rbp) +movb $101, -31(%rbp) +movb $108, -30(%rbp) +movb $108, -29(%rbp) +movb $111, -28(%rbp) +movb $32, -27(%rbp) +movb $119, -26(%rbp) +movb $111, -25(%rbp) +movb $114, -24(%rbp) +movb $108, -23(%rbp) +movb $100, -22(%rbp) +movb $33, -21(%rbp) +movb $10, -20(%rbp) +lea -32(%rbp), %rax +mov %rax, -48(%rbp) +mov $14, %rax +mov %rax, -40(%rbp) +mov -40(%rbp), %rax +push %rax +pop %rdi +call malloc +mov %rax, -56(%rbp) +mov -56(%rbp), %rcx +mov $0, %rax +push %rax +mov $13, 
%rax +mov %rax, %rdx +pop %rax +mov %rdx, %r8 +sub %rax, %r8 +inc %r8 +add %rcx, %rax +mov %rax, -88(%rbp) +mov %r8, -80(%rbp) +lea -88(%rbp), %rax +mov (%rax), %rcx +mov 8(%rax), %rdx +mov %rcx, -72(%rbp) +mov %rdx, -64(%rbp) +mov $0, %rax +mov %rax, -96(%rbp) +.L0: +mov -96(%rbp), %rax +mov %rax, %rcx +mov -40(%rbp), %rax +cmp %rax, %rcx +setl %al +movzx %al, %rax +test %rax, %rax +jz .L1 +mov -72(%rbp), %rcx +mov -96(%rbp), %rax +add %rcx, %rax +push %rax +mov -48(%rbp), %rcx +mov -96(%rbp), %rax +add %rcx, %rax +movzbl (%rax), %eax +pop %rcx +mov %al, (%rcx) +mov -96(%rbp), %rax +mov %rax, %rcx +mov $1, %rax +add %rcx, %rax +mov %rax, -96(%rbp) +jmp .L0 +.L1: +mov $1, %rax +push %rax +mov -72(%rbp), %rax +push %rax +mov -64(%rbp), %rax +push %rax +pop %rdx +pop %rsi +pop %rdi +call write +mov $0, %rax +mov %rbp, %rsp +pop %rbp +ret +mov %rbp, %rsp +pop %rbp +ret diff --git a/utils.c b/utils.c new file mode 100644 index 0000000..c6f0781 --- /dev/null +++ b/utils.c @@ -0,0 +1,152 @@ +#include "utils.h" +#include +#include +#include +#include + +i64 parse_int(char *s, usize len) +{ + bool negative = false; + if (*s == '-') { + s += 1; + len -= 1; + negative = true; + } + + u64 int_part = 0; + for (usize i=0; i < len; i++) { + int_part = (int_part * 10) + (s[i] - '0'); + } + + if (negative) { + int_part *= -1; + } + + return int_part; +} + +f64 parse_float(char *s, usize len) +{ + bool negative = false; + if (*s == '-') { + s += 1; + len -= 1; + negative = true; + } + + usize point_pos = 0; + for (usize i=0; i < len; i++) { + if (s[i] == '.') { + point_pos = i; + break; + } + } + + i64 int_part = parse_int(s, point_pos); + i64 dec_part = parse_int(s+point_pos+1, len-point_pos-1); + for (usize i=0; i < len-point_pos-1; i++) { + int_part *= 10; + } + + int_part += dec_part; + + f64 f = (f64) int_part; + + point_pos += 1; + + for (usize i=0; i < len - point_pos; i++) { + f /= 10.0; + } + + if (negative) { + f *= -1; + } + + return f; +} + + +void trie_insert(trie_node 
*root, arena *a, char *key, uint16_t value) +{ + trie_node *node = root; + while (*key) { + if (!node->children[(usize)*key]) { + node->children[(usize)*key] = arena_alloc(a, sizeof(trie_node)); + memset(node->children[(usize)*key], 0x0, sizeof(trie_node)); + } + node = node->children[(usize)*key]; + + key++; + } + + node->value = value; +} + +uint16_t trie_get(trie_node *root, char *key, usize len) +{ + trie_node *node = root; + for (usize i=0; i < len; i++) { + if (!node->children[(usize)(key[i])]) { + return 0; + } + node = node->children[(usize)(key[i])]; + } + + return node->value; +} + +#ifndef DEFAULT_ALIGNMENT +#define DEFAULT_ALIGNMENT (2 * sizeof(void *)) +#endif + +static usize align_forward(usize ptr, usize align) { + uintptr_t p = ptr; + uintptr_t a = (uintptr_t)align; + uintptr_t modulo = p & (a - 1); + + if (modulo != 0) { + p += a - modulo; + } + return (usize)p; +} + +arena arena_init(usize size) +{ + void *memory = malloc(size); + memset(memory, 0x0, size); + return (arena){ + .capacity = size, + .position = 0, + .memory = memory, + }; +} + +void *arena_alloc(arena *a, usize size) { + uintptr_t current_addr = (uintptr_t)a->memory + a->position; + uintptr_t padding = align_forward(current_addr, DEFAULT_ALIGNMENT) - current_addr; + if (a->position + padding + size > a->capacity) return NULL; + void *ret = (unsigned char *)a->memory + a->position + padding; + a->position += (size + padding); + + return ret; +} + +snapshot arena_snapshot(arena *a) +{ + return a->position; +} + +void arena_reset_to_snapshot(arena *a, snapshot s) +{ + a->position = s; +} + +void arena_reset(arena *a) +{ + arena_reset_to_snapshot(a, 0); +} + +void arena_deinit(arena a) +{ + free(a.memory); +} diff --git a/utils.h b/utils.h new file mode 100644 index 0000000..edc5c55 --- /dev/null +++ b/utils.h @@ -0,0 +1,64 @@ +#ifndef UTILS_H +#define UTILS_H + +#include +#include +#include + +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; + 
+typedef int8_t i8;
+typedef int16_t i16;
+typedef int32_t i32;
+typedef int64_t i64;
+/* size_t alias; used below for lengths, capacities and positions. */
+typedef size_t usize;
+
+typedef float f32;
+typedef double f64;
+/* Decimal parsers over the first `len` bytes of `s`; a leading '-' negates. */
+i64 parse_int(char *s, usize len);
+f64 parse_float(char *s, usize len);
+/* Arena state: `memory` is the buffer, `capacity` its size in bytes and `position` the offset of the first unused byte. */
+typedef struct {
+    usize capacity;
+    usize position;
+    void* memory;
+} arena;
+/* An arena position, as returned by arena_snapshot(). */
+typedef usize snapshot;
+
+/*
+ * NOTE(ernesto): faulty initialization is signalled by arena.memory
+ * being null. It is the responsibility of the caller to check for faulty
+ * initialization.
+ */
+arena arena_init(usize size);
+/*
+ * Returns null on unsuccessful allocation.
+ * In this implementation an allocation is only unsuccessful if the arena
+ * does not have enough memory to allocate the requested space.
+ */
+void *arena_alloc(arena *a, usize size);
+snapshot arena_snapshot(arena *a); /* returns the current position */
+void arena_reset_to_snapshot(arena *a, snapshot s); /* sets position to s */
+void arena_reset(arena *a); /* sets position back to 0 */
+/* Frees arena.memory. This call should never fail; also, do we even care if it does? */
+void arena_deinit(arena a);
+/* Trie node: one child slot per byte value plus the value stored at this node. */
+typedef struct _trie_node {
+    uint16_t value;
+    struct _trie_node *children[256];
+} trie_node;
+/* trie_insert() takes a NUL-terminated key; trie_get() takes a length and returns 0 when no node matches. */
+void trie_insert(trie_node *root, arena *a, char *key, uint16_t value);
+uint16_t trie_get(trie_node *root, char *key, usize len);
+
+typedef struct {
+    usize row, column;
+} source_pos;
+
+#endif /* UTILS_H */