implemented basic parsing

2025-11-30 21:56:39 +01:00 · 2025-11-30 21:56:39 +01:00 · abf1d7c066
commit abf1d7c066
parent 038930d881
10 changed files with 588 additions and 486 deletions
--- a/4
+++ b/4
@ -3,8 +3,8 @@
 include config.mk
-SRC = cc.c utils.c lexer.c
+SRC = cc.c utils.c lexer.c parser.c
-HDR = config.def.h utils.h
+HDR = config.def.h utils.h lexer.h parser.h sema.h
 OBJ = ${SRC:.c=.o}
 all: options cc
--- a/cc.c
+++ b/cc.c
@ -2,6 +2,121 @@
 #include <stdlib.h>
 #include "utils.h"
 #include "lexer.h"
 #include "parser.h"
 /*
  * Emit two spaces per nesting level so that child nodes line up under
  * their parent in the AST dump. Nothing is written when depth <= 0.
  */
 void print_indent(int depth) {
 	int remaining = depth;
 	while (remaining > 0) {
 		printf("  ");
 		remaining--;
 	}
 }
 /*
  * Map a binary_op to the operator spelling shown in the AST dump.
  * Every enumerator declared in binary_op has an entry; any other value
  * yields "?".
  */
 const char* get_op_str(binary_op op) {
 	switch(op) {
 		case OP_PLUS: return "+";
 		case OP_MINUS: return "-";
 		case OP_DIV: return "/";
 		case OP_MUL: return "*";
 		case OP_EQ: return "==";
 		case OP_ASSIGN: return "=";
 		case OP_AND: return "&&";
 		case OP_OR: return "||";
 		case OP_NEQ: return "!=";
 		case OP_GT: return ">";
 		case OP_LT: return "<";
 		case OP_GE: return ">=";
 		case OP_LE: return "<=";
 		case OP_BOR: return "|";
 		case OP_BAND: return "&";
 		case OP_BXOR: return "^";
 		case OP_MOD: return "%";
 		case OP_PLUS_EQ: return "+=";
 		case OP_MINUS_EQ: return "-=";
 		case OP_DIV_EQ: return "/=";
 		case OP_MUL_EQ: return "*=";
 		/* The remaining compound assignments were previously reported as "?". */
 		case OP_BOR_EQ: return "|=";
 		case OP_BAND_EQ: return "&=";
 		case OP_BXOR_EQ: return "^=";
 		case OP_MOD_EQ: return "%=";
 		default: return "?";
 	}
 }
 /*
  * Map a unary_op to the operator spelling shown in the AST dump.
  * Values without a known spelling yield "?".
  */
 const char *get_uop_str(unary_op op) {
 	if (op == UOP_INCR) {
 		return "++";
 	}
 	if (op == UOP_MINUS) {
 		return "-";
 	}
 	if (op == UOP_DECR) {
 		return "--";
 	}
 	if (op == UOP_DEREF) {
 		return "*";
 	}
 	if (op == UOP_REF) {
 		return "&";
 	}
 	if (op == UOP_NOT) {
 		return "!";
 	}
 	return "?";
 }
 /*
  * Recursively dump the tree rooted at `node` to stdout, one node per line,
  * indented by `depth` levels (two spaces each). A NULL node prints nothing.
  *
  * Unit/compound nodes are walked as a linked list through unit_node.next;
  * statement node kinds that have no payload in ast_node yet only print a
  * placeholder line.
  */
 void print_ast(ast_node *node, int depth) {
 	if (!node) return;
 	print_indent(depth);
 	switch (node->type) {
 		case NODE_INTEGER:
 			/* Cast so the conversion specifier matches on every platform,
 			 * whatever underlying type u64 is. */
 			printf("Integer: %llu\n", (unsigned long long)node->expr.integer);
 			break;
 		case NODE_FLOAT:
 			printf("Float: %f\n", node->expr.flt);
 			break;
 		case NODE_CHAR:
 			printf("Char: '%c'\n", node->expr.ch);
 			break;
 		case NODE_STRING:
 			/* Lexemes are length-delimited slices, hence %.*s. */
 			printf("String: \"%.*s\"\n", (int)node->expr.string.len, node->expr.string.start);
 			break;
 		case NODE_IDENTIFIER:
 			printf("Identifier: %.*s\n", (int)node->expr.string.len, node->expr.string.start);
 			break;
 		case NODE_BINARY:
 			printf("BinaryOp (%s)\n", get_op_str(node->expr.binary.operator));
 			print_ast(node->expr.binary.left, depth + 1);
 			print_ast(node->expr.binary.right, depth + 1);
 			break;
 		case NODE_UNARY:
 			printf("UnaryOp (%s)\n", get_uop_str(node->expr.unary.operator));
 			print_ast(node->expr.unary.right, depth + 1);
 			break;
 		case NODE_TERNARY:
 			printf("Ternary (? :)\n");
 			print_indent(depth + 1); printf("Condition:\n");
 			print_ast(node->expr.ternary.condition, depth + 2);
 			print_indent(depth + 1); printf("Then:\n");
 			print_ast(node->expr.ternary.then, depth + 2);
 			print_indent(depth + 1); printf("Else:\n");
 			print_ast(node->expr.ternary.otherwise, depth + 2);
 			break;
 		case NODE_UNIT:
 		case NODE_COMPOUND: {
 			printf("Unit/Block:\n");
 			/* Follow the chain for as long as it stays a unit/compound list. */
 			for (ast_node *current = node;
 			     current && (current->type == NODE_UNIT || current->type == NODE_COMPOUND);
 			     current = current->expr.unit_node.next) {
 				print_ast(current->expr.unit_node.expr, depth + 1);
 			}
 			break;
 		}
 		case NODE_IF:
 			printf("IfStmt (Fields missing in struct)\n");
 			break;
 		case NODE_WHILE:
 			printf("WhileStmt (Fields missing in struct)\n");
 			break;
 		case NODE_VAR_DECL:
 			printf("VarDecl (Fields missing in struct)\n");
 			break;
 		case NODE_FUNCTION_DEF:
 			printf("FunctionDef (Fields missing in struct)\n");
 			break;
 		case NODE_RETURN:
 			printf("Return (Fields missing in struct)\n");
 			break;
 		default:
 			printf("Unknown Node Type: %d\n", node->type);
 			break;
 	}
 }
 int main(void)
 {
@ -17,6 +132,8 @@ int main(void)
 	arena a = arena_init(0x1000 * 0x1000 * 64);
 	lexer *l = lexer_init(src, size, &a);
 	parser *p = parser_init(l, &a);
 	print_ast(p->ast, 0);
 	arena_deinit(a);
--- a/lexer.c
+++ b/lexer.c
@ -381,8 +381,8 @@ static void parse(lexer *l)
 lexer *lexer_init(char *source, usize size, arena *arena)
 {
 	lexer *lex = arena_alloc(arena, sizeof(lexer));
-	lex->column = 0;
+	lex->column = 1;
-	lex->row = 0;
+	lex->row = 1;
 	lex->index = 0;
 	lex->size = size;
 	lex->tokens = 0;
@ -403,6 +403,7 @@ lexer *lexer_init(char *source, usize size, arena *arena)
 	trie_insert(keywords, lex->allocator, "case", TOKEN_CASE);
 	trie_insert(keywords, lex->allocator, "do", TOKEN_DO);
 	trie_insert(keywords, lex->allocator, "defer", TOKEN_DEFER);
 	trie_insert(keywords, lex->allocator, "return", TOKEN_RETURN);
 	trie_insert(keywords, lex->allocator, "module", TOKEN_MODULE);
 	trie_insert(keywords, lex->allocator, "static", TOKEN_STATIC);
 	trie_insert(keywords, lex->allocator, "const", TOKEN_CONST);
--- a/lexer.h
+++ b/lexer.h
@ -67,6 +67,7 @@ typedef enum {
 	TOKEN_DO,
 	TOKEN_DEFER,
 	TOKEN_MODULE,
 	TOKEN_RETURN,
 	TOKEN_STATIC,
 	TOKEN_CONST,
--- a/parser.c
+++ b/parser.c
@ -0,0 +1,231 @@
 #include "parser.h"
 #include <stdbool.h>
 #include <stdio.h>
 ast_node *parse_expression(parser *p);
 /*
  * Consume the current token: it becomes `previous` and the cursor moves to
  * the next token in the list.
  *
  * At end of input (no current token) this is a no-op, so `previous` keeps
  * pointing at the last real token. Error reporting reads the position from
  * `previous`; clearing it here would make a later error dereference NULL
  * (e.g. for the input "((1").
  */
 static void advance(parser *p)
 {
 	if (!p->tokens) {
 		return;
 	}
 	p->previous = p->tokens;
 	p->tokens = p->tokens->next;
 }
 /* Return the token the parser is currently looking at (NULL at end of input) without consuming it. */
 static token *peek(parser *p)
 {
 	token *current = p->tokens;
 	return current;
 }
 /* Report whether the current token has the given type; false at end of input. Never consumes. */
 static bool match_peek(parser *p, token_type type)
 {
 	if (!p->tokens) {
 		return false;
 	}
 	return p->tokens->type == type;
 }
 /*
  * Consume the current token if it has the given type.
  * Returns true when a token was consumed, false otherwise (including at
  * end of input), in which case the cursor is left untouched.
  */
 static bool match(parser *p, token_type type)
 {
 	if (!p->tokens || p->tokens->type != type) {
 		return false;
 	}
 	advance(p);
 	return true;
 }
 /*
  * Error recovery: skip tokens until a likely statement boundary.
  *
  * One token is always consumed first. Skipping then stops when the token
  * just consumed was ';' or '}', when the upcoming token is a keyword that
  * starts a statement/declaration, or when the input is exhausted.
  */
 static void parser_sync(parser *p)
 {
 	advance(p);
 	for (;;) {
 		if (!p->tokens) {
 			return;
 		}
 		token_type last = p->previous->type;
 		if (last == TOKEN_SEMICOLON || last == TOKEN_RCURLY) {
 			return;
 		}
 		switch (p->tokens->type) {
 			case TOKEN_STRUCT:
 			case TOKEN_ENUM:
 			case TOKEN_IF:
 			case TOKEN_WHILE:
 			case TOKEN_FOR:
 			case TOKEN_DO:
 			case TOKEN_RETURN:
 			case TOKEN_SWITCH:
 				return;
 			default:
 				break;
 		}
 		advance(p);
 	}
 }
 /*
  * Report a parse error on stdout and resynchronise the parser.
  *
  * The position comes from the last consumed token; if there is none, the
  * current token is used instead, and if neither exists the message is
  * printed without a position rather than dereferencing NULL.
  * Row and column are cast to long so the arguments match the %ld
  * specifiers whatever integer type the position fields have.
  */
 static void error(parser *p, char *msg)
 {
 	const token *at = p->previous ? p->previous : p->tokens;
 	if (at) {
 		printf("\x1b[31m\x1b[1merror\x1b[0m\x1b[1m:%ld:%ld:\x1b[0m %s\n", (long)at->position.row, (long)at->position.column, msg);
 	} else {
 		printf("\x1b[31m\x1b[1merror\x1b[0m\x1b[1m:\x1b[0m %s\n", msg);
 	}
 	parser_sync(p);
 }
 /*
  * Parse a primary expression: an integer, float, identifier, string or
  * character literal, or a parenthesised expression.
  *
  * Returns the new node, or NULL when the current token cannot start a
  * primary expression (nothing is consumed in that case) or when an error
  * was reported (bad escape code, missing ')').
  */
 static ast_node *parse_factor(parser *p)
 {
 	token *t = peek(p);
 	if (!t) {
 		return NULL;
 	}

 	ast_node *node;
 	switch (t->type) {
 		case TOKEN_INTEGER:
 			advance(p);
 			node = arena_alloc(p->allocator, sizeof(ast_node));
 			node->type = NODE_INTEGER;
 			node->expr.integer = parse_int(t->lexeme, t->lexeme_len);
 			return node;
 		case TOKEN_FLOAT:
 			advance(p);
 			node = arena_alloc(p->allocator, sizeof(ast_node));
 			node->type = NODE_FLOAT;
 			node->expr.flt = parse_float(t->lexeme, t->lexeme_len);
 			return node;
 		case TOKEN_IDENTIFIER:
 			advance(p);
 			node = arena_alloc(p->allocator, sizeof(ast_node));
 			node->type = NODE_IDENTIFIER;
 			node->expr.string.start = t->lexeme;
 			node->expr.string.len = t->lexeme_len;
 			return node;
 		case TOKEN_STRING:
 			advance(p);
 			node = arena_alloc(p->allocator, sizeof(ast_node));
 			node->type = NODE_STRING;
 			node->expr.string.start = t->lexeme;
 			node->expr.string.len = t->lexeme_len;
 			return node;
 		case TOKEN_CHAR:
 			advance(p);
 			node = arena_alloc(p->allocator, sizeof(ast_node));
 			node->type = NODE_CHAR;
 			if (t->lexeme_len != 2) {
 				/* Plain literal: the lexeme is the character itself. */
 				node->expr.ch = *(t->lexeme);
 				return node;
 			}
 			/* Two-character lexeme: backslash followed by the escape code. */
 			switch (t->lexeme[1]) {
 				case 'n': node->expr.ch = '\n'; break;
 				case 't': node->expr.ch = '\t'; break;
 				case 'r': node->expr.ch = '\r'; break;
 				case '0': node->expr.ch = '\0'; break;
 				case '\\': node->expr.ch = '\\'; break;
 				case '\'': node->expr.ch = '\''; break;
 				default:
 					error(p, "invalid escape code.");
 					return NULL;
 			}
 			return node;
 		case TOKEN_LPAREN:
 			advance(p);
 			node = parse_expression(p);
 			if (match(p, TOKEN_RPAREN)) {
 				return node;
 			}
 			error(p, "unclosed parenthesis");
 			return NULL;
 		default:
 			return NULL;
 	}
 }
 /*
  * Parse a prefix unary expression (++ - -- * & !) or, when the current
  * token is not a prefix operator, fall through to a primary expression.
  *
  * The operand is parsed by recursing into parse_unary, so a prefix
  * operator binds tighter than any binary operator: "-a + b" is
  * (-a) + b, and chains such as "!-x" nest right to left. Parsing the
  * operand as a full expression would instead swallow the whole "a + b".
  *
  * Returns the node, or whatever parse_factor returns when no operator is
  * present. The operand of the returned node is NULL if none could be parsed.
  */
 ast_node *parse_unary(parser *p)
 {
 	token *t = peek(p);
 	if (!t) {
 		return parse_factor(p);
 	}

 	unary_op op;
 	switch (t->type) {
 		case TOKEN_PLUS_PLUS:
 			op = UOP_INCR;
 			break;
 		case TOKEN_MINUS:
 			op = UOP_MINUS;
 			break;
 		case TOKEN_MINUS_MINUS:
 			op = UOP_DECR;
 			break;
 		case TOKEN_STAR:
 			op = UOP_DEREF;
 			break;
 		case TOKEN_AND:
 			op = UOP_REF;
 			break;
 		case TOKEN_BANG:
 			op = UOP_NOT;
 			break;
 		default:
 			return parse_factor(p);
 	}
 	advance(p);

 	ast_node *node = arena_alloc(p->allocator, sizeof(ast_node));
 	node->type = NODE_UNARY;
 	node->expr.unary.operator = op;
 	node->expr.unary.right = parse_unary(p);
 	return node;
 }
 /*
  * Parse a left-associative chain of '*' and '/' operations.
  *
  * Both operands go through parse_unary, so a prefix operator is accepted
  * on the right-hand side as well ("a * -b"); parsing the right operand as
  * a bare primary would leave it NULL and strand the '-' in the stream.
  *
  * Returns the root of the chain, or the single operand when no
  * multiplicative operator follows.
  */
 ast_node *parse_term(parser *p)
 {
 	ast_node *left = parse_unary(p);
 	while (match_peek(p, TOKEN_STAR) || match_peek(p, TOKEN_SLASH)) {
 		binary_op op = peek(p)->type == TOKEN_STAR ? OP_MUL : OP_DIV;
 		advance(p);
 		ast_node *right = parse_unary(p);
 		ast_node *node = arena_alloc(p->allocator, sizeof(ast_node));
 		node->type = NODE_BINARY;
 		node->expr.binary.left = left;
 		node->expr.binary.right = right;
 		node->expr.binary.operator = op;
 		left = node;
 	}
 	return left;
 }
 /*
  * Parse a left-associative chain of '+' and '-' operations whose operands
  * are terms. Returns the root of the chain, or the single term when no
  * additive operator follows.
  */
 ast_node *parse_expression(parser *p)
 {
 	ast_node *result = parse_term(p);
 	for (;;) {
 		binary_op op;
 		if (match(p, TOKEN_PLUS)) {
 			op = OP_PLUS;
 		} else if (match(p, TOKEN_MINUS)) {
 			op = OP_MINUS;
 		} else {
 			break;
 		}
 		ast_node *rhs = parse_term(p);
 		ast_node *sum = arena_alloc(p->allocator, sizeof(ast_node));
 		sum->type = NODE_BINARY;
 		sum->expr.binary.left = result;
 		sum->expr.binary.right = rhs;
 		sum->expr.binary.operator = op;
 		result = sum;
 	}
 	return result;
 }
 /*
  * Parse the whole token stream into p->ast: a linked list of NODE_UNIT
  * nodes, one per top-level expression, in source order.
  *
  * The head node is always created, even when the first expression is
  * NULL. Parsing stops at the first position where no expression can be
  * parsed. Every `next` link is written explicitly so the list is
  * terminated without depending on the allocator returning zeroed memory.
  */
 static void parse(parser *p)
 {
 	p->ast = arena_alloc(p->allocator, sizeof(ast_node));
 	p->ast->type = NODE_UNIT;
 	p->ast->expr.unit_node.next = NULL;
 	p->ast->expr.unit_node.expr = parse_expression(p);

 	ast_node *tail = p->ast;
 	for (;;) {
 		ast_node *expr = parse_expression(p);
 		if (!expr) {
 			break;
 		}
 		ast_node *unit = arena_alloc(p->allocator, sizeof(ast_node));
 		unit->type = NODE_UNIT;
 		unit->expr.unit_node.expr = expr;
 		unit->expr.unit_node.next = NULL;
 		tail->expr.unit_node.next = unit;
 		tail = unit;
 	}
 }
 /*
  * Create a parser over the lexer's token list and parse it immediately.
  *
  * The parser and every AST node are allocated from `allocator`. All
  * fields are initialised explicitly before parsing starts instead of
  * relying on the allocator handing out zeroed memory.
  *
  * Returns the parser; the resulting tree is in its `ast` field.
  */
 parser *parser_init(lexer *l, arena *allocator)
 {
 	parser *p = arena_alloc(allocator, sizeof(parser));
 	p->tokens = l->tokens;
 	p->previous = NULL;
 	p->ast = NULL;
 	p->allocator = allocator;
 	parse(p);
 	return p;
 }
--- a/parser.h
+++ b/parser.h
@ -0,0 +1,145 @@
 #ifndef PARSER_H
 #define PARSER_H
 #include "lexer.h"
 #include "utils.h"
 /*
  * Operator carried by a binary AST node (ast_node.expr.binary.operator).
  * The trailing comment on each enumerator is its source spelling.
  */
 typedef enum {
 	OP_PLUS, // +
 	OP_MINUS, // -
 	OP_DIV, // /
 	OP_MUL, // *
 	OP_EQ, // ==
 	OP_ASSIGN, // =
 	OP_AND, // &&
 	OP_OR, // ||
 	OP_NEQ, // !=
 	OP_GT, // >
 	OP_LT, // <
 	OP_GE, // >=
 	OP_LE, // <=
 	OP_BOR, // |
 	OP_BAND, // &
 	OP_BXOR, // ^
 	OP_MOD, // %
 	OP_PLUS_EQ, // +=
 	OP_MINUS_EQ, // -=
 	OP_DIV_EQ, // /=
 	OP_MUL_EQ, // *=
 	OP_BOR_EQ, // |=
 	OP_BAND_EQ, // &=
 	OP_BXOR_EQ, // ^=
 	OP_MOD_EQ, // %=
 } binary_op;
 /*
  * Operator carried by a unary AST node (ast_node.expr.unary.operator).
  * The trailing comment on each enumerator is its source spelling.
  */
 typedef enum {
 	UOP_INCR, // ++
 	UOP_MINUS, // -
 	UOP_DECR, // --
 	UOP_DEREF, // *
 	UOP_REF, // &
 	UOP_NOT, // !
 } unary_op;
 /*
  * Layout selector for aggregate types.
  * NOTE(review): no code in this header reads it; the exact meaning of each
  * mode (presumably compiler-chosen, packed, and externally-defined layout)
  * should be confirmed where it is consumed.
  */
 typedef enum {
 	LAYOUT_AUTO,
 	LAYOUT_PACKED,
 	LAYOUT_EXTERN
 } struct_layout;
 /*
  * A named, typed slot: type name and member name, each stored as a
  * pointer plus explicit length, and an offset.
  * NOTE(review): presumably the strings are slices into the source buffer
  * and `offset` is a byte offset within the owning aggregate; confirm.
  */
 typedef struct {
 	char *type_name;
 	usize type_len;
 	char *name;
 	usize name_len;
 	usize offset;
 } member;
 /*
  * A function declaration: its name (pointer plus length) and parameters.
  * NOTE(review): no parameter count is stored, so how the end of `params`
  * is found is not visible here; confirm before iterating.
  */
 typedef struct {
 	char *name;
 	usize name_len;
 	member *params;
 } function_decl;
 /*
  * Discriminator stored in ast_node.type. It tells consumers which member
  * of the ast_node.expr union is meaningful for a given node.
  */
 typedef enum {
 	NODE_IDENTIFIER,
 	NODE_INTEGER,
 	NODE_FLOAT,
 	NODE_STRING,
 	NODE_CHAR,
 	NODE_TERNARY,
 	NODE_CAST,
 	NODE_ARRAY_SUBSCRIPT,
 	NODE_ACCESS,
 	NODE_ACCESS_PTR,
 	NODE_CALL,
 	NODE_POSTFIX,
 	NODE_UNARY,
 	NODE_BINARY,
 	NODE_GOTO,
 	NODE_BREAK,
 	NODE_CASE,
 	NODE_SWITCH,
 	NODE_FOR,
 	NODE_DO,
 	NODE_WHILE,
 	NODE_IF,
 	NODE_RETURN,
 	NODE_COMPOUND,
 	NODE_TYPEDEF,
 	NODE_ENUM,
 	NODE_STRUCT,
 	NODE_UNION,
 	NODE_VAR_DECL,
 	NODE_FUNCTION_DEF,
 	NODE_FUNCTION_DECL,
 	NODE_UNIT,
 	NODE_AS,
 } node_type;
 /*
  * One node of the abstract syntax tree: a tag plus a union holding the
  * payload for that kind of node. Only the member matching `type` is valid.
  */
 typedef struct _ast_node {
 	node_type type;
 	union {
 		/* Binary operation: both operands and the operator. */
 		struct {
 			struct _ast_node *left;
 			struct _ast_node *right;
 			binary_op operator;
 		} binary;
 		/* Prefix unary operation: the operand and the operator. */
 		struct {
 			struct _ast_node *right;
 			unary_op operator;
 		} unary;
 		/* Integer literal value. */
 		u64 integer;
 		f64 flt; // float
 		/* String literal or identifier text: pointer plus explicit length. */
 		struct {
 			char *start;
 			usize len;
 		} string;
 		char ch; // char;
 		/* Conditional expression: condition ? then : otherwise. */
 		struct {
 			struct _ast_node *condition;
 			struct _ast_node *then;
 			struct _ast_node *otherwise;
 		} ternary;
 		/* Cast: the value being cast and the target type name (pointer plus length). */
 		struct {
 			struct _ast_node *value;
 			char *type;
 			usize type_len;
 		} cast;
 		/* List cell: one expression and a link to the next cell. */
 		struct {
 			struct _ast_node *expr;
 			struct _ast_node *next;
 		} unit_node;
 	} expr;
 } ast_node;
 /* Parser state: a cursor over the token list plus the tree being built. */
 typedef struct {
 	token *tokens;    /* current (not yet consumed) token; NULL at end of input */
 	token *previous;  /* most recently consumed token */
 	ast_node *ast;    /* root of the parsed tree */
 	arena *allocator; /* arena that AST nodes are allocated from */
 } parser;
 /*
  * Allocate a parser from `allocator`, parse the token list of `l`, and
  * return the parser; the resulting tree is available in its `ast` field.
  */
 parser *parser_init(lexer *l, arena *allocator);
 #endif
--- a/sema.h
+++ b/sema.h
@ -0,0 +1,54 @@
 #ifndef SEMA_H
 #define SEMA_H
 /*
  * Kind of a semantic type; stored in the `tag` field of the type struct.
  * NOTE(review): there are no tags for floating-point or array types, yet
  * the type struct has `flt` and `array` payloads; confirm whether tags
  * are missing.
  */
 typedef enum {
 	TYPE_VOID,
 	TYPE_PTR,
 	TYPE_I8,
 	TYPE_I16,
 	TYPE_I32,
 	TYPE_I64,
 	TYPE_U8,
 	TYPE_U16,
 	TYPE_U32,
 	TYPE_U64,
 	TYPE_STRUCT,
 	TYPE_UNION,
 	TYPE_ENUM,
 } type_tag;
 /*
  * A semantic type: a tag plus a payload describing that kind of type.
  *
  * Pointer and array payloads refer to their element type through a
  * pointer. A struct cannot contain itself by value (the type is still
  * incomplete inside its own definition), so an embedded child made this
  * header impossible to compile. For the same reason the enum payload is
  * named `enumeration`: `enum` is a keyword and cannot be a member name.
  *
  * NOTE(review): this header includes nothing itself; u8, u16, usize, bool,
  * struct_layout, member and function_decl must already be declared by
  * whoever includes it.
  */
 typedef struct _type {
 	type_tag tag;
 	union {
 		u8 integer;
 		u8 flt; // float
 		/* Pointer type: qualifiers, alignment and the pointed-to type. */
 		struct {
 			bool is_const;
 			bool is_volatile;
 			u16 alignment;
 			struct _type *child;
 		} ptr;
 		/* Array type: element count and the element type. */
 		struct {
 			usize len;
 			struct _type *child;
 		} array;
 		/* Struct type: layout mode, name (pointer plus length), alignment, members and declarations. */
 		struct {
 			struct_layout layout;
 			char *name;
 			usize name_len;
 			usize alignment;
 			member *members;
 			function_decl *decls;
 		} structure;
 		/* Enum type: same fields as the struct payload. */
 		struct {
 			struct_layout layout;
 			char *name;
 			usize name_len;
 			usize alignment;
 			member *members;
 			function_decl *decls;
 		} enumeration;
 	} data;
 } type;
 #endif
--- a/test.c
+++ b/test.c
@ -1,482 +1 @@
-#include "lexer.h"
+hello+3-(--ciao)
 #include <stdbool.h>
 #include <ctype.h>
 #include <stdio.h>
 #include <string.h>
 /*
  * Printable name for each token type, indexed by the token_type value.
  * Slots without an initializer are NULL, so every token type that can be
  * printed needs an entry; the table includes the return, struct and enum
  * keyword tokens that the parser's error recovery refers to.
  */
 static const char *token_type_str[] = {
 	[TOKEN_ERROR]        = "TOKEN_ERROR",
 	[TOKEN_END]          = "TOKEN_END",
 	[TOKEN_PLUS]         = "TOKEN_PLUS",
 	[TOKEN_PLUS_PLUS]    = "TOKEN_PLUS_PLUS",
 	[TOKEN_MINUS]        = "TOKEN_MINUS",
 	[TOKEN_MINUS_MINUS]  = "TOKEN_MINUS_MINUS",
 	[TOKEN_SLASH]        = "TOKEN_SLASH",
 	[TOKEN_PERC]         = "TOKEN_PERC",
 	[TOKEN_STAR]         = "TOKEN_STAR",
 	[TOKEN_AND]          = "TOKEN_AND",
 	[TOKEN_HAT]          = "TOKEN_HAT",
 	[TOKEN_PIPE]         = "TOKEN_PIPE",
 	[TOKEN_EQ]           = "TOKEN_EQ",
 	[TOKEN_ARROW]        = "TOKEN_ARROW",
 	[TOKEN_LSHIFT]       = "TOKEN_LSHIFT",
 	[TOKEN_RSHIFT]       = "TOKEN_RSHIFT",
 	[TOKEN_DOUBLE_EQ]    = "TOKEN_DOUBLE_EQ",
 	[TOKEN_LESS_THAN]    = "TOKEN_LESS_THAN",
 	[TOKEN_GREATER_THAN] = "TOKEN_GREATER_THAN",
 	[TOKEN_LESS_EQ]      = "TOKEN_LESS_EQ",
 	[TOKEN_GREATER_EQ]   = "TOKEN_GREATER_EQ",
 	[TOKEN_NOT_EQ]       = "TOKEN_NOT_EQ",
 	[TOKEN_PLUS_EQ]      = "TOKEN_PLUS_EQ",
 	[TOKEN_MINUS_EQ]     = "TOKEN_MINUS_EQ",
 	[TOKEN_STAR_EQ]      = "TOKEN_STAR_EQ",
 	[TOKEN_SLASH_EQ]     = "TOKEN_SLASH_EQ",
 	[TOKEN_AND_EQ]       = "TOKEN_AND_EQ",
 	[TOKEN_HAT_EQ]       = "TOKEN_HAT_EQ",
 	[TOKEN_PIPE_EQ]      = "TOKEN_PIPE_EQ",
 	[TOKEN_PERC_EQ]      = "TOKEN_PERC_EQ",
 	[TOKEN_LSHIFT_EQ]    = "TOKEN_LSHIFT_EQ",
 	[TOKEN_RSHIFT_EQ]    = "TOKEN_RSHIFT_EQ",
 	[TOKEN_OR]           = "TOKEN_OR",
 	[TOKEN_DOUBLE_AND]   = "TOKEN_DOUBLE_AND",
 	[TOKEN_COLON]        = "TOKEN_COLON",
 	[TOKEN_SEMICOLON]    = "TOKEN_SEMICOLON",
 	[TOKEN_DOT]          = "TOKEN_DOT",
 	[TOKEN_BANG]         = "TOKEN_BANG",
 	[TOKEN_COMMA]        = "TOKEN_COMMA",
 	[TOKEN_LPAREN]       = "TOKEN_LPAREN",
 	[TOKEN_RPAREN]       = "TOKEN_RPAREN",
 	[TOKEN_LSQUARE]      = "TOKEN_LSQUARE",
 	[TOKEN_RSQUARE]      = "TOKEN_RSQUARE",
 	[TOKEN_LCURLY]       = "TOKEN_LCURLY",
 	[TOKEN_RCURLY]       = "TOKEN_RCURLY",
 	[TOKEN_INTEGER]      = "TOKEN_INTEGER",
 	[TOKEN_FLOAT]        = "TOKEN_FLOAT",
 	[TOKEN_IDENTIFIER]   = "TOKEN_IDENTIFIER",
 	[TOKEN_STRING]       = "TOKEN_STRING",
 	[TOKEN_CHAR]         = "TOKEN_CHAR",
 	[TOKEN_WHILE]        = "TOKEN_WHILE",
 	[TOKEN_FOR]          = "TOKEN_FOR",
 	[TOKEN_GOTO]         = "TOKEN_GOTO",
 	[TOKEN_IF]           = "TOKEN_IF",
 	[TOKEN_ELSE]         = "TOKEN_ELSE",
 	[TOKEN_SWITCH]       = "TOKEN_SWITCH",
 	[TOKEN_CASE]         = "TOKEN_CASE",
 	[TOKEN_DO]           = "TOKEN_DO",
 	[TOKEN_DEFER]        = "TOKEN_DEFER",
 	[TOKEN_MODULE]       = "TOKEN_MODULE",
 	[TOKEN_RETURN]       = "TOKEN_RETURN",
 	[TOKEN_STATIC]       = "TOKEN_STATIC",
 	[TOKEN_CONST]        = "TOKEN_CONST",
 	[TOKEN_EXTERN]       = "TOKEN_EXTERN",
 	[TOKEN_VOLATILE]     = "TOKEN_VOLATILE",
 	[TOKEN_STRUCT]       = "TOKEN_STRUCT",
 	[TOKEN_ENUM]         = "TOKEN_ENUM",
 };
 /* Keyword trie: allocated and filled in lexer_init, queried by parse_identifier. */
 trie_node *keywords;
 /*
  * Print a token to stdout as "<TYPE_NAME>: <lexeme>" with no trailing
  * newline.
  *
  * The name lookup is bounds-checked and falls back to "TOKEN_UNKNOWN" for
  * a type without a table entry, because passing NULL to %s is undefined
  * behaviour. The lexeme is length-delimited, so it is written with fwrite
  * rather than as a C string.
  */
 void lexer_print_token(token *t)
 {
 	const usize known = sizeof token_type_str / sizeof token_type_str[0];
 	const char *name = NULL;
 	if ((usize)t->type < known) {
 		name = token_type_str[t->type];
 	}
 	if (!name) {
 		name = "TOKEN_UNKNOWN";
 	}
 	printf("%s: ", name);
 	fwrite(t->lexeme, 1, t->lexeme_len, stdout);
 }
 /*
  * Append a token of `type` covering `len` bytes starting at the lexer's
  * current index. The token records the current row and column and points
  * into the source buffer; nothing is copied.
  *
  * The caller is responsible for advancing l->index afterwards. The new
  * token's `next` link is cleared explicitly so the list is terminated
  * without depending on the allocator returning zeroed memory.
  */
 static void add_token(lexer *l, token_type type, usize len)
 {
 	token *t = arena_alloc(l->allocator, sizeof(token));
 	t->type = type;
 	t->lexeme_len = len;
 	t->lexeme = l->source + l->index;
 	t->position.row = l->row;
 	t->position.column = l->column;
 	t->next = NULL;

 	if (!l->tokens) {
 		l->tokens = t;
 	} else {
 		l->tail->next = t;
 	}
 	l->tail = t;
 }
 /*
  * Append a TOKEN_ERROR token whose lexeme is `msg` (stored by pointer, so
  * `msg` must outlive the token list). The token records the current row
  * and column. The lexer index is not touched.
  *
  * The new token's `next` link is cleared explicitly so the list is
  * terminated without depending on the allocator returning zeroed memory.
  */
 static void add_error(lexer *l, char *msg)
 {
 	token *t = arena_alloc(l->allocator, sizeof(token));
 	t->type = TOKEN_ERROR;
 	t->lexeme_len = strlen(msg);
 	t->lexeme = msg;
 	t->position.row = l->row;
 	t->position.column = l->column;
 	t->next = NULL;

 	if (!l->tokens) {
 		l->tokens = t;
 	} else {
 		l->tail->next = t;
 	}
 	l->tail = t;
 }
 /*
  * Lex a numeric literal at the current index: a run of digits, optionally
  * followed by one '.' and at least one more digit (which makes it a
  * float). Emits TOKEN_INTEGER or TOKEN_FLOAT and advances the index past
  * the literal.
  *
  * Only a single fractional part is accepted, so "1.23.4" lexes as the
  * float "1.23" followed by '.' and "4" instead of one malformed float.
  * A '.' not followed by a digit is left for the caller.
  * Characters are cast to unsigned char before isdigit, as <ctype.h>
  * requires for values that may be negative.
  */
 static void parse_number(lexer *l)
 {
 	const char *start = l->source + l->index;
 	usize len = 0;
 	bool is_float = false;

 	while (isdigit((unsigned char)start[len])) {
 		len += 1;
 	}
 	if (start[len] == '.' && isdigit((unsigned char)start[len + 1])) {
 		is_float = true;
 		len += 1;
 		while (isdigit((unsigned char)start[len])) {
 			len += 1;
 		}
 	}

 	/* add_token reads l->index as the lexeme start, so advance afterwards. */
 	add_token(l, is_float ? TOKEN_FLOAT : TOKEN_INTEGER, len);
 	l->index += len;
 }
 /*
  * Lex an identifier or keyword at the current index: the longest run of
  * alphanumeric characters and underscores. The text is looked up in the
  * keyword trie; a non-zero result is emitted as that keyword token,
  * otherwise TOKEN_IDENTIFIER is emitted. The index ends up past the word.
  *
  * Characters are cast to unsigned char before isalnum, as <ctype.h>
  * requires for values that may be negative.
  */
 static void parse_identifier(lexer *l)
 {
 	char *start = l->source + l->index;
 	usize len = 0;
 	while (isalnum((unsigned char)start[len]) || start[len] == '_') {
 		len += 1;
 	}

 	token_type keyword = trie_get(keywords, start, len);
 	/* add_token reads l->index as the lexeme start, so advance afterwards. */
 	add_token(l, keyword ? keyword : TOKEN_IDENTIFIER, len);
 	l->index += len;
 }
 /*
  * Lex the body of a string literal; the index must already be just past
  * the opening quote. Emits TOKEN_STRING for the text up to (not
  * including) the closing quote and leaves the index after that quote.
  *
  * If a newline or NUL is reached first, an error token is emitted and the
  * index is left on that terminating character. Nothing is written to
  * stdout; the offending character used to be echoed there by a leftover
  * debug print. Escape sequences are not interpreted.
  */
 static void parse_string(lexer *l)
 {
 	const char *start = l->source + l->index;
 	usize len = 0;

 	while (start[len] != '"') {
 		if (start[len] == '\0' || start[len] == '\n') {
 			add_error(l, "unclosed string literal.");
 			l->index += len;
 			return;
 		}
 		len += 1;
 	}

 	/* add_token reads l->index as the lexeme start, so advance afterwards. */
 	add_token(l, TOKEN_STRING, len);
 	l->index += len + 1;
 }
 /*
  * Try to lex an operator, a punctuation token or a character literal at
  * the current index.
  *
  * Returns true when something was emitted (a token or an error token) and
  * the index was advanced; returns false, with the index untouched, when
  * the current character does not start any of these.
  *
  * Operators use longest match: "<<=" and ">>=" first, then the
  * two-character forms, then the single character.
  *
  * Character literals are 'c' or '\c'. The emitted lexeme excludes the
  * quotes (length 1, or 2 including the backslash). For a malformed
  * literal an error token is emitted and only the opening quote is
  * consumed. The index always moves forward in that case: returning true
  * without advancing would make the caller retry the same character
  * forever. Bytes are checked for NUL before looking further ahead so a
  * quote near the end of the buffer does not cause a read past it.
  */
 static bool parse_special(lexer *l)
 {
 	/* For a shared first character the two-character spellings come before
 	 * the single-character fallback, which has second == '\0'. */
 	static const struct {
 		char first;
 		char second;
 		token_type type;
 	} spellings[] = {
 		{ '+', '=',  TOKEN_PLUS_EQ },
 		{ '+', '+',  TOKEN_PLUS_PLUS },
 		{ '+', '\0', TOKEN_PLUS },
 		{ '-', '=',  TOKEN_MINUS_EQ },
 		{ '-', '-',  TOKEN_MINUS_MINUS },
 		{ '-', '>',  TOKEN_ARROW },
 		{ '-', '\0', TOKEN_MINUS },
 		{ '/', '=',  TOKEN_SLASH_EQ },
 		{ '/', '\0', TOKEN_SLASH },
 		{ '*', '=',  TOKEN_STAR_EQ },
 		{ '*', '\0', TOKEN_STAR },
 		{ '%', '=',  TOKEN_PERC_EQ },
 		{ '%', '\0', TOKEN_PERC },
 		{ '&', '=',  TOKEN_AND_EQ },
 		{ '&', '&',  TOKEN_DOUBLE_AND },
 		{ '&', '\0', TOKEN_AND },
 		{ '^', '=',  TOKEN_HAT_EQ },
 		{ '^', '\0', TOKEN_HAT },
 		{ '|', '=',  TOKEN_PIPE_EQ },
 		{ '|', '|',  TOKEN_OR },
 		{ '|', '\0', TOKEN_PIPE },
 		{ '=', '=',  TOKEN_DOUBLE_EQ },
 		{ '=', '\0', TOKEN_EQ },
 		{ '>', '=',  TOKEN_GREATER_EQ },
 		{ '>', '>',  TOKEN_RSHIFT },
 		{ '>', '\0', TOKEN_GREATER_THAN },
 		{ '<', '=',  TOKEN_LESS_EQ },
 		{ '<', '<',  TOKEN_LSHIFT },
 		{ '<', '\0', TOKEN_LESS_THAN },
 		{ '!', '=',  TOKEN_NOT_EQ },
 		{ '!', '\0', TOKEN_BANG },
 		{ ':', '\0', TOKEN_COLON },
 		{ ';', '\0', TOKEN_SEMICOLON },
 		{ '.', '\0', TOKEN_DOT },
 		{ ',', '\0', TOKEN_COMMA },
 		{ '(', '\0', TOKEN_LPAREN },
 		{ ')', '\0', TOKEN_RPAREN },
 		{ '[', '\0', TOKEN_LSQUARE },
 		{ ']', '\0', TOKEN_RSQUARE },
 		{ '{', '\0', TOKEN_LCURLY },
 		{ '}', '\0', TOKEN_RCURLY },
 	};
 	const char *s = l->source + l->index;

 	/* The only three-character operators: "<<=" and ">>=". */
 	if ((s[0] == '>' || s[0] == '<') && s[1] == s[0] && s[2] == '=') {
 		add_token(l, s[0] == '>' ? TOKEN_RSHIFT_EQ : TOKEN_LSHIFT_EQ, 3);
 		l->index += 3;
 		return true;
 	}

 	if (s[0] == '\'') {
 		/* Bytes between the quotes: 2 for an escape, 1 otherwise. */
 		usize body = (s[1] == '\\') ? 2 : 1;
 		bool closed = s[1] != '\0'
 			&& (body == 1 || s[2] != '\0')
 			&& s[body + 1] == '\'';
 		if (!closed) {
 			add_error(l, "unclosed character literal.");
 			l->index += 1; /* skip the stray quote so lexing makes progress */
 			return true;
 		}
 		l->index += 1;        /* opening quote */
 		add_token(l, TOKEN_CHAR, body);
 		l->index += body + 1; /* literal body and closing quote */
 		return true;
 	}

 	for (usize i = 0; i < sizeof spellings / sizeof spellings[0]; i++) {
 		if (s[0] != spellings[i].first) {
 			continue;
 		}
 		if (spellings[i].second != '\0' && s[1] != spellings[i].second) {
 			continue;
 		}
 		usize len = spellings[i].second != '\0' ? 2 : 1;
 		add_token(l, spellings[i].type, len);
 		l->index += len;
 		return true;
 	}
 	return false;
 }
 /*
  * Main lexer loop: walk the source buffer and append tokens to the lexer.
  *
  * Each iteration looks at one character and dispatches, in order, to the
  * operator/char-literal lexer, the number lexer, the identifier lexer and
  * the string lexer. Whitespace is skipped, and any character none of them
  * accepts is skipped silently.
  *
  * NOTE(review): the loop condition is `index <= size`, so source[size] is
  * read; presumably the buffer is NUL-terminated. Confirm with the caller.
  * NOTE(review): identifiers are only started on isalpha, so a leading '_'
  * is skipped here even though parse_identifier accepts '_' inside a word.
  */
 static void parse(lexer *l)
 {
 	char c;
 	while (l->index <= l->size) {
 		c = l->source[l->index];
 		/* Count the character being looked at; multi-character tokens add the rest below. */
 		l->column += 1;
 		if (c == '\n') {
 			l->index += 1;
 			l->row += 1;
 			/* Reset so the increment at the top of the next iteration yields column 1. */
 			l->column = 0;
 			continue;
 		}
 		if (isspace(c)) {
 			l->index += 1;
 			continue;
 		}
 		/* Remember where the token starts so its width can be added to the column. */
 		usize head = l->index;
 		if (parse_special(l)) {
 			/* One column was already counted above, hence the -1. */
 			l->column += (l->index - head - 1);
 			continue;
 		}
 		if (isdigit(c)) {
 			parse_number(l);
 			l->column += (l->index - head - 1);
 			continue;
 		}
 		if (isalpha(c)) {
 			parse_identifier(l);
 			l->column += (l->index - head - 1);
 			continue;
 		}
 		if (c == '"') {
 			/* Step over the opening quote; parse_string starts on the first body character. */
 			l->index += 1;
 			parse_string(l);
 			l->column += (l->index - head - 1);
 			continue;
 		}
 		/* Unrecognised character: skip it. */
 		l->index += 1;
 	}
 }
 /*
  * Create a lexer over `source` (`size` bytes), register the language
  * keywords in the global keyword trie, and tokenize the whole buffer.
  * The lexer, the trie root and all tokens are allocated from `arena`.
  *
  * Returns the lexer; its `tokens` field is the head of the token list.
  */
 lexer *lexer_init(char *source, usize size, arena *arena)
 {
 	/* Reserved words in registration order. */
 	static const struct {
 		char *word;
 		token_type type;
 	} reserved[] = {
 		{ "while",    TOKEN_WHILE },
 		{ "for",      TOKEN_FOR },
 		{ "goto",     TOKEN_GOTO },
 		{ "if",       TOKEN_IF },
 		{ "else",     TOKEN_ELSE },
 		{ "switch",   TOKEN_SWITCH },
 		{ "case",     TOKEN_CASE },
 		{ "do",       TOKEN_DO },
 		{ "defer",    TOKEN_DEFER },
 		{ "module",   TOKEN_MODULE },
 		{ "static",   TOKEN_STATIC },
 		{ "const",    TOKEN_CONST },
 		{ "extern",   TOKEN_EXTERN },
 		{ "volatile", TOKEN_VOLATILE },
 	};

 	lexer *lex = arena_alloc(arena, sizeof(lexer));
 	lex->source = source;
 	lex->size = size;
 	lex->allocator = arena;
 	lex->index = 0;
 	lex->row = 0;
 	lex->column = 0;
 	lex->tokens = 0;
 	lex->tail = 0;

 	keywords = arena_alloc(arena, sizeof(trie_node));
 	for (usize i = 0; i < sizeof reserved / sizeof reserved[0]; i++) {
 		trie_insert(keywords, lex->allocator, reserved[i].word, reserved[i].type);
 	}

 	parse(lex);
 	return lex;
 }
--- a/utils.c
+++ b/utils.c
@ -3,6 +3,37 @@
 #include <string.h>
 #include <stdio.h>
 /*
  * Convert the first `len` characters of `s` to an unsigned integer,
  * reading them as base-10 digits. No validation is done and the result
  * wraps on overflow. Returns 0 when `len` is 0.
  */
 u64 parse_int(char *s, usize len)
 {
 	u64 value = 0;
 	const char *cursor = s;
 	usize remaining = len;
 	while (remaining > 0) {
 		value = (value * 10) + (*cursor - '0');
 		cursor++;
 		remaining--;
 	}
 	return value;
 }
 /*
  * Convert the first `len` characters of `s`, a decimal literal such as
  * "12.25", to a floating-point value.
  *
  * The digits on both sides of the point are accumulated as one integer
  * value and divided by 10^(number of fraction digits). The '.' itself is
  * skipped rather than treated as a digit, and input without a '.' is
  * returned unscaled. Scanning stops at the first character that is
  * neither a digit nor the first '.'.
  *
  * Returns 0.0 when no digits are found.
  */
 f64 parse_float(char *s, usize len)
 {
 	f64 digits = 0.0;
 	f64 scale = 1.0;
 	int in_fraction = 0;

 	for (usize i = 0; i < len; i++) {
 		char c = s[i];
 		if (c == '.' && !in_fraction) {
 			in_fraction = 1;
 			continue;
 		}
 		if (c < '0' || c > '9') {
 			break;
 		}
 		digits = digits * 10.0 + (f64)(c - '0');
 		if (in_fraction) {
 			scale *= 10.0;
 		}
 	}
 	return digits / scale;
 }
 void trie_insert(trie_node *root, arena *a, char *key, uint16_t value)
 {
 	trie_node *node = root;
--- a/utils.h
+++ b/utils.h
@ -20,6 +20,9 @@ typedef size_t usize;
 typedef float f32;
 typedef double f64;
 u64 parse_int(char *s, usize len);
 f64 parse_float(char *s, usize len);
 typedef struct {
 	usize capacity;
 	usize position;