implemented basic parsing

2025-11-30 21:56:39 +01:00 · 2025-11-30 21:56:39 +01:00 · abf1d7c066
commit abf1d7c066
parent 038930d881
10 changed files with 588 additions and 486 deletions
--- a/4
+++ b/4
@ -3,8 +3,8 @@

 include config.mk

-SRC = cc.c utils.c lexer.c
-HDR = config.def.h utils.h
+SRC = cc.c utils.c lexer.c parser.c
+HDR = config.def.h utils.h lexer.h parser.h sema.h
 OBJ = ${SRC:.c=.o}

 all: options cc
--- a/cc.c
+++ b/cc.c
@ -2,6 +2,121 @@
 #include <stdlib.h>
 #include "utils.h"
 #include "lexer.h"
+#include "parser.h"
+
+// Emit two spaces per nesting level so child nodes line up under their parent.
+void print_indent(int depth) {
+	int remaining = depth;
+	while (remaining > 0) {
+		printf("  ");
+		remaining--;
+	}
+}
+
+// Map a binary operator to the spelling used in the AST dump.
+// Every enumerator of binary_op has an entry; "?" is returned only for a
+// value outside the enum.
+const char* get_op_str(binary_op op) {
+	switch(op) {
+		case OP_PLUS: return "+";
+		case OP_MINUS: return "-";
+		case OP_DIV: return "/";
+		case OP_MUL: return "*";
+		case OP_EQ: return "==";
+		case OP_ASSIGN: return "=";
+		case OP_AND: return "&&";
+		case OP_OR: return "||";
+		case OP_NEQ: return "!=";
+		case OP_GT: return ">";
+		case OP_LT: return "<";
+		case OP_GE: return ">=";
+		case OP_LE: return "<=";
+		case OP_BOR: return "|";
+		case OP_BAND: return "&";
+		case OP_BXOR: return "^";
+		case OP_MOD: return "%";
+		case OP_PLUS_EQ: return "+=";
+		case OP_MINUS_EQ: return "-=";
+		case OP_DIV_EQ: return "/=";
+		case OP_MUL_EQ: return "*=";
+		// The remaining compound assignments were previously reported as "?".
+		case OP_BOR_EQ: return "|=";
+		case OP_BAND_EQ: return "&=";
+		case OP_BXOR_EQ: return "^=";
+		case OP_MOD_EQ: return "%=";
+		default: return "?";
+	}
+}
+
+// Map a unary operator to the spelling used in the AST dump; "?" for any
+// value that has no entry in the table.
+const char *get_uop_str(unary_op op) {
+	static const char *const spellings[] = {
+		[UOP_INCR] = "++",
+		[UOP_MINUS] = "-",
+		[UOP_DECR] = "--",
+		[UOP_DEREF] = "*",
+		[UOP_REF] = "&",
+		[UOP_NOT] = "!",
+	};
+	const size_t count = sizeof(spellings) / sizeof(spellings[0]);
+
+	if ((int)op < 0 || (size_t)op >= count || spellings[op] == NULL) {
+		return "?";
+	}
+	return spellings[op];
+}
+
+// Recursively dump the AST rooted at `node` to stdout, one node per line.
+// `depth` is the indentation level of `node`; children are printed one level
+// deeper. A NULL node prints nothing.
+void print_ast(ast_node *node, int depth) {
+	if (!node) return;
+
+	print_indent(depth);
+
+	switch (node->type) {
+		case NODE_INTEGER:
+			// Cast so the argument matches the conversion specifier
+			// whichever integer type u64 is an alias for.
+			printf("Integer: %llu\n", (unsigned long long)node->expr.integer);
+			break;
+		case NODE_FLOAT:
+			printf("Float: %f\n", node->expr.flt);
+			break;
+		case NODE_CHAR:
+			printf("Char: '%c'\n", node->expr.ch);
+			break;
+		case NODE_STRING:
+			// Strings and identifiers are start/len slices, hence %.*s.
+			printf("String: \"%.*s\"\n", (int)node->expr.string.len, node->expr.string.start);
+			break;
+		case NODE_IDENTIFIER:
+			printf("Identifier: %.*s\n", (int)node->expr.string.len, node->expr.string.start);
+			break;
+		case NODE_BINARY:
+			printf("BinaryOp (%s)\n", get_op_str(node->expr.binary.operator));
+			print_ast(node->expr.binary.left, depth + 1);
+			print_ast(node->expr.binary.right, depth + 1);
+			break;
+		case NODE_UNARY:
+			printf("UnaryOp (%s)\n", get_uop_str(node->expr.unary.operator));
+			print_ast(node->expr.unary.right, depth + 1);
+			break;
+		case NODE_TERNARY:
+			printf("Ternary (? :)\n");
+			print_indent(depth + 1); printf("Condition:\n");
+			print_ast(node->expr.ternary.condition, depth + 2);
+			print_indent(depth + 1); printf("Then:\n");
+			print_ast(node->expr.ternary.then, depth + 2);
+			print_indent(depth + 1); printf("Else:\n");
+			print_ast(node->expr.ternary.otherwise, depth + 2);
+			break;
+		case NODE_UNIT:
+		case NODE_COMPOUND: {
+			// Braces give `current` its own scope so the later case
+			// labels do not jump over its declaration.
+			printf("Unit/Block:\n");
+			// Units are chained through unit_node.next; walk the chain
+			// here and print each element's expression one level deeper.
+			ast_node *current = node;
+			while (current && (current->type == NODE_UNIT || current->type == NODE_COMPOUND)) {
+				print_ast(current->expr.unit_node.expr, depth + 1);
+				current = current->expr.unit_node.next;
+			}
+			break;
+		}
+		// The node kinds below have no payload in ast_node yet, so only a
+		// placeholder line is printed.
+		case NODE_IF:
+			printf("IfStmt (Fields missing in struct)\n");
+			break;
+		case NODE_WHILE:
+			printf("WhileStmt (Fields missing in struct)\n");
+			break;
+		case NODE_VAR_DECL:
+			printf("VarDecl (Fields missing in struct)\n");
+			break;
+		case NODE_FUNCTION_DEF:
+			printf("FunctionDef (Fields missing in struct)\n");
+			break;
+		case NODE_RETURN:
+			printf("Return (Fields missing in struct)\n");
+			break;
+		default:
+			printf("Unknown Node Type: %d\n", (int)node->type);
+			break;
+	}
+}

 int main(void)
 {
@ -17,6 +132,8 @@ int main(void)

 	arena a = arena_init(0x1000 * 0x1000 * 64);
 	lexer *l = lexer_init(src, size, &a);
+	parser *p = parser_init(l, &a);
+	print_ast(p->ast, 0);

 	arena_deinit(a);

--- a/lexer.c
+++ b/lexer.c
@ -381,8 +381,8 @@ static void parse(lexer *l)
 lexer *lexer_init(char *source, usize size, arena *arena)
 {
 	lexer *lex = arena_alloc(arena, sizeof(lexer));
-	lex->column = 0;
-	lex->row = 0;
+	lex->column = 1;
+	lex->row = 1;
 	lex->index = 0;
 	lex->size = size;
 	lex->tokens = 0;
@ -403,6 +403,7 @@ lexer *lexer_init(char *source, usize size, arena *arena)
 	trie_insert(keywords, lex->allocator, "case", TOKEN_CASE);
 	trie_insert(keywords, lex->allocator, "do", TOKEN_DO);
 	trie_insert(keywords, lex->allocator, "defer", TOKEN_DEFER);
+	trie_insert(keywords, lex->allocator, "return", TOKEN_RETURN);
 	trie_insert(keywords, lex->allocator, "module", TOKEN_MODULE);
 	trie_insert(keywords, lex->allocator, "static", TOKEN_STATIC);
 	trie_insert(keywords, lex->allocator, "const", TOKEN_CONST);
--- a/lexer.h
+++ b/lexer.h
@ -67,6 +67,7 @@ typedef enum {
 	TOKEN_DO,
 	TOKEN_DEFER,
 	TOKEN_MODULE,
+	TOKEN_RETURN,

 	TOKEN_STATIC,
 	TOKEN_CONST,
--- a/parser.c
+++ b/parser.c
@ -0,0 +1,231 @@
+#include "parser.h"
+#include <stdbool.h>
+#include <stdio.h>
+
+ast_node *parse_expression(parser *p);
+
+/*
+ * Consume the current token: it becomes `previous`, and the cursor moves to
+ * the next token. With no current token, `previous` is set to NULL and the
+ * cursor stays where it is.
+ */
+static void advance(parser *p)
+{
+	token *current = p->tokens;
+
+	p->previous = current;
+	if (current == NULL) {
+		return;
+	}
+	p->tokens = current->next;
+}
+
+/* Return the current, not yet consumed token without advancing; may be NULL. */
+static token *peek(parser *p)
+{
+	token *current = p->tokens;
+	return current;
+}
+
+/* Report whether a current token exists and has the given type; consumes nothing. */
+static bool match_peek(parser *p, token_type type)
+{
+	token *current = p->tokens;
+	return current != NULL && current->type == type;
+}
+
+/*
+ * Consume the current token and return true if it has the given type.
+ * Otherwise return false and leave the parser state untouched.
+ */
+static bool match(parser *p, token_type type)
+{
+	if (!match_peek(p, type)) {
+		return false;
+	}
+	advance(p);
+	return true;
+}
+
+/*
+ * Error recovery: skip tokens until a likely statement boundary.
+ * Always consumes one token first. It then stops when the token just
+ * consumed was ';' or '}', when the current token is one of the keywords
+ * listed in the switch below, or when the input is exhausted.
+ */
+static void parser_sync(parser *p)
+{
+	for (advance(p); p->tokens != NULL; advance(p)) {
+		token_type consumed = p->previous->type;
+		if (consumed == TOKEN_SEMICOLON || consumed == TOKEN_RCURLY) {
+			return;
+		}
+
+		switch (p->tokens->type) {
+			case TOKEN_STRUCT:
+			case TOKEN_ENUM:
+			case TOKEN_IF:
+			case TOKEN_WHILE:
+			case TOKEN_FOR:
+			case TOKEN_DO:
+			case TOKEN_RETURN:
+			case TOKEN_SWITCH:
+				return;
+			default:
+				/* Not a boundary: the loop step skips this token. */
+				break;
+		}
+	}
+}
+
+/*
+ * Print a diagnostic for `msg` and resynchronise the parser.
+ * The reported position is that of the last consumed token. If none has
+ * been consumed, the current token is used instead, and 0:0 is printed when
+ * neither exists, so the function never dereferences a NULL token.
+ */
+static void error(parser *p, char *msg)
+{
+	token *at = p->previous ? p->previous : p->tokens;
+	long row = 0;
+	long column = 0;
+
+	if (at) {
+		/* Cast so the arguments match %ld whatever type the position fields have. */
+		row = (long)at->position.row;
+		column = (long)at->position.column;
+	}
+
+	printf("\x1b[31m\x1b[1merror\x1b[0m\x1b[1m:%ld:%ld:\x1b[0m %s\n", row, column, msg);
+	parser_sync(p);
+}
+
+/*
+ * Parse a primary expression: an integer, float, identifier, string or
+ * character literal, or a parenthesised expression.
+ * Returns NULL, consuming nothing, when the current token starts none of
+ * these. Also returns NULL after reporting an invalid escape or a missing ')'.
+ */
+static ast_node *parse_factor(parser *p)
+{
+	token *t = peek(p);
+	if (t == NULL) {
+		return NULL;
+	}
+
+	ast_node *node;
+	switch (t->type) {
+		case TOKEN_INTEGER:
+			advance(p);
+			node = arena_alloc(p->allocator, sizeof(ast_node));
+			node->type = NODE_INTEGER;
+			node->expr.integer = parse_int(t->lexeme, t->lexeme_len);
+			return node;
+
+		case TOKEN_FLOAT:
+			advance(p);
+			node = arena_alloc(p->allocator, sizeof(ast_node));
+			node->type = NODE_FLOAT;
+			node->expr.flt = parse_float(t->lexeme, t->lexeme_len);
+			return node;
+
+		case TOKEN_IDENTIFIER:
+		case TOKEN_STRING:
+			/* Both are stored as a start/len slice of the lexeme. */
+			advance(p);
+			node = arena_alloc(p->allocator, sizeof(ast_node));
+			node->type = (t->type == TOKEN_IDENTIFIER) ? NODE_IDENTIFIER : NODE_STRING;
+			node->expr.string.start = t->lexeme;
+			node->expr.string.len = t->lexeme_len;
+			return node;
+
+		case TOKEN_CHAR:
+			advance(p);
+			node = arena_alloc(p->allocator, sizeof(ast_node));
+			node->type = NODE_CHAR;
+			if (t->lexeme_len != 2) {
+				node->expr.ch = t->lexeme[0];
+				return node;
+			}
+			/* A two-character lexeme is an escape; its second character selects the value. */
+			switch (t->lexeme[1]) {
+				case 'n': node->expr.ch = '\n'; break;
+				case 't': node->expr.ch = '\t'; break;
+				case 'r': node->expr.ch = '\r'; break;
+				case '0': node->expr.ch = '\0'; break;
+				case '\\': node->expr.ch = '\\'; break;
+				case '\'': node->expr.ch = '\''; break;
+				default:
+					error(p, "invalid escape code.");
+					return NULL;
+			}
+			return node;
+
+		case TOKEN_LPAREN:
+			advance(p);
+			node = parse_expression(p);
+			if (!match(p, TOKEN_RPAREN)) {
+				error(p, "unclosed parenthesis");
+				return NULL;
+			}
+			return node;
+
+		default:
+			return NULL;
+	}
+}
+
+/*
+ * Parse a prefix unary expression (++, -, --, *, &, !) or fall through to a
+ * primary expression.
+ * The operand is itself parsed as a unary expression, so the operator binds
+ * tighter than any binary operator: `-a + b` is `(-a) + b`, and prefix
+ * operators may be stacked (`!-x`).
+ */
+ast_node *parse_unary(parser *p)
+{
+	token *t = peek(p);
+	if (!t) {
+		return parse_factor(p);
+	}
+
+	unary_op op;
+	switch (t->type) {
+		case TOKEN_PLUS_PLUS:
+			op = UOP_INCR;
+			break;
+		case TOKEN_MINUS:
+			op = UOP_MINUS;
+			break;
+		case TOKEN_MINUS_MINUS:
+			op = UOP_DECR;
+			break;
+		case TOKEN_STAR:
+			op = UOP_DEREF;
+			break;
+		case TOKEN_AND:
+			op = UOP_REF;
+			break;
+		case TOKEN_BANG:
+			op = UOP_NOT;
+			break;
+		default:
+			/* Not a prefix operator: this is a primary expression. */
+			return parse_factor(p);
+	}
+	advance(p);
+
+	ast_node *node = arena_alloc(p->allocator, sizeof(ast_node));
+	node->type = NODE_UNARY;
+	node->expr.unary.operator = op;
+	/* Recurse into parse_unary, not parse_expression, to keep unary precedence. */
+	node->expr.unary.right = parse_unary(p);
+
+	return node;
+}
+
+/*
+ * Parse a left-associative chain of `*` and `/` operations.
+ * Both operands are unary expressions, so a prefix operator is accepted on
+ * the right-hand side as well (`a * -b`).
+ */
+ast_node *parse_term(parser *p)
+{
+	ast_node *left = parse_unary(p);
+
+	while (match_peek(p, TOKEN_STAR) || match_peek(p, TOKEN_SLASH)) {
+		binary_op op = peek(p)->type == TOKEN_STAR ? OP_MUL : OP_DIV;
+		advance(p);
+		/* parse_unary, not parse_factor: otherwise `a * -b` loses its operand. */
+		ast_node *right = parse_unary(p);
+		ast_node *node = arena_alloc(p->allocator, sizeof(ast_node));
+		node->type = NODE_BINARY;
+		node->expr.binary.left = left;
+		node->expr.binary.right = right;
+		node->expr.binary.operator = op;
+		/* Fold to the left so `a / b * c` groups as `(a / b) * c`. */
+		left = node;
+	}
+
+	return left;
+}
+
+/*
+ * Parse a left-associative chain of `+` and `-` operations whose operands
+ * are terms. Returns whatever parse_term returned when no additive operator
+ * follows.
+ */
+ast_node *parse_expression(parser *p)
+{
+	ast_node *result = parse_term(p);
+
+	for (;;) {
+		binary_op op;
+		if (match_peek(p, TOKEN_PLUS)) {
+			op = OP_PLUS;
+		} else if (match_peek(p, TOKEN_MINUS)) {
+			op = OP_MINUS;
+		} else {
+			break;
+		}
+		advance(p);
+
+		ast_node *rhs = parse_term(p);
+		ast_node *sum = arena_alloc(p->allocator, sizeof(ast_node));
+		sum->type = NODE_BINARY;
+		sum->expr.binary.left = result;
+		sum->expr.binary.right = rhs;
+		sum->expr.binary.operator = op;
+		/* The node built so far becomes the left operand of the next one. */
+		result = sum;
+	}
+
+	return result;
+}
+
+/*
+ * Parse the whole token stream into a linked list of NODE_UNIT nodes rooted
+ * at p->ast, one unit per top-level expression.
+ * The root unit always exists; its expression is NULL for empty input.
+ * Parsing stops at the first point where parse_expression returns NULL.
+ * Every unit's `next` is set explicitly, so the list is NULL-terminated
+ * regardless of what arena_alloc returns for fresh memory.
+ */
+static void parse(parser *p)
+{
+	ast_node *head = arena_alloc(p->allocator, sizeof(ast_node));
+	head->type = NODE_UNIT;
+	head->expr.unit_node.expr = parse_expression(p);
+	head->expr.unit_node.next = NULL;
+	p->ast = head;
+
+	ast_node *tail = head;
+	for (ast_node *expr = parse_expression(p); expr; expr = parse_expression(p)) {
+		ast_node *unit = arena_alloc(p->allocator, sizeof(ast_node));
+		unit->type = NODE_UNIT;
+		unit->expr.unit_node.expr = expr;
+		unit->expr.unit_node.next = NULL;
+
+		tail->expr.unit_node.next = unit;
+		tail = unit;
+	}
+}
+
+/*
+ * Allocate a parser from `allocator`, point it at the lexer's token list and
+ * parse immediately. The resulting tree is available as the returned
+ * parser's `ast` field.
+ */
+parser *parser_init(lexer *l, arena *allocator)
+{
+	parser *p = arena_alloc(allocator, sizeof(parser));
+	p->tokens = l->tokens;
+	/* Nothing consumed and nothing parsed yet: start from a defined state. */
+	p->previous = NULL;
+	p->ast = NULL;
+	p->allocator = allocator;
+
+	parse(p);
+
+	return p;
+}
--- a/parser.h
+++ b/parser.h
@ -0,0 +1,145 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+#include "lexer.h"
+#include "utils.h"
+
+/* Operator stored in a NODE_BINARY node; each enumerator's spelling is given beside it. */
+typedef enum {
+	OP_PLUS, // +
+	OP_MINUS, // -
+	OP_DIV, // /
+	OP_MUL, // *
+	OP_EQ, // ==
+	OP_ASSIGN, // =
+	OP_AND, // &&
+	OP_OR, // ||
+	OP_NEQ, // !=
+	OP_GT, // >
+	OP_LT, // <
+	OP_GE, // >=
+	OP_LE, // <=
+	OP_BOR, // |
+	OP_BAND, // &
+	OP_BXOR, // ^
+	OP_MOD, // %
+	OP_PLUS_EQ, // +=
+	OP_MINUS_EQ, // -=
+	OP_DIV_EQ, // /=
+	OP_MUL_EQ, // *=
+	OP_BOR_EQ, // |=
+	OP_BAND_EQ, // &=
+	OP_BXOR_EQ, // ^=
+	OP_MOD_EQ, // %=
+} binary_op;
+
+/* Prefix operator stored in a NODE_UNARY node. */
+typedef enum {
+	UOP_INCR, // ++
+	UOP_MINUS, // -
+	UOP_DECR, // --
+	UOP_DEREF, // *
+	UOP_REF, // &
+	UOP_NOT, // !
+} unary_op;
+
+/*
+ * Layout selector referenced by the aggregate type descriptions in sema.h.
+ * NOTE(review): the exact meaning of each value is not defined in this
+ * commit; presumably automatic, packed and externally-defined layout.
+ */
+typedef enum {
+	LAYOUT_AUTO,
+	LAYOUT_PACKED,
+	LAYOUT_EXTERN
+} struct_layout;
+
+/*
+ * A named, typed slot: used for aggregate members (sema.h) and for function
+ * parameters (function_decl). Names are pointer + length pairs.
+ * NOTE(review): the unit of `offset` is not established in this commit;
+ * presumably bytes from the start of the aggregate.
+ */
+typedef struct {
+	char *type_name;
+	usize type_len;
+	char *name;
+	usize name_len;
+	usize offset;
+} member;
+
+/*
+ * A function's name (pointer + length) and its parameters.
+ * NOTE(review): no parameter count is stored; how `params` is terminated is
+ * not defined in this commit.
+ */
+typedef struct {
+	char *name;
+	usize name_len;
+	member *params;
+} function_decl;
+
+/*
+ * Discriminator for ast_node. The parser in this commit only produces the
+ * literal/identifier kinds, NODE_UNARY, NODE_BINARY and NODE_UNIT; the other
+ * kinds are declared but not yet built.
+ */
+typedef enum {
+	NODE_IDENTIFIER,
+	NODE_INTEGER,
+	NODE_FLOAT,
+	NODE_STRING,
+	NODE_CHAR,
+	NODE_TERNARY,
+	NODE_CAST,
+	NODE_ARRAY_SUBSCRIPT,
+	NODE_ACCESS,
+	NODE_ACCESS_PTR,
+	NODE_CALL,
+	NODE_POSTFIX,
+	NODE_UNARY,
+	NODE_BINARY,
+	NODE_GOTO,
+	NODE_BREAK,
+	NODE_CASE,
+	NODE_SWITCH,
+	NODE_FOR,
+	NODE_DO,
+	NODE_WHILE,
+	NODE_IF,
+	NODE_RETURN,
+	NODE_COMPOUND,
+	NODE_TYPEDEF,
+	NODE_ENUM,
+	NODE_STRUCT,
+	NODE_UNION,
+	NODE_VAR_DECL,
+	NODE_FUNCTION_DEF,
+	NODE_FUNCTION_DECL,
+	NODE_UNIT,
+	NODE_AS,
+} node_type;
+
+/*
+ * One node of the syntax tree. `type` selects which member of the `expr`
+ * union is meaningful; only that member may be read.
+ */
+typedef struct _ast_node {
+	node_type type;
+	union {
+		/* NODE_BINARY: the two operands and the operator joining them. */
+		struct {
+			struct _ast_node *left;
+			struct _ast_node *right;
+			binary_op operator;
+		} binary;
+		/* NODE_UNARY: a prefix operator and its operand. */
+		struct {
+			struct _ast_node *right;
+			unary_op operator;
+		} unary;
+		/* NODE_INTEGER: the literal's value. */
+		u64 integer;
+		/* NODE_FLOAT: the literal's value. */
+		f64 flt; // float
+		/*
+		 * NODE_STRING and NODE_IDENTIFIER: a start/len slice of the token's
+		 * lexeme. It is printed with an explicit length, so do not assume
+		 * NUL termination.
+		 */
+		struct {
+			char *start;
+			usize len;
+		} string;
+		/* NODE_CHAR: the character value, with escapes already decoded. */
+		char ch; // char;
+		/* NODE_TERNARY: condition ? then : otherwise. */
+		struct {
+			struct _ast_node *condition;
+			struct _ast_node *then;
+			struct _ast_node *otherwise;
+		} ternary;
+		/* Presumably NODE_CAST: a value and the target type name as pointer + length (not built yet). */
+		struct {
+			struct _ast_node *value;
+			char *type;
+			usize type_len;
+		} cast;
+		/* NODE_UNIT: one element of a singly linked list of top-level expressions. */
+		struct {
+			struct _ast_node *expr;
+			struct _ast_node *next;
+		} unit_node;
+	} expr;
+} ast_node;
+
+/* Parser state: a cursor over the lexer's token list plus the tree built so far. */
+typedef struct {
+	/* Current, not yet consumed token; NULL once the input is exhausted. */
+	token *tokens;
+	/* Most recently consumed token; used for error positions and recovery. */
+	token *previous;
+	/* Root of the parsed tree (a NODE_UNIT list head). */
+	ast_node *ast;
+	/* Arena from which every AST node is allocated. */
+	arena *allocator;
+} parser;
+
+parser *parser_init(lexer *l, arena *allocator);
+
+#endif
--- a/sema.h
+++ b/sema.h
@ -0,0 +1,54 @@
+#ifndef SEMA_H
+#define SEMA_H
+
+#include <stdbool.h>
+
+/* parser.h provides struct_layout, member and function_decl. */
+#include "parser.h"
+/* utils.h provides usize; u8 and u16 are assumed to come from it as well. */
+#include "utils.h"
+
+/* Discriminator for `type`: selects which member of `type.data` is meaningful. */
+typedef enum {
+	TYPE_VOID,
+	TYPE_PTR,
+	TYPE_I8,
+	TYPE_I16,
+	TYPE_I32,
+	TYPE_I64,
+	TYPE_U8,
+	TYPE_U16,
+	TYPE_U32,
+	TYPE_U64,
+	TYPE_STRUCT,
+	TYPE_UNION,
+	TYPE_ENUM,
+} type_tag;
+
+/*
+ * Description of a type for semantic analysis.
+ * NOTE(review): `data.flt` and `data.array` have no corresponding type_tag
+ * value yet.
+ */
+typedef struct _type {
+	type_tag tag;
+	union {
+		u8 integer;
+		u8 flt; // float
+		struct {
+			bool is_const;
+			bool is_volatile;
+			u16 alignment;
+			/*
+			 * Pointee type. This must be a pointer: a struct cannot
+			 * contain a member of its own, still incomplete, type.
+			 */
+			struct _type *child;
+		} ptr;
+		struct {
+			usize len;
+			/* Element type; a pointer for the same reason as ptr.child. */
+			struct _type *child;
+		} array;
+		struct {
+			struct_layout layout;
+			char *name;
+			usize name_len;
+			usize alignment;
+			member *members;
+			function_decl *decls;
+		} structure;
+		/* Named `enumeration` because `enum` is a keyword and cannot be a member name. */
+		struct {
+			struct_layout layout;
+			char *name;
+			usize name_len;
+			usize alignment;
+			member *members;
+			function_decl *decls;
+		} enumeration;
+	} data;
+} type;
+
+#endif
--- a/test.c
+++ b/test.c
@ -1,482 +1 @@
-#include "lexer.h"
-#include <stdbool.h>
-#include <ctype.h>
-#include <stdio.h>
-#include <string.h>
-
-static const char *token_type_str[] = {
-	[TOKEN_ERROR]        = "TOKEN_ERROR",
-	[TOKEN_END]          = "TOKEN_END",
-
-	[TOKEN_PLUS]         = "TOKEN_PLUS",
-	[TOKEN_PLUS_PLUS]    = "TOKEN_PLUS_PLUS",
-	[TOKEN_MINUS]        = "TOKEN_MINUS",
-	[TOKEN_MINUS_MINUS]  = "TOKEN_MINUS_MINUS",
-	[TOKEN_SLASH]        = "TOKEN_SLASH",
-	[TOKEN_PERC]         = "TOKEN_PERC",
-	[TOKEN_STAR]         = "TOKEN_STAR",
-	[TOKEN_AND]          = "TOKEN_AND",
-	[TOKEN_HAT]          = "TOKEN_HAT",
-	[TOKEN_PIPE]         = "TOKEN_PIPE",
-	[TOKEN_EQ]           = "TOKEN_EQ",
-	[TOKEN_ARROW]	     = "TOKEN_ARROW",
-	[TOKEN_LSHIFT]       = "TOKEN_LSHIFT",
-	[TOKEN_RSHIFT]       = "TOKEN_RSHIFT",
-	[TOKEN_DOUBLE_EQ]    = "TOKEN_DOUBLE_EQ",
-	[TOKEN_LESS_THAN]    = "TOKEN_LESS_THAN",
-	[TOKEN_GREATER_THAN] = "TOKEN_GREATER_THAN",
-	[TOKEN_LESS_EQ]      = "TOKEN_LESS_EQ",
-	[TOKEN_GREATER_EQ]   = "TOKEN_GREATER_EQ",
-	[TOKEN_NOT_EQ]       = "TOKEN_NOT_EQ",
-	[TOKEN_PLUS_EQ]      = "TOKEN_PLUS_EQ",
-	[TOKEN_MINUS_EQ]     = "TOKEN_MINUS_EQ",
-	[TOKEN_STAR_EQ]      = "TOKEN_STAR_EQ",
-	[TOKEN_SLASH_EQ]     = "TOKEN_SLASH_EQ",
-	[TOKEN_AND_EQ]       = "TOKEN_AND_EQ",
-	[TOKEN_HAT_EQ]       = "TOKEN_HAT_EQ",
-	[TOKEN_PIPE_EQ]      = "TOKEN_PIPE_EQ",
-	[TOKEN_PERC_EQ]      = "TOKEN_PERC_EQ",
-	[TOKEN_LSHIFT_EQ]    = "TOKEN_LSHIFT_EQ",
-	[TOKEN_RSHIFT_EQ]    = "TOKEN_RSHIFT_EQ",
-	[TOKEN_OR]           = "TOKEN_OR",
-	[TOKEN_DOUBLE_AND]   = "TOKEN_DOUBLE_AND",
-	[TOKEN_COLON]        = "TOKEN_COLON",
-	[TOKEN_SEMICOLON]    = "TOKEN_SEMICOLON",
-	[TOKEN_DOT]          = "TOKEN_DOT",
-	[TOKEN_BANG]         = "TOKEN_BANG",
-	[TOKEN_COMMA]        = "TOKEN_COMMA",
-	[TOKEN_LPAREN]       = "TOKEN_LPAREN",
-	[TOKEN_RPAREN]       = "TOKEN_RPAREN",
-	[TOKEN_LSQUARE]      = "TOKEN_LSQUARE",
-	[TOKEN_RSQUARE]      = "TOKEN_RSQUARE",
-	[TOKEN_LCURLY]       = "TOKEN_LCURLY",
-	[TOKEN_RCURLY]       = "TOKEN_RCURLY",
-
-	[TOKEN_INTEGER]      = "TOKEN_INTEGER",
-	[TOKEN_FLOAT]        = "TOKEN_FLOAT",
-	[TOKEN_IDENTIFIER]   = "TOKEN_IDENTIFIER",
-	[TOKEN_STRING]       = "TOKEN_STRING",
-	[TOKEN_CHAR]         = "TOKEN_CHAR",
-
-	[TOKEN_WHILE]        = "TOKEN_WHILE",
-	[TOKEN_FOR]          = "TOKEN_FOR",
-	[TOKEN_GOTO]         = "TOKEN_GOTO",
-	[TOKEN_IF]           = "TOKEN_IF",
-	[TOKEN_ELSE]         = "TOKEN_ELSE",
-	[TOKEN_SWITCH]       = "TOKEN_SWITCH",
-	[TOKEN_CASE]         = "TOKEN_CASE",
-	[TOKEN_DO]           = "TOKEN_DO",
-	[TOKEN_DEFER]        = "TOKEN_DEFER",
-	[TOKEN_MODULE]       = "TOKEN_MODULE",
-
-	[TOKEN_STATIC]       = "TOKEN_STATIC",
-	[TOKEN_CONST]        = "TOKEN_CONST",
-	[TOKEN_EXTERN]       = "TOKEN_EXTERN",
-	[TOKEN_VOLATILE]     = "TOKEN_VOLATILE",
-};
-
-trie_node *keywords;
-
-void lexer_print_token(token *t)
-{
-	printf("%s: ", token_type_str[t->type]);
-	for (usize i=0; i < t->lexeme_len; i++) {
-		printf("%c", t->lexeme[i]);
-	}
-}
-
-static void add_token(lexer *l, token_type type, usize len)
-{
-	token *t = arena_alloc(l->allocator, sizeof(token));
-	t->type = type;
-	t->lexeme_len = len;
-	t->lexeme = l->source + l->index;
-	t->position.row = l->row;
-	t->position.column = l->column;
-
-	if (!l->tokens) {
-		l->tokens = t;
-		l->tail = t;
-	} else {
-		l->tail->next = t;
-		l->tail = t;
-	}
-}
-
-static void add_error(lexer *l, char *msg)
-{
-	token *t = arena_alloc(l->allocator, sizeof(token));
-	t->type = TOKEN_ERROR;
-	t->lexeme_len = strlen(msg);
-	t->lexeme = msg;
-	t->position.row = l->row;
-	t->position.column = l->column;
-
-	if (!l->tokens) {
-		l->tokens = t;
-		l->tail = t;
-	} else {
-		l->tail->next = t;
-		l->tail = t;
-	}
-}
-
-static void parse_number(lexer *l)
-{
-	char c = l->source[l->index];
-	/* Is the number a float? */
-	bool f = false;
-	usize len = 0;
-
-	while (isdigit(c)) {
-		/* If a dot is found, and the character after it is a digit, this is a float. */
-		if (l->source[l->index+1] == '.' && isdigit(l->source[l->index+2])) {
-			f = true;
-			len += 3;
-			l->index += 3;
-		} else {
-			len += 1;
-			l->index += 1;
-		}
-		c = l->source[l->index];
-	}
-	l->index -= len;
-	if (f) {
-		add_token(l, TOKEN_FLOAT, len);
-	} else {
-		add_token(l, TOKEN_INTEGER, len);
-	}
-	l->index += len;
-}
-
-static void parse_identifier(lexer *l)
-{
-	char c = l->source[l->index];
-	usize len = 0;
-
-	while (isalnum(c) || c == '_') {
-		len += 1;
-		l->index += 1;
-		c = l->source[l->index];
-	}
-	l->index -= len;
-	token_type keyword = trie_get(keywords, l->source + l->index, len);
-	if (keyword) {
-		add_token(l, keyword, len);
-	} else {
-		add_token(l, TOKEN_IDENTIFIER, len);
-	}
-	l->index += len;
-}
-
-static void parse_string(lexer *l)
-{
-	char c = l->source[l->index];
-	usize len = 0;
-
-	while (c != '"') {
-		if (c == '\0' || c == '\n') {
-			printf("%c", c);
-			l->index -= len;
-			add_error(l, "unclosed string literal.");
-			l->index += len;
-			return;
-		}
-		len += 1;
-		l->index += 1;
-		c = l->source[l->index];
-	}
-	l->index -= len;
-	add_token(l, TOKEN_STRING, len);
-	l->index += len + 1;
-}
-
-static bool parse_special(lexer *l)
-{
-	switch (l->source[l->index]) {
-	case '+':
-		if (l->source[l->index+1] == '=') {
-			add_token(l, TOKEN_PLUS_EQ, 2);
-			l->index += 2;
-		} else if (l->source[l->index+1] == '+') {
-			add_token(l, TOKEN_PLUS_PLUS, 2);
-			l->index += 2;
-		} else {
-			add_token(l, TOKEN_PLUS, 1);
-			l->index += 1;
-		}
-		return true;
-	case '-':
-		if (l->source[l->index+1] == '=') {
-			add_token(l, TOKEN_MINUS_EQ, 2);
-			l->index += 2;
-		} else if (l->source[l->index+1] == '-') {
-			add_token(l, TOKEN_MINUS_MINUS, 2);
-			l->index += 2;
-		} else if (l->source[l->index+1] == '>') {
-			add_token(l, TOKEN_ARROW, 2);
-			l->index += 2;
-		} else {
-			add_token(l, TOKEN_MINUS, 1);
-			l->index += 1;
-		}
-		return true;
-	case '/':
-		if (l->source[l->index+1] == '=') {
-			add_token(l, TOKEN_SLASH_EQ, 2);
-			l->index += 2;
-		}  else {
-			add_token(l, TOKEN_SLASH, 1);
-			l->index += 1;
-		}
-		return true;
-	case '*':
-		if (l->source[l->index+1] == '=') {
-			add_token(l, TOKEN_STAR_EQ, 2);
-			l->index += 2;
-		}  else {
-			add_token(l, TOKEN_STAR, 1);
-			l->index += 1;
-		}
-		return true;
-	case '%':
-		if (l->source[l->index+1] == '=') {
-			add_token(l, TOKEN_PERC_EQ, 2);
-			l->index += 2;
-		}  else {
-			add_token(l, TOKEN_PERC, 1);
-			l->index += 1;
-		}
-		return true;
-	case '&':
-		if (l->source[l->index+1] == '=') {
-			add_token(l, TOKEN_AND_EQ, 2);
-			l->index += 2;
-		} else if (l->source[l->index+1] == '&') {
-			add_token(l, TOKEN_DOUBLE_AND, 2);
-			l->index += 2;
-		} else {
-			add_token(l, TOKEN_AND, 1);
-			l->index += 1;
-		}
-		return true;
-	case '^':
-		if (l->source[l->index+1] == '=') {
-			add_token(l, TOKEN_HAT_EQ, 2);
-			l->index += 2;
-		}  else {
-			add_token(l, TOKEN_HAT, 1);
-			l->index += 1;
-		}
-		return true;
-	case '|':
-		if (l->source[l->index+1] == '=') {
-			add_token(l, TOKEN_PIPE_EQ, 2);
-			l->index += 2;
-		} else if (l->source[l->index+1] == '|') {
-			add_token(l, TOKEN_OR, 2);
-			l->index += 2;
-		} else {
-			add_token(l, TOKEN_PIPE, 1);
-			l->index += 1;
-		}
-		return true;
-	case '=':
-		if (l->source[l->index+1] == '=') {
-			add_token(l, TOKEN_DOUBLE_EQ, 2);
-			l->index += 2;
-		}  else {
-			add_token(l, TOKEN_EQ, 1);
-			l->index += 1;
-		}
-		return true;
-	case '>':
-		if (l->source[l->index+1] == '=') {
-			add_token(l, TOKEN_GREATER_EQ, 2);
-			l->index += 2;
-		} else if (l->source[l->index+1] == '>') {
-			if (l->source[l->index+2] == '=') {
-				add_token(l, TOKEN_RSHIFT_EQ, 3);
-				l->index += 3;
-				return true;
-			}
-			add_token(l, TOKEN_RSHIFT, 2);
-			l->index += 2;
-		} else {
-			add_token(l, TOKEN_GREATER_THAN, 1);
-			l->index += 1;
-		}
-		return true;
-	case '<':
-		if (l->source[l->index+1] == '=') {
-			add_token(l, TOKEN_LESS_EQ, 2);
-			l->index += 2;
-		} else if (l->source[l->index+1] == '<') {
-			if (l->source[l->index+2] == '=') {
-				add_token(l, TOKEN_LSHIFT_EQ, 3);
-				l->index += 3;
-				return true;
-			}
-			add_token(l, TOKEN_LSHIFT, 2);
-			l->index += 2;
-		} else {
-			add_token(l, TOKEN_LESS_THAN, 1);
-			l->index += 1;
-		}
-		return true;
-	case '!':
-		if (l->source[l->index+1] == '=') {
-			add_token(l, TOKEN_NOT_EQ, 2);
-			l->index += 2;
-		}  else {
-			add_token(l, TOKEN_BANG, 1);
-			l->index += 1;
-		}
-		return true;
-	case ':':
-		add_token(l, TOKEN_COLON, 1);
-		l->index += 1;
-		return true;
-	case ';':
-		add_token(l, TOKEN_SEMICOLON, 1);
-		l->index += 1;
-		return true;
-	case '.':
-		add_token(l, TOKEN_DOT, 1);
-		l->index += 1;
-		return true;
-	case ',':
-		add_token(l, TOKEN_COMMA, 1);
-		l->index += 1;
-		return true;
-	case '(':
-		add_token(l, TOKEN_LPAREN, 1);
-		l->index += 1;
-		return true;
-	case ')':
-		add_token(l, TOKEN_RPAREN, 1);
-		l->index += 1;
-		return true;
-	case '[':
-		add_token(l, TOKEN_LSQUARE, 1);
-		l->index += 1;
-		return true;
-	case ']':
-		add_token(l, TOKEN_RSQUARE, 1);
-		l->index += 1;
-		return true;
-	case '{':
-		add_token(l, TOKEN_LCURLY, 1);
-		l->index += 1;
-		return true;
-	case '}':
-		add_token(l, TOKEN_RCURLY, 1);
-		l->index += 1;
-		return true;
-	case '\'':
-		if (l->source[l->index+1] == '\\') {
-			if (l->source[l->index+3] != '\'') {
-				add_error(l, "unclosed character literal.");
-				return true;
-			}
-			l->index += 1;
-			add_token(l, TOKEN_CHAR, 2);
-			l->index += 3;
-			return true;
-		} else {
-			if (l->source[l->index+2] != '\'') {
-				add_error(l, "unclosed character literal.");
-				return false;
-			}
-			l->index += 1;
-			add_token(l, TOKEN_CHAR, 1);
-			l->index += 2;
-			return true;
-		}
-	default:
-		return false;
-	}
-}
-
-static void parse(lexer *l)
-{
-	char c;
-
-	while (l->index <= l->size) {
-		c = l->source[l->index];
-		l->column += 1;
-
-		if (c == '\n') {
-			l->index += 1;
-			l->row += 1;
-			l->column = 0;
-			continue;
-		}
-
-		if (isspace(c)) {
-			l->index += 1;
-			continue;
-		}
-
-		usize head = l->index;
-
-		if (parse_special(l)) {
-			l->column += (l->index - head - 1);
-			continue;
-		}
-
-		if (isdigit(c)) {
-			parse_number(l);
-			l->column += (l->index - head - 1);
-			continue;
-		}
-
-		if (isalpha(c)) {
-			parse_identifier(l);
-			l->column += (l->index - head - 1);
-			continue;
-		}
-
-		if (c == '"') {
-			l->index += 1;
-			parse_string(l);
-			l->column += (l->index - head - 1);
-			continue;
-		}
-
-		l->index += 1;
-	}
-}
-
-lexer *lexer_init(char *source, usize size, arena *arena)
-{
-	lexer *lex = arena_alloc(arena, sizeof(lexer));
-	lex->column = 0;
-	lex->row = 0;
-	lex->index = 0;
-	lex->size = size;
-	lex->tokens = 0;
-	lex->tail = 0;
-	lex->allocator = arena;
-	lex->source = source;
-
-	keywords = arena_alloc(arena, sizeof(trie_node));
-	trie_insert(keywords, lex->allocator, "while", TOKEN_WHILE);
-	trie_insert(keywords, lex->allocator, "for", TOKEN_FOR);
-	trie_insert(keywords, lex->allocator, "goto", TOKEN_GOTO);
-	trie_insert(keywords, lex->allocator, "if", TOKEN_IF);
-	trie_insert(keywords, lex->allocator, "else", TOKEN_ELSE);
-	trie_insert(keywords, lex->allocator, "switch", TOKEN_SWITCH);
-	trie_insert(keywords, lex->allocator, "case", TOKEN_CASE);
-	trie_insert(keywords, lex->allocator, "do", TOKEN_DO);
-	trie_insert(keywords, lex->allocator, "defer", TOKEN_DEFER);
-	trie_insert(keywords, lex->allocator, "module", TOKEN_MODULE);
-	trie_insert(keywords, lex->allocator, "static", TOKEN_STATIC);
-	trie_insert(keywords, lex->allocator, "const", TOKEN_CONST);
-	trie_insert(keywords, lex->allocator, "extern", TOKEN_EXTERN);
-	trie_insert(keywords, lex->allocator, "volatile", TOKEN_VOLATILE);
-
-	parse(lex);
-
-	return lex;
-}
+hello+3-(--ciao)
--- a/utils.c
+++ b/utils.c
@ -3,6 +3,37 @@
 #include <string.h>
 #include <stdio.h>

+/*
+ * Convert the first `len` characters of `s` to an unsigned integer, treating
+ * each character as a decimal digit. No validation is performed.
+ */
+u64 parse_int(char *s, usize len)
+{
+	u64 value = 0;
+	char *end = s + len;
+
+	for (char *digit = s; digit != end; digit++) {
+		value = value * 10 + (*digit - '0');
+	}
+
+	return value;
+}
+
+/*
+ * Convert the first `len` characters of `s`, a decimal literal such as
+ * "3.14", to a floating-point value.
+ * The digits are accumulated with the '.' skipped, and the result is scaled
+ * down once by 10^(number of digits after the point). Input without a '.'
+ * is returned as its integer value. Characters other than '.' are assumed
+ * to be decimal digits; no validation is performed.
+ */
+f64 parse_float(char *s, usize len)
+{
+	f64 digits = 0.0;
+	f64 scale = 1.0;
+	int seen_point = 0;
+
+	for (usize i = 0; i < len; i++) {
+		if (s[i] == '.') {
+			/* The point carries no digit value; it only starts the fraction. */
+			seen_point = 1;
+			continue;
+		}
+		digits = digits * 10.0 + (f64)(s[i] - '0');
+		if (seen_point) {
+			scale *= 10.0;
+		}
+	}
+
+	return digits / scale;
+}
+
+
 void trie_insert(trie_node *root, arena *a, char *key, uint16_t value)
 {
 	trie_node *node = root;
--- a/utils.h
+++ b/utils.h
@ -20,6 +20,9 @@ typedef size_t usize;
 typedef float f32;
 typedef double f64;

+u64 parse_int(char *s, usize len);
+f64 parse_float(char *s, usize len);
+
 typedef struct {
 	usize capacity;
 	usize position;