Compare commits

..

1 commit
master ... son

Author SHA1 Message Date
23126974b5 starting over 2025-12-14 16:37:10 +01:00
22 changed files with 0 additions and 6938 deletions

View file

@ -1,59 +0,0 @@
# lc - L compiler
# See LICENSE file for copyright and license details.

include config.mk

SRC = lc.c utils.c lexer.c parser.c sema.c codegen.c
HDR = config.def.h utils.h lexer.h parser.h sema.h codegen.h
OBJ = ${SRC:.c=.o}

# Default goal: print the effective build settings, then build the compiler.
all: options lc

options:
	@echo lc build options:
	@echo "CFLAGS = ${CFLAGS}"
	@echo "LDFLAGS = ${LDFLAGS}"
	@echo "CC = ${CC}"

.c.o:
	${CC} -c ${CFLAGS} $<

# Every object is rebuilt when the configuration header or config.mk changes.
${OBJ}: config.h config.mk

# config.h is the local copy of the defaults; it is only created when missing.
config.h:
	cp config.def.h $@

# NOTE(review): nothing in this Makefile depends on users.h and users.def.h is
# not listed in HDR - this rule looks like a leftover; confirm before removing.
users.h:
	cp users.def.h $@

lc: ${OBJ}
	${CC} -o $@ ${OBJ} ${LDFLAGS}

clean:
	rm -f lc ${OBJ} lc-${VERSION}.tar.gz

# Build a release tarball from a pristine tree.
# The space before each trailing backslash keeps the continued words separate
# even if the continuation line's indentation is lost.
dist: clean
	mkdir -p lc-${VERSION}
	cp -R LICENSE Makefile README config.mk \
		lc.1 ${HDR} ${SRC} lc-${VERSION}
	tar -cf lc-${VERSION}.tar lc-${VERSION}
	gzip lc-${VERSION}.tar
	rm -rf lc-${VERSION}

install: all
	mkdir -p ${DESTDIR}${PREFIX}/bin
	cp -f lc ${DESTDIR}${PREFIX}/bin
	chmod 755 ${DESTDIR}${PREFIX}/bin/lc
	mkdir -p ${DESTDIR}${MANPREFIX}/man1
	sed "s/VERSION/${VERSION}/g" < lc.1 > ${DESTDIR}${MANPREFIX}/man1/lc.1
	chmod 644 ${DESTDIR}${MANPREFIX}/man1/lc.1

uninstall:
	rm -f ${DESTDIR}${PREFIX}/bin/lc \
		${DESTDIR}${MANPREFIX}/man1/lc.1

# Rebuild from scratch, dump the compiler's stdout into graph.dot and view it.
graph: clean all
	./lc > graph.dot
	dot -Tpdf graph.dot > graph.pdf
	zathura ./graph.pdf

# `graph` is a command, not a file: without this entry a file named "graph"
# would make the target look up to date.
.PHONY: all options clean dist install uninstall graph

24
README
View file

@ -1,24 +0,0 @@
lc - L compiler
============================
lc is an L compiler: it compiles L source code.
Requirements
------------
In order to build lc you need make and a C compiler that supports C23.
Installation
------------
Edit config.mk to match your local setup (lc is installed into
the /usr namespace by default).
Afterwards enter the following command to build and install lc (if
necessary as root):
make clean install
Usage
-----------
lc file

1168
codegen.c

File diff suppressed because it is too large Load diff

View file

@ -1,8 +0,0 @@
#ifndef CODEGEN_H
#define CODEGEN_H
#include "parser.h"
/*
 * Run code generation for the AST whose root is `node`.
 * NOTE(review): the output format and destination are decided in codegen.c,
 * which is not visible from this header - confirm there before relying on them.
 */
void generate(ast_node *node);
#endif

View file

@ -1,4 +0,0 @@
#ifndef CONFIG_H
#define CONFIG_H
#endif

View file

@ -1,4 +0,0 @@
#ifndef CONFIG_H
#define CONFIG_H
#endif

View file

@ -1,27 +0,0 @@
# lc version
VERSION = 0.1
# Customize below to fit your system
# paths
PREFIX = /usr
MANPREFIX = ${PREFIX}/share/man
# OpenBSD (uncomment)
#MANPREFIX = ${PREFIX}/man
# includes and libs
INCS = -I.
LIBS =
# flags
# CFLAGS is assigned with `:=`, so INCS and CPPFLAGS are expanded right here
# and must already be defined above this point.
CPPFLAGS = -DVERSION=\"${VERSION}\"
CFLAGS := -std=c23 -pedantic -Wall -O0 ${INCS} ${CPPFLAGS}
# debug information is always enabled
CFLAGS := ${CFLAGS} -g
LDFLAGS = ${LIBS}
# Solaris
#CFLAGS = -fast ${INCS} -DVERSION=\"${VERSION}\"
#LDFLAGS = ${LIBS}
# compiler and linker
CC = cc

View file

@ -1,16 +0,0 @@
import std;
i32 main()
{
u32 x = 4;
loop {
u32 b = 3;
}
x == 3;
loop (0.., test) |k, i| {
}
u32 b = 3;
}

View file

@ -1,10 +0,0 @@
struct b {
i32 a,
u32 b,
u32 c,
}
u32 test()
{
f32 a = 5.0;
}

241
lc.c
View file

@ -1,241 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include "utils.h"
#include "lexer.h"
#include "parser.h"
#include "sema.h"
#include "codegen.h"
/* Write one indentation step to stdout for each level of `depth`. */
void print_indent(int depth)
{
	int remaining = depth;

	/* A zero or negative depth prints nothing. */
	while (remaining > 0) {
		printf(" ");
		remaining -= 1;
	}
}
/*
 * Return the source spelling of a binary operator, for the AST dump.
 *
 * Every enumerator of `binary_op` is covered, including the compound
 * assignments `>>=`, `<<=`, `|=`, `&=`, `^=` and `%=`; "?" is only returned
 * for a value outside the enum.
 */
const char* get_op_str(binary_op op) {
	switch (op) {
	/* arithmetic and bitwise */
	case OP_PLUS: return "+";
	case OP_MINUS: return "-";
	case OP_DIV: return "/";
	case OP_MUL: return "*";
	case OP_MOD: return "%";
	case OP_BOR: return "|";
	case OP_BAND: return "&";
	case OP_BXOR: return "^";
	/* assignments */
	case OP_ASSIGN: return "=";
	case OP_ASSIGN_PTR: return "<-";
	case OP_RSHIFT_EQ: return ">>=";
	case OP_LSHIFT_EQ: return "<<=";
	case OP_PLUS_EQ: return "+=";
	case OP_MINUS_EQ: return "-=";
	case OP_DIV_EQ: return "/=";
	case OP_MUL_EQ: return "*=";
	case OP_BOR_EQ: return "|=";
	case OP_BAND_EQ: return "&=";
	case OP_BXOR_EQ: return "^=";
	case OP_MOD_EQ: return "%=";
	/* comparisons and logic */
	case OP_EQ: return "==";
	case OP_AND: return "&&";
	case OP_OR: return "||";
	case OP_NEQ: return "!=";
	case OP_GT: return ">";
	case OP_LT: return "<";
	case OP_GE: return ">=";
	case OP_LE: return "<=";
	default: return "?";
	}
}
/*
 * Return the source spelling of a unary operator, or "?" when `op` is not one
 * of the `unary_op` enumerators.
 */
const char *get_uop_str(unary_op op) {
	/* Indexed by enumerator so the table stays correct if the enum is reordered. */
	static const char *const spelling[] = {
		[UOP_INCR] = "++",
		[UOP_MINUS] = "-",
		[UOP_DECR] = "--",
		[UOP_DEREF] = "*",
		[UOP_REF] = "&",
		[UOP_NOT] = "!",
	};
	unsigned long index = (unsigned long)op;

	if (index < sizeof spelling / sizeof spelling[0] && spelling[index])
		return spelling[index];
	return "?";
}
void print_ast(ast_node *node, int depth) {
if (!node) return;
print_indent(depth);
switch (node->type) {
case NODE_INTEGER:
printf("Integer: %lu\n", node->expr.integer);
break;
case NODE_FLOAT:
printf("Float: %f\n", node->expr.flt);
break;
case NODE_CHAR:
printf("Char: '%c'\n", node->expr.ch);
break;
case NODE_STRING:
printf("String: \"%.*s\"\n", (int)node->expr.string.len, node->expr.string.start);
break;
case NODE_IDENTIFIER:
printf("Identifier: %.*s\n", (int)node->expr.string.len, node->expr.string.start);
break;
case NODE_CAST:
printf("Cast:\n");
print_ast(node->expr.cast.type, depth);
print_ast(node->expr.cast.value, depth + 1);
break;
case NODE_ACCESS:
printf("Access:\n");
print_ast(node->expr.access.expr, depth + 1);
print_ast(node->expr.access.member, depth + 1);
break;
case NODE_LABEL:
printf("Label: %.*s\n", (int)node->expr.label.name_len, node->expr.label.name);
break;
case NODE_GOTO:
printf("Goto: %.*s\n", (int)node->expr.label.name_len, node->expr.label.name);
break;
case NODE_BINARY:
printf("BinaryOp (%s)\n", get_op_str(node->expr.binary.operator));
print_ast(node->expr.binary.left, depth + 1);
print_ast(node->expr.binary.right, depth + 1);
break;
case NODE_ARRAY_SUBSCRIPT:
printf("Array subscript\n");
print_ast(node->expr.subscript.expr, depth + 1);
print_ast(node->expr.subscript.index, depth + 1);
break;
case NODE_UNARY:
printf("UnaryOp (%s)\n", get_uop_str(node->expr.unary.operator));
print_ast(node->expr.unary.right, depth + 1);
break;
case NODE_POSTFIX:
printf("Postfix (%s)\n", get_uop_str(node->expr.unary.operator));
print_ast(node->expr.unary.right, depth + 1);
break;
case NODE_BREAK:
printf("Break\n");
break;
case NODE_TERNARY:
printf("Ternary (? :)\n");
print_indent(depth + 1); printf("Condition:\n");
print_ast(node->expr.ternary.condition, depth + 2);
print_indent(depth + 1); printf("Then:\n");
print_ast(node->expr.ternary.then, depth + 2);
print_indent(depth + 1); printf("Else:\n");
print_ast(node->expr.ternary.otherwise, depth + 2);
break;
case NODE_UNIT:
printf("Unit\n");
ast_node *current = node;
while (current && current->type == NODE_UNIT) {
print_ast(current->expr.unit_node.expr, depth + 1);
current = current->expr.unit_node.next;
}
break;
case NODE_CALL:
printf("Call: %.*s\n", (int)node->expr.call.name_len, node->expr.call.name);
current = node->expr.call.parameters;
while (current && current->type == NODE_UNIT) {
print_ast(current->expr.unit_node.expr, depth + 1);
current = current->expr.unit_node.next;
}
break;
case NODE_STRUCT_INIT:
printf("Struct init:\n");
current = node->expr.struct_init.members;
while (current && current->type == NODE_UNIT) {
print_ast(current->expr.unit_node.expr, depth + 1);
current = current->expr.unit_node.next;
}
break;
case NODE_STRUCT:
printf("Struct: %.*s\n", (int)node->expr.structure.name_len, node->expr.structure.name);
member *m = node->expr.structure.members;
while (m) {
print_ast(m->type, depth + 1);
m = m->next;
}
break;
case NODE_UNION:
printf("Union: %.*s\n", (int)node->expr.structure.name_len, node->expr.structure.name);
m = node->expr.structure.members;
while (m) {
print_ast(m->type, depth + 1);
m = m->next;
}
break;
case NODE_ENUM:
printf("Enum: %.*s\n", (int)node->expr.enm.name_len, node->expr.enm.name);
variant *v = node->expr.enm.variants;
while (v) {
printf("\t%.*s\n", (int)v->name_len, v->name);
v = v->next;
}
break;
case NODE_IF:
printf("If:\n");
print_ast(node->expr.whle.condition, depth + 1);
print_ast(node->expr.whle.body, depth + 1);
break;
case NODE_VAR_DECL:
printf("VarDecl: ");
print_ast(node->expr.var_decl.type, 0);
print_ast(node->expr.var_decl.value, depth + 1);
break;
case NODE_FUNCTION:
printf("Function: %.*s\n", (int)node->expr.function.name_len, node->expr.function.name);
m = node->expr.function.parameters;
while (m) {
print_ast(m->type, depth + 1);
m = m->next;
}
print_ast(node->expr.function.body, depth + 1);
break;
case NODE_RETURN:
printf("Return:\n");
print_ast(node->expr.ret.value, depth + 1);
break;
case NODE_IMPORT:
printf("Import:\n");
print_ast(node->expr.import.path, depth + 1);
break;
case NODE_WHILE:
printf("While:\n");
print_ast(node->expr.whle.condition, depth + 1);
print_ast(node->expr.whle.body, depth + 1);
break;
case NODE_FOR:
printf("For:\n");
print_ast(node->expr.fr.slices, depth + 1);
print_ast(node->expr.fr.captures, depth + 1);
print_indent(depth + 1);
print_ast(node->expr.fr.body, depth + 1);
break;
case NODE_RANGE:
printf("Range:\n");
print_ast(node->expr.binary.left, depth + 1);
print_ast(node->expr.binary.right, depth + 1);
break;
default:
printf("Unknown Node Type: %d\n", node->type);
break;
}
}
int main(void)
{
FILE *fp = fopen("test.l", "r");
usize size = 0;
fseek(fp, 0, SEEK_END);
size = ftell(fp);
fseek(fp, 0, SEEK_SET);
char *src = malloc(size+1);
fread(src, size, 1, fp);
fclose(fp);
src[size] = '\0';
arena a = arena_init(0x1000 * 0x1000 * 64);
lexer *l = lexer_init(src, size, &a);
parser *p = parser_init(l, &a);
print_ast(p->ast, 0);
sema_init(p, &a);
generate(p->ast);
arena_deinit(a);
return 0;
}

422
lexer.c
View file

@ -1,422 +0,0 @@
#include "lexer.h"
#include <stdbool.h>
#include <ctype.h>
#include <stdio.h>
#include <string.h>
trie_node *keywords;
static void add_token(lexer *l, token_type type, usize len)
{
token *t = arena_alloc(l->allocator, sizeof(token));
t->type = type;
t->lexeme_len = len;
t->lexeme = l->source + l->index;
t->position.row = l->row;
t->position.column = l->column;
if (!l->tokens) {
l->tokens = t;
l->tail = t;
} else {
l->tail->next = t;
l->tail = t;
}
}
static void add_error(lexer *l, char *msg)
{
token *t = arena_alloc(l->allocator, sizeof(token));
t->type = TOKEN_ERROR;
t->lexeme_len = strlen(msg);
t->lexeme = msg;
t->position.row = l->row;
t->position.column = l->column;
if (!l->tokens) {
l->tokens = t;
l->tail = t;
} else {
l->tail->next = t;
l->tail = t;
}
}
/*
 * Lex an integer or float literal starting at the current index and advance
 * past it.
 *
 * A '.' belongs to the literal only when a digit follows it, so "0.." lexes
 * as the integer 0 followed by a range operator. At most one fractional part
 * is accepted: "1.2.3" yields the float "1.2" and leaves ".3" for the caller.
 */
static void parse_number(lexer *l)
{
	const char *digits = l->source + l->index;
	usize len = 0;
	bool is_float = false;

	/* The casts keep isdigit() defined for bytes with the high bit set. */
	while (isdigit((unsigned char)digits[len])) {
		if (!is_float && digits[len + 1] == '.' && isdigit((unsigned char)digits[len + 2])) {
			/* Consume the digit, the dot and the first fractional digit. */
			is_float = true;
			len += 3;
		} else {
			len += 1;
		}
	}

	add_token(l, is_float ? TOKEN_FLOAT : TOKEN_INTEGER, len);
	l->index += len;
}
/*
 * Lex an identifier or keyword (letters, digits and '_') starting at the
 * current index and advance past it.
 *
 * The keyword trie returns 0 for "not a keyword"; that is also TOKEN_ERROR's
 * value, which is never registered as a keyword.
 */
static void parse_identifier(lexer *l)
{
	const char *start = l->source + l->index;
	usize len = 0;

	/* The cast keeps isalnum() defined for bytes with the high bit set. */
	while (isalnum((unsigned char)start[len]) || start[len] == '_')
		len += 1;

	token_type keyword = trie_get(keywords, l->source + l->index, len);
	add_token(l, keyword ? keyword : TOKEN_IDENTIFIER, len);
	l->index += len;
}
/*
 * Lex the body of a string literal. The index is expected to sit just after
 * the opening quote.
 *
 * On success a TOKEN_STRING covering the text between the quotes is added and
 * the index moves past the closing quote. If a newline or the end of input is
 * reached first, an error token is added and the index is left on that
 * character.
 */
static void parse_string(lexer *l)
{
	usize len = 0;

	for (;;) {
		char c = l->source[l->index + len];
		if (c == '"')
			break;
		if (c == '\0' || c == '\n') {
			add_error(l, "unclosed string literal.");
			l->index += len;
			return;
		}
		len += 1;
	}

	add_token(l, TOKEN_STRING, len);
	/* Skip the body and the closing quote. */
	l->index += len + 1;
}
static bool parse_special(lexer *l)
{
switch (l->source[l->index]) {
case '+':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_PLUS_EQ, 2);
l->index += 2;
} else if (l->source[l->index+1] == '+') {
add_token(l, TOKEN_PLUS_PLUS, 2);
l->index += 2;
} else {
add_token(l, TOKEN_PLUS, 1);
l->index += 1;
}
return true;
case '-':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_MINUS_EQ, 2);
l->index += 2;
} else if (l->source[l->index+1] == '-') {
add_token(l, TOKEN_MINUS_MINUS, 2);
l->index += 2;
} else {
add_token(l, TOKEN_MINUS, 1);
l->index += 1;
}
return true;
case '/':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_SLASH_EQ, 2);
l->index += 2;
} else {
add_token(l, TOKEN_SLASH, 1);
l->index += 1;
}
return true;
case '*':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_STAR_EQ, 2);
l->index += 2;
} else {
add_token(l, TOKEN_STAR, 1);
l->index += 1;
}
return true;
case '%':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_PERC_EQ, 2);
l->index += 2;
} else {
add_token(l, TOKEN_PERC, 1);
l->index += 1;
}
return true;
case '&':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_AND_EQ, 2);
l->index += 2;
} else if (l->source[l->index+1] == '&') {
add_token(l, TOKEN_DOUBLE_AND, 2);
l->index += 2;
} else {
add_token(l, TOKEN_AND, 1);
l->index += 1;
}
return true;
case '^':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_HAT_EQ, 2);
l->index += 2;
} else {
add_token(l, TOKEN_HAT, 1);
l->index += 1;
}
return true;
case '|':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_PIPE_EQ, 2);
l->index += 2;
} else if (l->source[l->index+1] == '|') {
add_token(l, TOKEN_OR, 2);
l->index += 2;
} else {
add_token(l, TOKEN_PIPE, 1);
l->index += 1;
}
return true;
case '=':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_DOUBLE_EQ, 2);
l->index += 2;
} else {
add_token(l, TOKEN_EQ, 1);
l->index += 1;
}
return true;
case '>':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_GREATER_EQ, 2);
l->index += 2;
} else if (l->source[l->index+1] == '>') {
if (l->source[l->index+2] == '=') {
add_token(l, TOKEN_RSHIFT_EQ, 3);
l->index += 3;
return true;
}
add_token(l, TOKEN_RSHIFT, 2);
l->index += 2;
} else {
add_token(l, TOKEN_GREATER_THAN, 1);
l->index += 1;
}
return true;
case '<':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_LESS_EQ, 2);
l->index += 2;
} else if (l->source[l->index+1] == '-') {
add_token(l, TOKEN_ARROW, 2);
l->index += 2;
} else if (l->source[l->index+1] == '<') {
if (l->source[l->index+2] == '=') {
add_token(l, TOKEN_LSHIFT_EQ, 3);
l->index += 3;
return true;
}
add_token(l, TOKEN_LSHIFT, 2);
l->index += 2;
} else {
add_token(l, TOKEN_LESS_THAN, 1);
l->index += 1;
}
return true;
case '!':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_NOT_EQ, 2);
l->index += 2;
} else {
add_token(l, TOKEN_BANG, 1);
l->index += 1;
}
return true;
case ':':
add_token(l, TOKEN_COLON, 1);
l->index += 1;
return true;
case ';':
add_token(l, TOKEN_SEMICOLON, 1);
l->index += 1;
return true;
case '.':
if (l->source[l->index+1] == '.') {
add_token(l, TOKEN_DOUBLE_DOT, 2);
l->index += 2;
} else {
add_token(l, TOKEN_DOT, 1);
l->index += 1;
}
return true;
case ',':
add_token(l, TOKEN_COMMA, 1);
l->index += 1;
return true;
case '(':
add_token(l, TOKEN_LPAREN, 1);
l->index += 1;
return true;
case ')':
add_token(l, TOKEN_RPAREN, 1);
l->index += 1;
return true;
case '[':
add_token(l, TOKEN_LSQUARE, 1);
l->index += 1;
return true;
case ']':
add_token(l, TOKEN_RSQUARE, 1);
l->index += 1;
return true;
case '{':
add_token(l, TOKEN_LCURLY, 1);
l->index += 1;
return true;
case '}':
add_token(l, TOKEN_RCURLY, 1);
l->index += 1;
return true;
case '\'':
if (l->source[l->index+1] == '\\') {
if (l->source[l->index+3] != '\'') {
add_error(l, "unclosed character literal.");
l->index += 1;
return true;
}
l->index += 1;
add_token(l, TOKEN_CHAR, 2);
l->index += 3;
return true;
} else {
if (l->source[l->index+2] != '\'') {
add_error(l, "unclosed character literal.");
l->index += 1;
return true;
}
l->index += 1;
add_token(l, TOKEN_CHAR, 1);
l->index += 2;
return true;
}
default:
return false;
}
}
/*
 * Tokenise the whole source buffer, appending tokens to the lexer's list and
 * keeping `row` / `column` up to date.
 *
 * The loop deliberately visits index == size as well: the buffer is expected
 * to hold a terminating NUL there, which matches no rule and is skipped.
 *
 * Notes on the rules:
 *   - A `//` comment runs up to, but not including, the newline (or the end
 *     of the buffer). The newline is then handled by the normal newline rule,
 *     so the row counter stays correct after comment lines.
 *   - Identifiers may start with a letter or an underscore.
 *   - Bytes that start no token are skipped silently.
 */
static void parse(lexer *l)
{
	while (l->index <= l->size) {
		char c = l->source[l->index];
		/* ctype functions are only defined for unsigned char values. */
		unsigned char uc = (unsigned char)c;

		l->column += 1;
		if (c == '\n') {
			l->index += 1;
			l->row += 1;
			l->column = 0;
			continue;
		}
		if (c == '/' && l->source[l->index+1] == '/') {
			while (l->index < l->size && l->source[l->index] != '\n') {
				l->index += 1;
			}
			continue;
		}
		if (isspace(uc)) {
			l->index += 1;
			continue;
		}

		/* `column` already counts the first byte of the token; after lexing
		 * it is advanced by the remaining bytes that were consumed. */
		usize head = l->index;
		if (parse_special(l)) {
			l->column += (l->index - head - 1);
			continue;
		}
		if (isdigit(uc)) {
			parse_number(l);
			l->column += (l->index - head - 1);
			continue;
		}
		if (isalpha(uc) || c == '_') {
			parse_identifier(l);
			l->column += (l->index - head - 1);
			continue;
		}
		if (c == '"') {
			l->index += 1;
			parse_string(l);
			l->column += (l->index - head - 1);
			continue;
		}
		l->index += 1;
	}
}
/*
 * Create a lexer for `source` (`size` bytes, with a readable terminator at
 * source[size]), register the language keywords and tokenise the whole input.
 *
 * All memory comes from `arena`. The returned lexer's `tokens` field is the
 * head of the resulting token list. The keyword trie is stored in the
 * file-level `keywords` variable and rebuilt on every call.
 */
lexer *lexer_init(char *source, usize size, arena *arena)
{
	static const struct { char *word; token_type type; } reserved[] = {
		{ "true", TOKEN_TRUE },
		{ "false", TOKEN_FALSE },
		{ "struct", TOKEN_STRUCT },
		{ "enum", TOKEN_ENUM },
		{ "union", TOKEN_UNION },
		{ "loop", TOKEN_LOOP },
		{ "while", TOKEN_WHILE },
		{ "until", TOKEN_UNTIL },
		{ "goto", TOKEN_GOTO },
		{ "if", TOKEN_IF },
		{ "else", TOKEN_ELSE },
		{ "switch", TOKEN_SWITCH },
		{ "break", TOKEN_BREAK },
		{ "defer", TOKEN_DEFER },
		{ "return", TOKEN_RETURN },
		{ "import", TOKEN_IMPORT },
		{ "const", TOKEN_CONST },
		{ "extern", TOKEN_EXTERN },
		{ "volatile", TOKEN_VOLATILE },
	};

	lexer *lex = arena_alloc(arena, sizeof(lexer));
	/* parse() increments the column before looking at each character and
	 * resets it to 0 after a newline; starting at 0 gives the first line the
	 * same 1-based columns as every other line. */
	lex->column = 0;
	lex->row = 1;
	lex->index = 0;
	lex->size = size;
	lex->tokens = 0;
	lex->tail = 0;
	lex->allocator = arena;
	lex->source = source;

	keywords = arena_alloc(arena, sizeof(trie_node));
	for (usize i = 0; i < sizeof reserved / sizeof reserved[0]; i++)
		trie_insert(keywords, lex->allocator, reserved[i].word, reserved[i].type);

	parse(lex);
	return lex;
}

97
lexer.h
View file

@ -1,97 +0,0 @@
#ifndef LEXER_H
#define LEXER_H
#include "utils.h"
typedef enum {
TOKEN_ERROR,
TOKEN_END,
TOKEN_PLUS, // +
TOKEN_PLUS_PLUS, // ++
TOKEN_MINUS, // -
TOKEN_MINUS_MINUS, // --
TOKEN_SLASH, // /
TOKEN_PERC, // %
TOKEN_STAR, // *
TOKEN_AND, // &
TOKEN_HAT, // ^
TOKEN_PIPE, // |
TOKEN_LSHIFT, // <<
TOKEN_RSHIFT, // >>
TOKEN_DOUBLE_EQ, // ==
TOKEN_ARROW, // <-
TOKEN_EQ, // =
TOKEN_LESS_THAN, // <
TOKEN_GREATER_THAN, // >
TOKEN_LESS_EQ, // <=
TOKEN_GREATER_EQ, // >=
TOKEN_NOT_EQ, // !=
TOKEN_PLUS_EQ, // +=
TOKEN_MINUS_EQ, // -=
TOKEN_STAR_EQ, // *=
TOKEN_SLASH_EQ, // /=
TOKEN_AND_EQ, // &=
TOKEN_HAT_EQ, // ^=
TOKEN_PIPE_EQ, // |=
TOKEN_PERC_EQ, // %=
TOKEN_LSHIFT_EQ, // <<=
TOKEN_RSHIFT_EQ, // >>=
TOKEN_OR, // ||
TOKEN_DOUBLE_AND, // &&
TOKEN_COLON, // :
TOKEN_SEMICOLON, // ;
TOKEN_DOT, // .
TOKEN_DOUBLE_DOT, // ..
TOKEN_BANG, // !
TOKEN_COMMA, // ,
TOKEN_LPAREN, // (
TOKEN_RPAREN, // )
TOKEN_LSQUARE, // [
TOKEN_RSQUARE, // ]
TOKEN_LCURLY, // {
TOKEN_RCURLY, // }
TOKEN_INTEGER,
TOKEN_FLOAT,
TOKEN_IDENTIFIER,
TOKEN_STRING,
TOKEN_CHAR,
TOKEN_TRUE,
TOKEN_FALSE,
TOKEN_GOTO,
TOKEN_LOOP,
TOKEN_WHILE,
TOKEN_UNTIL,
TOKEN_IF,
TOKEN_ELSE,
TOKEN_SWITCH,
TOKEN_BREAK,
TOKEN_DEFER,
TOKEN_RETURN,
TOKEN_IMPORT,
TOKEN_CONST,
TOKEN_EXTERN,
TOKEN_VOLATILE,
TOKEN_STRUCT,
TOKEN_ENUM,
TOKEN_UNION
} token_type;
/*
 * One lexed token; tokens form a singly linked list in source order.
 * `lexeme` is not a C string: it points at `lexeme_len` bytes, normally
 * inside the source buffer. For TOKEN_ERROR it points at the diagnostic
 * message instead.
 */
typedef struct _token {
token_type type;
source_pos position; // row/column the lexer was at when the token was added
char *lexeme;
usize lexeme_len;
struct _token *next;
} token;
/*
 * Lexer state: the cursor into the source buffer plus the token list built
 * so far.
 */
typedef struct {
// column/row: current position used to stamp tokens; index: byte offset of
// the cursor in `source`; size: length of `source` in bytes.
usize column, row, index, size;
char *source;
token *tokens; // head of the token list
token *tail; // last token of the list, used for appending
arena *allocator; // every token is allocated from this arena
} lexer;
lexer *lexer_init(char *source, usize size, arena *arena);
#endif

1385
parser.c

File diff suppressed because it is too large Load diff

256
parser.h
View file

@ -1,256 +0,0 @@
#ifndef PARSER_H
#define PARSER_H
#include "lexer.h"
#include "utils.h"
#include <stdbool.h>
struct _type;
struct _ast_node;
typedef enum {
OP_PLUS, // +
OP_MINUS, // -
OP_DIV, // /
OP_MUL, // *
OP_MOD, // %
OP_BOR, // |
OP_BAND, // &
OP_BXOR, // ^
OP_ASSIGN, // =
OP_ASSIGN_PTR, // <-
OP_RSHIFT_EQ, // >>=
OP_LSHIFT_EQ, // <<=
OP_PLUS_EQ, // +=
OP_MINUS_EQ, // -=
OP_DIV_EQ, // /=
OP_MUL_EQ, // *=
OP_BOR_EQ, // |=
OP_BAND_EQ, // &=
OP_BXOR_EQ, // ^=
OP_MOD_EQ, // %=
OP_EQ, // ==
OP_AND, // &&
OP_OR, // ||
OP_NEQ, // !=
OP_GT, // >
OP_LT, // <
OP_GE, // >=
OP_LE, // <=
} binary_op;
typedef enum {
UOP_INCR, // ++
UOP_MINUS, // -
UOP_DECR, // --
UOP_DEREF, // *
UOP_REF, // &
UOP_NOT, // !
} unary_op;
typedef enum {
LAYOUT_AUTO,
LAYOUT_PACKED,
LAYOUT_EXTERN
} struct_layout;
typedef struct _member {
struct _ast_node *type;
char *name;
usize name_len;
struct _member *next;
usize offset;
} member;
typedef struct {
char *name;
usize name_len;
member *params;
} function;
typedef struct _variant {
struct _ast_node *value;
char *name;
usize name_len;
struct _variant *next;
} variant;
typedef enum {
NODE_IDENTIFIER,
NODE_INTEGER,
NODE_FLOAT,
NODE_STRING,
NODE_CHAR,
NODE_BOOL,
NODE_CAST,
NODE_UNARY,
NODE_BINARY,
NODE_RANGE,
NODE_ARRAY_SUBSCRIPT,
NODE_POSTFIX,
NODE_CALL,
NODE_ACCESS,
NODE_STRUCT_INIT,
NODE_TERNARY, /* TODO */
NODE_BREAK,
NODE_RETURN,
NODE_IMPORT,
NODE_FOR,
NODE_WHILE,
NODE_IF,
NODE_VAR_DECL,
NODE_LABEL,
NODE_GOTO,
NODE_ENUM,
NODE_STRUCT,
NODE_UNION,
NODE_FUNCTION,
NODE_PTR_TYPE,
NODE_SWITCH, /* TODO */
NODE_UNIT,
} node_type;
#define PTR_SLICE 0x0
#define PTR_RAW 0x1
#define PTR_CONST 0x2
#define PTR_VOLATILE 0x4
#define LOOP_WHILE 0x1
#define LOOP_UNTIL 0x2
#define LOOP_AFTER 0x4
typedef struct _ast_node {
node_type type;
source_pos position;
struct _type *expr_type;
bool address_taken; // used in IR generation.
union {
struct {
struct _ast_node *type;
u8 flags;
} ptr_type;
struct {
char *name;
usize name_len;
} label; // both label and goto
struct {
struct _ast_node *left;
struct _ast_node *right;
binary_op operator;
} binary;
struct {
struct _ast_node *right;
unary_op operator;
} unary;
u8 boolean;
i64 integer;
f64 flt; // float
struct {
char *start;
usize len;
} string;
char ch; // char;
struct {
struct _ast_node *condition;
struct _ast_node *then;
struct _ast_node *otherwise;
} ternary;
struct {
struct _ast_node *value;
struct _ast_node *type;
} cast;
struct {
struct _ast_node *expr;
struct _ast_node *index;
} subscript;
struct {
struct _ast_node *expr;
struct _ast_node *member;
} access;
struct {
struct _ast_node *expr;
struct _ast_node *next;
} unit_node;
struct {
/* This should be a list of unit_node */
struct _ast_node *parameters;
usize param_len;
char *name;
usize name_len;
} call;
struct {
struct _ast_node *value;
} ret;
struct {
/* This should be an access. */
struct _ast_node *path;
} import;
struct {
/* These should be lists of unit_node */
struct _ast_node *slices;
usize slice_len;
struct _ast_node *captures;
usize capture_len;
struct _ast_node* body;
} fr; // for
struct {
struct _ast_node *condition;
struct _ast_node *body;
u8 flags;
} whle; // while
struct {
struct _ast_node *condition;
struct _ast_node *body;
struct _ast_node *otherwise;
u8 flags;
} if_stmt; // while
struct {
struct _ast_node **statements;
usize stmt_len;
} compound;
struct {
struct _ast_node *value;
char *name;
usize name_len;
struct _ast_node *type;
} var_decl;
struct {
member *members;
char *name;
usize name_len;
} structure;
struct {
member *parameters;
usize parameters_len;
char *name;
usize name_len;
struct _ast_node *type;
struct _ast_node *body;
bool is_extern;
} function;
struct {
variant *variants;
char *name;
usize name_len;
} enm; // enum
struct {
struct _ast_node *members;
usize members_len;
} struct_init;
} expr;
} ast_node;
typedef struct {
token *tokens;
token *previous;
ast_node *ast;
arena *allocator;
} parser;
parser *parser_init(lexer *l, arena *allocator);
#endif

920
sema.c
View file

@ -1,920 +0,0 @@
#define STB_DS_IMPLEMENTATION
#include "sema.h"
#include <string.h>
#include <stdio.h>
/*
 * Node of the type-dependency graph used to order struct/union definitions.
 * `in` holds the nodes of the types this type's members refer to, `out` the
 * nodes of the types that refer to this one; both are grown with arrput().
 * `value` stays NULL for a name that was referenced but not (yet) defined.
 */
typedef struct _res_node {
struct _res_node **in;
struct _res_node **out;
type *value;
} res_node;
/* A graph node plus a flag that order_type() sets once the definition was seen. */
typedef struct { res_node node; bool complete; } pair;
/* NOTE(review): type_key is not used in the part of the file visible here. */
typedef struct { u8 flags; char *name; } type_key;
/* Type name -> dependency-graph node (string hash map). */
static struct { char *key; pair *value; } *types;
/* Type name -> fully registered type, filled by register_type(). */
static struct { char *key; type *value; } *type_reg;
/* Function name -> prototype, filled by create_prototype(). */
static struct { char *key; prototype *value; } *prototypes;
static scope *global_scope = NULL;
/* Innermost scope; maintained by push_scope() / pop_scope(). */
static scope *current_scope = NULL;
static type *current_return = NULL;
static type *const_int = NULL;
static type *const_float = NULL;
static bool in_loop = false;
/* Set by error(); never cleared in the code visible here. */
static bool has_errors = false;
/*
 * Report a semantic error on stdout and record that one occurred.
 * When `n` is given, the message is prefixed with the node's row and column;
 * otherwise only the message is printed.
 */
static void error(ast_node *n, char *msg)
{
	has_errors = true;

	if (n == NULL) {
		printf("\x1b[31m\x1b[1merror\x1b[0m\x1b[1m:\x1b[0m %s\n", msg);
		return;
	}
	printf("\x1b[31m\x1b[1merror\x1b[0m\x1b[1m:%ld:%ld:\x1b[0m %s\n", n->position.row, n->position.column, msg);
}
/*
 * Return a freshly malloc'ed, NUL-terminated copy of the `len` bytes at `str`.
 *
 * Despite the name no deduplication takes place: every call returns a new
 * buffer that the caller owns. `s` is unused. Running out of memory is fatal:
 * the process exits instead of writing through a NULL pointer.
 */
static char *intern_string(sema *s, char *str, usize len)
{
	(void) s;
	char *copy = malloc(len + 1);
	if (!copy) {
		fprintf(stderr, "lc: out of memory\n");
		exit(EXIT_FAILURE);
	}
	memcpy(copy, str, len);
	copy[len] = '\0';
	return copy;
}
/*
 * Create the builtin integer type `name` with the given width in bits
 * (signed when `sign` is true) and enter it into the dependency graph.
 *
 * The graph node is marked complete, so a later struct or union declaration
 * using the same name is reported by order_type() as "type already defined."
 * instead of silently replacing the builtin.
 */
static type *create_integer(sema *s, char *name, u8 bits, bool sign)
{
	type *t = arena_alloc(s->allocator, sizeof(type));
	t->name = name;
	t->tag = sign ? TYPE_INTEGER : TYPE_UINTEGER;
	t->data.integer = bits;

	pair *graph_node = arena_alloc(s->allocator, sizeof(pair));
	graph_node->node.value = t;
	graph_node->node.in = NULL;
	graph_node->node.out = NULL;
	graph_node->complete = true;
	shput(types, name, graph_node);
	return t;
}
/*
 * Create the builtin floating-point type `name` with the given width in bits
 * and enter it into the dependency graph.
 *
 * The graph node is marked complete, so a later struct or union declaration
 * using the same name is reported by order_type() as "type already defined."
 * instead of silently replacing the builtin.
 */
static type *create_float(sema *s, char *name, u8 bits)
{
	type *t = arena_alloc(s->allocator, sizeof(type));
	t->name = name;
	t->tag = TYPE_FLOAT;
	t->data.flt = bits;

	pair *graph_node = arena_alloc(s->allocator, sizeof(pair));
	graph_node->node.value = t;
	graph_node->node.in = NULL;
	graph_node->node.out = NULL;
	graph_node->complete = true;
	shput(types, name, graph_node);
	return t;
}
/*
 * Enter a struct or union declaration into the type-dependency graph.
 * Nodes of any other kind are ignored.
 *
 * For every member whose type is a plain identifier an edge is recorded
 * between the declared type and the member's type; a placeholder node is
 * created when that type has not been seen yet. Members with any other type
 * node (e.g. pointer types) add no dependency. Declaring a name twice is
 * reported as an error.
 *
 * The temporary name copies are released when the hash map does not take
 * ownership of them: shput() keeps the key pointer only for a new entry.
 */
static void order_type(sema *s, ast_node *node)
{
	if (node->type != NODE_STRUCT && node->type != NODE_UNION)
		return;

	char *k = intern_string(s, node->expr.structure.name, node->expr.structure.name_len);
	pair *graph_node = shget(types, k);
	if (graph_node && graph_node->complete) {
		error(node, "type already defined.");
		free(k);
		return;
	}

	type *t = arena_alloc(s->allocator, sizeof(type));
	t->tag = node->type == NODE_STRUCT ? TYPE_STRUCT : TYPE_UNION;
	t->data.structure.name = node->expr.structure.name;
	t->data.structure.name_len = node->expr.structure.name_len;
	t->data.structure.members = node->expr.structure.members;
	t->name = k;

	if (!graph_node) {
		graph_node = arena_alloc(s->allocator, sizeof(pair));
		graph_node->node.in = NULL;
		graph_node->node.out = NULL;
		graph_node->complete = false;
	}
	graph_node->node.value = t;

	for (member *m = t->data.structure.members; m; m = m->next) {
		if (m->type->type != NODE_IDENTIFIER)
			continue;
		char *name = intern_string(s, m->type->expr.string.start, m->type->expr.string.len);
		pair *p = shget(types, name);
		if (p) {
			/* The existing entry keeps its own key; this copy was only a lookup key. */
			free(name);
		} else {
			p = arena_alloc(s->allocator, sizeof(pair));
			p->node.out = NULL;
			p->node.in = NULL;
			p->node.value = NULL;
			p->complete = false;
			shput(types, name, p);
		}
		arrput(graph_node->node.in, &p->node);
		arrput(p->node.out, &graph_node->node);
	}

	shput(types, k, graph_node);
	graph_node->complete = true;
}
/*
 * Resolve a type expression node to a `type`.
 *
 *   NODE_IDENTIFIER  looked up by name in the type registry; NULL when unknown
 *                    (no error is reported here).
 *   NODE_ACCESS      member type of a struct; an error is reported and NULL
 *                    returned when the left-hand side is unknown or not a
 *                    struct.
 *   NODE_PTR_TYPE    a newly allocated pointer or slice type, chosen by the
 *                    PTR_RAW flag. The child may be NULL when the pointee is
 *                    not registered yet.
 *   anything else    reported as "expected type.", NULL returned.
 */
static type *get_type(sema *s, ast_node *n)
{
	char *name = NULL;
	type *t = NULL;
	switch (n->type) {
	case NODE_ACCESS:
		t = get_type(s, n->expr.access.expr);
		/* An unresolved left-hand side is reported, not dereferenced. */
		if (!t || t->tag != TYPE_STRUCT) {
			error(n->expr.access.expr, "expected structure.");
			return NULL;
		}
		name = intern_string(s, n->expr.access.member->expr.string.start, n->expr.access.member->expr.string.len);
		t = shget(t->data.structure.member_types, name);
		/* The copy was only needed as a lookup key. */
		free(name);
		return t;
	case NODE_IDENTIFIER:
		name = intern_string(s, n->expr.string.start, n->expr.string.len);
		t = shget(type_reg, name);
		free(name);
		return t;
	case NODE_PTR_TYPE:
		/* calloc so that fields not assigned below start out as zero. */
		t = calloc(1, sizeof(type));
		if (!t) {
			error(n, "out of memory.");
			return NULL;
		}
		t->alignment = sizeof(usize);
		if (n->expr.ptr_type.flags & PTR_RAW) {
			t->name = "ptr";
			t->tag = TYPE_PTR;
			t->size = sizeof(usize);
			t->data.ptr.child = get_type(s, n->expr.ptr_type.type);
			t->data.ptr.is_const = (n->expr.ptr_type.flags & PTR_CONST) != 0;
			t->data.ptr.is_volatile = (n->expr.ptr_type.flags & PTR_VOLATILE) != 0;
		} else {
			t->name = "slice";
			t->tag = TYPE_SLICE;
			t->size = sizeof(usize) * 2; // ptr + len
			t->data.slice.child = get_type(s, n->expr.ptr_type.type);
			t->data.slice.is_const = (n->expr.ptr_type.flags & PTR_CONST) != 0;
			t->data.slice.is_volatile = (n->expr.ptr_type.flags & PTR_VOLATILE) != 0;
		}
		return t;
	default:
		error(n, "expected type.");
		return NULL;
	}
}
/*
 * Lay out a struct type: resolve every member's type, record it in the
 * struct's member table, assign member offsets and compute the struct's
 * size and alignment.
 *
 * Each member is placed at the next offset that is a multiple of its own
 * alignment; the total size is rounded up to the largest member alignment.
 * Layout stops at the first member whose type is unknown or has size 0
 * (an error is reported), leaving the struct's size and alignment untouched.
 * `name` is not used.
 */
static void register_struct(sema *s, char *name, type *t)
{
	usize widest = 0;
	usize cursor = 0;

	for (member *field = t->data.structure.members; field; field = field->next) {
		type *field_type = get_type(s, field->type);
		if (field_type == NULL) {
			error(field->type, "unknown type.");
			return;
		}

		char *key = intern_string(s, field->name, field->name_len);
		shput(t->data.structure.member_types, key, field_type);

		if (field_type->size == 0) {
			error(field->type, "a struct member can't be of type `void`.");
			return;
		}
		if (field_type->alignment > widest) {
			widest = field_type->alignment;
		}

		/* Pad up to the member's alignment before placing it. */
		usize over = cursor % field_type->alignment;
		if (over != 0) {
			cursor += field_type->alignment - over;
		}
		field->offset = cursor;
		cursor += field_type->size;
	}

	t->alignment = widest;
	if (widest > 0) {
		/* Trailing padding so that the size is a multiple of the alignment. */
		usize over = cursor % widest;
		if (over != 0) {
			cursor += widest - over;
		}
	}
	t->size = cursor;
}
/*
 * Lay out a union type: resolve every member's type, record it in the
 * union's member table and compute the union's size and alignment.
 *
 * The alignment is the largest member alignment. The size is the largest
 * member size rounded up to that alignment, as register_struct() does for
 * structs, so that consecutive unions stay correctly aligned (a 12-byte
 * member next to an 8-byte-aligned one gives size 16, not 12).
 * Layout stops at the first member whose type is unknown (an error is
 * reported). `name` is not used.
 */
static void register_union(sema *s, char *name, type *t)
{
	usize alignment = 0;
	usize size = 0;

	for (member *m = t->data.structure.members; m; m = m->next) {
		type *m_type = get_type(s, m->type);
		if (!m_type) {
			error(m->type, "unknown type.");
			return;
		}
		char *n = intern_string(s, m->name, m->name_len);
		shput(t->data.structure.member_types, n, m_type);
		if (alignment < m_type->alignment) {
			alignment = m_type->alignment;
		}
		if (size < m_type->size) {
			size = m_type->size;
		}
	}

	t->alignment = alignment;
	if (alignment > 0 && size % alignment != 0) {
		size += alignment - (size % alignment);
	}
	t->size = size;
}
/*
 * Compute the size and alignment of `t` and publish it in the type registry
 * under `name`.
 *
 * Integers and floats take their bit width divided by 8, pointers are 8
 * bytes, structs and unions are laid out by their dedicated helpers. Any
 * other tag is reported as an error and the type is not registered.
 */
static void register_type(sema *s, char *name, type *t)
{
	if (t->tag == TYPE_STRUCT) {
		register_struct(s, name, t);
	} else if (t->tag == TYPE_UNION) {
		register_union(s, name, t);
	} else if (t->tag == TYPE_INTEGER || t->tag == TYPE_UINTEGER) {
		t->size = t->data.integer / 8;
		t->alignment = t->data.integer / 8;
	} else if (t->tag == TYPE_PTR) {
		t->size = 8;
		t->alignment = 8;
	} else if (t->tag == TYPE_FLOAT) {
		t->size = t->data.flt / 8;
		t->alignment = t->data.flt / 8;
	} else {
		error(NULL, "registering an invalid type.");
		return;
	}

	shput(type_reg, name, t);
}
/*
 * Register all struct and union types in dependency order.
 *
 * The dependency graph is sorted topologically: nodes without unresolved
 * dependencies are queued, and handling a node releases one edge towards each
 * of its dependents. If not every node can be ordered, the definitions
 * contain a cycle and an error is reported; the types that could be ordered
 * are registered anyway. The graph's edge lists are consumed in the process.
 *
 * Exactly one `in` edge is removed per `out` edge. Edges are recorded once
 * per member, so a struct with several members of the same type has duplicate
 * edges; removing them one at a time guarantees the struct is queued exactly
 * once, when its last dependency has been handled.
 */
static void create_types(sema *s)
{
	res_node **ready = NULL;
	res_node **ordered = NULL;
	usize node_count = shlen(types);

	for (usize i = 0; i < node_count; i++) {
		if (arrlen(types[i].value->node.in) == 0) {
			arrput(ready, &types[i].value->node);
		}
	}

	while (arrlen(ready) > 0) {
		res_node *n = ready[0];
		arrdel(ready, 0);
		arrput(ordered, n);

		while (arrlen(n->out) > 0) {
			res_node *dep = n->out[0];
			arrdel(n->out, 0);
			for (int j = 0; j < arrlen(dep->in); j++) {
				if (dep->in[j] == n) {
					arrdel(dep->in, j);
					break;
				}
			}
			if (arrlen(dep->in) == 0) {
				arrput(ready, dep);
			}
		}
	}

	if ((usize)arrlen(ordered) < node_count) {
		error(NULL, "cycling struct definition.");
	}

	for (int i = 0; i < arrlen(ordered); i++) {
		type *t = ordered[i]->value;
		/* Placeholders (no value) and builtins are not registered here. */
		if (t && (t->tag == TYPE_STRUCT || t->tag == TYPE_UNION)) {
			register_type(s, t->name, t);
		}
	}

	arrfree(ready);
	arrfree(ordered);
}
/*
 * Build the prototype of a function declaration and store it under the
 * function's name.
 *
 * The node's name is replaced by a NUL-terminated copy. A second declaration
 * of the same name is reported but still replaces the stored prototype. If a
 * parameter type cannot be resolved, an error is reported and nothing is
 * stored.
 */
static void create_prototype(sema *s, ast_node *node)
{
	prototype *proto = arena_alloc(s->allocator, sizeof(prototype));
	char *fn_name = intern_string(s, node->expr.function.name, node->expr.function.name_len);

	proto->name = fn_name;
	node->expr.function.name = fn_name;
	if (shget(prototypes, fn_name)) {
		error(node, "function already defined.");
	}

	for (member *param = node->expr.function.parameters; param; param = param->next) {
		type *param_type = get_type(s, param->type);
		if (param_type == NULL) {
			error(param->type, "unknown type.");
			return;
		}
		arrput(proto->parameters, param_type);
	}

	proto->type = get_type(s, node->expr.function.type);
	shput(prototypes, fn_name, proto);
}
/* Open a new innermost scope whose parent is the current one. */
static void push_scope(sema *s)
{
	scope *inner = arena_alloc(s->allocator, sizeof(scope));

	inner->parent = current_scope;
	current_scope = inner;
}
/*
 * Leave the innermost scope and make its parent current again.
 * Popping when no scope is open is a no-op rather than a NULL dereference.
 * `s` is unused.
 */
static void pop_scope(sema *s)
{
	(void) s;
	if (current_scope) {
		current_scope = current_scope->parent;
	}
}
/*
 * Look `name` up in the current scope and then in each enclosing scope.
 * Returns the innermost definition found, or NULL when there is none.
 */
static ast_node *get_def(sema *s, char *name)
{
	for (scope *scp = current_scope; scp != NULL; scp = scp->parent) {
		ast_node *found = shget(scp->defs, name);
		if (found != NULL) {
			return found;
		}
	}
	return NULL;
}
/*
 * Build the type of a string literal: a const, non-volatile slice of `u8`
 * whose length is the literal's length.
 */
static type *get_string_type(sema *s, ast_node *node)
{
	type *result = arena_alloc(s->allocator, sizeof(type));

	result->name = "slice";
	result->tag = TYPE_SLICE;
	result->alignment = sizeof(usize);
	result->size = sizeof(usize) * 2; // ptr + len

	result->data.slice.child = shget(type_reg, "u8");
	result->data.slice.len = node->expr.string.len;
	result->data.slice.is_const = true;
	result->data.slice.is_volatile = false;
	return result;
}
/*
 * Build the type of a range expression (`left..right`): a const sequence of
 * `usize`.
 *
 * The length is `right - left` when both bounds are integer literals. For an
 * open-ended range such as `0..`, or for bounds that are not literals, the
 * length is left at 0 instead of being read from a missing or unrelated node.
 *
 * NOTE(review): the tag is TYPE_PTR although the name is "slice" and the
 * slice fields are filled in - confirm whether TYPE_SLICE was intended.
 */
static type *get_range_type(sema *s, ast_node *node)
{
	type *range_type = arena_alloc(s->allocator, sizeof(type));
	range_type->tag = TYPE_PTR;
	range_type->size = sizeof(usize);
	range_type->alignment = sizeof(usize);
	range_type->name = "slice";
	range_type->data.slice.child = shget(type_reg, "usize");
	range_type->data.slice.is_const = true;
	range_type->data.slice.is_volatile = false;

	ast_node *low = node->expr.binary.left;
	ast_node *high = node->expr.binary.right;
	if (low && high && low->type == NODE_INTEGER && high->type == NODE_INTEGER) {
		range_type->data.slice.len = high->expr.integer - low->expr.integer;
	} else {
		range_type->data.slice.len = 0;
	}
	return range_type;
}
static type *get_expression_type(sema *s, ast_node *node);
/*
 * Compute the type of a member access `expr.member`.
 *
 * - On a slice, `ptr` yields a pointer to the element type (carrying the
 *   slice's const/volatile flags) and `len` yields "usize".
 * - On a struct or union, the member is looked up in the type's
 *   member_types table.
 *
 * Returns NULL after reporting an error when the base expression is not a
 * slice/struct/union or when the field does not exist.
 */
static type *get_access_type(sema *s, ast_node *node)
{
    type *t = get_expression_type(s, node->expr.access.expr);
    ast_node *member = node->expr.access.member;
    char *name_start = member->expr.string.start;
    usize name_len = member->expr.string.len;

    /* Handle slice field access */
    if (t && t->tag == TYPE_SLICE) {
        /*
         * The interned string is owned by the intern table (other code
         * stores such pointers long-term), so it must not be free()d here.
         */
        char *name = intern_string(s, name_start, name_len);
        if (strcmp(name, "ptr") == 0) {
            /* Pointer to the element type */
            type *ptr_type = arena_alloc(s->allocator, sizeof(type));
            ptr_type->tag = TYPE_PTR;
            ptr_type->size = 8;
            ptr_type->alignment = 8;
            ptr_type->name = "ptr";
            ptr_type->data.ptr.child = t->data.slice.child;
            ptr_type->data.ptr.is_const = t->data.slice.is_const;
            ptr_type->data.ptr.is_volatile = t->data.slice.is_volatile;
            return ptr_type;
        }
        if (strcmp(name, "len") == 0) {
            return shget(type_reg, "usize");
        }
        error(node, "slice doesn't have that field");
        return NULL;
    }

    if (!t || (t->tag != TYPE_STRUCT && t->tag != TYPE_UNION)) {
        error(node, "invalid expression.");
        return NULL;
    }

    char *name = intern_string(s, name_start, name_len);
    type *res = shget(t->data.structure.member_types, name);
    if (!res) {
        error(node, "struct doesn't have that member");
        return NULL;
    }
    return res;
}
/*
 * Resolve an identifier node to the type of its definition.
 *
 * The node's name is replaced by its interned copy. Returns NULL after
 * reporting an error when no visible scope defines the identifier.
 */
static type *get_identifier_type(sema *s, ast_node *node)
{
    char *name = intern_string(s, node->expr.string.start, node->expr.string.len);
    node->expr.string.start = name;

    ast_node *def = get_def(s, name);
    if (!def) {
        error(node, "unknown identifier.");
        /* error() returns, so `def` must not be dereferenced below. */
        return NULL;
    }
    return def->expr_type;
}
static bool match(type *t1, type *t2);
/*
 * Tell whether a value of type `source` may be implicitly converted to
 * `dest`:
 *   - any integer kind (signed, unsigned, integer constant) converts to any
 *     integer kind;
 *   - only a float constant converts to a float;
 *   - everything else, including a NULL argument, does not convert.
 */
static bool can_cast(type *source, type *dest)
{
    if (source == NULL || dest == NULL) {
        return false;
    }

    type_tag from = source->tag;
    type_tag to = dest->tag;

    if (to == TYPE_INTEGER || to == TYPE_UINTEGER || to == TYPE_INTEGER_CONST) {
        return from == TYPE_INTEGER_CONST || from == TYPE_INTEGER || from == TYPE_UINTEGER;
    }
    if (to == TYPE_FLOAT) {
        return from == TYPE_FLOAT_CONST;
    }
    return false;
}
/*
 * Compute the type of the expression `node`, record it in node->expr_type
 * and return it.
 *
 * A NULL node has type "void". Node kinds that are not expressions also get
 * "void". When an error is reported the function returns NULL; except for
 * the binary-operator mismatch (which stores NULL), node->expr_type is left
 * untouched on those paths.
 */
static type *get_expression_type(sema *s, ast_node *node)
{
    if (!node) {
        return shget(type_reg, "void");
    }

    type *t = NULL;
    switch (node->type) {
    case NODE_IDENTIFIER:
        t = get_identifier_type(s, node);
        break;
    case NODE_INTEGER:
        t = const_int;
        break;
    case NODE_FLOAT:
        t = const_float;
        break;
    case NODE_STRING:
        t = get_string_type(s, node);
        break;
    case NODE_CHAR:
        t = shget(type_reg, "u8");
        break;
    case NODE_BOOL:
        t = shget(type_reg, "bool");
        break;
    case NODE_CAST:
        t = get_type(s, node->expr.cast.type);
        break;
    case NODE_POSTFIX:
    case NODE_UNARY:
        t = get_expression_type(s, node->expr.unary.right);
        if (node->expr.unary.operator == UOP_REF) {
            /* Walk `a.b.c` down to the variable whose address is taken. */
            ast_node *target = node->expr.unary.right;
            while (target && target->type == NODE_ACCESS) {
                target = target->expr.access.expr;
            }
            if (!target || target->type != NODE_IDENTIFIER) {
                error(node, "expected identifier.");
                return NULL;
            }
            ast_node *def = get_def(s, target->expr.string.start);
            if (def) {
                def->address_taken = true;
                target->address_taken = true;
            }
            type *pointee = t;
            t = arena_alloc(s->allocator, sizeof(type));
            t->tag = TYPE_PTR;
            t->size = sizeof(usize);
            t->alignment = sizeof(usize);
            t->name = "ptr";
            t->data.ptr.is_const = false;
            t->data.ptr.is_volatile = false;
            t->data.ptr.child = pointee;
        } else if (node->expr.unary.operator == UOP_DEREF) {
            /* The operand may have failed to type (t == NULL). */
            if (!t || t->tag != TYPE_PTR) {
                error(node, "only pointers can be dereferenced.");
                return NULL;
            }
            t = t->data.ptr.child;
        }
        break;
    case NODE_BINARY: {
        t = get_expression_type(s, node->expr.binary.left);
        if (!t) return NULL;
        if (node->expr.binary.operator == OP_ASSIGN_PTR) {
            if (t->tag != TYPE_PTR) {
                error(node, "expected pointer.");
                return NULL;
            }
            t = t->data.ptr.child;
        }
        /*
         * Type the right operand exactly once: typing allocates and may
         * report errors, so doing it twice duplicates both.
         */
        type *rhs = get_expression_type(s, node->expr.binary.right);
        if (!can_cast(rhs, t) && !match(t, rhs)) {
            error(node, "type mismatch.");
            node->expr_type = NULL;
            return NULL;
        }
        if (node->expr.binary.operator >= OP_EQ) {
            t = shget(type_reg, "bool");
        } else if (node->expr.binary.operator >= OP_ASSIGN && node->expr.binary.operator <= OP_MOD_EQ) {
            t = shget(type_reg, "void");
        }
        break;
    }
    case NODE_RANGE:
        t = get_range_type(s, node);
        break;
    case NODE_ARRAY_SUBSCRIPT: {
        type *base = get_expression_type(s, node->expr.subscript.expr);
        type *element_type = NULL;
        if (base && base->tag == TYPE_SLICE) {
            element_type = base->data.slice.child;
        } else if (base && base->tag == TYPE_PTR) {
            element_type = base->data.ptr.child;
        } else {
            /* Also reached when the base expression failed to type. */
            error(node, "only pointers and slices can be indexed.");
            return NULL;
        }

        ast_node *index = node->expr.subscript.index;
        if (index && index->type == NODE_RANGE) {
            /* Subscripting with a range creates a slice. */
            type *slice_type = arena_alloc(s->allocator, sizeof(type));
            slice_type->tag = TYPE_SLICE;
            slice_type->size = sizeof(usize) * 2;
            slice_type->alignment = sizeof(usize);
            slice_type->name = "slice";
            slice_type->data.slice.child = element_type;
            slice_type->data.slice.is_const = false;
            slice_type->data.slice.is_volatile = false;
            slice_type->data.slice.len = 0;
            t = slice_type;
        } else {
            /* Regular subscript: the element type. */
            t = element_type;
        }
        break;
    }
    case NODE_CALL: {
        node->expr.call.name = intern_string(s, node->expr.call.name, node->expr.call.name_len);
        prototype *prot = shget(prototypes, node->expr.call.name);
        if (!prot) {
            error(node, "unknown function.");
            return NULL;
        }
        /* Type every argument; they are not compared with the prototype here. */
        for (ast_node *arg = node->expr.call.parameters; arg && arg->type == NODE_UNIT; arg = arg->expr.unit_node.next) {
            if (arg->expr.unit_node.expr) {
                get_expression_type(s, arg->expr.unit_node.expr);
            }
        }
        t = prot->type;
        break;
    }
    case NODE_ACCESS:
        t = get_access_type(s, node);
        break;
    default:
        t = shget(type_reg, "void");
        break;
    }

    node->expr_type = t;
    return t;
}
/*
 * Structural type equality.
 *
 * Two types match when they carry the same tag and:
 *   - void / bool: always;
 *   - pointers: same const and volatile flags and matching pointee;
 *   - slices: same flags, same length and matching element type;
 *   - structs / unions: they are the very same type object;
 *   - integers / floats: same `data.integer` / `data.flt` value.
 * Enum, generic and the two constant tags never match (the first two are
 * still TODO). A NULL argument never matches.
 */
static bool match(type *t1, type *t2)
{
    if (t1 == NULL || t2 == NULL) {
        return false;
    }
    if (t1->tag != t2->tag) {
        return false;
    }

    switch (t1->tag) {
    case TYPE_VOID:
    case TYPE_BOOL:
        return true;

    case TYPE_PTR:
        if (t1->data.ptr.is_const != t2->data.ptr.is_const) return false;
        if (t1->data.ptr.is_volatile != t2->data.ptr.is_volatile) return false;
        return match(t1->data.ptr.child, t2->data.ptr.child);

    case TYPE_SLICE:
        if (t1->data.slice.is_const != t2->data.slice.is_const) return false;
        if (t1->data.slice.is_volatile != t2->data.slice.is_volatile) return false;
        if (t1->data.slice.len != t2->data.slice.len) return false;
        return match(t1->data.slice.child, t2->data.slice.child);

    case TYPE_STRUCT:
    case TYPE_UNION:
        return t1 == t2;

    case TYPE_INTEGER:
    case TYPE_UINTEGER:
        return t1->data.integer == t2->data.integer;

    case TYPE_FLOAT:
        return t1->data.flt == t2->data.flt;

    case TYPE_ENUM:
    case TYPE_GENERIC:
        /* TODO */
        return false;

    case TYPE_INTEGER_CONST:
    case TYPE_FLOAT_CONST:
        return false;
    }
    return false;
}
static void check_statement(sema *s, ast_node *node);
/*
 * Check a statement list inside its own scope: a scope is pushed, every
 * statement of the NODE_UNIT chain starting at `node` is checked in order,
 * and the scope is popped again.
 */
static void check_body(sema *s, ast_node *node)
{
    push_scope(s);
    for (ast_node *unit = node; unit && unit->type == NODE_UNIT; unit = unit->expr.unit_node.next) {
        check_statement(s, unit->expr.unit_node.expr);
    }
    pop_scope(s);
}
/*
 * Check a `for` statement.
 *
 * A new scope is opened; each capture is paired with the slice expression
 * at the same position and declared in that scope as a variable whose type
 * is the type of that expression. The body is then checked with `in_loop`
 * set, and the previous `in_loop` value is restored afterwards so that an
 * enclosing loop is still recognised as a loop once this one ends.
 */
static void check_for(sema *s, ast_node *node)
{
    ast_node *current_slice = node->expr.fr.slices;

    push_scope(s);

    for (ast_node *capture = node->expr.fr.captures; capture; capture = capture->expr.unit_node.next) {
        if (!current_slice) {
            /* More captures than slices: there is nothing left to bind to. */
            error(node, "capture without a matching slice.");
            break;
        }
        type *c_type = get_expression_type(s, current_slice->expr.unit_node.expr);
        ast_node *ident = capture->expr.unit_node.expr;
        char *c_name = intern_string(s, ident->expr.string.start, ident->expr.string.len);

        ast_node *cap_node = arena_alloc(s->allocator, sizeof(ast_node));
        cap_node->type = NODE_VAR_DECL;
        cap_node->expr_type = c_type;
        cap_node->address_taken = false;
        cap_node->expr.var_decl.name = c_name;
        shput(current_scope->defs, c_name, cap_node);

        current_slice = current_slice->expr.unit_node.next;
    }

    bool was_in_loop = in_loop;
    in_loop = true;
    for (ast_node *current = node->expr.fr.body; current && current->type == NODE_UNIT; current = current->expr.unit_node.next) {
        check_statement(s, current->expr.unit_node.expr);
    }
    in_loop = was_in_loop;

    pop_scope(s);
}
/*
 * Check a single statement. A NULL node is ignored; anything that is not
 * one of the statement kinds handled below is typed as an expression.
 *
 *   return     the value must cast to or match the current function's
 *              return type;
 *   break      only valid while `in_loop` is set;
 *   while/if   the condition must be "bool"; the bodies are checked in their
 *              own scope (they are skipped when the condition is rejected);
 *   for        see check_for();
 *   var decl   the declared type must resolve, the name must not already be
 *              visible, and the initialiser must be compatible.
 */
static void check_statement(sema *s, ast_node *node)
{
    if (!node) return;

    type *t = NULL;
    type *value_type = NULL;
    char *name = NULL;
    bool was_in_loop = false;

    switch (node->type) {
    case NODE_RETURN:
        /* Type the value once; typing allocates and may report errors. */
        value_type = get_expression_type(s, node->expr.ret.value);
        if (!can_cast(value_type, current_return) && !match(value_type, current_return)) {
            error(node, "return type doesn't match function's one.");
        }
        break;
    case NODE_BREAK:
        if (!in_loop) {
            error(node, "`break` isn't in a loop.");
        }
        break;
    case NODE_WHILE:
        if (!match(get_expression_type(s, node->expr.whle.condition), shget(type_reg, "bool"))) {
            error(node, "expected boolean value.");
            return;
        }
        /* Restore rather than clear, so an enclosing loop stays a loop. */
        was_in_loop = in_loop;
        in_loop = true;
        check_body(s, node->expr.whle.body);
        in_loop = was_in_loop;
        break;
    case NODE_IF:
        if (!match(get_expression_type(s, node->expr.if_stmt.condition), shget(type_reg, "bool"))) {
            error(node, "expected boolean value.");
            return;
        }
        check_body(s, node->expr.if_stmt.body);
        if (node->expr.if_stmt.otherwise) check_body(s, node->expr.if_stmt.otherwise);
        break;
    case NODE_FOR:
        check_for(s, node);
        break;
    case NODE_VAR_DECL:
        t = get_type(s, node->expr.var_decl.type);
        node->expr_type = t;
        name = intern_string(s, node->expr.var_decl.name, node->expr.var_decl.name_len);
        node->expr.var_decl.name = name;
        if (get_def(s, name)) {
            error(node, "redeclaration of variable.");
            break;
        }
        if (!t) {
            /*
             * The declared type did not resolve. The name is still
             * registered below so later uses do not also report
             * "unknown identifier".
             */
            error(node, "unknown type.");
        } else if (t->tag == TYPE_STRUCT) {
            /* Struct initialization with NODE_STRUCT_INIT is allowed */
        } else if (node->expr.var_decl.value && node->expr.var_decl.value->type == NODE_STRUCT_INIT &&
                   (t->tag == TYPE_SLICE || t->tag == TYPE_PTR)) {
            /* Array/slice initialization with NODE_STRUCT_INIT is allowed */
        } else if (node->expr.var_decl.value && node->expr.var_decl.value->type == NODE_RANGE &&
                   t->tag == TYPE_SLICE) {
            /* Range initialization for slices is allowed */
            get_expression_type(s, node->expr.var_decl.value);
        } else if (node->expr.var_decl.value && node->expr.var_decl.value->type == NODE_STRING &&
                   t->tag == TYPE_SLICE) {
            /* String literal can be assigned to slice */
            get_expression_type(s, node->expr.var_decl.value);
        } else {
            value_type = get_expression_type(s, node->expr.var_decl.value);
            if (!can_cast(value_type, t) && !match(t, value_type)) {
                error(node, "type mismatch (decl).");
            }
        }
        shput(current_scope->defs, name, node);
        break;
    default:
        get_expression_type(s, node);
        break;
    }
}
/*
 * Check one function definition.
 *
 * A scope is opened for the function, `current_return` is set to its return
 * type, and every parameter is declared in that scope as a synthetic
 * variable declaration carrying the parameter's type (parameter names are
 * replaced by their interned copies). The body is then checked statement by
 * statement, unless the function is extern or has no body.
 */
static void check_function(sema *s, ast_node *f)
{
    push_scope(s);
    current_return = get_type(s, f->expr.function.type);

    for (member *param = f->expr.function.parameters; param != NULL; param = param->next) {
        type *declared = get_type(s, param->type);
        char *interned = intern_string(s, param->name, param->name_len);
        param->name = interned;

        ast_node *decl = arena_alloc(s->allocator, sizeof(ast_node));
        decl->type = NODE_VAR_DECL;
        decl->expr_type = declared;
        decl->address_taken = false;
        decl->expr.var_decl.name = interned;
        shput(current_scope->defs, interned, decl);
    }

    /* Extern functions have nothing to check beyond their signature. */
    if (!f->expr.function.is_extern && f->expr.function.body) {
        for (ast_node *unit = f->expr.function.body; unit && unit->type == NODE_UNIT; unit = unit->expr.unit_node.next) {
            check_statement(s, unit->expr.unit_node.expr);
        }
    }

    pop_scope(s);
}
/*
 * Analyse a whole translation unit in three passes over its NODE_UNIT
 * chain:
 *   1. hand every top-level item to order_type(), then create_types();
 *   2. create a prototype for every function, so calls can refer to
 *      functions defined later in the file;
 *   3. check every function, and check everything else as a statement.
 */
static void analyze_unit(sema *s, ast_node *node)
{
    ast_node *unit;

    for (unit = node; unit && unit->type == NODE_UNIT; unit = unit->expr.unit_node.next) {
        ast_node *item = unit->expr.unit_node.expr;
        if (item) {
            order_type(s, item);
        }
    }
    create_types(s);

    for (unit = node; unit && unit->type == NODE_UNIT; unit = unit->expr.unit_node.next) {
        ast_node *item = unit->expr.unit_node.expr;
        if (item && item->type == NODE_FUNCTION) {
            create_prototype(s, item);
        }
    }

    for (unit = node; unit && unit->type == NODE_UNIT; unit = unit->expr.unit_node.next) {
        ast_node *item = unit->expr.unit_node.expr;
        if (item && item->type == NODE_FUNCTION) {
            check_function(s, item);
        } else {
            check_statement(s, item);
        }
    }
}
/*
 * Entry point of semantic analysis.
 *
 * Allocates the analyser state from `a`, sets up the global scope,
 * registers the built-in types and the two literal-constant types, then
 * analyses the parser's AST. If any error was recorded, prints
 * "Compilation failed." and exits with status 1.
 */
void sema_init(parser *p, arena *a)
{
    /* Built-in types created through create_integer(), in registration order. */
    static const struct {
        char *name;
        int bits;
        bool is_signed;
    } integer_types[] = {
        { "void",   0, false },
        { "bool",   8, false },
        { "u8",     8, false },
        { "u16",   16, false },
        { "u32",   32, false },
        { "u64",   64, false },
        { "usize", 64, false },
        { "i8",     8, true  },
        { "i16",   16, true  },
        { "i32",   32, true  },
        { "i64",   64, true  },
    };

    sema *s = arena_alloc(a, sizeof(sema));
    s->allocator = a;
    types = NULL;
    s->ast = p->ast;

    global_scope = arena_alloc(a, sizeof(scope));
    global_scope->parent = NULL;
    global_scope->defs = NULL;
    current_scope = global_scope;

    for (usize i = 0; i < sizeof(integer_types) / sizeof(integer_types[0]); i++) {
        char *type_name = integer_types[i].name;
        register_type(s, type_name, create_integer(s, type_name, integer_types[i].bits, integer_types[i].is_signed));
    }
    register_type(s, "f32", create_float(s, "f32", 32));
    register_type(s, "f64", create_float(s, "f64", 64));

    /* Types given to integer and float literals. */
    const_int = arena_alloc(s->allocator, sizeof(type));
    const_int->name = "const_int";
    const_int->tag = TYPE_INTEGER_CONST;
    const_int->data.integer = 0;

    const_float = arena_alloc(s->allocator, sizeof(type));
    const_float->name = "const_float";
    const_float->tag = TYPE_FLOAT_CONST;
    const_float->data.flt = 0;

    analyze_unit(s, s->ast);

    if (has_errors) {
        printf("Compilation failed.\n");
        exit(1);
    }
}

76
sema.h
View file

@ -1,76 +0,0 @@
#ifndef SEMA_H
#define SEMA_H
#include <stdbool.h>
#include "parser.h"
#include "stb_ds.h"
#include "utils.h"
/*
 * Kind of a `type`. The tag decides which member of the `data` union of a
 * type is meaningful (ptr, slice, structure, ...).
 */
typedef enum {
TYPE_VOID,
TYPE_BOOL,
TYPE_PTR,
TYPE_SLICE,
TYPE_FLOAT,
/* Tag of the type given to float literals. */
TYPE_FLOAT_CONST,
TYPE_INTEGER,
/* Tag of the type given to integer literals. */
TYPE_INTEGER_CONST,
/* NOTE(review): presumably the unsigned counterpart of TYPE_INTEGER; confirm in create_integer(). */
TYPE_UINTEGER,
TYPE_STRUCT,
TYPE_UNION,
TYPE_ENUM, /* TODO */
TYPE_GENERIC, /* TODO */
} type_tag;
/*
 * A type of the analysed language.
 *
 * `tag` selects the active member of `data`; `size` and `alignment` are in
 * bytes for the types built in sema.c (e.g. a slice is two machine words).
 */
typedef struct _type {
type_tag tag;
usize size;
usize alignment;
char *name;
union {
/* Integer types: two integer types match only when this value is equal. */
/* NOTE(review): presumably the bit width; confirm in create_integer(). */
u8 integer;
u8 flt; // float
/* TYPE_PTR: qualifiers and pointee type. */
struct {
bool is_const;
bool is_volatile;
struct _type *child;
} ptr;
/* TYPE_SLICE: element count, qualifiers and element type. */
struct {
usize len;
bool is_const;
bool is_volatile;
struct _type *child;
} slice;
/* TYPE_STRUCT / TYPE_UNION: `member_types` is an stb_ds string map from member name to member type. */
struct {
char *name;
usize name_len;
member *members;
struct { char *key; struct _type *value; } *member_types;
} structure;
struct {
char *name;
usize name_len;
variant *variants;
} enm; /* TODO */
} data;
} type;
/*
 * Signature of a function: its interned name, its return type and an stb_ds
 * dynamic array holding the type of each parameter in declaration order.
 */
typedef struct {
char *name;
type *type;
type **parameters;
} prototype;
/*
 * One lexical scope. `defs` is an stb_ds string map from a name to the node
 * that declares it; `parent` is the enclosing scope (NULL for the global
 * scope).
 */
typedef struct _scope {
struct _scope *parent;
struct { char *key; ast_node *value; } *defs;
} scope;
/*
 * State of the semantic analyser: the arena every analysis object is
 * allocated from and the root of the AST being analysed.
 */
typedef struct {
arena *allocator;
ast_node *ast;
} sema;
void sema_init(parser *p, arena *a);
#endif

1895
stb_ds.h

File diff suppressed because it is too large Load diff

BIN
test

Binary file not shown.

20
test.l
View file

@ -1,20 +0,0 @@
extern i64 write(i32 fd, *u8 buf, u64 count);
extern void exit(i32 code);
extern *u8 malloc(usize size);
i32 main()
{
[u8] message = "Hello world!\n";
*u8 message_heap = malloc(message.len);
[u8] new_message = message_heap[0..13];
u32 i = 0;
loop while i < message.len {
new_message[i] = message[i];
i = i + 1;
}
write(1, new_message.ptr, new_message.len);
return 0;
}

90
test.s
View file

@ -1,90 +0,0 @@
# x86-64 assembly (AT&T syntax) for the `main` of test.l.
# NOTE(review): this looks like compiler output rather than hand-written code.
.section .text
.global main
main:
# Prologue: set up the frame and reserve 256 bytes of locals.
push %rbp
mov %rsp, %rbp
sub $256, %rsp
# Store the 13 bytes of "Hello world!\n" at -32(%rbp) .. -20(%rbp).
movb $72, -32(%rbp)
movb $101, -31(%rbp)
movb $108, -30(%rbp)
movb $108, -29(%rbp)
movb $111, -28(%rbp)
movb $32, -27(%rbp)
movb $119, -26(%rbp)
movb $111, -25(%rbp)
movb $114, -24(%rbp)
movb $108, -23(%rbp)
movb $100, -22(%rbp)
movb $33, -21(%rbp)
movb $10, -20(%rbp)
# Build the (pointer, length) pair at -48(%rbp) / -40(%rbp).
# NOTE(review): 13 bytes were stored above but the length written is 14; confirm.
lea -32(%rbp), %rax
mov %rax, -48(%rbp)
mov $14, %rax
mov %rax, -40(%rbp)
# Call malloc with the stored length and keep the result at -56(%rbp).
mov -40(%rbp), %rax
push %rax
pop %rdi
call malloc
mov %rax, -56(%rbp)
# Range subscript 0..13 on the malloc result:
# pointer = base + 0, length = 13 - 0 + 1, stored at -88(%rbp) / -80(%rbp).
mov -56(%rbp), %rcx
mov $0, %rax
push %rax
mov $13, %rax
mov %rax, %rdx
pop %rax
mov %rdx, %r8
sub %rax, %r8
inc %r8
add %rcx, %rax
mov %rax, -88(%rbp)
mov %r8, -80(%rbp)
# Copy that pair to -72(%rbp) / -64(%rbp).
lea -88(%rbp), %rax
mov (%rax), %rcx
mov 8(%rax), %rdx
mov %rcx, -72(%rbp)
mov %rdx, -64(%rbp)
# Loop counter at -96(%rbp), initialised to 0.
mov $0, %rax
mov %rax, -96(%rbp)
.L0:
# Loop condition: counter < length at -40(%rbp) (setl is a signed compare).
mov -96(%rbp), %rax
mov %rax, %rcx
mov -40(%rbp), %rax
cmp %rax, %rcx
setl %al
movzx %al, %rax
test %rax, %rax
jz .L1
# Destination address = pointer at -72(%rbp) + counter, kept on the stack.
mov -72(%rbp), %rcx
mov -96(%rbp), %rax
add %rcx, %rax
push %rax
# Load one byte from pointer at -48(%rbp) + counter and store it to the destination.
mov -48(%rbp), %rcx
mov -96(%rbp), %rax
add %rcx, %rax
movzbl (%rax), %eax
pop %rcx
mov %al, (%rcx)
# counter = counter + 1
mov -96(%rbp), %rax
mov %rax, %rcx
mov $1, %rax
add %rcx, %rax
mov %rax, -96(%rbp)
jmp .L0
.L1:
# write(1, pointer at -72(%rbp), length at -64(%rbp)): arguments are pushed, then popped into rdx, rsi, rdi.
mov $1, %rax
push %rax
mov -72(%rbp), %rax
push %rax
mov -64(%rbp), %rax
push %rax
pop %rdx
pop %rsi
pop %rdi
call write
# return 0
mov $0, %rax
mov %rbp, %rsp
pop %rbp
ret
# Second epilogue; it follows a `ret` and has no label, so it is never reached.
mov %rbp, %rsp
pop %rbp
ret

152
utils.c
View file

@ -1,152 +0,0 @@
#include "utils.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdbool.h>
/*
 * Parse the `len` characters starting at `s` as a decimal integer with an
 * optional leading '-'.
 *
 * The characters are not validated: every one of them is treated as a
 * digit. An empty input yields 0. Accumulation is done in unsigned
 * arithmetic, so overly long inputs wrap around instead of overflowing a
 * signed value.
 */
i64 parse_int(char *s, usize len)
{
    bool negative = false;
    /* Only look at the sign when there is a character to look at;
     * otherwise `len -= 1` would wrap around to a huge value. */
    if (len > 0 && *s == '-') {
        s += 1;
        len -= 1;
        negative = true;
    }

    u64 int_part = 0;
    for (usize i = 0; i < len; i++) {
        int_part = (int_part * 10) + (s[i] - '0');
    }

    if (negative) {
        /* Negation modulo 2^64, same result as multiplying by -1. */
        int_part = 0 - int_part;
    }
    return int_part;
}
/*
 * Parse the `len` characters starting at `s` as a decimal number of the form
 * [-]digits[.digits].
 *
 * The digits on both sides of the point are combined into one integer, which
 * is then divided by 10 once per fractional digit. An input without a '.' is
 * parsed as a whole number. Characters are not validated.
 */
f64 parse_float(char *s, usize len)
{
    bool negative = false;
    if (len > 0 && *s == '-') {
        s += 1;
        len -= 1;
        negative = true;
    }

    /* point_pos == len means "no decimal point": previously that case was
     * indistinguishable from a point at index 0 and dropped the first digit. */
    usize point_pos = len;
    for (usize i = 0; i < len; i++) {
        if (s[i] == '.') {
            point_pos = i;
            break;
        }
    }
    usize frac_len = (point_pos < len) ? len - point_pos - 1 : 0;

    i64 int_part = parse_int(s, point_pos);
    i64 dec_part = (frac_len > 0) ? parse_int(s + point_pos + 1, frac_len) : 0;

    /* Shift the integer part left by one decimal place per fractional digit. */
    for (usize i = 0; i < frac_len; i++) {
        int_part *= 10;
    }
    int_part += dec_part;

    f64 f = (f64) int_part;
    for (usize i = 0; i < frac_len; i++) {
        f /= 10.0;
    }

    if (negative) {
        f *= -1;
    }
    return f;
}
/*
 * Associate `value` with the NUL-terminated string `key` in the trie rooted
 * at `root`. Missing nodes are allocated from `a` and zero-filled. If the
 * arena cannot provide a node, the insertion is abandoned and the trie keeps
 * the nodes created so far.
 */
void trie_insert(trie_node *root, arena *a, char *key, uint16_t value)
{
    trie_node *node = root;
    while (*key) {
        /* Index by byte value: a plain (possibly signed) char would turn
         * bytes >= 0x80 into an out-of-range index. */
        usize slot = (unsigned char)*key;
        if (!node->children[slot]) {
            trie_node *child = arena_alloc(a, sizeof(trie_node));
            if (!child) {
                return;
            }
            memset(child, 0x0, sizeof(trie_node));
            node->children[slot] = child;
        }
        node = node->children[slot];
        key++;
    }
    node->value = value;
}
/*
 * Look up the first `len` bytes of `key` in the trie rooted at `root`.
 * Returns the value stored at the node the key leads to, or 0 when the
 * trie has no such path.
 */
uint16_t trie_get(trie_node *root, char *key, usize len)
{
    trie_node *node = root;
    for (usize i = 0; i < len; i++) {
        /* Index by byte value so that bytes >= 0x80 stay within children[256]. */
        trie_node *next = node->children[(unsigned char)key[i]];
        if (!next) {
            return 0;
        }
        node = next;
    }
    return node->value;
}
#ifndef DEFAULT_ALIGNMENT
#define DEFAULT_ALIGNMENT (2 * sizeof(void *))
#endif
/*
 * Round `ptr` up to the next multiple of `align`; a value that is already
 * aligned is returned unchanged. The remainder is computed with a bit mask
 * (`align - 1`).
 * NOTE(review): the mask only equals the remainder when `align` is a power
 * of two, which this function does not check.
 */
static usize align_forward(usize ptr, usize align) {
    uintptr_t address = ptr;
    uintptr_t alignment = (uintptr_t)align;
    uintptr_t remainder = address & (alignment - 1);

    if (remainder == 0) {
        return (usize)address;
    }
    return (usize)(address + (alignment - remainder));
}
/*
 * Create an arena backed by `size` zero-filled bytes obtained from malloc.
 *
 * On allocation failure the returned arena has `memory == NULL` and a
 * capacity of 0; callers are expected to check `memory`.
 */
arena arena_init(usize size)
{
    void *memory = malloc(size);
    if (memory == NULL) {
        /* Do not memset a NULL block; report the failure through .memory. */
        return (arena){
            .capacity = 0,
            .position = 0,
            .memory = NULL,
        };
    }
    memset(memory, 0x0, size);
    return (arena){
        .capacity = size,
        .position = 0,
        .memory = memory,
    };
}
/*
 * Reserve `size` bytes from the arena, aligned to DEFAULT_ALIGNMENT.
 *
 * Returns a pointer to the reserved bytes, or NULL when the arena has no
 * backing memory or not enough room left. The returned memory is not
 * cleared by this function.
 */
void *arena_alloc(arena *a, usize size) {
    if (a->memory == NULL || a->position > a->capacity) return NULL;

    uintptr_t current_addr = (uintptr_t)a->memory + a->position;
    uintptr_t padding = align_forward(current_addr, DEFAULT_ALIGNMENT) - current_addr;

    /* Compare against the space that is left instead of summing
     * position + padding + size, which can wrap around for huge sizes. */
    usize remaining = a->capacity - a->position;
    if (padding > remaining || size > remaining - padding) return NULL;

    void *ret = (unsigned char *)a->memory + a->position + padding;
    a->position += (size + padding);
    return ret;
}
/* Remember how much of the arena is in use right now. */
snapshot arena_snapshot(arena *a)
{
    snapshot mark = a->position;
    return mark;
}

/*
 * Rewind the arena to a previously taken snapshot. The bytes handed out
 * after that snapshot become available again; they are not cleared.
 */
void arena_reset_to_snapshot(arena *a, snapshot s)
{
    a->position = s;
}

/* Rewind the arena to empty; the backing memory is kept. */
void arena_reset(arena *a)
{
    a->position = 0;
}

/* Release the backing memory of the arena. */
void arena_deinit(arena a)
{
    void *block = a.memory;
    free(block);
}

64
utils.h
View file

@ -1,64 +0,0 @@
#ifndef UTILS_H
#define UTILS_H
#include <stdint.h>
#include <stdint.h>
#include <stddef.h>
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef int8_t i8;
typedef int16_t i16;
typedef int32_t i32;
typedef int64_t i64;
typedef size_t usize;
typedef float f32;
typedef double f64;
i64 parse_int(char *s, usize len);
f64 parse_float(char *s, usize len);
/*
 * Bump allocator over one malloc'ed block.
 * capacity: size of the block in bytes.
 * position: offset of the first unused byte; allocation only moves it forward.
 * memory:   start of the block.
 */
typedef struct {
usize capacity;
usize position;
void* memory;
} arena;
typedef usize snapshot;
/*
 * NOTE(ernesto): faulty initialization is signaled by arena.memory
 * being null. It is the responsibility of the caller to check for faulty
 * initialization.
 */
arena arena_init(usize size);
/*
 * Returns null on unsuccessful allocation.
 * In this implementation an allocation is only unsuccessful if the arena
 * does not have enough memory to allocate the requested space.
 */
void *arena_alloc(arena *a, usize size);
snapshot arena_snapshot(arena *a);
void arena_reset_to_snapshot(arena *a, snapshot s);
void arena_reset(arena *a);
/* This call should never fail, also, do we even care if it does? */
void arena_deinit(arena a);
/*
 * Node of a byte-indexed trie. `children` has one slot per possible byte;
 * `value` is the value stored for the key that ends at this node. A lookup
 * that finds no path returns 0, so 0 cannot be told apart from "absent".
 */
typedef struct _trie_node {
uint16_t value;
struct _trie_node *children[256];
} trie_node;
void trie_insert(trie_node *root, arena *a, char *key, uint16_t value);
uint16_t trie_get(trie_node *root, char *key, usize len);
/* A row/column pair. NOTE(review): presumably a position in the source text; its users are not visible here. */
typedef struct {
usize row, column;
} source_pos;
#endif