Compare commits
3 commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 667769d1c0 | |||
| 870cf8f0b4 | |||
| ed0ad1d095 |
22 changed files with 6938 additions and 0 deletions
59
Makefile
Normal file
59
Makefile
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
# lc - L compiler
# See LICENSE file for copyright and license details.

include config.mk

SRC = lc.c utils.c lexer.c parser.c sema.c codegen.c
HDR = config.def.h utils.h lexer.h parser.h sema.h codegen.h
OBJ = ${SRC:.c=.o}

all: options lc

# Print the effective build settings before compiling.
options:
	@echo lc build options:
	@echo "CFLAGS = ${CFLAGS}"
	@echo "LDFLAGS = ${LDFLAGS}"
	@echo "CC = ${CC}"

.c.o:
	${CC} -c ${CFLAGS} $<

# Rebuild every object when the configuration or any project header changes.
${OBJ}: config.h config.mk ${HDR}

config.h:
	cp config.def.h $@

# NOTE(review): nothing in this Makefile depends on users.h; this rule looks
# like a leftover from a template - confirm before removing.
users.h:
	cp users.def.h $@

lc: ${OBJ}
	${CC} -o $@ ${OBJ} ${LDFLAGS}

clean:
	rm -f lc ${OBJ} lc-${VERSION}.tar.gz graph.dot graph.pdf

dist: clean
	mkdir -p lc-${VERSION}
	cp -R LICENSE Makefile README config.mk\
		lc.1 ${HDR} ${SRC} lc-${VERSION}
	tar -cf lc-${VERSION}.tar lc-${VERSION}
	gzip lc-${VERSION}.tar
	rm -rf lc-${VERSION}

install: all
	mkdir -p ${DESTDIR}${PREFIX}/bin
	cp -f lc ${DESTDIR}${PREFIX}/bin
	chmod 755 ${DESTDIR}${PREFIX}/bin/lc
	mkdir -p ${DESTDIR}${MANPREFIX}/man1
	sed "s/VERSION/${VERSION}/g" < lc.1 > ${DESTDIR}${MANPREFIX}/man1/lc.1
	chmod 644 ${DESTDIR}${MANPREFIX}/man1/lc.1

uninstall:
	rm -f ${DESTDIR}${PREFIX}/bin/lc\
		${DESTDIR}${MANPREFIX}/man1/lc.1

# Rebuild, dump the compiler's output as a Graphviz file and view it as a PDF.
graph: clean all
	./lc > graph.dot
	dot -Tpdf graph.dot > graph.pdf
	zathura ./graph.pdf

.PHONY: all options clean dist install uninstall graph
|
||||
24
README
Normal file
24
README
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
lc - L compiler
|
||||
============================
|
||||
lc is an L compiler. It can compile L code.
|
||||
|
||||
|
||||
Requirements
|
||||
------------
|
||||
In order to build lc you need make and a C compiler that supports C23 (config.mk passes -std=c23).
|
||||
|
||||
|
||||
Installation
|
||||
------------
|
||||
Edit config.mk to match your local setup (lc is installed into
|
||||
the /usr namespace by default; see PREFIX in config.mk).
|
||||
|
||||
Afterwards enter the following command to build and install lc (if
|
||||
necessary as root):
|
||||
|
||||
make clean install
|
||||
|
||||
|
||||
Usage
|
||||
-----------
|
||||
lc file
|
||||
8
codegen.h
Normal file
8
codegen.h
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
#ifndef CODEGEN_H
|
||||
#define CODEGEN_H
|
||||
|
||||
#include "parser.h"
|
||||
|
||||
/*
 * Code generation entry point. lc.c calls it with the parser's root node
 * (p->ast) after semantic analysis has run.
 * NOTE(review): output format and destination are not visible from this
 * header - see codegen.c.
 */
void generate(ast_node *node);
|
||||
|
||||
#endif
|
||||
4
config.def.h
Normal file
4
config.def.h
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
#ifndef CONFIG_H
|
||||
#define CONFIG_H
|
||||
|
||||
#endif
|
||||
4
config.h
Normal file
4
config.h
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
#ifndef CONFIG_H
|
||||
#define CONFIG_H
|
||||
|
||||
#endif
|
||||
27
config.mk
Normal file
27
config.mk
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
# lc version
|
||||
VERSION = 0.1
|
||||
|
||||
# Customize below to fit your system
|
||||
|
||||
# paths
|
||||
PREFIX = /usr
|
||||
MANPREFIX = ${PREFIX}/share/man
|
||||
|
||||
# OpenBSD (uncomment)
|
||||
#MANPREFIX = ${PREFIX}/man
|
||||
|
||||
# includes and libs
|
||||
INCS = -I.
|
||||
LIBS =
|
||||
# flags
|
||||
CPPFLAGS = -DVERSION=\"${VERSION}\"
|
||||
CFLAGS := -std=c23 -pedantic -Wall -O0 ${INCS} ${CPPFLAGS}
|
||||
CFLAGS := ${CFLAGS} -g
|
||||
LDFLAGS = ${LIBS}
|
||||
|
||||
# Solaris
|
||||
#CFLAGS = -fast ${INCS} -DVERSION=\"${VERSION}\"
|
||||
#LDFLAGS = ${LIBS}
|
||||
|
||||
# compiler and linker
|
||||
CC = cc
|
||||
16
examples/for.l
Normal file
16
examples/for.l
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
import std;
|
||||
|
||||
i32 main()
|
||||
{
|
||||
u32 x = 4;
|
||||
loop {
|
||||
u32 b = 3;
|
||||
}
|
||||
x == 3;
|
||||
|
||||
loop (0.., test) |k, i| {
|
||||
|
||||
}
|
||||
u32 b = 3;
|
||||
|
||||
}
|
||||
10
examples/hello_world.l
Normal file
10
examples/hello_world.l
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
struct b {
|
||||
i32 a,
|
||||
u32 b,
|
||||
u32 c,
|
||||
}
|
||||
|
||||
u32 test()
|
||||
{
|
||||
f32 a = 5.0;
|
||||
}
|
||||
241
lc.c
Normal file
241
lc.c
Normal file
|
|
@ -0,0 +1,241 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "utils.h"
|
||||
#include "lexer.h"
|
||||
#include "parser.h"
|
||||
#include "sema.h"
|
||||
#include "codegen.h"
|
||||
|
||||
/* Write one space to stdout per indentation level; a non-positive depth prints nothing. */
void print_indent(int depth) {
    for (int left = depth; left > 0; left--) {
        fputs(" ", stdout);
    }
}
|
||||
|
||||
/*
 * Return the source spelling of a binary operator for AST dumps.
 * Every member of binary_op is covered (the compound shift/bitwise/modulo
 * assignments used to fall through to "?"); values outside the enum
 * still yield "?".
 */
const char* get_op_str(binary_op op) {
    switch(op) {
    /* arithmetic and bitwise */
    case OP_PLUS: return "+";
    case OP_MINUS: return "-";
    case OP_DIV: return "/";
    case OP_MUL: return "*";
    case OP_MOD: return "%";
    case OP_BOR: return "|";
    case OP_BAND: return "&";
    case OP_BXOR: return "^";
    /* assignment family */
    case OP_ASSIGN: return "=";
    case OP_ASSIGN_PTR: return "<-";
    case OP_RSHIFT_EQ: return ">>=";
    case OP_LSHIFT_EQ: return "<<=";
    case OP_PLUS_EQ: return "+=";
    case OP_MINUS_EQ: return "-=";
    case OP_DIV_EQ: return "/=";
    case OP_MUL_EQ: return "*=";
    case OP_BOR_EQ: return "|=";
    case OP_BAND_EQ: return "&=";
    case OP_BXOR_EQ: return "^=";
    case OP_MOD_EQ: return "%=";
    /* comparison and logical */
    case OP_EQ: return "==";
    case OP_AND: return "&&";
    case OP_OR: return "||";
    case OP_NEQ: return "!=";
    case OP_GT: return ">";
    case OP_LT: return "<";
    case OP_GE: return ">=";
    case OP_LE: return "<=";
    default: return "?";
    }
}
|
||||
|
||||
/*
 * Return the source spelling of a unary operator for AST dumps,
 * or "?" when the value is not a known unary_op.
 */
const char *get_uop_str(unary_op op) {
    static const char *const spellings[] = {
        [UOP_INCR]  = "++",
        [UOP_MINUS] = "-",
        [UOP_DECR]  = "--",
        [UOP_DEREF] = "*",
        [UOP_REF]   = "&",
        [UOP_NOT]   = "!",
    };
    const size_t known = sizeof spellings / sizeof spellings[0];
    const size_t slot = (size_t)op; /* negative values wrap to a huge index and miss */

    if (slot < known && spellings[slot]) {
        return spellings[slot];
    }
    return "?";
}
|
||||
|
||||
void print_ast(ast_node *node, int depth) {
|
||||
if (!node) return;
|
||||
|
||||
print_indent(depth);
|
||||
|
||||
switch (node->type) {
|
||||
case NODE_INTEGER:
|
||||
printf("Integer: %lu\n", node->expr.integer);
|
||||
break;
|
||||
case NODE_FLOAT:
|
||||
printf("Float: %f\n", node->expr.flt);
|
||||
break;
|
||||
case NODE_CHAR:
|
||||
printf("Char: '%c'\n", node->expr.ch);
|
||||
break;
|
||||
case NODE_STRING:
|
||||
printf("String: \"%.*s\"\n", (int)node->expr.string.len, node->expr.string.start);
|
||||
break;
|
||||
case NODE_IDENTIFIER:
|
||||
printf("Identifier: %.*s\n", (int)node->expr.string.len, node->expr.string.start);
|
||||
break;
|
||||
case NODE_CAST:
|
||||
printf("Cast:\n");
|
||||
print_ast(node->expr.cast.type, depth);
|
||||
print_ast(node->expr.cast.value, depth + 1);
|
||||
break;
|
||||
case NODE_ACCESS:
|
||||
printf("Access:\n");
|
||||
print_ast(node->expr.access.expr, depth + 1);
|
||||
print_ast(node->expr.access.member, depth + 1);
|
||||
break;
|
||||
case NODE_LABEL:
|
||||
printf("Label: %.*s\n", (int)node->expr.label.name_len, node->expr.label.name);
|
||||
break;
|
||||
case NODE_GOTO:
|
||||
printf("Goto: %.*s\n", (int)node->expr.label.name_len, node->expr.label.name);
|
||||
break;
|
||||
case NODE_BINARY:
|
||||
printf("BinaryOp (%s)\n", get_op_str(node->expr.binary.operator));
|
||||
print_ast(node->expr.binary.left, depth + 1);
|
||||
print_ast(node->expr.binary.right, depth + 1);
|
||||
break;
|
||||
case NODE_ARRAY_SUBSCRIPT:
|
||||
printf("Array subscript\n");
|
||||
print_ast(node->expr.subscript.expr, depth + 1);
|
||||
print_ast(node->expr.subscript.index, depth + 1);
|
||||
break;
|
||||
case NODE_UNARY:
|
||||
printf("UnaryOp (%s)\n", get_uop_str(node->expr.unary.operator));
|
||||
print_ast(node->expr.unary.right, depth + 1);
|
||||
break;
|
||||
case NODE_POSTFIX:
|
||||
printf("Postfix (%s)\n", get_uop_str(node->expr.unary.operator));
|
||||
print_ast(node->expr.unary.right, depth + 1);
|
||||
break;
|
||||
case NODE_BREAK:
|
||||
printf("Break\n");
|
||||
break;
|
||||
case NODE_TERNARY:
|
||||
printf("Ternary (? :)\n");
|
||||
print_indent(depth + 1); printf("Condition:\n");
|
||||
print_ast(node->expr.ternary.condition, depth + 2);
|
||||
print_indent(depth + 1); printf("Then:\n");
|
||||
print_ast(node->expr.ternary.then, depth + 2);
|
||||
print_indent(depth + 1); printf("Else:\n");
|
||||
print_ast(node->expr.ternary.otherwise, depth + 2);
|
||||
break;
|
||||
case NODE_UNIT:
|
||||
printf("Unit\n");
|
||||
ast_node *current = node;
|
||||
while (current && current->type == NODE_UNIT) {
|
||||
print_ast(current->expr.unit_node.expr, depth + 1);
|
||||
current = current->expr.unit_node.next;
|
||||
}
|
||||
break;
|
||||
case NODE_CALL:
|
||||
printf("Call: %.*s\n", (int)node->expr.call.name_len, node->expr.call.name);
|
||||
current = node->expr.call.parameters;
|
||||
while (current && current->type == NODE_UNIT) {
|
||||
print_ast(current->expr.unit_node.expr, depth + 1);
|
||||
current = current->expr.unit_node.next;
|
||||
}
|
||||
break;
|
||||
case NODE_STRUCT_INIT:
|
||||
printf("Struct init:\n");
|
||||
current = node->expr.struct_init.members;
|
||||
while (current && current->type == NODE_UNIT) {
|
||||
print_ast(current->expr.unit_node.expr, depth + 1);
|
||||
current = current->expr.unit_node.next;
|
||||
}
|
||||
break;
|
||||
case NODE_STRUCT:
|
||||
printf("Struct: %.*s\n", (int)node->expr.structure.name_len, node->expr.structure.name);
|
||||
member *m = node->expr.structure.members;
|
||||
while (m) {
|
||||
print_ast(m->type, depth + 1);
|
||||
m = m->next;
|
||||
}
|
||||
break;
|
||||
case NODE_UNION:
|
||||
printf("Union: %.*s\n", (int)node->expr.structure.name_len, node->expr.structure.name);
|
||||
m = node->expr.structure.members;
|
||||
while (m) {
|
||||
print_ast(m->type, depth + 1);
|
||||
m = m->next;
|
||||
}
|
||||
break;
|
||||
case NODE_ENUM:
|
||||
printf("Enum: %.*s\n", (int)node->expr.enm.name_len, node->expr.enm.name);
|
||||
variant *v = node->expr.enm.variants;
|
||||
while (v) {
|
||||
printf("\t%.*s\n", (int)v->name_len, v->name);
|
||||
v = v->next;
|
||||
}
|
||||
break;
|
||||
case NODE_IF:
|
||||
printf("If:\n");
|
||||
print_ast(node->expr.whle.condition, depth + 1);
|
||||
print_ast(node->expr.whle.body, depth + 1);
|
||||
break;
|
||||
case NODE_VAR_DECL:
|
||||
printf("VarDecl: ");
|
||||
print_ast(node->expr.var_decl.type, 0);
|
||||
print_ast(node->expr.var_decl.value, depth + 1);
|
||||
break;
|
||||
case NODE_FUNCTION:
|
||||
printf("Function: %.*s\n", (int)node->expr.function.name_len, node->expr.function.name);
|
||||
m = node->expr.function.parameters;
|
||||
while (m) {
|
||||
print_ast(m->type, depth + 1);
|
||||
m = m->next;
|
||||
}
|
||||
print_ast(node->expr.function.body, depth + 1);
|
||||
break;
|
||||
case NODE_RETURN:
|
||||
printf("Return:\n");
|
||||
print_ast(node->expr.ret.value, depth + 1);
|
||||
break;
|
||||
case NODE_IMPORT:
|
||||
printf("Import:\n");
|
||||
print_ast(node->expr.import.path, depth + 1);
|
||||
break;
|
||||
case NODE_WHILE:
|
||||
printf("While:\n");
|
||||
print_ast(node->expr.whle.condition, depth + 1);
|
||||
print_ast(node->expr.whle.body, depth + 1);
|
||||
break;
|
||||
case NODE_FOR:
|
||||
printf("For:\n");
|
||||
print_ast(node->expr.fr.slices, depth + 1);
|
||||
print_ast(node->expr.fr.captures, depth + 1);
|
||||
print_indent(depth + 1);
|
||||
print_ast(node->expr.fr.body, depth + 1);
|
||||
break;
|
||||
case NODE_RANGE:
|
||||
printf("Range:\n");
|
||||
print_ast(node->expr.binary.left, depth + 1);
|
||||
print_ast(node->expr.binary.right, depth + 1);
|
||||
break;
|
||||
default:
|
||||
printf("Unknown Node Type: %d\n", node->type);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
FILE *fp = fopen("test.l", "r");
|
||||
usize size = 0;
|
||||
fseek(fp, 0, SEEK_END);
|
||||
size = ftell(fp);
|
||||
fseek(fp, 0, SEEK_SET);
|
||||
char *src = malloc(size+1);
|
||||
fread(src, size, 1, fp);
|
||||
fclose(fp);
|
||||
src[size] = '\0';
|
||||
|
||||
arena a = arena_init(0x1000 * 0x1000 * 64);
|
||||
lexer *l = lexer_init(src, size, &a);
|
||||
parser *p = parser_init(l, &a);
|
||||
print_ast(p->ast, 0);
|
||||
sema_init(p, &a);
|
||||
|
||||
generate(p->ast);
|
||||
|
||||
arena_deinit(a);
|
||||
|
||||
return 0;
|
||||
}
|
||||
422
lexer.c
Normal file
422
lexer.c
Normal file
|
|
@ -0,0 +1,422 @@
|
|||
#include "lexer.h"
|
||||
#include <stdbool.h>
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Keyword trie: allocated and filled in lexer_init, queried by parse_identifier. */
trie_node *keywords;
|
||||
|
||||
static void add_token(lexer *l, token_type type, usize len)
|
||||
{
|
||||
token *t = arena_alloc(l->allocator, sizeof(token));
|
||||
t->type = type;
|
||||
t->lexeme_len = len;
|
||||
t->lexeme = l->source + l->index;
|
||||
t->position.row = l->row;
|
||||
t->position.column = l->column;
|
||||
|
||||
if (!l->tokens) {
|
||||
l->tokens = t;
|
||||
l->tail = t;
|
||||
} else {
|
||||
l->tail->next = t;
|
||||
l->tail = t;
|
||||
}
|
||||
}
|
||||
|
||||
static void add_error(lexer *l, char *msg)
|
||||
{
|
||||
token *t = arena_alloc(l->allocator, sizeof(token));
|
||||
t->type = TOKEN_ERROR;
|
||||
t->lexeme_len = strlen(msg);
|
||||
t->lexeme = msg;
|
||||
t->position.row = l->row;
|
||||
t->position.column = l->column;
|
||||
|
||||
if (!l->tokens) {
|
||||
l->tokens = t;
|
||||
l->tail = t;
|
||||
} else {
|
||||
l->tail->next = t;
|
||||
l->tail = t;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Lex an integer or float literal starting at the current index.
 *
 * A literal is a run of digits with at most one '.' that is immediately
 * followed by a digit; it becomes TOKEN_FLOAT when the '.' is present and
 * TOKEN_INTEGER otherwise. A second '.' ends the literal, so "1.23.4" lexes
 * as the float "1.23" followed by '.' and "4" instead of one malformed
 * float. On return the index is just past the literal.
 */
static void parse_number(lexer *l)
{
    const usize start = l->index;
    bool is_float = false;

    /* ctype functions need an unsigned char value; plain char may be negative. */
    while (isdigit((unsigned char)l->source[l->index])) {
        if (!is_float
            && l->source[l->index + 1] == '.'
            && isdigit((unsigned char)l->source[l->index + 2])) {
            /* Consume the digit, the dot and the first fractional digit. */
            is_float = true;
            l->index += 3;
        } else {
            l->index += 1;
        }
    }

    const usize len = l->index - start;

    /* add_token reads the lexeme from l->index, so rewind while emitting. */
    l->index = start;
    add_token(l, is_float ? TOKEN_FLOAT : TOKEN_INTEGER, len);
    l->index = start + len;
}
|
||||
|
||||
/*
 * Lex an identifier or keyword starting at the current index.
 *
 * The lexeme is the longest run of alphanumerics and underscores. If the
 * keyword trie knows the lexeme, the keyword's token type is emitted,
 * otherwise TOKEN_IDENTIFIER. On return the index is just past the lexeme.
 */
static void parse_identifier(lexer *l)
{
    const usize start = l->index;
    usize len = 0;

    for (;;) {
        /* ctype functions need an unsigned char value; plain char may be negative. */
        const unsigned char c = (unsigned char)l->source[start + len];
        if (!isalnum(c) && c != '_') {
            break;
        }
        len += 1;
    }

    /* A zero result (TOKEN_ERROR) from the trie means "not a keyword". */
    token_type keyword = trie_get(keywords, l->source + start, len);

    /* l->index still equals start here, which is where add_token reads the lexeme. */
    add_token(l, keyword ? keyword : TOKEN_IDENTIFIER, len);
    l->index = start + len;
}
|
||||
|
||||
/*
 * Lex the body of a string literal; the caller has already consumed the
 * opening quote.
 *
 * On success a TOKEN_STRING covering the text between the quotes is emitted
 * and the index ends up just past the closing quote. If a newline or the end
 * of input is reached first, an error token is emitted and the index is left
 * on that terminating character.
 */
static void parse_string(lexer *l)
{
    const usize start = l->index;

    for (;;) {
        const char c = l->source[l->index];

        if (c == '"') {
            break;
        }
        if (c == '\0' || c == '\n') {
            add_error(l, "unclosed string literal.");
            return;
        }
        l->index += 1;
    }

    const usize len = l->index - start;

    /* add_token reads the lexeme from l->index, so rewind while emitting. */
    l->index = start;
    add_token(l, TOKEN_STRING, len);
    l->index = start + len + 1; /* +1 skips the closing quote */
}
|
||||
|
||||
static bool parse_special(lexer *l)
|
||||
{
|
||||
switch (l->source[l->index]) {
|
||||
case '+':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_PLUS_EQ, 2);
|
||||
l->index += 2;
|
||||
} else if (l->source[l->index+1] == '+') {
|
||||
add_token(l, TOKEN_PLUS_PLUS, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_PLUS, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case '-':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_MINUS_EQ, 2);
|
||||
l->index += 2;
|
||||
} else if (l->source[l->index+1] == '-') {
|
||||
add_token(l, TOKEN_MINUS_MINUS, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_MINUS, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case '/':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_SLASH_EQ, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_SLASH, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case '*':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_STAR_EQ, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_STAR, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case '%':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_PERC_EQ, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_PERC, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case '&':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_AND_EQ, 2);
|
||||
l->index += 2;
|
||||
} else if (l->source[l->index+1] == '&') {
|
||||
add_token(l, TOKEN_DOUBLE_AND, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_AND, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case '^':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_HAT_EQ, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_HAT, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case '|':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_PIPE_EQ, 2);
|
||||
l->index += 2;
|
||||
} else if (l->source[l->index+1] == '|') {
|
||||
add_token(l, TOKEN_OR, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_PIPE, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case '=':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_DOUBLE_EQ, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_EQ, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case '>':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_GREATER_EQ, 2);
|
||||
l->index += 2;
|
||||
} else if (l->source[l->index+1] == '>') {
|
||||
if (l->source[l->index+2] == '=') {
|
||||
add_token(l, TOKEN_RSHIFT_EQ, 3);
|
||||
l->index += 3;
|
||||
return true;
|
||||
}
|
||||
add_token(l, TOKEN_RSHIFT, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_GREATER_THAN, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case '<':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_LESS_EQ, 2);
|
||||
l->index += 2;
|
||||
} else if (l->source[l->index+1] == '-') {
|
||||
add_token(l, TOKEN_ARROW, 2);
|
||||
l->index += 2;
|
||||
} else if (l->source[l->index+1] == '<') {
|
||||
if (l->source[l->index+2] == '=') {
|
||||
add_token(l, TOKEN_LSHIFT_EQ, 3);
|
||||
l->index += 3;
|
||||
return true;
|
||||
}
|
||||
add_token(l, TOKEN_LSHIFT, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_LESS_THAN, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case '!':
|
||||
if (l->source[l->index+1] == '=') {
|
||||
add_token(l, TOKEN_NOT_EQ, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_BANG, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case ':':
|
||||
add_token(l, TOKEN_COLON, 1);
|
||||
l->index += 1;
|
||||
return true;
|
||||
case ';':
|
||||
add_token(l, TOKEN_SEMICOLON, 1);
|
||||
l->index += 1;
|
||||
return true;
|
||||
case '.':
|
||||
if (l->source[l->index+1] == '.') {
|
||||
add_token(l, TOKEN_DOUBLE_DOT, 2);
|
||||
l->index += 2;
|
||||
} else {
|
||||
add_token(l, TOKEN_DOT, 1);
|
||||
l->index += 1;
|
||||
}
|
||||
return true;
|
||||
case ',':
|
||||
add_token(l, TOKEN_COMMA, 1);
|
||||
l->index += 1;
|
||||
return true;
|
||||
case '(':
|
||||
add_token(l, TOKEN_LPAREN, 1);
|
||||
l->index += 1;
|
||||
return true;
|
||||
case ')':
|
||||
add_token(l, TOKEN_RPAREN, 1);
|
||||
l->index += 1;
|
||||
return true;
|
||||
case '[':
|
||||
add_token(l, TOKEN_LSQUARE, 1);
|
||||
l->index += 1;
|
||||
return true;
|
||||
case ']':
|
||||
add_token(l, TOKEN_RSQUARE, 1);
|
||||
l->index += 1;
|
||||
return true;
|
||||
case '{':
|
||||
add_token(l, TOKEN_LCURLY, 1);
|
||||
l->index += 1;
|
||||
return true;
|
||||
case '}':
|
||||
add_token(l, TOKEN_RCURLY, 1);
|
||||
l->index += 1;
|
||||
return true;
|
||||
case '\'':
|
||||
if (l->source[l->index+1] == '\\') {
|
||||
if (l->source[l->index+3] != '\'') {
|
||||
add_error(l, "unclosed character literal.");
|
||||
l->index += 1;
|
||||
return true;
|
||||
}
|
||||
l->index += 1;
|
||||
add_token(l, TOKEN_CHAR, 2);
|
||||
l->index += 3;
|
||||
return true;
|
||||
} else {
|
||||
if (l->source[l->index+2] != '\'') {
|
||||
add_error(l, "unclosed character literal.");
|
||||
l->index += 1;
|
||||
return true;
|
||||
}
|
||||
l->index += 1;
|
||||
add_token(l, TOKEN_CHAR, 1);
|
||||
l->index += 2;
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Main lexing loop: walk the source buffer and append tokens to the lexer.
 *
 * Tracks row/column as it goes, skips whitespace and `//` line comments and
 * dispatches to the specialised lexers for operators, numbers, identifiers
 * and strings. Characters that start no token are skipped silently.
 *
 * Compared with the previous version:
 *  - a `//` comment on the last line without a trailing newline no longer
 *    scans past the end of the buffer;
 *  - the newline ending a comment is handled by the normal newline path, so
 *    the row counter stays correct after comments;
 *  - the loop stops at `size` rather than one past it.
 */
static void parse(lexer *l)
{
    while (l->index < l->size) {
        const char c = l->source[l->index];
        /* ctype functions need an unsigned char value; plain char may be negative. */
        const unsigned char uc = (unsigned char)c;

        l->column += 1;

        if (c == '\n') {
            l->index += 1;
            l->row += 1;
            l->column = 0;
            continue;
        }

        const usize head = l->index;

        if (c == '/' && l->source[l->index + 1] == '/') {
            /* Skip to the end of the line but never beyond the buffer. */
            while (l->index < l->size && l->source[l->index] != '\n') {
                l->index += 1;
            }
            l->column += (l->index - head - 1);
            /* Leave the '\n' (if any) for the newline branch above. */
            continue;
        }

        if (isspace(uc)) {
            l->index += 1;
            continue;
        }

        if (parse_special(l)) {
            l->column += (l->index - head - 1);
            continue;
        }

        if (isdigit(uc)) {
            parse_number(l);
            l->column += (l->index - head - 1);
            continue;
        }

        /* NOTE(review): identifiers starting with '_' are not recognised here
         * although parse_identifier accepts '_' inside a name - confirm intent. */
        if (isalpha(uc)) {
            parse_identifier(l);
            l->column += (l->index - head - 1);
            continue;
        }

        if (c == '"') {
            l->index += 1; /* consume the opening quote */
            parse_string(l);
            l->column += (l->index - head - 1);
            continue;
        }

        /* Unrecognised character: skip it. */
        l->index += 1;
    }
}
|
||||
|
||||
/*
 * Create a lexer over `source` (`size` bytes, followed by a NUL terminator),
 * register the language keywords and tokenise the whole buffer.
 *
 * All allocations come from `arena`. Token lexemes point into `source`, so
 * the buffer must stay alive for as long as the tokens are used. The
 * returned lexer's `tokens` field is the head of the token list.
 *
 * The column counter starts at 0 because the lexing loop increments it
 * before looking at each character; this makes the first character of the
 * first row column 1, matching every later row (which resets to 0 on
 * newline).
 */
lexer *lexer_init(char *source, usize size, arena *arena)
{
    /* Literal-backed keyword spellings; `char *` matches how they were passed before. */
    static const struct {
        char *word;
        token_type type;
    } keyword_table[] = {
        { "true",     TOKEN_TRUE },
        { "false",    TOKEN_FALSE },
        { "struct",   TOKEN_STRUCT },
        { "enum",     TOKEN_ENUM },
        { "union",    TOKEN_UNION },
        { "loop",     TOKEN_LOOP },
        { "while",    TOKEN_WHILE },
        { "until",    TOKEN_UNTIL },
        { "goto",     TOKEN_GOTO },
        { "if",       TOKEN_IF },
        { "else",     TOKEN_ELSE },
        { "switch",   TOKEN_SWITCH },
        { "break",    TOKEN_BREAK },
        { "defer",    TOKEN_DEFER },
        { "return",   TOKEN_RETURN },
        { "import",   TOKEN_IMPORT },
        { "const",    TOKEN_CONST },
        { "extern",   TOKEN_EXTERN },
        { "volatile", TOKEN_VOLATILE },
    };

    lexer *lex = arena_alloc(arena, sizeof(lexer));
    lex->column = 0;
    lex->row = 1;
    lex->index = 0;
    lex->size = size;
    lex->tokens = 0;
    lex->tail = 0;
    lex->allocator = arena;
    lex->source = source;

    keywords = arena_alloc(arena, sizeof(trie_node));
    for (usize i = 0; i < sizeof keyword_table / sizeof keyword_table[0]; i++) {
        trie_insert(keywords, lex->allocator, keyword_table[i].word, keyword_table[i].type);
    }

    parse(lex);

    return lex;
}
|
||||
97
lexer.h
Normal file
97
lexer.h
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
#ifndef LEXER_H
|
||||
#define LEXER_H
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
/*
 * Kinds of token produced by the lexer. Values are assigned sequentially
 * from 0, and TOKEN_ERROR being 0 is relied upon by the keyword lookup
 * (a zero result means "not a keyword").
 */
typedef enum {
    /* bookkeeping */
    TOKEN_ERROR,
    TOKEN_END,

    /* arithmetic and bitwise operators */
    TOKEN_PLUS,         /* +  */
    TOKEN_PLUS_PLUS,    /* ++ */
    TOKEN_MINUS,        /* -  */
    TOKEN_MINUS_MINUS,  /* -- */
    TOKEN_SLASH,        /* /  */
    TOKEN_PERC,         /* %  */
    TOKEN_STAR,         /* *  */
    TOKEN_AND,          /* &  */
    TOKEN_HAT,          /* ^  */
    TOKEN_PIPE,         /* |  */
    TOKEN_LSHIFT,       /* << */
    TOKEN_RSHIFT,       /* >> */

    /* comparison and assignment */
    TOKEN_DOUBLE_EQ,    /* == */
    TOKEN_ARROW,        /* <- */
    TOKEN_EQ,           /* =  */
    TOKEN_LESS_THAN,    /* <  */
    TOKEN_GREATER_THAN, /* >  */
    TOKEN_LESS_EQ,      /* <= */
    TOKEN_GREATER_EQ,   /* >= */
    TOKEN_NOT_EQ,       /* != */
    TOKEN_PLUS_EQ,      /* += */
    TOKEN_MINUS_EQ,     /* -= */
    TOKEN_STAR_EQ,      /* *= */
    TOKEN_SLASH_EQ,     /* /= */
    TOKEN_AND_EQ,       /* &= */
    TOKEN_HAT_EQ,       /* ^= */
    TOKEN_PIPE_EQ,      /* |= */
    TOKEN_PERC_EQ,      /* %= */
    TOKEN_LSHIFT_EQ,    /* <<= */
    TOKEN_RSHIFT_EQ,    /* >>= */

    /* logical operators */
    TOKEN_OR,           /* || */
    TOKEN_DOUBLE_AND,   /* && */

    /* punctuation */
    TOKEN_COLON,        /* :  */
    TOKEN_SEMICOLON,    /* ;  */
    TOKEN_DOT,          /* .  */
    TOKEN_DOUBLE_DOT,   /* .. */
    TOKEN_BANG,         /* !  */
    TOKEN_COMMA,        /* ,  */
    TOKEN_LPAREN,       /* (  */
    TOKEN_RPAREN,       /* )  */
    TOKEN_LSQUARE,      /* [  */
    TOKEN_RSQUARE,      /* ]  */
    TOKEN_LCURLY,       /* {  */
    TOKEN_RCURLY,       /* }  */

    /* literals and names */
    TOKEN_INTEGER,
    TOKEN_FLOAT,
    TOKEN_IDENTIFIER,
    TOKEN_STRING,
    TOKEN_CHAR,

    /* keywords */
    TOKEN_TRUE,
    TOKEN_FALSE,
    TOKEN_GOTO,
    TOKEN_LOOP,
    TOKEN_WHILE,
    TOKEN_UNTIL,
    TOKEN_IF,
    TOKEN_ELSE,
    TOKEN_SWITCH,
    TOKEN_BREAK,
    TOKEN_DEFER,
    TOKEN_RETURN,
    TOKEN_IMPORT,
    TOKEN_CONST,
    TOKEN_EXTERN,
    TOKEN_VOLATILE,
    TOKEN_STRUCT,
    TOKEN_ENUM,
    TOKEN_UNION
} token_type;
|
||||
|
||||
/* One lexed token; tokens are chained into a singly linked list via `next`. */
typedef struct _token {
|
||||
token_type type;
|
||||
/* row/column recorded by the lexer when the token was created */
source_pos position;
|
||||
/* Not NUL-terminated: points into the source buffer, or at the message text for TOKEN_ERROR. */
char *lexeme;
|
||||
usize lexeme_len;
|
||||
struct _token *next;
|
||||
} token;
|
||||
|
||||
/* Lexer state: a cursor over the source buffer plus the token list built so far. */
typedef struct {
|
||||
/* column/row: current position for diagnostics; index: cursor into source; size: length of source */
usize column, row, index, size;
|
||||
char *source;
|
||||
/* head of the token list */
token *tokens;
|
||||
/* last token appended, so appends need no list walk */
token *tail;
|
||||
/* arena that tokens are allocated from */
arena *allocator;
|
||||
} lexer;
|
||||
|
||||
/* Allocate a lexer from `arena` and tokenise `source` (`size` bytes); tokens are available via the returned lexer's `tokens`. */
lexer *lexer_init(char *source, usize size, arena *arena);
|
||||
|
||||
#endif
|
||||
256
parser.h
Normal file
256
parser.h
Normal file
|
|
@ -0,0 +1,256 @@
|
|||
#ifndef PARSER_H
|
||||
#define PARSER_H
|
||||
|
||||
#include "lexer.h"
|
||||
#include "utils.h"
|
||||
#include <stdbool.h>
|
||||
|
||||
struct _type;
|
||||
struct _ast_node;
|
||||
|
||||
/* Binary operators carried by NODE_BINARY nodes; numbered sequentially from 0. */
typedef enum {
    /* arithmetic and bitwise */
    OP_PLUS,        /* +   */
    OP_MINUS,       /* -   */
    OP_DIV,         /* /   */
    OP_MUL,         /* *   */
    OP_MOD,         /* %   */
    OP_BOR,         /* |   */
    OP_BAND,        /* &   */
    OP_BXOR,        /* ^   */

    /* assignment family */
    OP_ASSIGN,      /* =   */
    OP_ASSIGN_PTR,  /* <-  */
    OP_RSHIFT_EQ,   /* >>= */
    OP_LSHIFT_EQ,   /* <<= */
    OP_PLUS_EQ,     /* +=  */
    OP_MINUS_EQ,    /* -=  */
    OP_DIV_EQ,      /* /=  */
    OP_MUL_EQ,      /* *=  */
    OP_BOR_EQ,      /* |=  */
    OP_BAND_EQ,     /* &=  */
    OP_BXOR_EQ,     /* ^=  */
    OP_MOD_EQ,      /* %=  */

    /* comparison and logical */
    OP_EQ,          /* ==  */
    OP_AND,         /* &&  */
    OP_OR,          /* ||  */
    OP_NEQ,         /* !=  */
    OP_GT,          /* >   */
    OP_LT,          /* <   */
    OP_GE,          /* >=  */
    OP_LE,          /* <=  */
} binary_op;
|
||||
|
||||
/* Operators carried by NODE_UNARY and NODE_POSTFIX nodes; numbered sequentially from 0. */
typedef enum {
    UOP_INCR,   /* ++ */
    UOP_MINUS,  /* -  */
    UOP_DECR,   /* -- */
    UOP_DEREF,  /* *  */
    UOP_REF,    /* &  */
    UOP_NOT,    /* !  */
} unary_op;
|
||||
|
||||
/*
 * Layout choices for a struct.
 * NOTE(review): the exact meaning of each mode is not visible in this
 * header - confirm against its users.
 */
typedef enum {
    LAYOUT_AUTO,    /* 0 */
    LAYOUT_PACKED,  /* 1 */
    LAYOUT_EXTERN   /* 2 */
} struct_layout;
|
||||
|
||||
/* One entry of a linked member list; used for struct/union members and for function parameters. */
typedef struct _member {
|
||||
/* AST node describing the member's type */
struct _ast_node *type;
|
||||
/* name is paired with an explicit length (name_len) */
char *name;
|
||||
usize name_len;
|
||||
struct _member *next;
|
||||
/* NOTE(review): presumably the member's offset inside its aggregate - confirm where it is set. */
usize offset;
|
||||
} member;
|
||||
|
||||
/* A function's name together with its parameter list. */
typedef struct {
|
||||
/* name is paired with an explicit length (name_len) */
char *name;
|
||||
usize name_len;
|
||||
/* linked list of parameters */
member *params;
|
||||
} function;
|
||||
|
||||
/* One entry of an enum's linked variant list. */
typedef struct _variant {
|
||||
/* NOTE(review): presumably the variant's explicit value expression, if any - confirm in the parser. */
struct _ast_node *value;
|
||||
/* name is paired with an explicit length (name_len) */
char *name;
|
||||
usize name_len;
|
||||
struct _variant *next;
|
||||
} variant;
|
||||
|
||||
/* Discriminator for ast_node; selects which member of the `expr` union is in use. */
typedef enum {
    /* expressions */
    NODE_IDENTIFIER,
    NODE_INTEGER,
    NODE_FLOAT,
    NODE_STRING,
    NODE_CHAR,
    NODE_BOOL,
    NODE_CAST,
    NODE_UNARY,
    NODE_BINARY,
    NODE_RANGE,
    NODE_ARRAY_SUBSCRIPT,
    NODE_POSTFIX,
    NODE_CALL,
    NODE_ACCESS,
    NODE_STRUCT_INIT,
    NODE_TERNARY,   /* TODO */

    /* statements */
    NODE_BREAK,
    NODE_RETURN,
    NODE_IMPORT,
    NODE_FOR,
    NODE_WHILE,
    NODE_IF,
    NODE_VAR_DECL,
    NODE_LABEL,
    NODE_GOTO,

    /* declarations and structural nodes */
    NODE_ENUM,
    NODE_STRUCT,
    NODE_UNION,
    NODE_FUNCTION,
    NODE_PTR_TYPE,
    NODE_SWITCH,    /* TODO */
    NODE_UNIT,
} node_type;
|
||||
|
||||
/*
 * Values for the `flags` field of ptr_type nodes.
 * NOTE(review): PTR_SLICE is 0, so it cannot be detected with a bit test -
 * it reads as "PTR_RAW not set"; confirm against the users of these flags.
 */
#define PTR_SLICE 0x0
|
||||
#define PTR_RAW 0x1
|
||||
#define PTR_CONST 0x2
|
||||
#define PTR_VOLATILE 0x4
|
||||
|
||||
/* Loop flag bits. NOTE(review): presumably stored in the `flags` field of whle nodes - confirm in the parser. */
#define LOOP_WHILE 0x1
|
||||
#define LOOP_UNTIL 0x2
|
||||
#define LOOP_AFTER 0x4
|
||||
|
||||
typedef struct _ast_node {
|
||||
node_type type;
|
||||
source_pos position;
|
||||
struct _type *expr_type;
|
||||
bool address_taken; // used in IR generation.
|
||||
union {
|
||||
struct {
|
||||
struct _ast_node *type;
|
||||
u8 flags;
|
||||
} ptr_type;
|
||||
struct {
|
||||
char *name;
|
||||
usize name_len;
|
||||
} label; // both label and goto
|
||||
struct {
|
||||
struct _ast_node *left;
|
||||
struct _ast_node *right;
|
||||
binary_op operator;
|
||||
} binary;
|
||||
struct {
|
||||
struct _ast_node *right;
|
||||
unary_op operator;
|
||||
} unary;
|
||||
u8 boolean;
|
||||
i64 integer;
|
||||
f64 flt; // float
|
||||
struct {
|
||||
char *start;
|
||||
usize len;
|
||||
} string;
|
||||
char ch; // char;
|
||||
struct {
|
||||
struct _ast_node *condition;
|
||||
struct _ast_node *then;
|
||||
struct _ast_node *otherwise;
|
||||
} ternary;
|
||||
struct {
|
||||
struct _ast_node *value;
|
||||
struct _ast_node *type;
|
||||
} cast;
|
||||
struct {
|
||||
struct _ast_node *expr;
|
||||
struct _ast_node *index;
|
||||
} subscript;
|
||||
struct {
|
||||
struct _ast_node *expr;
|
||||
struct _ast_node *member;
|
||||
} access;
|
||||
struct {
|
||||
struct _ast_node *expr;
|
||||
struct _ast_node *next;
|
||||
} unit_node;
|
||||
struct {
|
||||
/* This should be a list of unit_node */
|
||||
struct _ast_node *parameters;
|
||||
usize param_len;
|
||||
char *name;
|
||||
usize name_len;
|
||||
} call;
|
||||
struct {
|
||||
struct _ast_node *value;
|
||||
} ret;
|
||||
struct {
|
||||
/* This should be an access. */
|
||||
struct _ast_node *path;
|
||||
} import;
|
||||
struct {
|
||||
/* These should be lists of unit_node */
|
||||
struct _ast_node *slices;
|
||||
usize slice_len;
|
||||
struct _ast_node *captures;
|
||||
usize capture_len;
|
||||
struct _ast_node* body;
|
||||
} fr; // for
|
||||
struct {
|
||||
struct _ast_node *condition;
|
||||
struct _ast_node *body;
|
||||
u8 flags;
|
||||
} whle; // while
|
||||
struct {
|
||||
struct _ast_node *condition;
|
||||
struct _ast_node *body;
|
||||
struct _ast_node *otherwise;
|
||||
u8 flags;
|
||||
} if_stmt; // while
|
||||
struct {
|
||||
struct _ast_node **statements;
|
||||
usize stmt_len;
|
||||
} compound;
|
||||
struct {
|
||||
struct _ast_node *value;
|
||||
char *name;
|
||||
usize name_len;
|
||||
struct _ast_node *type;
|
||||
} var_decl;
|
||||
struct {
|
||||
member *members;
|
||||
char *name;
|
||||
usize name_len;
|
||||
} structure;
|
||||
struct {
|
||||
member *parameters;
|
||||
usize parameters_len;
|
||||
char *name;
|
||||
usize name_len;
|
||||
struct _ast_node *type;
|
||||
struct _ast_node *body;
|
||||
bool is_extern;
|
||||
} function;
|
||||
struct {
|
||||
variant *variants;
|
||||
char *name;
|
||||
usize name_len;
|
||||
} enm; // enum
|
||||
struct {
|
||||
struct _ast_node *members;
|
||||
usize members_len;
|
||||
} struct_init;
|
||||
} expr;
|
||||
} ast_node;
|
||||
|
||||
/* Parser state. */
typedef struct {
|
||||
/* NOTE(review): presumably the cursor into the lexer's token list - confirm in parser.c. */
token *tokens;
|
||||
token *previous;
|
||||
/* root of the tree; lc.c passes it to print_ast and generate */
ast_node *ast;
|
||||
arena *allocator;
|
||||
} parser;
|
||||
|
||||
/* Create a parser for the tokens held by `l`; lc.c reads the resulting tree from the returned parser's `ast`. */
parser *parser_init(lexer *l, arena *allocator);
|
||||
|
||||
#endif
|
||||
920
sema.c
Normal file
920
sema.c
Normal file
|
|
@ -0,0 +1,920 @@
|
|||
#define STB_DS_IMPLEMENTATION
|
||||
#include "sema.h"
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/*
 * Node of a graph over types, carrying the type it stands for.
 * NOTE(review): `in`/`out` look like incoming/outgoing edge arrays of a
 * dependency graph used for type resolution - confirm against their users.
 */
typedef struct _res_node {
|
||||
struct _res_node **in;
|
||||
struct _res_node **out;
|
||||
type *value;
|
||||
} res_node;
|
||||
|
||||
/* Dependency-graph node plus a flag that order_type() sets once the type's definition has been seen. */
typedef struct { res_node node; bool complete; } pair;
|
||||
|
||||
/* NOTE(review): not referenced anywhere in the visible part of this file — possibly unused. */
typedef struct { u8 flags; char *name; } type_key;
|
||||
|
||||
/* Type name -> dependency-graph node (stb_ds string hash map); filled by create_integer/create_float/order_type. */
static struct { char *key; pair *value; } *types;
|
||||
/* Type name -> fully laid-out type; filled by register_type(). */
static struct { char *key; type *value; } *type_reg;
|
||||
|
||||
/* Function name -> prototype; filled by create_prototype(). */
static struct { char *key; prototype *value; } *prototypes;
|
||||
|
||||
/* Outermost scope, created by sema_init(). */
static scope *global_scope = NULL;
|
||||
/* Innermost scope currently being analysed; push_scope()/pop_scope() move it. */
static scope *current_scope = NULL;
|
||||
/* Declared return type of the function being checked; set by check_function(). */
static type *current_return = NULL;
|
||||
|
||||
/* Shared type given to every integer literal (tag TYPE_INTEGER_CONST); created in sema_init(). */
static type *const_int = NULL;
|
||||
/* Shared type given to every float literal (tag TYPE_FLOAT_CONST); created in sema_init(). */
static type *const_float = NULL;
|
||||
|
||||
/* True while the body of a `while`/`for` is being checked; consulted for `break`. */
static bool in_loop = false;
|
||||
/* Set by error(); sema_init() exits with status 1 when it is true after analysis. */
static bool has_errors = false;
|
||||
|
||||
/*
 * Report a semantic error on stdout and mark the compilation as failed.
 *
 * n    node the error refers to; when non-NULL its row and column are
 *      printed in front of the message.
 * msg  message text.
 */
static void error(ast_node *n, char *msg)
{
	has_errors = true;

	if (!n) {
		printf("\x1b[31m\x1b[1merror\x1b[0m\x1b[1m:\x1b[0m %s\n", msg);
		return;
	}

	printf("\x1b[31m\x1b[1merror\x1b[0m\x1b[1m:%ld:%ld:\x1b[0m %s\n", n->position.row, n->position.column, msg);
}
|
||||
|
||||
/*
 * Return a NUL-terminated heap copy of the first `len` bytes of `str`.
 *
 * Despite the name no deduplication happens: every call allocates a fresh
 * buffer with malloc(). The caller owns the result and either stores it
 * (e.g. as a hash-map key) or releases it with free().
 *
 * s    unused.
 * str  source bytes; does not need to be NUL-terminated.
 * len  number of bytes to copy.
 *
 * On allocation failure an error is reported and the process exits with
 * status 1, so the function never returns NULL.
 */
static char *intern_string(sema *s, char *str, usize len)
{
	(void) s;

	char *ptr = malloc(len + 1);
	if (!ptr) {
		error(NULL, "out of memory.");
		exit(1);
	}

	/* memcpy with a NULL source is undefined even for len == 0. */
	if (len > 0) {
		memcpy(ptr, str, len);
	}
	ptr[len] = '\0';
	return ptr;
}
|
||||
|
||||
/*
 * Create a built-in integer type together with its dependency-graph node.
 *
 * s     analysis state; its arena backs both allocations.
 * name  type name; stored by pointer in the type and used as the key in `types`.
 * bits  width in bits, stored in data.integer.
 * sign  true selects TYPE_INTEGER, false selects TYPE_UINTEGER.
 *
 * Returns the new type. size and alignment are filled in later by
 * register_type().
 */
static type *create_integer(sema *s, char *name, u8 bits, bool sign)
{
	type *t = arena_alloc(s->allocator, sizeof(type));
	t->name = name;
	t->tag = sign ? TYPE_INTEGER : TYPE_UINTEGER;
	t->data.integer = bits;

	pair *graph_node = arena_alloc(s->allocator, sizeof(pair));
	graph_node->node.value = t;
	graph_node->node.in = NULL;
	graph_node->node.out = NULL;
	/*
	 * A built-in is fully defined from the start. order_type() reads this
	 * flag when a struct/union reuses the name, so it must not be left
	 * uninitialised.
	 */
	graph_node->complete = true;

	shput(types, name, graph_node);
	return t;
}
|
||||
|
||||
/*
 * Create a built-in floating-point type together with its dependency-graph node.
 *
 * s     analysis state; its arena backs both allocations.
 * name  type name; stored by pointer in the type and used as the key in `types`.
 * bits  width in bits, stored in data.flt.
 *
 * Returns the new type. size and alignment are filled in later by
 * register_type().
 */
static type *create_float(sema *s, char *name, u8 bits)
{
	type *t = arena_alloc(s->allocator, sizeof(type));
	t->name = name;
	t->tag = TYPE_FLOAT;
	t->data.flt = bits;

	pair *graph_node = arena_alloc(s->allocator, sizeof(pair));
	graph_node->node.value = t;
	graph_node->node.in = NULL;
	graph_node->node.out = NULL;
	/*
	 * A built-in is fully defined from the start. order_type() reads this
	 * flag when a struct/union reuses the name, so it must not be left
	 * uninitialised.
	 */
	graph_node->complete = true;

	shput(types, name, graph_node);
	return t;
}
|
||||
|
||||
/*
 * Record a struct/union definition in the type-dependency graph.
 *
 * Nodes of any other kind are ignored. For a struct/union a `type` is
 * created, and for every member whose type is a plain identifier an edge
 * "this type depends on that type" is added. Types that are referenced before
 * they are defined get a placeholder graph node (value == NULL,
 * complete == false) which is filled in when the definition is met.
 * Pointer/slice members create no edge.
 *
 * Reports "type already defined." when a completed type of the same name
 * exists.
 */
static void order_type(sema *s, ast_node *node)
{
	if (node->type != NODE_STRUCT && node->type != NODE_UNION) {
		return;
	}

	char *k = intern_string(s, node->expr.structure.name, node->expr.structure.name_len);
	pair *graph_node = shget(types, k);

	if (graph_node && graph_node->complete) {
		error(node, "type already defined.");
		/* The key was never stored anywhere; do not leak it. */
		free(k);
		return;
	}

	if (!graph_node) {
		graph_node = arena_alloc(s->allocator, sizeof(pair));
		graph_node->node.in = NULL;
		graph_node->node.out = NULL;
		graph_node->complete = false;
	}

	type *t = arena_alloc(s->allocator, sizeof(type));
	t->tag = node->type == NODE_STRUCT ? TYPE_STRUCT : TYPE_UNION;
	t->name = k;
	/* Layout is computed later by register_type(); start from a defined state. */
	t->size = 0;
	t->alignment = 0;
	t->data.structure.name = node->expr.structure.name;
	t->data.structure.name_len = node->expr.structure.name_len;
	t->data.structure.members = node->expr.structure.members;
	t->data.structure.member_types = NULL;
	graph_node->node.value = t;

	for (member *m = t->data.structure.members; m; m = m->next) {
		if (!m->type || m->type->type != NODE_IDENTIFIER) {
			continue;
		}

		char *name = intern_string(s, m->type->expr.string.start, m->type->expr.string.len);
		pair *p = shget(types, name);
		if (p) {
			/* Lookup only: the map keeps its own key, this copy is not needed. */
			free(name);
		} else {
			p = arena_alloc(s->allocator, sizeof(pair));
			p->node.out = NULL;
			p->node.in = NULL;
			p->node.value = NULL;
			p->complete = false;
			/* The map stores the key pointer, so `name` is now owned by it. */
			shput(types, name, p);
		}

		arrput(graph_node->node.in, &p->node);
		arrput(p->node.out, &graph_node->node);
	}

	shput(types, k, graph_node);
	graph_node->complete = true;
}
|
||||
|
||||
/*
 * Resolve a type expression from the AST to a `type`.
 *
 *   NODE_IDENTIFIER  looked up in type_reg; NULL if unknown (no error is
 *                    reported here, callers decide).
 *   NODE_ACCESS      resolves the left side, which must be a struct, and
 *                    returns the type of the named member (NULL if absent).
 *   NODE_PTR_TYPE    builds a fresh pointer (PTR_RAW) or slice type in the
 *                    arena. The child may be NULL when the pointee is not
 *                    registered yet, e.g. a struct that points to itself.
 *
 * Any other node kind, or a NULL node, reports "expected type." and yields
 * NULL.
 */
static type *get_type(sema *s, ast_node *n)
{
	char *name = NULL;
	type *t = NULL;

	if (!n) {
		error(NULL, "expected type.");
		return NULL;
	}

	switch (n->type) {
	case NODE_ACCESS:
		t = get_type(s, n->expr.access.expr);
		/* An unresolved base used to be dereferenced here. */
		if (!t || t->tag != TYPE_STRUCT) {
			error(n->expr.access.expr, "expected structure.");
			return NULL;
		}
		name = intern_string(s, n->expr.access.member->expr.string.start, n->expr.access.member->expr.string.len);
		t = shget(t->data.structure.member_types, name);
		free(name);
		return t;
	case NODE_IDENTIFIER:
		name = intern_string(s, n->expr.string.start, n->expr.string.len);
		t = shget(type_reg, name);
		free(name);
		return t;
	case NODE_PTR_TYPE:
		/*
		 * Allocate from the arena like every other type in this file and
		 * zero the object: match() compares slice.len, so it must not be
		 * left indeterminate.
		 */
		t = arena_alloc(s->allocator, sizeof(type));
		memset(t, 0, sizeof(type));
		t->alignment = sizeof(usize);
		if (n->expr.ptr_type.flags & PTR_RAW) {
			t->name = "ptr";
			t->tag = TYPE_PTR;
			t->size = sizeof(usize);
			t->data.ptr.child = get_type(s, n->expr.ptr_type.type);
			t->data.ptr.is_const = (n->expr.ptr_type.flags & PTR_CONST) != 0;
			t->data.ptr.is_volatile = (n->expr.ptr_type.flags & PTR_VOLATILE) != 0;
		} else {
			t->name = "slice";
			t->tag = TYPE_SLICE;
			t->size = sizeof(usize) * 2; // ptr + len = 16 bytes
			t->data.slice.child = get_type(s, n->expr.ptr_type.type);
			t->data.slice.is_const = (n->expr.ptr_type.flags & PTR_CONST) != 0;
			t->data.slice.is_volatile = (n->expr.ptr_type.flags & PTR_VOLATILE) != 0;
			t->data.slice.len = 0;
		}
		return t;
	default:
		error(n, "expected type.");
		return NULL;
	}
}
|
||||
|
||||
/*
 * Compute the layout of a struct type.
 *
 * Walks the member list in declaration order, resolves every member type,
 * records it in t->data.structure.member_types (member name -> type) and
 * assigns m->offset so that each member is aligned to its own alignment. The
 * struct's alignment is the largest member alignment and its size is rounded
 * up to that alignment.
 *
 * s     analysis state.
 * name  unused.
 * t     struct type to lay out; size/alignment/member_types are overwritten.
 *
 * On an unknown or zero-sized member type an error is reported and the
 * function returns early, leaving size and alignment at 0.
 */
static void register_struct(sema *s, char *name, type *t)
{
	(void) name;

	/* Start from a defined state: shput() needs a NULL (or valid) map. */
	t->data.structure.member_types = NULL;
	t->size = 0;
	t->alignment = 0;

	usize alignment = 0;
	usize offset = 0;

	for (member *m = t->data.structure.members; m; m = m->next) {
		type *m_type = get_type(s, m->type);
		if (!m_type) {
			error(m->type, "unknown type.");
			return;
		}

		/* The map stores the key pointer, so `n` is owned by it from here on. */
		char *n = intern_string(s, m->name, m->name_len);
		shput(t->data.structure.member_types, n, m_type);

		if (m_type->size == 0) {
			error(m->type, "a struct member can't be of type `void`.");
			return;
		}

		/* Guard the modulo below against a zero alignment. */
		usize m_align = m_type->alignment ? m_type->alignment : 1;
		if (alignment < m_align) {
			alignment = m_align;
		}

		usize padding = (m_align - (offset % m_align)) % m_align;
		offset += padding;
		m->offset = offset;
		offset += m_type->size;
	}

	t->alignment = alignment;

	if (t->alignment > 0) {
		usize trailing_padding = (t->alignment - (offset % t->alignment)) % t->alignment;
		offset += trailing_padding;
	}

	t->size = offset;
}
|
||||
|
||||
/*
 * Compute the layout of a union type.
 *
 * Resolves every member type and records it in
 * t->data.structure.member_types (member name -> type). The union's size and
 * alignment are the largest member size and the largest member alignment.
 *
 * s     analysis state.
 * name  unused.
 * t     union type to lay out; size/alignment/member_types are overwritten.
 *
 * On an unknown member type an error is reported and the function returns
 * early, leaving size and alignment at 0.
 */
static void register_union(sema *s, char *name, type *t)
{
	(void) name;

	/* Start from a defined state: shput() needs a NULL (or valid) map. */
	t->data.structure.member_types = NULL;
	t->size = 0;
	t->alignment = 0;

	usize alignment = 0;
	usize size = 0;

	for (member *m = t->data.structure.members; m; m = m->next) {
		type *m_type = get_type(s, m->type);
		if (!m_type) {
			error(m->type, "unknown type.");
			return;
		}

		/* The map stores the key pointer, so `n` is owned by it from here on. */
		char *n = intern_string(s, m->name, m->name_len);
		shput(t->data.structure.member_types, n, m_type);

		if (alignment < m_type->alignment) {
			alignment = m_type->alignment;
		}

		if (size < m_type->size) {
			size = m_type->size;
		}
	}

	t->alignment = alignment;
	t->size = size;
}
|
||||
|
||||
/*
 * Fill in size and alignment for `t` and publish it in type_reg under `name`.
 *
 * Integers and floats take their byte size from the stored bit width,
 * pointers are 8 bytes, structs and unions are laid out by
 * register_struct()/register_union(). Any other tag reports an error and the
 * type is not published.
 */
static void register_type(sema *s, char *name, type *t)
{
	usize bytes;

	switch (t->tag) {
	case TYPE_STRUCT:
		register_struct(s, name, t);
		break;
	case TYPE_UNION:
		register_union(s, name, t);
		break;
	case TYPE_PTR:
		t->size = 8;
		t->alignment = 8;
		break;
	case TYPE_FLOAT:
		bytes = t->data.flt / 8;
		t->size = bytes;
		t->alignment = bytes;
		break;
	case TYPE_INTEGER:
	case TYPE_UINTEGER:
		bytes = t->data.integer / 8;
		t->size = bytes;
		t->alignment = bytes;
		break;
	default:
		error(NULL, "registering an invalid type.");
		return;
	}

	shput(type_reg, name, t);
}
|
||||
|
||||
/*
 * Register every struct/union in dependency order.
 *
 * Runs a topological sort (Kahn's algorithm) over the graph built by
 * order_type(): nodes without remaining dependencies are moved to `ordered`,
 * and their outgoing edges are removed from the dependents. If not every node
 * could be ordered the definitions contain a cycle, which is reported. The
 * types that were ordered are still registered.
 *
 * The graph's edge arrays are consumed by this function.
 */
static void create_types(sema *s)
{
	res_node **nodes = NULL;
	res_node **ordered = NULL;
	usize node_count = shlen(types);

	for (usize i = 0; i < node_count; i++) {
		if (arrlen(types[i].value->node.in) == 0) {
			arrput(nodes, &types[i].value->node);
		}
	}

	while (arrlen(nodes) > 0) {
		res_node *n = nodes[0];
		arrdel(nodes, 0);
		arrput(ordered, n);

		while (arrlen(n->out) > 0) {
			res_node *dep = n->out[0];
			arrdel(n->out, 0);

			/*
			 * Every out-edge has exactly one matching in-edge (order_type adds
			 * them in pairs, once per member), so remove exactly one. Deleting
			 * while continuing to iterate skipped elements and could empty
			 * dep->in early, enqueueing `dep` more than once when a type had
			 * three or more members of the same type.
			 */
			for (usize j = 0; j < (usize) arrlen(dep->in); j++) {
				if (dep->in[j] == n) {
					arrdel(dep->in, j);
					break;
				}
			}

			if (arrlen(dep->in) == 0) {
				arrput(nodes, dep);
			}
		}
	}

	if ((usize) arrlen(ordered) < node_count) {
		error(NULL, "cycling struct definition.");
	}

	for (usize i = 0; i < (usize) arrlen(ordered); i++) {
		type *t = ordered[i]->value;
		/* Placeholders (value == NULL) and built-ins are skipped. */
		if (t && (t->tag == TYPE_STRUCT || t->tag == TYPE_UNION)) {
			register_type(s, t->name, t);
		}
	}

	arrfree(nodes);
	arrfree(ordered);
}
|
||||
|
||||
/*
 * Build the prototype of a function node and publish it in `prototypes`.
 *
 * The function name is replaced in the node by a NUL-terminated copy, which
 * is also the hash-map key. Parameter types are resolved in order and
 * collected in p->parameters (stb_ds dynamic array).
 *
 * Errors reported:
 *   "function already defined."  a prototype with that name exists (the new
 *                                one still replaces it, as before);
 *   "unknown type."              a parameter type cannot be resolved (the
 *                                prototype is then not published), or the
 *                                return type cannot be resolved (the
 *                                prototype is still published).
 */
static void create_prototype(sema *s, ast_node *node)
{
	prototype *p = arena_alloc(s->allocator, sizeof(prototype));
	p->name = intern_string(s, node->expr.function.name, node->expr.function.name_len);
	p->type = NULL;
	/* arrput() requires a NULL or valid array; do not rely on the arena zeroing. */
	p->parameters = NULL;
	node->expr.function.name = p->name;

	if (shget(prototypes, p->name)) {
		error(node, "function already defined.");
	}

	for (member *m = node->expr.function.parameters; m; m = m->next) {
		type *t = get_type(s, m->type);
		if (!t) {
			error(m->type, "unknown type.");
			return;
		}

		arrput(p->parameters, t);
	}

	p->type = get_type(s, node->expr.function.type);
	if (!p->type) {
		error(node, "unknown type.");
	}

	shput(prototypes, p->name, p);
}
|
||||
|
||||
/*
 * Open a new, empty scope nested inside the current one and make it current.
 */
static void push_scope(sema *s)
{
	scope *scp = arena_alloc(s->allocator, sizeof(scope));
	scp->parent = current_scope;
	/* shget()/shput() need a NULL or valid map; sema_init() does the same for the global scope. */
	scp->defs = NULL;
	current_scope = scp;
}
|
||||
|
||||
/*
 * Close the current scope and return to its parent.
 *
 * The scope's definition map is released. The scope object itself and the
 * nodes it referred to live in the arena and are left alone. Popping with no
 * current scope is a no-op.
 */
static void pop_scope(sema *s)
{
	(void) s;

	if (!current_scope) {
		return;
	}

	scope *closed = current_scope;
	current_scope = closed->parent;
	/* Frees only the hash table; keys and values are owned elsewhere. */
	shfree(closed->defs);
}
|
||||
|
||||
/*
 * Look `name` up in the current scope and then in each enclosing scope.
 * Returns the defining node of the innermost match, or NULL if there is none.
 */
static ast_node *get_def(sema *s, char *name)
{
	for (scope *scp = current_scope; scp; scp = scp->parent) {
		ast_node *found = shget(scp->defs, name);
		if (found) {
			return found;
		}
	}

	return NULL;
}
|
||||
|
||||
/*
 * Build the type of a string literal: a const, non-volatile slice of `u8`
 * whose length is the literal's length.
 */
static type *get_string_type(sema *s, ast_node *node)
{
	type *str = arena_alloc(s->allocator, sizeof(type));

	str->name = "slice";
	str->tag = TYPE_SLICE;
	str->alignment = sizeof(usize);
	str->size = sizeof(usize) * 2; // ptr + len = 16 bytes

	str->data.slice.len = node->expr.string.len;
	str->data.slice.is_volatile = false;
	str->data.slice.is_const = true;
	str->data.slice.child = shget(type_reg, "u8");

	return str;
}
|
||||
|
||||
/*
 * Build the type of a range expression `left..right`.
 *
 * The length is computed from the `integer` payload of both operands, so the
 * operands are read as integer literals.
 *
 * NOTE(review): the tag is TYPE_PTR and the size is one word, yet the name is
 * "slice" and the payload is written through data.slice. `ptr` and `slice`
 * have different layouts inside the union — confirm which one is intended.
 */
static type *get_range_type(sema *s, ast_node *node)
{
	ast_node *lo = node->expr.binary.left;
	ast_node *hi = node->expr.binary.right;
	type *range = arena_alloc(s->allocator, sizeof(type));

	range->name = "slice";
	range->tag = TYPE_PTR;
	range->alignment = sizeof(usize);
	range->size = sizeof(usize);

	range->data.slice.child = shget(type_reg, "usize");
	range->data.slice.is_const = true;
	range->data.slice.is_volatile = false;
	range->data.slice.len = hi->expr.integer - lo->expr.integer;

	return range;
}
|
||||
|
||||
static type *get_expression_type(sema *s, ast_node *node);
|
||||
/*
 * Type of a member access `expr.member`.
 *
 *   slice         `.ptr` yields a pointer to the element type carrying the
 *                 slice's const/volatile flags, `.len` yields `usize`;
 *                 anything else is an error.
 *   struct/union  the member's type from member_types, or an error if the
 *                 member does not exist.
 *   otherwise     "invalid expression.".
 *
 * Returns NULL whenever an error was reported.
 */
static type *get_access_type(sema *s, ast_node *node)
{
	type *t = get_expression_type(s, node->expr.access.expr);
	ast_node *member = node->expr.access.member;

	if (!t || (t->tag != TYPE_SLICE && t->tag != TYPE_STRUCT && t->tag != TYPE_UNION)) {
		error(node, "invalid expression.");
		return NULL;
	}

	/* Temporary NUL-terminated copy for the lookups; released on every path below. */
	char *name = intern_string(s, member->expr.string.start, member->expr.string.len);
	type *res = NULL;

	if (t->tag == TYPE_SLICE) {
		if (strcmp(name, "ptr") == 0) {
			res = arena_alloc(s->allocator, sizeof(type));
			res->tag = TYPE_PTR;
			res->size = 8;
			res->alignment = 8;
			res->name = "ptr";
			res->data.ptr.child = t->data.slice.child;
			res->data.ptr.is_const = t->data.slice.is_const;
			res->data.ptr.is_volatile = t->data.slice.is_volatile;
		} else if (strcmp(name, "len") == 0) {
			res = shget(type_reg, "usize");
		} else {
			error(node, "slice doesn't have that field");
		}
	} else {
		res = shget(t->data.structure.member_types, name);
		if (!res) {
			error(node, "struct doesn't have that member");
		}
	}

	free(name);
	return res;
}
|
||||
|
||||
/*
 * Type of an identifier expression.
 *
 * The node's name is replaced by a NUL-terminated copy (other code, such as
 * the address-of handling, uses it as a C string afterwards) and looked up
 * through the scope chain.
 *
 * Returns the declared type of the definition, or NULL after reporting
 * "unknown identifier." when nothing with that name is in scope.
 */
static type *get_identifier_type(sema *s, ast_node *node)
{
	char *name = intern_string(s, node->expr.string.start, node->expr.string.len);
	node->expr.string.start = name;

	ast_node *def = get_def(s, name);
	if (!def) {
		error(node, "unknown identifier.");
		/* Previously fell through and dereferenced the NULL definition. */
		return NULL;
	}

	return def->expr_type;
}
|
||||
|
||||
static bool match(type *t1, type *t2);
|
||||
|
||||
/*
 * Tell whether a value of type `source` converts implicitly to `dest`.
 *
 * Any integer (signed, unsigned or literal) converts to any integer kind.
 * Only a float literal converts to a float. Everything else, and any NULL
 * argument, yields false.
 */
static bool can_cast(type *source, type *dest)
{
	if (!source || !dest) {
		return false;
	}

	bool dest_is_integer = dest->tag == TYPE_INTEGER
		|| dest->tag == TYPE_UINTEGER
		|| dest->tag == TYPE_INTEGER_CONST;

	if (dest_is_integer) {
		return source->tag == TYPE_INTEGER_CONST
			|| source->tag == TYPE_INTEGER
			|| source->tag == TYPE_UINTEGER;
	}

	if (dest->tag == TYPE_FLOAT) {
		return source->tag == TYPE_FLOAT_CONST;
	}

	return false;
}
|
||||
|
||||
/*
 * Compute the type of an expression and cache it in node->expr_type.
 *
 * A NULL node yields the registered `void` type. On a reported error the
 * function returns NULL; in that case node->expr_type is left as it was,
 * except for a binary type mismatch, which sets it to NULL.
 *
 * Side effects besides error reporting:
 *   - identifier and call names are replaced by NUL-terminated copies;
 *   - taking the address of a variable marks both the use and its
 *     definition as address_taken.
 */
static type *get_expression_type(sema *s, ast_node *node)
{
	if (!node) {
		return shget(type_reg, "void");
	}

	type *t = NULL;
	type *rhs = NULL;
	type *element_type = NULL;
	prototype *prot = NULL;
	ast_node *target = NULL;
	ast_node *index = NULL;

	switch (node->type) {
	case NODE_IDENTIFIER:
		t = get_identifier_type(s, node);
		break;
	case NODE_INTEGER:
		t = const_int;
		break;
	case NODE_FLOAT:
		t = const_float;
		break;
	case NODE_STRING:
		t = get_string_type(s, node);
		break;
	case NODE_CHAR:
		t = shget(type_reg, "u8");
		break;
	case NODE_BOOL:
		t = shget(type_reg, "bool");
		break;
	case NODE_CAST:
		t = get_type(s, node->expr.cast.type);
		break;
	case NODE_POSTFIX:
	case NODE_UNARY:
		t = get_expression_type(s, node->expr.unary.right);
		if (node->expr.unary.operator == UOP_REF) {
			/* Walk `a.b.c` down to the variable whose address is taken. */
			target = node->expr.unary.right;
			while (target && target->type == NODE_ACCESS) {
				target = target->expr.access.expr;
			}

			if (!target || target->type != NODE_IDENTIFIER) {
				error(node, "expected identifier.");
				return NULL;
			}

			/* The name was NUL-terminated when the operand was typed above. */
			ast_node *def = get_def(s, target->expr.string.start);
			if (def) {
				def->address_taken = true;
				target->address_taken = true;
			}

			type *pointee = t;
			t = arena_alloc(s->allocator, sizeof(type));
			t->tag = TYPE_PTR;
			t->size = sizeof(usize);
			t->alignment = sizeof(usize);
			t->name = "ptr";
			t->data.ptr.is_const = false;
			t->data.ptr.is_volatile = false;
			t->data.ptr.child = pointee;
		} else if (node->expr.unary.operator == UOP_DEREF) {
			/* An operand that failed to type-check is NULL here. */
			if (!t || t->tag != TYPE_PTR) {
				error(node, "only pointers can be dereferenced.");
				return NULL;
			}
			t = t->data.ptr.child;
		}
		break;
	case NODE_BINARY:
		t = get_expression_type(s, node->expr.binary.left);
		if (!t) return NULL;
		if (node->expr.binary.operator == OP_ASSIGN_PTR) {
			if (t->tag != TYPE_PTR) {
				error(node, "expected pointer.");
				return NULL;
			}
			t = t->data.ptr.child;
		}
		/* Type the right operand once, so its diagnostics are not repeated. */
		rhs = get_expression_type(s, node->expr.binary.right);
		if (!can_cast(rhs, t) && !match(t, rhs)) {
			error(node, "type mismatch.");
			node->expr_type = NULL;
			return NULL;
		}
		if (node->expr.binary.operator >= OP_EQ) {
			t = shget(type_reg, "bool");
		} else if (node->expr.binary.operator >= OP_ASSIGN && node->expr.binary.operator <= OP_MOD_EQ) {
			t = shget(type_reg, "void");
		}
		break;
	case NODE_RANGE:
		t = get_range_type(s, node);
		break;
	case NODE_ARRAY_SUBSCRIPT:
		t = get_expression_type(s, node->expr.subscript.expr);
		/* The failure was already reported while typing the base. */
		if (!t) return NULL;

		switch (t->tag) {
		case TYPE_SLICE:
			element_type = t->data.slice.child;
			break;
		case TYPE_PTR:
			element_type = t->data.ptr.child;
			break;
		default:
			error(node, "only pointers and slices can be indexed.");
			return NULL;
		}

		index = node->expr.subscript.index;
		if (index && index->type == NODE_RANGE) {
			/* Range subscripting creates a slice of the element type. */
			t = arena_alloc(s->allocator, sizeof(type));
			memset(t, 0, sizeof(type));
			t->tag = TYPE_SLICE;
			t->name = "slice";
			t->size = sizeof(usize) * 2;
			t->alignment = sizeof(usize);
			t->data.slice.child = element_type;
			t->data.slice.is_const = false;
			t->data.slice.is_volatile = false;
			t->data.slice.len = 0;
		} else {
			/* Regular subscript yields the element type. */
			t = element_type;
		}
		break;
	case NODE_CALL:
		node->expr.call.name = intern_string(s, node->expr.call.name, node->expr.call.name_len);
		prot = shget(prototypes, node->expr.call.name);
		if (!prot) {
			error(node, "unknown function.");
			return NULL;
		}
		/* Type every argument so that each sub-expression gets its expr_type. */
		for (ast_node *arg = node->expr.call.parameters; arg && arg->type == NODE_UNIT; arg = arg->expr.unit_node.next) {
			if (arg->expr.unit_node.expr) {
				get_expression_type(s, arg->expr.unit_node.expr);
			}
		}
		t = prot->type;
		break;
	case NODE_ACCESS:
		t = get_access_type(s, node);
		break;
	default:
		t = shget(type_reg, "void");
		break;
	}

	node->expr_type = t;
	return t;
}
|
||||
|
||||
/*
 * Structural type equality.
 *
 * Two types match when their tags are equal and:
 *   void/bool      always;
 *   pointer        same const/volatile flags and matching pointee;
 *   slice          same flags, matching element type and equal len;
 *   struct/union   they are the very same type object;
 *   integer/float  same bit width.
 * Literal types, enum and generic never match. NULL never matches.
 */
static bool match(type *t1, type *t2)
{
	if (!t1 || !t2 || t1->tag != t2->tag) {
		return false;
	}

	switch (t1->tag) {
	case TYPE_VOID:
	case TYPE_BOOL:
		return true;
	case TYPE_STRUCT:
	case TYPE_UNION:
		return t1 == t2;
	case TYPE_INTEGER:
	case TYPE_UINTEGER:
		return t1->data.integer == t2->data.integer;
	case TYPE_FLOAT:
		return t1->data.flt == t2->data.flt;
	case TYPE_PTR:
		if (t1->data.ptr.is_const != t2->data.ptr.is_const) return false;
		if (t1->data.ptr.is_volatile != t2->data.ptr.is_volatile) return false;
		return match(t1->data.ptr.child, t2->data.ptr.child);
	case TYPE_SLICE:
		if (t1->data.slice.is_const != t2->data.slice.is_const) return false;
		if (t1->data.slice.is_volatile != t2->data.slice.is_volatile) return false;
		if (!match(t1->data.slice.child, t2->data.slice.child)) return false;
		return t1->data.slice.len == t2->data.slice.len;
	default:
		/* enum and generic are TODO; literal types never match each other. */
		return false;
	}
}
|
||||
|
||||
static void check_statement(sema *s, ast_node *node);
|
||||
/*
 * Check a statement list (a chain of NODE_UNIT nodes) inside a scope of its own.
 */
static void check_body(sema *s, ast_node *node)
{
	push_scope(s);

	for (ast_node *unit = node; unit && unit->type == NODE_UNIT; unit = unit->expr.unit_node.next) {
		check_statement(s, unit->expr.unit_node.expr);
	}

	pop_scope(s);
}
|
||||
|
||||
/*
 * Check a `for` loop.
 *
 * Opens a scope, declares one variable per capture (paired positionally with
 * the slice expressions, and typed with the type of the paired expression),
 * then checks the body with `in_loop` set.
 *
 * Reports an error when there are more captures than slices.
 */
static void check_for(sema *s, ast_node *node)
{
	ast_node *current_capture = node->expr.fr.captures;
	ast_node *current_slice = node->expr.fr.slices;

	push_scope(s);

	while (current_capture) {
		/* Previously a missing slice was dereferenced. */
		if (!current_slice) {
			error(node, "`for` has more captures than slices.");
			break;
		}

		ast_node *ident = current_capture->expr.unit_node.expr;
		type *c_type = get_expression_type(s, current_slice->expr.unit_node.expr);
		char *c_name = intern_string(s, ident->expr.string.start, ident->expr.string.len);

		ast_node *cap_node = arena_alloc(s->allocator, sizeof(ast_node));
		cap_node->type = NODE_VAR_DECL;
		cap_node->expr_type = c_type;
		cap_node->address_taken = false;
		cap_node->expr.var_decl.name = c_name;

		shput(current_scope->defs, c_name, cap_node);
		current_capture = current_capture->expr.unit_node.next;
		current_slice = current_slice->expr.unit_node.next;
	}

	/*
	 * Restore the previous value instead of forcing false: this loop may
	 * itself be nested in another loop, whose remaining statements must still
	 * accept `break`.
	 */
	bool outer_in_loop = in_loop;
	in_loop = true;
	for (ast_node *current = node->expr.fr.body; current && current->type == NODE_UNIT; current = current->expr.unit_node.next) {
		check_statement(s, current->expr.unit_node.expr);
	}
	in_loop = outer_in_loop;

	pop_scope(s);
}
|
||||
|
||||
/*
 * Check a single statement. A NULL node is accepted and ignored.
 *
 *   return    the value must convert to or match the current function's
 *             return type;
 *   break     only valid inside a loop;
 *   while/if  the condition must match `bool`, then the bodies are checked
 *             (on a bad condition the bodies are skipped);
 *   for       delegated to check_for();
 *   var decl  resolves the declared type, rejects a name already visible in
 *             any enclosing scope, checks the initializer and records the
 *             declaration in the current scope;
 *   other     treated as an expression statement.
 */
static void check_statement(sema *s, ast_node *node)
{
	if (!node) return;

	type *t = NULL;
	type *value_type = NULL;
	ast_node *value = NULL;
	char *name = NULL;
	bool outer_in_loop = false;

	switch(node->type) {
	case NODE_RETURN:
		/* Type the value once so that its diagnostics are not repeated. */
		value_type = get_expression_type(s, node->expr.ret.value);
		if (!can_cast(value_type, current_return) && !match(value_type, current_return)) {
			error(node, "return type doesn't match function's one.");
		}
		break;
	case NODE_BREAK:
		if (!in_loop) {
			error(node, "`break` isn't in a loop.");
		}
		break;
	case NODE_WHILE:
		if (!match(get_expression_type(s, node->expr.whle.condition), shget(type_reg, "bool"))) {
			error(node, "expected boolean value.");
			return;
		}

		/* Restore rather than clear, so an enclosing loop keeps accepting `break`. */
		outer_in_loop = in_loop;
		in_loop = true;
		check_body(s, node->expr.whle.body);
		in_loop = outer_in_loop;
		break;
	case NODE_IF:
		if (!match(get_expression_type(s, node->expr.if_stmt.condition), shget(type_reg, "bool"))) {
			error(node, "expected boolean value.");
			return;
		}

		check_body(s, node->expr.if_stmt.body);
		if (node->expr.if_stmt.otherwise) check_body(s, node->expr.if_stmt.otherwise);
		break;
	case NODE_FOR:
		check_for(s, node);
		break;
	case NODE_VAR_DECL:
		t = get_type(s, node->expr.var_decl.type);
		node->expr_type = t;
		name = intern_string(s, node->expr.var_decl.name, node->expr.var_decl.name_len);
		node->expr.var_decl.name = name;
		if (get_def(s, name)) {
			error(node, "redeclaration of variable.");
			break;
		}
		if (!t) {
			/*
			 * Previously dereferenced. Still record the name so that later
			 * uses do not additionally report "unknown identifier.".
			 */
			error(node, "unknown type.");
			shput(current_scope->defs, name, node);
			break;
		}

		value = node->expr.var_decl.value;
		if (t->tag == TYPE_STRUCT) {
			// Struct initialization with NODE_STRUCT_INIT is allowed
		} else if (value && value->type == NODE_STRUCT_INIT &&
				(t->tag == TYPE_SLICE || t->tag == TYPE_PTR)) {
			// Array/slice initialization with NODE_STRUCT_INIT is allowed
		} else if (value && t->tag == TYPE_SLICE &&
				(value->type == NODE_RANGE || value->type == NODE_STRING)) {
			// Range and string-literal initializers are allowed for slices
			get_expression_type(s, value);
		} else {
			value_type = get_expression_type(s, value);
			if (!can_cast(value_type, t) && !match(t, value_type)) {
				error(node, "type mismatch (decl).");
			}
		}
		shput(current_scope->defs, name, node);
		break;
	default:
		get_expression_type(s, node);
		break;
	}
}
|
||||
|
||||
/*
 * Check one function definition.
 *
 * Opens a scope, makes the function's return type the current one, declares
 * every parameter as a variable (parameter names are replaced by
 * NUL-terminated copies) and checks the body statements directly in that
 * scope. Extern functions and functions without a body only get their
 * parameters processed.
 */
static void check_function(sema *s, ast_node *f)
{
	push_scope(s);
	current_return = get_type(s, f->expr.function.type);

	for (member *param = f->expr.function.parameters; param; param = param->next) {
		type *p_type = get_type(s, param->type);
		char *t_name = intern_string(s, param->name, param->name_len);
		param->name = t_name;

		ast_node *decl = arena_alloc(s->allocator, sizeof(ast_node));
		decl->type = NODE_VAR_DECL;
		decl->expr_type = p_type;
		decl->address_taken = false;
		decl->expr.var_decl.name = t_name;

		shput(current_scope->defs, t_name, decl);
	}

	// Skip body checking for extern functions
	bool has_body = !f->expr.function.is_extern && f->expr.function.body;
	if (has_body) {
		for (ast_node *unit = f->expr.function.body; unit && unit->type == NODE_UNIT; unit = unit->expr.unit_node.next) {
			check_statement(s, unit->expr.unit_node.expr);
		}
	}

	pop_scope(s);
}
|
||||
|
||||
/*
 * Analyse a translation unit (a chain of NODE_UNIT nodes) in three passes:
 *   1. collect struct/union definitions into the dependency graph and
 *      register them in dependency order;
 *   2. create a prototype for every function, so calls may precede
 *      definitions;
 *   3. check function bodies and every other top-level statement.
 */
static void analyze_unit(sema *s, ast_node *node)
{
	ast_node *unit;
	ast_node *item;

	for (unit = node; unit && unit->type == NODE_UNIT; unit = unit->expr.unit_node.next) {
		item = unit->expr.unit_node.expr;
		if (item)
			order_type(s, item);
	}

	create_types(s);

	for (unit = node; unit && unit->type == NODE_UNIT; unit = unit->expr.unit_node.next) {
		item = unit->expr.unit_node.expr;
		if (item && item->type == NODE_FUNCTION) {
			create_prototype(s, item);
		}
	}

	for (unit = node; unit && unit->type == NODE_UNIT; unit = unit->expr.unit_node.next) {
		item = unit->expr.unit_node.expr;
		if (item && item->type == NODE_FUNCTION) {
			check_function(s, item);
		} else {
			check_statement(s, item);
		}
	}
}
|
||||
|
||||
/*
 * Run semantic analysis on the parser's tree.
 *
 * Sets up the global scope, registers the built-in types (`void` and `bool`
 * are created as unsigned integers of 0 and 8 bits), creates the shared
 * literal types and analyses the whole unit. If any error was reported,
 * prints "Compilation failed." and exits with status 1.
 *
 * p  parser whose `ast` is analysed.
 * a  arena used for every allocation made by the analyser.
 */
void sema_init(parser *p, arena *a)
{
	static const struct { char *name; u8 bits; bool sign; } builtin_ints[] = {
		{ "void",  0,  false },
		{ "bool",  8,  false },
		{ "u8",    8,  false },
		{ "u16",   16, false },
		{ "u32",   32, false },
		{ "u64",   64, false },
		{ "usize", 64, false },
		{ "i8",    8,  true },
		{ "i16",   16, true },
		{ "i32",   32, true },
		{ "i64",   64, true },
	};
	static const struct { char *name; u8 bits; } builtin_floats[] = {
		{ "f32", 32 },
		{ "f64", 64 },
	};

	sema *s = arena_alloc(a, sizeof(sema));
	s->allocator = a;
	s->ast = p->ast;
	types = NULL;

	global_scope = arena_alloc(a, sizeof(scope));
	global_scope->parent = NULL;
	global_scope->defs = NULL;
	current_scope = global_scope;

	for (usize i = 0; i < sizeof(builtin_ints) / sizeof(builtin_ints[0]); i++) {
		char *name = builtin_ints[i].name;
		register_type(s, name, create_integer(s, name, builtin_ints[i].bits, builtin_ints[i].sign));
	}
	for (usize i = 0; i < sizeof(builtin_floats) / sizeof(builtin_floats[0]); i++) {
		char *name = builtin_floats[i].name;
		register_type(s, name, create_float(s, name, builtin_floats[i].bits));
	}

	const_int = arena_alloc(s->allocator, sizeof(type));
	const_int->name = "const_int";
	const_int->tag = TYPE_INTEGER_CONST;
	const_int->data.integer = 0;

	const_float = arena_alloc(s->allocator, sizeof(type));
	const_float->name = "const_float";
	const_float->tag = TYPE_FLOAT_CONST;
	const_float->data.flt = 0;

	analyze_unit(s, s->ast);

	if (!has_errors) {
		return;
	}

	printf("Compilation failed.\n");
	exit(1);
}
|
||||
76
sema.h
Normal file
76
sema.h
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
#ifndef SEMA_H
|
||||
#define SEMA_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "parser.h"
|
||||
#include "stb_ds.h"
|
||||
#include "utils.h"
|
||||
|
||||
/*
 * Discriminator for `type`.
 * Note: the built-in `void` and `bool` are created through create_integer()
 * in sema.c and therefore carry TYPE_UINTEGER, not TYPE_VOID / TYPE_BOOL.
 */
typedef enum {
|
||||
TYPE_VOID,
|
||||
TYPE_BOOL,
|
||||
TYPE_PTR,
|
||||
TYPE_SLICE,
|
||||
TYPE_FLOAT,
|
||||
/* Type of a float literal. */
TYPE_FLOAT_CONST,
|
||||
/* Signed integer. */
TYPE_INTEGER,
|
||||
/* Type of an integer literal. */
TYPE_INTEGER_CONST,
|
||||
/* Unsigned integer. */
TYPE_UINTEGER,
|
||||
TYPE_STRUCT,
|
||||
TYPE_UNION,
|
||||
TYPE_ENUM, /* TODO */
|
||||
TYPE_GENERIC, /* TODO */
|
||||
} type_tag;
|
||||
|
||||
/*
 * A resolved type. `tag` selects which member of `data` is meaningful.
 * TYPE_UNION types use the `structure` member as well.
 */
typedef struct _type {
|
||||
type_tag tag;
|
||||
/* Size in bytes; filled in by register_type() or by the code that builds pointer/slice types. */
usize size;
|
||||
/* Alignment in bytes. */
usize alignment;
|
||||
char *name;
|
||||
union {
|
||||
/* Width in bits (register_type divides it by 8 to get the size). */
u8 integer;
|
||||
/* Width in bits. */
u8 flt; // float
|
||||
struct {
|
||||
bool is_const;
|
||||
bool is_volatile;
|
||||
/* Pointee type. */
struct _type *child;
|
||||
} ptr;
|
||||
struct {
|
||||
/* Element count when known (e.g. string literals); compared by match(). */
usize len;
|
||||
bool is_const;
|
||||
bool is_volatile;
|
||||
/* Element type. */
struct _type *child;
|
||||
} slice;
|
||||
struct {
|
||||
/* Name as it appears in the AST node, with its length in name_len. */
char *name;
|
||||
usize name_len;
|
||||
/* Member list taken from the AST node. */
member *members;
|
||||
/* Member name -> resolved member type (stb_ds string hash map), filled by register_struct/register_union. */
struct { char *key; struct _type *value; } *member_types;
|
||||
} structure;
|
||||
struct {
|
||||
char *name;
|
||||
usize name_len;
|
||||
variant *variants;
|
||||
} enm; /* TODO */
|
||||
} data;
|
||||
} type;
|
||||
|
||||
/* Signature of a function, built by create_prototype() in sema.c. */
typedef struct {
|
||||
/* NUL-terminated function name; also the key in the prototype map. */
char *name;
|
||||
/* Return type. */
type *type;
|
||||
/* Parameter types in declaration order (stb_ds dynamic array). */
type **parameters;
|
||||
} prototype;
|
||||
|
||||
/* One lexical scope; scopes form a chain through `parent`. */
typedef struct _scope {
|
||||
/* Enclosing scope, NULL for the global scope. */
struct _scope *parent;
|
||||
/* Name -> declaring node (stb_ds string hash map). */
struct { char *key; ast_node *value; } *defs;
|
||||
} scope;
|
||||
|
||||
/* State of the semantic analyser, created by sema_init(). */
typedef struct {
|
||||
/* Arena used for the analyser's allocations. */
arena *allocator;
|
||||
/* Tree being analysed (taken from the parser). */
ast_node *ast;
|
||||
} sema;
|
||||
|
||||
/* Run semantic analysis on p->ast using arena `a`; exits with status 1 if any error was reported. */
void sema_init(parser *p, arena *a);
|
||||
|
||||
#endif
|
||||
BIN
test
Executable file
BIN
test
Executable file
Binary file not shown.
20
test.l
Normal file
20
test.l
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
extern i64 write(i32 fd, *u8 buf, u64 count);
|
||||
extern void exit(i32 code);
|
||||
extern *u8 malloc(usize size);
|
||||
|
||||
i32 main()
|
||||
{
|
||||
[u8] message = "Hello world!\n";
|
||||
*u8 message_heap = malloc(message.len);
|
||||
[u8] new_message = message_heap[0..13];
|
||||
u32 i = 0;
|
||||
|
||||
loop while i < message.len {
|
||||
new_message[i] = message[i];
|
||||
i = i + 1;
|
||||
}
|
||||
|
||||
write(1, new_message.ptr, new_message.len);
|
||||
|
||||
return 0;
|
||||
}
|
||||
90
test.s
Normal file
90
test.s
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
# Compiler output for test.l (x86-64, AT&T syntax). Instructions are kept
# exactly as generated; only comments were added.
.section .text
.global main
main:
    # Prologue: set up the frame and reserve 256 bytes of locals.
    push %rbp
    mov %rsp, %rbp
    sub $256, %rsp
    # Store the 13 bytes of "Hello world!\n" at -32(%rbp)..-20(%rbp).
    movb $72, -32(%rbp)
    movb $101, -31(%rbp)
    movb $108, -30(%rbp)
    movb $108, -29(%rbp)
    movb $111, -28(%rbp)
    movb $32, -27(%rbp)
    movb $119, -26(%rbp)
    movb $111, -25(%rbp)
    movb $114, -24(%rbp)
    movb $108, -23(%rbp)
    movb $100, -22(%rbp)
    movb $33, -21(%rbp)
    movb $10, -20(%rbp)
    # Slice value at -48(%rbp): pointer at -48, length at -40.
    # NOTE(review): the length is 14 but only 13 bytes were stored above, so
    # the 14th byte (-19(%rbp)) is never written -- confirm whether a
    # terminator was meant to be emitted.
    lea -32(%rbp), %rax
    mov %rax, -48(%rbp)
    mov $14, %rax
    mov %rax, -40(%rbp)
    # malloc(<length at -40>); result saved at -56(%rbp).
    mov -40(%rbp), %rax
    push %rax
    pop %rdi
    call malloc
    mov %rax, -56(%rbp)
    # Range expression base[0..13]: rax = start (0), rdx = end (13).
    mov -56(%rbp), %rcx
    mov $0, %rax
    push %rax
    mov $13, %rax
    mov %rax, %rdx
    pop %rax
    # r8 = end - start + 1 (the range is inclusive); rax = base + start.
    mov %rdx, %r8
    sub %rax, %r8
    inc %r8
    add %rcx, %rax
    # Temporary slice at -88(%rbp), then copied to -72(%rbp) (ptr) / -64(%rbp) (len).
    mov %rax, -88(%rbp)
    mov %r8, -80(%rbp)
    lea -88(%rbp), %rax
    mov (%rax), %rcx
    mov 8(%rax), %rdx
    mov %rcx, -72(%rbp)
    mov %rdx, -64(%rbp)
    # Loop counter at -96(%rbp) starts at 0.
    mov $0, %rax
    mov %rax, -96(%rbp)
.L0:
    # Loop condition: counter < length at -40(%rbp).
    # NOTE(review): setl is a signed comparison; the source operands are unsigned.
    mov -96(%rbp), %rax
    mov %rax, %rcx
    mov -40(%rbp), %rax
    cmp %rax, %rcx
    setl %al
    movzx %al, %rax
    test %rax, %rax
    jz .L1
    # Destination address: pointer at -72(%rbp) + counter (kept on the stack).
    mov -72(%rbp), %rcx
    mov -96(%rbp), %rax
    add %rcx, %rax
    push %rax
    # Load one byte from pointer at -48(%rbp) + counter.
    mov -48(%rbp), %rcx
    mov -96(%rbp), %rax
    add %rcx, %rax
    movzbl (%rax), %eax
    # Store it to the destination.
    pop %rcx
    mov %al, (%rcx)
    # counter = counter + 1
    mov -96(%rbp), %rax
    mov %rax, %rcx
    mov $1, %rax
    add %rcx, %rax
    mov %rax, -96(%rbp)
    jmp .L0
.L1:
    # write(1, <ptr at -72>, <len at -64>): arguments are pushed left to
    # right, then popped into rdx, rsi, rdi.
    mov $1, %rax
    push %rax
    mov -72(%rbp), %rax
    push %rax
    mov -64(%rbp), %rax
    push %rax
    pop %rdx
    pop %rsi
    pop %rdi
    call write
    # return 0
    mov $0, %rax
    mov %rbp, %rsp
    pop %rbp
    ret
    # Second epilogue emitted after the explicit return; unreachable here.
    mov %rbp, %rsp
    pop %rbp
    ret
|
||||
152
utils.c
Normal file
152
utils.c
Normal file
|
|
@ -0,0 +1,152 @@
|
|||
#include "utils.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
/*
 * Parses the first `len` bytes of `s` as a base-10 integer with an optional
 * leading '-'.
 *
 * No validation is performed: every byte after the sign is treated as a
 * decimal digit and the accumulation wraps modulo 2^64 on overflow.
 * An empty input (len == 0) yields 0 without reading `s`.
 */
i64 parse_int(char *s, usize len)
{
    bool negative = false;
    /* Only inspect the sign when there is a byte to read; otherwise
     * `len -= 1` would wrap around to SIZE_MAX. */
    if (len > 0 && *s == '-') {
        s += 1;
        len -= 1;
        negative = true;
    }

    u64 magnitude = 0;
    for (usize i = 0; i < len; i++) {
        magnitude = (magnitude * 10) + (u64)(s[i] - '0');
    }

    /* Negate in unsigned arithmetic so the most negative value does not
     * cause signed overflow. */
    if (negative) {
        magnitude = 0 - magnitude;
    }

    return (i64)magnitude;
}
|
||||
|
||||
/*
 * Parses the first `len` bytes of `s` as a decimal number of the form
 * [-]digits[.digits].
 *
 * All digits are accumulated into one integer mantissa, which is then divided
 * by 10 once per fractional digit. The decimal point is optional: "42" parses
 * as 42.0. No validation is performed; bytes other than the first '.' are
 * treated as digits. An empty input yields 0.0 without reading `s`.
 */
f64 parse_float(char *s, usize len)
{
    bool negative = false;
    if (len > 0 && *s == '-') {
        s += 1;
        len -= 1;
        negative = true;
    }

    /* Unsigned so that an over-long literal wraps instead of being UB. */
    u64 mantissa = 0;
    usize frac_digits = 0;
    bool seen_point = false;
    for (usize i = 0; i < len; i++) {
        if (!seen_point && s[i] == '.') {
            seen_point = true;
            continue;
        }
        mantissa = (mantissa * 10) + (u64)(s[i] - '0');
        if (seen_point) {
            frac_digits += 1;
        }
    }

    f64 f = (f64)mantissa;
    for (usize i = 0; i < frac_digits; i++) {
        f /= 10.0;
    }

    if (negative) {
        f *= -1;
    }

    return f;
}
|
||||
|
||||
|
||||
/*
 * Associates `value` with the NUL-terminated string `key` in the trie rooted
 * at `root`, allocating any missing nodes from arena `a`.
 *
 * If the arena cannot provide a node, the function returns early and the key
 * is not inserted (nodes created so far stay in the trie).
 */
void trie_insert(trie_node *root, arena *a, char *key, uint16_t value)
{
    trie_node *node = root;
    while (*key) {
        /* Index by the byte value; plain `char` may be signed, and a
         * negative value would land far outside children[]. */
        usize slot = (unsigned char)*key;
        if (!node->children[slot]) {
            trie_node *child = arena_alloc(a, sizeof(trie_node));
            if (!child) {
                return;
            }
            memset(child, 0x0, sizeof(trie_node));
            node->children[slot] = child;
        }
        node = node->children[slot];

        key++;
    }

    node->value = value;
}
|
||||
|
||||
/*
 * Looks up the first `len` bytes of `key` in the trie rooted at `root`.
 *
 * Returns the value stored on the node reached, or 0 when the path does not
 * exist. A node that exists only as a prefix of a longer key also yields 0,
 * since nodes are zero-filled on creation.
 */
uint16_t trie_get(trie_node *root, char *key, usize len)
{
    trie_node *node = root;
    for (usize i = 0; i < len; i++) {
        /* Index by the byte value; plain `char` may be signed. */
        trie_node *next = node->children[(unsigned char)key[i]];
        if (!next) {
            return 0;
        }
        node = next;
    }

    return node->value;
}
|
||||
|
||||
/* Alignment applied to every arena allocation unless overridden at build time. */
#ifndef DEFAULT_ALIGNMENT
#define DEFAULT_ALIGNMENT (2 * sizeof(void *))
#endif

/*
 * Rounds `ptr` up to the next multiple of `align`.
 *
 * Works for any non-zero alignment, not only powers of two. An alignment of
 * 0 is treated as "no alignment" and returns `ptr` unchanged.
 */
static usize align_forward(usize ptr, usize align) {
    if (align == 0) {
        return ptr;
    }

    uintptr_t p = ptr;
    uintptr_t a = (uintptr_t)align;
    uintptr_t modulo = p % a;

    if (modulo != 0) {
        p += a - modulo;
    }
    return (usize)p;
}
|
||||
|
||||
/*
 * Creates an arena backed by `size` zero-initialized bytes.
 *
 * On allocation failure the returned arena has `memory == NULL` and
 * `capacity == 0`; the caller is expected to check `memory`.
 */
arena arena_init(usize size)
{
    /* calloc zero-fills, and unlike malloc + memset it never writes through
     * a NULL pointer when the allocation fails. */
    void *memory = calloc(1, size);
    return (arena){
        .capacity = memory ? size : 0,
        .position = 0,
        .memory = memory,
    };
}
|
||||
|
||||
/*
 * Bump-allocates `size` bytes from `a`, aligned to DEFAULT_ALIGNMENT.
 *
 * Returns NULL when the arena has no backing memory or when the padded
 * request does not fit in the remaining space. The arena is left unchanged
 * in that case.
 */
void *arena_alloc(arena *a, usize size) {
    if (!a->memory || a->position > a->capacity) return NULL;

    uintptr_t current_addr = (uintptr_t)a->memory + a->position;
    usize padding = align_forward(current_addr, DEFAULT_ALIGNMENT) - current_addr;

    /* Compare against the remaining space instead of summing, so a huge
     * `size` cannot wrap around and slip past the capacity check. */
    usize remaining = a->capacity - a->position;
    if (padding > remaining || size > remaining - padding) return NULL;

    void *ret = (unsigned char *)a->memory + a->position + padding;
    a->position += (size + padding);

    return ret;
}
|
||||
|
||||
/* Returns the arena's current position so it can later be rolled back to. */
snapshot arena_snapshot(arena *a)
{
    return a->position;
}
|
||||
|
||||
/*
 * Sets the arena's position to `s`. The value is stored as-is and is not
 * checked against the arena's capacity; the memory contents are not touched.
 */
void arena_reset_to_snapshot(arena *a, snapshot s)
{
    a->position = s;
}
|
||||
|
||||
/* Rewinds the arena to position 0, making its whole capacity available again. */
void arena_reset(arena *a)
{
    arena_reset_to_snapshot(a, 0);
}
|
||||
|
||||
/*
 * Frees the arena's backing memory. The arena is passed by value, so the
 * caller's copy still holds the stale pointer and must not be used afterwards.
 */
void arena_deinit(arena a)
{
    free(a.memory);
}
|
||||
64
utils.h
Normal file
64
utils.h
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
#ifndef UTILS_H
#define UTILS_H

#include <stdint.h>
#include <stddef.h>

/* Short aliases for the fixed-width integer types. */
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;

typedef int8_t i8;
typedef int16_t i16;
typedef int32_t i32;
typedef int64_t i64;

typedef size_t usize;

typedef float f32;
typedef double f64;

/* Parse the first `len` bytes of `s` as a decimal integer / decimal number. */
i64 parse_int(char *s, usize len);
f64 parse_float(char *s, usize len);

/* Bump allocator over a single block of memory. */
typedef struct {
    usize capacity;
    usize position;
    void *memory;
} arena;

/* A saved arena position, as returned by arena_snapshot(). */
typedef usize snapshot;

/*
 * NOTE(ernesto): faulty initialization is signaled by arena.memory
 * being null. It is the responsibility of the caller to check for faulty
 * initialization.
 */
arena arena_init(usize size);
/*
 * Returns null on unsuccessful allocation.
 * In this implementation an allocation is only unsuccessful if the arena
 * does not have enough memory to allocate the requested space.
 */
void *arena_alloc(arena *a, usize size);
snapshot arena_snapshot(arena *a);
void arena_reset_to_snapshot(arena *a, snapshot s);
void arena_reset(arena *a);
/* This call should never fail; also, do we even care if it does? */
void arena_deinit(arena a);

/* Trie node with one child slot per possible byte value. */
typedef struct _trie_node {
    uint16_t value;
    struct _trie_node *children[256];
} trie_node;

void trie_insert(trie_node *root, arena *a, char *key, uint16_t value);
uint16_t trie_get(trie_node *root, char *key, usize len);

/* A position in a source file. */
typedef struct {
    usize row, column;
} source_pos;

#endif
|
||||
Loading…
Add table
Add a link
Reference in a new issue