feat: first commit

2026-01-25 10:20:10 +01:00 · 2026-01-25 10:20:10 +01:00 · 701734097e
commit 701734097e
13 changed files with 2655 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,6 @@
 **/*.o
 **/*~
 xcc
 **/*.swp
 **/*.pdf
 **/*.dot
--- a/24
+++ b/24
@ -0,0 +1,24 @@
 Copyright (c) 2025, Lorenzo Torres
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
 3. Neither the name of the copyright holder nor the
   names of its contributors may be used to endorse or promote products
   derived from this software without specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER ''AS IS'' AND ANY
 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/53
+++ b/53
@ -0,0 +1,53 @@
 # xcc - eXtended C compiler
 # See LICENSE file for copyright and license details.
 include config.mk
 SRC = xcc.c utils.c lexer.c
 HDR = config.def.h utils.h lexer.h
 OBJ = ${SRC:.c=.o}
 all: options xcc
 options:
 	@echo xcc build options:
 	@echo "CFLAGS   = ${CFLAGS}"
 	@echo "LDFLAGS  = ${LDFLAGS}"
 	@echo "CC       = ${CC}"
 .c.o:
 	${CC} -c ${CFLAGS} $<
 ${OBJ}: config.mk
 xcc: ${OBJ}
 	${CC} -o $@ ${OBJ} ${LDFLAGS}
 clean:
 	rm -f xcc ${OBJ} xcc-${VERSION}.tar.gz
 dist: clean
 	mkdir -p xcc-${VERSION}
 	cp -R LICENSE Makefile README config.mk\
 		xcc.1 ${HDR} ${SRC} xcc-${VERSION}
 	tar -cf xcc-${VERSION}.tar xcc-${VERSION}
 	gzip xcc-${VERSION}.tar
 	rm -rf xcc-${VERSION}
 install: all
 	mkdir -p ${DESTDIR}${PREFIX}/bin
 	cp -f xcc ${DESTDIR}${PREFIX}/bin
 	chmod 755 ${DESTDIR}${PREFIX}/bin/xcc
 	mkdir -p ${DESTDIR}${MANPREFIX}/man1
 	sed "s/VERSION/${VERSION}/g" < xcc.1 > ${DESTDIR}${MANPREFIX}/man1/xcc.1
 	chmod 644 ${DESTDIR}${MANPREFIX}/man1/xcc.1
 uninstall:
 	rm -f ${DESTDIR}${PREFIX}/bin/xcc\
 		${DESTDIR}${MANPREFIX}/man1/xcc.1
 graph: clean all
 	./xcc > graph.dot
 	dot -Tpdf graph.dot > graph.pdf
 	zathura ./graph.pdf
 # None of these targets produce a file of the same name, so declare them all
 # phony; `graph` was missing, which would make it a no-op if a file called
 # "graph" ever existed.
 .PHONY: all options clean dist install uninstall graph
--- a/24
+++ b/24
@ -0,0 +1,24 @@
 xcc - eXtended C compiler
 ============================
 xcc is an eXtended C compiler. It can compile C code.
 Requirements
 ------------
 In order to build xcc you need a C11 compiler and make.
 Installation
 ------------
 Edit config.mk to match your local setup (xcc is installed into
 the /usr namespace by default).
 Afterwards enter the following command to build and install xcc (if
 necessary as root):
    make clean install
 Usage
 -----------
 xcc file
--- a/config.mk
+++ b/config.mk
@ -0,0 +1,27 @@
 # xcc version
 VERSION = 0.1
 # Customize below to fit your system
 # paths
 PREFIX = /usr
 MANPREFIX = ${PREFIX}/share/man
 # OpenBSD (uncomment)
 #MANPREFIX = ${PREFIX}/man
 # includes and libs
 INCS = -I.
 LIBS =
 # flags
 CPPFLAGS = -DVERSION=\"${VERSION}\" 
 CFLAGS  := -std=c11 -pedantic -Wall -O0 ${INCS} ${CPPFLAGS} 
 CFLAGS  := ${CFLAGS} -g
 LDFLAGS  = ${LIBS}
 # Solaris
 #CFLAGS = -fast ${INCS} -DVERSION=\"${VERSION}\"
 #LDFLAGS = ${LIBS}
 # compiler and linker
 CC = cc
--- a/lexer.c
+++ b/lexer.c
@ -0,0 +1,410 @@
 #include "lexer.h"
 #include <stdbool.h>
 #include <ctype.h>
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
 /*
  * Append a token of kind `type` to the lexer's token list.
  *
  * The lexeme is not copied: it points into l->source at the current l->index
  * and spans `len` bytes, so it is not NUL-terminated. The token's position is
  * the lexer's current row and column. l->index is left untouched; the caller
  * is responsible for advancing it.
  *
  * Terminates the program if the token cannot be allocated.
  */
 static void add_token(lexer *l, token_type type, usize len)
 {
 	token *t = malloc(sizeof *t);
 	if (!t) {
 		fprintf(stderr, "xcc: out of memory\n");
 		exit(EXIT_FAILURE);
 	}
 	t->type = type;
 	t->lexeme_len = len;
 	t->lexeme = l->source + l->index;
 	t->position.row = l->row;
 	t->position.column = l->column;
 	/* malloc() does not zero memory: terminate the list explicitly so that
 	 * consumers can walk it until next == NULL. */
 	t->next = NULL;
 	if (!l->tokens) {
 		l->tokens = t;
 	} else {
 		l->tail->next = t;
 	}
 	l->tail = t;
 }
 /*
  * Append a TOKEN_ERROR token carrying `msg` to the lexer's token list.
  *
  * The message pointer is stored as the token's lexeme without being copied,
  * so `msg` must outlive the token list (the callers in this file pass string
  * literals). The token's position is the lexer's current row and column.
  *
  * Terminates the program if the token cannot be allocated.
  */
 static void add_error(lexer *l, char *msg)
 {
 	token *t = malloc(sizeof *t);
 	if (!t) {
 		fprintf(stderr, "xcc: out of memory\n");
 		exit(EXIT_FAILURE);
 	}
 	t->type = TOKEN_ERROR;
 	t->lexeme_len = strlen(msg);
 	t->lexeme = msg;
 	t->position.row = l->row;
 	t->position.column = l->column;
 	/* malloc() does not zero memory: terminate the list explicitly so that
 	 * consumers can walk it until next == NULL. */
 	t->next = NULL;
 	if (!l->tokens) {
 		l->tokens = t;
 	} else {
 		l->tail->next = t;
 	}
 	l->tail = t;
 }
 /*
  * Lex an integer or floating-point literal starting at l->index.
  *
  * A run of digits is an integer. A single '.' that sits between two digits
  * turns the literal into a float; a second '.' ends the literal instead of
  * being absorbed (so "1.2.3" yields the float "1.2" and leaves ".3" for the
  * caller). On return l->index is just past the literal.
  *
  * Relies on the source buffer being NUL-terminated to stop the scan.
  */
 static void parse_number(lexer *l)
 {
 	usize start = l->index;
 	bool is_float = false;
 	/* <ctype.h> functions are only defined for values representable as
 	 * unsigned char (or EOF), hence the casts. */
 	while (isdigit((unsigned char)l->source[l->index])) {
 		if (!is_float && l->source[l->index+1] == '.'
 		    && isdigit((unsigned char)l->source[l->index+2])) {
 			/* Consume the digit, the point and the first fractional digit. */
 			is_float = true;
 			l->index += 3;
 		} else {
 			l->index += 1;
 		}
 	}
 	usize len = l->index - start;
 	/* add_token() takes the lexeme from l->index, so rewind for the call. */
 	l->index = start;
 	add_token(l, is_float ? TOKEN_FLOAT : TOKEN_INTEGER, len);
 	l->index = start + len;
 }
 /*
  * Lex an identifier (letters, digits and underscores) starting at l->index
  * and append it as TOKEN_IDENTIFIER. On return l->index is just past it.
  *
  * TODO: keyword recognition is not implemented yet, so reserved words are
  * currently reported as plain identifiers.
  */
 static void parse_identifier(lexer *l)
 {
 	usize start = l->index;
 	/* <ctype.h> functions are only defined for values representable as
 	 * unsigned char (or EOF), hence the cast. */
 	while (isalnum((unsigned char)l->source[l->index]) || l->source[l->index] == '_') {
 		l->index += 1;
 	}
 	usize len = l->index - start;
 	/* add_token() takes the lexeme from l->index, so rewind for the call. */
 	l->index = start;
 	add_token(l, TOKEN_IDENTIFIER, len);
 	l->index = start + len;
 }
 /*
  * Lex the body of a string literal. l->index must point at the first
  * character after the opening double quote.
  *
  * On success a TOKEN_STRING is appended whose lexeme is the raw text between
  * the quotes (escape sequences are kept verbatim, not decoded) and l->index
  * ends up just past the closing quote. If a newline or the end of input is
  * reached first, a TOKEN_ERROR is appended and l->index is left on that
  * newline / terminator.
  */
 static void parse_string(lexer *l)
 {
 	usize start = l->index;
 	char c = l->source[l->index];
 	while (c != '"') {
 		if (c == '\0' || c == '\n') {
 			add_error(l, "unclosed string literal.");
 			return;
 		}
 		/* A backslash escapes the following character, so an escaped
 		 * double quote must not terminate the literal. */
 		if (c == '\\' && l->source[l->index+1] != '\0' && l->source[l->index+1] != '\n') {
 			l->index += 1;
 		}
 		l->index += 1;
 		c = l->source[l->index];
 	}
 	usize len = l->index - start;
 	/* add_token() takes the lexeme from l->index, so rewind for the call. */
 	l->index = start;
 	add_token(l, TOKEN_STRING, len);
 	/* Skip the body and the closing quote. */
 	l->index = start + len + 1;
 }
 static bool parse_special(lexer *l)
 {
 	switch (l->source[l->index]) {
 	case '+':
 		if (l->source[l->index+1] == '=') {
 			add_token(l, TOKEN_PLUS_EQ, 2);
 			l->index += 2;
 		} else if (l->source[l->index+1] == '+') {
 			add_token(l, TOKEN_PLUS_PLUS, 2);
 			l->index += 2;
 		} else {
 			add_token(l, TOKEN_PLUS, 1);
 			l->index += 1;
 		}
 		return true;
 	case '-':
 		if (l->source[l->index+1] == '=') {
 			add_token(l, TOKEN_MINUS_EQ, 2);
 			l->index += 2;
 		} else if (l->source[l->index+1] == '-') {
 			add_token(l, TOKEN_MINUS_MINUS, 2);
 			l->index += 2;
 		} else if (l->source[l->index+1] == '>') {
 			add_token(l, TOKEN_ARROW, 2);
 			l->index += 2;
 		} else {
 			add_token(l, TOKEN_MINUS, 1);
 			l->index += 1;
 		}
 		return true;
 	case '/':
 		if (l->source[l->index+1] == '=') {
 			add_token(l, TOKEN_SLASH_EQ, 2);
 			l->index += 2;
 		}  else {
 			add_token(l, TOKEN_SLASH, 1);
 			l->index += 1;
 		}
 		return true;
 	case '*':
 		if (l->source[l->index+1] == '=') {
 			add_token(l, TOKEN_STAR_EQ, 2);
 			l->index += 2;
 		}  else {
 			add_token(l, TOKEN_STAR, 1);
 			l->index += 1;
 		}
 		return true;
 	case '%':
 		if (l->source[l->index+1] == '=') {
 			add_token(l, TOKEN_PERC_EQ, 2);
 			l->index += 2;
 		}  else {
 			add_token(l, TOKEN_PERC, 1);
 			l->index += 1;
 		}
 		return true;
 	case '&':
 		if (l->source[l->index+1] == '=') {
 			add_token(l, TOKEN_AND_EQ, 2);
 			l->index += 2;
 		} else if (l->source[l->index+1] == '&') {
 			add_token(l, TOKEN_DOUBLE_AND, 2);
 			l->index += 2;
 		} else {
 			add_token(l, TOKEN_AND, 1);
 			l->index += 1;
 		}
 		return true;
 	case '^':
 		if (l->source[l->index+1] == '=') {
 			add_token(l, TOKEN_HAT_EQ, 2);
 			l->index += 2;
 		}  else {
 			add_token(l, TOKEN_HAT, 1);
 			l->index += 1;
 		}
 		return true;
 	case '|':
 		if (l->source[l->index+1] == '=') {
 			add_token(l, TOKEN_PIPE_EQ, 2);
 			l->index += 2;
 		} else if (l->source[l->index+1] == '|') {
 			add_token(l, TOKEN_OR, 2);
 			l->index += 2;
 		} else {
 			add_token(l, TOKEN_PIPE, 1);
 			l->index += 1;
 		}
 		return true;
 	case '=':
 		if (l->source[l->index+1] == '=') {
 			add_token(l, TOKEN_DOUBLE_EQ, 2);
 			l->index += 2;
 		}  else {
 			add_token(l, TOKEN_EQ, 1);
 			l->index += 1;
 		}
 		return true;
 	case '>':
 		if (l->source[l->index+1] == '=') {
 			add_token(l, TOKEN_GREATER_EQ, 2);
 			l->index += 2;
 		} else if (l->source[l->index+1] == '>') {
 			if (l->source[l->index+2] == '=') {
 				add_token(l, TOKEN_RSHIFT_EQ, 3);
 				l->index += 3;
 				return true;
 			}
 			add_token(l, TOKEN_RSHIFT, 2);
 			l->index += 2;
 		} else {
 			add_token(l, TOKEN_GREATER_THAN, 1);
 			l->index += 1;
 		}
 		return true;
 	case '<':
 		if (l->source[l->index+1] == '=') {
 			add_token(l, TOKEN_LESS_EQ, 2);
 			l->index += 2;
 		} else if (l->source[l->index+1] == '<') {
 			if (l->source[l->index+2] == '=') {
 				add_token(l, TOKEN_LSHIFT_EQ, 3);
 				l->index += 3;
 				return true;
 			}
 			add_token(l, TOKEN_LSHIFT, 2);
 			l->index += 2;
 		} else {
 			add_token(l, TOKEN_LESS_THAN, 1);
 			l->index += 1;
 		}
 		return true;
 	case '!':
 		if (l->source[l->index+1] == '=') {
 			add_token(l, TOKEN_NOT_EQ, 2);
 			l->index += 2;
 		}  else {
 			add_token(l, TOKEN_BANG, 1);
 			l->index += 1;
 		}
 		return true;
 	case ':':
 		add_token(l, TOKEN_COLON, 1);
 		l->index += 1;
 		return true;
 	case ';':
 		add_token(l, TOKEN_SEMICOLON, 1);
 		l->index += 1;
 		return true;
 	case '.':
 		add_token(l, TOKEN_DOT, 1);
 		l->index += 1;
 		return true;
 	case ',':
 		add_token(l, TOKEN_COMMA, 1);
 		l->index += 1;
 		return true;
 	case '(':
 		add_token(l, TOKEN_LPAREN, 1);
 		l->index += 1;
 		return true;
 	case ')':
 		add_token(l, TOKEN_RPAREN, 1);
 		l->index += 1;
 		return true;
 	case '[':
 		add_token(l, TOKEN_LSQUARE, 1);
 		l->index += 1;
 		return true;
 	case ']':
 		add_token(l, TOKEN_RSQUARE, 1);
 		l->index += 1;
 		return true;
 	case '{':
 		add_token(l, TOKEN_LCURLY, 1);
 		l->index += 1;
 		return true;
 	case '}':
 		add_token(l, TOKEN_RCURLY, 1);
 		l->index += 1;
 		return true;
 	case '\'':
 		if (l->source[l->index+1] == '\\') {
 			if (l->source[l->index+3] != '\'') {
 				add_error(l, "unclosed character literal.");
 				l->index += 1;
 				return true;
 			}
 			l->index += 1;
 			add_token(l, TOKEN_CHAR, 2);
 			l->index += 3;
 			return true;
 		} else {
 			if (l->source[l->index+2] != '\'') {
 				add_error(l, "unclosed character literal.");
 				l->index += 1;
 				return true;
 			}
 			l->index += 1;
 			add_token(l, TOKEN_CHAR, 1);
 			l->index += 2;
 			return true;
 		}
 	default:
 		return false;
 	}
 }
 /*
  * Skip a block comment whose opening slash is at the cursor, keeping l->row
  * and l->column in step with the characters consumed. On return l->column
  * refers to the last character consumed. If the input ends before the
  * comment is closed, a TOKEN_ERROR positioned at the comment's opening is
  * appended.
  */
 static void skip_block_comment(lexer *l)
 {
 	usize open_row = l->row;
 	usize open_column = l->column;
 	/* Step over the two-character opener; column now refers to the star. */
 	l->index += 2;
 	l->column += 1;
 	while (l->index < l->size) {
 		if (l->source[l->index] == '*' && l->source[l->index+1] == '/') {
 			l->index += 2;
 			l->column += 2;
 			return;
 		}
 		if (l->source[l->index] == '\n') {
 			l->row += 1;
 			l->column = 0;
 		} else {
 			l->column += 1;
 		}
 		l->index += 1;
 	}
 	/* Ran off the end of the input: report where the comment started, then
 	 * restore the real cursor position. */
 	usize end_row = l->row;
 	usize end_column = l->column;
 	l->row = open_row;
 	l->column = open_column;
 	add_error(l, "unclosed block comment.");
 	l->row = end_row;
 	l->column = end_column;
 }
 /*
  * Tokenize the whole source buffer of `l`, appending tokens (and error
  * tokens) to its list.
  *
  * Column bookkeeping: the column is bumped once at the top of every
  * iteration, i.e. for the first character of whatever is consumed next;
  * after a multi-character item the remaining characters are added so that
  * the column always refers to the last character consumed. A newline resets
  * the column to 0 and advances the row.
  *
  * The lookahead and the helper scanners rely on the source buffer being
  * NUL-terminated at l->size.
  */
 static void parse(lexer *l)
 {
 	while (l->index < l->size) {
 		char c = l->source[l->index];
 		usize head = l->index;
 		l->column += 1;
 		if (c == '\n') {
 			l->index += 1;
 			l->row += 1;
 			l->column = 0;
 			continue;
 		}
 		if (c == '/' && l->source[l->index+1] == '/') {
 			/* Line comment: stop at the newline (so the next iteration
 			 * counts the row) or at the end of the input. */
 			while (l->index < l->size && l->source[l->index] != '\n') {
 				l->index += 1;
 			}
 			l->column += (l->index - head - 1);
 			continue;
 		}
 		if (c == '/' && l->source[l->index+1] == '*') {
 			skip_block_comment(l);
 			continue;
 		}
 		/* <ctype.h> functions are only defined for values representable as
 		 * unsigned char (or EOF), hence the casts below. */
 		if (isspace((unsigned char)c)) {
 			l->index += 1;
 			continue;
 		}
 		if (parse_special(l)) {
 			/* Operator, punctuation or character literal consumed. */
 		} else if (isdigit((unsigned char)c)) {
 			parse_number(l);
 		} else if (isalpha((unsigned char)c) || c == '_') {
 			/* Identifiers may start with an underscore as well as a letter. */
 			parse_identifier(l);
 		} else if (c == '"') {
 			/* parse_string() expects the cursor just past the opening quote. */
 			l->index += 1;
 			parse_string(l);
 		} else {
 			/* Unrecognised byte: skip it. */
 			l->index += 1;
 		}
 		/* Account for every consumed character beyond the first. */
 		l->column += (l->index - head - 1);
 	}
 }
 /*
  * Create a lexer over `source` (`size` bytes, followed by a NUL terminator)
  * and tokenize it immediately.
  *
  * The source buffer is not copied; tokens point into it, so it must outlive
  * the returned lexer. Rows and columns are 1-based.
  *
  * Returns the heap-allocated lexer with its token list filled in, or NULL if
  * the lexer itself could not be allocated.
  */
 lexer *lexer_init(char *source, usize size)
 {
 	lexer *lex = malloc(sizeof *lex);
 	if (!lex) {
 		return NULL;
 	}
 	lex->source = source;
 	lex->size = size;
 	lex->index = 0;
 	lex->row = 1;
 	/* parse() bumps the column before looking at each character, so start at
 	 * 0 for the first character of the input to land on column 1, exactly
 	 * like the first character after every newline. */
 	lex->column = 0;
 	lex->tokens = NULL;
 	lex->tail = NULL;
 	parse(lex);
 	return lex;
 }
--- a/lexer.h
+++ b/lexer.h
@ -0,0 +1,108 @@
 #ifndef LEXER_H
 #define LEXER_H
 #include "utils.h"
 /*
  * Kinds of token stored in token.type. The trailing comment on an operator
  * or punctuation entry shows the exact source text it stands for.
  */
 typedef enum {
 	TOKEN_ERROR, /* lexing error; the token's lexeme is the error message */
 	TOKEN_END, /* NOTE(review): not emitted by lexer.c at present; presumably an end-of-input marker */
 	TOKEN_PLUS, // +
 	TOKEN_PLUS_PLUS, // ++
 	TOKEN_MINUS, // -
 	TOKEN_MINUS_MINUS, // --
 	TOKEN_SLASH, // /
 	TOKEN_PERC, // %
 	TOKEN_STAR, // *
 	TOKEN_AND, // &
 	TOKEN_HAT, // ^
 	TOKEN_PIPE, // |
 	TOKEN_LSHIFT, // <<
 	TOKEN_RSHIFT, // >>
 	TOKEN_DOUBLE_EQ, // ==
 	TOKEN_EQ, // =
 	TOKEN_LESS_THAN, // <
 	TOKEN_GREATER_THAN, // >
 	TOKEN_LESS_EQ, // <=
 	TOKEN_GREATER_EQ, // >=
 	TOKEN_NOT_EQ, // !=
 	TOKEN_PLUS_EQ, // +=
 	TOKEN_MINUS_EQ, // -=
 	TOKEN_STAR_EQ, // *=
 	TOKEN_SLASH_EQ, // /=
 	TOKEN_AND_EQ, // &=
 	TOKEN_HAT_EQ, // ^=
 	TOKEN_PIPE_EQ, // |=
 	TOKEN_PERC_EQ, // %=
 	TOKEN_LSHIFT_EQ, // <<=
 	TOKEN_RSHIFT_EQ, // >>=
 	TOKEN_OR, // ||
 	TOKEN_DOUBLE_AND, // &&
 	TOKEN_ARROW, // ->
 	TOKEN_COLON, // :
 	TOKEN_SEMICOLON, // ;
 	TOKEN_DOT, // .
 	TOKEN_BANG, // !
 	TOKEN_COMMA, // ,
 	TOKEN_LPAREN, // (
 	TOKEN_RPAREN, // )
 	TOKEN_LSQUARE, // [
 	TOKEN_RSQUARE, // ]
 	TOKEN_LCURLY, // {
 	TOKEN_RCURLY, // }
 	/* Literals and names; the lexeme is the raw source text (strings and
 	 * character literals without their surrounding quotes). */
 	TOKEN_INTEGER,
 	TOKEN_FLOAT,
 	TOKEN_IDENTIFIER,
 	TOKEN_STRING,
 	TOKEN_CHAR,
 	/* Keyword kinds. NOTE(review): lexer.c has no keyword lookup yet, so
 	 * these and the TOKEN_TYPE_* kinds below are never produced for now. */
 	TOKEN_AUTO,
 	TOKEN_BREAK,
 	TOKEN_CASE,
 	TOKEN_CONTINUE,
 	TOKEN_DEFAULT,
 	TOKEN_DO,
 	TOKEN_ELSE,
 	TOKEN_ENUM,
 	TOKEN_EXTERN,
 	TOKEN_FOR,
 	TOKEN_GOTO,
 	TOKEN_IF,
 	TOKEN_REGISTER,
 	TOKEN_RETURN,
 	TOKEN_SIZEOF,
 	TOKEN_STATIC,
 	TOKEN_STRUCT,
 	TOKEN_SWITCH,
 	TOKEN_TYPEDEF,
 	TOKEN_UNION,
 	TOKEN_WHILE,
 	TOKEN_CONST,
 	TOKEN_VOLATILE,
 	/* Built-in type keywords. */
 	TOKEN_TYPE_VOID,
 	TOKEN_TYPE_CHAR,
 	TOKEN_TYPE_SHORT,
 	TOKEN_TYPE_INT,
 	TOKEN_TYPE_LONG,
 	TOKEN_TYPE_FLOAT,
 	TOKEN_TYPE_DOUBLE,
 	TOKEN_TYPE_SIGNED,
 	TOKEN_TYPE_UNSIGNED
 } token_type;
 /* One lexed token; tokens are chained into a singly linked list via `next`. */
 typedef struct _token {
 	token_type type;
 	/* Lexer row/column at the moment the token was recorded. */
 	source_pos position;
 	/* For ordinary tokens this points into the lexer's source buffer and is
 	 * NOT NUL-terminated (use lexeme_len); for TOKEN_ERROR it points at the
 	 * error message. */
 	char *lexeme;
 	/* Length of the lexeme in bytes. */
 	usize lexeme_len;
 	/* Following token in the list. */
 	struct _token *next;
 } token;
 /* Lexer state together with the token list it has produced. */
 typedef struct {
 	/* column/row: current source position; index: byte offset of the cursor
 	 * into `source`; size: length of `source` as given to lexer_init(). */
 	usize column, row, index, size;
 	/* Text being lexed; borrowed from the caller, not copied. */
 	char *source;
 	/* Head of the token list (0 while the list is empty). */
 	token *tokens;
 	/* Last token of the list, kept so that appending needs no traversal. */
 	token *tail;
 } lexer;
 /*
  * Allocate a lexer for `source` (`size` bytes) and tokenize it right away.
  * The returned lexer is allocated with malloc(); its `tokens` field is the
  * head of the resulting token list. `source` is not copied and tokens point
  * into it, so the buffer must stay valid while the tokens are in use.
  */
 lexer *lexer_init(char *source, usize size);
 #endif
--- a/parser.c
+++ b/parser.c
--- a/parser.h
+++ b/parser.h
@ -0,0 +1,4 @@
 #ifndef PARSER_H
 #define PARSER_H
 #endif
--- a/stb_ds.h
+++ b/stb_ds.h
--- a/utils.c
+++ b/utils.c
@ -0,0 +1,66 @@
 #include "utils.h"
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <stdbool.h>
 /*
  * Convert the first `len` bytes of `s` to a signed integer.
  *
  * Accepts an optional leading '-' followed by decimal digits. `s` does not
  * have to be NUL-terminated. Conversion stops at the first byte that is not
  * a digit; an empty input (or one without leading digits) yields 0. Values
  * that do not fit wrap around modulo 2^64.
  */
 i64 parse_int(char *s, usize len)
 {
 	bool negative = false;
 	/* Only look at *s when there is at least one byte; this also keeps
 	 * `len` from wrapping around on empty input. */
 	if (len > 0 && *s == '-') {
 		s += 1;
 		len -= 1;
 		negative = true;
 	}
 	u64 magnitude = 0;
 	for (usize i = 0; i < len; i++) {
 		if (s[i] < '0' || s[i] > '9') {
 			break;
 		}
 		magnitude = (magnitude * 10) + (u64)(s[i] - '0');
 	}
 	if (negative) {
 		/* Negate in the unsigned domain, where wrap-around is well defined. */
 		magnitude = 0 - magnitude;
 	}
 	return (i64)magnitude;
 }
 /*
  * Convert the first `len` bytes of `s` to a floating-point number.
  *
  * Accepts an optional leading '-', decimal digits and at most one '.'; the
  * point may be missing altogether, in which case the input is read as a
  * whole number. `s` does not have to be NUL-terminated (which is why strtod()
  * is not used). Conversion stops at the first unexpected byte; an empty
  * input yields 0.
  */
 f64 parse_float(char *s, usize len)
 {
 	bool negative = false;
 	if (len > 0 && *s == '-') {
 		s += 1;
 		len -= 1;
 		negative = true;
 	}
 	/* Accumulate all digits as one whole number and remember by how much
 	 * the fractional digits scaled it, so that a single division at the end
 	 * is the only rounding step. */
 	f64 value = 0.0;
 	f64 scale = 1.0;
 	bool seen_point = false;
 	for (usize i = 0; i < len; i++) {
 		if (s[i] == '.' && !seen_point) {
 			seen_point = true;
 			continue;
 		}
 		if (s[i] < '0' || s[i] > '9') {
 			break;
 		}
 		value = (value * 10.0) + (f64)(s[i] - '0');
 		if (seen_point) {
 			scale *= 10.0;
 		}
 	}
 	value /= scale;
 	return negative ? -value : value;
 }
--- a/utils.h
+++ b/utils.h
@ -0,0 +1,30 @@
 #ifndef UTILS_H
 #define UTILS_H
 #include <stdint.h>
 #include <stdint.h>
 #include <stddef.h>
 /* Short aliases for the fixed-width integer types from <stdint.h>:
  * uN is unsigned, iN is signed, N is the width in bits. */
 typedef uint8_t u8;
 typedef uint16_t u16;
 typedef uint32_t u32;
 typedef uint64_t u64;
 typedef int8_t i8;
 typedef int16_t i16;
 typedef int32_t i32;
 typedef int64_t i64;
 /* Sizes, lengths and indices. */
 typedef size_t usize;
 /* Floating-point aliases. NOTE(review): the names suggest 32 and 64 bits,
  * but the C standard does not fix the widths of float and double. */
 typedef float f32;
 typedef double f64;
 /* Convert the first `len` bytes of `s` (an optional leading '-' followed by
  * decimal digits; no NUL terminator required) to a signed integer. */
 i64 parse_int(char *s, usize len);
 /* Convert the first `len` bytes of `s` (an optional leading '-' followed by
  * a decimal number written as digits '.' digits; no NUL terminator required)
  * to a floating-point value. */
 f64 parse_float(char *s, usize len);
 /* A location in a source text, expressed as a row (line) and a column. */
 typedef struct {
 	usize row, column;
 } source_pos;
 #endif
--- a/xcc.c
+++ b/xcc.c
@ -0,0 +1,8 @@
 #include "lexer.h"
 #include <string.h>
 int main(void)
 {
 	char *source = "int main(void)\n{\n1+1;\n}\n";
 	lexer *l = lexer_init(source, strlen(source));
 }