feat: first commit

This commit is contained in:
Lorenzo Torres 2026-01-25 10:20:10 +01:00
commit 701734097e
13 changed files with 2655 additions and 0 deletions

6
.gitignore vendored Normal file
View file

@ -0,0 +1,6 @@
**/*.o
**/*~
xcc
**/*.swp
**/*.pdf
**/*.dot

24
LICENSE Normal file
View file

@ -0,0 +1,24 @@
Copyright (c) 2025, Lorenzo Torres
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY Lorenzo Torres ''AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL Lorenzo Torres BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

53
Makefile Normal file
View file

@ -0,0 +1,53 @@
# xcc - eXtended C compiler
# See LICENSE file for copyright and license details.

include config.mk

# Sources compiled into the xcc binary and the headers they include.
# HDR must only name files that exist in the tree: `make dist` copies them.
SRC = xcc.c utils.c lexer.c
HDR = utils.h lexer.h
OBJ = ${SRC:.c=.o}

all: options xcc

# Print the effective build settings before compiling.
options:
	@echo xcc build options:
	@echo "CFLAGS = ${CFLAGS}"
	@echo "LDFLAGS = ${LDFLAGS}"
	@echo "CC = ${CC}"

.c.o:
	${CC} -c ${CFLAGS} $<

# Rebuild every object when the configuration or a header changes.
${OBJ}: config.mk ${HDR}

xcc: ${OBJ}
	${CC} -o $@ ${OBJ} ${LDFLAGS}

clean:
	rm -f xcc ${OBJ} xcc-${VERSION}.tar.gz

# Build a release tarball.
# NOTE: dist and install expect a man page named xcc.1 in the source tree.
dist: clean
	mkdir -p xcc-${VERSION}
	cp -R LICENSE Makefile README config.mk\
		xcc.1 ${HDR} ${SRC} xcc-${VERSION}
	tar -cf xcc-${VERSION}.tar xcc-${VERSION}
	gzip xcc-${VERSION}.tar
	rm -rf xcc-${VERSION}

install: all
	mkdir -p ${DESTDIR}${PREFIX}/bin
	cp -f xcc ${DESTDIR}${PREFIX}/bin
	chmod 755 ${DESTDIR}${PREFIX}/bin/xcc
	mkdir -p ${DESTDIR}${MANPREFIX}/man1
	sed "s/VERSION/${VERSION}/g" < xcc.1 > ${DESTDIR}${MANPREFIX}/man1/xcc.1
	chmod 644 ${DESTDIR}${MANPREFIX}/man1/xcc.1

uninstall:
	rm -f ${DESTDIR}${PREFIX}/bin/xcc\
		${DESTDIR}${MANPREFIX}/man1/xcc.1

# Rebuild from scratch, render the compiler's dot output to PDF and open it.
graph: clean all
	./xcc > graph.dot
	dot -Tpdf graph.dot > graph.pdf
	zathura ./graph.pdf

.PHONY: all options clean dist install uninstall graph

24
README Normal file
View file

@ -0,0 +1,24 @@
xcc - eXtended C compiler
=========================
xcc is an eXtended C compiler. It is intended to compile C code.
Requirements
------------
In order to build xcc you need a C11 compiler and make.
Installation
------------
Edit config.mk to match your local setup (xcc is installed into
the /usr namespace by default).
Afterwards enter the following command to build and install xcc (if
necessary as root):
make clean install
Usage
-----------
xcc file

27
config.mk Normal file
View file

@ -0,0 +1,27 @@
# xcc version
VERSION = 0.1
# Customize below to fit your system
# paths
PREFIX = /usr
MANPREFIX = ${PREFIX}/share/man
# OpenBSD (uncomment)
#MANPREFIX = ${PREFIX}/man
# includes and libs
INCS = -I.
LIBS =
# flags
CPPFLAGS = -DVERSION=\"${VERSION}\"
CFLAGS := -std=c11 -pedantic -Wall -O0 ${INCS} ${CPPFLAGS}
CFLAGS := ${CFLAGS} -g
LDFLAGS = ${LIBS}
# Solaris
#CFLAGS = -fast ${INCS} -DVERSION=\"${VERSION}\"
#LDFLAGS = ${LIBS}
# compiler and linker
CC = cc

410
lexer.c Normal file
View file

@ -0,0 +1,410 @@
#include "lexer.h"
#include <stdbool.h>
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Append a token of the given type to the end of the lexer's token list.
 *
 * The lexeme is not copied: it points at l->source + l->index, so this must
 * be called while l->index is still on the first character of the token, and
 * the source buffer must outlive the token list. The token position is the
 * lexer's current row/column.
 *
 * Terminates the process if the token cannot be allocated.
 */
static void add_token(lexer *l, token_type type, usize len)
{
	token *t = malloc(sizeof *t);
	if (!t) {
		fprintf(stderr, "xcc: out of memory\n");
		exit(EXIT_FAILURE);
	}

	t->type = type;
	t->lexeme_len = len;
	t->lexeme = l->source + l->index;
	t->position.row = l->row;
	t->position.column = l->column;
	/* The new node is always the tail; without this the list is unterminated. */
	t->next = NULL;

	if (!l->tokens) {
		l->tokens = t;
	} else {
		l->tail->next = t;
	}
	l->tail = t;
}
/*
 * Append a TOKEN_ERROR token to the end of the lexer's token list.
 *
 * msg is stored as the token's lexeme without being copied, so it must stay
 * valid for as long as the token list is used (callers in this file pass
 * string literals). The token position is the lexer's current row/column.
 *
 * Terminates the process if the token cannot be allocated.
 */
static void add_error(lexer *l, char *msg)
{
	token *t = malloc(sizeof *t);
	if (!t) {
		fprintf(stderr, "xcc: out of memory\n");
		exit(EXIT_FAILURE);
	}

	t->type = TOKEN_ERROR;
	t->lexeme_len = strlen(msg);
	t->lexeme = msg;
	t->position.row = l->row;
	t->position.column = l->column;
	/* The new node is always the tail; without this the list is unterminated. */
	t->next = NULL;

	if (!l->tokens) {
		l->tokens = t;
	} else {
		l->tail->next = t;
	}
	l->tail = t;
}
/*
 * Lex a numeric literal starting at l->index (which must be on a digit).
 *
 * Emits TOKEN_INTEGER for a run of digits, or TOKEN_FLOAT when the digits
 * contain a single '.' that is immediately followed by another digit.
 * A second '.' ends the literal, so "1.23.4" is FLOAT "1.23", DOT, INTEGER "4".
 * On return l->index is one past the last character of the literal.
 */
static void parse_number(lexer *l)
{
	const usize start = l->index;
	bool is_float = false;

	/* <ctype.h> functions require an unsigned char value (or EOF). */
	while (isdigit((unsigned char)l->source[l->index])) {
		l->index += 1;
		/* Accept the decimal point only once, and only when a digit follows it. */
		if (!is_float
		    && l->source[l->index] == '.'
		    && isdigit((unsigned char)l->source[l->index + 1])) {
			is_float = true;
			l->index += 1; /* the digit after the dot is consumed by the loop */
		}
	}

	const usize len = l->index - start;

	/* add_token() reads the lexeme from l->index, so rewind to the start. */
	l->index = start;
	add_token(l, is_float ? TOKEN_FLOAT : TOKEN_INTEGER, len);
	l->index = start + len;
}
/*
 * Lex an identifier starting at l->index: a run of letters, digits and '_'.
 *
 * Always emits TOKEN_IDENTIFIER. On return l->index is one past the last
 * character of the identifier.
 *
 * TODO: keyword lookup is not implemented yet, so the keyword token types
 * declared in lexer.h (TOKEN_AUTO ... TOKEN_TYPE_UNSIGNED) are never produced.
 */
static void parse_identifier(lexer *l)
{
	const usize start = l->index;
	usize len = 0;

	/* <ctype.h> functions require an unsigned char value (or EOF). */
	unsigned char c = (unsigned char)l->source[start];
	while (isalnum(c) || c == '_') {
		len += 1;
		c = (unsigned char)l->source[start + len];
	}

	/* l->index still points at the first character, as add_token() expects. */
	add_token(l, TOKEN_IDENTIFIER, len);
	l->index = start + len;
}
/*
 * Lex the body of a string literal. l->index must be on the first character
 * after the opening quote.
 *
 * On success emits TOKEN_STRING whose lexeme is the raw text between the
 * quotes (escape sequences are kept undecoded) and leaves l->index one past
 * the closing quote. A backslash followed by any character other than a
 * newline or the terminating NUL is skipped as a pair, so an escaped quote
 * does not end the literal.
 *
 * If a newline or the end of the source is reached first, emits an error
 * token and leaves l->index on that newline / NUL.
 */
static void parse_string(lexer *l)
{
	const usize start = l->index;
	usize len = 0;

	for (;;) {
		const char c = l->source[start + len];

		if (c == '"') {
			break;
		}
		if (c == '\0' || c == '\n') {
			l->index = start + len;
			add_error(l, "unclosed string literal.");
			return;
		}
		if (c == '\\') {
			const char escaped = l->source[start + len + 1];
			if (escaped != '\0' && escaped != '\n') {
				/* Skip the escaped character together with its backslash. */
				len += 2;
				continue;
			}
		}
		len += 1;
	}

	/* l->index still points at the first body character, as add_token() expects. */
	add_token(l, TOKEN_STRING, len);
	l->index = start + len + 1;
}
static bool parse_special(lexer *l)
{
switch (l->source[l->index]) {
case '+':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_PLUS_EQ, 2);
l->index += 2;
} else if (l->source[l->index+1] == '+') {
add_token(l, TOKEN_PLUS_PLUS, 2);
l->index += 2;
} else {
add_token(l, TOKEN_PLUS, 1);
l->index += 1;
}
return true;
case '-':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_MINUS_EQ, 2);
l->index += 2;
} else if (l->source[l->index+1] == '-') {
add_token(l, TOKEN_MINUS_MINUS, 2);
l->index += 2;
} else if (l->source[l->index+1] == '>') {
add_token(l, TOKEN_ARROW, 2);
l->index += 2;
} else {
add_token(l, TOKEN_MINUS, 1);
l->index += 1;
}
return true;
case '/':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_SLASH_EQ, 2);
l->index += 2;
} else {
add_token(l, TOKEN_SLASH, 1);
l->index += 1;
}
return true;
case '*':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_STAR_EQ, 2);
l->index += 2;
} else {
add_token(l, TOKEN_STAR, 1);
l->index += 1;
}
return true;
case '%':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_PERC_EQ, 2);
l->index += 2;
} else {
add_token(l, TOKEN_PERC, 1);
l->index += 1;
}
return true;
case '&':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_AND_EQ, 2);
l->index += 2;
} else if (l->source[l->index+1] == '&') {
add_token(l, TOKEN_DOUBLE_AND, 2);
l->index += 2;
} else {
add_token(l, TOKEN_AND, 1);
l->index += 1;
}
return true;
case '^':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_HAT_EQ, 2);
l->index += 2;
} else {
add_token(l, TOKEN_HAT, 1);
l->index += 1;
}
return true;
case '|':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_PIPE_EQ, 2);
l->index += 2;
} else if (l->source[l->index+1] == '|') {
add_token(l, TOKEN_OR, 2);
l->index += 2;
} else {
add_token(l, TOKEN_PIPE, 1);
l->index += 1;
}
return true;
case '=':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_DOUBLE_EQ, 2);
l->index += 2;
} else {
add_token(l, TOKEN_EQ, 1);
l->index += 1;
}
return true;
case '>':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_GREATER_EQ, 2);
l->index += 2;
} else if (l->source[l->index+1] == '>') {
if (l->source[l->index+2] == '=') {
add_token(l, TOKEN_RSHIFT_EQ, 3);
l->index += 3;
return true;
}
add_token(l, TOKEN_RSHIFT, 2);
l->index += 2;
} else {
add_token(l, TOKEN_GREATER_THAN, 1);
l->index += 1;
}
return true;
case '<':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_LESS_EQ, 2);
l->index += 2;
} else if (l->source[l->index+1] == '<') {
if (l->source[l->index+2] == '=') {
add_token(l, TOKEN_LSHIFT_EQ, 3);
l->index += 3;
return true;
}
add_token(l, TOKEN_LSHIFT, 2);
l->index += 2;
} else {
add_token(l, TOKEN_LESS_THAN, 1);
l->index += 1;
}
return true;
case '!':
if (l->source[l->index+1] == '=') {
add_token(l, TOKEN_NOT_EQ, 2);
l->index += 2;
} else {
add_token(l, TOKEN_BANG, 1);
l->index += 1;
}
return true;
case ':':
add_token(l, TOKEN_COLON, 1);
l->index += 1;
return true;
case ';':
add_token(l, TOKEN_SEMICOLON, 1);
l->index += 1;
return true;
case '.':
add_token(l, TOKEN_DOT, 1);
l->index += 1;
return true;
case ',':
add_token(l, TOKEN_COMMA, 1);
l->index += 1;
return true;
case '(':
add_token(l, TOKEN_LPAREN, 1);
l->index += 1;
return true;
case ')':
add_token(l, TOKEN_RPAREN, 1);
l->index += 1;
return true;
case '[':
add_token(l, TOKEN_LSQUARE, 1);
l->index += 1;
return true;
case ']':
add_token(l, TOKEN_RSQUARE, 1);
l->index += 1;
return true;
case '{':
add_token(l, TOKEN_LCURLY, 1);
l->index += 1;
return true;
case '}':
add_token(l, TOKEN_RCURLY, 1);
l->index += 1;
return true;
case '\'':
if (l->source[l->index+1] == '\\') {
if (l->source[l->index+3] != '\'') {
add_error(l, "unclosed character literal.");
l->index += 1;
return true;
}
l->index += 1;
add_token(l, TOKEN_CHAR, 2);
l->index += 3;
return true;
} else {
if (l->source[l->index+2] != '\'') {
add_error(l, "unclosed character literal.");
l->index += 1;
return true;
}
l->index += 1;
add_token(l, TOKEN_CHAR, 1);
l->index += 2;
return true;
}
default:
return false;
}
}
/*
 * Tokenize l->source[0 .. l->size) into l->tokens.
 *
 * Skips whitespace, line comments and block comments while keeping
 * l->row / l->column in step with l->index, and dispatches everything else
 * to parse_special(), parse_number(), parse_identifier() and parse_string().
 * Characters that start none of those are skipped silently.
 *
 * l->column is bumped before each character is examined, so a token records
 * the column of its first character; after a token it is advanced to the
 * column of the token's last character.
 *
 * The source must be NUL-terminated at l->source[l->size]: the one-character
 * lookahead used here and in the helpers relies on it.
 */
static void parse(lexer *l)
{
	while (l->index < l->size) {
		const char c = l->source[l->index];
		const usize head = l->index;

		l->column += 1;

		if (c == '\n') {
			l->index += 1;
			l->row += 1;
			l->column = 0;
			continue;
		}

		if (c == '/' && l->source[l->index + 1] == '/') {
			/* Line comment: stop on the newline (or at end of input). */
			while (l->index < l->size && l->source[l->index] != '\n') {
				l->index += 1;
			}
			l->column += (l->index - head - 1);
			/* Let the next iteration see the newline so the row is counted. */
			continue;
		}

		if (c == '/' && l->source[l->index + 1] == '*') {
			/* Index of the first character of the line the cursor is on. */
			usize line_start = head;
			bool crossed_line = false;

			l->index += 2;
			while (l->index < l->size) {
				if (l->source[l->index] == '*' && l->source[l->index + 1] == '/') {
					l->index += 2;
					break;
				}
				if (l->source[l->index] == '\n') {
					l->row += 1;
					line_start = l->index + 1;
					crossed_line = true;
				}
				l->index += 1;
			}
			if (crossed_line) {
				/* Column of the last consumed character on the final line. */
				l->column = l->index - line_start;
			} else {
				l->column += (l->index - head - 1);
			}
			continue;
		}

		/* <ctype.h> functions require an unsigned char value (or EOF). */
		if (isspace((unsigned char)c)) {
			l->index += 1;
			continue;
		}

		if (parse_special(l)) {
			l->column += (l->index - head - 1);
			continue;
		}

		if (isdigit((unsigned char)c)) {
			parse_number(l);
			l->column += (l->index - head - 1);
			continue;
		}

		/* parse_identifier() accepts '_', so an identifier may start with it too. */
		if (isalpha((unsigned char)c) || c == '_') {
			parse_identifier(l);
			l->column += (l->index - head - 1);
			continue;
		}

		if (c == '"') {
			l->index += 1; /* parse_string() starts after the opening quote */
			parse_string(l);
			l->column += (l->index - head - 1);
			continue;
		}

		/* Unrecognised character: skip it. */
		l->index += 1;
	}
}
/*
 * Allocate a lexer for `source` and tokenize it immediately.
 *
 * source: text to tokenize; it must be NUL-terminated at source[size] and
 *         must outlive the returned lexer, because token lexemes point
 *         into it rather than being copied.
 * size:   number of characters in source, excluding the terminating NUL.
 *
 * Returns the lexer with its token list populated (lex->tokens is NULL when
 * no token was produced), or NULL if the lexer itself cannot be allocated.
 * The caller owns the returned lexer and its tokens.
 */
lexer *lexer_init(char *source, usize size)
{
	lexer *lex = malloc(sizeof *lex);
	if (!lex) {
		return NULL;
	}

	/*
	 * parse() increments the column before examining each character and
	 * resets it to 0 after a newline, so starting at 0 makes the first
	 * line use the same column numbering as every later line.
	 */
	lex->column = 0;
	lex->row = 1;
	lex->index = 0;
	lex->size = size;
	lex->tokens = NULL;
	lex->tail = NULL;
	lex->source = source;

	parse(lex);
	return lex;
}

108
lexer.h Normal file
View file

@ -0,0 +1,108 @@
#ifndef LEXER_H
#define LEXER_H
#include "utils.h"
/*
 * Kinds of token stored in a token's `type` field.
 * Enumerator values are implicit (TOKEN_ERROR is 0), so the order of the
 * entries determines their numeric values.
 */
typedef enum {
TOKEN_ERROR, // lexing error; the token's lexeme is the error message
TOKEN_END, // presumably end of input; lexer.c does not emit it yet
TOKEN_PLUS, // +
TOKEN_PLUS_PLUS, // ++
TOKEN_MINUS, // -
TOKEN_MINUS_MINUS, // --
TOKEN_SLASH, // /
TOKEN_PERC, // %
TOKEN_STAR, // *
TOKEN_AND, // &
TOKEN_HAT, // ^
TOKEN_PIPE, // |
TOKEN_LSHIFT, // <<
TOKEN_RSHIFT, // >>
TOKEN_DOUBLE_EQ, // ==
TOKEN_EQ, // =
TOKEN_LESS_THAN, // <
TOKEN_GREATER_THAN, // >
TOKEN_LESS_EQ, // <=
TOKEN_GREATER_EQ, // >=
TOKEN_NOT_EQ, // !=
TOKEN_PLUS_EQ, // +=
TOKEN_MINUS_EQ, // -=
TOKEN_STAR_EQ, // *=
TOKEN_SLASH_EQ, // /=
TOKEN_AND_EQ, // &=
TOKEN_HAT_EQ, // ^=
TOKEN_PIPE_EQ, // |=
TOKEN_PERC_EQ, // %=
TOKEN_LSHIFT_EQ, // <<=
TOKEN_RSHIFT_EQ, // >>=
TOKEN_OR, // ||
TOKEN_DOUBLE_AND, // &&
TOKEN_ARROW, // ->
TOKEN_COLON, // :
TOKEN_SEMICOLON, // ;
TOKEN_DOT, // .
TOKEN_BANG, // !
TOKEN_COMMA, // ,
TOKEN_LPAREN, // (
TOKEN_RPAREN, // )
TOKEN_LSQUARE, // [
TOKEN_RSQUARE, // ]
TOKEN_LCURLY, // {
TOKEN_RCURLY, // }
TOKEN_INTEGER, // run of decimal digits
TOKEN_FLOAT, // digits containing a '.' followed by a digit
TOKEN_IDENTIFIER, // letters, digits and '_'
TOKEN_STRING, // string literal; lexeme excludes the surrounding quotes
TOKEN_CHAR, // character literal; lexeme excludes the surrounding quotes
// Keyword tokens. lexer.c does not produce these yet: its keyword lookup
// is disabled, so keywords are currently lexed as TOKEN_IDENTIFIER.
TOKEN_AUTO,
TOKEN_BREAK,
TOKEN_CASE,
TOKEN_CONTINUE,
TOKEN_DEFAULT,
TOKEN_DO,
TOKEN_ELSE,
TOKEN_ENUM,
TOKEN_EXTERN,
TOKEN_FOR,
TOKEN_GOTO,
TOKEN_IF,
TOKEN_REGISTER,
TOKEN_RETURN,
TOKEN_SIZEOF,
TOKEN_STATIC,
TOKEN_STRUCT,
TOKEN_SWITCH,
TOKEN_TYPEDEF,
TOKEN_UNION,
TOKEN_WHILE,
TOKEN_CONST,
TOKEN_VOLATILE,
TOKEN_TYPE_VOID,
TOKEN_TYPE_CHAR,
TOKEN_TYPE_SHORT,
TOKEN_TYPE_INT,
TOKEN_TYPE_LONG,
TOKEN_TYPE_FLOAT,
TOKEN_TYPE_DOUBLE,
TOKEN_TYPE_SIGNED,
TOKEN_TYPE_UNSIGNED
} token_type;
/* One node of the singly linked token list built by the lexer. */
typedef struct _token {
token_type type; // kind of token
source_pos position; // lexer row/column recorded when the token was created
char *lexeme; // not NUL-terminated: points into the lexer's source, or at the message for TOKEN_ERROR
usize lexeme_len; // number of characters in lexeme
struct _token *next; // following token in the list
} token;
/* Lexer state plus the token list it produces. */
typedef struct {
// column/row: position tracked while scanning; index: offset of the current
// character in source; size: length of source as passed to lexer_init().
usize column, row, index, size;
char *source; // text being tokenized; not copied by lexer_init()
token *tokens; // head of the token list, 0 while the list is empty
token *tail; // last token of the list, used for appending
} lexer;
/*
 * Allocate a lexer for `source` (`size` characters long) and tokenize it
 * right away; the resulting list starts at the returned lexer's `tokens`.
 * Token lexemes point into `source`, so it must outlive the lexer.
 * This header declares no function to release the lexer or its tokens.
 */
lexer *lexer_init(char *source, usize size);
#endif

0
parser.c Normal file
View file

4
parser.h Normal file
View file

@ -0,0 +1,4 @@
#ifndef PARSER_H
#define PARSER_H
#endif

1895
stb_ds.h Normal file

File diff suppressed because it is too large Load diff

66
utils.c Normal file
View file

@ -0,0 +1,66 @@
#include "utils.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdbool.h>
/*
 * Convert the first `len` characters of `s` from decimal text to an integer.
 *
 * s:   text to convert; it does not need to be NUL-terminated.
 * len: number of characters to read, including an optional leading '-'.
 *
 * Returns the parsed value, or 0 when len is 0. The characters are not
 * validated: every character after the optional sign is treated as a decimal
 * digit. The magnitude is accumulated in unsigned arithmetic, so values that
 * do not fit wrap around instead of triggering signed overflow.
 */
i64 parse_int(char *s, usize len)
{
	bool negative = false;

	/* Only look at *s when there is a character to read; this also keeps
	 * `len` from wrapping around below zero. */
	if (len > 0 && *s == '-') {
		s += 1;
		len -= 1;
		negative = true;
	}

	u64 magnitude = 0;
	for (usize i = 0; i < len; i++) {
		magnitude = (magnitude * 10) + (u64)(s[i] - '0');
	}

	if (negative) {
		/* Two's-complement negation in unsigned arithmetic (well defined). */
		magnitude = 0 - magnitude;
	}
	return (i64)magnitude;
}
/*
 * Convert the first `len` characters of `s` from decimal text of the form
 * [-]digits[.digits] to a floating-point value.
 *
 * s:   text to convert; it does not need to be NUL-terminated.
 * len: number of characters to read, including the optional sign and dot.
 *
 * Returns the parsed value, or 0.0 when len is 0. Text without a '.' is
 * treated as a whole number. The characters are not validated: everything
 * except the first '.' is treated as a decimal digit.
 *
 * All digits are first accumulated as one unsigned integer and the result is
 * then divided by 10 once per fractional digit.
 */
f64 parse_float(char *s, usize len)
{
	bool negative = false;

	/* Only look at *s when there is a character to read. */
	if (len > 0 && *s == '-') {
		s += 1;
		len -= 1;
		negative = true;
	}

	/* Position of the decimal point; `len` means "no decimal point". */
	usize point_pos = len;
	for (usize i = 0; i < len; i++) {
		if (s[i] == '.') {
			point_pos = i;
			break;
		}
	}

	u64 digits = 0;
	usize frac_digits = 0;
	for (usize i = 0; i < len; i++) {
		if (i == point_pos) {
			continue;
		}
		digits = (digits * 10) + (u64)(s[i] - '0');
		if (i > point_pos) {
			frac_digits += 1;
		}
	}

	f64 f = (f64)digits;
	for (usize i = 0; i < frac_digits; i++) {
		f /= 10.0;
	}

	return negative ? -f : f;
}

30
utils.h Normal file
View file

@ -0,0 +1,30 @@
#ifndef UTILS_H
#define UTILS_H
#include <stdint.h>
#include <stdint.h>
#include <stddef.h>
/* Short aliases for the fixed-width integer types from <stdint.h>. */
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef int8_t i8;
typedef int16_t i16;
typedef int32_t i32;
typedef int64_t i64;
/* Alias for size_t from <stddef.h>. */
typedef size_t usize;
/* Aliases for the built-in floating-point types. */
typedef float f32;
typedef double f64;
/* Convert the first `len` characters of `s`, decimal digits with an optional
 * leading '-', to an integer. The characters are not validated. */
i64 parse_int(char *s, usize len);
/* Convert the first `len` characters of `s`, of the form [-]digits.digits,
 * to a floating-point value. The characters are not validated. */
f64 parse_float(char *s, usize len);
/* A row/column location in source text, as recorded by the lexer for each token. */
typedef struct {
usize row, column;
} source_pos;
#endif

8
xcc.c Normal file
View file

@ -0,0 +1,8 @@
#include "lexer.h"
#include <string.h>
/*
 * Entry point: runs the lexer over a small hard-coded C program.
 * The resulting token list is not used yet.
 */
int main(void)
{
	char *program = "int main(void)\n{\n1+1;\n}\n";
	size_t program_len = strlen(program);
	lexer *lx = lexer_init(program, program_len);

	(void)lx;
	return 0;
}