#include "parser.h" #include #include #include #include bool has_errors = false; ast_node *parse_expression(parser *p); static ast_node *parse_statement(parser *p); static ast_node *parse_type(parser *p); /* Consume a token in the list. */ static void advance(parser *p) { p->previous = p->tokens; if (p->tokens) p->tokens = p->tokens->next; } /* Get the current token in the list, without consuming */ static token *peek(parser *p) { return p->tokens; } /* * Check if the current token type is the same as `type`, * without consuming it. */ static bool match_peek(parser *p, token_type type) { if (p->tokens) { return p->tokens->type == type; } else { return false; } } /* Same as `match_peek()` but it consumes the token. */ static bool match(parser *p, token_type type) { if (p->tokens) { if (p->tokens->type == type) { advance(p); return true; } } return false; } /* * When an error is encountered, try to find a * token that could define a part of the code * which doesn't depend on the one giving the * error. This is needed to print multiple errors * instead of just failing at the first one. */ static void parser_sync(parser *p) { advance(p); while (p->tokens) { if (p->previous->type == TOKEN_SEMICOLON || p->previous->type == TOKEN_RCURLY) { return; } switch (p->tokens->type) { case TOKEN_STRUCT: case TOKEN_ENUM: case TOKEN_IF: case TOKEN_LOOP: case TOKEN_DO: case TOKEN_RETURN: case TOKEN_SWITCH: return; default: advance(p); } } } /* Print the error message and sync the parser. */ static void error(parser *p, char *msg) { printf("\x1b[31m\x1b[1merror\x1b[0m\x1b[1m:%ld:%ld:\x1b[0m %s\n", p->previous->position.row, p->previous->position.column, msg); has_errors = true; parser_sync(p); } static ast_node *parse_call(parser *p) { ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_CALL; node->expr.call.name = peek(p)->lexeme; node->expr.call.name_len = peek(p)->lexeme_len; advance(p); /* Skip also the opening `(` */ advance(p); /* Call without parameters */ if (match(p, TOKEN_RPAREN)) { node->expr.call.parameters = NULL; return node; } snapshot arena_start = arena_snapshot(p->allocator); node->expr.call.parameters = arena_alloc(p->allocator, sizeof(ast_node)); node->expr.call.parameters->type = NODE_UNIT; node->expr.call.parameters->expr.unit_node.expr = parse_expression(p); ast_node *tail = node->expr.call.parameters; node->expr.call.param_len = 1; /* In this case, there is only one parameter */ if (match(p, TOKEN_RPAREN)) { return node; } if (match(p, TOKEN_COMMA)) { ast_node *expr = parse_expression(p); if (expr) { while (!match(p, TOKEN_RPAREN)) { if (!match(p, TOKEN_COMMA)) { error(p, "expected `)`."); arena_reset_to_snapshot(p->allocator, arena_start); return NULL; } tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); tail->expr.unit_node.next->expr.unit_node.expr = expr; tail = tail->expr.unit_node.next; tail->type = NODE_UNIT; expr = parse_expression(p); if (!expr) { error(p, "expected `)`."); arena_reset_to_snapshot(p->allocator, arena_start); return NULL; } node->expr.call.param_len += 1; } tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); tail->expr.unit_node.next->expr.unit_node.expr = expr; tail = tail->expr.unit_node.next; tail->type = NODE_UNIT; } else { error(p, "expected expression."); arena_reset_to_snapshot(p->allocator, arena_start); return NULL; } } else { error(p, "expected `)`."); arena_reset_to_snapshot(p->allocator, arena_start); return NULL; } return node; } /* Parse expressions with the highest precedence. */ static ast_node *parse_factor(parser *p) { token *t = peek(p); if (match(p, TOKEN_INTEGER)) { ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_INTEGER; node->expr.integer = parse_int(t->lexeme, t->lexeme_len); if (match(p, TOKEN_DOUBLE_DOT)) { ast_node *range = arena_alloc(p->allocator, sizeof(ast_node)); range->type = NODE_RANGE; range->expr.binary.left = node; range->expr.binary.operator = OP_PLUS; snapshot snap = arena_snapshot(p->allocator); ast_node *end = parse_factor(p); if (!end) { range->expr.binary.right = NULL; } else if (end->type != NODE_INTEGER) { arena_reset_to_snapshot(p->allocator, snap); error(p, "expected integer."); return NULL; } else { range->expr.binary.right = end; } return range; } return node; } else if (match(p, TOKEN_FLOAT)) { ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_FLOAT; node->expr.flt = parse_float(t->lexeme, t->lexeme_len); return node; } else if (match_peek(p, TOKEN_IDENTIFIER)) { /* If a `(` is found after an identifier, it should be a call. */ if (p->tokens->next && p->tokens->next->type == TOKEN_LPAREN) { return parse_call(p); } advance(p); ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_IDENTIFIER; node->expr.string.start = t->lexeme; node->expr.string.len = t->lexeme_len; return node; } else if (match(p, TOKEN_STRING)) { ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_STRING; node->expr.string.start = t->lexeme; node->expr.string.len = t->lexeme_len; return node; } else if (match(p, TOKEN_CHAR)) { ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_CHAR; if (t->lexeme_len == 2) { char c; switch (t->lexeme[1]) { case 'n': c = '\n'; break; case 't': c = '\t'; break; case 'r': c = '\r'; break; case '0': c = '\0'; break; case '\\': c = '\\'; break; case '\'': c = '\''; break; default: error(p, "invalid escape code."); return NULL; } node->expr.ch = c; } else { node->expr.ch = *(t->lexeme); } return node; } else if (match(p, TOKEN_LPAREN)) { ast_node *node = parse_expression(p); if (!match(p, TOKEN_RPAREN)) { error(p, "unclosed parenthesis"); return NULL; } return node; } return NULL; } ast_node *parse_unary(parser *p) { if (match(p, TOKEN_PLUS_PLUS) || match(p, TOKEN_MINUS) || match(p, TOKEN_MINUS_MINUS) || match(p, TOKEN_STAR) || match(p, TOKEN_AND) || match(p, TOKEN_BANG)) { unary_op op; switch (p->previous->type) { case TOKEN_PLUS_PLUS: op = UOP_INCR; break; case TOKEN_MINUS: op = UOP_MINUS; break; case TOKEN_MINUS_MINUS: op = UOP_DECR; break; case TOKEN_STAR: op = UOP_DEREF; break; case TOKEN_AND: op = UOP_REF; break; case TOKEN_BANG: op = UOP_NOT; break; default: goto end; } ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_UNARY; node->expr.unary.operator = op; node->expr.unary.right = parse_expression(p); return node; } /* Type cast. */ if (match_peek(p, TOKEN_LPAREN) && p->tokens->next && p->tokens->next->type == TOKEN_IDENTIFIER && p->tokens->next->next && p->tokens->next->next->type == TOKEN_RPAREN) { advance(p); ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_CAST; node->expr.cast.type = parse_type(p); advance(p); advance(p); node->expr.cast.value = parse_expression(p); return node; } end: return parse_factor(p); } ast_node *parse_term(parser *p) { ast_node *left = parse_unary(p); while (match_peek(p, TOKEN_STAR) || match_peek(p, TOKEN_SLASH)) { binary_op op = peek(p)->type == TOKEN_STAR ? OP_MUL : OP_DIV; advance(p); ast_node *right = parse_factor(p); ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_BINARY; node->expr.binary.left = left; node->expr.binary.right = right; node->expr.binary.operator = op; left = node; } return left; } /* * Following the recursive descent parser algorithm, this * parses all the arithmetic expressions. */ ast_node *parse_expression(parser *p) { ast_node *left = parse_term(p); while (match_peek(p, TOKEN_PLUS) || match_peek(p, TOKEN_MINUS)) { binary_op op = peek(p)->type == TOKEN_PLUS ? OP_PLUS : OP_MINUS; advance(p); ast_node *right = parse_term(p); ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_BINARY; node->expr.binary.left = left; node->expr.binary.right = right; node->expr.binary.operator = op; left = node; } /* * If after parsing an expression a `[` character * is found, it should be an array subscript expression. */ if (match(p, TOKEN_LSQUARE)) { ast_node *index = parse_expression(p); ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_ARRAY_SUBSCRIPT; node->expr.subscript.expr = left; node->expr.subscript.index = index; if (!match(p, TOKEN_RSQUARE)) { error(p, "expected `]`."); return NULL; } return node; } /* * If after parsing an expression a `.` character * is found, it should be a member access expression. */ if (match(p, TOKEN_DOT)) { if (!match_peek(p, TOKEN_IDENTIFIER)) { error(p, "expected identifier after member access."); return NULL; } ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_ACCESS; node->expr.access.expr = left; node->expr.access.member = parse_expression(p); return node; } /* * If after parsing an expression a `++` or a `--` * token is found, it should be a postfix expression. */ if (match(p, TOKEN_PLUS_PLUS) || match(p, TOKEN_MINUS_MINUS)) { unary_op op; switch (p->previous->type) { case TOKEN_PLUS_PLUS: op = UOP_INCR; break; case TOKEN_MINUS_MINUS: op = UOP_DECR; break; default: break; } ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_POSTFIX; node->expr.unary.operator = op; node->expr.unary.right = left; return node; } if (match(p, TOKEN_DOT)) { if (match(p, TOKEN_LCURLY)) { ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_STRUCT_INIT; if (match(p, TOKEN_RCURLY)) { node->expr.struct_init.members = NULL; return node; } snapshot arena_start = arena_snapshot(p->allocator); node->expr.struct_init.members = arena_alloc(p->allocator, sizeof(ast_node)); node->expr.struct_init.members->type = NODE_UNIT; node->expr.struct_init.members->expr.unit_node.expr = parse_expression(p); ast_node *tail = node->expr.struct_init.members; node->expr.struct_init.members_len = 1; /* In this case, there is only one parameter */ if (match(p, TOKEN_RCURLY)) { return node; } if (match(p, TOKEN_COMMA)) { ast_node *expr = parse_expression(p); if (expr) { while (!match(p, TOKEN_RCURLY)) { if (!match(p, TOKEN_COMMA)) { error(p, "expected `}`."); arena_reset_to_snapshot(p->allocator, arena_start); return NULL; } tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); tail->expr.unit_node.next->expr.unit_node.expr = expr; tail = tail->expr.unit_node.next; tail->type = NODE_UNIT; expr = parse_expression(p); if (!expr) { error(p, "expected `}`."); arena_reset_to_snapshot(p->allocator, arena_start); return NULL; } node->expr.struct_init.members_len += 1; } tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); tail->expr.unit_node.next->expr.unit_node.expr = expr; tail = tail->expr.unit_node.next; tail->type = NODE_UNIT; } else { error(p, "expected member initialization."); arena_reset_to_snapshot(p->allocator, arena_start); return NULL; } } else { error(p, "expected `}`."); arena_reset_to_snapshot(p->allocator, arena_start); return NULL; } return node; } else { error(p, "unexpected `.`"); return NULL; } } if (p->tokens && ((p->tokens->type >= TOKEN_DOUBLE_EQ && p->tokens->type <= TOKEN_NOT_EQ) || (p->tokens->type >= TOKEN_LSHIFT_EQ && p->tokens->type <= TOKEN_DOUBLE_AND))) { binary_op op; switch (p->tokens->type) { case TOKEN_EQ: op = OP_ASSIGN; break; case TOKEN_DOUBLE_EQ: op = OP_EQ; break; case TOKEN_LESS_THAN: op = OP_LT; break; case TOKEN_GREATER_THAN: op = OP_GT; break; case TOKEN_LESS_EQ: op = OP_LE; break; case TOKEN_GREATER_EQ: op = OP_GE; break; case TOKEN_NOT_EQ: op = OP_NEQ; break; case TOKEN_LSHIFT_EQ: op = OP_LSHIFT_EQ; break; case TOKEN_RSHIFT_EQ: op = OP_RSHIFT_EQ; break; case TOKEN_OR: op = OP_OR; break; case TOKEN_DOUBLE_AND: op = OP_AND; break; default: break; } advance(p); ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_BINARY; node->expr.binary.left = left; node->expr.binary.operator = op; node->expr.binary.right = parse_expression(p); if (!node->expr.binary.right) { error(p, "expected expression."); return NULL; } return node; } return left; } static ast_node *parse_compound(parser *p) { if (!match(p, TOKEN_LCURLY)) { error(p, "expected `{`."); return NULL; } ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_UNIT; if (match(p, TOKEN_RCURLY)) { node->expr.unit_node.expr = NULL; node->expr.unit_node.next = NULL; return node; } snapshot arena_start = arena_snapshot(p->allocator); node->expr.unit_node.expr = parse_statement(p); ast_node *tail = node; /* In this case, there is only one parameter */ if (match(p, TOKEN_RCURLY)) { return node; } ast_node *expr = parse_statement(p); if (expr) { while (!match(p, TOKEN_RCURLY)) { tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); tail->expr.unit_node.next->expr.unit_node.expr = expr; tail = tail->expr.unit_node.next; tail->type = NODE_UNIT; expr = parse_statement(p); if (!expr) { error(p, "expected `}`."); arena_reset_to_snapshot(p->allocator, arena_start); return NULL; } } tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); tail->expr.unit_node.next->expr.unit_node.expr = expr; tail = tail->expr.unit_node.next; tail->type = NODE_UNIT; } return node; } static ast_node *parse_for(parser *p) { advance(p); ast_node* node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_FOR; snapshot arena_start = arena_snapshot(p->allocator); node->expr.fr.slices = arena_alloc(p->allocator, sizeof(ast_node)); node->expr.fr.slices->type = NODE_UNIT; node->expr.fr.slices->expr.unit_node.expr = parse_expression(p); ast_node *tail = node->expr.fr.slices; node->expr.fr.slice_len = 1; /* In this case, there is only one slice. */ if (match(p, TOKEN_RPAREN)) { goto parse_captures; } if (match(p, TOKEN_COMMA)) { ast_node *expr = parse_expression(p); if (expr) { while (!match(p, TOKEN_RPAREN)) { if (!match(p, TOKEN_COMMA)) { error(p, "expected `)`."); arena_reset_to_snapshot(p->allocator, arena_start); return NULL; } tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); tail->expr.unit_node.next->expr.unit_node.expr = expr; tail = tail->expr.unit_node.next; tail->type = NODE_UNIT; expr = parse_expression(p); if (!expr) { error(p, "expected `)`."); arena_reset_to_snapshot(p->allocator, arena_start); return NULL; } } tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); tail->expr.unit_node.next->expr.unit_node.expr = expr; tail = tail->expr.unit_node.next; node->expr.fr.slice_len += 1; tail->type = NODE_UNIT; } else { error(p, "expected expression."); arena_reset_to_snapshot(p->allocator, arena_start); return NULL; } } else { error(p, "expected `)`."); arena_reset_to_snapshot(p->allocator, arena_start); return NULL; } parse_captures: if (!match(p, TOKEN_PIPE)) { error(p, "expected capture."); return NULL; } arena_start = arena_snapshot(p->allocator); node->expr.fr.captures = arena_alloc(p->allocator, sizeof(ast_node)); node->expr.fr.captures->type = NODE_UNIT; node->expr.fr.captures->expr.unit_node.expr = parse_expression(p); if (node->expr.fr.captures->expr.unit_node.expr && node->expr.fr.captures->expr.unit_node.expr->type != NODE_IDENTIFIER) { error(p, "captures must be identifiers."); arena_reset_to_snapshot(p->allocator, arena_start); return NULL; } tail = node->expr.fr.captures; node->expr.fr.capture_len = 1; /* In this case, there is only one capture */ if (match(p, TOKEN_PIPE)) { goto parse_body; } if (match(p, TOKEN_COMMA)) { ast_node *expr = parse_expression(p); if (expr) { while (!match(p, TOKEN_PIPE)) { if (!match(p, TOKEN_COMMA)) { error(p, "expected `)`."); arena_reset_to_snapshot(p->allocator, arena_start); return NULL; } tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); tail->expr.unit_node.next->expr.unit_node.expr = expr; tail = tail->expr.unit_node.next; tail->type = NODE_UNIT; expr = parse_expression(p); if (!expr) { error(p, "expected `|`."); arena_reset_to_snapshot(p->allocator, arena_start); return NULL; } } tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); tail->expr.unit_node.next->expr.unit_node.expr = expr; tail = tail->expr.unit_node.next; tail->type = NODE_UNIT; node->expr.fr.capture_len += 1; } else { error(p, "expected identifier."); arena_reset_to_snapshot(p->allocator, arena_start); return NULL; } } else { error(p, "expected `|`."); arena_reset_to_snapshot(p->allocator, arena_start); return NULL; } parse_body:; printf("%d %d\n", node->expr.fr.capture_len, node->expr.fr.slice_len); if (node->expr.fr.capture_len != node->expr.fr.slice_len) { error(p, "invalid number of captures."); return NULL; } ast_node* body = parse_compound(p); node->expr.fr.body = body; return node; } static ast_node *parse_while(parser *p) { ast_node *condition = parse_expression(p); //printf("%d %d\n", p->tokens->type, TOKEN_RCURLY); ast_node *body = parse_compound(p); ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_WHILE; node->expr.whle.body = body; node->expr.whle.condition = condition; return node; } static ast_node *parse_if(parser *p) { ast_node *condition = parse_expression(p); ast_node *body = parse_compound(p); ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_IF; node->expr.whle.body = body; node->expr.whle.condition = condition; return node; } static ast_node *parse_struct(parser *p); static ast_node *parse_type(parser *p) { ast_node *type = NULL; if (match(p, TOKEN_STRUCT)) { type = parse_struct(p); } if (match(p, TOKEN_UNION)) { type = parse_struct(p); type->type = NODE_UNION; } if (match(p, TOKEN_LSQUARE)) { /* Array/slice type */ type = arena_alloc(p->allocator, sizeof(ast_node)); type->type = NODE_PTR_TYPE; if (match(p, TOKEN_STAR)) { type->expr.ptr_type.flags |= PTR_ARRAY; } else { type->expr.ptr_type.flags |= PTR_SLICE; } type->expr.ptr_type.type = parse_type(p); if (!type->expr.ptr_type.type) { error(p, "expected type."); return NULL; } if (!match(p, TOKEN_RSQUARE)) { error(p, "expected `]`."); return NULL; } } if (match(p, TOKEN_STAR)) { type = arena_alloc(p->allocator, sizeof(ast_node)); type->type = NODE_PTR_TYPE; type->expr.ptr_type.flags |= PTR_RAW; type->expr.ptr_type.type = parse_type(p); if (!type->expr.ptr_type.type) { error(p, "expected type."); return NULL; } } if (!type) { type = parse_factor(p); if (!type) { return NULL; } if (type->type != NODE_IDENTIFIER) { return NULL; } } return type; } static member *parse_member(parser *p) { ast_node *type = parse_type(p); if (!match_peek(p, TOKEN_IDENTIFIER)) { error(p, "expected identifier."); return NULL; } member *m = arena_alloc(p->allocator, sizeof(member)); m->type = type; m->name = peek(p)->lexeme; m->name_len = peek(p)->lexeme_len; advance(p); return m; } static variant *parse_variant(parser *p) { if (!match_peek(p, TOKEN_IDENTIFIER)) { error(p, "expected identifier."); return NULL; } variant *v = arena_alloc(p->allocator, sizeof(variant)); v->name = peek(p)->lexeme; v->name_len = peek(p)->lexeme_len; advance(p); if (match(p, TOKEN_EQ)) { v->value = parse_factor(p); if (!v->value) { error(p, "expected integer."); return NULL; } if (v->value->type != NODE_INTEGER) { error(p, "expected integer."); return NULL; } } return v; } static ast_node *parse_enum(parser *p) { ast_node *enm = arena_alloc(p->allocator, sizeof(ast_node)); enm->type = NODE_ENUM; if (match_peek(p, TOKEN_IDENTIFIER)) { /* Named enum */ enm->expr.enm.name = peek(p)->lexeme; enm->expr.enm.name_len = peek(p)->lexeme_len; advance(p); } else if (!match_peek(p, TOKEN_LCURLY)) { error(p, "expected identifier or `{`."); return NULL; } else { enm->expr.enm.name = NULL; enm->expr.enm.name_len = 0; } if (!match(p, TOKEN_LCURLY)) { error(p, "expected `{`."); return NULL; } variant *prev = parse_variant(p); variant *head = prev; enm->expr.enm.variants = head; if (!prev) { error(p, "invalid enum definition. Enums should contain at least 1 variant."); return NULL; } if (!match(p, TOKEN_COMMA)) { if (!match(p, TOKEN_RCURLY)) { error(p, "expected `,`."); return NULL; } else { return enm; } } while (!match(p, TOKEN_RCURLY)) { variant *current = parse_variant(p); if (!current) { error(p, "expected variant definition."); return NULL; } prev->next = current; if (!match(p, TOKEN_COMMA)) { if (!match_peek(p, TOKEN_RCURLY)) { error(p, "expected `,`."); return NULL; } } prev = current; } return enm; } static ast_node *parse_struct(parser *p) { ast_node *structure = arena_alloc(p->allocator, sizeof(ast_node)); structure->type = NODE_STRUCT; if (match_peek(p, TOKEN_IDENTIFIER)) { /* Named structure */ structure->expr.structure.name = peek(p)->lexeme; structure->expr.structure.name_len = peek(p)->lexeme_len; advance(p); } else if (!match_peek(p, TOKEN_LCURLY)) { error(p, "expected identifier or `{`."); return NULL; } else { structure->expr.structure.name = NULL; structure->expr.structure.name_len = 0; } if (!match(p, TOKEN_LCURLY)) { error(p, "expected `{`."); return NULL; } member *prev = parse_member(p); member *head = prev; structure->expr.structure.members = head; if (!prev) { error(p, "invalid struct definition. Structs should contain at least 1 member."); return NULL; } if (!match(p, TOKEN_COMMA)) { if (!match(p, TOKEN_RCURLY)) { error(p, "expected `,`."); return NULL; } else { return structure; } } while (!match(p, TOKEN_RCURLY)) { member *current = parse_member(p); if (!current) { error(p, "expected member definition."); return NULL; } prev->next = current; if (!match(p, TOKEN_COMMA)) { if (!match_peek(p, TOKEN_RCURLY)) { error(p, "expected `,`."); return NULL; } } prev = current; } return structure; } static ast_node *parse_function(parser *p) { ast_node *fn = arena_alloc(p->allocator, sizeof(ast_node)); fn->type = NODE_FUNCTION; fn->expr.function.type = parse_type(p); fn->expr.function.name = peek(p)->lexeme; fn->expr.function.name_len = peek(p)->lexeme_len; advance(p); /* Consume `(` */ advance(p); if (match(p, TOKEN_RPAREN)) { fn->expr.function.body = parse_compound(p);; fn->expr.function.parameters = NULL; fn->expr.function.parameters_len = 0; return fn; } member *prev = parse_member(p); member *head = prev; fn->expr.function.parameters = head; fn->expr.function.parameters_len = 1; if (!match(p, TOKEN_COMMA)) { if (!match(p, TOKEN_RPAREN)) { error(p, "expected `,`."); return NULL; } else { fn->expr.function.body = parse_compound(p); return fn; } } while (!match(p, TOKEN_RPAREN)) { member *current = parse_member(p); if (!current) { error(p, "expected parameter."); return NULL; } prev->next = current; if (!match(p, TOKEN_COMMA)) { if (!match_peek(p, TOKEN_RPAREN)) { error(p, "expected `,`."); return NULL; } } fn->expr.function.parameters_len += 1; prev = current; } fn->expr.function.body = parse_compound(p); return fn; } static ast_node *parse_statement(parser *p) { token *current = p->tokens; ast_node *type = parse_type(p); if (type && match_peek(p, TOKEN_IDENTIFIER)) { if (p->tokens->next && p->tokens->next->type == TOKEN_LPAREN) { /* Function definition. */ p->tokens = current; return parse_function(p); } p->tokens = current; /* Variable declaration. */ ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_VAR_DECL; node->expr.var_decl.type = parse_type(p); node->expr.var_decl.name = p->tokens->lexeme; node->expr.var_decl.name_len = p->tokens->lexeme_len; advance(p); if (match(p, TOKEN_EQ)) { node->expr.var_decl.value = parse_expression(p); } else { node->expr.var_decl.value = NULL; } if (!match(p, TOKEN_SEMICOLON)) { error(p, "expected `;` after statement."); return NULL; } return node; } else { p->tokens = current; } if (match(p, TOKEN_BREAK)) { if (!match(p, TOKEN_SEMICOLON)) { error(p, "expected `;` after `break`."); return NULL; } ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_BREAK; return node; } else if (match(p, TOKEN_RETURN)) { ast_node *expr = parse_expression(p); if (!expr) { error(p, "expected expression after `return`."); return NULL; } if (!match(p, TOKEN_SEMICOLON)) { error(p, "expected `;`."); return NULL; } ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_RETURN; node->expr.ret.value = expr; return node; } else if (match_peek(p, TOKEN_IDENTIFIER) && p->tokens->next && p->tokens->next->type == TOKEN_COLON) { /* In this case, this is a label. */ ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_LABEL; node->expr.label.name = p->tokens->lexeme; node->expr.label.name_len = p->tokens->lexeme_len; advance(p); /* Consume `:` */ advance(p); return node; } else if (match(p, TOKEN_GOTO)) { if (!match_peek(p, TOKEN_IDENTIFIER)) { error(p, "expected label identifier after `goto`."); return NULL; } ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_GOTO; node->expr.label.name = p->tokens->lexeme; node->expr.label.name_len = p->tokens->lexeme_len; advance(p); if (!match(p, TOKEN_SEMICOLON)) { error(p, "expected `;` after `goto`."); return NULL; } return node; } else if (match(p, TOKEN_IMPORT)) { ast_node *expr = parse_expression(p); if (!expr) { error(p, "expected module path after `import`."); return NULL; } if (expr->type != NODE_ACCESS && expr->type != NODE_IDENTIFIER) { error(p, "expected module path after `import`."); return NULL; } ast_node *node = arena_alloc(p->allocator, sizeof(ast_node)); node->type = NODE_IMPORT; node->expr.import.path = expr; if (!match(p, TOKEN_SEMICOLON)) { error(p, "expected `;` after `import`."); return NULL; } return node; } else if (match(p, TOKEN_LOOP)) { if (p->tokens->type == TOKEN_LPAREN) { return parse_for(p); } else { return parse_while(p); } } else if (match(p, TOKEN_IF)) { return parse_if(p); } else if (match(p, TOKEN_STRUCT)) { return parse_struct(p); } else if (match(p, TOKEN_ENUM)) { return parse_enum(p); } else if (match(p, TOKEN_UNION)) { ast_node *u = parse_struct(p); u->type = NODE_UNION; return u; } else { ast_node *expr = parse_expression(p); if (!expr) { return NULL; } if (!match(p, TOKEN_SEMICOLON)) { error(p, "expected `;` after expression."); return NULL; } return expr; } } /* Get a list of expressions to form a full AST. */ static void parse(parser *p) { p->ast = arena_alloc(p->allocator, sizeof(ast_node)); p->ast->type = NODE_UNIT; p->ast->expr.unit_node.expr = parse_statement(p); ast_node *tail = p->ast; ast_node *expr = parse_statement(p); while (expr) { if (expr->type != NODE_FUNCTION && expr->type != NODE_VAR_DECL && expr->type != NODE_IMPORT && expr->type != NODE_STRUCT && expr->type != NODE_ENUM && expr->type != NODE_ENUM) { error(p, "expected function, struct, enum, union, global variable or import statement."); return; } tail->expr.unit_node.next = arena_alloc(p->allocator, sizeof(ast_node)); tail->expr.unit_node.next->expr.unit_node.expr = expr; tail = tail->expr.unit_node.next; tail->type = NODE_UNIT; expr = parse_statement(p); } } parser *parser_init(lexer *l, arena *allocator) { parser *p = arena_alloc(allocator, sizeof(parser)); p->tokens = l->tokens; p->allocator= allocator; parse(p); if (has_errors) { printf("Compilation failed.\n"); exit(1); } return p; }