#include "parser.h"

#include <stdbool.h>
#include <stdio.h>

ast_node *parse_expression(parser *p);

/*
 * Consume the current token: remember it in `p->previous` and move
 * `p->tokens` to the next list element. At the end of the list only
 * `p->previous` is updated (it becomes NULL).
 */
static void advance(parser *p)
{
	p->previous = p->tokens;
	if (p->tokens) {
		p->tokens = p->tokens->next;
	}
}

/* Return the current token without consuming it (NULL at end of input). */
static token *peek(parser *p)
{
	return p->tokens;
}

/*
 * Check whether the current token has type `type`, without consuming
 * it. Returns false when there are no tokens left.
 */
static bool match_peek(parser *p, token_type type)
{
	return p->tokens && p->tokens->type == type;
}

/* Same as `match_peek()`, but consumes the token when it matches. */
static bool match(parser *p, token_type type)
{
	if (!match_peek(p, type)) {
		return false;
	}
	advance(p);
	return true;
}

/*
 * Error recovery: skip tokens until a point where parsing can
 * plausibly resume, so that several errors can be reported in one run
 * instead of stopping at the first.
 *
 * One token is always skipped. After that, skipping stops when the
 * token just consumed was `;` or `}`, when the current token is one of
 * the keywords listed below, or when the input is exhausted.
 */
static void parser_sync(parser *p)
{
	advance(p);

	while (p->tokens) {
		/* p->previous is non-NULL here: the last advance() consumed a token. */
		if (p->previous->type == TOKEN_SEMICOLON ||
		    p->previous->type == TOKEN_RCURLY) {
			return;
		}

		switch (p->tokens->type) {
		case TOKEN_STRUCT:
		case TOKEN_ENUM:
		case TOKEN_IF:
		case TOKEN_WHILE:
		case TOKEN_FOR:
		case TOKEN_DO:
		case TOKEN_RETURN:
		case TOKEN_SWITCH:
			return;
		default:
			advance(p);
		}
	}
}

/*
 * Print `msg` as an error, prefixed with the row and column of the
 * most recently consumed token, then resynchronise the parser.
 *
 * If no token has been consumed yet (`p->previous` is NULL) the
 * message is printed without a position instead of dereferencing NULL.
 */
static void error(parser *p, const char *msg)
{
	if (p->previous) {
		printf("\x1b[31m\x1b[1merror\x1b[0m\x1b[1m:%ld:%ld:\x1b[0m %s\n",
		       p->previous->position.row,
		       p->previous->position.column, msg);
	} else {
		printf("\x1b[31m\x1b[1merror\x1b[0m\x1b[1m:\x1b[0m %s\n", msg);
	}
	parser_sync(p);
}

/*
 * Translate the character following a backslash into the character it
 * denotes. Returns false (leaving `*out` untouched) for an unsupported
 * escape code.
 */
static bool decode_escape(char code, char *out)
{
	switch (code) {
	case 'n':
		*out = '\n';
		return true;
	case 't':
		*out = '\t';
		return true;
	case 'r':
		*out = '\r';
		return true;
	case '0':
		*out = '\0';
		return true;
	case '\\':
		*out = '\\';
		return true;
	case '\'':
		*out = '\'';
		return true;
	default:
		return false;
	}
}

/*
 * Allocate a binary node `left <op> right` from the parser's arena.
 * Either operand may be NULL if the corresponding sub-parse found
 * nothing.
 */
static ast_node *make_binary(parser *p, binary_op op, ast_node *left,
			     ast_node *right)
{
	ast_node *node = arena_alloc(p->allocator, sizeof(ast_node));
	node->type = NODE_BINARY;
	node->expr.binary.left = left;
	node->expr.binary.right = right;
	node->expr.binary.operator = op;
	return node;
}

/*
 * Parse expressions with the highest precedence: integer, float,
 * identifier, string and character literals, and parenthesised
 * expressions.
 *
 * Returns the new node, or NULL when the current token does not start
 * a factor (nothing is consumed in that case) or when an error was
 * reported.
 */
static ast_node *parse_factor(parser *p)
{
	/* Grab the token first: match() consumes it on success. */
	token *t = peek(p);

	if (match(p, TOKEN_INTEGER)) {
		ast_node *node = arena_alloc(p->allocator, sizeof(ast_node));
		node->type = NODE_INTEGER;
		node->expr.integer = parse_int(t->lexeme, t->lexeme_len);
		return node;
	}

	if (match(p, TOKEN_FLOAT)) {
		ast_node *node = arena_alloc(p->allocator, sizeof(ast_node));
		node->type = NODE_FLOAT;
		node->expr.flt = parse_float(t->lexeme, t->lexeme_len);
		return node;
	}

	if (match(p, TOKEN_IDENTIFIER)) {
		ast_node *node = arena_alloc(p->allocator, sizeof(ast_node));
		node->type = NODE_IDENTIFIER;
		node->expr.string.start = t->lexeme;
		node->expr.string.len = t->lexeme_len;
		return node;
	}

	if (match(p, TOKEN_STRING)) {
		ast_node *node = arena_alloc(p->allocator, sizeof(ast_node));
		node->type = NODE_STRING;
		node->expr.string.start = t->lexeme;
		node->expr.string.len = t->lexeme_len;
		return node;
	}

	if (match(p, TOKEN_CHAR)) {
		ast_node *node = arena_alloc(p->allocator, sizeof(ast_node));
		node->type = NODE_CHAR;

		if (t->lexeme_len == 2) {
			/*
			 * A two-character lexeme is treated as an escape
			 * sequence; only the second character is inspected.
			 * NOTE(review): presumably the lexer strips the
			 * quotes and lexeme[0] is the backslash - confirm.
			 */
			char c;
			if (!decode_escape(t->lexeme[1], &c)) {
				error(p, "invalid escape code.");
				return NULL;
			}
			node->expr.ch = c;
		} else {
			node->expr.ch = t->lexeme[0];
		}
		return node;
	}

	if (match(p, TOKEN_LPAREN)) {
		ast_node *node = parse_expression(p);
		if (!match(p, TOKEN_RPAREN)) {
			error(p, "unclosed parenthesis");
			return NULL;
		}
		return node;
	}

	return NULL;
}

/*
 * Parse a prefix unary expression (`++`, `-`, `--`, `*`, `&`, `!`), or
 * fall through to a factor when the current token is not a prefix
 * operator.
 *
 * The operand is itself parsed as a unary expression, so the operator
 * binds tighter than any binary operator: `-a + b` is `(-a) + b`, and
 * prefix operators can be stacked (`!-x`).
 */
ast_node *parse_unary(parser *p)
{
	token *t = peek(p);
	if (!t) {
		return parse_factor(p);
	}

	unary_op op;
	switch (t->type) {
	case TOKEN_PLUS_PLUS:
		op = UOP_INCR;
		break;
	case TOKEN_MINUS:
		op = UOP_MINUS;
		break;
	case TOKEN_MINUS_MINUS:
		op = UOP_DECR;
		break;
	case TOKEN_STAR:
		op = UOP_DEREF;
		break;
	case TOKEN_AND:
		op = UOP_REF;
		break;
	case TOKEN_BANG:
		op = UOP_NOT;
		break;
	default:
		return parse_factor(p);
	}

	/* Consume the operator only once we know it is one. */
	advance(p);

	ast_node *node = arena_alloc(p->allocator, sizeof(ast_node));
	node->type = NODE_UNARY;
	node->expr.unary.operator = op;
	node->expr.unary.right = parse_unary(p);
	return node;
}

/*
 * Parse a left-associative chain of `*` and `/` operations whose
 * operands are unary expressions.
 */
ast_node *parse_term(parser *p)
{
	ast_node *left = parse_unary(p);

	while (match_peek(p, TOKEN_STAR) || match_peek(p, TOKEN_SLASH)) {
		binary_op op = peek(p)->type == TOKEN_STAR ? OP_MUL : OP_DIV;
		advance(p);

		/*
		 * The right operand goes through parse_unary() so that
		 * `a * -b` is accepted.
		 * NOTE(review): `right` is NULL when the operand is missing;
		 * no error is reported for that here.
		 */
		ast_node *right = parse_unary(p);
		left = make_binary(p, op, left, right);
	}

	return left;
}

/*
 * Entry point of the recursive descent for arithmetic expressions:
 * parses a left-associative chain of `+` and `-` operations whose
 * operands are terms.
 */
ast_node *parse_expression(parser *p)
{
	ast_node *left = parse_term(p);

	while (match_peek(p, TOKEN_PLUS) || match_peek(p, TOKEN_MINUS)) {
		binary_op op = peek(p)->type == TOKEN_PLUS ? OP_PLUS : OP_MINUS;
		advance(p);

		/* NOTE(review): `right` may be NULL if no term follows. */
		ast_node *right = parse_term(p);
		left = make_binary(p, op, left, right);
	}

	return left;
}

/*
 * Build the full AST in `p->ast`: a singly linked list of NODE_UNIT
 * nodes, one per parsed expression. Parsing stops at the first
 * expression (after the initial one) that comes back NULL. The last
 * unit's `next` is always NULL.
 */
static void parse(parser *p)
{
	p->ast = arena_alloc(p->allocator, sizeof(ast_node));
	p->ast->type = NODE_UNIT;
	p->ast->expr.unit_node.next = NULL;
	p->ast->expr.unit_node.expr = parse_expression(p);

	ast_node *tail = p->ast;
	for (ast_node *expr = parse_expression(p); expr;
	     expr = parse_expression(p)) {
		ast_node *unit = arena_alloc(p->allocator, sizeof(ast_node));
		unit->type = NODE_UNIT;
		unit->expr.unit_node.expr = expr;
		/* Terminate explicitly rather than relying on zeroed memory. */
		unit->expr.unit_node.next = NULL;

		tail->expr.unit_node.next = unit;
		tail = unit;
	}
}

/*
 * Allocate a parser from `allocator`, point it at the token list
 * produced by `l`, and parse the whole list immediately. The resulting
 * AST is available in the returned parser's `ast` field.
 */
parser *parser_init(lexer *l, arena *allocator)
{
	parser *p = arena_alloc(allocator, sizeof(parser));
	p->tokens = l->tokens;
	p->previous = NULL;
	p->ast = NULL;
	p->allocator = allocator;
	parse(p);
	return p;
}