From 542a94ef8126f16cac0a24efab6dbf9844098990 Mon Sep 17 00:00:00 2001 From: laentropia Date: Wed, 13 May 2026 11:09:22 -0600 Subject: [PATCH] refactor: All of parser.c DAMN, it wasn't that difficult. The one part that bothers me a bit is the check that both the lbp and rbp of the infix operator are valid: it feels like I do the validation twice, but it's fine I guess. Maybe use an else? I'll see if I change it; for now I need to change the evaluator. --- include/parser.h | 6 +- src/parser.c | 334 +++++++++++++++++++++++++++++++++-------------- 2 files changed, 244 insertions(+), 96 deletions(-) diff --git a/include/parser.h b/include/parser.h index 6dcc4fd..0937d70 100644 --- a/include/parser.h +++ b/include/parser.h @@ -11,9 +11,10 @@ typedef enum { NODE_INT, NODE_BINARY_OP, NODE_UNARY_OP, + NODE_PARENTHESIS, } NodeType; -typedef struct { +typedef struct Node { NodeType type; union { int64_t num; @@ -26,6 +27,7 @@ typedef struct { Operator op; struct Node *to; }unary; + Operator par; }; } Node; @@ -64,6 +66,8 @@ typedef struct { }; } ParserU8Result; +Node token_to_node(Token token); + ParserU8Result prefix_rbp(Node node); ParserU8Result postfix_lbp(Node node); ParserU8Result infix_lbp(Node node); diff --git a/src/parser.c b/src/parser.c index 2a09988..f110657 100644 --- a/src/parser.c +++ b/src/parser.c @@ -2,146 +2,281 @@ #include "arraylist.h" #include "lexer.h" #include "arena.h" +#include #include #include #include -uint8_t prefix_rbp(ASTNode node) { - if (node.type == NODE_INTEGER) { - return 0; +ParserU8Result prefix_rbp(Node node) { + if (node.type != NODE_UNARY_OP) { + return (ParserU8Result) { + .is_valid = false, + .err = PARSER_UNEXPECTED_TOKEN + }; } - switch (node.data.unary.op) { + switch (node.unary.op) { case OP_SUB: case OP_ADD: - return 30; + return (ParserU8Result) { + .is_valid = true, + .num = 30, + }; default: - return -1; + return (ParserU8Result) { + .is_valid = false, + .err = PARSER_UNEXPECTED_TOKEN, + }; } } -uint8_t postfix_lbp(ASTNode node) { - if (node.type == 
NODE_INTEGER) { - return 0; +ParserU8Result postfix_lbp(Node node) { + if (node.type != NODE_UNARY_OP) { + return (ParserU8Result) { + .is_valid = false, + .err = PARSER_UNEXPECTED_TOKEN, + }; } - switch (node.data.unary.op) { + switch (node.unary.op) { case OP_FACTORIAL: - return 40; + return (ParserU8Result) { + .is_valid = true, + .num = 40, + }; default: - // needs to be dealt with with resulttypes - return 255; + return (ParserU8Result) { + .is_valid = false, + .err = PARSER_UNEXPECTED_TOKEN, + }; } } -uint8_t infix_lbp(ASTNode node) { - if (node.type == NODE_INTEGER) { - return 0; +ParserU8Result infix_lbp(Node node) { + if (node.type != NODE_BINARY_OP) { + return (ParserU8Result) { + .is_valid = false, + .err = PARSER_UNEXPECTED_TOKEN, + }; } - switch (node.data.binary.op) { + switch (node.binary.op) { case OP_ADD: case OP_SUB: - return 10; - break; + return (ParserU8Result) { + .is_valid = true, + .num = 10, + }; case OP_DIV: case OP_MUL: - return 20; + return (ParserU8Result) { + .is_valid = true, + .num = 20, + }; case OP_POW: - return 51; + return (ParserU8Result) { + .is_valid = true, + .num = 51, + }; default: - return 0; + return (ParserU8Result) { + .is_valid = false, + .err = PARSER_UNEXPECTED_TOKEN, + }; } } -uint8_t infix_rbp(ASTNode node) { - if (node.type == NODE_INTEGER) { - return 0; +ParserU8Result infix_rbp(Node node) { + if (node.type != NODE_BINARY_OP) { + return (ParserU8Result) { + .is_valid = false, + .err = PARSER_UNEXPECTED_TOKEN, + }; } - switch (node.data.binary.op) { + switch (node.binary.op) { case OP_ADD: case OP_SUB: - return 11; - break; + return (ParserU8Result) { + .is_valid = true, + .num = 11, + }; case OP_DIV: case OP_MUL: - return 21; + return (ParserU8Result) { + .is_valid = true, + .num = 21, + }; case OP_POW: - return 50; + return (ParserU8Result) { + .is_valid = true, + .num = 50, + }; default: - return 0; + return (ParserU8Result) { + .is_valid = false, + .err = PARSER_UNEXPECTED_TOKEN, + }; } } -ParseResult 
parse(TokenizeResult tokens) { +Node token_to_node(Token token) { + if (token.type == TOKEN_INTEGER) { + return (Node) { + .type = NODE_INT, + .num = token.num, + }; + } + + switch (token.op) { + case OP_ADD: + return (Node) { + .type = NODE_BINARY_OP, + .binary.op = token.op, + }; + case OP_SUB: + return (Node) { + .type = NODE_BINARY_OP, + .binary.op = token.op, + }; + case OP_MUL: + return (Node) { + .type = NODE_BINARY_OP, + .binary.op = token.op, + }; + case OP_DIV: + return (Node) { + .type = NODE_BINARY_OP, + .binary.op = token.op, + }; + case OP_POW: + return (Node) { + .type = NODE_UNARY_OP, + .binary.op = token.op, + }; + case OP_FACTORIAL: + return (Node) { + .type = NODE_UNARY_OP, + .binary.op = token.op, + }; + case OP_START_PAR: + return (Node) { + .type = NODE_PARENTHESIS, + .binary.op = token.op, + }; + case OP_END_PAR: + return (Node) { + .type = NODE_PARENTHESIS, + .binary.op = token.op, + }; + } +} + +ParserResult parse(TokenizeResult tokens) { ArraySlice *context = arraylist_slice(tokens.arr, 0, arraylist_size(tokens.arr)); - Arena arena = arena_init(sizeof(ASTNode) * arraylist_size(tokens.arr)).arena; + Arena arena = arena_init(sizeof(Node) * arraylist_size(tokens.arr)).arena; - return (ParseResult) { + NodeResult result = parse_expr(context, &arena, 0); + if (!result.is_valid) { + return (ParserResult) { + .is_valid = false, + .err = result.err, + }; + } + + return (ParserResult) { .is_valid = true, .arena = arena, - .tree = parse_expr(context, &arena, 0)}; + .tree = result.node}; } -ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { - // First: Consume a first number +NodeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { arena_ensure_capacity( arena, - sizeof(ASTNode), - alignof(ASTNode) + sizeof(Node), + alignof(Node) ); // shouldn't fail but if it does then what a shame // Get pointer in the arena - ASTNode *left_side = arena_unwrap_pointer( + Node *left_side = arena_unwrap_pointer( arena_alloc( arena, 
- sizeof(ASTNode), - alignof(ASTNode) + sizeof(Node), + alignof(Node) ) ); - arrayslice_next(slice, left_side); + // convert token to node :) + Token current_token; + arrayslice_next(slice, &current_token); + *left_side = token_to_node(current_token); if (left_side->type == NODE_PARENTHESIS && - left_side->data.parenthesis.op == OP_START_PAR) { - left_side = parse_expr(slice, arena, 0); - // HERE CHEKC LATER if slice.next != ')' - ASTNode *end_par; - arrayslice_next(slice, &end_par); - if (end_par->type != NODE_PARENTHESIS || - end_par->data.parenthesis.op != OP_END_PAR) { - // todo - } - return left_side; - } - // if is unary then take prefix bp and continue - // to the right, no need to allocate left side - // because we just did and right side - // WILL return a valid allocated pointer. - if (left_side->type == NODE_UNARY_OP) { - uint8_t rbp = prefix_rbp(*left_side); - ASTNode *righ_side = parse_expr(slice, arena, rbp); + left_side->par == OP_START_PAR) { + NodeResult result = parse_expr(slice, arena, 0); - left_side->data.unary.val = righ_side; + if (!result.is_valid) { + return result; + } + + left_side = result.node; + + // We dont really need to convert to node + // parenthesis are there just to change up + // the bp + Token end_par; + arrayslice_next(slice, &end_par); + + if (end_par.type != TOKEN_OPERATOR || + end_par.op != OP_END_PAR) { + return (NodeResult) { + .is_valid = false, + .err = PARSER_UNMATCHED_PAREN, + }; + } + return (NodeResult) { + .is_valid = true, + .node = left_side, + }; + } + + // If prefix + if (left_side->type == NODE_UNARY_OP) { + ParserU8Result rbp_result = prefix_rbp(*left_side); + if (!rbp_result.is_valid) { + return (NodeResult) { + .is_valid = false, + .err = rbp_result.err, + }; + } + + NodeResult righ_side_result = parse_expr(slice, arena, rbp_result.num); + if (!righ_side_result.is_valid) { + return righ_side_result; + } + + left_side->unary.to = righ_side_result.node; } while (true) { - // Second: Get next one and checn bp if 
(!arrayslice_is_valid(slice)) { break; } - // Here check if not OP error - - ASTNode operator; - // Here should chekc if is operator not some bs - // Third, get operator and binding powers - arrayslice_peek(slice, &operator); + Token operator_token; + arrayslice_peek(slice, &operator_token); + if (operator_token.type != TOKEN_OPERATOR) { + return (NodeResult) { + .is_valid = false, + .err = PARSER_MISSING_OPERAND, + }; + } + Node operator_node = token_to_node(operator_token); // temporary for bad error handling - if (postfix_lbp(operator) != 255) { - if (postfix_lbp(operator) < min_bp) { + ParserU8Result postfix_lbp_result = postfix_lbp(operator_node); + if (postfix_lbp_result.is_valid) { + if (postfix_lbp_result.num < min_bp) { break; } @@ -149,59 +284,65 @@ ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { arrayslice_next(slice, NULL); arena_ensure_capacity( arena, - sizeof(ASTNode), - alignof(ASTNode)); - ASTNode *new_node = arena_unwrap_pointer( + sizeof(Node), + alignof(Node)); + Node *new_node = arena_unwrap_pointer( arena_alloc( arena, - sizeof(ASTNode), - alignof(ASTNode) + sizeof(Node), + alignof(Node) ) ); - *new_node = operator; + *new_node = operator_node; - new_node->data.unary.val = left_side; + new_node->unary.to = left_side; left_side = new_node; continue; } - // check if it has infix or not, if not then error - uint8_t rbp = infix_rbp(operator); - uint8_t lbp = infix_lbp(operator); - - if (rbp != 255 && lbp != 255) { + ParserU8Result rbp_result = infix_rbp(operator_node); + ParserU8Result lbp_result = infix_lbp(operator_node); + if (!rbp_result.is_valid || !lbp_result.is_valid) { + return (NodeResult) { + .is_valid = false, + .err = PARSER_UNEXPECTED_TOKEN, + }; + } + if (rbp_result.is_valid && lbp_result.is_valid) { // If lbp is LESS then stop recursion, // we found the next smaller binding power // or the one with more precedence - if (lbp < min_bp) { + if (lbp_result.num < min_bp) { break; } - // If NOT, then we continue 
wtching ahead // for the next one but taking our current // concern that is rbp of the current operator arrayslice_next(slice, NULL); - ASTNode *right_side = parse_expr(slice, arena, rbp); + NodeResult right_side_result = parse_expr(slice, arena, rbp_result.num); + if (!right_side_result.is_valid) { + return right_side_result; + } arena_ensure_capacity( arena, - sizeof(ASTNode), - alignof(ASTNode)); - ASTNode *new_node = arena_unwrap_pointer( + sizeof(Node), + alignof(Node)); + Node *new_node = arena_unwrap_pointer( arena_alloc( arena, - sizeof(ASTNode), - alignof(ASTNode) + sizeof(Node), + alignof(Node) ) ); - *new_node = operator; + *new_node = operator_node; - new_node->data.binary.left = left_side; - new_node->data.binary.right = right_side; + new_node->binary.left = left_side; + new_node->binary.right = right_side_result.node; left_side = new_node; @@ -213,5 +354,8 @@ ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { // Final: return left side - return left_side; + return (NodeResult){ + .is_valid = true, + .node = left_side, + }; }