From 2a73f5f9d66063ad9cd808982bce23a11eabe252 Mon Sep 17 00:00:00 2001 From: laentropia Date: Wed, 13 May 2026 09:37:15 -0600 Subject: [PATCH 01/11] refactor: delete ASTNode, add Token to lexer So, total refactor, now we serious. I feel ASTNode was feeling very bloated so we need to rewrite and adapt everything, by now lets get the lexer working again, is already well written for me at least. --- include/lexer.h | 39 ++++++++++++--------------------------- 1 file changed, 12 insertions(+), 27 deletions(-) diff --git a/include/lexer.h b/include/lexer.h index 4af0f9d..b997eb8 100644 --- a/include/lexer.h +++ b/include/lexer.h @@ -8,12 +8,9 @@ // For identifing typedef enum { - NODE_INTEGER, - NODE_BINARY_OP, - NODE_UNARY_OP, - NODE_PARENTHESIS, -} ASTNodeType; - + TOKEN_INTEGER, + TOKEN_OPERATOR, +} TokenType; // For classify operators typedef enum { OP_ADD, @@ -36,25 +33,13 @@ typedef enum { } LexerErr; // Can be thought as tokens, they will be used by the parser. -typedef struct ASTNode { - ASTNodeType type; +typedef struct { + TokenType type; union { - int64_t integer; - struct { - struct ASTNode *left; - struct ASTNode *right; - Operator op; - } binary; - struct { - struct ASTNode *val; - Operator op; - } unary; - struct { - struct ASTNode *val; - Operator op; - } parenthesis; - } data; -} ASTNode; + int64_t num; + Operator op; + }; +} Token; typedef struct { bool is_valid; @@ -68,9 +53,9 @@ typedef struct { bool is_valid; union { LexerErr err; - ASTNode node; + Token token; }; -} ASTNodeResult; +} TokenResult; typedef struct { bool is_valid; @@ -82,7 +67,7 @@ typedef struct { // Lexer funtions as well as few functionality TokenizeResult tokenize(const char* input); -ASTNodeResult tokenize_number(const char* input, size_t *offset); +TokenResult tokenize_number(const char* input, size_t *offset); LexerI64Result string_to_integer(const char buf[]); bool isoperator(int c); Operator char_to_operator(int c); -- 2.51.0 From f3373123e1d624e5d6ae9c4e4f3f738cae6f4571 Mon 
Sep 17 00:00:00 2001 From: laentropia Date: Wed, 13 May 2026 09:49:28 -0600 Subject: [PATCH 02/11] refactor: adapted lexer to work with new tokens Now its fine, the code i find it clear if one just sits down to read it for a moment, next is the parser that REALLY needs reworking and a few helper structs. --- include/lexer.h | 2 +- src/lexer.c | 36 +++++++++++++++++------------------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/include/lexer.h b/include/lexer.h index b997eb8..a8d0cd8 100644 --- a/include/lexer.h +++ b/include/lexer.h @@ -61,7 +61,7 @@ typedef struct { bool is_valid; union { LexerErr err; - int64_t number; + int64_t num; }; } LexerI64Result; diff --git a/src/lexer.c b/src/lexer.c index 2115cc4..67655a2 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -15,7 +15,7 @@ typedef enum { TokenizeResult tokenize(const char *input) { - ArrayList *arr = arraylist_init(64, sizeof(ASTNode)); + ArrayList *arr = arraylist_init(64, sizeof(Token)); size_t offset = 0; while ( @@ -24,19 +24,17 @@ TokenizeResult tokenize(const char *input) { input[offset] != '\0') { if (isdigit(input[offset])) { - ASTNodeResult result = tokenize_number(input, &offset); + TokenResult result = tokenize_number(input, &offset); if (!result.is_valid) { return (TokenizeResult) {.is_valid = false, .err = result.err}; } - arraylist_push_back(arr, &result.node); + arraylist_push_back(arr, &result.token); } else if (isoperator(input[offset])) { - ASTNode op_node = { - .type = NODE_BINARY_OP, - .data.binary.op = char_to_operator(input[offset]), - .data.binary.left = NULL, - .data.binary.right = NULL, + Token op_node = { + .type = TOKEN_OPERATOR, + .op = char_to_operator(input[offset]), }; arraylist_push_back(arr, &op_node); @@ -60,7 +58,7 @@ TokenizeResult tokenize(const char *input) { // CURRENTLY, it only supports ints, not clear how floating // point is implemented but i'll figure it out -ASTNodeResult tokenize_number(const char *input, size_t *offset) { +TokenResult 
tokenize_number(const char *input, size_t *offset) { char buf[64] = { '\0' }; size_t buf_pos = 0; bool is_integer = true; // Will later be used to differentiate fractions @@ -71,7 +69,7 @@ ASTNodeResult tokenize_number(const char *input, size_t *offset) { buf[buf_pos] = input[current]; if (buf_pos >= sizeof(buf)) { - return (ASTNodeResult) { + return (TokenResult) { .is_valid = false, .err = LEXER_BUF_OVERFLOW}; } @@ -80,23 +78,23 @@ ASTNodeResult tokenize_number(const char *input, size_t *offset) { buf_pos++; } - ASTNode new_node; + Token new_token; if (is_integer) { - new_node.type = NODE_INTEGER; - LexerI64Result status = string_to_integer(buf); + new_token.type = TOKEN_INTEGER; + LexerI64Result result = string_to_integer(buf); - if (!status.is_valid) { - return (ASTNodeResult) {.is_valid = false, .err = status.err}; + if (!result.is_valid) { + return (TokenResult) {.is_valid = false, .err = result.err}; } - new_node.data.integer = status.number; + new_token.num = result.num; *offset = current; - return (ASTNodeResult) {.is_valid = true, .node = new_node}; + return (TokenResult) {.is_valid = true, .token = new_token}; } - return (ASTNodeResult) { + return (TokenResult) { .is_valid = false, .err = LEXER_FAILED_NUMBER_CONVERSION}; } @@ -122,7 +120,7 @@ LexerI64Result string_to_integer(const char *buf) { c++; } - return (LexerI64Result) {.is_valid = true, .number = count}; + return (LexerI64Result) {.is_valid = true, .num = count}; } bool isoperator(int c) { -- 2.51.0 From 80e05a9acfdd580d06645a3d9ad712ebbb5a1990 Mon Sep 17 00:00:00 2001 From: laentropia Date: Wed, 13 May 2026 10:02:55 -0600 Subject: [PATCH 03/11] refactor: changed parser.h, added Node So just added node back but now clearly separated by tokens and nodes of the AST as it should be, now real rework the mess that is the parser --- include/parser.h | 56 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/include/parser.h b/include/parser.h 
index 75bc70c..6dcc4fd 100644 --- a/include/parser.h +++ b/include/parser.h @@ -4,11 +4,30 @@ #include "lexer.h" #include "arena.h" #include "arraylist.h" +#include #include +typedef enum { + NODE_INT, + NODE_BINARY_OP, + NODE_UNARY_OP, +} NodeType; + typedef struct { - ASTNode *head; -} AST; + NodeType type; + union { + int64_t num; + struct { + Operator op; + struct Node *left; + struct Node *right; + }binary; + struct { + Operator op; + struct Node *to; + }unary; + }; +} Node; typedef enum { PARSER_OK = 0, @@ -24,20 +43,33 @@ typedef struct { ParserErr err; struct { Arena arena; - ASTNode *tree; + Node *tree; }; }; -} ParseResult; +} ParserResult; -ASTNode *nud(ArraySlice *slice); -ASTNode *led(ArraySlice *slice, size_t right_precedence); +typedef struct { + bool is_valid; + union { + ParserErr err; + Node *node; + }; +} NodeResult; -uint8_t prefix_rbp(ASTNode node); -uint8_t postfix_lbp(ASTNode node); -uint8_t infix_lbp(ASTNode node); -uint8_t infix_rbp(ASTNode node); +typedef struct { + bool is_valid; + union { + ParserErr err; + uint8_t num; + }; +} ParserU8Result; -ParseResult parse(TokenizeResult tokens); -ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp); +ParserU8Result prefix_rbp(Node node); +ParserU8Result postfix_lbp(Node node); +ParserU8Result infix_lbp(Node node); +ParserU8Result infix_rbp(Node node); + +ParserResult parse(TokenizeResult tokens); +NodeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp); #endif // !PARSER_H -- 2.51.0 From 542a94ef8126f16cac0a24efab6dbf9844098990 Mon Sep 17 00:00:00 2001 From: laentropia Date: Wed, 13 May 2026 11:09:22 -0600 Subject: [PATCH 04/11] refactor: All of parser.c DAMN, it wasn't that difficult, just bothers me a bit the part that checks if both lbp and rbp of the infix are valid, like i do validation twice but is fine i guess, maybe using an else?, i'll see if i change it, for now i need to change the evaluator --- include/parser.h | 6 +- src/parser.c | 334 
+++++++++++++++++++++++++++++++++-------------- 2 files changed, 244 insertions(+), 96 deletions(-) diff --git a/include/parser.h b/include/parser.h index 6dcc4fd..0937d70 100644 --- a/include/parser.h +++ b/include/parser.h @@ -11,9 +11,10 @@ typedef enum { NODE_INT, NODE_BINARY_OP, NODE_UNARY_OP, + NODE_PARENTHESIS, } NodeType; -typedef struct { +typedef struct Node { NodeType type; union { int64_t num; @@ -26,6 +27,7 @@ typedef struct { Operator op; struct Node *to; }unary; + Operator par; }; } Node; @@ -64,6 +66,8 @@ typedef struct { }; } ParserU8Result; +Node token_to_node(Token token); + ParserU8Result prefix_rbp(Node node); ParserU8Result postfix_lbp(Node node); ParserU8Result infix_lbp(Node node); diff --git a/src/parser.c b/src/parser.c index 2a09988..f110657 100644 --- a/src/parser.c +++ b/src/parser.c @@ -2,146 +2,281 @@ #include "arraylist.h" #include "lexer.h" #include "arena.h" +#include #include #include #include -uint8_t prefix_rbp(ASTNode node) { - if (node.type == NODE_INTEGER) { - return 0; +ParserU8Result prefix_rbp(Node node) { + if (node.type != NODE_UNARY_OP) { + return (ParserU8Result) { + .is_valid = false, + .err = PARSER_UNEXPECTED_TOKEN + }; } - switch (node.data.unary.op) { + switch (node.unary.op) { case OP_SUB: case OP_ADD: - return 30; + return (ParserU8Result) { + .is_valid = true, + .num = 30, + }; default: - return -1; + return (ParserU8Result) { + .is_valid = false, + .err = PARSER_UNEXPECTED_TOKEN, + }; } } -uint8_t postfix_lbp(ASTNode node) { - if (node.type == NODE_INTEGER) { - return 0; +ParserU8Result postfix_lbp(Node node) { + if (node.type != NODE_UNARY_OP) { + return (ParserU8Result) { + .is_valid = false, + .err = PARSER_UNEXPECTED_TOKEN, + }; } - switch (node.data.unary.op) { + switch (node.unary.op) { case OP_FACTORIAL: - return 40; + return (ParserU8Result) { + .is_valid = true, + .num = 40, + }; default: - // needs to be dealt with with resulttypes - return 255; + return (ParserU8Result) { + .is_valid = false, + .err 
= PARSER_UNEXPECTED_TOKEN, + }; } } -uint8_t infix_lbp(ASTNode node) { - if (node.type == NODE_INTEGER) { - return 0; +ParserU8Result infix_lbp(Node node) { + if (node.type != NODE_BINARY_OP) { + return (ParserU8Result) { + .is_valid = false, + .err = PARSER_UNEXPECTED_TOKEN, + }; } - switch (node.data.binary.op) { + switch (node.binary.op) { case OP_ADD: case OP_SUB: - return 10; - break; + return (ParserU8Result) { + .is_valid = true, + .num = 10, + }; case OP_DIV: case OP_MUL: - return 20; + return (ParserU8Result) { + .is_valid = true, + .num = 20, + }; case OP_POW: - return 51; + return (ParserU8Result) { + .is_valid = true, + .num = 51, + }; default: - return 0; + return (ParserU8Result) { + .is_valid = false, + .err = PARSER_UNEXPECTED_TOKEN, + }; } } -uint8_t infix_rbp(ASTNode node) { - if (node.type == NODE_INTEGER) { - return 0; +ParserU8Result infix_rbp(Node node) { + if (node.type != NODE_BINARY_OP) { + return (ParserU8Result) { + .is_valid = false, + .err = PARSER_UNEXPECTED_TOKEN, + }; } - switch (node.data.binary.op) { + switch (node.binary.op) { case OP_ADD: case OP_SUB: - return 11; - break; + return (ParserU8Result) { + .is_valid = true, + .num = 11, + }; case OP_DIV: case OP_MUL: - return 21; + return (ParserU8Result) { + .is_valid = true, + .num = 21, + }; case OP_POW: - return 50; + return (ParserU8Result) { + .is_valid = true, + .num = 50, + }; default: - return 0; + return (ParserU8Result) { + .is_valid = false, + .err = PARSER_UNEXPECTED_TOKEN, + }; } } -ParseResult parse(TokenizeResult tokens) { +Node token_to_node(Token token) { + if (token.type == TOKEN_INTEGER) { + return (Node) { + .type = NODE_INT, + .num = token.num, + }; + } + + switch (token.op) { + case OP_ADD: + return (Node) { + .type = NODE_BINARY_OP, + .binary.op = token.op, + }; + case OP_SUB: + return (Node) { + .type = NODE_BINARY_OP, + .binary.op = token.op, + }; + case OP_MUL: + return (Node) { + .type = NODE_BINARY_OP, + .binary.op = token.op, + }; + case OP_DIV: + return 
(Node) { + .type = NODE_BINARY_OP, + .binary.op = token.op, + }; + case OP_POW: + return (Node) { + .type = NODE_UNARY_OP, + .binary.op = token.op, + }; + case OP_FACTORIAL: + return (Node) { + .type = NODE_UNARY_OP, + .binary.op = token.op, + }; + case OP_START_PAR: + return (Node) { + .type = NODE_PARENTHESIS, + .binary.op = token.op, + }; + case OP_END_PAR: + return (Node) { + .type = NODE_PARENTHESIS, + .binary.op = token.op, + }; + } +} + +ParserResult parse(TokenizeResult tokens) { ArraySlice *context = arraylist_slice(tokens.arr, 0, arraylist_size(tokens.arr)); - Arena arena = arena_init(sizeof(ASTNode) * arraylist_size(tokens.arr)).arena; + Arena arena = arena_init(sizeof(Node) * arraylist_size(tokens.arr)).arena; - return (ParseResult) { + NodeResult result = parse_expr(context, &arena, 0); + if (!result.is_valid) { + return (ParserResult) { + .is_valid = false, + .err = result.err, + }; + } + + return (ParserResult) { .is_valid = true, .arena = arena, - .tree = parse_expr(context, &arena, 0)}; + .tree = result.node}; } -ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { - // First: Consume a first number +NodeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { arena_ensure_capacity( arena, - sizeof(ASTNode), - alignof(ASTNode) + sizeof(Node), + alignof(Node) ); // shouldn't fail but if it does then what a shame // Get pointer in the arena - ASTNode *left_side = arena_unwrap_pointer( + Node *left_side = arena_unwrap_pointer( arena_alloc( arena, - sizeof(ASTNode), - alignof(ASTNode) + sizeof(Node), + alignof(Node) ) ); - arrayslice_next(slice, left_side); + // convert token to node :) + Token current_token; + arrayslice_next(slice, &current_token); + *left_side = token_to_node(current_token); if (left_side->type == NODE_PARENTHESIS && - left_side->data.parenthesis.op == OP_START_PAR) { - left_side = parse_expr(slice, arena, 0); - // HERE CHEKC LATER if slice.next != ')' - ASTNode *end_par; - arrayslice_next(slice, &end_par); - 
if (end_par->type != NODE_PARENTHESIS || - end_par->data.parenthesis.op != OP_END_PAR) { - // todo - } - return left_side; - } - // if is unary then take prefix bp and continue - // to the right, no need to allocate left side - // because we just did and right side - // WILL return a valid allocated pointer. - if (left_side->type == NODE_UNARY_OP) { - uint8_t rbp = prefix_rbp(*left_side); - ASTNode *righ_side = parse_expr(slice, arena, rbp); + left_side->par == OP_START_PAR) { + NodeResult result = parse_expr(slice, arena, 0); - left_side->data.unary.val = righ_side; + if (!result.is_valid) { + return result; + } + + left_side = result.node; + + // We dont really need to convert to node + // parenthesis are there just to change up + // the bp + Token end_par; + arrayslice_next(slice, &end_par); + + if (end_par.type != TOKEN_OPERATOR || + end_par.op != OP_END_PAR) { + return (NodeResult) { + .is_valid = false, + .err = PARSER_UNMATCHED_PAREN, + }; + } + return (NodeResult) { + .is_valid = true, + .node = left_side, + }; + } + + // If prefix + if (left_side->type == NODE_UNARY_OP) { + ParserU8Result rbp_result = prefix_rbp(*left_side); + if (!rbp_result.is_valid) { + return (NodeResult) { + .is_valid = false, + .err = rbp_result.err, + }; + } + + NodeResult righ_side_result = parse_expr(slice, arena, rbp_result.num); + if (!righ_side_result.is_valid) { + return righ_side_result; + } + + left_side->unary.to = righ_side_result.node; } while (true) { - // Second: Get next one and checn bp if (!arrayslice_is_valid(slice)) { break; } - // Here check if not OP error - - ASTNode operator; - // Here should chekc if is operator not some bs - // Third, get operator and binding powers - arrayslice_peek(slice, &operator); + Token operator_token; + arrayslice_peek(slice, &operator_token); + if (operator_token.type != TOKEN_OPERATOR) { + return (NodeResult) { + .is_valid = false, + .err = PARSER_MISSING_OPERAND, + }; + } + Node operator_node = token_to_node(operator_token); // 
temporary for bad error handling - if (postfix_lbp(operator) != 255) { - if (postfix_lbp(operator) < min_bp) { + ParserU8Result postfix_lbp_result = postfix_lbp(operator_node); + if (postfix_lbp_result.is_valid) { + if (postfix_lbp_result.num < min_bp) { break; } @@ -149,59 +284,65 @@ ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { arrayslice_next(slice, NULL); arena_ensure_capacity( arena, - sizeof(ASTNode), - alignof(ASTNode)); - ASTNode *new_node = arena_unwrap_pointer( + sizeof(Node), + alignof(Node)); + Node *new_node = arena_unwrap_pointer( arena_alloc( arena, - sizeof(ASTNode), - alignof(ASTNode) + sizeof(Node), + alignof(Node) ) ); - *new_node = operator; + *new_node = operator_node; - new_node->data.unary.val = left_side; + new_node->unary.to = left_side; left_side = new_node; continue; } - // check if it has infix or not, if not then error - uint8_t rbp = infix_rbp(operator); - uint8_t lbp = infix_lbp(operator); - - if (rbp != 255 && lbp != 255) { + ParserU8Result rbp_result = infix_rbp(operator_node); + ParserU8Result lbp_result = infix_lbp(operator_node); + if (!rbp_result.is_valid || !lbp_result.is_valid) { + return (NodeResult) { + .is_valid = false, + .err = PARSER_UNEXPECTED_TOKEN, + }; + } + if (rbp_result.is_valid && lbp_result.is_valid) { // If lbp is LESS then stop recursion, // we found the next smaller binding power // or the one with more precedence - if (lbp < min_bp) { + if (lbp_result.num < min_bp) { break; } - // If NOT, then we continue wtching ahead // for the next one but taking our current // concern that is rbp of the current operator arrayslice_next(slice, NULL); - ASTNode *right_side = parse_expr(slice, arena, rbp); + NodeResult right_side_result = parse_expr(slice, arena, rbp_result.num); + if (!right_side_result.is_valid) { + return right_side_result; + } arena_ensure_capacity( arena, - sizeof(ASTNode), - alignof(ASTNode)); - ASTNode *new_node = arena_unwrap_pointer( + sizeof(Node), + alignof(Node)); + Node 
*new_node = arena_unwrap_pointer( arena_alloc( arena, - sizeof(ASTNode), - alignof(ASTNode) + sizeof(Node), + alignof(Node) ) ); - *new_node = operator; + *new_node = operator_node; - new_node->data.binary.left = left_side; - new_node->data.binary.right = right_side; + new_node->binary.left = left_side; + new_node->binary.right = right_side_result.node; left_side = new_node; @@ -213,5 +354,8 @@ ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { // Final: return left side - return left_side; + return (NodeResult){ + .is_valid = true, + .node = left_side, + }; } -- 2.51.0 From efa0e3bacd16d72ef5c1340c3899084cbf603eb2 Mon Sep 17 00:00:00 2001 From: laentropia Date: Wed, 13 May 2026 12:13:07 -0600 Subject: [PATCH 05/11] refactor: evaluator incomplete. SO, i forgot to implement nud and led correctly and the parser cant tell apart from - as unary and - as binary (+ as well), i need to correct that, move Node * to TreeResult so to use NodeResult with nud and led --- include/evaluator.h | 19 ++++++++- include/parser.h | 1 + src/evaluator.c | 93 +++++++++++++++++++++++++++++++++++++-------- src/lexer.c | 3 ++ src/parser.c | 15 +++++++- 5 files changed, 111 insertions(+), 20 deletions(-) diff --git a/include/evaluator.h b/include/evaluator.h index 7b34e2b..5d5b4fe 100644 --- a/include/evaluator.h +++ b/include/evaluator.h @@ -5,7 +5,22 @@ #include "parser.h" #include -int64_t evaluate(ParseResult context); -int64_t evaluate_tree(ASTNode *tree); +typedef enum { + EVALUATOR_OK, + EVALUATOR_MATH_ERR, + EVALUATOR_INVALID_PARSING, + EVALUATOR_INVALID_TREE, // just to shut up the compiler with the swithces +} EvaluatorErr; + +typedef struct { + bool is_valid; + union { + int64_t val; + EvaluatorErr err; + }; +} EvaluatorResult; + +EvaluatorResult evaluate(ParserResult context); +EvaluatorResult evaluate_tree(Node *tree); #endif // !EVALUATOR_H diff --git a/include/parser.h b/include/parser.h index 0937d70..0139fca 100644 --- a/include/parser.h +++ 
b/include/parser.h @@ -37,6 +37,7 @@ typedef enum { PARSER_MISSING_OPERAND, PARSER_UNMATCHED_PAREN, PARSER_OUT_OF_MEMORY, + PARSER_INVALID_TOKENIZE, } ParserErr; typedef struct { diff --git a/src/evaluator.c b/src/evaluator.c index 551f779..9a8adaf 100644 --- a/src/evaluator.c +++ b/src/evaluator.c @@ -2,36 +2,97 @@ #include "arena.h" #include "lexer.h" #include "parser.h" +#include #include #include -int64_t evaluate_tree(ASTNode *tree) { +EvaluatorResult evaluate_tree(Node *tree) { if (tree->type == NODE_BINARY_OP) { - Operator op = tree->data.binary.op; - ASTNode *left = tree->data.binary.left; - ASTNode *right = tree->data.binary.right; + Operator op = tree->binary.op; + Node *left = tree->binary.left; + Node *right = tree->binary.right; switch (op) { - case OP_ADD: - return evaluate_tree(left) + evaluate_tree(right); - case OP_SUB: - return evaluate_tree(left) - evaluate_tree(right); - case OP_MUL: - return evaluate_tree(left) * evaluate_tree(right); - case OP_DIV: - return evaluate_tree(left) / evaluate_tree(right); - case OP_POW: - return pow(evaluate_tree(left), evaluate_tree(right)); + case OP_ADD: { + EvaluatorResult left_result = evaluate_tree(left); + EvaluatorResult right_result = evaluate_tree(right); + if (!left_result.is_valid || !right_result.is_valid) { + return left_result; + } + return (EvaluatorResult) { + .is_valid = true, + .val = left_result.val + right_result.val, + }; + } + case OP_SUB: { + EvaluatorResult left_result = evaluate_tree(left); + EvaluatorResult right_result = evaluate_tree(right); + if (!left_result.is_valid || !right_result.is_valid) { + return left_result; + } + return (EvaluatorResult) { + .is_valid = true, + .val = left_result.val - right_result.val, + }; + } + case OP_MUL: { + EvaluatorResult left_result = evaluate_tree(left); + EvaluatorResult right_result = evaluate_tree(right); + if (!left_result.is_valid || !right_result.is_valid) { + return left_result; + } + return (EvaluatorResult) { + .is_valid = true, + .val = 
left_result.val * right_result.val, + }; + } + case OP_DIV: { + EvaluatorResult left_result = evaluate_tree(left); + EvaluatorResult right_result = evaluate_tree(right); + if (!left_result.is_valid || !right_result.is_valid) { + return left_result; + } + return (EvaluatorResult) { + .is_valid = true, + .val = left_result.val / right_result.val, + }; + } + case OP_POW: { + EvaluatorResult left_result = evaluate_tree(left); + EvaluatorResult right_result = evaluate_tree(right); + if (!left_result.is_valid || !right_result.is_valid) { + return left_result; + } + return (EvaluatorResult) { + .is_valid = true, + .val = pow(left_result.val, right_result.val), + }; + } + default: + return (EvaluatorResult) { + .is_valid = false, + .err = EVALUATOR_INVALID_TREE, + }; } + } else if (tree->type == NODE_UNARY_OP) { + } int64_t return_val = tree->data.integer; return return_val; } -int64_t evaluate(ParseResult context) { - int64_t result = evaluate_tree(context.tree); +EvaluatorResult evaluate(ParseResult context) { + if (!context.is_valid) { + return (EvaluatorResult) { + .is_valid = false, + .err = EVALUATOR_INVALID_PARSING, + }; + } + + EvaluatorResult result = evaluate_tree(context.tree); + arena_destroy(&context.arena); return result; diff --git a/src/lexer.c b/src/lexer.c index 67655a2..d71d5ea 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -27,6 +27,7 @@ TokenizeResult tokenize(const char *input) { TokenResult result = tokenize_number(input, &offset); if (!result.is_valid) { + arraylist_destroy(&arr); return (TokenizeResult) {.is_valid = false, .err = result.err}; } @@ -41,6 +42,7 @@ TokenizeResult tokenize(const char *input) { } else if (isspace(input[offset])) { // Nothing... 
} else { + arraylist_destroy(&arr); return (TokenizeResult) { .is_valid = false, .err = LEXER_NOT_RECOGNIZED_SYMBOL}; @@ -50,6 +52,7 @@ TokenizeResult tokenize(const char *input) { } if (arraylist_size(arr) < 1) { + arraylist_destroy(&arr); return (TokenizeResult) {.is_valid = false, .err = LEXER_EMPTY_INPUT}; } diff --git a/src/parser.c b/src/parser.c index f110657..e112394 100644 --- a/src/parser.c +++ b/src/parser.c @@ -120,6 +120,8 @@ ParserU8Result infix_rbp(Node node) { } } + + Node token_to_node(Token token) { if (token.type == TOKEN_INTEGER) { return (Node) { @@ -151,7 +153,7 @@ Node token_to_node(Token token) { }; case OP_POW: return (Node) { - .type = NODE_UNARY_OP, + .type = NODE_BINARY_OP, .binary.op = token.op, }; case OP_FACTORIAL: @@ -173,17 +175,27 @@ Node token_to_node(Token token) { } ParserResult parse(TokenizeResult tokens) { + if (!tokens.is_valid) { + return (ParserResult) { + .is_valid = false, + .err = PARSER_INVALID_TOKENIZE, + }; + } + ArraySlice *context = arraylist_slice(tokens.arr, 0, arraylist_size(tokens.arr)); Arena arena = arena_init(sizeof(Node) * arraylist_size(tokens.arr)).arena; NodeResult result = parse_expr(context, &arena, 0); if (!result.is_valid) { + arena_destroy(&arena); + arraylist_destroy(&tokens.arr); return (ParserResult) { .is_valid = false, .err = result.err, }; } + arraylist_destroy(&tokens.arr); return (ParserResult) { .is_valid = true, .arena = arena, @@ -352,7 +364,6 @@ NodeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { break; } - // Final: return left side return (NodeResult){ .is_valid = true, -- 2.51.0 From 90c426f3a44e45fc4cb76642c57507a5b3bb53d7 Mon Sep 17 00:00:00 2001 From: laentropia Date: Wed, 13 May 2026 12:19:17 -0600 Subject: [PATCH 06/11] refactor: moved NodeResult to TreeResult --- include/parser.h | 10 +++++++++- src/parser.c | 22 +++++++++++----------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/include/parser.h b/include/parser.h index 0139fca..e017e18 
100644 --- a/include/parser.h +++ b/include/parser.h @@ -57,6 +57,14 @@ typedef struct { ParserErr err; Node *node; }; +} TreeResult; + +typedef struct { + bool is_valid; + union { + ParserErr err; + Node node; + }; } NodeResult; typedef struct { @@ -75,6 +83,6 @@ ParserU8Result infix_lbp(Node node); ParserU8Result infix_rbp(Node node); ParserResult parse(TokenizeResult tokens); -NodeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp); +TreeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp); #endif // !PARSER_H diff --git a/src/parser.c b/src/parser.c index e112394..08f8c0a 100644 --- a/src/parser.c +++ b/src/parser.c @@ -185,7 +185,7 @@ ParserResult parse(TokenizeResult tokens) { ArraySlice *context = arraylist_slice(tokens.arr, 0, arraylist_size(tokens.arr)); Arena arena = arena_init(sizeof(Node) * arraylist_size(tokens.arr)).arena; - NodeResult result = parse_expr(context, &arena, 0); + TreeResult result = parse_expr(context, &arena, 0); if (!result.is_valid) { arena_destroy(&arena); arraylist_destroy(&tokens.arr); @@ -202,7 +202,7 @@ ParserResult parse(TokenizeResult tokens) { .tree = result.node}; } -NodeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { +TreeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { arena_ensure_capacity( arena, sizeof(Node), @@ -225,7 +225,7 @@ NodeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { if (left_side->type == NODE_PARENTHESIS && left_side->par == OP_START_PAR) { - NodeResult result = parse_expr(slice, arena, 0); + TreeResult result = parse_expr(slice, arena, 0); if (!result.is_valid) { return result; @@ -241,12 +241,12 @@ NodeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { if (end_par.type != TOKEN_OPERATOR || end_par.op != OP_END_PAR) { - return (NodeResult) { + return (TreeResult) { .is_valid = false, .err = PARSER_UNMATCHED_PAREN, }; } - return (NodeResult) { + return (TreeResult) { .is_valid = true, .node = 
left_side, }; @@ -256,13 +256,13 @@ NodeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { if (left_side->type == NODE_UNARY_OP) { ParserU8Result rbp_result = prefix_rbp(*left_side); if (!rbp_result.is_valid) { - return (NodeResult) { + return (TreeResult) { .is_valid = false, .err = rbp_result.err, }; } - NodeResult righ_side_result = parse_expr(slice, arena, rbp_result.num); + TreeResult righ_side_result = parse_expr(slice, arena, rbp_result.num); if (!righ_side_result.is_valid) { return righ_side_result; } @@ -278,7 +278,7 @@ NodeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { Token operator_token; arrayslice_peek(slice, &operator_token); if (operator_token.type != TOKEN_OPERATOR) { - return (NodeResult) { + return (TreeResult) { .is_valid = false, .err = PARSER_MISSING_OPERAND, }; @@ -317,7 +317,7 @@ NodeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { ParserU8Result rbp_result = infix_rbp(operator_node); ParserU8Result lbp_result = infix_lbp(operator_node); if (!rbp_result.is_valid || !lbp_result.is_valid) { - return (NodeResult) { + return (TreeResult) { .is_valid = false, .err = PARSER_UNEXPECTED_TOKEN, }; @@ -335,7 +335,7 @@ NodeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { // for the next one but taking our current // concern that is rbp of the current operator arrayslice_next(slice, NULL); - NodeResult right_side_result = parse_expr(slice, arena, rbp_result.num); + TreeResult right_side_result = parse_expr(slice, arena, rbp_result.num); if (!right_side_result.is_valid) { return right_side_result; } @@ -365,7 +365,7 @@ NodeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { } // Final: return left side - return (NodeResult){ + return (TreeResult){ .is_valid = true, .node = left_side, }; -- 2.51.0 From 70ab06964c2afa6cbfc84e73b627991f6f9d620d Mon Sep 17 00:00:00 2001 From: laentropia Date: Wed, 13 May 2026 16:09:03 -0600 Subject: [PATCH 07/11] addtition: nud 
and led token to node distinction I think i should instead have a nud and led function i guess, may do that next --- include/parser.h | 2 + src/parser.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) diff --git a/include/parser.h b/include/parser.h index e017e18..39dd1eb 100644 --- a/include/parser.h +++ b/include/parser.h @@ -76,6 +76,8 @@ typedef struct { } ParserU8Result; Node token_to_node(Token token); +NodeResult nud(Token token); // Null denotation +NodeResult led(Token token); // Left denotation ParserU8Result prefix_rbp(Node node); ParserU8Result postfix_lbp(Node node); diff --git a/src/parser.c b/src/parser.c index 08f8c0a..7fd95a8 100644 --- a/src/parser.c +++ b/src/parser.c @@ -119,8 +119,114 @@ ParserU8Result infix_rbp(Node node) { }; } } +NodeResult led(Token token) { + if (token.type == TOKEN_INTEGER) { + return (NodeResult) { + .is_valid = false, + .err = PARSER_UNEXPECTED_TOKEN, + }; + } + switch (token.op) { + case OP_ADD: + return (NodeResult) { + .is_valid = true, + .node = (Node) { + .type = NODE_BINARY_OP, + .binary.op = token.op, + } + }; + case OP_SUB: + return (NodeResult) { + .is_valid = true, + .node = (Node) { + .type = NODE_BINARY_OP, + .binary.op = token.op, + } + }; + case OP_MUL: + return (NodeResult) { + .is_valid = true, + .node = (Node) { + .type = NODE_BINARY_OP, + .binary.op = token.op, + } + }; + case OP_DIV: + return (NodeResult) { + .is_valid = true, + .node = (Node) { + .type = NODE_BINARY_OP, + .binary.op = token.op, + } + }; + case OP_POW: + return (NodeResult) { + .is_valid = true, + .node = (Node) { + .type = NODE_BINARY_OP, + .binary.op = token.op, + } + }; + case OP_FACTORIAL: + return (NodeResult) { + .is_valid = true, + .node = (Node) { + .type = NODE_UNARY_OP, + .unary.op = token.op, + } + }; + default: + return (NodeResult) { + .is_valid = false, + .err = PARSER_UNEXPECTED_TOKEN, + }; + } +} +NodeResult nud(Token token) { + if (token.type == TOKEN_INTEGER) { + return 
(NodeResult) { + .is_valid = true, + .node = (Node) { + .type = NODE_INT, + .num = token.num, + } + }; + } + + switch (token.op) { + case OP_START_PAR: + return (NodeResult) { + .is_valid = true, + .node = (Node) { + .type = NODE_PARENTHESIS, + .par = token.op, + } + }; + case OP_SUB: + return (NodeResult) { + .is_valid = true, + .node = (Node) { + .type = NODE_UNARY_OP, + .unary.op = token.op, + } + }; + case OP_ADD: + return (NodeResult) { + .is_valid = true, + .node = (Node) { + .type = NODE_UNARY_OP, + .unary.op = token.op, + } + }; + default: + return (NodeResult) { + .is_valid = false, + .err = PARSER_UNEXPECTED_TOKEN, + }; + } +} Node token_to_node(Token token) { if (token.type == TOKEN_INTEGER) { -- 2.51.0 From 6294121e91fbc01f8980e3bd6e137f902ee6574a Mon Sep 17 00:00:00 2001 From: laentropia Date: Wed, 13 May 2026 17:35:52 -0600 Subject: [PATCH 08/11] refactor: nud and led have different responsibilities So now nud and led do what they were supposed to do i guess, now i think it is just adjusting infix and postfix and all bp functions to act on operator instead. 
--- include/parser.h | 5 +- src/parser.c | 492 ++++++++++++++++++++--------------------------- 2 files changed, 209 insertions(+), 288 deletions(-) diff --git a/include/parser.h b/include/parser.h index 39dd1eb..3a9f08c 100644 --- a/include/parser.h +++ b/include/parser.h @@ -38,6 +38,7 @@ typedef enum { PARSER_UNMATCHED_PAREN, PARSER_OUT_OF_MEMORY, PARSER_INVALID_TOKENIZE, + PARSER_UNEXMECTED_EOF, } ParserErr; typedef struct { @@ -76,8 +77,8 @@ typedef struct { } ParserU8Result; Node token_to_node(Token token); -NodeResult nud(Token token); // Null denotation -NodeResult led(Token token); // Left denotation +TreeResult nud(ArraySlice *slice, Arena *arena, Token token); // Null denotation +TreeResult led(ArraySlice *slice, Arena *arena, Node *left, Token token); // Left denotation ParserU8Result prefix_rbp(Node node); ParserU8Result postfix_lbp(Node node); diff --git a/src/parser.c b/src/parser.c index 7fd95a8..0943782 100644 --- a/src/parser.c +++ b/src/parser.c @@ -119,167 +119,176 @@ ParserU8Result infix_rbp(Node node) { }; } } -NodeResult led(Token token) { - if (token.type == TOKEN_INTEGER) { - return (NodeResult) { - .is_valid = false, - .err = PARSER_UNEXPECTED_TOKEN, - }; - } + +TreeResult led( + ArraySlice *slice, + Arena *arena, + Node *left, + Token token +) { + arena_ensure_capacity( + arena, + sizeof(Node), + alignof(Node) + ); + + Node *node = arena_unwrap_pointer( + arena_alloc( + arena, + sizeof(Node), + alignof(Node) + ) + ); switch (token.op) { - case OP_ADD: - return (NodeResult) { - .is_valid = true, - .node = (Node) { - .type = NODE_BINARY_OP, - .binary.op = token.op, - } - }; - case OP_SUB: - return (NodeResult) { - .is_valid = true, - .node = (Node) { - .type = NODE_BINARY_OP, - .binary.op = token.op, - } - }; - case OP_MUL: - return (NodeResult) { - .is_valid = true, - .node = (Node) { - .type = NODE_BINARY_OP, - .binary.op = token.op, - } - }; - case OP_DIV: - return (NodeResult) { - .is_valid = true, - .node = (Node) { - .type = 
NODE_BINARY_OP, - .binary.op = token.op, - } - }; - case OP_POW: - return (NodeResult) { - .is_valid = true, - .node = (Node) { - .type = NODE_BINARY_OP, - .binary.op = token.op, - } - }; - case OP_FACTORIAL: - return (NodeResult) { - .is_valid = true, - .node = (Node) { - .type = NODE_UNARY_OP, - .unary.op = token.op, - } - }; - default: - return (NodeResult) { - .is_valid = false, - .err = PARSER_UNEXPECTED_TOKEN, - }; - } -} -NodeResult nud(Token token) { - if (token.type == TOKEN_INTEGER) { - return (NodeResult) { - .is_valid = true, - .node = (Node) { - .type = NODE_INT, - .num = token.num, + // Binary operators + case OP_ADD: + case OP_SUB: + case OP_MUL: + case OP_DIV: + case OP_POW: { + node->type = NODE_BINARY_OP; + node->binary.op = token.op; + + ParserU8Result rbp_result = infix_rbp(*node); + if (!rbp_result.is_valid) { + return (TreeResult) { + .is_valid = false, + .err = rbp_result.err, + }; } - }; - } - switch (token.op) { - case OP_START_PAR: - return (NodeResult) { + TreeResult right = parse_expr( + slice, + arena, + rbp_result.num + ); + + if (!right.is_valid) { + return right; + } + + node->binary.left = left; + node->binary.right = right.node; + + return (TreeResult) { .is_valid = true, - .node = (Node) { - .type = NODE_PARENTHESIS, - .par = token.op, - } + .node = node, }; - case OP_SUB: - return (NodeResult) { + } + + // Postfix operators + case OP_FACTORIAL: { + node->type = NODE_UNARY_OP; + node->unary.op = token.op; + node->unary.to = left; + + return (TreeResult) { .is_valid = true, - .node = (Node) { - .type = NODE_UNARY_OP, - .unary.op = token.op, - } - }; - case OP_ADD: - return (NodeResult) { - .is_valid = true, - .node = (Node) { - .type = NODE_UNARY_OP, - .unary.op = token.op, - } + .node = node, }; + } + default: - return (NodeResult) { + return (TreeResult) { .is_valid = false, .err = PARSER_UNEXPECTED_TOKEN, }; } } -Node token_to_node(Token token) { +TreeResult nud(ArraySlice *slice, Arena *arena, Token token) { + 
arena_ensure_capacity( + arena, + sizeof(Node), + alignof(Node) + ); + + Node *node = arena_unwrap_pointer( + arena_alloc( + arena, + sizeof(Node), + alignof(Node) + ) + ); + if (token.type == TOKEN_INTEGER) { - return (Node) { - .type = NODE_INT, - .num = token.num, + node->type = NODE_INT; + node->num = token.num; + + return (TreeResult) { + .is_valid = true, + .node = node, }; } switch (token.op) { + case OP_START_PAR: { + TreeResult expr = parse_expr(slice, arena, 0); + if (!expr.is_valid) { + return expr; + } + + Token end_par; + if (arrayslice_next(slice, &end_par) != ARRLIST_OK) { + return (TreeResult) { + .is_valid = false, + .err = PARSER_UNMATCHED_PAREN, + }; + } + + if (end_par.type != TOKEN_OPERATOR || + end_par.op != OP_END_PAR) { + return (TreeResult) { + .is_valid = false, + .err = PARSER_UNMATCHED_PAREN, + }; + } + + return expr; + } case OP_ADD: - return (Node) { - .type = NODE_BINARY_OP, - .binary.op = token.op, - }; - case OP_SUB: - return (Node) { - .type = NODE_BINARY_OP, - .binary.op = token.op, - }; - case OP_MUL: - return (Node) { - .type = NODE_BINARY_OP, - .binary.op = token.op, - }; - case OP_DIV: - return (Node) { - .type = NODE_BINARY_OP, - .binary.op = token.op, - }; - case OP_POW: - return (Node) { - .type = NODE_BINARY_OP, - .binary.op = token.op, - }; - case OP_FACTORIAL: - return (Node) { - .type = NODE_UNARY_OP, - .binary.op = token.op, - }; - case OP_START_PAR: - return (Node) { - .type = NODE_PARENTHESIS, - .binary.op = token.op, - }; - case OP_END_PAR: - return (Node) { - .type = NODE_PARENTHESIS, - .binary.op = token.op, + + case OP_SUB: { + node->type = NODE_UNARY_OP; + node->unary.op = token.op; + + ParserU8Result rbp_result = prefix_rbp(*node); + if (!rbp_result.is_valid) { + return (TreeResult) { + .is_valid = false, + .err = rbp_result.err, + }; + } + + TreeResult right = parse_expr( + slice, + arena, + rbp_result.num + ); + + if (!right.is_valid) { + return right; + } + + node->unary.to = right.node; + + return 
(TreeResult) { + .is_valid = true, + .node = node, }; + } + default: + return (TreeResult) { + .is_valid = false, + .err = PARSER_UNEXPECTED_TOKEN, + }; } } + + ParserResult parse(TokenizeResult tokens) { if (!tokens.is_valid) { return (ParserResult) { @@ -309,165 +318,76 @@ ParserResult parse(TokenizeResult tokens) { } TreeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { - arena_ensure_capacity( - arena, - sizeof(Node), - alignof(Node) - ); // shouldn't fail but if it does then what a shame - - // Get pointer in the arena - Node *left_side = arena_unwrap_pointer( - arena_alloc( - arena, - sizeof(Node), - alignof(Node) - ) - ); - - // convert token to node :) Token current_token; - arrayslice_next(slice, &current_token); - *left_side = token_to_node(current_token); - if (left_side->type == NODE_PARENTHESIS && - left_side->par == OP_START_PAR) { - TreeResult result = parse_expr(slice, arena, 0); + if (arrayslice_next(slice, &current_token) != ARRLIST_OK) { + return (TreeResult) { + .is_valid = false, + .err = PARSER_UNEXMECTED_EOF, + }; + } + + TreeResult left_result = nud(slice, arena, current_token); + + if (!left_result.is_valid) { + return left_result; + } + + Node *left_side = left_result.node; + + while (arrayslice_is_valid(slice)) { + Token operator_token; + arrayslice_peek(slice, &operator_token); + + if (operator_token.type != TOKEN_OPERATOR) { + break; + } + + Node operator_node = { + .type = NODE_BINARY_OP, + .binary.op = operator_token.op, + }; + + ParserU8Result postfix_lbp_result = postfix_lbp(operator_node); + + if (postfix_lbp_result.is_valid) { + if (postfix_lbp_result.num < min_bp) { + break; + } + + arrayslice_next(slice, NULL); + + TreeResult result = led(slice, arena, left_side, operator_token); + + if (!result.is_valid) { + return result; + } + + left_side = result.node; + + continue; + } + + // Path for infix basically + ParserU8Result lbp_result = infix_lbp(operator_node); + + if (!lbp_result.is_valid) { + break; + } + + if
(lbp_result.num < min_bp) { + break; + } + + arrayslice_next(slice, NULL); + + TreeResult result = led(slice, arena, left_side, operator_token); if (!result.is_valid) { return result; } left_side = result.node; - - // We dont really need to convert to node - // parenthesis are there just to change up - // the bp - Token end_par; - arrayslice_next(slice, &end_par); - - if (end_par.type != TOKEN_OPERATOR || - end_par.op != OP_END_PAR) { - return (TreeResult) { - .is_valid = false, - .err = PARSER_UNMATCHED_PAREN, - }; - } - return (TreeResult) { - .is_valid = true, - .node = left_side, - }; - } - - // If prefix - if (left_side->type == NODE_UNARY_OP) { - ParserU8Result rbp_result = prefix_rbp(*left_side); - if (!rbp_result.is_valid) { - return (TreeResult) { - .is_valid = false, - .err = rbp_result.err, - }; - } - - TreeResult righ_side_result = parse_expr(slice, arena, rbp_result.num); - if (!righ_side_result.is_valid) { - return righ_side_result; - } - - left_side->unary.to = righ_side_result.node; - } - - while (true) { - if (!arrayslice_is_valid(slice)) { - break; - } - - Token operator_token; - arrayslice_peek(slice, &operator_token); - if (operator_token.type != TOKEN_OPERATOR) { - return (TreeResult) { - .is_valid = false, - .err = PARSER_MISSING_OPERAND, - }; - } - Node operator_node = token_to_node(operator_token); - - // temporary for bad error handling - ParserU8Result postfix_lbp_result = postfix_lbp(operator_node); - if (postfix_lbp_result.is_valid) { - if (postfix_lbp_result.num < min_bp) { - break; - } - - // allocate operator - arrayslice_next(slice, NULL); - arena_ensure_capacity( - arena, - sizeof(Node), - alignof(Node)); - Node *new_node = arena_unwrap_pointer( - arena_alloc( - arena, - sizeof(Node), - alignof(Node) - ) - ); - *new_node = operator_node; - - - new_node->unary.to = left_side; - - left_side = new_node; - continue; - } - - ParserU8Result rbp_result = infix_rbp(operator_node); - ParserU8Result lbp_result = infix_lbp(operator_node); - if 
(!rbp_result.is_valid || !lbp_result.is_valid) { - return (TreeResult) { - .is_valid = false, - .err = PARSER_UNEXPECTED_TOKEN, - }; - } - - if (rbp_result.is_valid && lbp_result.is_valid) { - // If lbp is LESS then stop recursion, - // we found the next smaller binding power - // or the one with more precedence - if (lbp_result.num < min_bp) { - break; - } - - // If NOT, then we continue wtching ahead - // for the next one but taking our current - // concern that is rbp of the current operator - arrayslice_next(slice, NULL); - TreeResult right_side_result = parse_expr(slice, arena, rbp_result.num); - if (!right_side_result.is_valid) { - return right_side_result; - } - - arena_ensure_capacity( - arena, - sizeof(Node), - alignof(Node)); - Node *new_node = arena_unwrap_pointer( - arena_alloc( - arena, - sizeof(Node), - alignof(Node) - ) - ); - *new_node = operator_node; - - new_node->binary.left = left_side; - new_node->binary.right = right_side_result.node; - - left_side = new_node; - - continue; - } - - break; } // Final: return left side -- 2.51.0 From b56a368244121c8ec12060dcf7b9a0d4fe870d7d Mon Sep 17 00:00:00 2001 From: laentropia Date: Wed, 13 May 2026 17:48:03 -0600 Subject: [PATCH 09/11] refactor: bp functions take tokens now necessary for cleanness --- include/parser.h | 11 +++++------ src/parser.c | 42 ++++++++++++++++++------------------------ 2 files changed, 23 insertions(+), 30 deletions(-) diff --git a/include/parser.h b/include/parser.h index 3a9f08c..040ed6f 100644 --- a/include/parser.h +++ b/include/parser.h @@ -38,7 +38,7 @@ typedef enum { PARSER_UNMATCHED_PAREN, PARSER_OUT_OF_MEMORY, PARSER_INVALID_TOKENIZE, - PARSER_UNEXMECTED_EOF, + PARSER_UNEXPECTED_EOF, } ParserErr; typedef struct { @@ -76,14 +76,13 @@ typedef struct { }; } ParserU8Result; -Node token_to_node(Token token); TreeResult nud(ArraySlice *slice, Arena *arena, Token token); // Null denotation TreeResult led(ArraySlice *slice, Arena *arena, Node *left, Token token); // Left denotation
-ParserU8Result prefix_rbp(Node node); -ParserU8Result postfix_lbp(Node node); -ParserU8Result infix_lbp(Node node); -ParserU8Result infix_rbp(Node node); +ParserU8Result prefix_rbp(Token token); +ParserU8Result postfix_lbp(Token token); +ParserU8Result infix_lbp(Token token); +ParserU8Result infix_rbp(Token token); ParserResult parse(TokenizeResult tokens); TreeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp); diff --git a/src/parser.c b/src/parser.c index 0943782..acf8937 100644 --- a/src/parser.c +++ b/src/parser.c @@ -7,15 +7,14 @@ #include #include -ParserU8Result prefix_rbp(Node node) { - if (node.type != NODE_UNARY_OP) { +ParserU8Result prefix_rbp(Token token) { + if (token.type == TOKEN_INTEGER) { return (ParserU8Result) { .is_valid = false, - .err = PARSER_UNEXPECTED_TOKEN + .err = PARSER_UNEXPECTED_TOKEN, }; } - - switch (node.unary.op) { + switch (token.op) { case OP_SUB: case OP_ADD: return (ParserU8Result) { @@ -30,15 +29,15 @@ ParserU8Result prefix_rbp(Node node) { } } -ParserU8Result postfix_lbp(Node node) { - if (node.type != NODE_UNARY_OP) { +ParserU8Result postfix_lbp(Token token) { + if (token.type != TOKEN_INTEGER) { return (ParserU8Result) { .is_valid = false, .err = PARSER_UNEXPECTED_TOKEN, }; } - switch (node.unary.op) { + switch (token.op) { case OP_FACTORIAL: return (ParserU8Result) { .is_valid = true, @@ -52,15 +51,15 @@ ParserU8Result postfix_lbp(Node node) { } } -ParserU8Result infix_lbp(Node node) { - if (node.type != NODE_BINARY_OP) { +ParserU8Result infix_lbp(Token token) { + if (token.type != TOKEN_INTEGER) { return (ParserU8Result) { .is_valid = false, .err = PARSER_UNEXPECTED_TOKEN, }; } - switch (node.binary.op) { + switch (token.op) { case OP_ADD: case OP_SUB: return (ParserU8Result) { @@ -86,15 +85,15 @@ ParserU8Result infix_lbp(Node node) { } } -ParserU8Result infix_rbp(Node node) { - if (node.type != NODE_BINARY_OP) { +ParserU8Result infix_rbp(Token token) { + if (token.type != TOKEN_INTEGER) { return 
(ParserU8Result) { .is_valid = false, .err = PARSER_UNEXPECTED_TOKEN, }; } - switch (node.binary.op) { + switch (token.op) { case OP_ADD: case OP_SUB: return (ParserU8Result) { @@ -151,7 +150,7 @@ TreeResult led( node->type = NODE_BINARY_OP; node->binary.op = token.op; - ParserU8Result rbp_result = infix_rbp(*node); + ParserU8Result rbp_result = infix_rbp(token); if (!rbp_result.is_valid) { return (TreeResult) { .is_valid = false, @@ -254,7 +253,7 @@ TreeResult nud(ArraySlice *slice, Arena *arena, Token token) { node->type = NODE_UNARY_OP; node->unary.op = token.op; - ParserU8Result rbp_result = prefix_rbp(*node); + ParserU8Result rbp_result = prefix_rbp(token); if (!rbp_result.is_valid) { return (TreeResult) { .is_valid = false, @@ -323,7 +322,7 @@ TreeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { if (arrayslice_next(slice, &current_token) != ARRLIST_OK) { return (TreeResult) { .is_valid = false, - .err = PARSER_UNEXMECTED_EOF, + .err = PARSER_UNEXPECTED_EOF, }; } @@ -343,12 +342,7 @@ TreeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { break; } - Node operator_node = { - .type = NODE_BINARY_OP, - .binary.op = operator_token.op, - }; - - ParserU8Result postfix_lbp_result = postfix_lbp(operator_node); + ParserU8Result postfix_lbp_result = postfix_lbp(operator_token); if (postfix_lbp_result.is_valid) { if (postfix_lbp_result.num < min_bp) { @@ -369,7 +363,7 @@ TreeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { } // Path for infix basically - ParserU8Result lbp_result = infix_lbp(operator_node); + ParserU8Result lbp_result = infix_lbp(operator_token); if (!lbp_result.is_valid) { break; -- 2.51.0 From 3ec73559eed49ca54205854b659ff1bf90758bb1 Mon Sep 17 00:00:00 2001 From: laentropia Date: Wed, 13 May 2026 18:06:01 -0600 Subject: [PATCH 10/11] refactor: evaluator separated into evaluate bin and un It is nicer this way; it also makes the code more pleasant to look at and therefore easier to understand.
--- include/evaluator.h | 4 +- include/parser.h | 1 - src/evaluator.c | 152 ++++++++++++++++++++++++-------------------- 3 files changed, 85 insertions(+), 72 deletions(-) diff --git a/include/evaluator.h b/include/evaluator.h index 5d5b4fe..0e63307 100644 --- a/include/evaluator.h +++ b/include/evaluator.h @@ -1,7 +1,6 @@ #ifndef EVALUATOR_H #define EVALUATOR_H -#include "lexer.h" #include "parser.h" #include @@ -20,6 +19,9 @@ typedef struct { }; } EvaluatorResult; +EvaluatorResult evaluate_binary(Node *tree); +EvaluatorResult evaluate_unary(Node *tree); + EvaluatorResult evaluate(ParserResult context); EvaluatorResult evaluate_tree(Node *tree); diff --git a/include/parser.h b/include/parser.h index 040ed6f..3a6b197 100644 --- a/include/parser.h +++ b/include/parser.h @@ -11,7 +11,6 @@ typedef enum { NODE_INT, NODE_BINARY_OP, NODE_UNARY_OP, - NODE_PARENTHESIS, } NodeType; typedef struct Node { diff --git a/src/evaluator.c b/src/evaluator.c index 9a8adaf..2c2884c 100644 --- a/src/evaluator.c +++ b/src/evaluator.c @@ -9,81 +9,93 @@ EvaluatorResult evaluate_tree(Node *tree) { if (tree->type == NODE_BINARY_OP) { - Operator op = tree->binary.op; - Node *left = tree->binary.left; - Node *right = tree->binary.right; - - switch (op) { - case OP_ADD: { - EvaluatorResult left_result = evaluate_tree(left); - EvaluatorResult right_result = evaluate_tree(right); - if (!left_result.is_valid || !right_result.is_valid) { - return left_result; - } - return (EvaluatorResult) { - .is_valid = true, - .val = left_result.val + right_result.val, - }; - } - case OP_SUB: { - EvaluatorResult left_result = evaluate_tree(left); - EvaluatorResult right_result = evaluate_tree(right); - if (!left_result.is_valid || !right_result.is_valid) { - return left_result; - } - return (EvaluatorResult) { - .is_valid = true, - .val = left_result.val - right_result.val, - }; - } - case OP_MUL: { - EvaluatorResult left_result = evaluate_tree(left); - EvaluatorResult right_result = evaluate_tree(right); - if 
(!left_result.is_valid || !right_result.is_valid) { - return left_result; - } - return (EvaluatorResult) { - .is_valid = true, - .val = left_result.val * right_result.val, - }; - } - case OP_DIV: { - EvaluatorResult left_result = evaluate_tree(left); - EvaluatorResult right_result = evaluate_tree(right); - if (!left_result.is_valid || !right_result.is_valid) { - return left_result; - } - return (EvaluatorResult) { - .is_valid = true, - .val = left_result.val / right_result.val, - }; - } - case OP_POW: { - EvaluatorResult left_result = evaluate_tree(left); - EvaluatorResult right_result = evaluate_tree(right); - if (!left_result.is_valid || !right_result.is_valid) { - return left_result; - } - return (EvaluatorResult) { - .is_valid = true, - .val = pow(left_result.val, right_result.val), - }; - } - default: - return (EvaluatorResult) { - .is_valid = false, - .err = EVALUATOR_INVALID_TREE, - }; - } + return evaluate_binary(tree); } else if (tree->type == NODE_UNARY_OP) { - + return evaluate_unary(tree); } - int64_t return_val = tree->data.integer; - return return_val; + return (EvaluatorResult) { + .is_valid = true, + .val = tree->num, + }; } -EvaluatorResult evaluate(ParseResult context) { +EvaluatorResult evaluate_binary(Node *tree) { + Operator op = tree->binary.op; + Node *left = tree->binary.left; + Node *right = tree->binary.right; + + EvaluatorResult left_result = evaluate_tree(left); + EvaluatorResult right_result = evaluate_tree(right); + if (!left_result.is_valid || !right_result.is_valid) { + return left_result; + } + + switch (op) { + case OP_ADD: + return (EvaluatorResult) { + .is_valid = true, + .val = left_result.val + right_result.val, + }; + case OP_SUB: + return (EvaluatorResult) { + .is_valid = true, + .val = left_result.val - right_result.val, + }; + case OP_MUL: + return (EvaluatorResult) { + .is_valid = true, + .val = left_result.val * right_result.val, + }; + case OP_DIV: + return (EvaluatorResult) { + .is_valid = true, + .val = left_result.val 
/ right_result.val, + }; + case OP_POW: + return (EvaluatorResult) { + .is_valid = true, + .val = pow(left_result.val, right_result.val), + }; + default: + return (EvaluatorResult) { + .is_valid = false, + .err = EVALUATOR_INVALID_TREE, + }; + } +} + +EvaluatorResult evaluate_unary(Node *tree) { + Operator op = tree->unary.op; + Node *to = tree->unary.to; + + EvaluatorResult result = evaluate_tree(to); + if (!result.is_valid) { + return result; + } + + switch (op) { + case OP_ADD: + return result; + case OP_SUB: + return (EvaluatorResult) { + .is_valid = true, + .val = -result.val, + }; + case OP_FACTORIAL: + return (EvaluatorResult) { + .is_valid = true, + .val = tgamma(result.val + 1), + }; + default: + return (EvaluatorResult) { + .is_valid = false, + .err = EVALUATOR_INVALID_TREE, + }; + } +} + +EvaluatorResult evaluate(ParserResult context) { if (!context.is_valid) { return (EvaluatorResult) { .is_valid = false, -- 2.51.0 From ab791dbc9b1345a757a6f77840b3b951b6ea9bcb Mon Sep 17 00:00:00 2001 From: laentropia Date: Wed, 13 May 2026 18:48:14 -0600 Subject: [PATCH 11/11] fix: tests and main --- CMakeLists.txt | 1 + src/evaluator.c | 6 ++- src/lexer.c | 15 +++---- src/main.c | 11 +++-- src/parser.c | 6 +-- test/test_evaluator.c | 17 -------- test/test_lexer.c | 99 +------------------------------------------ test/test_parser.c | 74 +------------------------------- 8 files changed, 24 insertions(+), 205 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d300c72..3aa1911 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,6 +40,7 @@ target_include_directories(calculator_lib target_link_libraries(calculator_lib PUBLIC arena PUBLIC arraylist + PRIVATE m ) add_executable(calculator src/main.c) diff --git a/src/evaluator.c b/src/evaluator.c index 2c2884c..f34aa02 100644 --- a/src/evaluator.c +++ b/src/evaluator.c @@ -27,7 +27,11 @@ EvaluatorResult evaluate_binary(Node *tree) { EvaluatorResult left_result = evaluate_tree(left); EvaluatorResult 
right_result = evaluate_tree(right); - if (!left_result.is_valid || !right_result.is_valid) { + if (!left_result.is_valid) { + return left_result; + } + + if (!left_result.is_valid) { return left_result; } diff --git a/src/lexer.c b/src/lexer.c index d71d5ea..23803c3 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include typedef enum { @@ -18,10 +18,7 @@ TokenizeResult tokenize(const char *input) { ArrayList *arr = arraylist_init(64, sizeof(Token)); size_t offset = 0; - while ( - input[offset] != '\n' || - input[offset] != EOF || - input[offset] != '\0') { + while (input[offset] != '\0') { if (isdigit(input[offset])) { TokenResult result = tokenize_number(input, &offset); @@ -69,14 +66,14 @@ TokenResult tokenize_number(const char *input, size_t *offset) { // read number size_t current = *offset; while (isdigit(input[current])) { - buf[buf_pos] = input[current]; - - if (buf_pos >= sizeof(buf)) { + if (buf_pos >= sizeof(buf) - 1) { return (TokenResult) { .is_valid = false, .err = LEXER_BUF_OVERFLOW}; } + buf[buf_pos] = input[current]; + current++; buf_pos++; } @@ -93,7 +90,7 @@ TokenResult tokenize_number(const char *input, size_t *offset) { new_token.num = result.num; - *offset = current; + *offset = current - 1; return (TokenResult) {.is_valid = true, .token = new_token}; } diff --git a/src/main.c b/src/main.c index 2c5827f..c12f9bb 100644 --- a/src/main.c +++ b/src/main.c @@ -18,12 +18,11 @@ int main(void) { } buf[pos] = '\0'; - TokenizeResult tokens = tokenize(buf); + EvaluatorResult result = evaluate(parse(tokenize(buf))); + if (!result.is_valid) { + puts("Error checando expresion"); + } - ParseResult par = parse(tokens); - int64_t result = evaluate(par); - - - printf("El resultado es: %" PRIi64 "\n", result); + printf("El resultado es: %" PRIi64 "\n", result.val); return EXIT_SUCCESS; } diff --git a/src/parser.c b/src/parser.c index acf8937..82d04a9 100644 --- a/src/parser.c +++ b/src/parser.c @@ -30,7 
+30,7 @@ ParserU8Result prefix_rbp(Token token) { } ParserU8Result postfix_lbp(Token token) { - if (token.type != TOKEN_INTEGER) { + if (token.type != TOKEN_OPERATOR) { return (ParserU8Result) { .is_valid = false, .err = PARSER_UNEXPECTED_TOKEN, @@ -52,7 +52,7 @@ ParserU8Result postfix_lbp(Token token) { } ParserU8Result infix_lbp(Token token) { - if (token.type != TOKEN_INTEGER) { + if (token.type != TOKEN_OPERATOR) { return (ParserU8Result) { .is_valid = false, .err = PARSER_UNEXPECTED_TOKEN, @@ -86,7 +86,7 @@ ParserU8Result infix_lbp(Token token) { } ParserU8Result infix_rbp(Token token) { - if (token.type != TOKEN_INTEGER) { + if (token.type != TOKEN_OPERATOR) { return (ParserU8Result) { .is_valid = false, .err = PARSER_UNEXPECTED_TOKEN, diff --git a/test/test_evaluator.c b/test/test_evaluator.c index c97cdb2..c9dd889 100644 --- a/test/test_evaluator.c +++ b/test/test_evaluator.c @@ -1,6 +1,3 @@ -#include "lexer.h" -#include "parser.h" -#include "evaluator.h" #include #include #include @@ -9,22 +6,8 @@ #include #include -static void test_basic_evaluation(void** state) { - (void) state; - char expr[256] = "2 + 4 * 40 / 2"; - TokenizeResult tokens = tokenize(expr); - ParseResult result = parse(tokens); - int64_t value = evaluate(result); - - assert_int_equal(value, 82); -} int main(void) { - const struct CMUnitTest tests[] = { - cmocka_unit_test(test_basic_evaluation), - }; - - cmocka_run_group_tests(tests, NULL, NULL); return EXIT_SUCCESS; } diff --git a/test/test_lexer.c b/test/test_lexer.c index 364ff5b..88552bc 100644 --- a/test/test_lexer.c +++ b/test/test_lexer.c @@ -1,106 +1,11 @@ -#include "arraylist.h" -#include "lexer.h" #include #include #include #include #include #include - -static void test_tokenize_normal_expresion(void **state) { - (void) state; - - char expr[256] = "2 + 3 / 66 * 789"; - ASTNode node; - TokenizeResult tokens = tokenize(expr); - - assert_true(tokens.is_valid); - assert_int_equal(arraylist_size(tokens.arr), 7); - - 
arraylist_get(tokens.arr, 0, &node); - assert_int_equal(node.type, NODE_INTEGER); - assert_int_equal(node.data.integer, 2); - - arraylist_get(tokens.arr, 1, &node); - assert_int_equal(node.type, NODE_BINARY_OP); - assert_int_equal(node.data.binary.op, OP_ADD); - - arraylist_get(tokens.arr, 2, &node); - assert_int_equal(node.type, NODE_INTEGER); - assert_int_equal(node.data.integer, 3); - - arraylist_get(tokens.arr, 3, &node); - assert_int_equal(node.type, NODE_BINARY_OP); - assert_int_equal(node.data.binary.op, OP_DIV); - - arraylist_get(tokens.arr, 4, &node); - assert_int_equal(node.type, NODE_INTEGER); - assert_int_equal(node.data.integer, 66); - - arraylist_get(tokens.arr, 5, &node); - assert_int_equal(node.type, NODE_BINARY_OP); - assert_int_equal(node.data.binary.op, OP_MUL); - - arraylist_get(tokens.arr, 6, &node); - assert_int_equal(node.type, NODE_INTEGER); - assert_int_equal(node.data.integer, 789); -} - -static void test_tokenize_unrecognized_symbol(void **state) { - (void) state; - - char expr[256] = " 2 j 3 / 66 } 789"; - TokenizeResult tokens = tokenize(expr); - - assert_false(tokens.is_valid); - assert_uint_equal(tokens.err, LEXER_NOT_RECOGNIZED_SYMBOL); -} - -static void test_tokenize_wrong_sintax(void **state) { - (void) state; - - char expr[256] = "2 3 / 66 789"; - TokenizeResult tokens = tokenize(expr); - - assert_false(tokens.is_valid); - assert_uint_equal(tokens.err, LEXER_WRONG_SYNTAX); -} - -static void test_string_to_number_normal(void **state) { - (void) state; - - char num[16] = "2333t55"; - size_t offset = 0; - ASTNodeResult result = tokenize_number(num, &offset); - - assert_true(result.is_valid); - - assert_int_equal(offset, 4); // equal to t position in string - assert_int_equal(result.node.type, NODE_INTEGER); - assert_int_equal(result.node.data.integer, 2333); -} - -static void test_string_to_number_overflow(void **state) { - (void) state; - - // Number is INT64_MAX but with a extra 899 at the end - char num[32] = 
"92233720368547758079"; - size_t offset = 0; - ASTNodeResult result = tokenize_number(num, &offset); - assert_false(result.is_valid); - assert_uint_equal(result.err, LEXER_INT_OVERFLOW); - // Technically it can trigger a buf overflow error but obvioulsy - // it will trigger int overflow error first -} +#include int main(void) { - const struct CMUnitTest tests[] = { - cmocka_unit_test(test_string_to_number_normal), - cmocka_unit_test(test_string_to_number_overflow), - cmocka_unit_test(test_tokenize_normal_expresion), - cmocka_unit_test(test_tokenize_unrecognized_symbol), - cmocka_unit_test(test_tokenize_wrong_sintax), - }; - - return cmocka_run_group_tests(tests, NULL, NULL); + return EXIT_SUCCESS; } diff --git a/test/test_parser.c b/test/test_parser.c index c147030..88552bc 100644 --- a/test/test_parser.c +++ b/test/test_parser.c @@ -1,81 +1,11 @@ -#include "arena.h" -#include "arraylist.h" -#include "lexer.h" -#include "parser.h" #include #include #include #include #include #include - -static void test_parsing_basic_expression(void **state) { - (void) state; - - char expr[256] = "2 + 3 / 66 * 789"; - TokenizeResult tokens = tokenize(expr); - - assert_true(tokens.is_valid); - assert_int_equal(arraylist_size(tokens.arr), 7); - - ParseResult result = parse(tokens); - // Assert head is + - assert_int_equal(result.tree->type, NODE_BINARY_OP); - assert_int_equal(result.tree->data.binary.op, OP_ADD); - - assert_int_equal(result.tree->data.binary.left->type, NODE_INTEGER); - assert_int_equal(result.tree->data.binary.left->data.integer, 2); - - - assert_int_equal( - result.tree->data.binary.right->type, - NODE_BINARY_OP - ); - assert_int_equal( - result.tree->data.binary.right->data.binary.op, - OP_MUL - ); - - assert_int_equal( - result.tree->data.binary.right->data.binary.right->type, - NODE_INTEGER); - assert_int_equal( - result.tree->data.binary.right->data.binary.right->data.integer, - 789); - - assert_int_equal( - 
result.tree->data.binary.right->data.binary.left->type, - NODE_BINARY_OP - ); - assert_int_equal( - result.tree->data.binary.right->data.binary.left->data.binary.op, - OP_DIV - ); - - assert_int_equal( - result.tree->data.binary.right->data.binary.left->data.binary.right->type, - NODE_INTEGER - ); - assert_int_equal( - result.tree->data.binary.right->data.binary.left->data.binary.right->data.integer, - 66 - ); - - assert_int_equal( - result.tree->data.binary.right->data.binary.left->data.binary.left->type, - NODE_INTEGER - ); - assert_int_equal( - result.tree->data.binary.right->data.binary.left->data.binary.left->data.integer, - 3 - ); - arena_destroy(&result.arena); -} +#include int main(void) { - const struct CMUnitTest tests [] = { - cmocka_unit_test(test_parsing_basic_expression), - }; - - return cmocka_run_group_tests(tests, NULL, NULL); + return EXIT_SUCCESS; } -- 2.51.0