From f2c906c6aa5d7d7920bec6522ea05ed29d992d8f Mon Sep 17 00:00:00 2001 From: laentropia Date: Thu, 30 Apr 2026 10:40:17 -0600 Subject: [PATCH 1/6] initial-commit From c41847e120e6fce7da12663cd39552c6e7ec22a6 Mon Sep 17 00:00:00 2001 From: laentropia Date: Thu, 30 Apr 2026 21:34:27 -0600 Subject: [PATCH 2/6] refactor: rewrote tokenize and modified other funcs Well, I wanted to wildly change a lot of things about the lexer, thinking I could do something better, but really all I found were automatic lexers that, at least for me, don't really fit the project, so a manual one it is. I guess technically it is an automaton. Whatever, it is good enough. --- include/lexer.h | 4 +-- src/lexer.c | 72 +++++++++++++++++++++++++++---------------------- 2 files changed, 42 insertions(+), 34 deletions(-) diff --git a/include/lexer.h b/include/lexer.h index ed8c1e2..430b0a4 100644 --- a/include/lexer.h +++ b/include/lexer.h @@ -66,12 +66,12 @@ typedef struct { LexerErr err; int64_t number; }; -} I64Result; +} LexerI64Result; // Lexer funtions as well as few functionality TokenizeResult tokenize(const char* input); ASTNodeResult tokenize_number(const char* input, size_t *offset); -I64Result string_to_integer(const char buf[]); +LexerI64Result string_to_integer(const char buf[]); bool isoperator(int c); Operator char_to_operator(int c); char operator_to_char(Operator op); diff --git a/src/lexer.c b/src/lexer.c index 7baf998..f139394 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -14,45 +15,37 @@ typedef enum { TokenizeResult tokenize(const char *input) { - size_t offset = 0; - LexerState state = WAIT_FOR_NUMBER; ArrayList *arr = arraylist_init(64, sizeof(ASTNode)); + size_t offset = 0; - while (input[offset] != '\n' && input[offset] != '\0') { - int current = input[offset]; + while ( + input[offset] != '\n' || + input[offset] != EOF || + input[offset] != '\0') { - if (isdigit(current)) { - if (state != WAIT_FOR_NUMBER) { 
- arraylist_destroy(&arr); - return (TokenizeResult) {.is_valid = false, .err = LEXER_WRONG_SYNTAX}; - } + if (isdigit(input[offset])) { ASTNodeResult result = tokenize_number(input, &offset); if (!result.is_valid) { - arraylist_destroy(&arr); return (TokenizeResult) {.is_valid = false, .err = result.err}; } arraylist_push_back(arr, &result.node); - state = WAIT_FOR_OPERATOR; - } else if (isoperator(current)) { - if (state != WAIT_FOR_OPERATOR) { - return (TokenizeResult) {.is_valid = false, .err =LEXER_WRONG_SYNTAX}; - } - ASTNode new_node = { + } else if (isoperator(input[offset])) { + ASTNode op_node = { .type = NODE_BINARY_OP, - .data.binary.op = char_to_operator(current), - .data.binary.right = NULL, + .data.binary.op = char_to_operator(input[offset]), .data.binary.left = NULL, + .data.binary.right = NULL, }; - - arraylist_push_back(arr, &new_node); - state = WAIT_FOR_NUMBER; - } else if (isspace(current)) { + + arraylist_push_back(arr, &op_node); + } else if (isspace(input[offset])) { // Nothing... 
} else { - arraylist_destroy(&arr); - return (TokenizeResult) {.is_valid = false, .err = LEXER_NOT_RECOGNIZED_SYMBOL}; + return (TokenizeResult) { + .is_valid = false, + .err = LEXER_NOT_RECOGNIZED_SYMBOL}; } offset++; @@ -68,17 +61,21 @@ TokenizeResult tokenize(const char *input) { // CURRENTLY, it only supports ints, not clear how floating // point is implemented but i'll figure it out ASTNodeResult tokenize_number(const char *input, size_t *offset) { - char buf[128] = { '\0' }; + char buf[64] = { '\0' }; size_t buf_pos = 0; bool is_integer = true; // Will later be used to differentiate fractions + // read number size_t current = *offset; while (isdigit(input[current])) { buf[buf_pos] = input[current]; if (buf_pos >= sizeof(buf)) { - return (ASTNodeResult) {.is_valid = false, .err = LEXER_BUF_OVERFLOW}; + return (ASTNodeResult) { + .is_valid = false, + .err = LEXER_BUF_OVERFLOW}; } + current++; buf_pos++; } @@ -86,35 +83,46 @@ ASTNodeResult tokenize_number(const char *input, size_t *offset) { ASTNode new_node; if (is_integer) { new_node.type = NODE_INTEGER; - I64Result status = string_to_integer(buf); + LexerI64Result status = string_to_integer(buf); + + if (!status.is_valid) { return (ASTNodeResult) {.is_valid = false, .err = status.err}; } + new_node.data.integer = status.number; + *offset = current; return (ASTNodeResult) {.is_valid = true, .node = new_node}; } - return (ASTNodeResult) {.is_valid = false, .err = LEXER_FAILED_NUMBER_CONVERSION}; + return (ASTNodeResult) { + .is_valid = false, + .err = LEXER_FAILED_NUMBER_CONVERSION}; } -I64Result string_to_integer(const char *buf) { +LexerI64Result string_to_integer(const char *buf) { int c = 0; int64_t count = 0; + while (buf[c] != '\0') { - + + // Extracts number from char int digit = buf[c] - '0'; if (count > (INT64_MAX - digit) / 10) { - return (I64Result) {.is_valid = false, .err = LEXER_INT_OVERFLOW}; + return (LexerI64Result) { + .is_valid = false, + .err = LEXER_INT_OVERFLOW}; } + count = count * 10; 
count += digit; c++; } - return (I64Result) {.is_valid = true, .number = count}; + return (LexerI64Result) {.is_valid = true, .number = count}; } bool isoperator(int c) { From 59f99059bb68ce0e54cb448ebfcb71f4c619da12 Mon Sep 17 00:00:00 2001 From: laentropia Date: Tue, 12 May 2026 18:15:36 -0600 Subject: [PATCH 3/6] refactor: changes and additions to parser --- include/lexer.h | 10 +++++++--- include/parser.h | 6 ++++-- src/evaluator.c | 4 +++- src/lexer.c | 6 ++++++ src/parser.c | 42 +++++++++++++++++++++++++++++++++++++----- 5 files changed, 57 insertions(+), 11 deletions(-) diff --git a/include/lexer.h b/include/lexer.h index 430b0a4..5081605 100644 --- a/include/lexer.h +++ b/include/lexer.h @@ -10,6 +10,7 @@ typedef enum { NODE_INTEGER, NODE_BINARY_OP, + NODE_UNARY_OP, } ASTNodeType; // For classify operators @@ -17,7 +18,8 @@ typedef enum { OP_ADD, OP_SUB, OP_MUL, - OP_DIV + OP_DIV, + OP_POW, } Operator; typedef enum { @@ -26,8 +28,6 @@ typedef enum { LEXER_FAILED_NUMBER_CONVERSION, LEXER_NOT_RECOGNIZED_SYMBOL, LEXER_EMPTY_INPUT, - LEXER_NULL_ARG, - LEXER_WRONG_SYNTAX, LEXER_BUF_OVERFLOW, } LexerErr; @@ -41,6 +41,10 @@ typedef struct ASTNode { struct ASTNode *right; Operator op; } binary; + struct { + struct ASTNode *val; + Operator op; + } unary; } data; } ASTNode; diff --git a/include/parser.h b/include/parser.h index 158b348..0c8f080 100644 --- a/include/parser.h +++ b/include/parser.h @@ -32,8 +32,10 @@ typedef struct { ASTNode *nud(ArraySlice *slice); ASTNode *led(ArraySlice *slice, size_t right_precedence); -uint8_t node_lbp(ASTNode node); -uint8_t node_rbp(ASTNode node); +uint8_t prefix_lbp(ASTNode node); +uint8_t prefix_rbp(ASTNode node); +uint8_t infix_lbp(ASTNode node); +uint8_t infix_rbp(ASTNode node); ParseResult parse(TokenizeResult tokens); ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp); diff --git a/src/evaluator.c b/src/evaluator.c index 36d9fb1..551f779 100644 --- a/src/evaluator.c +++ b/src/evaluator.c @@ -3,6 +3,7 @@ 
#include "lexer.h" #include "parser.h" #include +#include int64_t evaluate_tree(ASTNode *tree) { @@ -20,7 +21,8 @@ int64_t evaluate_tree(ASTNode *tree) { return evaluate_tree(left) * evaluate_tree(right); case OP_DIV: return evaluate_tree(left) / evaluate_tree(right); - + case OP_POW: + return pow(evaluate_tree(left), evaluate_tree(right)); } } diff --git a/src/lexer.c b/src/lexer.c index f139394..201a187 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -131,6 +131,7 @@ bool isoperator(int c) { case '-': case '/': case '*': + case '^': return true; default: return false; @@ -151,6 +152,9 @@ Operator char_to_operator(int c) { case '/': return OP_DIV; break; + case '^': + return OP_POW; + break; default: // I mean shouldn't be used, we assume return -1; } @@ -166,5 +170,7 @@ char operator_to_char(Operator op) { return '*'; case OP_DIV: return '/'; + case OP_POW: + return '^'; } } diff --git a/src/parser.c b/src/parser.c index a3efae0..fc4c142 100644 --- a/src/parser.c +++ b/src/parser.c @@ -6,7 +6,21 @@ #include #include -uint8_t node_lbp(ASTNode node) { +uint8_t prefix_rbp(ASTNode node) { + if (node.type == NODE_INTEGER) { + return 0; + } + + switch (node.data.unary.op) { + case OP_SUB: + case OP_ADD: + return 5; + default: + return -1; + } +} + +uint8_t infix_lbp(ASTNode node) { if (node.type == NODE_INTEGER) { return 0; } @@ -19,12 +33,14 @@ uint8_t node_lbp(ASTNode node) { case OP_DIV: case OP_MUL: return 20; + case OP_POW: + return 31; default: return 0; } } -uint8_t node_rbp(ASTNode node) { +uint8_t infix_rbp(ASTNode node) { if (node.type == NODE_INTEGER) { return 0; } @@ -37,6 +53,8 @@ uint8_t node_rbp(ASTNode node) { case OP_DIV: case OP_MUL: return 21; + case OP_POW: + return 30; default: return 0; } @@ -53,12 +71,14 @@ ParseResult parse(TokenizeResult tokens) { } ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { + // First: Consume a first number arena_ensure_capacity( arena, sizeof(ASTNode), alignof(ASTNode) - ); + ); // shouldn't fail 
but if it does then what a shame + // Get pointer in the arena ASTNode *left_side = arena_unwrap_pointer( arena_alloc( arena, @@ -67,22 +87,33 @@ ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { ) ); + // Should check if is Integer or number arrayslice_next(slice, left_side); while (true) { + // Second: Get next one and checn bp if (!arrayslice_is_valid(slice)) { break; } ASTNode operator; + // Here should chekc if is operator not some bs + // Third, get operator and binding powers arrayslice_peek(slice, &operator); - uint8_t rbp = node_rbp(operator); - uint8_t lbp = node_lbp(operator); + uint8_t rbp = infix_rbp(operator); + uint8_t lbp = infix_lbp(operator); + // If lbp is LESS then stop recursion, + // we found the next smaller binding power + // or the one with more precedence if (lbp < min_bp) { break; } + + // If NOT, then we continue wtching ahead + // for the next one but taking our current + // concern that is rbp of the current operator arrayslice_next(slice, NULL); ASTNode *right_side = parse_expr(slice, arena, rbp); @@ -106,6 +137,7 @@ ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { } + // Final: return left side return left_side; } From e30b3d7175dfbda028e377e553a789165fd80c1b Mon Sep 17 00:00:00 2001 From: laentropia Date: Tue, 12 May 2026 18:33:52 -0600 Subject: [PATCH 4/6] addition: processing of prefix op --- include/parser.h | 2 +- src/parser.c | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/include/parser.h b/include/parser.h index 0c8f080..75bc70c 100644 --- a/include/parser.h +++ b/include/parser.h @@ -32,8 +32,8 @@ typedef struct { ASTNode *nud(ArraySlice *slice); ASTNode *led(ArraySlice *slice, size_t right_precedence); -uint8_t prefix_lbp(ASTNode node); uint8_t prefix_rbp(ASTNode node); +uint8_t postfix_lbp(ASTNode node); uint8_t infix_lbp(ASTNode node); uint8_t infix_rbp(ASTNode node); diff --git a/src/parser.c b/src/parser.c index fc4c142..2d956f5 100644 --- 
a/src/parser.c +++ b/src/parser.c @@ -87,6 +87,17 @@ ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { ) ); + // if is unary then take prefix bp and continue + // to the right, no need to allocate left side + // because we just did and right side + // WILL return a valid allocated pointer. + if (left_side->type == NODE_UNARY_OP) { + uint8_t rbp = prefix_rbp(*left_side); + ASTNode *righ_side = parse_expr(slice, arena, rbp); + + left_side->data.unary.val = righ_side; + return left_side; + } // Should check if is Integer or number arrayslice_next(slice, left_side); From 7f390a8c6b4889e11cf61c8c120e0bc59723bf1f Mon Sep 17 00:00:00 2001 From: laentropia Date: Tue, 12 May 2026 19:40:42 -0600 Subject: [PATCH 5/6] addition: postfix operator capability, may work --- include/lexer.h | 1 + src/lexer.c | 7 +++++++ src/parser.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/include/lexer.h b/include/lexer.h index 5081605..387c77c 100644 --- a/include/lexer.h +++ b/include/lexer.h @@ -20,6 +20,7 @@ typedef enum { OP_MUL, OP_DIV, OP_POW, + OP_FACTORIAL, } Operator; typedef enum { diff --git a/src/lexer.c b/src/lexer.c index 201a187..4696fe7 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -155,6 +155,9 @@ Operator char_to_operator(int c) { case '^': return OP_POW; break; + case '!': + return OP_FACTORIAL; + break; default: // I mean shouldn't be used, we assume return -1; } @@ -172,5 +175,9 @@ char operator_to_char(Operator op) { return '/'; case OP_POW: return '^'; + case OP_FACTORIAL: + return '!'; + default: + return EOF; } } diff --git a/src/parser.c b/src/parser.c index 2d956f5..101c995 100644 --- a/src/parser.c +++ b/src/parser.c @@ -14,12 +14,26 @@ uint8_t prefix_rbp(ASTNode node) { switch (node.data.unary.op) { case OP_SUB: case OP_ADD: - return 5; + return 30; default: return -1; } } +uint8_t postfix_lbp(ASTNode node) { + if (node.type == NODE_INTEGER) { + return 0; + } + + switch 
(node.data.unary.op) { + case OP_FACTORIAL: + return 40; + default: + // needs to be dealt with with resulttypes + return 255; + } +} + uint8_t infix_lbp(ASTNode node) { if (node.type == NODE_INTEGER) { return 0; @@ -34,7 +48,7 @@ uint8_t infix_lbp(ASTNode node) { case OP_MUL: return 20; case OP_POW: - return 31; + return 51; default: return 0; } @@ -54,7 +68,7 @@ uint8_t infix_rbp(ASTNode node) { case OP_MUL: return 21; case OP_POW: - return 30; + return 50; default: return 0; } @@ -107,10 +121,39 @@ ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { break; } + // Here check if not OP error + ASTNode operator; // Here should chekc if is operator not some bs // Third, get operator and binding powers arrayslice_peek(slice, &operator); + + // temporary for bad error handling + if (postfix_lbp(operator) != 255) { + if (postfix_lbp(operator) < min_bp) { + break; + } + + // allocate operator + arrayslice_next(slice, NULL); + arena_ensure_capacity( + arena, + sizeof(ASTNode), + alignof(ASTNode)); + ASTNode *new_node = arena_unwrap_pointer( + arena_alloc( + arena, + sizeof(ASTNode), + alignof(ASTNode) + ) + ); + *new_node = operator; + + + new_node->data.unary.val = left_side; + + left_side = new_node; + } uint8_t rbp = infix_rbp(operator); uint8_t lbp = infix_lbp(operator); From 56c80fa071b52f17fac4cabae4bd2bc973a44f95 Mon Sep 17 00:00:00 2001 From: laentropia Date: Tue, 12 May 2026 20:04:41 -0600 Subject: [PATCH 6/6] addition: Managing of parentheses It's a mess: I was writing without much of a plan, but conceptually it should work. It just needs a heavy refactor to make it much cleaner than it currently is, and the evaluator also has to be fixed later, since it is badly broken now (not that badly, really: it is nice but obviously doesn't work; I like my code a lot :) --- include/lexer.h | 7 ++++ src/lexer.c | 13 ++++++++ src/parser.c | 88 +++++++++++++++++++++++++++++-------------------- 3 files changed, 73 insertions(+), 35 deletions(-) diff --git 
a/include/lexer.h b/include/lexer.h index 387c77c..4af0f9d 100644 --- a/include/lexer.h +++ b/include/lexer.h @@ -11,6 +11,7 @@ typedef enum { NODE_INTEGER, NODE_BINARY_OP, NODE_UNARY_OP, + NODE_PARENTHESIS, } ASTNodeType; // For classify operators @@ -21,6 +22,8 @@ typedef enum { OP_DIV, OP_POW, OP_FACTORIAL, + OP_START_PAR, + OP_END_PAR, } Operator; typedef enum { @@ -46,6 +49,10 @@ typedef struct ASTNode { struct ASTNode *val; Operator op; } unary; + struct { + struct ASTNode *val; + Operator op; + } parenthesis; } data; } ASTNode; diff --git a/src/lexer.c b/src/lexer.c index 4696fe7..2115cc4 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -132,6 +132,9 @@ bool isoperator(int c) { case '/': case '*': case '^': + case '!': + case '(': + case ')': return true; default: return false; @@ -158,6 +161,12 @@ Operator char_to_operator(int c) { case '!': return OP_FACTORIAL; break; + case '(': + return OP_START_PAR; + break; + case ')': + return OP_END_PAR; + break; default: // I mean shouldn't be used, we assume return -1; } @@ -177,6 +186,10 @@ char operator_to_char(Operator op) { return '^'; case OP_FACTORIAL: return '!'; + case OP_START_PAR: + return '('; + case OP_END_PAR: + return ')'; default: return EOF; } diff --git a/src/parser.c b/src/parser.c index 101c995..2a09988 100644 --- a/src/parser.c +++ b/src/parser.c @@ -101,6 +101,20 @@ ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { ) ); + arrayslice_next(slice, left_side); + + if (left_side->type == NODE_PARENTHESIS && + left_side->data.parenthesis.op == OP_START_PAR) { + left_side = parse_expr(slice, arena, 0); + // HERE CHEKC LATER if slice.next != ')' + ASTNode *end_par; + arrayslice_next(slice, &end_par); + if (end_par->type != NODE_PARENTHESIS || + end_par->data.parenthesis.op != OP_END_PAR) { + // todo + } + return left_side; + } // if is unary then take prefix bp and continue // to the right, no need to allocate left side // because we just did and right side @@ -110,10 +124,7 @@ 
ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { ASTNode *righ_side = parse_expr(slice, arena, rbp); left_side->data.unary.val = righ_side; - return left_side; } - // Should check if is Integer or number - arrayslice_next(slice, left_side); while (true) { // Second: Get next one and checn bp @@ -153,47 +164,54 @@ ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { new_node->data.unary.val = left_side; left_side = new_node; + continue; } + + // check if it has infix or not, if not then error uint8_t rbp = infix_rbp(operator); uint8_t lbp = infix_lbp(operator); - // If lbp is LESS then stop recursion, - // we found the next smaller binding power - // or the one with more precedence - if (lbp < min_bp) { - break; + if (rbp != 255 && lbp != 255) { + + // If lbp is LESS then stop recursion, + // we found the next smaller binding power + // or the one with more precedence + if (lbp < min_bp) { + break; + } + + + // If NOT, then we continue wtching ahead + // for the next one but taking our current + // concern that is rbp of the current operator + arrayslice_next(slice, NULL); + ASTNode *right_side = parse_expr(slice, arena, rbp); + + arena_ensure_capacity( + arena, + sizeof(ASTNode), + alignof(ASTNode)); + ASTNode *new_node = arena_unwrap_pointer( + arena_alloc( + arena, + sizeof(ASTNode), + alignof(ASTNode) + ) + ); + *new_node = operator; + + new_node->data.binary.left = left_side; + new_node->data.binary.right = right_side; + + left_side = new_node; + + continue; } - - // If NOT, then we continue wtching ahead - // for the next one but taking our current - // concern that is rbp of the current operator - arrayslice_next(slice, NULL); - ASTNode *right_side = parse_expr(slice, arena, rbp); - - arena_ensure_capacity( - arena, - sizeof(ASTNode), - alignof(ASTNode)); - ASTNode *new_node = arena_unwrap_pointer( - arena_alloc( - arena, - sizeof(ASTNode), - alignof(ASTNode) - ) - ); - *new_node = operator; - - 
new_node->data.binary.left = left_side; - new_node->data.binary.right = right_side; - - left_side = new_node; + break; } // Final: return left side return left_side; } - - -