From 576bcd95047dcb9d4d592108974718460757f8a6 Mon Sep 17 00:00:00 2001 From: laentropia Date: Thu, 23 Apr 2026 15:37:16 -0600 Subject: [PATCH 1/7] addition: starting to refactor NodeArray into ArrayList --- CMakeLists.txt | 1 + include/lexer.h | 1 + src/lexer.c | 1 - 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2cffd3d..0749c57 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,6 +41,7 @@ target_include_directories(calculator_lib # 🔥 aquí está la magia target_link_libraries(calculator_lib PUBLIC arena + PUBLIC arraylist ) add_executable(calculator src/main.c) diff --git a/include/lexer.h b/include/lexer.h index 9ca60c0..f4b7d81 100644 --- a/include/lexer.h +++ b/include/lexer.h @@ -1,6 +1,7 @@ #ifndef LEXER_H #define LEXER_H +#include "arraylist.h" #include #include #include diff --git a/src/lexer.c b/src/lexer.c index 7febdfc..4567c7b 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1,6 +1,5 @@ #include "lexer.h" #include -#include #include #include #include -- 2.51.0 From 855d68300599b7b631338094fb1ea4d95b7c0423 Mon Sep 17 00:00:00 2001 From: laentropia Date: Fri, 24 Apr 2026 07:02:00 -0600 Subject: [PATCH 2/7] addition: Resul structs for rework --- include/lexer.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/include/lexer.h b/include/lexer.h index f4b7d81..3995b2e 100644 --- a/include/lexer.h +++ b/include/lexer.h @@ -53,6 +53,29 @@ typedef struct ASTNode { } data; } ASTNode; +typedef struct { + bool is_valid; + union { + LexerErr err; + ArrayList *arr; + }; +} TokenizeResult; + +typedef struct { + bool is_valid; + union { + LexerErr err; + ASTNode node; + }; +} ASTNodeResult; + +typedef struct { + bool is_valid; + union { + LexerErr err; + int64_t number; + }; +} I64Result; // I prefer ot have a dynamic array for storing the "tokens" typedef struct { size_t len; -- 2.51.0 From 19c84c382b77149a8f25ed269a06bd0bbe6e5b1a Mon Sep 17 00:00:00 2001 From: laentropia Date: Fri, 24 Apr 2026 07:17:35 -0600 Subject: [PATCH 3/7] refactor: changed funtions definitions, modified tokenize --- include/lexer.h | 6 +++--- src/lexer.c | 36 ++++++++++++++++++------------------ 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/include/lexer.h b/include/lexer.h index 3995b2e..fcb6529 100644 --- a/include/lexer.h +++ b/include/lexer.h @@ -92,9 +92,9 @@ ASTNodeArrayErr ASTNodeArray_pop(ASTNodeArray *arr, size_t index, ASTNode *out); size_t ASTNodeArray_len(ASTNodeArray *arr); // Lexer funtions as well as few functionality -LexerErr tokenize(const char* input, ASTNodeArray *out); -LexerErr tokenize_number(const char* input, size_t *offset, ASTNode *out); -LexerErr string_to_integer(const char buf[], int64_t *number); +TokenizeResult tokenize(const char* input); +ASTNodeResult tokenize_number(const char* input, size_t *offset); +I64Result string_to_integer(const char buf[]); bool isoperator(int c); Operator char_to_operator(int c); char operator_to_char(Operator op); diff --git a/src/lexer.c b/src/lexer.c index 4567c7b..8d80264 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1,5 +1,7 @@ #include "lexer.h" +#include "arraylist.h" #include +#include #include #include #include @@ -11,32 +13,31 @@ typedef enum { } LexerState; -LexerErr tokenize(const char *input, ASTNodeArray *out) { +TokenizeResult tokenize(const char *input) { size_t offset = 0; LexerState state = WAIT_FOR_NUMBER; - ASTNodeArray arr = ASTNodeArray_init(0); // 0 defaults to 64 + ArrayList *arr = arraylist_init(64, sizeof(ASTNode)); while (input[offset] != '\n' && input[offset] != '\0') { int current = input[offset]; if (isdigit(current)) { if (state != WAIT_FOR_NUMBER) { - ASTNodeArray_free(&arr); - return LEXER_WRONG_SYNTAX; + arraylist_destroy(&arr); + return (TokenizeResult) {.is_valid = false, .err = LEXER_WRONG_SYNTAX}; } - ASTNode new_node; - LexerErr result = tokenize_number(input, &offset, &new_node); + ASTNodeResult result = tokenize_number(input, &offset); - if (result != LEXER_OK) { - ASTNodeArray_free(&arr); - return result; + if (!result.is_valid) { + arraylist_destroy(&arr); + return (TokenizeResult) {.is_valid = false, .err = result.err}; } - ASTNodeArray_push(&arr, new_node); + arraylist_push_back(arr, &result.node); state = WAIT_FOR_OPERATOR; } else if (isoperator(current)) { if (state != WAIT_FOR_OPERATOR) { - return LEXER_WRONG_SYNTAX; + return (TokenizeResult) {.is_valid = false, .err =LEXER_WRONG_SYNTAX}; } ASTNode new_node = { .type = NODE_BINARY_OP, @@ -45,24 +46,23 @@ LexerErr tokenize(const char *input, ASTNodeArray *out) { .data.binary.left = NULL, }; - ASTNodeArray_push(&arr, new_node); + arraylist_push_back(arr, &new_node); state = WAIT_FOR_NUMBER; } else if (isspace(current)) { // Nothing... } else { - ASTNodeArray_free(&arr); - return LEXER_NOT_RECOGNIZED_SYMBOL; + arraylist_destroy(&arr); + return (TokenizeResult) {.is_valid = false, .err = LEXER_NOT_RECOGNIZED_SYMBOL}; } offset++; } - if (arr.len < 1) { - return LEXER_EMPTY_INPUT; + if (arraylist_size(arr) < 1) { + return (TokenizeResult) {.is_valid = false, .err = LEXER_EMPTY_INPUT}; } - *out = arr; - return LEXER_OK; + return (TokenizeResult) {.is_valid = true, .arr = arr}; } // CURRENTLY, it only supports ints, not clear how floating -- 2.51.0 From cef046f7db1158f0722a567fae9b411ab514a425 Mon Sep 17 00:00:00 2001 From: laentropia Date: Fri, 24 Apr 2026 08:09:31 -0600 Subject: [PATCH 4/7] refactor: changed string to int adn tokenize number --- src/lexer.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/lexer.c b/src/lexer.c index 8d80264..abb9f31 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -67,7 +67,7 @@ TokenizeResult tokenize(const char *input) { // CURRENTLY, it only supports ints, not clear how floating // point is implemented but i'll figure it out -LexerErr tokenize_number(const char *input, size_t *offset, ASTNode *out) { +ASTNodeResult tokenize_number(const char *input, size_t *offset) { char buf[128] = { '\0' }; size_t buf_pos = 0; bool is_integer = true; // Will later be used to differentiate fractions @@ -77,7 +77,7 @@ LexerErr tokenize_number(const char *input, size_t *offset, ASTNode *out) { buf[buf_pos] = input[current]; if (buf_pos >= sizeof(buf)) { - return LEXER_BUF_OVERFLOW; + return (ASTNodeResult) {.is_valid = false, .err = LEXER_BUF_OVERFLOW}; } current++; buf_pos++; @@ -86,18 +86,18 @@ LexerErr tokenize_number(const char *input, size_t *offset, ASTNode *out) { ASTNode new_node; if (is_integer) { new_node.type = NODE_INTEGER; - LexerErr status = string_to_integer(buf, &new_node.data.integer); - if (status == LEXER_OK) { - *out = new_node; + I64Result status = string_to_integer(buf); + if (status.is_valid == LEXER_OK) { + new_node.data.integer = status.number; } *offset = current; - return status; + return (ASTNodeResult) {.is_valid = true, .node = new_node}; } - return LEXER_FAILED_NUMBER_CONVERSION; + return (ASTNodeResult) {.is_valid = false, .err = LEXER_FAILED_NUMBER_CONVERSION}; } -LexerErr string_to_integer(const char *buf, int64_t *number) { +I64Result string_to_integer(const char *buf) { int c = 0; int64_t count = 0; while (buf[c] != '\0') { @@ -105,7 +105,7 @@ LexerErr string_to_integer(const char *buf, int64_t *number) { int digit = buf[c] - '0'; if (count > (INT64_MAX - digit) / 10) { - return LEXER_INT_OVERFLOW; + return (I64Result) {.is_valid = false, .err = LEXER_INT_OVERFLOW}; } count = count * 10; count += digit; @@ -113,8 +113,7 @@ LexerErr string_to_integer(const char *buf, int64_t *number) { c++; } - *number = count; - return LEXER_OK; + return (I64Result) {.is_valid = true, .number = count}; } bool isoperator(int c) { -- 2.51.0 From b7e1cdf3a64cf380747a6e9317a64d54e75f6f41 Mon Sep 17 00:00:00 2001 From: laentropia Date: Fri, 24 Apr 2026 09:06:47 -0600 Subject: [PATCH 5/7] refactor: made parser work with arrayslices and new result types --- include/lexer.h | 23 ---------- include/parser.h | 34 ++++++++------ src/ASTNodeArray.c | 108 --------------------------------------------- src/main.c | 5 +-- src/parser.c | 43 +++++------------- 5 files changed, 34 insertions(+), 179 deletions(-) delete mode 100644 src/ASTNodeArray.c diff --git a/include/lexer.h b/include/lexer.h index fcb6529..ed8c1e2 100644 --- a/include/lexer.h +++ b/include/lexer.h @@ -20,15 +20,6 @@ typedef enum { OP_DIV } Operator; -typedef enum { - ARRAY_OK = 0, - ARRAY_NULL, - ARRAY_EMPTY, - ARRAY_OUT_OF_BOUNDS, - ARRAY_NULL_ARG, - ARRAY_ALLOC, -} ASTNodeArrayErr; - typedef enum { LEXER_OK = 0, LEXER_INT_OVERFLOW, @@ -76,20 +67,6 @@ typedef struct { int64_t number; }; } I64Result; -// I prefer ot have a dynamic array for storing the "tokens" -typedef struct { - size_t len; - size_t cap; - ASTNode *data; -} ASTNodeArray; - -ASTNodeArray ASTNodeArray_init(size_t size); -void ASTNodeArray_free(ASTNodeArray *arr); -ASTNodeArrayErr ASTNodeArray_push(ASTNodeArray *arr, ASTNode node); -ASTNodeArrayErr ASTNodeArray_get(const ASTNodeArray *arr, size_t index, ASTNode *out); -// Out in pop can be NULL so it doesn't return anything -ASTNodeArrayErr ASTNodeArray_pop(ASTNodeArray *arr, size_t index, ASTNode *out); -size_t ASTNodeArray_len(ASTNodeArray *arr); // Lexer funtions as well as few functionality TokenizeResult tokenize(const char* input); diff --git a/include/parser.h b/include/parser.h index 7c2fa4f..158b348 100644 --- a/include/parser.h +++ b/include/parser.h @@ -3,33 +3,39 @@ #include "lexer.h" #include "arena.h" +#include "arraylist.h" #include typedef struct { ASTNode *head; } AST; -typedef struct { - ASTNodeArray *arr; - size_t pos; -} ASTNodeSlice; +typedef enum { + PARSER_OK = 0, + PARSER_UNEXPECTED_TOKEN, + PARSER_MISSING_OPERAND, + PARSER_UNMATCHED_PAREN, + PARSER_OUT_OF_MEMORY, +} ParserErr; typedef struct { - Arena arena; - ASTNode *tree; + bool is_valid; + union { + ParserErr err; + struct { + Arena arena; + ASTNode *tree; + }; + }; } ParseResult; -ASTNode ASTNodeSlice_peek(ASTNodeSlice *slice); -ASTNode ASTNodeSlice_next(ASTNodeSlice *slice); -bool ASTNodeSlice_is_valid(ASTNodeSlice *slice); - -ASTNode *nud(ASTNodeSlice *slice); -ASTNode *led(ASTNodeSlice *slice, size_t right_precedence); +ASTNode *nud(ArraySlice *slice); +ASTNode *led(ArraySlice *slice, size_t right_precedence); uint8_t node_lbp(ASTNode node); uint8_t node_rbp(ASTNode node); -ParseResult parse(ASTNodeArray *arr); -ASTNode *parse_expr(ASTNodeSlice *slice, Arena *arena, uint8_t min_bp); +ParseResult parse(TokenizeResult tokens); +ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp); #endif // !PARSER_H diff --git a/src/ASTNodeArray.c b/src/ASTNodeArray.c deleted file mode 100644 index a9f2053..0000000 --- a/src/ASTNodeArray.c +++ /dev/null @@ -1,108 +0,0 @@ -#include "lexer.h" -#include - -#define NODE_ARRAY_DEFAULT_SIZE 64 -// Helps state machine for the lexer :) -typedef enum { - WAIT_FOR_NUMBER, - WAIT_FOR_OPERATOR, -} LexerState; - -ASTNodeArray ASTNodeArray_init(size_t size) { - ASTNodeArray new; - new.len = 0; // if 0 then use default - new.cap = size == 0 ? NODE_ARRAY_DEFAULT_SIZE : size; - new.data = malloc(new.cap * sizeof(ASTNode)); - return new; -} - -void ASTNodeArray_free(ASTNodeArray *arr) { - free(arr->data); - arr->cap = 0; - arr->len = 0; -} - -ASTNodeArrayErr ASTNodeArray_get(const ASTNodeArray *arr, size_t index, ASTNode *out) { - if (arr == NULL) { - return ARRAY_NULL; - } - - if (out == NULL) { - return ARRAY_NULL_ARG; - } - - if (arr->len == 0) { - return ARRAY_EMPTY; - } - - if (index >= arr->len) { - return ARRAY_OUT_OF_BOUNDS; - } - - *out = arr->data[index]; - - return ARRAY_OK; -} - -ASTNodeArrayErr ASTNodeArray_push(ASTNodeArray *arr, ASTNode node) { - if (arr == NULL) { - return ARRAY_NULL; - } - - if (arr->len >= arr->cap) { - size_t new_cap = arr->cap * 2; - ASTNode *tmp = realloc(arr->data, new_cap * sizeof(ASTNode)); - if (tmp == NULL) { - return ARRAY_ALLOC; - } - arr->data = tmp; - arr->cap = new_cap; - } - - arr->data[arr->len] = node; - arr->len = arr->len + 1; - - return ARRAY_OK; -} - -ASTNodeArrayErr ASTNodeArray_pop(ASTNodeArray *arr, size_t index, ASTNode *out) { - if (arr == NULL) { - return ARRAY_NULL; - } - - if (arr->len == 0) { - return ARRAY_EMPTY; - } - - if (index >= arr->len) { - return ARRAY_OUT_OF_BOUNDS; - } - - if (arr->cap / 4 > arr->len) { - size_t new_cap = arr->cap / 2; - ASTNode *tmp = realloc(arr->data, new_cap * sizeof(ASTNode)); - if (tmp == NULL) { - return ARRAY_ALLOC; - } - arr->data = tmp; - arr->cap = new_cap; - } - - if (out != NULL) { - ASTNode node_to_delete = arr->data[index]; - *out = node_to_delete; - } - - for (size_t i = index; i < arr->len - 1; i++) { - arr->data[index] = arr->data[index + 1]; - } - - return ARRAY_OK; -} - -size_t ASTNodeArray_len(ASTNodeArray *arr) { - if (arr == NULL) { - return 0; - } - return arr->len; -} diff --git a/src/main.c b/src/main.c index 9cc1240..89cfc64 100644 --- a/src/main.c +++ b/src/main.c @@ -19,10 +19,9 @@ int main(void) { } buf[pos] = '\0'; - ASTNodeArray context; - tokenize(buf, &context); + TokenizeResult tokens = tokenize(buf); - ParseResult par = parse(&context); + ParseResult par = parse(tokens); int64_t result = evaluate(par); diff --git a/src/parser.c b/src/parser.c index 9f454ef..a3efae0 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,4 +1,5 @@ #include "parser.h" +#include "arraylist.h" #include "lexer.h" #include "arena.h" #include @@ -41,38 +42,17 @@ uint8_t node_rbp(ASTNode node) { } } -ASTNode ASTNodeSlice_next(ASTNodeSlice *slice) { - return slice->arr->data[slice->pos++]; -} - -ASTNode ASTNodeSlice_peek(ASTNodeSlice *slice) { - return slice->arr->data[slice->pos]; -} - -bool ASTNodeSlice_is_valid(ASTNodeSlice *slice) { - if (slice->arr->len < 1) { - return false; - } - if (slice->pos >= slice->arr->len) { - return false; - } - - return true; -} - -ParseResult parse(ASTNodeArray *arr) { - ASTNodeSlice context = { - .arr = arr, - .pos = 0, - }; - Arena arena = arena_init(sizeof(ASTNode) * arr->len).arena; +ParseResult parse(TokenizeResult tokens) { + ArraySlice *context = arraylist_slice(tokens.arr, 0, arraylist_size(tokens.arr)); + Arena arena = arena_init(sizeof(ASTNode) * arraylist_size(tokens.arr)).arena; return (ParseResult) { + .is_valid = true, .arena = arena, - .tree = parse_expr(&context, &arena, 0)}; + .tree = parse_expr(context, &arena, 0)}; } -ASTNode *parse_expr(ASTNodeSlice *slice, Arena *arena, uint8_t min_bp) { +ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { arena_ensure_capacity( arena, sizeof(ASTNode), @@ -87,14 +67,15 @@ ASTNode *parse_expr(ASTNodeSlice *slice, Arena *arena, uint8_t min_bp) { ) ); - *left_side = ASTNodeSlice_next(slice); + arrayslice_next(slice, left_side); while (true) { - if (!ASTNodeSlice_is_valid(slice)) { + if (!arrayslice_is_valid(slice)) { break; } - ASTNode operator = ASTNodeSlice_peek(slice); + ASTNode operator; + arrayslice_peek(slice, &operator); uint8_t rbp = node_rbp(operator); uint8_t lbp = node_lbp(operator); @@ -102,7 +83,7 @@ ASTNode *parse_expr(ASTNodeSlice *slice, Arena *arena, uint8_t min_bp) { break; } - ASTNodeSlice_next(slice); + arrayslice_next(slice, NULL); ASTNode *right_side = parse_expr(slice, arena, rbp); arena_ensure_capacity( -- 2.51.0 From 630d9f53e1c57d81203e99e50dcd377d7890d816 Mon Sep 17 00:00:00 2001 From: laentropia Date: Fri, 24 Apr 2026 09:36:03 -0600 Subject: [PATCH 6/7] test: changed lexer tests --- CMakeLists.txt | 2 - src/main.c | 1 - test/test_ASTNodeArray.c | 86 ---------------------------------------- test/test_lexer.c | 56 ++++++++++++-------------- 4 files changed, 25 insertions(+), 120 deletions(-) delete mode 100644 test/test_ASTNodeArray.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 0749c57..d300c72 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,14 +31,12 @@ add_library(calculator_lib src/lexer.c src/parser.c src/evaluator.c - src/ASTNodeArray.c ) target_include_directories(calculator_lib PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include ) -# 🔥 aquí está la magia target_link_libraries(calculator_lib PUBLIC arena PUBLIC arraylist diff --git a/src/main.c b/src/main.c index 89cfc64..2c5827f 100644 --- a/src/main.c +++ b/src/main.c @@ -1,4 +1,3 @@ -#include "arena.h" #include "evaluator.h" #include "lexer.h" #include "parser.h" diff --git a/test/test_ASTNodeArray.c b/test/test_ASTNodeArray.c deleted file mode 100644 index 59c98cb..0000000 --- a/test/test_ASTNodeArray.c +++ /dev/null @@ -1,86 +0,0 @@ -#include "lexer.h" -#include -#include -#include -#include -#include -#include - -static void test_array_push(void **state) { - (void) state; - - // We use 2 to force resize and checking anything wrong with malloc - ASTNodeArray arr = ASTNodeArray_init(2); - ASTNode node1 = { - .type = NODE_INTEGER, - .data = { .integer = 90 } - }; - - ASTNode node2 = { - .type = NODE_INTEGER, - .data = { .integer = 80 } - }; - - ASTNode node3 = { - .type = NODE_INTEGER, - .data = { .integer = 70 } - }; - - assert_int_equal(ASTNodeArray_push(&arr, node1), ARRAY_OK); - assert_int_equal(ASTNodeArray_len(&arr), 1); - - assert_int_equal(ASTNodeArray_push(&arr, node2), ARRAY_OK); - assert_int_equal(ASTNodeArray_len(&arr), 2); - - assert_int_equal(ASTNodeArray_push(&arr, node3), ARRAY_OK); - assert_int_equal(ASTNodeArray_len(&arr), 3); - - ASTNodeArray_free(&arr); -} - -static void test_array_pop(void **state) { - (void) state; - - // Set to force desize - ASTNodeArray arr = ASTNodeArray_init(16); - ASTNode node1 = { - .type = NODE_INTEGER, - .data = { .integer = 90 } - }; - - ASTNode node2 = { - .type = NODE_INTEGER, - .data = { .integer = 80 } - }; - - ASTNode node3 = { - .type = NODE_INTEGER, - .data = { .integer = 70 } - }; - - assert_int_equal(ASTNodeArray_push(&arr, node1), ARRAY_OK); - assert_int_equal(ASTNodeArray_len(&arr), 1); - - assert_int_equal(ASTNodeArray_push(&arr, node2), ARRAY_OK); - assert_int_equal(ASTNodeArray_len(&arr), 2); - - assert_int_equal(ASTNodeArray_push(&arr, node3), ARRAY_OK); - assert_int_equal(ASTNodeArray_len(&arr), 3); - - ASTNode node4; - assert_int_equal(ASTNodeArray_pop(&arr, 1, &node4), ARRAY_OK); - assert_int_equal(node4.type, NODE_INTEGER); - assert_int_equal(node4.data.integer, 80); - - ASTNodeArray_free(&arr); -} - - -int main(void) { - const struct CMUnitTest tests[] = { - cmocka_unit_test(test_array_push), - cmocka_unit_test(test_array_pop), - }; - - return cmocka_run_group_tests(tests, NULL, NULL); -} diff --git a/test/test_lexer.c b/test/test_lexer.c index db9b914..7540119 100644 --- a/test/test_lexer.c +++ b/test/test_lexer.c @@ -1,3 +1,4 @@ +#include "arraylist.h" #include "lexer.h" #include #include @@ -10,37 +11,37 @@ static void test_tokenize_normal_expresion(void **state) { (void) state; char expr[256] = "2 + 3 / 66 * 789"; - ASTNodeArray tokens; ASTNode node; - - assert_int_equal(tokenize(expr, &tokens), LEXER_OK); - assert_int_equal(tokens.len, 7); + TokenizeResult tokens = tokenize(expr); - ASTNodeArray_get(&tokens, 0, &node); + assert_true(tokens.is_valid); + assert_int_equal(arraylist_size(tokens.arr), 7); + + arraylist_get(tokens.arr, 0, &node); assert_int_equal(node.type, NODE_INTEGER); assert_int_equal(node.data.integer, 2); - ASTNodeArray_get(&tokens, 1, &node); + arraylist_get(tokens.arr, 0, &node); assert_int_equal(node.type, NODE_BINARY_OP); assert_int_equal(node.data.binary.op, OP_ADD); - ASTNodeArray_get(&tokens, 2, &node); + arraylist_get(tokens.arr, 0, &node); assert_int_equal(node.type, NODE_INTEGER); assert_int_equal(node.data.integer, 3); - ASTNodeArray_get(&tokens, 3, &node); + arraylist_get(tokens.arr, 0, &node); assert_int_equal(node.type, NODE_BINARY_OP); assert_int_equal(node.data.binary.op, OP_DIV); - ASTNodeArray_get(&tokens, 4, &node); + arraylist_get(tokens.arr, 0, &node); assert_int_equal(node.type, NODE_INTEGER); assert_int_equal(node.data.integer, 66); - ASTNodeArray_get(&tokens, 5, &node); + arraylist_get(tokens.arr, 0, &node); assert_int_equal(node.type, NODE_BINARY_OP); assert_int_equal(node.data.binary.op, OP_MUL); - ASTNodeArray_get(&tokens, 6, &node); + arraylist_get(tokens.arr, 0, &node); assert_int_equal(node.type, NODE_INTEGER); assert_int_equal(node.data.integer, 789); } @@ -49,28 +50,20 @@ static void test_tokenize_unrecognized_symbol(void **state) { (void) state; char expr[256] = " 2 j 3 / 66 } 789"; - ASTNodeArray tokens = { - .len = 0, - .cap = 0, - }; + TokenizeResult tokens = tokenize(expr); - assert_int_equal(tokenize(expr, &tokens), LEXER_NOT_RECOGNIZED_SYMBOL); - assert_int_equal(tokens.len, 0); - assert_int_equal(tokens.cap, 0); + assert_false(tokens.is_valid); + assert_uint_equal(tokens.err, LEXER_WRONG_SYNTAX); } static void test_tokenize_wrong_sintax(void **state) { (void) state; char expr[256] = "2 3 / 66 789"; - ASTNodeArray tokens = { - .len = 0, - .cap = 0, - }; + TokenizeResult tokens = tokenize(expr); - assert_int_equal(tokenize(expr, &tokens), LEXER_WRONG_SYNTAX); - assert_int_equal(tokens.len, 0); - assert_int_equal(tokens.cap, 0); + assert_false(tokens.is_valid); + assert_uint_equal(tokens.err, LEXER_WRONG_SYNTAX); } static void test_string_to_number_normal(void **state) { @@ -78,13 +71,13 @@ static void test_string_to_number_normal(void **state) { char num[16] = "2333t55"; size_t offset = 0; - ASTNode result; + ASTNodeResult result = tokenize_number(num, &offset); - assert_int_equal(tokenize_number(num, &offset, &result), LEXER_OK); + assert_true(result.is_valid); assert_int_equal(offset, 4); // equal to t position in string - assert_int_equal(result.type, NODE_INTEGER); - assert_int_equal(result.data.integer, 2333); + assert_int_equal(result.node.type, NODE_INTEGER); + assert_int_equal(result.node.data.integer, 2333); } static void test_string_to_number_overflow(void **state) { @@ -93,8 +86,9 @@ static void test_string_to_number_overflow(void **state) { // Number is INT64_MAX but with a extra 8 at the end char num[32] = "92233720368547758078yy7"; size_t offset = 0; - ASTNode result; - assert_int_equal(tokenize_number(num, &offset, &result), LEXER_INT_OVERFLOW); + ASTNodeResult result = tokenize_number(num, &offset); + assert_false(result.is_valid); + assert_uint_equal(result.err, LEXER_INT_OVERFLOW); // Technically it can trigger a buf overflow error but obvioulsy // it will trigger int overflow error first } -- 2.51.0 From ac2e783ccc852ff62801e06688142a209d56d0b2 Mon Sep 17 00:00:00 2001 From: laentropia Date: Thu, 30 Apr 2026 09:58:27 -0600 Subject: [PATCH 7/7] fix: tests and implementation of lexer Just a few details here and there, nothing wrong, everything else is going well. --- src/lexer.c | 5 +++-- test/CMakeLists.txt | 7 ------- test/test_evaluator.c | 6 ++---- test/test_lexer.c | 18 +++++++++--------- test/test_parser.c | 12 ++++++------ 5 files changed, 20 insertions(+), 28 deletions(-) diff --git a/src/lexer.c b/src/lexer.c index abb9f31..7baf998 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -87,9 +87,10 @@ ASTNodeResult tokenize_number(const char *input, size_t *offset) { if (is_integer) { new_node.type = NODE_INTEGER; I64Result status = string_to_integer(buf); - if (status.is_valid == LEXER_OK) { - new_node.data.integer = status.number; + if (!status.is_valid) { + return (ASTNodeResult) {.is_valid = false, .err = status.err}; } + new_node.data.integer = status.number; *offset = current; return (ASTNodeResult) {.is_valid = true, .node = new_node}; } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index b48356f..fba3b31 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,15 +1,9 @@ find_package(cmocka REQUIRED) -add_executable(test_nodeArray test_ASTNodeArray.c) add_executable(test_lexer test_lexer.c) add_executable(test_parser test_parser.c) add_executable(test_evaluator test_evaluator.c) -target_link_libraries(test_nodeArray - calculator_lib - cmocka::cmocka -) - target_link_libraries(test_lexer calculator_lib cmocka::cmocka @@ -25,7 +19,6 @@ target_link_libraries(test_evaluator cmocka::cmocka ) -add_test(NAME nodeArray_tests COMMAND test_nodeArray) add_test(NAME lexer_tests COMMAND test_lexer) add_test(NAME parser_tests COMMAND test_parser) add_test(NAME evaluator_tests COMMAND test_evaluator) diff --git a/test/test_evaluator.c b/test/test_evaluator.c index a8f5bd1..c97cdb2 100644 --- a/test/test_evaluator.c +++ b/test/test_evaluator.c @@ -13,10 +13,8 @@ static void test_basic_evaluation(void** state) { (void) state; char expr[256] = "2 + 4 * 40 / 2"; - ASTNodeArray context; - - tokenize(expr, &context); - ParseResult result = parse(&context); + TokenizeResult tokens = tokenize(expr); + ParseResult result = parse(tokens); int64_t value = evaluate(result); assert_int_equal(value, 82); diff --git a/test/test_lexer.c b/test/test_lexer.c index 7540119..364ff5b 100644 --- a/test/test_lexer.c +++ b/test/test_lexer.c @@ -21,27 +21,27 @@ static void test_tokenize_normal_expresion(void **state) { assert_int_equal(node.type, NODE_INTEGER); assert_int_equal(node.data.integer, 2); - arraylist_get(tokens.arr, 0, &node); + arraylist_get(tokens.arr, 1, &node); assert_int_equal(node.type, NODE_BINARY_OP); assert_int_equal(node.data.binary.op, OP_ADD); - arraylist_get(tokens.arr, 0, &node); + arraylist_get(tokens.arr, 2, &node); assert_int_equal(node.type, NODE_INTEGER); assert_int_equal(node.data.integer, 3); - arraylist_get(tokens.arr, 0, &node); + arraylist_get(tokens.arr, 3, &node); assert_int_equal(node.type, NODE_BINARY_OP); assert_int_equal(node.data.binary.op, OP_DIV); - arraylist_get(tokens.arr, 0, &node); + arraylist_get(tokens.arr, 4, &node); assert_int_equal(node.type, NODE_INTEGER); assert_int_equal(node.data.integer, 66); - arraylist_get(tokens.arr, 0, &node); + arraylist_get(tokens.arr, 5, &node); assert_int_equal(node.type, NODE_BINARY_OP); assert_int_equal(node.data.binary.op, OP_MUL); - arraylist_get(tokens.arr, 0, &node); + arraylist_get(tokens.arr, 6, &node); assert_int_equal(node.type, NODE_INTEGER); assert_int_equal(node.data.integer, 789); } @@ -53,7 +53,7 @@ static void test_tokenize_unrecognized_symbol(void **state) { TokenizeResult tokens = tokenize(expr); assert_false(tokens.is_valid); - assert_uint_equal(tokens.err, LEXER_WRONG_SYNTAX); + assert_uint_equal(tokens.err, LEXER_NOT_RECOGNIZED_SYMBOL); } static void test_tokenize_wrong_sintax(void **state) { @@ -83,8 +83,8 @@ static void test_string_to_number_normal(void **state) { static void test_string_to_number_overflow(void **state) { (void) state; - // Number is INT64_MAX but with a extra 8 at the end - char num[32] = "92233720368547758078yy7"; + // Number is INT64_MAX but with a extra 899 at the end + char num[32] = "92233720368547758079"; size_t offset = 0; ASTNodeResult result = tokenize_number(num, &offset); assert_false(result.is_valid); diff --git a/test/test_parser.c b/test/test_parser.c index 0fe3c20..c147030 100644 --- a/test/test_parser.c +++ b/test/test_parser.c @@ -1,4 +1,5 @@ #include "arena.h" +#include "arraylist.h" #include "lexer.h" #include "parser.h" #include @@ -12,13 +13,12 @@ static void test_parsing_basic_expression(void **state) { (void) state; char expr[256] = "2 + 3 / 66 * 789"; - ASTNodeArray tokens; - ASTNode node; - - assert_int_equal(tokenize(expr, &tokens), LEXER_OK); - assert_int_equal(tokens.len, 7); + TokenizeResult tokens = tokenize(expr); - ParseResult result = parse(&tokens); + assert_true(tokens.is_valid); + assert_int_equal(arraylist_size(tokens.arr), 7); + + ParseResult result = parse(tokens); // Assert head is + assert_int_equal(result.tree->type, NODE_BINARY_OP); assert_int_equal(result.tree->data.binary.op, OP_ADD); -- 2.51.0