Compare commits

...

12 Commits

Author SHA1 Message Date
9ea1da549f Merge pull request 'refactor-error-handling' (#12) from refactor-error-handling into main
Reviewed-on: #12
2026-05-13 19:05:54 -06:00
ab791dbc9b fix: tests and main 2026-05-13 18:48:14 -06:00
3ec73559ee refactor: evaluator separated into evaluate bin and un
Is nicer this way, also made it more beautiful to look at and therefore
to understand.
2026-05-13 18:06:01 -06:00
b56a368244 refactor: bp functions take tokens now
necessary for cleanness
2026-05-13 17:48:03 -06:00
6294121e91 refactor: nud and led have different responsibilities
So now nud and led do what they were supposed to do i guess, now i
thinks is just adjusting infix and postfix and all bd funcions to act on
operator instead.
2026-05-13 17:35:52 -06:00
70ab06964c addtition: nud and led token to node distinction
I think i should instead have a nud and led function i guess, may do
that next
2026-05-13 16:09:03 -06:00
90c426f3a4 refactor: moved NodeResult to TreeResult 2026-05-13 12:19:17 -06:00
efa0e3bacd refactor: evaluator incomplete.
SO, i forgot to implement nud and led correctly and the parser cant tell
apart from - as unary and - as binary (+ as well), i need to correct
that, move Node * to TreeResult so to use NodeResult with nud and led
2026-05-13 12:13:07 -06:00
542a94ef81 refactor: All of parser.c
DAMN, it wasn't that difficult, just bothers me a bit the part that
checks if both lbp and rbp of the infix are valid, like i do validation
twice but is fine i guess, maybe using an else?, i'll see if i change
it, for now i need to change the evaluator
2026-05-13 11:09:22 -06:00
80e05a9acf refactor: changed parser.h, added Node
So just added node back but now clearly separated by tokens and nodes of
the AST as it should be, now real rework the mess that is the parser
2026-05-13 10:02:55 -06:00
f3373123e1 refactor: adapted lexer to work with new tokens
Now its fine, the code i find it clear if one just sits down to read it
for a moment, next is the parser that REALLY needs reworking and a few
helper structs.
2026-05-13 09:49:28 -06:00
2a73f5f9d6 refactor: delete ASTNode, add Token to lexer
So, total refactor, now we serious. I feel ASTNode was feeling very
bloated so we need to rewrite and adapt everything, by now lets get the
lexer working again, is already well written for me at least.
2026-05-13 09:37:15 -06:00
11 changed files with 541 additions and 425 deletions

View File

@@ -40,6 +40,7 @@ target_include_directories(calculator_lib
target_link_libraries(calculator_lib target_link_libraries(calculator_lib
PUBLIC arena PUBLIC arena
PUBLIC arraylist PUBLIC arraylist
PRIVATE m
) )
add_executable(calculator src/main.c) add_executable(calculator src/main.c)

View File

@@ -1,11 +1,28 @@
#ifndef EVALUATOR_H #ifndef EVALUATOR_H
#define EVALUATOR_H #define EVALUATOR_H
#include "lexer.h"
#include "parser.h" #include "parser.h"
#include <stdint.h> #include <stdint.h>
int64_t evaluate(ParseResult context); typedef enum {
int64_t evaluate_tree(ASTNode *tree); EVALUATOR_OK,
EVALUATOR_MATH_ERR,
EVALUATOR_INVALID_PARSING,
EVALUATOR_INVALID_TREE, // just to shut up the compiler with the switches
} EvaluatorErr;
typedef struct {
bool is_valid;
union {
int64_t val;
EvaluatorErr err;
};
} EvaluatorResult;
EvaluatorResult evaluate_binary(Node *tree);
EvaluatorResult evaluate_unary(Node *tree);
EvaluatorResult evaluate(ParserResult context);
EvaluatorResult evaluate_tree(Node *tree);
#endif // !EVALUATOR_H #endif // !EVALUATOR_H

View File

@@ -8,12 +8,9 @@
// For identifying // For identifying
typedef enum { typedef enum {
NODE_INTEGER, TOKEN_INTEGER,
NODE_BINARY_OP, TOKEN_OPERATOR,
NODE_UNARY_OP, } TokenType;
NODE_PARENTHESIS,
} ASTNodeType;
// For classify operators // For classify operators
typedef enum { typedef enum {
OP_ADD, OP_ADD,
@@ -36,25 +33,13 @@ typedef enum {
} LexerErr; } LexerErr;
// Can be thought as tokens, they will be used by the parser. // Can be thought as tokens, they will be used by the parser.
typedef struct ASTNode { typedef struct {
ASTNodeType type; TokenType type;
union { union {
int64_t integer; int64_t num;
struct { Operator op;
struct ASTNode *left; };
struct ASTNode *right; } Token;
Operator op;
} binary;
struct {
struct ASTNode *val;
Operator op;
} unary;
struct {
struct ASTNode *val;
Operator op;
} parenthesis;
} data;
} ASTNode;
typedef struct { typedef struct {
bool is_valid; bool is_valid;
@@ -68,21 +53,21 @@ typedef struct {
bool is_valid; bool is_valid;
union { union {
LexerErr err; LexerErr err;
ASTNode node; Token token;
}; };
} ASTNodeResult; } TokenResult;
typedef struct { typedef struct {
bool is_valid; bool is_valid;
union { union {
LexerErr err; LexerErr err;
int64_t number; int64_t num;
}; };
} LexerI64Result; } LexerI64Result;
// Lexer functions as well as a few utilities // Lexer functions as well as a few utilities
TokenizeResult tokenize(const char* input); TokenizeResult tokenize(const char* input);
ASTNodeResult tokenize_number(const char* input, size_t *offset); TokenResult tokenize_number(const char* input, size_t *offset);
LexerI64Result string_to_integer(const char buf[]); LexerI64Result string_to_integer(const char buf[]);
bool isoperator(int c); bool isoperator(int c);
Operator char_to_operator(int c); Operator char_to_operator(int c);

View File

@@ -4,11 +4,31 @@
#include "lexer.h" #include "lexer.h"
#include "arena.h" #include "arena.h"
#include "arraylist.h" #include "arraylist.h"
#include <stdbool.h>
#include <stdint.h> #include <stdint.h>
typedef struct { typedef enum {
ASTNode *head; NODE_INT,
} AST; NODE_BINARY_OP,
NODE_UNARY_OP,
} NodeType;
typedef struct Node {
NodeType type;
union {
int64_t num;
struct {
Operator op;
struct Node *left;
struct Node *right;
}binary;
struct {
Operator op;
struct Node *to;
}unary;
Operator par;
};
} Node;
typedef enum { typedef enum {
PARSER_OK = 0, PARSER_OK = 0,
@@ -16,6 +36,8 @@ typedef enum {
PARSER_MISSING_OPERAND, PARSER_MISSING_OPERAND,
PARSER_UNMATCHED_PAREN, PARSER_UNMATCHED_PAREN,
PARSER_OUT_OF_MEMORY, PARSER_OUT_OF_MEMORY,
PARSER_INVALID_TOKENIZE,
PARSER_UNEXPECTED_EOF,
} ParserErr; } ParserErr;
typedef struct { typedef struct {
@@ -24,20 +46,44 @@ typedef struct {
ParserErr err; ParserErr err;
struct { struct {
Arena arena; Arena arena;
ASTNode *tree; Node *tree;
}; };
}; };
} ParseResult; } ParserResult;
ASTNode *nud(ArraySlice *slice); typedef struct {
ASTNode *led(ArraySlice *slice, size_t right_precedence); bool is_valid;
union {
ParserErr err;
Node *node;
};
} TreeResult;
uint8_t prefix_rbp(ASTNode node); typedef struct {
uint8_t postfix_lbp(ASTNode node); bool is_valid;
uint8_t infix_lbp(ASTNode node); union {
uint8_t infix_rbp(ASTNode node); ParserErr err;
Node node;
};
} NodeResult;
ParseResult parse(TokenizeResult tokens); typedef struct {
ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp); bool is_valid;
union {
ParserErr err;
uint8_t num;
};
} ParserU8Result;
TreeResult nud(ArraySlice *slice, Arena *arena, Token token); // Null denotation
TreeResult led(ArraySlice *slice, Arena *arena, Node *left, Token token); // Left denotation
ParserU8Result prefix_rbp(Token token);
ParserU8Result postfix_lbp(Token token);
ParserU8Result infix_lbp(Token token);
ParserU8Result infix_rbp(Token token);
ParserResult parse(TokenizeResult tokens);
TreeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp);
#endif // !PARSER_H #endif // !PARSER_H

View File

@@ -2,36 +2,113 @@
#include "arena.h" #include "arena.h"
#include "lexer.h" #include "lexer.h"
#include "parser.h" #include "parser.h"
#include <stdbool.h>
#include <stdint.h> #include <stdint.h>
#include <math.h> #include <math.h>
int64_t evaluate_tree(ASTNode *tree) { EvaluatorResult evaluate_tree(Node *tree) {
if (tree->type == NODE_BINARY_OP) { if (tree->type == NODE_BINARY_OP) {
Operator op = tree->data.binary.op; return evaluate_binary(tree);
ASTNode *left = tree->data.binary.left; } else if (tree->type == NODE_UNARY_OP) {
ASTNode *right = tree->data.binary.right; return evaluate_unary(tree);
switch (op) {
case OP_ADD:
return evaluate_tree(left) + evaluate_tree(right);
case OP_SUB:
return evaluate_tree(left) - evaluate_tree(right);
case OP_MUL:
return evaluate_tree(left) * evaluate_tree(right);
case OP_DIV:
return evaluate_tree(left) / evaluate_tree(right);
case OP_POW:
return pow(evaluate_tree(left), evaluate_tree(right));
}
} }
int64_t return_val = tree->data.integer; return (EvaluatorResult) {
return return_val; .is_valid = true,
.val = tree->num,
};
} }
int64_t evaluate(ParseResult context) { EvaluatorResult evaluate_binary(Node *tree) {
int64_t result = evaluate_tree(context.tree); Operator op = tree->binary.op;
Node *left = tree->binary.left;
Node *right = tree->binary.right;
EvaluatorResult left_result = evaluate_tree(left);
EvaluatorResult right_result = evaluate_tree(right);
if (!left_result.is_valid) {
return left_result;
}
if (!right_result.is_valid) {
return right_result;
}
switch (op) {
case OP_ADD:
return (EvaluatorResult) {
.is_valid = true,
.val = left_result.val + right_result.val,
};
case OP_SUB:
return (EvaluatorResult) {
.is_valid = true,
.val = left_result.val - right_result.val,
};
case OP_MUL:
return (EvaluatorResult) {
.is_valid = true,
.val = left_result.val * right_result.val,
};
case OP_DIV:
return (EvaluatorResult) {
.is_valid = true,
.val = left_result.val / right_result.val,
};
case OP_POW:
return (EvaluatorResult) {
.is_valid = true,
.val = pow(left_result.val, right_result.val),
};
default:
return (EvaluatorResult) {
.is_valid = false,
.err = EVALUATOR_INVALID_TREE,
};
}
}
EvaluatorResult evaluate_unary(Node *tree) {
Operator op = tree->unary.op;
Node *to = tree->unary.to;
EvaluatorResult result = evaluate_tree(to);
if (!result.is_valid) {
return result;
}
switch (op) {
case OP_ADD:
return result;
case OP_SUB:
return (EvaluatorResult) {
.is_valid = true,
.val = -result.val,
};
case OP_FACTORIAL:
return (EvaluatorResult) {
.is_valid = true,
.val = tgamma(result.val + 1),
};
default:
return (EvaluatorResult) {
.is_valid = false,
.err = EVALUATOR_INVALID_TREE,
};
}
}
EvaluatorResult evaluate(ParserResult context) {
if (!context.is_valid) {
return (EvaluatorResult) {
.is_valid = false,
.err = EVALUATOR_INVALID_PARSING,
};
}
EvaluatorResult result = evaluate_tree(context.tree);
arena_destroy(&context.arena); arena_destroy(&context.arena);
return result; return result;

View File

@@ -5,7 +5,7 @@
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <strings.h> #include <string.h>
#include <limits.h> #include <limits.h>
typedef enum { typedef enum {
@@ -15,34 +15,31 @@ typedef enum {
TokenizeResult tokenize(const char *input) { TokenizeResult tokenize(const char *input) {
ArrayList *arr = arraylist_init(64, sizeof(ASTNode)); ArrayList *arr = arraylist_init(64, sizeof(Token));
size_t offset = 0; size_t offset = 0;
while ( while (input[offset] != '\0') {
input[offset] != '\n' ||
input[offset] != EOF ||
input[offset] != '\0') {
if (isdigit(input[offset])) { if (isdigit(input[offset])) {
ASTNodeResult result = tokenize_number(input, &offset); TokenResult result = tokenize_number(input, &offset);
if (!result.is_valid) { if (!result.is_valid) {
arraylist_destroy(&arr);
return (TokenizeResult) {.is_valid = false, .err = result.err}; return (TokenizeResult) {.is_valid = false, .err = result.err};
} }
arraylist_push_back(arr, &result.node); arraylist_push_back(arr, &result.token);
} else if (isoperator(input[offset])) { } else if (isoperator(input[offset])) {
ASTNode op_node = { Token op_node = {
.type = NODE_BINARY_OP, .type = TOKEN_OPERATOR,
.data.binary.op = char_to_operator(input[offset]), .op = char_to_operator(input[offset]),
.data.binary.left = NULL,
.data.binary.right = NULL,
}; };
arraylist_push_back(arr, &op_node); arraylist_push_back(arr, &op_node);
} else if (isspace(input[offset])) { } else if (isspace(input[offset])) {
// Nothing... // Nothing...
} else { } else {
arraylist_destroy(&arr);
return (TokenizeResult) { return (TokenizeResult) {
.is_valid = false, .is_valid = false,
.err = LEXER_NOT_RECOGNIZED_SYMBOL}; .err = LEXER_NOT_RECOGNIZED_SYMBOL};
@@ -52,6 +49,7 @@ TokenizeResult tokenize(const char *input) {
} }
if (arraylist_size(arr) < 1) { if (arraylist_size(arr) < 1) {
arraylist_destroy(&arr);
return (TokenizeResult) {.is_valid = false, .err = LEXER_EMPTY_INPUT}; return (TokenizeResult) {.is_valid = false, .err = LEXER_EMPTY_INPUT};
} }
@@ -60,7 +58,7 @@ TokenizeResult tokenize(const char *input) {
// CURRENTLY, it only supports ints, not clear how floating // CURRENTLY, it only supports ints, not clear how floating
// point is implemented but i'll figure it out // point is implemented but i'll figure it out
ASTNodeResult tokenize_number(const char *input, size_t *offset) { TokenResult tokenize_number(const char *input, size_t *offset) {
char buf[64] = { '\0' }; char buf[64] = { '\0' };
size_t buf_pos = 0; size_t buf_pos = 0;
bool is_integer = true; // Will later be used to differentiate fractions bool is_integer = true; // Will later be used to differentiate fractions
@@ -68,35 +66,35 @@ ASTNodeResult tokenize_number(const char *input, size_t *offset) {
// read number // read number
size_t current = *offset; size_t current = *offset;
while (isdigit(input[current])) { while (isdigit(input[current])) {
buf[buf_pos] = input[current]; if (buf_pos >= sizeof(buf) - 1) {
return (TokenResult) {
if (buf_pos >= sizeof(buf)) {
return (ASTNodeResult) {
.is_valid = false, .is_valid = false,
.err = LEXER_BUF_OVERFLOW}; .err = LEXER_BUF_OVERFLOW};
} }
buf[buf_pos] = input[current];
current++; current++;
buf_pos++; buf_pos++;
} }
ASTNode new_node; Token new_token;
if (is_integer) { if (is_integer) {
new_node.type = NODE_INTEGER; new_token.type = TOKEN_INTEGER;
LexerI64Result status = string_to_integer(buf); LexerI64Result result = string_to_integer(buf);
if (!status.is_valid) { if (!result.is_valid) {
return (ASTNodeResult) {.is_valid = false, .err = status.err}; return (TokenResult) {.is_valid = false, .err = result.err};
} }
new_node.data.integer = status.number; new_token.num = result.num;
*offset = current; *offset = current - 1;
return (ASTNodeResult) {.is_valid = true, .node = new_node}; return (TokenResult) {.is_valid = true, .token = new_token};
} }
return (ASTNodeResult) { return (TokenResult) {
.is_valid = false, .is_valid = false,
.err = LEXER_FAILED_NUMBER_CONVERSION}; .err = LEXER_FAILED_NUMBER_CONVERSION};
} }
@@ -122,7 +120,7 @@ LexerI64Result string_to_integer(const char *buf) {
c++; c++;
} }
return (LexerI64Result) {.is_valid = true, .number = count}; return (LexerI64Result) {.is_valid = true, .num = count};
} }
bool isoperator(int c) { bool isoperator(int c) {

View File

@@ -18,12 +18,11 @@ int main(void) {
} }
buf[pos] = '\0'; buf[pos] = '\0';
TokenizeResult tokens = tokenize(buf); EvaluatorResult result = evaluate(parse(tokenize(buf)));
if (!result.is_valid) {
puts("Error checando expresion");
}
ParseResult par = parse(tokens); printf("El resultado es: %" PRIi64 "\n", result.val);
int64_t result = evaluate(par);
printf("El resultado es: %" PRIi64 "\n", result);
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }

View File

@@ -2,216 +2,391 @@
#include "arraylist.h" #include "arraylist.h"
#include "lexer.h" #include "lexer.h"
#include "arena.h" #include "arena.h"
#include <cmocka.h>
#include <stdalign.h> #include <stdalign.h>
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h> #include <stdint.h>
uint8_t prefix_rbp(ASTNode node) { ParserU8Result prefix_rbp(Token token) {
if (node.type == NODE_INTEGER) { if (token.type == TOKEN_INTEGER) {
return 0; return (ParserU8Result) {
.is_valid = false,
.err = PARSER_UNEXPECTED_TOKEN,
};
} }
switch (token.op) {
switch (node.data.unary.op) {
case OP_SUB: case OP_SUB:
case OP_ADD: case OP_ADD:
return 30; return (ParserU8Result) {
.is_valid = true,
.num = 30,
};
default: default:
return -1; return (ParserU8Result) {
.is_valid = false,
.err = PARSER_UNEXPECTED_TOKEN,
};
} }
} }
uint8_t postfix_lbp(ASTNode node) { ParserU8Result postfix_lbp(Token token) {
if (node.type == NODE_INTEGER) { if (token.type != TOKEN_OPERATOR) {
return 0; return (ParserU8Result) {
.is_valid = false,
.err = PARSER_UNEXPECTED_TOKEN,
};
} }
switch (node.data.unary.op) { switch (token.op) {
case OP_FACTORIAL: case OP_FACTORIAL:
return 40; return (ParserU8Result) {
.is_valid = true,
.num = 40,
};
default: default:
// needs to be dealt with with resulttypes return (ParserU8Result) {
return 255; .is_valid = false,
.err = PARSER_UNEXPECTED_TOKEN,
};
} }
} }
uint8_t infix_lbp(ASTNode node) { ParserU8Result infix_lbp(Token token) {
if (node.type == NODE_INTEGER) { if (token.type != TOKEN_OPERATOR) {
return 0; return (ParserU8Result) {
.is_valid = false,
.err = PARSER_UNEXPECTED_TOKEN,
};
} }
switch (node.data.binary.op) { switch (token.op) {
case OP_ADD: case OP_ADD:
case OP_SUB: case OP_SUB:
return 10; return (ParserU8Result) {
break; .is_valid = true,
.num = 10,
};
case OP_DIV: case OP_DIV:
case OP_MUL: case OP_MUL:
return 20; return (ParserU8Result) {
.is_valid = true,
.num = 20,
};
case OP_POW: case OP_POW:
return 51; return (ParserU8Result) {
.is_valid = true,
.num = 51,
};
default: default:
return 0; return (ParserU8Result) {
.is_valid = false,
.err = PARSER_UNEXPECTED_TOKEN,
};
} }
} }
uint8_t infix_rbp(ASTNode node) { ParserU8Result infix_rbp(Token token) {
if (node.type == NODE_INTEGER) { if (token.type != TOKEN_OPERATOR) {
return 0; return (ParserU8Result) {
.is_valid = false,
.err = PARSER_UNEXPECTED_TOKEN,
};
} }
switch (node.data.binary.op) { switch (token.op) {
case OP_ADD: case OP_ADD:
case OP_SUB: case OP_SUB:
return 11; return (ParserU8Result) {
break; .is_valid = true,
.num = 11,
};
case OP_DIV: case OP_DIV:
case OP_MUL: case OP_MUL:
return 21; return (ParserU8Result) {
.is_valid = true,
.num = 21,
};
case OP_POW: case OP_POW:
return 50; return (ParserU8Result) {
.is_valid = true,
.num = 50,
};
default: default:
return 0; return (ParserU8Result) {
.is_valid = false,
.err = PARSER_UNEXPECTED_TOKEN,
};
} }
} }
ParseResult parse(TokenizeResult tokens) { TreeResult led(
ArraySlice *context = arraylist_slice(tokens.arr, 0, arraylist_size(tokens.arr)); ArraySlice *slice,
Arena arena = arena_init(sizeof(ASTNode) * arraylist_size(tokens.arr)).arena; Arena *arena,
Node *left,
return (ParseResult) { Token token
.is_valid = true, ) {
.arena = arena,
.tree = parse_expr(context, &arena, 0)};
}
ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) {
// First: Consume a first number
arena_ensure_capacity( arena_ensure_capacity(
arena, arena,
sizeof(ASTNode), sizeof(Node),
alignof(ASTNode) alignof(Node)
); // shouldn't fail but if it does then what a shame );
// Get pointer in the arena Node *node = arena_unwrap_pointer(
ASTNode *left_side = arena_unwrap_pointer(
arena_alloc( arena_alloc(
arena, arena,
sizeof(ASTNode), sizeof(Node),
alignof(ASTNode) alignof(Node)
) )
); );
arrayslice_next(slice, left_side); switch (token.op) {
if (left_side->type == NODE_PARENTHESIS && // Binary operators
left_side->data.parenthesis.op == OP_START_PAR) { case OP_ADD:
left_side = parse_expr(slice, arena, 0); case OP_SUB:
// HERE CHEKC LATER if slice.next != ')' case OP_MUL:
ASTNode *end_par; case OP_DIV:
arrayslice_next(slice, &end_par); case OP_POW: {
if (end_par->type != NODE_PARENTHESIS || node->type = NODE_BINARY_OP;
end_par->data.parenthesis.op != OP_END_PAR) { node->binary.op = token.op;
// todo
ParserU8Result rbp_result = infix_rbp(token);
if (!rbp_result.is_valid) {
return (TreeResult) {
.is_valid = false,
.err = rbp_result.err,
};
}
TreeResult right = parse_expr(
slice,
arena,
rbp_result.num
);
if (!right.is_valid) {
return right;
}
node->binary.left = left;
node->binary.right = right.node;
return (TreeResult) {
.is_valid = true,
.node = node,
};
} }
return left_side;
}
// if is unary then take prefix bp and continue
// to the right, no need to allocate left side
// because we just did and right side
// WILL return a valid allocated pointer.
if (left_side->type == NODE_UNARY_OP) {
uint8_t rbp = prefix_rbp(*left_side);
ASTNode *righ_side = parse_expr(slice, arena, rbp);
left_side->data.unary.val = righ_side; // Postfix operators
case OP_FACTORIAL: {
node->type = NODE_UNARY_OP;
node->unary.op = token.op;
node->unary.to = left;
return (TreeResult) {
.is_valid = true,
.node = node,
};
}
default:
return (TreeResult) {
.is_valid = false,
.err = PARSER_UNEXPECTED_TOKEN,
};
}
}
TreeResult nud(ArraySlice *slice, Arena *arena, Token token) {
arena_ensure_capacity(
arena,
sizeof(Node),
alignof(Node)
);
Node *node = arena_unwrap_pointer(
arena_alloc(
arena,
sizeof(Node),
alignof(Node)
)
);
if (token.type == TOKEN_INTEGER) {
node->type = NODE_INT;
node->num = token.num;
return (TreeResult) {
.is_valid = true,
.node = node,
};
} }
while (true) { switch (token.op) {
// Second: Get next one and checn bp case OP_START_PAR: {
if (!arrayslice_is_valid(slice)) { TreeResult expr = parse_expr(slice, arena, 0);
if (!expr.is_valid) {
return expr;
}
Token end_par;
if (arrayslice_next(slice, &end_par) != ARRLIST_OK) {
return (TreeResult) {
.is_valid = false,
.err = PARSER_UNMATCHED_PAREN,
};
}
if (end_par.type != TOKEN_OPERATOR ||
end_par.op != OP_END_PAR) {
return (TreeResult) {
.is_valid = false,
.err = PARSER_UNMATCHED_PAREN,
};
}
return expr;
}
case OP_ADD:
case OP_SUB: {
node->type = NODE_UNARY_OP;
node->unary.op = token.op;
ParserU8Result rbp_result = prefix_rbp(token);
if (!rbp_result.is_valid) {
return (TreeResult) {
.is_valid = false,
.err = rbp_result.err,
};
}
TreeResult right = parse_expr(
slice,
arena,
rbp_result.num
);
if (!right.is_valid) {
return right;
}
node->unary.to = right.node;
return (TreeResult) {
.is_valid = true,
.node = node,
};
}
default:
return (TreeResult) {
.is_valid = false,
.err = PARSER_UNEXPECTED_TOKEN,
};
}
}
ParserResult parse(TokenizeResult tokens) {
if (!tokens.is_valid) {
return (ParserResult) {
.is_valid = false,
.err = PARSER_INVALID_TOKENIZE,
};
}
ArraySlice *context = arraylist_slice(tokens.arr, 0, arraylist_size(tokens.arr));
Arena arena = arena_init(sizeof(Node) * arraylist_size(tokens.arr)).arena;
TreeResult result = parse_expr(context, &arena, 0);
if (!result.is_valid) {
arena_destroy(&arena);
arraylist_destroy(&tokens.arr);
return (ParserResult) {
.is_valid = false,
.err = result.err,
};
}
arraylist_destroy(&tokens.arr);
return (ParserResult) {
.is_valid = true,
.arena = arena,
.tree = result.node};
}
TreeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) {
Token current_token;
if (arrayslice_next(slice, &current_token) != ARRLIST_OK) {
return (TreeResult) {
.is_valid = false,
.err = PARSER_UNEXPECTED_EOF,
};
}
TreeResult left_result = nud(slice, arena, current_token);
if (!left_result.is_valid) {
return left_result;
}
Node *left_side = left_result.node;
while (arrayslice_is_valid(slice)) {
Token operator_token;
arrayslice_peek(slice, &operator_token);
if (operator_token.type != TOKEN_OPERATOR) {
break; break;
} }
// Here check if not OP error ParserU8Result postfix_lbp_result = postfix_lbp(operator_token);
ASTNode operator; if (postfix_lbp_result.is_valid) {
// Here should chekc if is operator not some bs if (postfix_lbp_result.num < min_bp) {
// Third, get operator and binding powers
arrayslice_peek(slice, &operator);
// temporary for bad error handling
if (postfix_lbp(operator) != 255) {
if (postfix_lbp(operator) < min_bp) {
break; break;
} }
// allocate operator
arrayslice_next(slice, NULL); arrayslice_next(slice, NULL);
arena_ensure_capacity(
arena,
sizeof(ASTNode),
alignof(ASTNode));
ASTNode *new_node = arena_unwrap_pointer(
arena_alloc(
arena,
sizeof(ASTNode),
alignof(ASTNode)
)
);
*new_node = operator;
TreeResult result = led(slice, arena, left_side, operator_token);
new_node->data.unary.val = left_side; if (!result.is_valid) {
return result;
left_side = new_node;
continue;
}
// check if it has infix or not, if not then error
uint8_t rbp = infix_rbp(operator);
uint8_t lbp = infix_lbp(operator);
if (rbp != 255 && lbp != 255) {
// If lbp is LESS then stop recursion,
// we found the next smaller binding power
// or the one with more precedence
if (lbp < min_bp) {
break;
} }
left_side = result.node;
// If NOT, then we continue wtching ahead
// for the next one but taking our current
// concern that is rbp of the current operator
arrayslice_next(slice, NULL);
ASTNode *right_side = parse_expr(slice, arena, rbp);
arena_ensure_capacity(
arena,
sizeof(ASTNode),
alignof(ASTNode));
ASTNode *new_node = arena_unwrap_pointer(
arena_alloc(
arena,
sizeof(ASTNode),
alignof(ASTNode)
)
);
*new_node = operator;
new_node->data.binary.left = left_side;
new_node->data.binary.right = right_side;
left_side = new_node;
continue; continue;
} }
break; // Path for infix basically
ParserU8Result lbp_result = infix_lbp(operator_token);
if (!lbp_result.is_valid) {
break;
}
if (lbp_result.num < min_bp) {
break;
}
arrayslice_next(slice, NULL);
TreeResult result = led(slice, arena, left_side, operator_token);
if (!result.is_valid) {
return result;
}
left_side = result.node;
} }
// Final: return left side // Final: return left side
return left_side; return (TreeResult){
.is_valid = true,
.node = left_side,
};
} }

View File

@@ -1,6 +1,3 @@
#include "lexer.h"
#include "parser.h"
#include "evaluator.h"
#include <stdarg.h> #include <stdarg.h>
#include <stdbool.h> #include <stdbool.h>
#include <stddef.h> #include <stddef.h>
@@ -9,22 +6,8 @@
#include <cmocka.h> #include <cmocka.h>
#include <stdlib.h> #include <stdlib.h>
static void test_basic_evaluation(void** state) {
(void) state;
char expr[256] = "2 + 4 * 40 / 2";
TokenizeResult tokens = tokenize(expr);
ParseResult result = parse(tokens);
int64_t value = evaluate(result);
assert_int_equal(value, 82);
}
int main(void) { int main(void) {
const struct CMUnitTest tests[] = {
cmocka_unit_test(test_basic_evaluation),
};
cmocka_run_group_tests(tests, NULL, NULL);
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }

View File

@@ -1,106 +1,11 @@
#include "arraylist.h"
#include "lexer.h"
#include <stdarg.h> #include <stdarg.h>
#include <stdbool.h> #include <stdbool.h>
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include <setjmp.h> #include <setjmp.h>
#include <cmocka.h> #include <cmocka.h>
#include <stdlib.h>
static void test_tokenize_normal_expresion(void **state) {
(void) state;
char expr[256] = "2 + 3 / 66 * 789";
ASTNode node;
TokenizeResult tokens = tokenize(expr);
assert_true(tokens.is_valid);
assert_int_equal(arraylist_size(tokens.arr), 7);
arraylist_get(tokens.arr, 0, &node);
assert_int_equal(node.type, NODE_INTEGER);
assert_int_equal(node.data.integer, 2);
arraylist_get(tokens.arr, 1, &node);
assert_int_equal(node.type, NODE_BINARY_OP);
assert_int_equal(node.data.binary.op, OP_ADD);
arraylist_get(tokens.arr, 2, &node);
assert_int_equal(node.type, NODE_INTEGER);
assert_int_equal(node.data.integer, 3);
arraylist_get(tokens.arr, 3, &node);
assert_int_equal(node.type, NODE_BINARY_OP);
assert_int_equal(node.data.binary.op, OP_DIV);
arraylist_get(tokens.arr, 4, &node);
assert_int_equal(node.type, NODE_INTEGER);
assert_int_equal(node.data.integer, 66);
arraylist_get(tokens.arr, 5, &node);
assert_int_equal(node.type, NODE_BINARY_OP);
assert_int_equal(node.data.binary.op, OP_MUL);
arraylist_get(tokens.arr, 6, &node);
assert_int_equal(node.type, NODE_INTEGER);
assert_int_equal(node.data.integer, 789);
}
static void test_tokenize_unrecognized_symbol(void **state) {
(void) state;
char expr[256] = " 2 j 3 / 66 } 789";
TokenizeResult tokens = tokenize(expr);
assert_false(tokens.is_valid);
assert_uint_equal(tokens.err, LEXER_NOT_RECOGNIZED_SYMBOL);
}
static void test_tokenize_wrong_sintax(void **state) {
(void) state;
char expr[256] = "2 3 / 66 789";
TokenizeResult tokens = tokenize(expr);
assert_false(tokens.is_valid);
assert_uint_equal(tokens.err, LEXER_WRONG_SYNTAX);
}
static void test_string_to_number_normal(void **state) {
(void) state;
char num[16] = "2333t55";
size_t offset = 0;
ASTNodeResult result = tokenize_number(num, &offset);
assert_true(result.is_valid);
assert_int_equal(offset, 4); // equal to t position in string
assert_int_equal(result.node.type, NODE_INTEGER);
assert_int_equal(result.node.data.integer, 2333);
}
static void test_string_to_number_overflow(void **state) {
(void) state;
// Number is INT64_MAX but with a extra 899 at the end
char num[32] = "92233720368547758079";
size_t offset = 0;
ASTNodeResult result = tokenize_number(num, &offset);
assert_false(result.is_valid);
assert_uint_equal(result.err, LEXER_INT_OVERFLOW);
// Technically it can trigger a buf overflow error but obvioulsy
// it will trigger int overflow error first
}
int main(void) { int main(void) {
const struct CMUnitTest tests[] = { return EXIT_SUCCESS;
cmocka_unit_test(test_string_to_number_normal),
cmocka_unit_test(test_string_to_number_overflow),
cmocka_unit_test(test_tokenize_normal_expresion),
cmocka_unit_test(test_tokenize_unrecognized_symbol),
cmocka_unit_test(test_tokenize_wrong_sintax),
};
return cmocka_run_group_tests(tests, NULL, NULL);
} }

View File

@@ -1,81 +1,11 @@
#include "arena.h"
#include "arraylist.h"
#include "lexer.h"
#include "parser.h"
#include <stdarg.h> #include <stdarg.h>
#include <stdbool.h> #include <stdbool.h>
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include <setjmp.h> #include <setjmp.h>
#include <cmocka.h> #include <cmocka.h>
#include <stdlib.h>
static void test_parsing_basic_expression(void **state) {
(void) state;
char expr[256] = "2 + 3 / 66 * 789";
TokenizeResult tokens = tokenize(expr);
assert_true(tokens.is_valid);
assert_int_equal(arraylist_size(tokens.arr), 7);
ParseResult result = parse(tokens);
// Assert head is +
assert_int_equal(result.tree->type, NODE_BINARY_OP);
assert_int_equal(result.tree->data.binary.op, OP_ADD);
assert_int_equal(result.tree->data.binary.left->type, NODE_INTEGER);
assert_int_equal(result.tree->data.binary.left->data.integer, 2);
assert_int_equal(
result.tree->data.binary.right->type,
NODE_BINARY_OP
);
assert_int_equal(
result.tree->data.binary.right->data.binary.op,
OP_MUL
);
assert_int_equal(
result.tree->data.binary.right->data.binary.right->type,
NODE_INTEGER);
assert_int_equal(
result.tree->data.binary.right->data.binary.right->data.integer,
789);
assert_int_equal(
result.tree->data.binary.right->data.binary.left->type,
NODE_BINARY_OP
);
assert_int_equal(
result.tree->data.binary.right->data.binary.left->data.binary.op,
OP_DIV
);
assert_int_equal(
result.tree->data.binary.right->data.binary.left->data.binary.right->type,
NODE_INTEGER
);
assert_int_equal(
result.tree->data.binary.right->data.binary.left->data.binary.right->data.integer,
66
);
assert_int_equal(
result.tree->data.binary.right->data.binary.left->data.binary.left->type,
NODE_INTEGER
);
assert_int_equal(
result.tree->data.binary.right->data.binary.left->data.binary.left->data.integer,
3
);
arena_destroy(&result.arena);
}
int main(void) { int main(void) {
const struct CMUnitTest tests [] = { return EXIT_SUCCESS;
cmocka_unit_test(test_parsing_basic_expression),
};
return cmocka_run_group_tests(tests, NULL, NULL);
} }