Compare commits

..

7 Commits

Author SHA1 Message Date
e3d64596ab Merge pull request 'refactor-lexer' (#11) from refactor-lexer into main
Reviewed-on: #11
2026-05-12 20:08:39 -06:00
56c80fa071 addition: Managing of parenthesis
It's a fucking mess, I was writing straight bullshit, but it conceptually
should work; I just need to refactor the shit out of it to make it way
cleaner than it actually is, and also later fix the fucking evaluator,
like damn it sucks ASSS now (not that much really, it's nice, but obviously
it doesn't work; I like my code a lot :)
2026-05-12 20:04:41 -06:00
7f390a8c6b addition: postfix operator capability, may work 2026-05-12 19:40:42 -06:00
e30b3d7175 addition: processing of prefix op 2026-05-12 18:33:52 -06:00
59f99059bb refactor: changes and additions to parser 2026-05-12 18:15:36 -06:00
c41847e120 refactor: rewrote tokenize and modified other funcs
Well, I wanted to wildly change a lot of things about the lexer, thinking
I could do something better, but really all I found was automatic lexers
that, at least for me, don't really fit the project, so a manual one it is;
I guess it technically is an automaton. Whatever, it's good enough.
2026-04-30 21:34:27 -06:00
f2c906c6aa initial-commit 2026-04-30 10:40:17 -06:00
5 changed files with 223 additions and 69 deletions

View File

@@ -10,6 +10,8 @@
/* Discriminator stored in ASTNode.type; selects which member of
 * ASTNode.data is meaningful for a given node. */
typedef enum {
    NODE_INTEGER     = 0, /* integer literal, value in data.integer        */
    NODE_BINARY_OP   = 1, /* infix operator, uses data.binary (left/right) */
    NODE_UNARY_OP    = 2, /* prefix/postfix operator, uses data.unary      */
    NODE_PARENTHESIS = 3, /* '(' or ')', uses data.parenthesis             */
} ASTNodeType;
// Used to classify operators
@@ -17,7 +19,11 @@ typedef enum {
OP_ADD,
OP_SUB,
OP_MUL,
OP_DIV
OP_DIV,
OP_POW,
OP_FACTORIAL,
OP_START_PAR,
OP_END_PAR,
} Operator;
typedef enum {
@@ -26,8 +32,6 @@ typedef enum {
LEXER_FAILED_NUMBER_CONVERSION,
LEXER_NOT_RECOGNIZED_SYMBOL,
LEXER_EMPTY_INPUT,
LEXER_NULL_ARG,
LEXER_WRONG_SYNTAX,
LEXER_BUF_OVERFLOW,
} LexerErr;
@@ -41,6 +45,14 @@ typedef struct ASTNode {
struct ASTNode *right;
Operator op;
} binary;
struct {
struct ASTNode *val;
Operator op;
} unary;
struct {
struct ASTNode *val;
Operator op;
} parenthesis;
} data;
} ASTNode;
@@ -66,12 +78,12 @@ typedef struct {
LexerErr err;
int64_t number;
};
} I64Result;
} LexerI64Result;
// Lexer functions as well as a few helper utilities
TokenizeResult tokenize(const char* input);
ASTNodeResult tokenize_number(const char* input, size_t *offset);
I64Result string_to_integer(const char buf[]);
LexerI64Result string_to_integer(const char buf[]);
bool isoperator(int c);
Operator char_to_operator(int c);
char operator_to_char(Operator op);

View File

@@ -32,8 +32,10 @@ typedef struct {
ASTNode *nud(ArraySlice *slice);
ASTNode *led(ArraySlice *slice, size_t right_precedence);
uint8_t node_lbp(ASTNode node);
uint8_t node_rbp(ASTNode node);
uint8_t prefix_rbp(ASTNode node);
uint8_t postfix_lbp(ASTNode node);
uint8_t infix_lbp(ASTNode node);
uint8_t infix_rbp(ASTNode node);
ParseResult parse(TokenizeResult tokens);
ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp);

View File

@@ -3,6 +3,7 @@
#include "lexer.h"
#include "parser.h"
#include <stdint.h>
#include <math.h>
int64_t evaluate_tree(ASTNode *tree) {
@@ -20,7 +21,8 @@ int64_t evaluate_tree(ASTNode *tree) {
return evaluate_tree(left) * evaluate_tree(right);
case OP_DIV:
return evaluate_tree(left) / evaluate_tree(right);
case OP_POW:
return pow(evaluate_tree(left), evaluate_tree(right));
}
}

View File

@@ -3,6 +3,7 @@
#include <ctype.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <limits.h>
@@ -14,45 +15,37 @@ typedef enum {
TokenizeResult tokenize(const char *input) {
size_t offset = 0;
LexerState state = WAIT_FOR_NUMBER;
ArrayList *arr = arraylist_init(64, sizeof(ASTNode));
size_t offset = 0;
while (input[offset] != '\n' && input[offset] != '\0') {
int current = input[offset];
while (
input[offset] != '\n' ||
input[offset] != EOF ||
input[offset] != '\0') {
if (isdigit(current)) {
if (state != WAIT_FOR_NUMBER) {
arraylist_destroy(&arr);
return (TokenizeResult) {.is_valid = false, .err = LEXER_WRONG_SYNTAX};
}
if (isdigit(input[offset])) {
ASTNodeResult result = tokenize_number(input, &offset);
if (!result.is_valid) {
arraylist_destroy(&arr);
return (TokenizeResult) {.is_valid = false, .err = result.err};
}
arraylist_push_back(arr, &result.node);
state = WAIT_FOR_OPERATOR;
} else if (isoperator(current)) {
if (state != WAIT_FOR_OPERATOR) {
return (TokenizeResult) {.is_valid = false, .err =LEXER_WRONG_SYNTAX};
}
ASTNode new_node = {
} else if (isoperator(input[offset])) {
ASTNode op_node = {
.type = NODE_BINARY_OP,
.data.binary.op = char_to_operator(current),
.data.binary.right = NULL,
.data.binary.op = char_to_operator(input[offset]),
.data.binary.left = NULL,
.data.binary.right = NULL,
};
arraylist_push_back(arr, &new_node);
state = WAIT_FOR_NUMBER;
} else if (isspace(current)) {
arraylist_push_back(arr, &op_node);
} else if (isspace(input[offset])) {
// Nothing...
} else {
arraylist_destroy(&arr);
return (TokenizeResult) {.is_valid = false, .err = LEXER_NOT_RECOGNIZED_SYMBOL};
return (TokenizeResult) {
.is_valid = false,
.err = LEXER_NOT_RECOGNIZED_SYMBOL};
}
offset++;
@@ -68,17 +61,21 @@ TokenizeResult tokenize(const char *input) {
// Currently only integers are supported; it is not yet clear how floating
// point will be implemented, but I'll figure it out
ASTNodeResult tokenize_number(const char *input, size_t *offset) {
char buf[128] = { '\0' };
char buf[64] = { '\0' };
size_t buf_pos = 0;
bool is_integer = true; // Will later be used to differentiate fractions
// read number
size_t current = *offset;
while (isdigit(input[current])) {
buf[buf_pos] = input[current];
if (buf_pos >= sizeof(buf)) {
return (ASTNodeResult) {.is_valid = false, .err = LEXER_BUF_OVERFLOW};
return (ASTNodeResult) {
.is_valid = false,
.err = LEXER_BUF_OVERFLOW};
}
current++;
buf_pos++;
}
@@ -86,35 +83,46 @@ ASTNodeResult tokenize_number(const char *input, size_t *offset) {
ASTNode new_node;
if (is_integer) {
new_node.type = NODE_INTEGER;
I64Result status = string_to_integer(buf);
LexerI64Result status = string_to_integer(buf);
if (!status.is_valid) {
return (ASTNodeResult) {.is_valid = false, .err = status.err};
}
new_node.data.integer = status.number;
*offset = current;
return (ASTNodeResult) {.is_valid = true, .node = new_node};
}
return (ASTNodeResult) {.is_valid = false, .err = LEXER_FAILED_NUMBER_CONVERSION};
return (ASTNodeResult) {
.is_valid = false,
.err = LEXER_FAILED_NUMBER_CONVERSION};
}
// Converts the NUL-terminated string `buf` into an int64_t by accumulating
// one decimal digit per character, left to right.
//
// Returns a result with is_valid = true and the value in `number`, or
// is_valid = false with err = LEXER_INT_OVERFLOW when appending the next
// digit would exceed INT64_MAX. An empty string yields 0 with is_valid = true
// because the loop body never runs.
//
// NOTE(review): characters are not checked to be '0'..'9' here; presumably
// the caller only passes digit runs — confirm.
// NOTE(review): this span shows both the pre-rename (I64Result) and the
// post-rename (LexerI64Result) form of every changed line.
I64Result string_to_integer(const char *buf) {
LexerI64Result string_to_integer(const char *buf) {
int c = 0;
int64_t count = 0;
while (buf[c] != '\0') {
// Extracts number from char
int digit = buf[c] - '0';
// Overflow guard, evaluated before the multiply-and-add below so that
// count * 10 + digit is never computed when it would exceed INT64_MAX.
if (count > (INT64_MAX - digit) / 10) {
return (I64Result) {.is_valid = false, .err = LEXER_INT_OVERFLOW};
return (LexerI64Result) {
.is_valid = false,
.err = LEXER_INT_OVERFLOW};
}
// Shift the accumulated value one decimal place and append the digit.
count = count * 10;
count += digit;
c++;
}
return (I64Result) {.is_valid = true, .number = count};
return (LexerI64Result) {.is_valid = true, .number = count};
}
bool isoperator(int c) {
@@ -123,6 +131,10 @@ bool isoperator(int c) {
case '-':
case '/':
case '*':
case '^':
case '!':
case '(':
case ')':
return true;
default:
return false;
@@ -143,6 +155,18 @@ Operator char_to_operator(int c) {
case '/':
return OP_DIV;
break;
case '^':
return OP_POW;
break;
case '!':
return OP_FACTORIAL;
break;
case '(':
return OP_START_PAR;
break;
case ')':
return OP_END_PAR;
break;
default: // I mean shouldn't be used, we assume
return -1;
}
@@ -158,5 +182,15 @@ char operator_to_char(Operator op) {
return '*';
case OP_DIV:
return '/';
case OP_POW:
return '^';
case OP_FACTORIAL:
return '!';
case OP_START_PAR:
return '(';
case OP_END_PAR:
return ')';
default:
return EOF;
}
}

View File

@@ -6,7 +6,35 @@
#include <stdbool.h>
#include <stdint.h>
uint8_t node_lbp(ASTNode node) {
/*
 * Right binding power of `node` when it is used as a prefix operator.
 *
 * Returns:
 *   0    when node.type is NODE_INTEGER (a number binds nothing),
 *   30   for unary '+' and '-' (OP_ADD / OP_SUB),
 *   255  for any other operator, meaning "not a prefix operator".
 *
 * The original returned -1 here, which a uint8_t return type silently
 * converts to 255; the value is now spelled out so it visibly matches the
 * 255 sentinel used by postfix_lbp().
 *
 * NOTE(review): the operator is read from data.unary.op regardless of
 * node.type, while tokenize() stores operators in data.binary.op — confirm
 * that both members refer to the same storage for the nodes passed in.
 */
uint8_t prefix_rbp(ASTNode node) {
    if (node.type == NODE_INTEGER) {
        return 0;
    }

    switch (node.data.unary.op) {
    case OP_SUB:
    case OP_ADD:
        return 30;
    default:
        /* Not a prefix operator. */
        return 255;
    }
}
/*
 * Left binding power of `node` when it is used as a postfix operator.
 *
 * Yields 0 for NODE_INTEGER, 40 for factorial (OP_FACTORIAL), and 255 for
 * everything else. 255 is a stand-in for "no postfix form" until a proper
 * result type carries that error.
 */
uint8_t postfix_lbp(ASTNode node) {
    /* Start from the "not a postfix operator" sentinel. */
    uint8_t power = 255;

    if (node.type == NODE_INTEGER) {
        power = 0;
    } else if (node.data.unary.op == OP_FACTORIAL) {
        power = 40;
    }

    return power;
}
uint8_t infix_lbp(ASTNode node) {
if (node.type == NODE_INTEGER) {
return 0;
}
@@ -19,12 +47,14 @@ uint8_t node_lbp(ASTNode node) {
case OP_DIV:
case OP_MUL:
return 20;
case OP_POW:
return 51;
default:
return 0;
}
}
uint8_t node_rbp(ASTNode node) {
uint8_t infix_rbp(ASTNode node) {
if (node.type == NODE_INTEGER) {
return 0;
}
@@ -37,6 +67,8 @@ uint8_t node_rbp(ASTNode node) {
case OP_DIV:
case OP_MUL:
return 21;
case OP_POW:
return 50;
default:
return 0;
}
@@ -53,12 +85,14 @@ ParseResult parse(TokenizeResult tokens) {
}
ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) {
// First: Consume a first number
arena_ensure_capacity(
arena,
sizeof(ASTNode),
alignof(ASTNode)
);
); // shouldn't fail but if it does then what a shame
// Get pointer in the arena
ASTNode *left_side = arena_unwrap_pointer(
arena_alloc(
arena,
@@ -69,45 +103,115 @@ ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) {
arrayslice_next(slice, left_side);
if (left_side->type == NODE_PARENTHESIS &&
left_side->data.parenthesis.op == OP_START_PAR) {
left_side = parse_expr(slice, arena, 0);
// HERE: check later whether slice.next != ')'
ASTNode *end_par;
arrayslice_next(slice, &end_par);
if (end_par->type != NODE_PARENTHESIS ||
end_par->data.parenthesis.op != OP_END_PAR) {
// todo
}
return left_side;
}
// if is unary then take prefix bp and continue
// to the right, no need to allocate left side
// because we just did and right side
// WILL return a valid allocated pointer.
if (left_side->type == NODE_UNARY_OP) {
uint8_t rbp = prefix_rbp(*left_side);
ASTNode *righ_side = parse_expr(slice, arena, rbp);
left_side->data.unary.val = righ_side;
}
while (true) {
// Second: Get next one and check bp
if (!arrayslice_is_valid(slice)) {
break;
}
ASTNode operator;
arrayslice_peek(slice, &operator);
uint8_t rbp = node_rbp(operator);
uint8_t lbp = node_lbp(operator);
// Here check if not OP error
if (lbp < min_bp) {
break;
ASTNode operator;
// Here should check if it is an operator, not some bs
// Third, get operator and binding powers
arrayslice_peek(slice, &operator);
// temporary for bad error handling
if (postfix_lbp(operator) != 255) {
if (postfix_lbp(operator) < min_bp) {
break;
}
// allocate operator
arrayslice_next(slice, NULL);
arena_ensure_capacity(
arena,
sizeof(ASTNode),
alignof(ASTNode));
ASTNode *new_node = arena_unwrap_pointer(
arena_alloc(
arena,
sizeof(ASTNode),
alignof(ASTNode)
)
);
*new_node = operator;
new_node->data.unary.val = left_side;
left_side = new_node;
continue;
}
arrayslice_next(slice, NULL);
ASTNode *right_side = parse_expr(slice, arena, rbp);
// check if it has infix or not, if not then error
uint8_t rbp = infix_rbp(operator);
uint8_t lbp = infix_lbp(operator);
arena_ensure_capacity(
arena,
sizeof(ASTNode),
alignof(ASTNode));
ASTNode *new_node = arena_unwrap_pointer(
arena_alloc(
arena,
sizeof(ASTNode),
alignof(ASTNode)
)
);
*new_node = operator;
if (rbp != 255 && lbp != 255) {
new_node->data.binary.left = left_side;
new_node->data.binary.right = right_side;
// If lbp is LESS then stop recursion,
// we found the next smaller binding power
// or the one with more precedence
if (lbp < min_bp) {
break;
}
left_side = new_node;
// If NOT, then we continue watching ahead
// for the next one but taking our current
// concern that is rbp of the current operator
arrayslice_next(slice, NULL);
ASTNode *right_side = parse_expr(slice, arena, rbp);
arena_ensure_capacity(
arena,
sizeof(ASTNode),
alignof(ASTNode));
ASTNode *new_node = arena_unwrap_pointer(
arena_alloc(
arena,
sizeof(ASTNode),
alignof(ASTNode)
)
);
*new_node = operator;
new_node->data.binary.left = left_side;
new_node->data.binary.right = right_side;
left_side = new_node;
continue;
}
break;
}
// Final: return left side
return left_side;
}