Compare commits

...

5 Commits

Author SHA1 Message Date
56c80fa071 addition: Managing of parentheses
It's a fucking mess; I was writing straight bullshit, but it should
conceptually work. I just need to refactor the shit out of it to make it
way cleaner than it currently is, and later also fix the fucking evaluator,
because damn, it sucks ass right now (not that much, really; it's nice, it
just obviously doesn't work). I like my code a lot :)
2026-05-12 20:04:41 -06:00
7f390a8c6b addition: postfix operator capability, may work 2026-05-12 19:40:42 -06:00
e30b3d7175 addition: processing of prefix op 2026-05-12 18:33:52 -06:00
59f99059bb refactor: changes and additions to parser 2026-05-12 18:15:36 -06:00
c41847e120 refactor: rewrote tokenize and modified other funcs
Well, I wanted to wildly change a lot of things about the lexer, thinking
I could do something better, but really all I found were automatic lexers
that, at least for me, don't really fit the project, so a manual one it is.
I guess technically it is an automaton. Whatever, it's good enough.
2026-04-30 21:34:27 -06:00
5 changed files with 223 additions and 69 deletions

View File

@@ -10,6 +10,8 @@
typedef enum { typedef enum {
NODE_INTEGER, NODE_INTEGER,
NODE_BINARY_OP, NODE_BINARY_OP,
NODE_UNARY_OP,
NODE_PARENTHESIS,
} ASTNodeType; } ASTNodeType;
// For classify operators // For classify operators
@@ -17,7 +19,11 @@ typedef enum {
OP_ADD, OP_ADD,
OP_SUB, OP_SUB,
OP_MUL, OP_MUL,
OP_DIV OP_DIV,
OP_POW,
OP_FACTORIAL,
OP_START_PAR,
OP_END_PAR,
} Operator; } Operator;
typedef enum { typedef enum {
@@ -26,8 +32,6 @@ typedef enum {
LEXER_FAILED_NUMBER_CONVERSION, LEXER_FAILED_NUMBER_CONVERSION,
LEXER_NOT_RECOGNIZED_SYMBOL, LEXER_NOT_RECOGNIZED_SYMBOL,
LEXER_EMPTY_INPUT, LEXER_EMPTY_INPUT,
LEXER_NULL_ARG,
LEXER_WRONG_SYNTAX,
LEXER_BUF_OVERFLOW, LEXER_BUF_OVERFLOW,
} LexerErr; } LexerErr;
@@ -41,6 +45,14 @@ typedef struct ASTNode {
struct ASTNode *right; struct ASTNode *right;
Operator op; Operator op;
} binary; } binary;
struct {
struct ASTNode *val;
Operator op;
} unary;
struct {
struct ASTNode *val;
Operator op;
} parenthesis;
} data; } data;
} ASTNode; } ASTNode;
@@ -66,12 +78,12 @@ typedef struct {
LexerErr err; LexerErr err;
int64_t number; int64_t number;
}; };
} I64Result; } LexerI64Result;
// Lexer funtions as well as few functionality // Lexer funtions as well as few functionality
TokenizeResult tokenize(const char* input); TokenizeResult tokenize(const char* input);
ASTNodeResult tokenize_number(const char* input, size_t *offset); ASTNodeResult tokenize_number(const char* input, size_t *offset);
I64Result string_to_integer(const char buf[]); LexerI64Result string_to_integer(const char buf[]);
bool isoperator(int c); bool isoperator(int c);
Operator char_to_operator(int c); Operator char_to_operator(int c);
char operator_to_char(Operator op); char operator_to_char(Operator op);

View File

@@ -32,8 +32,10 @@ typedef struct {
ASTNode *nud(ArraySlice *slice); ASTNode *nud(ArraySlice *slice);
ASTNode *led(ArraySlice *slice, size_t right_precedence); ASTNode *led(ArraySlice *slice, size_t right_precedence);
uint8_t node_lbp(ASTNode node); uint8_t prefix_rbp(ASTNode node);
uint8_t node_rbp(ASTNode node); uint8_t postfix_lbp(ASTNode node);
uint8_t infix_lbp(ASTNode node);
uint8_t infix_rbp(ASTNode node);
ParseResult parse(TokenizeResult tokens); ParseResult parse(TokenizeResult tokens);
ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp); ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp);

View File

@@ -3,6 +3,7 @@
#include "lexer.h" #include "lexer.h"
#include "parser.h" #include "parser.h"
#include <stdint.h> #include <stdint.h>
#include <math.h>
int64_t evaluate_tree(ASTNode *tree) { int64_t evaluate_tree(ASTNode *tree) {
@@ -20,7 +21,8 @@ int64_t evaluate_tree(ASTNode *tree) {
return evaluate_tree(left) * evaluate_tree(right); return evaluate_tree(left) * evaluate_tree(right);
case OP_DIV: case OP_DIV:
return evaluate_tree(left) / evaluate_tree(right); return evaluate_tree(left) / evaluate_tree(right);
case OP_POW:
return pow(evaluate_tree(left), evaluate_tree(right));
} }
} }

View File

@@ -3,6 +3,7 @@
#include <ctype.h> #include <ctype.h>
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h> #include <stdint.h>
#include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <strings.h> #include <strings.h>
#include <limits.h> #include <limits.h>
@@ -14,45 +15,37 @@ typedef enum {
TokenizeResult tokenize(const char *input) { TokenizeResult tokenize(const char *input) {
size_t offset = 0;
LexerState state = WAIT_FOR_NUMBER;
ArrayList *arr = arraylist_init(64, sizeof(ASTNode)); ArrayList *arr = arraylist_init(64, sizeof(ASTNode));
size_t offset = 0;
while (input[offset] != '\n' && input[offset] != '\0') { while (
int current = input[offset]; input[offset] != '\n' ||
input[offset] != EOF ||
input[offset] != '\0') {
if (isdigit(current)) { if (isdigit(input[offset])) {
if (state != WAIT_FOR_NUMBER) {
arraylist_destroy(&arr);
return (TokenizeResult) {.is_valid = false, .err = LEXER_WRONG_SYNTAX};
}
ASTNodeResult result = tokenize_number(input, &offset); ASTNodeResult result = tokenize_number(input, &offset);
if (!result.is_valid) { if (!result.is_valid) {
arraylist_destroy(&arr);
return (TokenizeResult) {.is_valid = false, .err = result.err}; return (TokenizeResult) {.is_valid = false, .err = result.err};
} }
arraylist_push_back(arr, &result.node); arraylist_push_back(arr, &result.node);
state = WAIT_FOR_OPERATOR; } else if (isoperator(input[offset])) {
} else if (isoperator(current)) { ASTNode op_node = {
if (state != WAIT_FOR_OPERATOR) {
return (TokenizeResult) {.is_valid = false, .err =LEXER_WRONG_SYNTAX};
}
ASTNode new_node = {
.type = NODE_BINARY_OP, .type = NODE_BINARY_OP,
.data.binary.op = char_to_operator(current), .data.binary.op = char_to_operator(input[offset]),
.data.binary.right = NULL,
.data.binary.left = NULL, .data.binary.left = NULL,
.data.binary.right = NULL,
}; };
arraylist_push_back(arr, &new_node); arraylist_push_back(arr, &op_node);
state = WAIT_FOR_NUMBER; } else if (isspace(input[offset])) {
} else if (isspace(current)) {
// Nothing... // Nothing...
} else { } else {
arraylist_destroy(&arr); return (TokenizeResult) {
return (TokenizeResult) {.is_valid = false, .err = LEXER_NOT_RECOGNIZED_SYMBOL}; .is_valid = false,
.err = LEXER_NOT_RECOGNIZED_SYMBOL};
} }
offset++; offset++;
@@ -68,17 +61,21 @@ TokenizeResult tokenize(const char *input) {
// CURRENTLY, it only supports ints, not clear how floating // CURRENTLY, it only supports ints, not clear how floating
// point is implemented but i'll figure it out // point is implemented but i'll figure it out
ASTNodeResult tokenize_number(const char *input, size_t *offset) { ASTNodeResult tokenize_number(const char *input, size_t *offset) {
char buf[128] = { '\0' }; char buf[64] = { '\0' };
size_t buf_pos = 0; size_t buf_pos = 0;
bool is_integer = true; // Will later be used to differentiate fractions bool is_integer = true; // Will later be used to differentiate fractions
// read number
size_t current = *offset; size_t current = *offset;
while (isdigit(input[current])) { while (isdigit(input[current])) {
buf[buf_pos] = input[current]; buf[buf_pos] = input[current];
if (buf_pos >= sizeof(buf)) { if (buf_pos >= sizeof(buf)) {
return (ASTNodeResult) {.is_valid = false, .err = LEXER_BUF_OVERFLOW}; return (ASTNodeResult) {
.is_valid = false,
.err = LEXER_BUF_OVERFLOW};
} }
current++; current++;
buf_pos++; buf_pos++;
} }
@@ -86,35 +83,46 @@ ASTNodeResult tokenize_number(const char *input, size_t *offset) {
ASTNode new_node; ASTNode new_node;
if (is_integer) { if (is_integer) {
new_node.type = NODE_INTEGER; new_node.type = NODE_INTEGER;
I64Result status = string_to_integer(buf); LexerI64Result status = string_to_integer(buf);
if (!status.is_valid) { if (!status.is_valid) {
return (ASTNodeResult) {.is_valid = false, .err = status.err}; return (ASTNodeResult) {.is_valid = false, .err = status.err};
} }
new_node.data.integer = status.number; new_node.data.integer = status.number;
*offset = current; *offset = current;
return (ASTNodeResult) {.is_valid = true, .node = new_node}; return (ASTNodeResult) {.is_valid = true, .node = new_node};
} }
return (ASTNodeResult) {.is_valid = false, .err = LEXER_FAILED_NUMBER_CONVERSION}; return (ASTNodeResult) {
.is_valid = false,
.err = LEXER_FAILED_NUMBER_CONVERSION};
} }
I64Result string_to_integer(const char *buf) { LexerI64Result string_to_integer(const char *buf) {
int c = 0; int c = 0;
int64_t count = 0; int64_t count = 0;
while (buf[c] != '\0') { while (buf[c] != '\0') {
// Extracts number from char
int digit = buf[c] - '0'; int digit = buf[c] - '0';
if (count > (INT64_MAX - digit) / 10) { if (count > (INT64_MAX - digit) / 10) {
return (I64Result) {.is_valid = false, .err = LEXER_INT_OVERFLOW}; return (LexerI64Result) {
.is_valid = false,
.err = LEXER_INT_OVERFLOW};
} }
count = count * 10; count = count * 10;
count += digit; count += digit;
c++; c++;
} }
return (I64Result) {.is_valid = true, .number = count}; return (LexerI64Result) {.is_valid = true, .number = count};
} }
bool isoperator(int c) { bool isoperator(int c) {
@@ -123,6 +131,10 @@ bool isoperator(int c) {
case '-': case '-':
case '/': case '/':
case '*': case '*':
case '^':
case '!':
case '(':
case ')':
return true; return true;
default: default:
return false; return false;
@@ -143,6 +155,18 @@ Operator char_to_operator(int c) {
case '/': case '/':
return OP_DIV; return OP_DIV;
break; break;
case '^':
return OP_POW;
break;
case '!':
return OP_FACTORIAL;
break;
case '(':
return OP_START_PAR;
break;
case ')':
return OP_END_PAR;
break;
default: // I mean shouldn't be used, we assume default: // I mean shouldn't be used, we assume
return -1; return -1;
} }
@@ -158,5 +182,15 @@ char operator_to_char(Operator op) {
return '*'; return '*';
case OP_DIV: case OP_DIV:
return '/'; return '/';
case OP_POW:
return '^';
case OP_FACTORIAL:
return '!';
case OP_START_PAR:
return '(';
case OP_END_PAR:
return ')';
default:
return EOF;
} }
} }

View File

@@ -6,7 +6,35 @@
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h> #include <stdint.h>
uint8_t node_lbp(ASTNode node) { uint8_t prefix_rbp(ASTNode node) {
if (node.type == NODE_INTEGER) {
return 0;
}
switch (node.data.unary.op) {
case OP_SUB:
case OP_ADD:
return 30;
default:
return -1;
}
}
uint8_t postfix_lbp(ASTNode node) {
if (node.type == NODE_INTEGER) {
return 0;
}
switch (node.data.unary.op) {
case OP_FACTORIAL:
return 40;
default:
// needs to be dealt with with resulttypes
return 255;
}
}
uint8_t infix_lbp(ASTNode node) {
if (node.type == NODE_INTEGER) { if (node.type == NODE_INTEGER) {
return 0; return 0;
} }
@@ -19,12 +47,14 @@ uint8_t node_lbp(ASTNode node) {
case OP_DIV: case OP_DIV:
case OP_MUL: case OP_MUL:
return 20; return 20;
case OP_POW:
return 51;
default: default:
return 0; return 0;
} }
} }
uint8_t node_rbp(ASTNode node) { uint8_t infix_rbp(ASTNode node) {
if (node.type == NODE_INTEGER) { if (node.type == NODE_INTEGER) {
return 0; return 0;
} }
@@ -37,6 +67,8 @@ uint8_t node_rbp(ASTNode node) {
case OP_DIV: case OP_DIV:
case OP_MUL: case OP_MUL:
return 21; return 21;
case OP_POW:
return 50;
default: default:
return 0; return 0;
} }
@@ -53,12 +85,14 @@ ParseResult parse(TokenizeResult tokens) {
} }
ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) {
// First: Consume a first number
arena_ensure_capacity( arena_ensure_capacity(
arena, arena,
sizeof(ASTNode), sizeof(ASTNode),
alignof(ASTNode) alignof(ASTNode)
); ); // shouldn't fail but if it does then what a shame
// Get pointer in the arena
ASTNode *left_side = arena_unwrap_pointer( ASTNode *left_side = arena_unwrap_pointer(
arena_alloc( arena_alloc(
arena, arena,
@@ -69,45 +103,115 @@ ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) {
arrayslice_next(slice, left_side); arrayslice_next(slice, left_side);
if (left_side->type == NODE_PARENTHESIS &&
left_side->data.parenthesis.op == OP_START_PAR) {
left_side = parse_expr(slice, arena, 0);
// HERE CHEKC LATER if slice.next != ')'
ASTNode *end_par;
arrayslice_next(slice, &end_par);
if (end_par->type != NODE_PARENTHESIS ||
end_par->data.parenthesis.op != OP_END_PAR) {
// todo
}
return left_side;
}
// if is unary then take prefix bp and continue
// to the right, no need to allocate left side
// because we just did and right side
// WILL return a valid allocated pointer.
if (left_side->type == NODE_UNARY_OP) {
uint8_t rbp = prefix_rbp(*left_side);
ASTNode *righ_side = parse_expr(slice, arena, rbp);
left_side->data.unary.val = righ_side;
}
while (true) { while (true) {
// Second: Get next one and checn bp
if (!arrayslice_is_valid(slice)) { if (!arrayslice_is_valid(slice)) {
break; break;
} }
ASTNode operator; // Here check if not OP error
arrayslice_peek(slice, &operator);
uint8_t rbp = node_rbp(operator);
uint8_t lbp = node_lbp(operator);
if (lbp < min_bp) { ASTNode operator;
break; // Here should chekc if is operator not some bs
// Third, get operator and binding powers
arrayslice_peek(slice, &operator);
// temporary for bad error handling
if (postfix_lbp(operator) != 255) {
if (postfix_lbp(operator) < min_bp) {
break;
}
// allocate operator
arrayslice_next(slice, NULL);
arena_ensure_capacity(
arena,
sizeof(ASTNode),
alignof(ASTNode));
ASTNode *new_node = arena_unwrap_pointer(
arena_alloc(
arena,
sizeof(ASTNode),
alignof(ASTNode)
)
);
*new_node = operator;
new_node->data.unary.val = left_side;
left_side = new_node;
continue;
} }
arrayslice_next(slice, NULL); // check if it has infix or not, if not then error
ASTNode *right_side = parse_expr(slice, arena, rbp); uint8_t rbp = infix_rbp(operator);
uint8_t lbp = infix_lbp(operator);
arena_ensure_capacity( if (rbp != 255 && lbp != 255) {
arena,
sizeof(ASTNode),
alignof(ASTNode));
ASTNode *new_node = arena_unwrap_pointer(
arena_alloc(
arena,
sizeof(ASTNode),
alignof(ASTNode)
)
);
*new_node = operator;
new_node->data.binary.left = left_side; // If lbp is LESS then stop recursion,
new_node->data.binary.right = right_side; // we found the next smaller binding power
// or the one with more precedence
if (lbp < min_bp) {
break;
}
left_side = new_node;
// If NOT, then we continue wtching ahead
// for the next one but taking our current
// concern that is rbp of the current operator
arrayslice_next(slice, NULL);
ASTNode *right_side = parse_expr(slice, arena, rbp);
arena_ensure_capacity(
arena,
sizeof(ASTNode),
alignof(ASTNode));
ASTNode *new_node = arena_unwrap_pointer(
arena_alloc(
arena,
sizeof(ASTNode),
alignof(ASTNode)
)
);
*new_node = operator;
new_node->data.binary.left = left_side;
new_node->data.binary.right = right_side;
left_side = new_node;
continue;
}
break;
} }
// Final: return left side
return left_side; return left_side;
} }