refactor: rewrote tokenize and modified other funcs

Well, I wanted to wildly change a lot of things about the lexer, thinking
I could do something better, but really all I found were automatic lexers
that, at least for me, don't really fit the project, so a manual one it is.
I guess technically it is an automaton. Whatever, it is good enough.
This commit is contained in:
2026-04-30 21:34:27 -06:00
parent f2c906c6aa
commit c41847e120
2 changed files with 42 additions and 34 deletions

View File

@@ -66,12 +66,12 @@ typedef struct {
LexerErr err; LexerErr err;
int64_t number; int64_t number;
}; };
} I64Result; } LexerI64Result;
// Lexer funtions as well as few functionality // Lexer funtions as well as few functionality
TokenizeResult tokenize(const char* input); TokenizeResult tokenize(const char* input);
ASTNodeResult tokenize_number(const char* input, size_t *offset); ASTNodeResult tokenize_number(const char* input, size_t *offset);
I64Result string_to_integer(const char buf[]); LexerI64Result string_to_integer(const char buf[]);
bool isoperator(int c); bool isoperator(int c);
Operator char_to_operator(int c); Operator char_to_operator(int c);
char operator_to_char(Operator op); char operator_to_char(Operator op);

View File

@@ -3,6 +3,7 @@
#include <ctype.h> #include <ctype.h>
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h> #include <stdint.h>
#include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <strings.h> #include <strings.h>
#include <limits.h> #include <limits.h>
@@ -14,45 +15,37 @@ typedef enum {
TokenizeResult tokenize(const char *input) { TokenizeResult tokenize(const char *input) {
size_t offset = 0;
LexerState state = WAIT_FOR_NUMBER;
ArrayList *arr = arraylist_init(64, sizeof(ASTNode)); ArrayList *arr = arraylist_init(64, sizeof(ASTNode));
size_t offset = 0;
while (input[offset] != '\n' && input[offset] != '\0') { while (
int current = input[offset]; input[offset] != '\n' ||
input[offset] != EOF ||
input[offset] != '\0') {
if (isdigit(current)) { if (isdigit(input[offset])) {
if (state != WAIT_FOR_NUMBER) {
arraylist_destroy(&arr);
return (TokenizeResult) {.is_valid = false, .err = LEXER_WRONG_SYNTAX};
}
ASTNodeResult result = tokenize_number(input, &offset); ASTNodeResult result = tokenize_number(input, &offset);
if (!result.is_valid) { if (!result.is_valid) {
arraylist_destroy(&arr);
return (TokenizeResult) {.is_valid = false, .err = result.err}; return (TokenizeResult) {.is_valid = false, .err = result.err};
} }
arraylist_push_back(arr, &result.node); arraylist_push_back(arr, &result.node);
state = WAIT_FOR_OPERATOR; } else if (isoperator(input[offset])) {
} else if (isoperator(current)) { ASTNode op_node = {
if (state != WAIT_FOR_OPERATOR) {
return (TokenizeResult) {.is_valid = false, .err =LEXER_WRONG_SYNTAX};
}
ASTNode new_node = {
.type = NODE_BINARY_OP, .type = NODE_BINARY_OP,
.data.binary.op = char_to_operator(current), .data.binary.op = char_to_operator(input[offset]),
.data.binary.right = NULL,
.data.binary.left = NULL, .data.binary.left = NULL,
.data.binary.right = NULL,
}; };
arraylist_push_back(arr, &new_node); arraylist_push_back(arr, &op_node);
state = WAIT_FOR_NUMBER; } else if (isspace(input[offset])) {
} else if (isspace(current)) {
// Nothing... // Nothing...
} else { } else {
arraylist_destroy(&arr); return (TokenizeResult) {
return (TokenizeResult) {.is_valid = false, .err = LEXER_NOT_RECOGNIZED_SYMBOL}; .is_valid = false,
.err = LEXER_NOT_RECOGNIZED_SYMBOL};
} }
offset++; offset++;
@@ -68,17 +61,21 @@ TokenizeResult tokenize(const char *input) {
// CURRENTLY, it only supports ints, not clear how floating // CURRENTLY, it only supports ints, not clear how floating
// point is implemented but i'll figure it out // point is implemented but i'll figure it out
ASTNodeResult tokenize_number(const char *input, size_t *offset) { ASTNodeResult tokenize_number(const char *input, size_t *offset) {
char buf[128] = { '\0' }; char buf[64] = { '\0' };
size_t buf_pos = 0; size_t buf_pos = 0;
bool is_integer = true; // Will later be used to differentiate fractions bool is_integer = true; // Will later be used to differentiate fractions
// read number
size_t current = *offset; size_t current = *offset;
while (isdigit(input[current])) { while (isdigit(input[current])) {
buf[buf_pos] = input[current]; buf[buf_pos] = input[current];
if (buf_pos >= sizeof(buf)) { if (buf_pos >= sizeof(buf)) {
return (ASTNodeResult) {.is_valid = false, .err = LEXER_BUF_OVERFLOW}; return (ASTNodeResult) {
.is_valid = false,
.err = LEXER_BUF_OVERFLOW};
} }
current++; current++;
buf_pos++; buf_pos++;
} }
@@ -86,35 +83,46 @@ ASTNodeResult tokenize_number(const char *input, size_t *offset) {
ASTNode new_node; ASTNode new_node;
if (is_integer) { if (is_integer) {
new_node.type = NODE_INTEGER; new_node.type = NODE_INTEGER;
I64Result status = string_to_integer(buf); LexerI64Result status = string_to_integer(buf);
if (!status.is_valid) { if (!status.is_valid) {
return (ASTNodeResult) {.is_valid = false, .err = status.err}; return (ASTNodeResult) {.is_valid = false, .err = status.err};
} }
new_node.data.integer = status.number; new_node.data.integer = status.number;
*offset = current; *offset = current;
return (ASTNodeResult) {.is_valid = true, .node = new_node}; return (ASTNodeResult) {.is_valid = true, .node = new_node};
} }
return (ASTNodeResult) {.is_valid = false, .err = LEXER_FAILED_NUMBER_CONVERSION}; return (ASTNodeResult) {
.is_valid = false,
.err = LEXER_FAILED_NUMBER_CONVERSION};
} }
I64Result string_to_integer(const char *buf) { LexerI64Result string_to_integer(const char *buf) {
int c = 0; int c = 0;
int64_t count = 0; int64_t count = 0;
while (buf[c] != '\0') { while (buf[c] != '\0') {
// Extracts number from char
int digit = buf[c] - '0'; int digit = buf[c] - '0';
if (count > (INT64_MAX - digit) / 10) { if (count > (INT64_MAX - digit) / 10) {
return (I64Result) {.is_valid = false, .err = LEXER_INT_OVERFLOW}; return (LexerI64Result) {
.is_valid = false,
.err = LEXER_INT_OVERFLOW};
} }
count = count * 10; count = count * 10;
count += digit; count += digit;
c++; c++;
} }
return (I64Result) {.is_valid = true, .number = count}; return (LexerI64Result) {.is_valid = true, .number = count};
} }
bool isoperator(int c) { bool isoperator(int c) {