Modified the structure of the lexer; it is now easier to add other kinds of numbers such as fractions, and I could even consider roots, irrationals, complex or imaginary numbers, which would be dope. For now there is only support for integers; we need to get this shit running.

This commit is contained in:
2026-03-09 11:58:55 -06:00
parent afae8fbe3a
commit 0de6cf5024
3 changed files with 45 additions and 13 deletions

View File

@@ -30,6 +30,7 @@ typedef enum {
typedef enum {
LEXER_OK = 0,
LEXER_INT_OVERFLOW,
LEXER_FAILED_NUMBER_CONVERSION,
LEXER_NOT_RECOGNIZED_SYMBOL,
LEXER_EMPTY_INPUT,
@@ -70,7 +71,6 @@ size_t ASTNodeArray_len(ASTNodeArray *arr);
// Lexer functions as well as a few helper utilities
LexerErr tokenize(const char* input, ASTNodeArray *out);
LexerErr tokenize_number(const char* input, size_t *offset, ASTNode *out);
LexerErr string_to_number(const char* input, size_t *offset, int64_t *number);
void reverser_string(char* input);
LexerErr string_to_integer(const char buf[], int64_t *number);
#endif // !LEXER_H

View File

@@ -112,11 +112,14 @@ size_t ASTNodeArray_len(ASTNodeArray *arr) {
return arr->len;
}
// Currently only integers are supported; it is not yet clear how floating
// point will be implemented, but I'll figure it out.
LexerErr string_to_number(const char *input, size_t *offset, int64_t *number) {
LexerErr tokenize_number(const char *input, size_t *offset, ASTNode *out) {
char buf[128] = { '\0' };
size_t buf_pos = 0;
bool is_integer = true; // Will later be used to differentiate fractions
size_t current = *offset;
while (isdigit(input[current])) {
@@ -128,15 +131,30 @@ LexerErr string_to_number(const char *input, size_t *offset, int64_t *number) {
current++;
buf_pos++;
}
ASTNode new_node;
if (is_integer) {
new_node.type = NODE_INTEGER;
LexerErr status = string_to_integer(buf, &new_node.data.integer);
if (status == LEXER_OK) {
*out = new_node;
}
*offset = current;
return status;
}
return LEXER_FAILED_NUMBER_CONVERSION;
}
LexerErr string_to_integer(const char *buf, int64_t *number) {
int c = 0;
int64_t count = 0;
while (buf[c] != '\0') {
int digit = buf[c] - '0';
if (count > (INT_MAX - digit) / 10) {
return LEXER_FAILED_NUMBER_CONVERSION;
if (count > (INT64_MAX - digit) / 10) {
return LEXER_INT_OVERFLOW;
}
count = count * 10;
count += digit;
@@ -145,6 +163,5 @@ LexerErr string_to_number(const char *input, size_t *offset, int64_t *number) {
}
*number = count;
*offset = current;
return LEXER_OK;
}

View File

@@ -6,21 +6,36 @@
#include <setjmp.h>
#include <cmocka.h>
static void test_string_to_number(void **state) {
static void test_string_to_number_normal(void **state) {
(void) state;
char num[16] = "2333t55";
size_t offset = 0;
int64_t result = 0;
assert_int_equal(string_to_number(num, &offset, &result), 0);
ASTNode result;
assert_int_equal(offset, 4);
assert_double_equal(result, 2333, 1e-6);
assert_int_equal(tokenize_number(num, &offset, &result), LEXER_OK);
assert_int_equal(offset, 4); // equal to t position in string
assert_int_equal(result.type, NODE_INTEGER);
assert_int_equal(result.data.integer, 2333);
}
// Checks that tokenize_number reports LEXER_INT_OVERFLOW when the leading
// run of digits does not fit in an int64_t.
static void test_string_to_number_overflow(void **state) {
    (void) state;

    // The digits of INT64_MAX (9223372036854775807) with an extra 8 appended,
    // followed by some trailing characters.
    char text[32] = "92233720368547758078yy7";
    size_t pos = 0;
    ASTNode node;

    const int status = tokenize_number(text, &pos, &node);
    assert_int_equal(status, LEXER_INT_OVERFLOW);

    // Technically this input could also trigger a buffer overflow error, but
    // the integer overflow error is expected to be triggered first.
}
int main(void) {
const struct CMUnitTest tests[] = {
cmocka_unit_test(test_string_to_number),
cmocka_unit_test(test_string_to_number_normal),
cmocka_unit_test(test_string_to_number_overflow),
};
return cmocka_run_group_tests(tests, NULL, NULL);