Modified the structure of the lexer; it is now easier to add new kinds of numbers such as fractions. I could even consider roots, irrationals, complex or imaginary numbers, which would be dope. For now only integers are supported; we need to get this shit running.
This commit is contained in:
@@ -30,6 +30,7 @@ typedef enum {
|
|||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
LEXER_OK = 0,
|
LEXER_OK = 0,
|
||||||
|
LEXER_INT_OVERFLOW,
|
||||||
LEXER_FAILED_NUMBER_CONVERSION,
|
LEXER_FAILED_NUMBER_CONVERSION,
|
||||||
LEXER_NOT_RECOGNIZED_SYMBOL,
|
LEXER_NOT_RECOGNIZED_SYMBOL,
|
||||||
LEXER_EMPTY_INPUT,
|
LEXER_EMPTY_INPUT,
|
||||||
@@ -70,7 +71,6 @@ size_t ASTNodeArray_len(ASTNodeArray *arr);
|
|||||||
// Lexer functions as well as a few helper utilities
|
// Lexer functions as well as a few helper utilities
|
||||||
LexerErr tokenize(const char* input, ASTNodeArray *out);
|
LexerErr tokenize(const char* input, ASTNodeArray *out);
|
||||||
LexerErr tokenize_number(const char* input, size_t *offset, ASTNode *out);
|
LexerErr tokenize_number(const char* input, size_t *offset, ASTNode *out);
|
||||||
LexerErr string_to_number(const char* input, size_t *offset, int64_t *number);
|
LexerErr string_to_integer(const char buf[], int64_t *number);
|
||||||
void reverser_string(char* input);
|
|
||||||
|
|
||||||
#endif // !LEXER_H
|
#endif // !LEXER_H
|
||||||
|
|||||||
25
src/lexer.c
25
src/lexer.c
@@ -112,11 +112,14 @@ size_t ASTNodeArray_len(ASTNodeArray *arr) {
|
|||||||
return arr->len;
|
return arr->len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// CURRENTLY, it only supports ints, not clear how floating
|
// CURRENTLY, it only supports ints, not clear how floating
|
||||||
// point is implemented but i'll figure it out
|
// point is implemented but i'll figure it out
|
||||||
LexerErr string_to_number(const char *input, size_t *offset, int64_t *number) {
|
LexerErr tokenize_number(const char *input, size_t *offset, ASTNode *out) {
|
||||||
char buf[128] = { '\0' };
|
char buf[128] = { '\0' };
|
||||||
size_t buf_pos = 0;
|
size_t buf_pos = 0;
|
||||||
|
bool is_integer = true; // Will later be used to differentiate fractions
|
||||||
|
|
||||||
size_t current = *offset;
|
size_t current = *offset;
|
||||||
while (isdigit(input[current])) {
|
while (isdigit(input[current])) {
|
||||||
@@ -129,14 +132,29 @@ LexerErr string_to_number(const char *input, size_t *offset, int64_t *number) {
|
|||||||
buf_pos++;
|
buf_pos++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ASTNode new_node;
|
||||||
|
if (is_integer) {
|
||||||
|
new_node.type = NODE_INTEGER;
|
||||||
|
LexerErr status = string_to_integer(buf, &new_node.data.integer);
|
||||||
|
if (status == LEXER_OK) {
|
||||||
|
*out = new_node;
|
||||||
|
}
|
||||||
|
*offset = current;
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
return LEXER_FAILED_NUMBER_CONVERSION;
|
||||||
|
}
|
||||||
|
|
||||||
|
LexerErr string_to_integer(const char *buf, int64_t *number) {
|
||||||
int c = 0;
|
int c = 0;
|
||||||
int64_t count = 0;
|
int64_t count = 0;
|
||||||
while (buf[c] != '\0') {
|
while (buf[c] != '\0') {
|
||||||
|
|
||||||
int digit = buf[c] - '0';
|
int digit = buf[c] - '0';
|
||||||
|
|
||||||
if (count > (INT_MAX - digit) / 10) {
|
if (count > (INT64_MAX - digit) / 10) {
|
||||||
return LEXER_FAILED_NUMBER_CONVERSION;
|
return LEXER_INT_OVERFLOW;
|
||||||
}
|
}
|
||||||
count = count * 10;
|
count = count * 10;
|
||||||
count += digit;
|
count += digit;
|
||||||
@@ -145,6 +163,5 @@ LexerErr string_to_number(const char *input, size_t *offset, int64_t *number) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
*number = count;
|
*number = count;
|
||||||
*offset = current;
|
|
||||||
return LEXER_OK;
|
return LEXER_OK;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,21 +6,36 @@
|
|||||||
#include <setjmp.h>
|
#include <setjmp.h>
|
||||||
#include <cmocka.h>
|
#include <cmocka.h>
|
||||||
|
|
||||||
static void test_string_to_number(void **state) {
|
static void test_string_to_number_normal(void **state) {
|
||||||
(void) state;
|
(void) state;
|
||||||
|
|
||||||
char num[16] = "2333t55";
|
char num[16] = "2333t55";
|
||||||
size_t offset = 0;
|
size_t offset = 0;
|
||||||
int64_t result = 0;
|
ASTNode result;
|
||||||
assert_int_equal(string_to_number(num, &offset, &result), 0);
|
|
||||||
|
|
||||||
assert_int_equal(offset, 4);
|
assert_int_equal(tokenize_number(num, &offset, &result), LEXER_OK);
|
||||||
assert_double_equal(result, 2333, 1e-6);
|
|
||||||
|
assert_int_equal(offset, 4); // equal to t position in string
|
||||||
|
assert_int_equal(result.type, NODE_INTEGER);
|
||||||
|
assert_int_equal(result.data.integer, 2333);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void test_string_to_number_overflow(void **state) {
|
||||||
|
(void) state;
|
||||||
|
|
||||||
|
// Number is INT64_MAX but with an extra 8 at the end
|
||||||
|
char num[32] = "92233720368547758078yy7";
|
||||||
|
size_t offset = 0;
|
||||||
|
ASTNode result;
|
||||||
|
assert_int_equal(tokenize_number(num, &offset, &result), LEXER_INT_OVERFLOW);
|
||||||
|
// Technically it can trigger a buf overflow error but obviously
|
||||||
|
// it will trigger int overflow error first
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(void) {
|
int main(void) {
|
||||||
const struct CMUnitTest tests[] = {
|
const struct CMUnitTest tests[] = {
|
||||||
cmocka_unit_test(test_string_to_number),
|
cmocka_unit_test(test_string_to_number_normal),
|
||||||
|
cmocka_unit_test(test_string_to_number_overflow),
|
||||||
};
|
};
|
||||||
|
|
||||||
return cmocka_run_group_tests(tests, NULL, NULL);
|
return cmocka_run_group_tests(tests, NULL, NULL);
|
||||||
|
|||||||
Reference in New Issue
Block a user