diff --git a/include/lexer.h b/include/lexer.h index ed8c1e2..430b0a4 100644 --- a/include/lexer.h +++ b/include/lexer.h @@ -66,12 +66,12 @@ typedef struct { LexerErr err; int64_t number; }; -} I64Result; +} LexerI64Result; // Lexer funtions as well as few functionality TokenizeResult tokenize(const char* input); ASTNodeResult tokenize_number(const char* input, size_t *offset); -I64Result string_to_integer(const char buf[]); +LexerI64Result string_to_integer(const char buf[]); bool isoperator(int c); Operator char_to_operator(int c); char operator_to_char(Operator op); diff --git a/src/lexer.c b/src/lexer.c index 7baf998..f139394 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -14,45 +15,37 @@ typedef enum { TokenizeResult tokenize(const char *input) { - size_t offset = 0; - LexerState state = WAIT_FOR_NUMBER; ArrayList *arr = arraylist_init(64, sizeof(ASTNode)); + size_t offset = 0; - while (input[offset] != '\n' && input[offset] != '\0') { - int current = input[offset]; + while ( + input[offset] != '\n' || + input[offset] != EOF || + input[offset] != '\0') { - if (isdigit(current)) { - if (state != WAIT_FOR_NUMBER) { - arraylist_destroy(&arr); - return (TokenizeResult) {.is_valid = false, .err = LEXER_WRONG_SYNTAX}; - } + if (isdigit(input[offset])) { ASTNodeResult result = tokenize_number(input, &offset); if (!result.is_valid) { - arraylist_destroy(&arr); return (TokenizeResult) {.is_valid = false, .err = result.err}; } arraylist_push_back(arr, &result.node); - state = WAIT_FOR_OPERATOR; - } else if (isoperator(current)) { - if (state != WAIT_FOR_OPERATOR) { - return (TokenizeResult) {.is_valid = false, .err =LEXER_WRONG_SYNTAX}; - } - ASTNode new_node = { + } else if (isoperator(input[offset])) { + ASTNode op_node = { .type = NODE_BINARY_OP, - .data.binary.op = char_to_operator(current), - .data.binary.right = NULL, + .data.binary.op = char_to_operator(input[offset]), .data.binary.left = NULL, + .data.binary.right = NULL, }; - - arraylist_push_back(arr, &new_node); - state = WAIT_FOR_NUMBER; - } else if (isspace(current)) { + + arraylist_push_back(arr, &op_node); + } else if (isspace(input[offset])) { // Nothing... } else { - arraylist_destroy(&arr); - return (TokenizeResult) {.is_valid = false, .err = LEXER_NOT_RECOGNIZED_SYMBOL}; + return (TokenizeResult) { + .is_valid = false, + .err = LEXER_NOT_RECOGNIZED_SYMBOL}; } offset++; @@ -68,17 +61,21 @@ TokenizeResult tokenize(const char *input) { // CURRENTLY, it only supports ints, not clear how floating // point is implemented but i'll figure it out ASTNodeResult tokenize_number(const char *input, size_t *offset) { - char buf[128] = { '\0' }; + char buf[64] = { '\0' }; size_t buf_pos = 0; bool is_integer = true; // Will later be used to differentiate fractions + // read number size_t current = *offset; while (isdigit(input[current])) { buf[buf_pos] = input[current]; if (buf_pos >= sizeof(buf)) { - return (ASTNodeResult) {.is_valid = false, .err = LEXER_BUF_OVERFLOW}; + return (ASTNodeResult) { + .is_valid = false, + .err = LEXER_BUF_OVERFLOW}; } + current++; buf_pos++; } @@ -86,35 +83,46 @@ ASTNodeResult tokenize_number(const char *input, size_t *offset) { ASTNode new_node; if (is_integer) { new_node.type = NODE_INTEGER; - I64Result status = string_to_integer(buf); + LexerI64Result status = string_to_integer(buf); + + if (!status.is_valid) { return (ASTNodeResult) {.is_valid = false, .err = status.err}; } + new_node.data.integer = status.number; + *offset = current; return (ASTNodeResult) {.is_valid = true, .node = new_node}; } - return (ASTNodeResult) {.is_valid = false, .err = LEXER_FAILED_NUMBER_CONVERSION}; + return (ASTNodeResult) { + .is_valid = false, + .err = LEXER_FAILED_NUMBER_CONVERSION}; } -I64Result string_to_integer(const char *buf) { +LexerI64Result string_to_integer(const char *buf) { int c = 0; int64_t count = 0; + while (buf[c] != '\0') { - + + // Extracts number from char int digit = buf[c] - '0'; if (count > (INT64_MAX - digit) / 10) { - return (I64Result) {.is_valid = false, .err = LEXER_INT_OVERFLOW}; + return (LexerI64Result) { + .is_valid = false, + .err = LEXER_INT_OVERFLOW}; } + count = count * 10; count += digit; c++; } - return (I64Result) {.is_valid = true, .number = count}; + return (LexerI64Result) {.is_valid = true, .number = count}; } bool isoperator(int c) {