diff --git a/include/lexer.h b/include/lexer.h
index 0f83a53..943e4a5 100644
--- a/include/lexer.h
+++ b/include/lexer.h
@@ -72,5 +72,7 @@ size_t ASTNodeArray_len(ASTNodeArray *arr);
 LexerErr tokenize(const char* input, ASTNodeArray *out);
 LexerErr tokenize_number(const char* input, size_t *offset, ASTNode *out);
 LexerErr string_to_integer(const char buf[], int64_t *number);
+// Returns true when c is '+', '-', '/' or '*'.
+bool isoperator(int c);
 
 #endif // !LEXER_H
diff --git a/src/lexer.c b/src/lexer.c
index 4e8bbea..0cc6c6e 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -112,7 +112,63 @@ size_t ASTNodeArray_len(ASTNodeArray *arr) {
     return arr->len;
 }
 
+// Splits `input` into a flat sequence of integer and binary-operator nodes.
+//
+// Scanning stops at the first '\n' or '\0'. Integers and operators must
+// alternate, starting with an integer, and whitespace between them is
+// skipped; any other character, or a digit/operator out of turn, fails with
+// LEXER_NOT_RECOGNIZED_SYMBOL. Errors from tokenize_number() are returned
+// as-is.
+//
+// On success the nodes are stored in *out and LEXER_OK is returned. On failure
+// the partially built array is released with ASTNodeArray_free() and *out is
+// left untouched.
+LexerErr tokenize(const char *input, ASTNodeArray *out) {
+    size_t offset = 0;
+    LexerState state = WAIT_FOR_NUMBER;
+    ASTNodeArray arr = ASTNodeArray_init(0); // 0 defaults to 64
 
+    while (input[offset] != '\n' && input[offset] != '\0') {
+        // <ctype.h> classifiers require an unsigned char value (or EOF); plain
+        // char may be negative, so convert before isdigit()/isspace().
+        int current = (unsigned char)input[offset];
+
+        if (isdigit(current) && state == WAIT_FOR_NUMBER) {
+            ASTNode new_node;
+            LexerErr result = tokenize_number(input, &offset, &new_node);
+
+            if (result != LEXER_OK) {
+                ASTNodeArray_free(&arr);
+                return result;
+            }
+
+            ASTNodeArray_push(&arr, new_node);
+            state = WAIT_FOR_OPERATOR;
+        } else if (isoperator(current) && state == WAIT_FOR_OPERATOR) {
+            ASTNode new_node = {
+                .type = NODE_BINARY_OP,
+                .data.binary.op = current,
+                .data.binary.right = NULL,
+                .data.binary.left = NULL,
+            };
+
+            ASTNodeArray_push(&arr, new_node);
+            state = WAIT_FOR_NUMBER;
+        } else if (isspace(current)) {
+            // Nothing...
+        } else {
+            ASTNodeArray_free(&arr);
+            return LEXER_NOT_RECOGNIZED_SYMBOL;
+        }
+
+        // NOTE(review): assumes tokenize_number() leaves offset on the last
+        // digit it consumed, so this step moves past the number -- confirm.
+        offset++;
+    }
+
+    *out = arr;
+    return LEXER_OK;
+}
 
 // CURRENTLY, it only supports ints, not clear how floating
 // point is implemented but i'll figure it out
@@ -165,3 +221,19 @@ LexerErr string_to_integer(const char *buf, int64_t *number) {
     *number = count;
     return LEXER_OK;
 }
+
+// Reports whether c is one of the binary operators the lexer accepts:
+// '+', '-', '/' or '*'.
+// NOTE(review): names starting with "is" plus a lowercase letter are reserved
+// for future <ctype.h> additions (C11 7.31.2); consider renaming later.
+bool isoperator(int c) {
+    switch (c) {
+    case '+':
+    case '-':
+    case '/':
+    case '*':
+        return true;
+    default:
+        return false;
+    }
+}
diff --git a/test/test_lexer.c b/test/test_lexer.c
index 860ad5c..d10b6d2 100644
--- a/test/test_lexer.c
+++ b/test/test_lexer.c
@@ -6,6 +6,49 @@
 #include
 #include
 
+// tokenize() on a well-formed expression must yield alternating integer and
+// operator nodes, in input order.
+static void test_tokenize_normal_expression(void **state) {
+    (void) state;
+
+    char expr[256] = "2 + 3 / 66 * 789";
+    ASTNodeArray tokens;
+    ASTNode node;
+
+    assert_int_equal(tokenize(expr, &tokens), LEXER_OK);
+    assert_int_equal(tokens.len, 7);
+
+    ASTNodeArray_get(&tokens, 0, &node);
+    assert_int_equal(node.type, NODE_INTEGER);
+    assert_int_equal(node.data.integer, 2);
+
+    ASTNodeArray_get(&tokens, 1, &node);
+    assert_int_equal(node.type, NODE_BINARY_OP);
+    assert_int_equal(node.data.binary.op, '+');
+
+    ASTNodeArray_get(&tokens, 2, &node);
+    assert_int_equal(node.type, NODE_INTEGER);
+    assert_int_equal(node.data.integer, 3);
+
+    ASTNodeArray_get(&tokens, 3, &node);
+    assert_int_equal(node.type, NODE_BINARY_OP);
+    assert_int_equal(node.data.binary.op, '/');
+
+    ASTNodeArray_get(&tokens, 4, &node);
+    assert_int_equal(node.type, NODE_INTEGER);
+    assert_int_equal(node.data.integer, 66);
+
+    ASTNodeArray_get(&tokens, 5, &node);
+    assert_int_equal(node.type, NODE_BINARY_OP);
+    assert_int_equal(node.data.binary.op, '*');
+
+    ASTNodeArray_get(&tokens, 6, &node);
+    assert_int_equal(node.type, NODE_INTEGER);
+    assert_int_equal(node.data.integer, 789);
+
+    ASTNodeArray_free(&tokens);
+}
+
 static void test_string_to_number_normal(void **state) {
     (void) state;
 
@@ -36,6 +79,7 @@ int main(void) {
     const struct CMUnitTest tests[] = {
         cmocka_unit_test(test_string_to_number_normal),
         cmocka_unit_test(test_string_to_number_overflow),
+        cmocka_unit_test(test_tokenize_normal_expression),
     };
 
     return cmocka_run_group_tests(tests, NULL, NULL);