Review notes (commentary only; patch tools skip text that precedes the first
"diff --git" line)

* Layout: this patch had been collapsed onto a handful of lines. Line breaks
  and indentation are restored; the positions of blank context lines are a
  best effort chosen to agree with the old-side hunk line counts.
* Includes: the header names of the #include <...> lines were missing from
  the collapsed copy. The two added includes are restored as <math.h> and
  <stdio.h> because the patch starts using pow() and EOF in those files; the
  context includes are left bare since their names cannot be recovered.
* The "index" lines are carried over unchanged.
* Changes made to the patched code (hunk headers renumbered to match):
  - tokenize(): the loop tests are joined with && (joined with || the
    condition is always true); arraylist_destroy() stays on the error
    returns; `continue` after a number so the character that follows it is
    not skipped; <ctype.h> calls receive an unsigned char.
  - tokenize_number(): the bounds check runs before the store and reserves
    the terminating NUL.
  - postfix_lbp()/infix_lbp()/infix_rbp() return 255 for tokens that are not
    such an operator, which is the value parse_expr() compares against.

diff --git a/include/lexer.h b/include/lexer.h
index ed8c1e2..4af0f9d 100644
--- a/include/lexer.h
+++ b/include/lexer.h
@@ -10,6 +10,8 @@
 typedef enum {
     NODE_INTEGER,
     NODE_BINARY_OP,
+    NODE_UNARY_OP,
+    NODE_PARENTHESIS,
 } ASTNodeType;
 
 // For classify operators
@@ -17,7 +19,11 @@ typedef enum {
     OP_ADD,
     OP_SUB,
     OP_MUL,
-    OP_DIV
+    OP_DIV,
+    OP_POW,
+    OP_FACTORIAL,
+    OP_START_PAR,
+    OP_END_PAR,
 } Operator;
 
 typedef enum {
@@ -26,8 +32,6 @@ typedef enum {
     LEXER_FAILED_NUMBER_CONVERSION,
     LEXER_NOT_RECOGNIZED_SYMBOL,
     LEXER_EMPTY_INPUT,
-    LEXER_NULL_ARG,
-    LEXER_WRONG_SYNTAX,
     LEXER_BUF_OVERFLOW,
 } LexerErr;
 
@@ -41,6 +45,14 @@ typedef struct ASTNode {
             struct ASTNode *right;
             Operator op;
         } binary;
+        struct {
+            struct ASTNode *val;
+            Operator op;
+        } unary;
+        struct {
+            struct ASTNode *val;
+            Operator op;
+        } parenthesis;
     } data;
 } ASTNode;
 
@@ -66,12 +78,12 @@ typedef struct {
         LexerErr err;
         int64_t number;
     };
-} I64Result;
+} LexerI64Result;
 
 // Lexer funtions as well as few functionality
 TokenizeResult tokenize(const char* input);
 ASTNodeResult tokenize_number(const char* input, size_t *offset);
-I64Result string_to_integer(const char buf[]);
+LexerI64Result string_to_integer(const char buf[]);
 bool isoperator(int c);
 Operator char_to_operator(int c);
 char operator_to_char(Operator op);
diff --git a/include/parser.h b/include/parser.h
index 158b348..75bc70c 100644
--- a/include/parser.h
+++ b/include/parser.h
@@ -32,8 +32,10 @@ typedef struct {
 
 ASTNode *nud(ArraySlice *slice);
 ASTNode *led(ArraySlice *slice, size_t right_precedence);
-uint8_t node_lbp(ASTNode node);
-uint8_t node_rbp(ASTNode node);
+uint8_t prefix_rbp(ASTNode node);
+uint8_t postfix_lbp(ASTNode node);
+uint8_t infix_lbp(ASTNode node);
+uint8_t infix_rbp(ASTNode node);
 ParseResult parse(TokenizeResult tokens);
 ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp);
 
diff --git a/src/evaluator.c b/src/evaluator.c
index 36d9fb1..551f779 100644
--- a/src/evaluator.c
+++ b/src/evaluator.c
@@ -3,6 +3,7 @@
 #include "lexer.h"
 #include "parser.h"
 #include
+#include <math.h>
 
 
 int64_t evaluate_tree(ASTNode *tree) {
@@ -20,7 +21,10 @@ int64_t evaluate_tree(ASTNode *tree) {
             return evaluate_tree(left) * evaluate_tree(right);
         case OP_DIV:
             return evaluate_tree(left) / evaluate_tree(right);
-
+        case OP_POW:
+            // NOTE(review): pow() computes in double, so very large results
+            // may not be exact once converted back to int64_t.
+            return pow(evaluate_tree(left), evaluate_tree(right));
     }
 }
 
diff --git a/src/lexer.c b/src/lexer.c
index 7baf998..2115cc4 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include <stdio.h>
 #include
 #include
 #include
@@ -14,45 +15,43 @@ typedef enum {
 
 
 TokenizeResult tokenize(const char *input) {
-    size_t offset = 0;
-    LexerState state = WAIT_FOR_NUMBER;
     ArrayList *arr = arraylist_init(64, sizeof(ASTNode));
+    size_t offset = 0;
 
+    // Scan until end of line or end of string. The tests are joined with
+    // &&; joined with || the condition can never be false.
     while (input[offset] != '\n' && input[offset] != '\0') {
-        int current = input[offset];
+        // <ctype.h> classifiers take an unsigned char value (or EOF)
+        const unsigned char current = (unsigned char)input[offset];
 
         if (isdigit(current)) {
-            if (state != WAIT_FOR_NUMBER) {
-                arraylist_destroy(&arr);
-                return (TokenizeResult) {.is_valid = false, .err = LEXER_WRONG_SYNTAX};
-            }
             ASTNodeResult result = tokenize_number(input, &offset);
 
             if (!result.is_valid) {
                 arraylist_destroy(&arr);
                 return (TokenizeResult) {.is_valid = false, .err = result.err};
             }
 
             arraylist_push_back(arr, &result.node);
-            state = WAIT_FOR_OPERATOR;
+            // tokenize_number() leaves offset on the first character after
+            // the number, so skip the offset++ at the bottom of the loop.
+            continue;
         } else if (isoperator(current)) {
-            if (state != WAIT_FOR_OPERATOR) {
-                return (TokenizeResult) {.is_valid = false, .err =LEXER_WRONG_SYNTAX};
-            }
-            ASTNode new_node = {
+            ASTNode op_node = {
                 .type = NODE_BINARY_OP,
                 .data.binary.op = char_to_operator(current),
-                .data.binary.right = NULL,
                 .data.binary.left = NULL,
+                .data.binary.right = NULL,
             };
 
-            arraylist_push_back(arr, &new_node);
-            state = WAIT_FOR_NUMBER;
+            arraylist_push_back(arr, &op_node);
         } else if (isspace(current)) {
             // Nothing...
         } else {
             arraylist_destroy(&arr);
-            return (TokenizeResult) {.is_valid = false, .err = LEXER_NOT_RECOGNIZED_SYMBOL};
+            return (TokenizeResult) {
+                .is_valid = false,
+                .err = LEXER_NOT_RECOGNIZED_SYMBOL};
         }
 
         offset++;
@@ -68,17 +67,22 @@ TokenizeResult tokenize(const char *input) {
 // CURRENTLY, it only supports ints, not clear how floating
 // point is implemented but i'll figure it out
 ASTNodeResult tokenize_number(const char *input, size_t *offset) {
-    char buf[128] = { '\0' };
+    char buf[64] = { '\0' };
     size_t buf_pos = 0;
     bool is_integer = true; // Will later be used to differentiate fractions
 
+    // read number
     size_t current = *offset;
-    while (isdigit(input[current])) {
-        buf[buf_pos] = input[current];
-
-        if (buf_pos >= sizeof(buf)) {
-            return (ASTNodeResult) {.is_valid = false, .err = LEXER_BUF_OVERFLOW};
+    while (isdigit((unsigned char)input[current])) {
+        // Check before storing, and keep the last byte of buf as the
+        // terminating '\0' that string_to_integer() scans for.
+        if (buf_pos >= sizeof(buf) - 1) {
+            return (ASTNodeResult) {
+                .is_valid = false,
+                .err = LEXER_BUF_OVERFLOW};
         }
+
+        buf[buf_pos] = input[current];
         current++;
         buf_pos++;
     }
@@ -86,35 +90,46 @@ ASTNodeResult tokenize_number(const char *input, size_t *offset) {
     ASTNode new_node;
 
     if (is_integer) {
         new_node.type = NODE_INTEGER;
-        I64Result status = string_to_integer(buf);
+        LexerI64Result status = string_to_integer(buf);
+
+
         if (!status.is_valid) {
             return (ASTNodeResult) {.is_valid = false, .err = status.err};
         }
+
         new_node.data.integer = status.number;
+
         *offset = current;
         return (ASTNodeResult) {.is_valid = true, .node = new_node};
     }
 
-    return (ASTNodeResult) {.is_valid = false, .err = LEXER_FAILED_NUMBER_CONVERSION};
+    return (ASTNodeResult) {
+        .is_valid = false,
+        .err = LEXER_FAILED_NUMBER_CONVERSION};
 }
 
-I64Result string_to_integer(const char *buf) {
+LexerI64Result string_to_integer(const char *buf) {
     int c = 0;
     int64_t count = 0;
+
     while (buf[c] != '\0') {
 
+        // Extracts number from char
         int digit = buf[c] - '0';
 
         if (count > (INT64_MAX - digit) / 10) {
-            return (I64Result) {.is_valid = false, .err = LEXER_INT_OVERFLOW};
+            return (LexerI64Result) {
+                .is_valid = false,
+                .err = LEXER_INT_OVERFLOW};
         }
+
         count = count * 10;
         count += digit;
         c++;
     }
 
-    return (I64Result) {.is_valid = true, .number = count};
+    return (LexerI64Result) {.is_valid = true, .number = count};
 }
 
 bool isoperator(int c) {
@@ -123,6 +138,10 @@ bool isoperator(int c) {
         case '-':
         case '/':
         case '*':
+        case '^':
+        case '!':
+        case '(':
+        case ')':
             return true;
         default:
             return false;
@@ -143,6 +162,18 @@ Operator char_to_operator(int c) {
         case '/':
             return OP_DIV;
             break;
+        case '^':
+            return OP_POW;
+            break;
+        case '!':
+            return OP_FACTORIAL;
+            break;
+        case '(':
+            return OP_START_PAR;
+            break;
+        case ')':
+            return OP_END_PAR;
+            break;
         default: // I mean shouldn't be used, we assume
             return -1;
     }
@@ -158,5 +189,15 @@ char operator_to_char(Operator op) {
             return '*';
         case OP_DIV:
             return '/';
+        case OP_POW:
+            return '^';
+        case OP_FACTORIAL:
+            return '!';
+        case OP_START_PAR:
+            return '(';
+        case OP_END_PAR:
+            return ')';
+        default:
+            return EOF;
     }
 }
diff --git a/src/parser.c b/src/parser.c
index a3efae0..2a09988 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -6,7 +6,35 @@
 #include
 #include
 
-uint8_t node_lbp(ASTNode node) {
+uint8_t prefix_rbp(ASTNode node) {
+    if (node.type == NODE_INTEGER) {
+        return 0;
+    }
+
+    switch (node.data.unary.op) {
+        case OP_SUB:
+        case OP_ADD:
+            return 30;
+        default:
+            return 255; // the value a uint8_t `return -1` converts to
+    }
+}
+
+uint8_t postfix_lbp(ASTNode node) {
+    if (node.type == NODE_INTEGER) {
+        return 255; // an integer is never a postfix operator
+    }
+
+    switch (node.data.unary.op) {
+        case OP_FACTORIAL:
+            return 40;
+        default:
+            // needs to be dealt with with resulttypes
+            return 255;
+    }
+}
+
+uint8_t infix_lbp(ASTNode node) {
     if (node.type == NODE_INTEGER) {
-        return 0;
+        return 255; // an integer is never an infix operator
     }
@@ -19,12 +47,14 @@ uint8_t node_lbp(ASTNode node) {
         case OP_DIV:
         case OP_MUL:
             return 20;
-        default: return 0;
+        case OP_POW:
+            return 51;
+        default: return 255; // not an infix operator
     }
 }
 
 
-uint8_t node_rbp(ASTNode node) {
+uint8_t infix_rbp(ASTNode node) {
     if (node.type == NODE_INTEGER) {
-        return 0;
+        return 255; // an integer is never an infix operator
     }
@@ -37,6 +67,8 @@ uint8_t node_rbp(ASTNode node) {
         case OP_DIV:
         case OP_MUL:
             return 21;
-        default: return 0;
+        case OP_POW:
+            return 50;
+        default: return 255; // not an infix operator
     }
 }
@@ -53,12 +85,14 @@ ParseResult parse(TokenizeResult tokens) {
 }
 
 ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) {
+    // First: Consume a first number
     arena_ensure_capacity(
         arena,
         sizeof(ASTNode),
         alignof(ASTNode)
-    );
+    ); // shouldn't fail but if it does then what a shame
 
+    // Get pointer in the arena
     ASTNode *left_side = arena_unwrap_pointer(
         arena_alloc(
             arena,
@@ -69,45 +103,122 @@ ASTNode *parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) {
     arrayslice_next(slice, left_side);
 
+    if (left_side->type == NODE_PARENTHESIS &&
+        left_side->data.parenthesis.op == OP_START_PAR) {
+        // The root of the group replaces the '(' token as left operand
+        left_side = parse_expr(slice, arena, 0);
+
+        // Consume the token after the group, which should be ')'.
+        // arrayslice_next() fills the node it is handed (as in the
+        // left_side call above), so this must be ASTNode storage; an
+        // uninitialized ASTNode pointer would be written over and
+        // then dereferenced.
+        ASTNode end_par;
+        arrayslice_next(slice, &end_par);
+        if (end_par.type != NODE_PARENTHESIS ||
+            end_par.data.parenthesis.op != OP_END_PAR) {
+            // todo
+        }
+        // No early return: fall through to the loop below so that
+        // operators after the ')' still apply to the whole group.
+    } else if (left_side->type == NODE_UNARY_OP) {
+        // if is unary then take prefix bp and continue
+        // to the right, no need to allocate left side
+        // because we just did and right side
+        // WILL return a valid allocated pointer.
+        uint8_t rbp = prefix_rbp(*left_side);
+        ASTNode *right_side = parse_expr(slice, arena, rbp);
+
+        left_side->data.unary.val = right_side;
+    }
+
     while (true) {
+        // Second: Get next one and check bp
         if (!arrayslice_is_valid(slice)) {
             break;
         }
 
 
+        // Third, get operator and binding powers
         ASTNode operator;
         arrayslice_peek(slice, &operator);
-        uint8_t rbp = node_rbp(operator);
-        uint8_t lbp = node_lbp(operator);
 
-        if (lbp < min_bp) {
-            break;
+        // Integers and parentheses are not operators: stop here and
+        // leave them to the caller.
+        if (operator.type != NODE_UNARY_OP &&
+            operator.type != NODE_BINARY_OP) {
+            break;
+        }
+
+        // temporary for bad error handling
+        uint8_t post_lbp = postfix_lbp(operator);
+        if (post_lbp != 255) {
+            if (post_lbp < min_bp) {
+                break;
+            }
+
+            // allocate operator
+            arrayslice_next(slice, NULL);
+            arena_ensure_capacity(
+                arena,
+                sizeof(ASTNode),
+                alignof(ASTNode));
+            ASTNode *new_node = arena_unwrap_pointer(
+                arena_alloc(
+                    arena,
+                    sizeof(ASTNode),
+                    alignof(ASTNode)
+                )
+            );
+            *new_node = operator;
+
+            new_node->data.unary.val = left_side;
+
+            left_side = new_node;
+            continue;
         }
 
 
-        arrayslice_next(slice, NULL);
-        ASTNode *right_side = parse_expr(slice, arena, rbp);
+        // check if it has infix or not, if not then error
+        uint8_t rbp = infix_rbp(operator);
+        uint8_t lbp = infix_lbp(operator);
 
-        arena_ensure_capacity(
-            arena,
-            sizeof(ASTNode),
-            alignof(ASTNode));
-        ASTNode *new_node = arena_unwrap_pointer(
-            arena_alloc(
-                arena,
-                sizeof(ASTNode),
-                alignof(ASTNode)
-            )
-        );
-        *new_node = operator;
+        if (rbp != 255 && lbp != 255) {
+            // If lbp is LESS then stop recursion,
+            // we found the next smaller binding power
+            // or the one with more precedence
+            if (lbp < min_bp) {
+                break;
+            }
 
-        new_node->data.binary.left = left_side;
-        new_node->data.binary.right = right_side;
+            // If NOT, then we continue watching ahead
+            // for the next one but taking our current
+            // concern that is rbp of the current operator
+            arrayslice_next(slice, NULL);
+            ASTNode *right_side = parse_expr(slice, arena, rbp);
+
+            arena_ensure_capacity(
+                arena,
+                sizeof(ASTNode),
+                alignof(ASTNode));
+            ASTNode *new_node = arena_unwrap_pointer(
+                arena_alloc(
+                    arena,
+                    sizeof(ASTNode),
+                    alignof(ASTNode)
+                )
+            );
+            *new_node = operator;
+
+            new_node->data.binary.left = left_side;
+            new_node->data.binary.right = right_side;
+
+            left_side = new_node;
+            continue;
+        }
 
-        left_side = new_node;
+        break;
     }
 
+    // Final: return left side
     return left_side;
 }
-
-
-