diff --git a/include/parser.h b/include/parser.h index 39dd1eb..3a9f08c 100644 --- a/include/parser.h +++ b/include/parser.h @@ -38,6 +38,7 @@ typedef enum { PARSER_UNMATCHED_PAREN, PARSER_OUT_OF_MEMORY, PARSER_INVALID_TOKENIZE, + PARSER_UNEXMECTED_EOF, } ParserErr; typedef struct { @@ -76,8 +77,8 @@ typedef struct { } ParserU8Result; Node token_to_node(Token token); -NodeResult nud(Token token); // Null denotation -NodeResult led(Token token); // Left denotation +TreeResult nud(ArraySlice *slice, Arena *arena, Token token); // Null denotation +TreeResult led(ArraySlice *slice, Arena *arena, Node *left, Token token); // Left denotation ParserU8Result prefix_rbp(Node node); ParserU8Result postfix_lbp(Node node); diff --git a/src/parser.c b/src/parser.c index 7fd95a8..0943782 100644 --- a/src/parser.c +++ b/src/parser.c @@ -119,167 +119,176 @@ ParserU8Result infix_rbp(Node node) { }; } } -NodeResult led(Token token) { - if (token.type == TOKEN_INTEGER) { - return (NodeResult) { - .is_valid = false, - .err = PARSER_UNEXPECTED_TOKEN, - }; - } + +TreeResult led( + ArraySlice *slice, + Arena *arena, + Node *left, + Token token +) { + arena_ensure_capacity( + arena, + sizeof(Node), + alignof(Node) + ); + + Node *node = arena_unwrap_pointer( + arena_alloc( + arena, + sizeof(Node), + alignof(Node) + ) + ); switch (token.op) { - case OP_ADD: - return (NodeResult) { - .is_valid = true, - .node = (Node) { - .type = NODE_BINARY_OP, - .binary.op = token.op, - } - }; - case OP_SUB: - return (NodeResult) { - .is_valid = true, - .node = (Node) { - .type = NODE_BINARY_OP, - .binary.op = token.op, - } - }; - case OP_MUL: - return (NodeResult) { - .is_valid = true, - .node = (Node) { - .type = NODE_BINARY_OP, - .binary.op = token.op, - } - }; - case OP_DIV: - return (NodeResult) { - .is_valid = true, - .node = (Node) { - .type = NODE_BINARY_OP, - .binary.op = token.op, - } - }; - case OP_POW: - return (NodeResult) { - .is_valid = true, - .node = (Node) { - .type = NODE_BINARY_OP, - 
.binary.op = token.op, - } - }; - case OP_FACTORIAL: - return (NodeResult) { - .is_valid = true, - .node = (Node) { - .type = NODE_UNARY_OP, - .unary.op = token.op, - } - }; - default: - return (NodeResult) { - .is_valid = false, - .err = PARSER_UNEXPECTED_TOKEN, - }; - } -} -NodeResult nud(Token token) { - if (token.type == TOKEN_INTEGER) { - return (NodeResult) { - .is_valid = true, - .node = (Node) { - .type = NODE_INT, - .num = token.num, + // Binary operators + case OP_ADD: + case OP_SUB: + case OP_MUL: + case OP_DIV: + case OP_POW: { + node->type = NODE_BINARY_OP; + node->binary.op = token.op; + + ParserU8Result rbp_result = infix_rbp(*node); + if (!rbp_result.is_valid) { + return (TreeResult) { + .is_valid = false, + .err = rbp_result.err, + }; } - }; - } - switch (token.op) { - case OP_START_PAR: - return (NodeResult) { + TreeResult right = parse_expr( + slice, + arena, + rbp_result.num + ); + + if (!right.is_valid) { + return right; + } + + node->binary.left = left; + node->binary.right = right.node; + + return (TreeResult) { .is_valid = true, - .node = (Node) { - .type = NODE_PARENTHESIS, - .par = token.op, - } + .node = node, }; - case OP_SUB: - return (NodeResult) { + } + + // Postfix operators + case OP_FACTORIAL: { + node->type = NODE_UNARY_OP; + node->unary.op = token.op; + node->unary.to = left; + + return (TreeResult) { .is_valid = true, - .node = (Node) { - .type = NODE_UNARY_OP, - .unary.op = token.op, - } - }; - case OP_ADD: - return (NodeResult) { - .is_valid = true, - .node = (Node) { - .type = NODE_UNARY_OP, - .unary.op = token.op, - } + .node = node, }; + } + default: - return (NodeResult) { + return (TreeResult) { .is_valid = false, .err = PARSER_UNEXPECTED_TOKEN, }; } } -Node token_to_node(Token token) { +TreeResult nud(ArraySlice *slice, Arena *arena, Token token) { + arena_ensure_capacity( + arena, + sizeof(Node), + alignof(Node) + ); + + Node *node = arena_unwrap_pointer( + arena_alloc( + arena, + sizeof(Node), + alignof(Node) + ) + ); 
+ if (token.type == TOKEN_INTEGER) { - return (Node) { - .type = NODE_INT, - .num = token.num, + node->type = NODE_INT; + node->num = token.num; + + return (TreeResult) { + .is_valid = true, + .node = node, }; } switch (token.op) { + case OP_START_PAR: { + TreeResult expr = parse_expr(slice, arena, 0); + if (!expr.is_valid) { + return expr; + } + + Token end_par; + if (arrayslice_next(slice, &end_par) != ARRLIST_OK) { + return (TreeResult) { + .is_valid = false, + .err = PARSER_UNMATCHED_PAREN, + }; + } + + if (end_par.type != TOKEN_OPERATOR || + end_par.op != OP_END_PAR) { + return (TreeResult) { + .is_valid = false, + .err = PARSER_UNMATCHED_PAREN, + }; + } + + return expr; + } case OP_ADD: - return (Node) { - .type = NODE_BINARY_OP, - .binary.op = token.op, - }; - case OP_SUB: - return (Node) { - .type = NODE_BINARY_OP, - .binary.op = token.op, - }; - case OP_MUL: - return (Node) { - .type = NODE_BINARY_OP, - .binary.op = token.op, - }; - case OP_DIV: - return (Node) { - .type = NODE_BINARY_OP, - .binary.op = token.op, - }; - case OP_POW: - return (Node) { - .type = NODE_BINARY_OP, - .binary.op = token.op, - }; - case OP_FACTORIAL: - return (Node) { - .type = NODE_UNARY_OP, - .binary.op = token.op, - }; - case OP_START_PAR: - return (Node) { - .type = NODE_PARENTHESIS, - .binary.op = token.op, - }; - case OP_END_PAR: - return (Node) { - .type = NODE_PARENTHESIS, - .binary.op = token.op, + + case OP_SUB: { + node->type = NODE_UNARY_OP; + node->unary.op = token.op; + + ParserU8Result rbp_result = prefix_rbp(*node); + if (!rbp_result.is_valid) { + return (TreeResult) { + .is_valid = false, + .err = rbp_result.err, + }; + } + + TreeResult right = parse_expr( + slice, + arena, + rbp_result.num + ); + + if (!right.is_valid) { + return right; + } + + node->unary.to = right.node; + + return (TreeResult) { + .is_valid = true, + .node = node, }; + } + default: + return (TreeResult) { + .is_valid = false, + .err = PARSER_UNEXPECTED_TOKEN, + }; } } + + ParserResult 
parse(TokenizeResult tokens) { if (!tokens.is_valid) { return (ParserResult) { @@ -309,165 +318,76 @@ ParserResult parse(TokenizeResult tokens) { } TreeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) { - arena_ensure_capacity( - arena, - sizeof(Node), - alignof(Node) - ); // shouldn't fail but if it does then what a shame - - // Get pointer in the arena - Node *left_side = arena_unwrap_pointer( - arena_alloc( - arena, - sizeof(Node), - alignof(Node) - ) - ); - - // convert token to node :) Token current_token; - arrayslice_next(slice, &current_token); - *left_side = token_to_node(current_token); - if (left_side->type == NODE_PARENTHESIS && - left_side->par == OP_START_PAR) { - TreeResult result = parse_expr(slice, arena, 0); + if (arrayslice_next(slice, &current_token) != ARRLIST_OK) { + return (TreeResult) { + .is_valid = false, + .err = PARSER_UNEXMECTED_EOF, + }; + } + + TreeResult left_result = nud(slice, arena, current_token); + + if (!left_result.is_valid) { + return left_result; + } + + Node *left_side = left_result.node; + + while (arrayslice_is_valid(slice)) { + Token operator_token; + arrayslice_peek(slice, &operator_token); + + if (operator_token.type != TOKEN_OPERATOR) { + break; + } + + Node operator_node = { + .type = NODE_BINARY_OP, + .binary.op = operator_token.op, + }; + + ParserU8Result postfix_lbp_result = postfix_lbp(operator_node); + + if (postfix_lbp_result.is_valid) { + if (postfix_lbp_result.num < min_bp) { + break; + } + + arrayslice_next(slice, NULL); + + TreeResult result = led(slice, arena, left_side, operator_token); + + if (!result.is_valid) { + return result; + } + + left_side = result.node; + + continue; + } + + // Path for infix basically + ParserU8Result lbp_result = infix_lbp(operator_node); + + if (!lbp_result.is_valid) { + break; + } + + if (lbp_result.num < min_bp) { + break; + } + + arrayslice_next(slice, NULL); + + TreeResult result = led(slice, arena, left_side, operator_token); if (!result.is_valid) { return result; 
} left_side = result.node; - - // We dont really need to convert to node - // parenthesis are there just to change up - // the bp - Token end_par; - arrayslice_next(slice, &end_par); - - if (end_par.type != TOKEN_OPERATOR || - end_par.op != OP_END_PAR) { - return (TreeResult) { - .is_valid = false, - .err = PARSER_UNMATCHED_PAREN, - }; - } - return (TreeResult) { - .is_valid = true, - .node = left_side, - }; - } - - // If prefix - if (left_side->type == NODE_UNARY_OP) { - ParserU8Result rbp_result = prefix_rbp(*left_side); - if (!rbp_result.is_valid) { - return (TreeResult) { - .is_valid = false, - .err = rbp_result.err, - }; - } - - TreeResult righ_side_result = parse_expr(slice, arena, rbp_result.num); - if (!righ_side_result.is_valid) { - return righ_side_result; - } - - left_side->unary.to = righ_side_result.node; - } - - while (true) { - if (!arrayslice_is_valid(slice)) { - break; - } - - Token operator_token; - arrayslice_peek(slice, &operator_token); - if (operator_token.type != TOKEN_OPERATOR) { - return (TreeResult) { - .is_valid = false, - .err = PARSER_MISSING_OPERAND, - }; - } - Node operator_node = token_to_node(operator_token); - - // temporary for bad error handling - ParserU8Result postfix_lbp_result = postfix_lbp(operator_node); - if (postfix_lbp_result.is_valid) { - if (postfix_lbp_result.num < min_bp) { - break; - } - - // allocate operator - arrayslice_next(slice, NULL); - arena_ensure_capacity( - arena, - sizeof(Node), - alignof(Node)); - Node *new_node = arena_unwrap_pointer( - arena_alloc( - arena, - sizeof(Node), - alignof(Node) - ) - ); - *new_node = operator_node; - - - new_node->unary.to = left_side; - - left_side = new_node; - continue; - } - - ParserU8Result rbp_result = infix_rbp(operator_node); - ParserU8Result lbp_result = infix_lbp(operator_node); - if (!rbp_result.is_valid || !lbp_result.is_valid) { - return (TreeResult) { - .is_valid = false, - .err = PARSER_UNEXPECTED_TOKEN, - }; - } - - if (rbp_result.is_valid && 
lbp_result.is_valid) { - // If lbp is LESS then stop recursion, - // we found the next smaller binding power - // or the one with more precedence - if (lbp_result.num < min_bp) { - break; - } - - // If NOT, then we continue wtching ahead - // for the next one but taking our current - // concern that is rbp of the current operator - arrayslice_next(slice, NULL); - TreeResult right_side_result = parse_expr(slice, arena, rbp_result.num); - if (!right_side_result.is_valid) { - return right_side_result; - } - - arena_ensure_capacity( - arena, - sizeof(Node), - alignof(Node)); - Node *new_node = arena_unwrap_pointer( - arena_alloc( - arena, - sizeof(Node), - alignof(Node) - ) - ); - *new_node = operator_node; - - new_node->binary.left = left_side; - new_node->binary.right = right_side_result.node; - - left_side = new_node; - - continue; - } - - break; } // Final: return left side