2026-03-13 07:58:38 -06:00
|
|
|
#include "parser.h"
|
2026-04-24 09:06:47 -06:00
|
|
|
#include "arraylist.h"
|
2026-03-13 07:58:38 -06:00
|
|
|
#include "lexer.h"
|
2026-04-13 06:40:31 -06:00
|
|
|
#include "arena.h"
|
2026-05-13 11:09:22 -06:00
|
|
|
#include <cmocka.h>
|
2026-04-13 07:57:36 -06:00
|
|
|
#include <stdalign.h>
|
2026-03-24 21:51:28 -06:00
|
|
|
#include <stdbool.h>
|
2026-03-24 21:36:14 -06:00
|
|
|
#include <stdint.h>
|
2026-03-13 07:58:38 -06:00
|
|
|
|
2026-05-13 17:48:03 -06:00
|
|
|
// Right binding power of an operator used in prefix (unary) position.
//
// Returns a valid result carrying 30 for OP_ADD and OP_SUB. Any token that
// is not an operator, and any other operator, produces an invalid result
// with PARSER_UNEXPECTED_TOKEN.
ParserU8Result prefix_rbp(Token token) {
    // Same guard as postfix_lbp / infix_lbp / infix_rbp: token.op is only
    // inspected once the token is known to be an operator.
    if (token.type != TOKEN_OPERATOR) {
        return (ParserU8Result) {
            .is_valid = false,
            .err = PARSER_UNEXPECTED_TOKEN,
        };
    }

    switch (token.op) {
    case OP_SUB:
    case OP_ADD:
        return (ParserU8Result) {
            .is_valid = true,
            .num = 30,
        };
    default:
        return (ParserU8Result) {
            .is_valid = false,
            .err = PARSER_UNEXPECTED_TOKEN,
        };
    }
}
|
|
|
|
|
|
2026-05-13 17:48:03 -06:00
|
|
|
// Left binding power of an operator used in postfix position.
//
// Only OP_FACTORIAL is accepted (binding power 40). Non-operator tokens and
// every other operator produce an invalid result with
// PARSER_UNEXPECTED_TOKEN.
ParserU8Result postfix_lbp(Token token) {
    const ParserU8Result rejected = {
        .is_valid = false,
        .err = PARSER_UNEXPECTED_TOKEN,
    };

    if (token.type != TOKEN_OPERATOR) {
        return rejected;
    }

    if (token.op == OP_FACTORIAL) {
        return (ParserU8Result) {
            .is_valid = true,
            .num = 40,
        };
    }

    return rejected;
}
|
|
|
|
|
|
2026-05-13 17:48:03 -06:00
|
|
|
// Left binding power of an operator used in infix (binary) position.
//
//   OP_ADD, OP_SUB -> 10
//   OP_MUL, OP_DIV -> 20
//   OP_POW         -> 51
//
// Non-operator tokens and every other operator produce an invalid result
// with PARSER_UNEXPECTED_TOKEN.
ParserU8Result infix_lbp(Token token) {
    const ParserU8Result rejected = {
        .is_valid = false,
        .err = PARSER_UNEXPECTED_TOKEN,
    };

    if (token.type != TOKEN_OPERATOR) {
        return rejected;
    }

    uint8_t power;
    switch (token.op) {
    case OP_ADD:
    case OP_SUB:
        power = 10;
        break;
    case OP_MUL:
    case OP_DIV:
        power = 20;
        break;
    case OP_POW:
        power = 51;
        break;
    default:
        return rejected;
    }

    return (ParserU8Result) {
        .is_valid = true,
        .num = power,
    };
}
|
|
|
|
|
|
2026-05-13 17:48:03 -06:00
|
|
|
// Right binding power of an operator used in infix (binary) position; this
// is the minimum binding power led() passes when parsing the right operand.
//
//   OP_ADD, OP_SUB -> 11
//   OP_MUL, OP_DIV -> 21
//   OP_POW         -> 50
//
// Non-operator tokens and every other operator produce an invalid result
// with PARSER_UNEXPECTED_TOKEN.
ParserU8Result infix_rbp(Token token) {
    const ParserU8Result rejected = {
        .is_valid = false,
        .err = PARSER_UNEXPECTED_TOKEN,
    };

    if (token.type != TOKEN_OPERATOR) {
        return rejected;
    }

    uint8_t power;
    switch (token.op) {
    case OP_ADD:
    case OP_SUB:
        power = 11;
        break;
    case OP_MUL:
    case OP_DIV:
        power = 21;
        break;
    case OP_POW:
        power = 50;
        break;
    default:
        return rejected;
    }

    return (ParserU8Result) {
        .is_valid = true,
        .num = power,
    };
}
|
2026-05-13 17:35:52 -06:00
|
|
|
|
|
|
|
|
TreeResult led(
|
|
|
|
|
ArraySlice *slice,
|
|
|
|
|
Arena *arena,
|
|
|
|
|
Node *left,
|
|
|
|
|
Token token
|
|
|
|
|
) {
|
|
|
|
|
arena_ensure_capacity(
|
|
|
|
|
arena,
|
|
|
|
|
sizeof(Node),
|
|
|
|
|
alignof(Node)
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
Node *node = arena_unwrap_pointer(
|
|
|
|
|
arena_alloc(
|
|
|
|
|
arena,
|
|
|
|
|
sizeof(Node),
|
|
|
|
|
alignof(Node)
|
|
|
|
|
)
|
|
|
|
|
);
|
2026-03-24 21:04:36 -06:00
|
|
|
|
2026-05-13 16:09:03 -06:00
|
|
|
switch (token.op) {
|
2026-05-13 17:35:52 -06:00
|
|
|
|
|
|
|
|
// Binary operators
|
2026-05-13 16:09:03 -06:00
|
|
|
case OP_ADD:
|
|
|
|
|
case OP_SUB:
|
|
|
|
|
case OP_MUL:
|
|
|
|
|
case OP_DIV:
|
2026-05-13 17:35:52 -06:00
|
|
|
case OP_POW: {
|
|
|
|
|
node->type = NODE_BINARY_OP;
|
|
|
|
|
node->binary.op = token.op;
|
|
|
|
|
|
2026-05-13 17:48:03 -06:00
|
|
|
ParserU8Result rbp_result = infix_rbp(token);
|
2026-05-13 17:35:52 -06:00
|
|
|
if (!rbp_result.is_valid) {
|
|
|
|
|
return (TreeResult) {
|
|
|
|
|
.is_valid = false,
|
|
|
|
|
.err = rbp_result.err,
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TreeResult right = parse_expr(
|
|
|
|
|
slice,
|
|
|
|
|
arena,
|
|
|
|
|
rbp_result.num
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
if (!right.is_valid) {
|
|
|
|
|
return right;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
node->binary.left = left;
|
|
|
|
|
node->binary.right = right.node;
|
|
|
|
|
|
|
|
|
|
return (TreeResult) {
|
2026-05-13 16:09:03 -06:00
|
|
|
.is_valid = true,
|
2026-05-13 17:35:52 -06:00
|
|
|
.node = node,
|
2026-05-13 16:09:03 -06:00
|
|
|
};
|
2026-05-13 17:35:52 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Postfix operators
|
|
|
|
|
case OP_FACTORIAL: {
|
|
|
|
|
node->type = NODE_UNARY_OP;
|
|
|
|
|
node->unary.op = token.op;
|
|
|
|
|
node->unary.to = left;
|
|
|
|
|
|
|
|
|
|
return (TreeResult) {
|
2026-05-13 16:09:03 -06:00
|
|
|
.is_valid = true,
|
2026-05-13 17:35:52 -06:00
|
|
|
.node = node,
|
2026-05-13 16:09:03 -06:00
|
|
|
};
|
2026-05-13 17:35:52 -06:00
|
|
|
}
|
|
|
|
|
|
2026-05-13 16:09:03 -06:00
|
|
|
default:
|
2026-05-13 17:35:52 -06:00
|
|
|
return (TreeResult) {
|
2026-05-13 16:09:03 -06:00
|
|
|
.is_valid = false,
|
|
|
|
|
.err = PARSER_UNEXPECTED_TOKEN,
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-05-13 17:35:52 -06:00
|
|
|
TreeResult nud(ArraySlice *slice, Arena *arena, Token token) {
|
|
|
|
|
arena_ensure_capacity(
|
|
|
|
|
arena,
|
|
|
|
|
sizeof(Node),
|
|
|
|
|
alignof(Node)
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
Node *node = arena_unwrap_pointer(
|
|
|
|
|
arena_alloc(
|
|
|
|
|
arena,
|
|
|
|
|
sizeof(Node),
|
|
|
|
|
alignof(Node)
|
|
|
|
|
)
|
|
|
|
|
);
|
|
|
|
|
|
2026-05-13 16:09:03 -06:00
|
|
|
if (token.type == TOKEN_INTEGER) {
|
2026-05-13 17:35:52 -06:00
|
|
|
node->type = NODE_INT;
|
|
|
|
|
node->num = token.num;
|
|
|
|
|
|
|
|
|
|
return (TreeResult) {
|
2026-05-13 16:09:03 -06:00
|
|
|
.is_valid = true,
|
2026-05-13 17:35:52 -06:00
|
|
|
.node = node,
|
2026-05-13 16:09:03 -06:00
|
|
|
};
|
|
|
|
|
}
|
2026-05-13 12:13:07 -06:00
|
|
|
|
2026-05-13 16:09:03 -06:00
|
|
|
switch (token.op) {
|
2026-05-13 17:35:52 -06:00
|
|
|
case OP_START_PAR: {
|
|
|
|
|
TreeResult expr = parse_expr(slice, arena, 0);
|
|
|
|
|
if (!expr.is_valid) {
|
|
|
|
|
return expr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Token end_par;
|
|
|
|
|
if (arrayslice_next(slice, &end_par) != ARRLIST_OK) {
|
|
|
|
|
return (TreeResult) {
|
|
|
|
|
.is_valid = false,
|
|
|
|
|
.err = PARSER_UNMATCHED_PAREN,
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (end_par.type != TOKEN_OPERATOR ||
|
|
|
|
|
end_par.op != OP_END_PAR) {
|
|
|
|
|
return (TreeResult) {
|
|
|
|
|
.is_valid = false,
|
|
|
|
|
.err = PARSER_UNMATCHED_PAREN,
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return expr;
|
|
|
|
|
}
|
2026-05-13 16:09:03 -06:00
|
|
|
case OP_ADD:
|
2026-05-13 17:35:52 -06:00
|
|
|
|
|
|
|
|
case OP_SUB: {
|
|
|
|
|
node->type = NODE_UNARY_OP;
|
|
|
|
|
node->unary.op = token.op;
|
|
|
|
|
|
2026-05-13 17:48:03 -06:00
|
|
|
ParserU8Result rbp_result = prefix_rbp(token);
|
2026-05-13 17:35:52 -06:00
|
|
|
if (!rbp_result.is_valid) {
|
|
|
|
|
return (TreeResult) {
|
|
|
|
|
.is_valid = false,
|
|
|
|
|
.err = rbp_result.err,
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TreeResult right = parse_expr(
|
|
|
|
|
slice,
|
|
|
|
|
arena,
|
|
|
|
|
rbp_result.num
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
if (!right.is_valid) {
|
|
|
|
|
return right;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
node->unary.to = right.node;
|
|
|
|
|
|
|
|
|
|
return (TreeResult) {
|
2026-05-13 16:09:03 -06:00
|
|
|
.is_valid = true,
|
2026-05-13 17:35:52 -06:00
|
|
|
.node = node,
|
2026-05-13 16:09:03 -06:00
|
|
|
};
|
2026-05-13 17:35:52 -06:00
|
|
|
}
|
2026-05-13 16:09:03 -06:00
|
|
|
default:
|
2026-05-13 17:35:52 -06:00
|
|
|
return (TreeResult) {
|
2026-05-13 16:09:03 -06:00
|
|
|
.is_valid = false,
|
|
|
|
|
.err = PARSER_UNEXPECTED_TOKEN,
|
2026-05-13 17:35:52 -06:00
|
|
|
};
|
2026-05-13 16:09:03 -06:00
|
|
|
}
|
|
|
|
|
}
|
2026-05-13 12:13:07 -06:00
|
|
|
|
2026-05-13 11:09:22 -06:00
|
|
|
|
|
|
|
|
|
|
|
|
|
// Parses a tokenized input into an expression tree.
//
//   tokens - result of tokenization. When it is invalid the function returns
//            PARSER_INVALID_TOKENIZE without touching tokens.arr; otherwise
//            tokens.arr is destroyed before returning, on success and on
//            failure.
//
// On success the result carries the tree root and the arena its nodes were
// allocated from. On failure the arena is destroyed and the result carries
// the parser error.
//
// The whole token list must form one expression: tokens left over after the
// top-level expression (for example "1 2" or "1 )") are rejected with
// PARSER_UNEXPECTED_TOKEN instead of being silently ignored.
ParserResult parse(TokenizeResult tokens) {
    if (!tokens.is_valid) {
        return (ParserResult) {
            .is_valid = false,
            .err = PARSER_INVALID_TOKENIZE,
        };
    }

    // NOTE(review): the results of arraylist_slice() and arena_init() are not
    // checked, and `context` is never released here - confirm against the
    // arraylist/arena APIs whether either needs handling.
    ArraySlice *context;
    arraylist_slice(&context, tokens.arr, 0, arraylist_size(tokens.arr));
    Arena *arena;
    arena_init(&arena, sizeof(Node) * arraylist_size(tokens.arr));

    TreeResult result = parse_expr(context, arena, 0);

    // Must be evaluated before tokens.arr is destroyed, since the slice was
    // created over it.
    // NOTE(review): assumes the lexer emits no end-of-input token, so a
    // fully consumed slice is no longer "valid" - confirm.
    bool has_trailing_tokens = result.is_valid && arrayslice_is_valid(context);

    arraylist_destroy(&tokens.arr);

    if (!result.is_valid || has_trailing_tokens) {
        arena_destroy(&arena);
        return (ParserResult) {
            .is_valid = false,
            .err = has_trailing_tokens ? PARSER_UNEXPECTED_TOKEN : result.err,
        };
    }

    return (ParserResult) {
        .is_valid = true,
        .arena = arena,
        .tree = result.node};
}
|
|
|
|
|
|
2026-05-13 12:19:17 -06:00
|
|
|
// Parses one expression from the token cursor using binding powers.
//
//   slice  - token cursor; tokens belonging to the expression are consumed
//   arena  - storage new nodes are allocated from
//   min_bp - operators whose left binding power is below this value are left
//            in the slice for the caller
//
// Returns the root of the parsed sub-tree. Fails with PARSER_UNEXPECTED_EOF
// when no token is available to start the expression; errors from nud() and
// led() are passed through unchanged.
TreeResult parse_expr(ArraySlice *slice, Arena *arena, uint8_t min_bp) {
    Token current_token;

    if (arrayslice_next(slice, &current_token) != ARRLIST_OK) {
        return (TreeResult) {
            .is_valid = false,
            .err = PARSER_UNEXPECTED_EOF,
        };
    }

    TreeResult left_result = nud(slice, arena, current_token);
    if (!left_result.is_valid) {
        return left_result;
    }

    Node *left_side = left_result.node;

    while (arrayslice_is_valid(slice)) {
        // Look at the next token without consuming it; it is only taken once
        // it is known to bind at least as tightly as min_bp.
        Token operator_token;
        arrayslice_peek(slice, &operator_token);

        if (operator_token.type != TOKEN_OPERATOR) {
            break;
        }

        // Postfix operators take priority; anything else is tried as infix.
        ParserU8Result lbp_result = postfix_lbp(operator_token);
        if (!lbp_result.is_valid) {
            lbp_result = infix_lbp(operator_token);
        }

        // Neither a postfix nor an infix operator (e.g. a closing
        // parenthesis), or it binds too loosely: leave it for the caller.
        if (!lbp_result.is_valid || lbp_result.num < min_bp) {
            break;
        }

        arrayslice_next(slice, NULL);

        TreeResult result = led(slice, arena, left_side, operator_token);
        if (!result.is_valid) {
            return result;
        }

        left_side = result.node;
    }

    // Final: return left side
    return (TreeResult){
        .is_valid = true,
        .node = left_side,
    };
}
|