2026-03-04 18:54:46 -06:00
|
|
|
#include "lexer.h"
|
2026-04-24 07:17:35 -06:00
|
|
|
#include "arraylist.h"
|
2026-03-09 09:06:06 -06:00
|
|
|
#include <ctype.h>
|
2026-04-24 07:17:35 -06:00
|
|
|
#include <stdbool.h>
|
2026-03-09 09:06:06 -06:00
|
|
|
#include <stdint.h>
|
2026-04-30 21:34:27 -06:00
|
|
|
#include <stdio.h>
|
2026-03-04 19:30:56 -06:00
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <strings.h>
|
2026-03-09 09:06:06 -06:00
|
|
|
#include <limits.h>
|
2026-03-04 18:54:46 -06:00
|
|
|
|
|
|
|
|
/* States a lexer can be in, named after the kind of token it expects next. */
typedef enum {
    WAIT_FOR_NUMBER = 0,   /* a number is expected next */
    WAIT_FOR_OPERATOR = 1  /* an operator is expected next */
} LexerState;
|
|
|
|
|
|
2026-03-09 09:06:06 -06:00
|
|
|
|
2026-04-24 07:17:35 -06:00
|
|
|
TokenizeResult tokenize(const char *input) {
|
|
|
|
|
ArrayList *arr = arraylist_init(64, sizeof(ASTNode));
|
2026-04-30 21:34:27 -06:00
|
|
|
size_t offset = 0;
|
2026-03-10 07:08:12 -06:00
|
|
|
|
2026-04-30 21:34:27 -06:00
|
|
|
while (
|
|
|
|
|
input[offset] != '\n' ||
|
|
|
|
|
input[offset] != EOF ||
|
|
|
|
|
input[offset] != '\0') {
|
2026-03-10 07:08:12 -06:00
|
|
|
|
2026-04-30 21:34:27 -06:00
|
|
|
if (isdigit(input[offset])) {
|
2026-04-24 07:17:35 -06:00
|
|
|
ASTNodeResult result = tokenize_number(input, &offset);
|
2026-03-10 07:08:12 -06:00
|
|
|
|
2026-04-24 07:17:35 -06:00
|
|
|
if (!result.is_valid) {
|
|
|
|
|
return (TokenizeResult) {.is_valid = false, .err = result.err};
|
2026-03-10 07:08:12 -06:00
|
|
|
}
|
|
|
|
|
|
2026-04-24 07:17:35 -06:00
|
|
|
arraylist_push_back(arr, &result.node);
|
2026-04-30 21:34:27 -06:00
|
|
|
} else if (isoperator(input[offset])) {
|
|
|
|
|
ASTNode op_node = {
|
2026-03-10 07:08:12 -06:00
|
|
|
.type = NODE_BINARY_OP,
|
2026-04-30 21:34:27 -06:00
|
|
|
.data.binary.op = char_to_operator(input[offset]),
|
2026-03-10 07:08:12 -06:00
|
|
|
.data.binary.left = NULL,
|
2026-04-30 21:34:27 -06:00
|
|
|
.data.binary.right = NULL,
|
2026-03-10 07:08:12 -06:00
|
|
|
};
|
2026-04-30 21:34:27 -06:00
|
|
|
|
|
|
|
|
arraylist_push_back(arr, &op_node);
|
|
|
|
|
} else if (isspace(input[offset])) {
|
2026-03-10 07:08:12 -06:00
|
|
|
// Nothing...
|
|
|
|
|
} else {
|
2026-04-30 21:34:27 -06:00
|
|
|
return (TokenizeResult) {
|
|
|
|
|
.is_valid = false,
|
|
|
|
|
.err = LEXER_NOT_RECOGNIZED_SYMBOL};
|
2026-03-10 07:08:12 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
offset++;
|
|
|
|
|
}
|
2026-03-09 11:58:55 -06:00
|
|
|
|
2026-04-24 07:17:35 -06:00
|
|
|
if (arraylist_size(arr) < 1) {
|
|
|
|
|
return (TokenizeResult) {.is_valid = false, .err = LEXER_EMPTY_INPUT};
|
2026-03-13 07:58:38 -06:00
|
|
|
}
|
|
|
|
|
|
2026-04-24 07:17:35 -06:00
|
|
|
return (TokenizeResult) {.is_valid = true, .arr = arr};
|
2026-03-10 07:08:12 -06:00
|
|
|
}
|
2026-03-09 11:58:55 -06:00
|
|
|
|
2026-03-09 09:06:06 -06:00
|
|
|
// CURRENTLY, it only supports ints, not clear how floating
|
|
|
|
|
// point is implemented but i'll figure it out
|
2026-04-24 08:09:31 -06:00
|
|
|
ASTNodeResult tokenize_number(const char *input, size_t *offset) {
|
2026-04-30 21:34:27 -06:00
|
|
|
char buf[64] = { '\0' };
|
2026-03-09 09:06:06 -06:00
|
|
|
size_t buf_pos = 0;
|
2026-03-09 11:58:55 -06:00
|
|
|
bool is_integer = true; // Will later be used to differentiate fractions
|
2026-03-09 09:06:06 -06:00
|
|
|
|
2026-04-30 21:34:27 -06:00
|
|
|
// read number
|
2026-03-09 09:06:06 -06:00
|
|
|
size_t current = *offset;
|
|
|
|
|
while (isdigit(input[current])) {
|
|
|
|
|
buf[buf_pos] = input[current];
|
|
|
|
|
|
|
|
|
|
if (buf_pos >= sizeof(buf)) {
|
2026-04-30 21:34:27 -06:00
|
|
|
return (ASTNodeResult) {
|
|
|
|
|
.is_valid = false,
|
|
|
|
|
.err = LEXER_BUF_OVERFLOW};
|
2026-03-09 09:06:06 -06:00
|
|
|
}
|
2026-04-30 21:34:27 -06:00
|
|
|
|
2026-03-09 09:06:06 -06:00
|
|
|
current++;
|
|
|
|
|
buf_pos++;
|
|
|
|
|
}
|
2026-03-09 11:58:55 -06:00
|
|
|
|
|
|
|
|
ASTNode new_node;
|
|
|
|
|
if (is_integer) {
|
|
|
|
|
new_node.type = NODE_INTEGER;
|
2026-04-30 21:34:27 -06:00
|
|
|
LexerI64Result status = string_to_integer(buf);
|
|
|
|
|
|
|
|
|
|
|
2026-04-30 09:58:27 -06:00
|
|
|
if (!status.is_valid) {
|
|
|
|
|
return (ASTNodeResult) {.is_valid = false, .err = status.err};
|
2026-03-09 11:58:55 -06:00
|
|
|
}
|
2026-04-30 21:34:27 -06:00
|
|
|
|
2026-04-30 09:58:27 -06:00
|
|
|
new_node.data.integer = status.number;
|
2026-04-30 21:34:27 -06:00
|
|
|
|
2026-03-09 11:58:55 -06:00
|
|
|
*offset = current;
|
2026-04-24 08:09:31 -06:00
|
|
|
return (ASTNodeResult) {.is_valid = true, .node = new_node};
|
2026-03-09 11:58:55 -06:00
|
|
|
}
|
|
|
|
|
|
2026-04-30 21:34:27 -06:00
|
|
|
return (ASTNodeResult) {
|
|
|
|
|
.is_valid = false,
|
|
|
|
|
.err = LEXER_FAILED_NUMBER_CONVERSION};
|
2026-03-09 11:58:55 -06:00
|
|
|
}
|
|
|
|
|
|
2026-04-30 21:34:27 -06:00
|
|
|
LexerI64Result string_to_integer(const char *buf) {
|
2026-03-09 09:06:06 -06:00
|
|
|
int c = 0;
|
2026-03-09 09:23:06 -06:00
|
|
|
int64_t count = 0;
|
2026-04-30 21:34:27 -06:00
|
|
|
|
2026-03-09 09:06:06 -06:00
|
|
|
while (buf[c] != '\0') {
|
2026-04-30 21:34:27 -06:00
|
|
|
|
|
|
|
|
// Extracts number from char
|
2026-03-09 09:06:06 -06:00
|
|
|
int digit = buf[c] - '0';
|
|
|
|
|
|
2026-03-09 11:58:55 -06:00
|
|
|
if (count > (INT64_MAX - digit) / 10) {
|
2026-04-30 21:34:27 -06:00
|
|
|
return (LexerI64Result) {
|
|
|
|
|
.is_valid = false,
|
|
|
|
|
.err = LEXER_INT_OVERFLOW};
|
2026-03-09 09:06:06 -06:00
|
|
|
}
|
2026-04-30 21:34:27 -06:00
|
|
|
|
2026-03-09 09:06:06 -06:00
|
|
|
count = count * 10;
|
|
|
|
|
count += digit;
|
|
|
|
|
|
|
|
|
|
c++;
|
|
|
|
|
}
|
|
|
|
|
|
2026-04-30 21:34:27 -06:00
|
|
|
return (LexerI64Result) {.is_valid = true, .number = count};
|
2026-03-09 09:06:06 -06:00
|
|
|
}
|
2026-03-10 07:08:12 -06:00
|
|
|
|
|
|
|
|
/**
 * Tells whether c is one of the operator characters the lexer accepts.
 *
 * @param c  Character code to test.
 * @return true for '+', '-', '/', '*' and '^'; false for anything else.
 */
bool isoperator(int c) {
    return c == '+' || c == '-' || c == '/' || c == '*' || c == '^';
}
|
2026-03-13 07:58:38 -06:00
|
|
|
|
|
|
|
|
Operator char_to_operator(int c) {
|
|
|
|
|
switch (c) {
|
|
|
|
|
case '+':
|
|
|
|
|
return OP_ADD;
|
|
|
|
|
break;
|
|
|
|
|
case '-':
|
|
|
|
|
return OP_SUB;
|
|
|
|
|
break;
|
|
|
|
|
case '*':
|
|
|
|
|
return OP_MUL;
|
|
|
|
|
break;
|
|
|
|
|
case '/':
|
|
|
|
|
return OP_DIV;
|
|
|
|
|
break;
|
2026-05-12 18:15:36 -06:00
|
|
|
case '^':
|
|
|
|
|
return OP_POW;
|
|
|
|
|
break;
|
2026-03-13 07:58:38 -06:00
|
|
|
default: // I mean shouldn't be used, we assume
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
}
|
2026-03-25 06:59:52 -06:00
|
|
|
|
|
|
|
|
/**
 * Maps an Operator value back to the character that denotes it.
 *
 * @param op  Operator to convert.
 * @return '+' for OP_ADD, '-' for OP_SUB, '*' for OP_MUL, '/' for OP_DIV,
 *         '^' for OP_POW, and '?' for any value not listed here.
 */
char operator_to_char(Operator op) {
    switch (op) {
    case OP_ADD:
        return '+';
    case OP_SUB:
        return '-';
    case OP_MUL:
        return '*';
    case OP_DIV:
        return '/';
    case OP_POW:
        return '^';
    }

    // Without this, an unlisted value would run off the end of a non-void
    // function, and using that result is undefined behaviour. Kept outside
    // the switch so the compiler can still warn about unhandled enumerators.
    return '?';
}
|