refactor: changed function definitions, modified tokenize
This commit is contained in:
@@ -92,9 +92,9 @@ ASTNodeArrayErr ASTNodeArray_pop(ASTNodeArray *arr, size_t index, ASTNode *out);
|
|||||||
size_t ASTNodeArray_len(ASTNodeArray *arr);
|
size_t ASTNodeArray_len(ASTNodeArray *arr);
|
||||||
|
|
||||||
// Lexer functions and a few related helpers
|
// Lexer functions and a few related helpers
|
||||||
LexerErr tokenize(const char* input, ASTNodeArray *out);
|
TokenizeResult tokenize(const char* input);
|
||||||
LexerErr tokenize_number(const char* input, size_t *offset, ASTNode *out);
|
ASTNodeResult tokenize_number(const char* input, size_t *offset);
|
||||||
LexerErr string_to_integer(const char buf[], int64_t *number);
|
I64Result string_to_integer(const char buf[]);
|
||||||
bool isoperator(int c);
|
bool isoperator(int c);
|
||||||
Operator char_to_operator(int c);
|
Operator char_to_operator(int c);
|
||||||
char operator_to_char(Operator op);
|
char operator_to_char(Operator op);
|
||||||
|
|||||||
36
src/lexer.c
36
src/lexer.c
@@ -1,5 +1,7 @@
|
|||||||
#include "lexer.h"
|
#include "lexer.h"
|
||||||
|
#include "arraylist.h"
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
#include <stdbool.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <strings.h>
|
#include <strings.h>
|
||||||
@@ -11,32 +13,31 @@ typedef enum {
|
|||||||
} LexerState;
|
} LexerState;
|
||||||
|
|
||||||
|
|
||||||
LexerErr tokenize(const char *input, ASTNodeArray *out) {
|
TokenizeResult tokenize(const char *input) {
|
||||||
size_t offset = 0;
|
size_t offset = 0;
|
||||||
LexerState state = WAIT_FOR_NUMBER;
|
LexerState state = WAIT_FOR_NUMBER;
|
||||||
ASTNodeArray arr = ASTNodeArray_init(0); // 0 defaults to 64
|
ArrayList *arr = arraylist_init(64, sizeof(ASTNode));
|
||||||
|
|
||||||
while (input[offset] != '\n' && input[offset] != '\0') {
|
while (input[offset] != '\n' && input[offset] != '\0') {
|
||||||
int current = input[offset];
|
int current = input[offset];
|
||||||
|
|
||||||
if (isdigit(current)) {
|
if (isdigit(current)) {
|
||||||
if (state != WAIT_FOR_NUMBER) {
|
if (state != WAIT_FOR_NUMBER) {
|
||||||
ASTNodeArray_free(&arr);
|
arraylist_destroy(&arr);
|
||||||
return LEXER_WRONG_SYNTAX;
|
return (TokenizeResult) {.is_valid = false, .err = LEXER_WRONG_SYNTAX};
|
||||||
}
|
}
|
||||||
ASTNode new_node;
|
ASTNodeResult result = tokenize_number(input, &offset);
|
||||||
LexerErr result = tokenize_number(input, &offset, &new_node);
|
|
||||||
|
|
||||||
if (result != LEXER_OK) {
|
if (!result.is_valid) {
|
||||||
ASTNodeArray_free(&arr);
|
arraylist_destroy(&arr);
|
||||||
return result;
|
return (TokenizeResult) {.is_valid = false, .err = result.err};
|
||||||
}
|
}
|
||||||
|
|
||||||
ASTNodeArray_push(&arr, new_node);
|
arraylist_push_back(arr, &result.node);
|
||||||
state = WAIT_FOR_OPERATOR;
|
state = WAIT_FOR_OPERATOR;
|
||||||
} else if (isoperator(current)) {
|
} else if (isoperator(current)) {
|
||||||
if (state != WAIT_FOR_OPERATOR) {
|
if (state != WAIT_FOR_OPERATOR) {
|
||||||
return LEXER_WRONG_SYNTAX;
|
return (TokenizeResult) {.is_valid = false, .err =LEXER_WRONG_SYNTAX};
|
||||||
}
|
}
|
||||||
ASTNode new_node = {
|
ASTNode new_node = {
|
||||||
.type = NODE_BINARY_OP,
|
.type = NODE_BINARY_OP,
|
||||||
@@ -45,24 +46,23 @@ LexerErr tokenize(const char *input, ASTNodeArray *out) {
|
|||||||
.data.binary.left = NULL,
|
.data.binary.left = NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
ASTNodeArray_push(&arr, new_node);
|
arraylist_push_back(arr, &new_node);
|
||||||
state = WAIT_FOR_NUMBER;
|
state = WAIT_FOR_NUMBER;
|
||||||
} else if (isspace(current)) {
|
} else if (isspace(current)) {
|
||||||
// Nothing...
|
// Nothing...
|
||||||
} else {
|
} else {
|
||||||
ASTNodeArray_free(&arr);
|
arraylist_destroy(&arr);
|
||||||
return LEXER_NOT_RECOGNIZED_SYMBOL;
|
return (TokenizeResult) {.is_valid = false, .err = LEXER_NOT_RECOGNIZED_SYMBOL};
|
||||||
}
|
}
|
||||||
|
|
||||||
offset++;
|
offset++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (arr.len < 1) {
|
if (arraylist_size(arr) < 1) {
|
||||||
return LEXER_EMPTY_INPUT;
|
return (TokenizeResult) {.is_valid = false, .err = LEXER_EMPTY_INPUT};
|
||||||
}
|
}
|
||||||
|
|
||||||
*out = arr;
|
return (TokenizeResult) {.is_valid = true, .arr = arr};
|
||||||
return LEXER_OK;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// CURRENTLY, it only supports ints, not clear how floating
|
// CURRENTLY, it only supports ints, not clear how floating
|
||||||
|
|||||||
Reference in New Issue
Block a user