From: Alexander Goussas Date: Fri, 1 Nov 2024 12:50:41 +0000 (-0500) Subject: fix: tokenize eol X-Git-Url: http://git.frustrated-labs.net/?a=commitdiff_plain;h=a1a10c7bae3c6c9dc7e7a37a5880a460942f20f9;p=so-test.git fix: tokenize eol --- diff --git a/assets/batch-mode.png b/assets/batch-mode.png new file mode 100644 index 0000000..afd335b Binary files /dev/null and b/assets/batch-mode.png differ diff --git a/assets/interactive-mode.png b/assets/interactive-mode.png new file mode 100644 index 0000000..03c8e85 Binary files /dev/null and b/assets/interactive-mode.png differ diff --git a/lexer.c b/lexer.c index bedc58f..818a6bc 100644 --- a/lexer.c +++ b/lexer.c @@ -5,81 +5,77 @@ #include static so_token so_lexer_string(so_lexer *); + static so_token so_lexer_integer(so_lexer *); + static so_token so_lexer_bareword(so_lexer *); -void so_token_deinit(so_token *t) -{ +void so_token_deinit(so_token *t) { free(t->lexeme); t->lexeme = NULL; t->type = SO_TT_INVALID; } -void so_token_type_to_string(so_token_type tt, char buffer[], int size) -{ - switch (tt) - { - case SO_TT_USE: - strncpy(buffer, "USE", size); - break; - case SO_TT_CALL: - strncpy(buffer, "CALL", size); - break; - case SO_TT_BARE: - strncpy(buffer, "BAREWORD", size); - break; - case SO_TT_STRING: - strncpy(buffer, "STRING", size); - break; - case SO_TT_INTEGER: - strncpy(buffer, "INTEGER", size); - break; - case SO_TT_EOF: - strncpy(buffer, "EOF", size); - break; - case SO_TT_INVALID: - strncpy(buffer, "INVALID", size); - break; +void so_token_type_to_string(so_token_type tt, char buffer[], int size) { + switch (tt) { + case SO_TT_USE: + strncpy(buffer, "USE", size); + break; + case SO_TT_CALL: + strncpy(buffer, "CALL", size); + break; + case SO_TT_BARE: + strncpy(buffer, "BAREWORD", size); + break; + case SO_TT_STRING: + strncpy(buffer, "STRING", size); + break; + case SO_TT_INTEGER: + strncpy(buffer, "INTEGER", size); + break; + case SO_TT_EOL: + strncpy(buffer, "EOL", size); + break; + case SO_TT_EOF: + strncpy(buffer, "EOF", size); + break; + case SO_TT_INVALID: + strncpy(buffer, "INVALID", size); + break; } } -static char so_lexer_advance(so_lexer *l) -{ +static char so_lexer_advance(so_lexer *l) { if (l->current >= l->source_length) return '\0'; return l->source[l->current++]; } -static char so_lexer_peek(so_lexer *l) -{ +static char so_lexer_peek(so_lexer *l) { if (l->current >= l->source_length) return '\0'; return l->source[l->current]; } -static int so_lexer_eof(so_lexer *l) -{ +static int so_lexer_eof(so_lexer *l) { return so_lexer_peek(l) == '\0'; } -void so_lexer_init(so_lexer *l, const char *source) -{ +void so_lexer_init(so_lexer *l, const char *source) { l->current = 0; l->start = 0; l->source_length = strlen(source); l->source = source; } -void so_lexer_deinit(so_lexer *l) -{ +void so_lexer_deinit(so_lexer *l) { l->current = 0; l->start = 0; l->source_length = 0; l->source = NULL; } -so_token so_lexer_next_token(so_lexer *l) -{ +so_token so_lexer_next_token(so_lexer *l) { l->start = l->current; char c = so_lexer_advance(l); @@ -87,35 +83,33 @@ so_token so_lexer_next_token(so_lexer *l) if (c == '\0') return (so_token){.lexeme = strdup("eof"), .type = SO_TT_EOF}; - switch (c) - { - case '"': - return so_lexer_string(l); - case '\n': - case ' ': - return so_lexer_next_token(l); - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - return so_lexer_integer(l); - default: - return so_lexer_bareword(l); + switch (c) { + case '"': + return so_lexer_string(l); + case '\n': + return (so_token){.lexeme = strdup("eol"), .type = SO_TT_EOL}; + case ' ': + return so_lexer_next_token(l); + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return so_lexer_integer(l); + default: + return so_lexer_bareword(l); } } -so_token so_lexer_string(so_lexer *l) -{ +so_token so_lexer_string(so_lexer *l) { while (!so_lexer_eof(l) && so_lexer_peek(l) != '"') so_lexer_advance(l); - if (so_lexer_peek(l) != '"') - { + if (so_lexer_peek(l) != '"') { fprintf(stderr, "warning: unterminated string literal at %d\n", l->current); return (so_token){.lexeme = strdup("invalid"), .type = SO_TT_INVALID}; } @@ -129,8 +123,7 @@ so_token so_lexer_string(so_lexer *l) return (so_token){.lexeme = lexeme, .type = SO_TT_STRING}; } -so_token so_lexer_integer(so_lexer *l) -{ +so_token so_lexer_integer(so_lexer *l) { while (!so_lexer_eof(l) && so_lexer_peek(l) >= '0' && so_lexer_peek(l) <= '9') so_lexer_advance(l); @@ -141,8 +134,7 @@ so_token so_lexer_integer(so_lexer *l) return (so_token){.lexeme = lexeme, .type = SO_TT_INTEGER}; } -so_token so_lexer_bareword(so_lexer *l) -{ +so_token so_lexer_bareword(so_lexer *l) { while (!so_lexer_eof(l) && !isspace(so_lexer_peek(l))) so_lexer_advance(l); diff --git a/lexer.h b/lexer.h index cc140fe..9df2c72 100644 --- a/lexer.h +++ b/lexer.h @@ -1,21 +1,20 @@ #ifndef lexer_h__ #define lexer_h__ -typedef enum -{ +typedef enum { SO_TT_USE, SO_TT_CALL, SO_TT_BARE, SO_TT_STRING, SO_TT_INTEGER, + SO_TT_EOL, SO_TT_EOF, SO_TT_INVALID, } so_token_type; void so_token_type_to_string(so_token_type tt, char buffer[], int size); -typedef struct -{ +typedef struct { so_token_type type; char *lexeme; } so_token; @@ -23,8 +22,7 @@ typedef struct /// @brief Deinitialize resources for the provided token. void so_token_deinit(so_token *); -typedef struct -{ +typedef struct { int start; int current; int source_length; diff --git a/main.c b/main.c index 4ad6192..236df64 100644 --- a/main.c +++ b/main.c @@ -1,4 +1,5 @@ #include +#include #include #include "interpreter.h" @@ -25,8 +26,48 @@ int repl() { return 0; } +int file(const char *filename) { + FILE *fp = fopen(filename, "r"); + if (!fp) { + fprintf(stderr, "error: could not read file: %s\n", filename); + return -1; + } + + fseek(fp, 0, SEEK_END); + long size = ftell(fp); + rewind(fp); + + char *contents = malloc(size + 1); + int nread = fread(contents, sizeof(char), size, fp); + + if (nread < size) { + fprintf(stderr, "error: read less than %ld bytes\n", size); + return -1; + } + + contents[nread] = '\0'; + + so_interpreter interp; + so_interpreter_init(&interp); + so_interpreter_run(&interp, contents); + so_interpreter_deinit(&interp); + + free(contents); + fclose(fp); + + return 0; +} + int main(int argc, char **argv) { if (argc == 1) { return repl(); } + + if (argc == 2) { + char *filename = argv[1]; + return file(filename); + } + + fprintf(stderr, "usage: sotest [filename]\n"); + return -1; } diff --git a/parser.c b/parser.c index b01276c..b5a609f 100644 --- a/parser.c +++ b/parser.c @@ -5,80 +5,73 @@ #include static void so_parse_call(so_parser *); + static void so_parse_load(so_parser *); + static so_expr *so_parser_parse_simple_expression(so_parser *); -static so_token so_parser_advance(so_parser *p) -{ +static so_token so_parser_advance(so_parser *p) { p->current = so_lexer_next_token(&p->lexer); return p->current; } -static int so_parser_expect(so_parser *p, so_token_type tt) -{ - if (p->current.type != tt) - { +static int so_parser_expect(so_parser *p, so_token_type tt) { + if (p->current.type != tt) { char buffer[20] = {0}; so_token_type_to_string(tt, buffer, sizeof(buffer)); - fprintf(stderr, "warning: expected a different type of token: %s\n", buffer); + fprintf(stderr, "warning: expected a token of type: %s\n", buffer); return 0; } return 1; } -static void add_command(so_parser *p, so_expr *e) -{ - if (p->ncommands >= MAX_COMMANDS) - { +static void add_command(so_parser *p, so_expr *e) { + if (p->ncommands >= MAX_COMMANDS) { fprintf(stderr, "warning: maximum number of commands reached\n"); return; } p->commands[p->ncommands++] = e; } -void so_parser_init(so_parser *p, const char *source) -{ +void so_parser_init(so_parser *p, const char *source) { p->source = source; p->ncommands = 0; so_lexer_init(&p->lexer, source); } -void so_parser_deinit(so_parser *p) -{ +void so_parser_deinit(so_parser *p) { p->source = NULL; p->ncommands = 0; memset(p->commands, 0, MAX_COMMANDS * sizeof(so_expr *)); so_lexer_deinit(&p->lexer); } -void so_parser_parse(so_parser *p) -{ - while (so_parser_advance(p).type != SO_TT_EOF) - { - switch (p->current.type) - { - case SO_TT_USE: - so_parse_load(p); - break; - case SO_TT_CALL: - so_parse_call(p); - break; - case SO_TT_EOF: - so_token_deinit(&p->current); - return; - default: - fprintf(stderr, "warning: invalid start of expression, expected use or call: %s\n", p->current.lexeme); - so_token_deinit(&p->current); +void so_parser_parse(so_parser *p) { + while (so_parser_advance(p).type != SO_TT_EOF) { + switch (p->current.type) { + case SO_TT_USE: + so_parse_load(p); + break; + case SO_TT_CALL: + so_parse_call(p); + break; + case SO_TT_EOL: // Happens on an empty line + so_token_deinit(&p->current); + break; + case SO_TT_EOF: // Happens at end of input + so_token_deinit(&p->current); + return; + default: + fprintf(stderr, "warning: invalid start of expression, expected use or call: %s\n", p->current.lexeme); + so_token_deinit(&p->current); } } so_token_deinit(&p->current); } -void so_parse_call(so_parser *p) -{ - if (!so_parser_expect(p, SO_TT_CALL)) - { +void so_parse_call(so_parser *p) { + if (!so_parser_expect(p, SO_TT_CALL)) { so_token_deinit(&p->current); return; } @@ -86,8 +79,7 @@ void so_parse_call(so_parser *p) so_token_deinit(&p->current); so_parser_advance(p); - if (!so_parser_expect(p, SO_TT_BARE)) - { + if (!so_parser_expect(p, SO_TT_BARE)) { so_token_deinit(&p->current); return; } @@ -96,12 +88,15 @@ void so_parse_call(so_parser *p) so_expr *args[MAX_CALL_ARGS]; int nargs = 0; - while (so_parser_advance(p).type != SO_TT_EOF && nargs < MAX_CALL_ARGS) - { + while (so_parser_advance(p).type != SO_TT_EOF && p->current.type != SO_TT_EOL && nargs < MAX_CALL_ARGS) { so_expr *e = so_parser_parse_simple_expression(p); - if (e == NULL) - { - fprintf(stderr, "warning: expected either a string or a number\n"); + if (e == NULL) { + char type[10] = {0}; + so_token_type_to_string(p->current.type, type, sizeof(type)); + fprintf( + stderr, + "warning: expected either a string or a number, but got %s\n", + type); so_token_deinit(&p->current); continue; } @@ -109,9 +104,8 @@ void so_parse_call(so_parser *p) args[nargs++] = e; } - if (!so_parser_expect(p, SO_TT_EOF)) - { - fprintf(stderr, "warning: expected EOF"); + if (!so_parser_expect(p, SO_TT_EOL)) { + fprintf(stderr, "warning: expected an end of line\n"); so_token_deinit(&p->current); return; } @@ -123,10 +117,8 @@ void so_parse_call(so_parser *p) so_token_deinit(&name); } -void so_parse_load(so_parser *p) -{ - if (!so_parser_expect(p, SO_TT_USE)) - { +void so_parse_load(so_parser *p) { + if (!so_parser_expect(p, SO_TT_USE)) { so_token_deinit(&p->current); return; } @@ -134,32 +126,39 @@ void so_parse_load(so_parser *p) so_token use = p->current; so_parser_advance(p); - if (!so_parser_expect(p, SO_TT_STRING)) - { + if (!so_parser_expect(p, SO_TT_STRING)) { so_token_deinit(&use); so_token_deinit(&p->current); return; } - so_expr *e = create_load_node(p->current.lexeme); + so_token name = p->current; + so_parser_advance(p); + + if (!so_parser_expect(p, SO_TT_EOL)) { + so_token_deinit(&use); + so_token_deinit(&name); + so_token_deinit(&p->current); + return; + } + + so_expr *e = create_load_node(name.lexeme); add_command(p, e); so_token_deinit(&use); + so_token_deinit(&name); so_token_deinit(&p->current); } -so_expr *so_parser_parse_simple_expression(so_parser *p) -{ +so_expr *so_parser_parse_simple_expression(so_parser *p) { so_expr *e = NULL; - if (p->current.type == SO_TT_STRING) - { + if (p->current.type == SO_TT_STRING) { e = create_string_node(p->current.lexeme); so_token_deinit(&p->current); } - if (p->current.type == SO_TT_INTEGER) - { + if (p->current.type == SO_TT_INTEGER) { int n = atoi(p->current.lexeme); e = create_number_node(n); so_token_deinit(&p->current); diff --git a/test.sc b/test.sc new file mode 100644 index 0000000..c2f18d9 --- /dev/null +++ b/test.sc @@ -0,0 +1,8 @@ +use "lib/libm.so.6" +use "lib/libc.so.6" + +call puts "Calling abs" + +call abs 12 + +call puts "abs finished"