git.frustrated-labs.net Git - so-test.git/commitdiff
feat: lex strings
author Alexander Goussas <[email protected]>
Thu, 31 Oct 2024 02:16:18 +0000 (21:16 -0500)
committer Alexander Goussas <[email protected]>
Thu, 31 Oct 2024 02:16:18 +0000 (21:16 -0500)
.gitignore [new file with mode: 0644]
CMakeLists.txt [new file with mode: 0644]
README.md [new file with mode: 0644]
lexer.c [new file with mode: 0644]
lexer.h [new file with mode: 0644]
main.c [new file with mode: 0644]

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..d6536ba
--- /dev/null
@@ -0,0 +1,2 @@
+build
+compile_commands.json
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644 (file)
index 0000000..48a3c9b
--- /dev/null
@@ -0,0 +1,8 @@
+cmake_minimum_required(VERSION 3.18)
+
+# C-only project: naming the language keeps CMake from also probing for a
+# C++ compiler that the build never uses.
+project(sotest C)
+
+set(CMAKE_C_STANDARD 11)
+# Fail at configure time rather than silently falling back to an older
+# standard when the compiler cannot provide C11.
+set(CMAKE_C_STANDARD_REQUIRED ON)
+
+add_executable(sotest main.c lexer.c)
+target_compile_options(sotest PRIVATE -Wall -Wextra)
diff --git a/README.md b/README.md
new file mode 100644 (file)
index 0000000..f6b4e7a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,9 @@
+# load-so
+
+A tiny interpreter for loading and testing shared libraries.
+
+## Installation
+
+## Usage
+
+## Demo
diff --git a/lexer.c b/lexer.c
new file mode 100644 (file)
index 0000000..a6c54ec
--- /dev/null
+++ b/lexer.c
@@ -0,0 +1,109 @@
+#include "lexer.h"
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+// Per-token-kind helpers. They are declared here because
+// so_lexer_next_token() dispatches to them before their definitions appear
+// further down in this file.
+static so_token so_lexer_string(so_lexer *);
+static so_token so_lexer_integer(so_lexer *);
+static so_token so_lexer_bareword(so_lexer *);
+
+/// @brief Release a token's lexeme and leave the token in a reset state.
+///
+/// After the call the lexeme pointer is NULL and the type is SO_TT_INVALID.
+///
+/// @param t Token to reset; its lexeme is handed to free().
+void so_token_deinit(so_token *t)
+{
+    char *owned = t->lexeme;
+
+    t->type = SO_TT_INVALID;
+    t->lexeme = NULL;
+    free(owned);
+}
+
+/// @brief Consume one character of input.
+///
+/// @param l Lexer whose cursor is moved forward by one on success.
+/// @return The character that was under the cursor, or '\0' when the cursor
+///         is already at or past the end of the source (the cursor is then
+///         left where it is).
+static char so_lexer_advance(so_lexer *l)
+{
+    if (l->current < l->source_length)
+    {
+        char consumed = l->source[l->current];
+        l->current += 1;
+        return consumed;
+    }
+
+    return '\0';
+}
+
+/// @brief Look at the character under the cursor without consuming it.
+///
+/// @param l Lexer to inspect; it is not modified.
+/// @return The character at the cursor, or '\0' when the cursor is at or
+///         past the end of the source.
+static char so_lexer_peek(so_lexer *l)
+{
+    return (l->current < l->source_length) ? l->source[l->current] : '\0';
+}
+
+/// @brief Tell whether the cursor has run out of input.
+///
+/// @param l Lexer to inspect; it is not modified.
+/// @return 1 when the cursor is at or past the end of the source, or when the
+///         character under it is '\0'; 0 otherwise.
+static int so_lexer_eof(so_lexer *l)
+{
+    return l->current >= l->source_length || l->source[l->current] == '\0';
+}
+
+/// @brief Initialize the lexer over a NUL-terminated source string.
+///
+/// The lexer borrows @p source: the pointer is stored, not copied, so the
+/// string has to stay valid for as long as the lexer is used.
+///
+/// @param l      Lexer to initialize; all four fields are overwritten.
+/// @param source Text to tokenize. A NULL pointer is treated as empty input.
+void so_lexer_init(so_lexer *l, const char *source)
+{
+    l->start = 0;
+    l->current = 0;
+    l->source = source;
+    // strlen(NULL) is undefined behavior, so a missing source gets length 0;
+    // the bounds check in advance/peek then reports end of input without ever
+    // dereferencing the pointer.
+    l->source_length = (source != NULL) ? (int)strlen(source) : 0;
+}
+
+/// @brief Reset the lexer to an empty state.
+///
+/// Both cursors and the length become 0 and the source pointer becomes NULL.
+/// The source string itself is not freed here.
+///
+/// @param l Lexer to reset.
+void so_lexer_deinit(so_lexer *l)
+{
+    *l = (so_lexer){
+        .start = 0,
+        .current = 0,
+        .source_length = 0,
+        .source = NULL,
+    };
+}
+
+/// @brief Lex the next token in the input.
+///
+/// Whitespace (space, tab, carriage return, newline) is skipped. The first
+/// non-whitespace character is consumed and selects which helper finishes the
+/// token: '"' starts a string, a decimal digit starts an integer, anything
+/// else starts a bareword.
+///
+/// Before dispatching, `start` is set to the index just past that first
+/// character. so_lexer_string() relies on this to leave the opening quote out
+/// of the lexeme.
+///
+/// @param l Lexer to read from; its cursors are advanced.
+/// @return The next token. At end of input the token has type SO_TT_EOF and
+///         the lexeme "eof". The lexeme is heap-allocated; release it with
+///         so_token_deinit().
+so_token so_lexer_next_token(so_lexer *l)
+{
+    // Loop instead of recursing so a long run of whitespace costs no stack.
+    for (;;)
+    {
+        char c = so_lexer_advance(l);
+
+        if (c == '\0')
+            return (so_token){.lexeme = strdup("eof"), .type = SO_TT_EOF};
+
+        l->start = l->current;
+
+        switch (c)
+        {
+        case ' ':
+        case '\t':
+        case '\r':
+        case '\n':
+            continue; // skip whitespace and look at the next character
+        case '"':
+            return so_lexer_string(l);
+        case '0':
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+        case '8':
+        case '9':
+            return so_lexer_integer(l);
+        default:
+            return so_lexer_bareword(l);
+        }
+    }
+}
+
+/// @brief Lex the remainder of a string literal.
+///
+/// Reads from the cursor up to the closing '"'. The lexeme is the text
+/// between `start` and the closing quote, and the closing quote is consumed.
+///
+/// @param l Lexer to read from.
+/// @return A SO_TT_STRING token on success. If the input ends before a
+///         closing quote, a message is written to stderr and a SO_TT_INVALID
+///         token with the lexeme "invalid" is returned. Either lexeme is
+///         heap-allocated.
+so_token so_lexer_string(so_lexer *l)
+{
+    // Stop on (not past) the closing quote, or at the end of the input.
+    for (;;)
+    {
+        char next = so_lexer_peek(l);
+        if (next == '\0' || next == '"')
+            break;
+        so_lexer_advance(l);
+    }
+
+    if (so_lexer_peek(l) == '"')
+    {
+        size_t length = (size_t)(l->current - l->start);
+        so_token token = {
+            .lexeme = strndup(l->source + l->start, length),
+            .type = SO_TT_STRING,
+        };
+
+        so_lexer_advance(l); // step over the closing quote
+        return token;
+    }
+
+    fprintf(stderr, "Unterminated string literal at %d\n", l->current);
+    return (so_token){.lexeme = strdup("invalid"), .type = SO_TT_INVALID};
+}
+
+/// @brief Lex the remainder of a decimal integer literal.
+///
+/// so_lexer_next_token() has already consumed the leading digit and set
+/// `start` to the index just past it, so the literal begins at `start - 1`.
+/// Digits are consumed until the first non-digit or the end of input.
+///
+/// @param l Lexer positioned just after the first digit.
+/// @return A SO_TT_INTEGER token whose heap-allocated lexeme holds the digits;
+///         release it with so_token_deinit().
+so_token so_lexer_integer(so_lexer *l)
+{
+    // Guard against a direct call on a fresh lexer, where start is still 0.
+    int begin = (l->start > 0) ? l->start - 1 : 0;
+
+    for (;;)
+    {
+        char next = so_lexer_peek(l);
+        // peek yields '\0' at end of input, which also ends the literal.
+        if (next < '0' || next > '9')
+            break;
+        so_lexer_advance(l);
+    }
+
+    char *lexeme = strndup(&l->source[begin], (size_t)(l->current - begin));
+
+    return (so_token){.lexeme = lexeme, .type = SO_TT_INTEGER};
+}
+
+/// @brief Lex the remainder of a bareword.
+///
+/// so_lexer_next_token() has already consumed the first character and set
+/// `start` to the index just past it, so the word begins at `start - 1`.
+/// Characters are consumed until whitespace, a '"', or the end of input.
+///
+/// @param l Lexer positioned just after the first character of the word.
+/// @return A SO_TT_BARE token whose heap-allocated lexeme holds the word;
+///         release it with so_token_deinit().
+/// @note Every word is reported as SO_TT_BARE. NOTE(review): SO_TT_USE and
+///       SO_TT_CALL exist in the enum but nothing visible here says which
+///       spellings map to them - confirm before adding keyword recognition.
+so_token so_lexer_bareword(so_lexer *l)
+{
+    // Guard against a direct call on a fresh lexer, where start is still 0.
+    int begin = (l->start > 0) ? l->start - 1 : 0;
+
+    for (;;)
+    {
+        char next = so_lexer_peek(l);
+        if (next == '\0' || next == ' ' || next == '\t' ||
+            next == '\r' || next == '\n' || next == '"')
+            break;
+        so_lexer_advance(l);
+    }
+
+    char *lexeme = strndup(&l->source[begin], (size_t)(l->current - begin));
+
+    return (so_token){.lexeme = lexeme, .type = SO_TT_BARE};
+}
diff --git a/lexer.h b/lexer.h
new file mode 100644 (file)
index 0000000..8aafe79
--- /dev/null
+++ b/lexer.h
@@ -0,0 +1,45 @@
+#ifndef lexer_h__
+#define lexer_h__
+
+/// @brief Kinds of token the lexer can hand back.
+typedef enum
+{
+    SO_TT_USE,     ///< Not produced by lexer.c yet; presumably a `use` keyword - TODO confirm.
+    SO_TT_CALL,    ///< Not produced by lexer.c yet; presumably a `call` keyword - TODO confirm.
+    SO_TT_BARE,    ///< Not produced by lexer.c yet; presumably an unquoted word - TODO confirm.
+    SO_TT_STRING,  ///< Double-quoted string; the lexeme excludes the quotes.
+    SO_TT_INTEGER, ///< Not produced by lexer.c yet; presumably an integer literal - TODO confirm.
+    SO_TT_EOF,     ///< End of input reached.
+    SO_TT_INVALID, ///< Lexing error (unterminated string); also the type left by so_token_deinit().
+} so_token_type;
+
+/// @brief A single token produced by the lexer.
+typedef struct
+{
+    so_token_type type; ///< Which kind of token this is.
+    char *lexeme;       ///< Heap-allocated text of the token; freed by so_token_deinit().
+} so_token;
+
+/// @brief Deinitialize resources for the provided token.
+///
+/// Frees the token's lexeme, then sets the lexeme to NULL and the type to
+/// SO_TT_INVALID.
+void so_token_deinit(so_token *);
+
+/// @brief Cursor state for lexing one source string.
+typedef struct
+{
+    int start;          ///< Index just past the character that began the current token.
+    int current;        ///< Index of the next character to be read.
+    int source_length;  ///< Length of source, taken with strlen() at init.
+    const char *source; ///< Text being lexed; stored as given, not copied.
+} so_lexer;
+
+/// @brief Initialize the lexer.
+///
+/// Resets both cursors to 0, records the length of @p source and stores the
+/// pointer itself. The string is not copied, so it must stay valid while the
+/// lexer is in use.
+///
+/// @param source NUL-terminated text to lex.
+void so_lexer_init(so_lexer *, const char *source);
+
+/// @brief Deinitialize the lexer, after which it will be unusable.
+///
+/// Sets both cursors and the length to 0 and the source pointer to NULL. The
+/// source string is not freed; it still belongs to the caller.
+void so_lexer_deinit(so_lexer *);
+
+/// @brief  Lex the next token in the input.
+///
+/// Takes the lexer to read from and advances its cursor past the token.
+///
+/// @return The token that was read. At end of input its type is SO_TT_EOF and
+///         its lexeme is "eof". The lexeme is heap-allocated in every case,
+///         so pass each returned token to so_token_deinit().
+so_token so_lexer_next_token(so_lexer *);
+
+#endif
diff --git a/main.c b/main.c
new file mode 100644 (file)
index 0000000..824ac19
--- /dev/null
+++ b/main.c
@@ -0,0 +1,41 @@
+#include <stdio.h>
+#include <string.h>
+#include "lexer.h"
+
+/// @brief Run the interactive read-lex-print loop on stdin.
+///
+/// Each line read is lexed, and every token's lexeme is printed on its own
+/// line. The loop ends when the line is exactly ".quit" followed by a newline,
+/// or when fgets() reports end of file or an error.
+///
+/// @return Always 0.
+int repl(void)
+{
+    char buffer[4096] = {0};
+
+    // The prompt has no trailing newline, so flush it explicitly; otherwise
+    // it may stay in the stdout buffer while fgets() waits for input.
+    printf("> ");
+    fflush(stdout);
+
+    while (fgets(buffer, sizeof(buffer), stdin) != NULL)
+    {
+        if (strcmp(buffer, ".quit\n") == 0)
+            return 0;
+
+        so_lexer lexer;
+        so_lexer_init(&lexer, buffer);
+
+        so_token token;
+        while ((token = so_lexer_next_token(&lexer)).type != SO_TT_EOF)
+        {
+            // The lexeme is NULL when its allocation failed, and passing
+            // NULL to "%s" is undefined behavior.
+            printf("%s\n", token.lexeme != NULL ? token.lexeme : "(null)");
+            so_token_deinit(&token);
+        }
+
+        // The EOF token carries an allocated lexeme as well.
+        so_token_deinit(&token);
+        so_lexer_deinit(&lexer);
+
+        printf("> ");
+        fflush(stdout);
+    }
+
+    return 0;
+}
+
+/// @brief Program entry point.
+///
+/// With no command-line arguments the interactive REPL is started. When
+/// arguments are given nothing is done with them and the program exits.
+///
+/// @param argc Argument count; 1 means only the program name was passed.
+/// @param argv Argument vector; not read.
+/// @return The REPL's result when it runs, otherwise 0.
+int main(int argc, char **argv)
+{
+    (void)argv; // not read; silences the -Wextra unused-parameter warning
+
+    if (argc == 1)
+    {
+        return repl();
+    }
+
+    return 0;
+}