start parsing markdown bodies

author Alexander Goussas <[email protected]>

Mon, 13 Apr 2026 02:43:13 +0000 (21:43 -0500)

committer Alexander Goussas <[email protected]>

Mon, 13 Apr 2026 02:43:13 +0000 (21:43 -0500)
author Alexander Goussas <[email protected]>
Mon, 13 Apr 2026 02:43:13 +0000 (21:43 -0500)
committer Alexander Goussas <[email protected]>
Mon, 13 Apr 2026 02:43:13 +0000 (21:43 -0500)
diff --git a/bin/blog-processor/src/markdown_parser.zig b/bin/blog-processor/src/markdown_parser.zig

index e909dba4d10a9f59457ab035e2e11b8ff7fd3f06..738d021447842f2fdbf78fe91c44786bf0813cfb 100644 (file)
--- a/bin/blog-processor/src/markdown_parser.zig
+++ b/bin/blog-processor/src/markdown_parser.zig
@@ -7,36 +7,88 @@ const MarkdownParserError = error{
      UnexpectedToken,
  };
  
+/// One block-level element of a parsed Markdown body.
+///
+/// The payload of each variant is the element's text as a byte slice. `h1`
+/// payloads produced by the parser in this file are sub-slices of the input
+/// document rather than copies.
+const MarkdownNode = union(enum) {
+    /// Text of a `#` heading, without the marker or the spaces following it.
+    h1: []const u8,
+    /// Text of a `##` heading (intended; see `parse_header_2`).
+    h2: []const u8,
+    /// Text of a paragraph (intended; see `parse_paragraph`).
+    p: []const u8,
+};
+
  pub const MarkdownDoc = struct {
      date: []const u8,
      summary: []const u8,
-    content: []const u8,
+    content: std.ArrayList(MarkdownNode),
  
-    pub fn parse(doc: []const u8, alloc: std.mem.Allocator) MarkdownParserError!@This() {
-        _ = alloc;
+    /// Releases the node list held in `content`.
+    ///
+    /// `alloc` must be the allocator that was handed to `parse`, since that is
+    /// the one the list was grown with. The document is invalid afterwards and
+    /// must not be used again.
+    pub fn deinit(self: *@This(), alloc: std.mem.Allocator) void {
+        self.content.deinit(alloc);
+        // Poison the whole struct so a use-after-deinit trips safety checks
+        // instead of silently reading a freed list.
+        self.* = undefined;
+    }
  
+    /// Parses a complete document: frontmatter followed by a Markdown body.
+    ///
+    /// A `-` at the current position hands control to `parse_frontmatter`;
+    /// anything else is handed to `parse_body`, which collects body nodes.
+    ///
+    /// `date` and `summary` are only written through the pointers given to
+    /// `parse_frontmatter`, so they are left undefined when the document has
+    /// no frontmatter.
+    ///
+    /// The caller owns the returned document and must release it with
+    /// `deinit`, passing the same `alloc`. On error nothing is leaked.
+    pub fn parse(doc: []const u8, alloc: std.mem.Allocator) (MarkdownParserError || std.mem.Allocator.Error)!@This() {
          var start: usize = 0;
          var current: usize = 0;
  
          var date: []const u8 = undefined;
          var summary: []const u8 = undefined;
+        var nodes: std.ArrayList(MarkdownNode) = .empty;
+        // Nodes appended before a later failure would otherwise leak, because
+        // the caller never receives a document to deinit.
+        errdefer nodes.deinit(alloc);
  
          while (current < doc.len) {
              start = current;
              const c = doc[current];
              switch (c) {
                  '-' => try parse_frontmatter(&current, doc, &date, &summary),
-                else => current += 1,
+                else => try parse_body(alloc, doc, &current, &nodes),
              }
          }
  
          return .{
              .date = date,
              .summary = summary,
-            .content = undefined
+            .content = nodes,
          };
      }
  
+    /// Parses everything from `current.*` to the end of `doc` as body blocks,
+    /// appending one node per block to `nodes`.
+    ///
+    /// A block starting with `##` becomes an `h2`, any other block starting
+    /// with `#` becomes an `h1`, and all remaining text is grouped into `p`
+    /// nodes. Node payloads are slices of `doc`; nothing is copied.
+    fn parse_body(alloc: std.mem.Allocator, doc: []const u8, current: *usize, nodes: *std.ArrayList(MarkdownNode)) !void {
+        while (current.* < doc.len) {
+            // Skip blank lines and indentation between blocks. Doing this on
+            // every iteration means each block parser starts on a
+            // non-whitespace byte, so every pass through the loop advances.
+            advanceWhileFn(doc, current, std.ascii.isWhitespace);
+            if (current.* >= doc.len) break;
+
+            if (doc[current.*] != '#') {
+                try parse_paragraph(alloc, doc, current, nodes);
+            } else if (current.* + 1 < doc.len and doc[current.* + 1] == '#') {
+                try parse_header_2(alloc, doc, current, nodes);
+            } else {
+                try parse_header_1(alloc, doc, current, nodes);
+            }
+        }
+    }
+
+    /// Returns the remainder of the current line with leading spaces skipped,
+    /// and leaves `current.*` just past the terminating newline if there is one.
+    fn parse_header_text(doc: []const u8, current: *usize) []const u8 {
+        advanceWhile(doc, current, ' ');
+
+        const start = current.*;
+        advanceWhileNot(doc, current, '\n');
+        const text = doc[start..current.*];
+
+        // The last line of the document may not end in a newline.
+        if (current.* < doc.len and doc[current.*] == '\n') {
+            current.* += 1;
+        }
+        return text;
+    }
+
+    /// Parses a `# Title` line at `current.*` and appends it as an `h1` node.
+    fn parse_header_1(alloc: std.mem.Allocator, doc: []const u8, current: *usize, nodes: *std.ArrayList(MarkdownNode)) !void {
+        try expectToken(doc, '#', current);
+        try nodes.append(alloc, .{ .h1 = parse_header_text(doc, current) });
+    }
+
+    /// Parses a `## Title` line at `current.*` and appends it as an `h2` node.
+    ///
+    /// Deeper headings (`###` and beyond) also land here; their extra `#`
+    /// characters stay in the node text because `MarkdownNode` has no variant
+    /// for them.
+    fn parse_header_2(alloc: std.mem.Allocator, doc: []const u8, current: *usize, nodes: *std.ArrayList(MarkdownNode)) !void {
+        try expectToken(doc, '#', current);
+        try expectToken(doc, '#', current);
+        try nodes.append(alloc, .{ .h2 = parse_header_text(doc, current) });
+    }
+
+    /// Parses a paragraph starting at `current.*` and appends it as a `p` node.
+    ///
+    /// The paragraph runs until a blank line, a line that starts a heading, or
+    /// the end of `doc`. Trailing whitespace is dropped from the node text and
+    /// nothing is appended if no text remains. `current.*` always moves forward
+    /// when it is inside `doc`, so the caller's loop cannot stall here.
+    fn parse_paragraph(alloc: std.mem.Allocator, doc: []const u8, current: *usize, nodes: *std.ArrayList(MarkdownNode)) !void {
+        const start = current.*;
+        var end = start;
+
+        while (current.* < doc.len) {
+            advanceWhileNot(doc, current, '\n');
+            end = current.*;
+
+            // Step over the newline that ended this line, if any.
+            if (current.* < doc.len) current.* += 1;
+
+            // Look past the next line's indentation to decide whether it
+            // still belongs to this paragraph.
+            var peek = current.*;
+            while (peek < doc.len and (doc[peek] == ' ' or doc[peek] == '\t' or doc[peek] == '\r')) {
+                peek += 1;
+            }
+            if (peek >= doc.len or doc[peek] == '\n' or doc[peek] == '#') break;
+        }
+
+        while (end > start and std.ascii.isWhitespace(doc[end - 1])) {
+            end -= 1;
+        }
+
+        if (end > start) {
+            try nodes.append(alloc, .{ .p = doc[start..end] });
+        }
+    }
+
      fn parse_frontmatter(
          current: *usize,
          doc: []const u8,
@@ -72,13 +124,12 @@ pub const MarkdownDoc = struct {
      }
  
      fn expectToken(doc: []const u8, c: u8, current: *usize) MarkdownParserError!void {
-        if (doc[current.*] != c) {
-            const errMsg = "Expected '" ++ [1]u8{c} ++ "', but got '" ++ [1]u8{doc[current.*]} ++ "'";
+        if (current.* < doc.len and doc[current.*] != c) {
              if (!@inComptime()) {
-                logger.err(errMsg, .{});
+                logger.err("Expected '{}' but got '{}'", .{c, doc[current.*]});
                  return error.UnexpectedToken;
              } else {
-                @compileError(errMsg);
+                @compileError("Expected '" ++ [1]u8{c} ++ "', but got '" ++ [1]u8{doc[current.*]} ++ "'");
              }
          }
          assert(doc[current.*] == c);
@@ -133,3 +184,22 @@ test "can parse summary in frontmatter" {
  
      try std.testing.expectEqualStrings("This is the shit!", result.summary);
  }
+
+// Checks that a body consisting of a single `# ` heading with no trailing
+// newline yields exactly one `h1` node holding the heading text.
+test "can parse h1 in body without newline at end" {
+    const doc =
+        \\----
+        \\date: 12/04/2026
+        \\summary: This is the shit!
+        \\----
+        \\
+        \\# The Post's Title
+        ;
+
+    const alloc = std.testing.allocator;
+
+    // Propagate parse failures so the runner reports the actual error instead
+    // of hitting `unreachable`.
+    var result = try MarkdownDoc.parse(doc, alloc);
+    defer result.deinit(alloc);
+
+    try std.testing.expectEqual(1, result.content.items.len);
+    try std.testing.expectEqualStrings("The Post's Title", result.content.items[0].h1);
+}
author	Alexander Goussas <[email protected]>
	Mon, 13 Apr 2026 02:43:13 +0000 (21:43 -0500)
committer	Alexander Goussas <[email protected]>
	Mon, 13 Apr 2026 02:43:13 +0000 (21:43 -0500)