From: Alexander Goussas Date: Mon, 13 Apr 2026 02:43:13 +0000 (-0500) Subject: start parsing markdown bodies X-Git-Tag: v0.0.1~12 X-Git-Url: http://git.frustrated-labs.net/?a=commitdiff_plain;h=a428c8dfca79d546eef3a2ba423f3bfb5100e4c1;p=frustrated-functor.dev.git start parsing markdown bodies --- diff --git a/bin/blog-processor/src/markdown_parser.zig b/bin/blog-processor/src/markdown_parser.zig index e909dba..738d021 100644 --- a/bin/blog-processor/src/markdown_parser.zig +++ b/bin/blog-processor/src/markdown_parser.zig @@ -7,36 +7,88 @@ const MarkdownParserError = error{ UnexpectedToken, }; +const MarkdownNode = union(enum) { + h1: []const u8, + h2: []const u8, + p: []const u8, +}; + pub const MarkdownDoc = struct { date: []const u8, summary: []const u8, - content: []const u8, + content: std.ArrayList(MarkdownNode), - pub fn parse(doc: []const u8, alloc: std.mem.Allocator) MarkdownParserError!@This() { - _ = alloc; + pub fn deinit(self: *@This(), alloc: std.mem.Allocator) void { + self.content.deinit(alloc); + } + pub fn parse(doc: []const u8, alloc: std.mem.Allocator) !@This() { var start: usize = 0; var current: usize = 0; var date: []const u8 = undefined; var summary: []const u8 = undefined; + var nodes: std.ArrayList(MarkdownNode) = .empty; while (current < doc.len) { start = current; const c = doc[current]; switch (c) { '-' => try parse_frontmatter(&current, doc, &date, &summary), - else => current += 1, + else => try parse_body(alloc, doc, &current, &nodes), } } return .{ .date = date, .summary = summary, - .content = undefined + .content = nodes, }; } + fn parse_body(alloc: std.mem.Allocator, doc: []const u8, current: *usize, nodes: *std.ArrayList(MarkdownNode)) !void { + advanceWhileFn(doc, current, std.ascii.isWhitespace); + + while (current.* < doc.len) { + switch (doc[current.*]) { + '#' => { + if (current.* + 1 < doc.len and doc[current.* + 1] == '#') { + try parse_header_2(doc, current, nodes); + } else { + try parse_header_1(alloc, doc, current, nodes); + } + 
}, + else => try parse_paragraph(doc, current, nodes), + } + } + } + + fn parse_header_1(alloc: std.mem.Allocator, doc: []const u8, current: *usize, nodes: *std.ArrayList(MarkdownNode)) !void { + try expectToken(doc, '#', current); + advanceWhile(doc, current, ' '); + + const start = current.*; + + advanceWhileNot(doc, current, '\n'); + try nodes.append(alloc, .{ .h1 = doc[start..current.*] }); + + if (current.* < doc.len and doc[current.*] == '\n') { + current.* += 1; + } + } + + fn parse_header_2(doc: []const u8, current: *usize, nodes: *std.ArrayList(MarkdownNode)) !void { + _ = doc; + _ = current; + _ = nodes; + } + + fn parse_paragraph(doc: []const u8, current: *usize, nodes: *std.ArrayList(MarkdownNode)) !void { + _ = doc; + _ = current; + _ = nodes; + } + fn parse_frontmatter( current: *usize, doc: []const u8, @@ -72,13 +124,12 @@ pub const MarkdownDoc = struct { } fn expectToken(doc: []const u8, c: u8, current: *usize) MarkdownParserError!void { - if (doc[current.*] != c) { - const errMsg = "Expected '" ++ [1]u8{c} ++ "', but got '" ++ [1]u8{doc[current.*]} ++ "'"; + if (current.* < doc.len and doc[current.*] != c) { if (!@inComptime()) { - logger.err(errMsg, .{}); + logger.err("Expected '{}' but got '{}'", .{c, doc[current.*]}); return error.UnexpectedToken; } else { - @compileError(errMsg); + @compileError("Expected '" ++ [1]u8{c} ++ "', but got '" ++ [1]u8{doc[current.*]} ++ "'"); } } assert(doc[current.*] == c); @@ -133,3 +184,22 @@ test "can parse summary in frontmatter" { try std.testing.expectEqualStrings("This is the shit!", result.summary); } + +test "can parse h1 in body without newline at end" { + const doc = + \\---- + \\date: 12/04/2026 + \\summary: This is the shit! 
+ \\---- + \\ + \\# The Post's Title + ; + + const alloc = std.testing.allocator; + + var result = MarkdownDoc.parse(doc, alloc) catch unreachable; + defer result.deinit(alloc); + + try std.testing.expectEqual(1, result.content.items.len); + try std.testing.expectEqualStrings("The Post's Title", result.content.items[0].h1); +}