summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorStefan Weigl-Bosker <stefan@s00.xyz>2024-12-20 17:22:33 -0500
committerStefan Weigl-Bosker <stefan@s00.xyz>2024-12-20 17:22:33 -0500
commit32d0ac56c27719c6fcde70c51238f11cf5f44141 (patch)
treeda490e13f07be4573a0530c184fd873dc9c824dc /src
parentdca73fef741a57c8cc1913def98a305c2b709391 (diff)
downloadlg-32d0ac56c27719c6fcde70c51238f11cf5f44141.tar.gz
regex parsing
Diffstat (limited to 'src')
-rw-r--r--src/main.zig37
-rw-r--r--src/parser.zig153
-rw-r--r--src/regex.zig60
3 files changed, 229 insertions, 21 deletions
diff --git a/src/main.zig b/src/main.zig
index 8964c3c..f28ab18 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -9,26 +9,23 @@ pub fn main() !void {
defer _ = gpa.deinit();
const lexer = try allocator.create(Regex.Lexer);
+
lexer.* = Regex.Lexer.init("abc|123[a-Z]()+", &allocator);
- defer allocator.destroy(lexer);
-
- while (true) {
- const token = lexer.advance() catch |err| switch (err) {
- Regex.Lexer.Error.EndOfBuffer => {
- break;
- },
- else => {
- return err;
- },
- };
-
- try stdio.print("{}\n", .{token});
-
- if (token.value) |v| {
- switch (v) {
- .Class => |*rl| rl.deinit(),
- else => {},
- }
- }
+
+ const tl = try lexer.scan();
+
+ var it = tl.first;
+ while (it) |node| : (it = node.next) {
+ try stdio.print("{}\n", .{&(node.data)});
}
+
+ const parser = try allocator.create(Regex.Parser);
+
+ parser.* = Regex.Parser.init(&lexer.tokens, &allocator);
+
+ _ = try parser.parseRe();
+
+ lexer.deinit();
+ allocator.destroy(lexer);
+ allocator.destroy(parser);
}
diff --git a/src/parser.zig b/src/parser.zig
new file mode 100644
index 0000000..49fcdb6
--- /dev/null
+++ b/src/parser.zig
@@ -0,0 +1,153 @@
+const std = @import("std");
+const Token = @import("regex.zig").Token;
+
+const TokenList = std.SinglyLinkedList(Token);
+
+pub const Rule = enum { Terminal, Re, Rer, Cat, Catr, E, L };
+
+pub const ParseTree = struct {
+ rule: Rule,
+ token: ?Token,
+ sibling: ?*ParseTree,
+ child: ?*ParseTree,
+
+ pub fn init(rule: Rule, allocator: *std.mem.Allocator) !*ParseTree {
+ const t = try allocator.create(ParseTree);
+
+ t.* = .{
+ .rule = rule,
+ .token = null,
+ .sibling = null,
+ .child = null,
+ };
+
+ return t;
+ }
+
+ pub fn appendChild(self: *ParseTree, child: *ParseTree) void {
+ if (self.child) |c| {
+ var p = c;
+ while (p.sibling) |sibling| {
+ p = sibling;
+ }
+ p.sibling = child;
+ } else {
+ self.child = child;
+ }
+ }
+};
+
+pub const Parser = struct {
+ tokens: *const TokenList,
+ itr: ?*TokenList.Node,
+ allocator: *std.mem.Allocator,
+
+ pub const Error = error{ UnexpectedToken, ExpectedToken };
+
+ pub fn init(tl: *const TokenList, allocator: *std.mem.Allocator) Parser {
+ return .{ .tokens = tl, .allocator = allocator, .itr = tl.first.? };
+ }
+
+ pub fn expectToken(self: *Parser, kind: Token.Kind) !*ParseTree {
+ if (self.itr) |node| {
+ if (node.data.kind == kind) {
+ const t = try ParseTree.init(Rule.Terminal, self.allocator);
+
+ t.token = node.data;
+
+ self.itr = node.next;
+
+ return t;
+ } else {
+ std.debug.print("{d} | error: expected token of kind {s} but found token of kind {s}", .{ node.data.pos, @tagName(kind), @tagName(node.data.kind) });
+ return Error.UnexpectedToken;
+ }
+ } else {
+ std.debug.print("error: expected token of kind {s} but reached end of stream!\n", .{@tagName(kind)}); // should use actual logging eventually
+ return Error.ExpectedToken;
+ }
+ }
+
+ pub fn parseRe(self: *Parser) error{ ExpectedToken, UnexpectedToken, OutOfMemory }!*ParseTree {
+ const t = try ParseTree.init(Rule.Re, self.allocator);
+
+ t.appendChild(try self.parseCat());
+ t.appendChild(try self.parseRer());
+
+ return t;
+ }
+
+ pub fn parseRer(self: *Parser) !*ParseTree {
+ const t = try ParseTree.init(Rule.Rer, self.allocator);
+
+ if (self.itr) |_| {
+ t.appendChild(try self.expectToken(Token.Kind.Or));
+ t.appendChild(try self.parseCat());
+ } // else -> epsilon
+
+ return t;
+ }
+
+ pub fn parseCat(self: *Parser) !*ParseTree {
+ const t = try ParseTree.init(Rule.Cat, self.allocator);
+
+ t.appendChild(try self.parseE());
+ t.appendChild(try self.parseCatr());
+
+ return t;
+ }
+
+ pub fn parseCatr(self: *Parser) !*ParseTree {
+ const t = try ParseTree.init(Rule.Catr, self.allocator);
+
+ if (self.itr) |_| {
+ t.appendChild(try self.parseE());
+ t.appendChild(try self.parseCatr());
+ }
+
+ return t;
+ }
+
+ pub fn parseE(self: *Parser) !*ParseTree {
+ const t = try ParseTree.init(Rule.E, self.allocator);
+
+ t.appendChild(try self.parseL());
+
+ if (self.itr) |node| {
+ switch (node.data.kind) {
+ .Plus, .Question, .Star => {
+ t.appendChild(try self.expectToken(node.data.kind)); // bruh
+ },
+ else => {},
+ }
+ } else {
+ return Error.ExpectedToken;
+ }
+
+ return t;
+ }
+
+ pub fn parseL(self: *Parser) !*ParseTree {
+ const t = try ParseTree.init(Rule.L, self.allocator);
+
+ if (self.itr) |node| {
+ switch (node.data.kind) {
+ .Literal, .Class, .Dot => {
+ t.appendChild(try self.expectToken(node.data.kind)); // bruh
+ },
+ .LParen => {
+ t.appendChild(try self.expectToken(Token.Kind.LParen));
+ t.appendChild(try self.parseRe());
+ t.appendChild(try self.expectToken(Token.Kind.RParen));
+ },
+ else => {
+ return Error.UnexpectedToken;
+ },
+ }
+ } else {
+ return Error.ExpectedToken;
+ }
+
+ return t;
+ }
+};
diff --git a/src/regex.zig b/src/regex.zig
index a2f9c79..de24dc0 100644
--- a/src/regex.zig
+++ b/src/regex.zig
@@ -1,4 +1,6 @@
const std = @import("std");
+pub const Parser = @import("parser.zig").Parser;
+pub const ParseTree = @import("parser.zig").ParseTree;
pub const Token = struct {
pub const Kind = enum { Literal, LParen, RParen, Class, Dot, Star, Plus, Question, Or };
@@ -25,6 +27,15 @@ pub const Token = struct {
try writer.print(" }}", .{});
}
+
+ pub fn deinit(self: *const Token) void {
+ if (self.value) |val| {
+ switch (val) {
+ .Class => |*rl| rl.deinit(),
+ else => {},
+ }
+ }
+ }
};
pub const Lexer = struct {
@@ -32,11 +43,27 @@ pub const Lexer = struct {
cursor: usize,
regexp: []const u8,
allocator: *std.mem.Allocator,
+ tokens: std.SinglyLinkedList(Token),
pub const Error = error{ EndOfBuffer, InvalidCharacter, UnexpectedCharater };
pub fn init(regexp: []const u8, allocator: *std.mem.Allocator) Lexer {
- return .{ .cursor = 0, .start = 0, .regexp = regexp, .allocator = allocator };
+ return .{ .cursor = 0, .start = 0, .regexp = regexp, .allocator = allocator, .tokens = std.SinglyLinkedList(Token){ .first = null } };
+ }
+
+ pub fn deinit(self: *Lexer) void {
+ var it = self.tokens.first;
+
+ while (it) |node| {
+ if (node.data.value) |val| {
+ switch (val) {
+ .Class => |*rl| rl.deinit(),
+ else => {},
+ }
+ }
+ it = node.next;
+ self.allocator.destroy(node);
+ }
}
fn readChar(self: *Lexer) !u8 {
@@ -47,6 +74,37 @@ pub const Lexer = struct {
return self.regexp[self.cursor];
}
+ pub fn scan(self: *Lexer) !std.SinglyLinkedList(Token) {
+ if (self.tokens.first) |_| {
+ return self.tokens;
+ } else {
+ self.tokens.first = try self.allocator.create(std.SinglyLinkedList(Token).Node);
+ }
+
+ var tail: ?*std.SinglyLinkedList(Token).Node = null;
+ var node = self.tokens.first;
+
+ while (true) {
+ const token: Token = self.advance() catch |err| switch (err) {
+ Error.EndOfBuffer => {
+ self.allocator.destroy(node.?);
+ if (tail) |t| {
+ t.next = null;
+ }
+ return self.tokens;
+ },
+ else => {
+ return err;
+ },
+ };
+
+ node.?.data = token;
+ node.?.next = try self.allocator.create(std.SinglyLinkedList(Token).Node);
+ tail = node;
+ node = node.?.next;
+ }
+ }
+
pub fn advance(self: *Lexer) !Token {
self.start = self.cursor;