summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStefan Weigl-Bosker <stefan@s00.xyz>2024-12-14 18:10:39 -0500
committerStefan Weigl-Bosker <stefan@s00.xyz>2024-12-14 18:10:39 -0500
commit94b86481395cb2d2a594bb98a1380b9ddc8aa900 (patch)
treefb73afee136f0895392e7cc7eee3528c242883a8
downloadlg-94b86481395cb2d2a594bb98a1380b9ddc8aa900.tar.gz
inital commit
-rw-r--r--LICENSE16
-rw-r--r--build.zig55
-rw-r--r--build.zig.zon72
-rw-r--r--src/main.zig34
-rw-r--r--src/regex.zig200
5 files changed, 377 insertions, 0 deletions
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..1909081
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,16 @@
+ISC License
+
+Copyright (c) 2024 Stefan Weigl-Bosker <stefan@s00.xyz>
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THIS SOFTWARE.
+
diff --git a/build.zig b/build.zig
new file mode 100644
index 0000000..f7e28eb
--- /dev/null
+++ b/build.zig
@@ -0,0 +1,55 @@
+const std = @import("std");
+
+pub fn build(b: *std.Build) void {
+ const target = b.standardTargetOptions(.{});
+ const optimize = b.standardOptimizeOption(.{});
+
+ const exe = b.addExecutable(.{
+ .name = "lg",
+ .root_source_file = b.path("src/main.zig"),
+ .target = target,
+ .optimize = optimize,
+ });
+
+ // This declares intent for the executable to be installed into the
+ // standard location when the user invokes the "install" step (the default
+ // step when running `zig build`).
+ b.installArtifact(exe);
+
+ // This *creates* a Run step in the build graph, to be executed when another
+ // step is evaluated that depends on it. The next line below will establish
+ // such a dependency.
+ const run_cmd = b.addRunArtifact(exe);
+
+ // By making the run step depend on the install step, it will be run from the
+ // installation directory rather than directly from within the cache directory.
+ // This is not necessary, however, if the application depends on other installed
+ // files, this ensures they will be present and in the expected location.
+ run_cmd.step.dependOn(b.getInstallStep());
+
+ // This allows the user to pass arguments to the application in the build
+ // command itself, like this: `zig build run -- arg1 arg2 etc`
+ if (b.args) |args| {
+ run_cmd.addArgs(args);
+ }
+
+ // This creates a build step. It will be visible in the `zig build --help` menu,
+ // and can be selected like this: `zig build run`
+ // This will evaluate the `run` step rather than the default, which is "install".
+ const run_step = b.step("run", "Run the app");
+ run_step.dependOn(&run_cmd.step);
+
+ const exe_unit_tests = b.addTest(.{
+ .root_source_file = b.path("src/main.zig"),
+ .target = target,
+ .optimize = optimize,
+ });
+
+ const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests);
+
+ // Similar to creating the run step earlier, this exposes a `test` step to
+ // the `zig build --help` menu, providing a way for the user to request
+ // running the unit tests.
+ const test_step = b.step("test", "Run unit tests");
+ test_step.dependOn(&run_exe_unit_tests.step);
+}
diff --git a/build.zig.zon b/build.zig.zon
new file mode 100644
index 0000000..6d6fadb
--- /dev/null
+++ b/build.zig.zon
@@ -0,0 +1,72 @@
+.{
+ // This is the default name used by packages depending on this one. For
+ // example, when a user runs `zig fetch --save <url>`, this field is used
+ // as the key in the `dependencies` table. Although the user can choose a
+ // different name, most users will stick with this provided value.
+ //
+ // It is redundant to include "zig" in this name because it is already
+ // within the Zig package namespace.
+ .name = "lg",
+
+ // This is a [Semantic Version](https://semver.org/).
+ // In a future version of Zig it will be used for package deduplication.
+ .version = "0.0.0",
+
+ // This field is optional.
+ // This is currently advisory only; Zig does not yet do anything
+ // with this value.
+ //.minimum_zig_version = "0.11.0",
+
+ // This field is optional.
+ // Each dependency must either provide a `url` and `hash`, or a `path`.
+ // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
+ // Once all dependencies are fetched, `zig build` no longer requires
+ // internet connectivity.
+ .dependencies = .{
+ // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
+ //.example = .{
+ // // When updating this field to a new URL, be sure to delete the corresponding
+ // // `hash`, otherwise you are communicating that you expect to find the old hash at
+ // // the new URL.
+ // .url = "https://example.com/foo.tar.gz",
+ //
+ // // This is computed from the file contents of the directory of files that is
+ // // obtained after fetching `url` and applying the inclusion rules given by
+ // // `paths`.
+ // //
+ // // This field is the source of truth; packages do not come from a `url`; they
+ // // come from a `hash`. `url` is just one of many possible mirrors for how to
+ // // obtain a package matching this `hash`.
+ // //
+ // // Uses the [multihash](https://multiformats.io/multihash/) format.
+ // .hash = "...",
+ //
+ // // When this is provided, the package is found in a directory relative to the
+ // // build root. In this case the package's hash is irrelevant and therefore not
+ // // computed. This field and `url` are mutually exclusive.
+ // .path = "foo",
+
+ // // When this is set to `true`, a package is declared to be lazily
+ // // fetched. This makes the dependency only get fetched if it is
+ // // actually used.
+ // .lazy = false,
+ //},
+ },
+
+ // Specifies the set of files and directories that are included in this package.
+ // Only files and directories listed here are included in the `hash` that
+ // is computed for this package. Only files listed here will remain on disk
+ // when using the zig package manager. As a rule of thumb, one should list
+ // files required for compilation plus any license(s).
+ // Paths are relative to the build root. Use the empty string (`""`) to refer to
+ // the build root itself.
+ // A directory listed here means that all files within, recursively, are included.
+ .paths = .{
+ "build.zig",
+ "build.zig.zon",
+ "src",
+ // For example...
+ //"LICENSE",
+ //"README.md",
+ },
+}
diff --git a/src/main.zig b/src/main.zig
new file mode 100644
index 0000000..8964c3c
--- /dev/null
+++ b/src/main.zig
@@ -0,0 +1,34 @@
+const std = @import("std");
+const Regex = @import("regex.zig");
+
+pub fn main() !void {
+ const stdio = std.io.getStdIn().writer();
+ var gpa = std.heap.GeneralPurposeAllocator(.{}){};
+ var allocator = gpa.allocator();
+
+ defer _ = gpa.deinit();
+
+ const lexer = try allocator.create(Regex.Lexer);
+ lexer.* = Regex.Lexer.init("abc|123[a-Z]()+", &allocator);
+ defer allocator.destroy(lexer);
+
+ while (true) {
+ const token = lexer.advance() catch |err| switch (err) {
+ Regex.Lexer.Error.EndOfBuffer => {
+ break;
+ },
+ else => {
+ return err;
+ },
+ };
+
+ try stdio.print("{}\n", .{token});
+
+ if (token.value) |v| {
+ switch (v) {
+ .Class => |*rl| rl.deinit(),
+ else => {},
+ }
+ }
+ }
+}
diff --git a/src/regex.zig b/src/regex.zig
new file mode 100644
index 0000000..ed66df8
--- /dev/null
+++ b/src/regex.zig
@@ -0,0 +1,200 @@
+const std = @import("std");
+
+pub const Token = struct {
+ pub const Kind = enum { Literal, LParen, RParen, Class, Dot, Star, Plus, Question, Or };
+ pub const Value = union(Token.Kind) { Literal: u8, LParen, RParen, Class: RangeList, Dot, Star, Plus, Question, Or };
+
+ kind: Token.Kind,
+ value: ?Token.Value,
+ pos: usize,
+
+ pub fn format(self: *const Token, comptime fmt: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
+ if (fmt.len != 0) { // must be {} or {any}
+ return std.invalidFmtError(fmt, self);
+ }
+
+ try writer.print("{{ kind: {s}", .{@tagName(self.kind)});
+
+ if (self.value) |val| {
+ switch (val) {
+ .Literal => |literal| try writer.print(", value: {c}", .{literal}),
+ .Class => |rl| try writer.print(", value: {}", .{rl}),
+ else => {},
+ }
+ }
+
+ try writer.print(" }}", .{});
+ }
+};
+
+pub const Lexer = struct {
+ start: usize,
+ cursor: usize,
+ regexp: []const u8,
+ allocator: *std.mem.Allocator,
+
+ pub const Error = error{ EndOfBuffer, InvalidCharacter, UnexpectedCharater };
+
+ pub fn init(regexp: []const u8, allocator: *std.mem.Allocator) Lexer {
+ return .{ .cursor = 0, .start = 0, .regexp = regexp, .allocator = allocator };
+ }
+
+ fn readChar(self: *Lexer) !u8 {
+ if (self.cursor >= self.regexp.len) {
+ return Error.EndOfBuffer;
+ }
+
+ return self.regexp[self.cursor];
+ }
+
+ pub fn advance(self: *Lexer) !Token {
+ self.start = self.cursor;
+
+ var c = try self.readChar();
+
+ const inferred_type: Token.Kind = switch (c) {
+ '(' => .LParen,
+ ')' => .RParen,
+ '[' => .Class,
+ '.' => .Dot,
+ '*' => .Star,
+ '+' => .Plus,
+ '?' => .Question,
+ '|' => .Or,
+ else => .Literal,
+ };
+
+ switch (inferred_type) {
+ .Literal => {
+ if (c == '\\') {
+ self.cursor += 1;
+ c = try self.readChar(); // needs more verbose error handling
+ c = escapedChar(c);
+ }
+ self.cursor += 1;
+ return .{ .kind = inferred_type, .value = .{ .Literal = c }, .pos = self.start };
+ },
+ .Class => {
+ while (c != ']') { // needs more verbose error handling
+ if (self.cursor >= self.regexp.len) {
+ return Error.EndOfBuffer;
+ }
+ self.cursor += 1;
+ }
+ return .{ .kind = inferred_type, .value = .{ .Class = try RangeList.init(self.regexp[self.start + 1 .. self.cursor - 1], self.allocator) }, .pos = self.start };
+ },
+ else => {
+ self.cursor += 1;
+ return .{ .kind = inferred_type, .pos = self.start, .value = null };
+ },
+ }
+ }
+
+ fn escapedChar(c: u8) u8 {
+ return switch (c) {
+ 'n' => '\n', // placeholder for now
+ 't' => '\t',
+ 'r' => '\r',
+ else => c,
+ };
+ }
+};
+
+// could use something like a bst, but thinking about what regex classes people actually write makes me think that a list is more efficient
+pub const RangeList = struct {
+ negated: bool,
+ head: ?*Node,
+ allocator: *std.mem.Allocator,
+
+ const Node = struct {
+ range: struct { min: u8, max: u8 },
+ next: ?*Node,
+ };
+
+ // need to do more error handling
+ pub fn init(str: []const u8, allocator: *std.mem.Allocator) !RangeList {
+ var rl = RangeList{
+ .negated = false,
+ .head = null,
+ .allocator = allocator,
+ };
+
+ var i: usize = 0;
+
+ if (str.len == 0) {
+ return rl;
+ }
+
+ if (str[0] == '^') {
+ rl.negated = true;
+ i += 1;
+ }
+
+ var p = rl.head;
+ while (i < str.len) : (i += 1) {
+ const node = try rl.allocator.create(Node);
+
+ const min = str[i];
+ var max = min;
+
+ i += 1;
+
+ if (i + 1 < str.len and str[i] == '-') {
+ i += 1;
+ max = str[i];
+ i += 1;
+ }
+
+ node.* = .{
+ .range = .{ .min = min, .max = max },
+ .next = null,
+ };
+
+ if (p) |tail| {
+ tail.next = node;
+ p = tail.next;
+ } else {
+ rl.head = node;
+ p = rl.head;
+ }
+ }
+ return rl;
+ }
+
+ pub fn deinit(self: *const RangeList) void {
+ var itr = self.head;
+
+ while (itr) |node| {
+ const prev = node;
+ itr = node.next;
+ self.allocator.destroy(prev);
+ }
+ }
+
+ pub fn format(self: *const RangeList, comptime fmt: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
+ var itr = self.head;
+
+ if (fmt.len != 0) { // must be {} or {any}
+ return std.invalidFmtError(fmt, self);
+ }
+
+ if (self.negated) {
+ try writer.print("^", .{});
+ }
+
+ try writer.print("[", .{});
+
+ while (itr) |node| : (itr = node.next) {
+ if (node.range.min == node.range.max) {
+ try writer.print("{c}", .{node.range.min});
+ } else {
+ try writer.print("{c}-{c}", .{ node.range.min, node.range.max });
+ }
+
+ if (node.next) |_| {
+ try writer.print(", ", .{});
+ }
+ }
+ try writer.print("]", .{});
+ }
+};