summaryrefslogtreecommitdiff
path: root/willow/tools/willowc
diff options
context:
space:
mode:
authorStefan Weigl-Bosker <stefan@s00.xyz>2026-02-03 14:59:53 -0500
committerGitHub <noreply@github.com>2026-02-03 14:59:53 -0500
commitadd95b14f74e6dbe04a6efe98ff0f20424930b73 (patch)
tree13ce413ee4190a4c8f8743c7740aaa8d04353f14 /willow/tools/willowc
parentc5b2905c5a64433f8519531a77d3acc42d881f17 (diff)
downloadcompiler-dev/stefan.tar.gz
[willow]: initial frontend work, unit tests (#8)dev/stefan
Diffstat (limited to 'willow/tools/willowc')
-rw-r--r--willow/tools/willowc/BUILD.bazel27
-rw-r--r--willow/tools/willowc/include/ast.hpp62
-rw-r--r--willow/tools/willowc/include/expr.hpp4
-rw-r--r--willow/tools/willowc/include/parser.hpp33
-rw-r--r--willow/tools/willowc/include/sourcemanager.hpp26
-rw-r--r--willow/tools/willowc/include/tokenizer.hpp72
-rw-r--r--willow/tools/willowc/lib/driver.cpp0
-rw-r--r--willow/tools/willowc/lib/parser.cpp9
-rw-r--r--willow/tools/willowc/lib/sourcemanager.cpp41
-rw-r--r--willow/tools/willowc/lib/tokenizer.cpp176
-rw-r--r--willow/tools/willowc/main.cpp1
11 files changed, 451 insertions, 0 deletions
diff --git a/willow/tools/willowc/BUILD.bazel b/willow/tools/willowc/BUILD.bazel
index e69de29..708de13 100644
--- a/willow/tools/willowc/BUILD.bazel
+++ b/willow/tools/willowc/BUILD.bazel
@@ -0,0 +1,27 @@
+load("@rules_cc//cc:defs.bzl", "cc_library", "cc_binary")
+
+# Library target for the willowc frontend: every source under lib/ together
+# with every header under include/.
+cc_library(
+    name = "willowc_lib",
+    srcs = glob(["lib/*.cpp"]),
+    hdrs = glob(["include/*.hpp"]),
+    copts = [
+        "-std=c++23",
+        "-Wall",
+    ],
+    # Headers are addressed relative to include/, e.g. <tokenizer.hpp>.
+    strip_include_prefix = "include",
+    visibility = ["//visibility:public"],
+    deps = ["//willow"],
+)
+
+# Executable target: main.cpp linked against the frontend library.
+cc_binary(
+    name = "willowc",
+    srcs = ["main.cpp"],
+    visibility = ["//visibility:public"],
+    deps = [":willowc_lib"],
+)
+)
diff --git a/willow/tools/willowc/include/ast.hpp b/willow/tools/willowc/include/ast.hpp
new file mode 100644
index 0000000..8c59067
--- /dev/null
+++ b/willow/tools/willowc/include/ast.hpp
@@ -0,0 +1,62 @@
+#ifndef WILLOWC_INCLUDE_AST_HPP
+#define WILLOWC_INCLUDE_AST_HPP
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <willow/IR/Instructions.h>
+#include <willow/IR/Types.h>
+
+#include <tokenizer.hpp>
+
+namespace willowc {
+
+/// Opcode enumeration reused from the willow IR.
+using Opcode = willow::Instruction::Opcode;
+/// Alias of std::size_t used where a token index is meant.
+using TokenIndex = std::size_t;
+
+// This is like willow::ValueKind, but groups all SSA values into 'Value'
+// (because they can't be differentiated by syntax alone).
+enum class ExprKind { Constant, BasicBlock, Function, Value };
+
+/// Expression node: a kind tag plus a name.
+struct ExprAST {
+  ExprKind kind;
+
+  std::string name;
+  // token??
+};
+
+/// Instruction node: an opcode, a name, and the owned argument expressions.
+struct InstAST {
+  Opcode op;
+  std::string name;
+
+  std::vector<std::unique_ptr<ExprAST>> args;
+};
+
+/// Block node: a label followed by the instructions it owns.
+struct BlockAST {
+  std::string label;
+  std::vector<std::unique_ptr<InstAST>> body;
+};
+
+/// Function parameter: a name and its willow type.
+struct ParameterAST {
+  std::string name;
+  willow::Type type;
+};
+
+/// Function declaration: name, parameters, return type (kept as text) and
+/// the blocks making up the body.
+struct FunctionDeclAST {
+  std::string name;
+  std::vector<std::unique_ptr<ParameterAST>> parameters;
+  std::string returntype;
+
+  std::vector<std::unique_ptr<BlockAST>> body;
+  // TODO: movable symbol table
+};
+
+/// Root node: the functions owned by one module.
+struct ModuleAST {
+  std::vector<std::unique_ptr<FunctionDeclAST>> Functions;
+  // TODO: imports, symbol table
+};
+
+} // namespace willowc
+
+#endif // WILLOWC_INCLUDE_AST_HPP
diff --git a/willow/tools/willowc/include/expr.hpp b/willow/tools/willowc/include/expr.hpp
new file mode 100644
index 0000000..15d2985
--- /dev/null
+++ b/willow/tools/willowc/include/expr.hpp
@@ -0,0 +1,4 @@
+#ifndef WILLOWC_INCLUDE_EXPR_HPP
+#define WILLOWC_INCLUDE_EXPR_HPP
+
+#endif // WILLOWC_INCLUDE_EXPR_HPP
diff --git a/willow/tools/willowc/include/parser.hpp b/willow/tools/willowc/include/parser.hpp
new file mode 100644
index 0000000..825dfdd
--- /dev/null
+++ b/willow/tools/willowc/include/parser.hpp
@@ -0,0 +1,33 @@
+#ifndef WILLOWC_INCLUDE_PARSER_HPP
+#define WILLOWC_INCLUDE_PARSER_HPP
+
+#include <cstddef>
+#include <memory>
+#include <optional>
+#include <string_view>
+#include <vector>
+
+#include <tokenizer.hpp>
+#include <ast.hpp>
+
+namespace willowc {
+
+/// Parser over a single source buffer.
+///
+/// The buffer is held as a std::string_view, so the caller must keep the
+/// underlying characters alive for as long as the parser is used.
+class Parser {
+  std::string_view buf;
+
+  // Parallel arrays: kinds[i] and starts[i] both describe token i.
+  std::vector<TokenKind> kinds;
+  std::vector<std::size_t> starts;
+  Tokenizer tokenizer;
+
+  // Index of the current token. Initialised here so the accessors below
+  // never read an indeterminate value.
+  std::size_t pos = 0;
+
+public:
+  Parser(std::string_view buf) : buf(buf), tokenizer(buf) {}
+
+  /// Parses the buffer into a module; an empty optional signals failure.
+  std::optional<std::unique_ptr<ModuleAST>> parse();
+
+  /// Kind of the current token, or TokenKind::Eof once `pos` is past the
+  /// last recorded token (including before any token has been recorded).
+  TokenKind kind() const {
+    return pos < kinds.size() ? kinds[pos] : TokenKind::Eof;
+  }
+
+  /// Start offset of the current token, or the buffer length once `pos` is
+  /// past the last recorded token.
+  std::size_t start() const {
+    return pos < starts.size() ? starts[pos] : buf.size();
+  }
+};
+
+} // namespace willowc
+
+#endif // WILLOWC_INCLUDE_PARSER_HPP
diff --git a/willow/tools/willowc/include/sourcemanager.hpp b/willow/tools/willowc/include/sourcemanager.hpp
new file mode 100644
index 0000000..a526e48
--- /dev/null
+++ b/willow/tools/willowc/include/sourcemanager.hpp
@@ -0,0 +1,26 @@
+#ifndef WILLOWC_INCLUDE_SOURCEMANAGER_HPP
+#define WILLOWC_INCLUDE_SOURCEMANAGER_HPP
+
+#include <cstdint>
+#include <filesystem>
+#include <memory>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <vector>
+
+namespace willowc {
+
+/// Handle for a file registered with a SourceManager.
+using FileID = std::uint32_t;
+
+/// Owns the contents of every source file that has been added to it.
+class SourceManager {
+  /// One loaded file: the path it is known by and its raw bytes.
+  /// NOTE(review): the buffer length is not stored alongside `buf` — confirm
+  /// how consumers are expected to find the end of the data.
+  struct File {
+    std::string path;
+    std::unique_ptr<char[]> buf;
+  };
+
+public:
+  /// Loads the file at `path` and appends it to the table.
+  /// @return the id of the new entry, or std::nullopt on failure.
+  std::optional<FileID> addFile(std::string_view path);
+
+private:
+  std::vector<File> file_table;
+};
+
+} // namespace willowc
+
+#endif // WILLOWC_INCLUDE_SOURCEMANAGER_HPP
diff --git a/willow/tools/willowc/include/tokenizer.hpp b/willow/tools/willowc/include/tokenizer.hpp
new file mode 100644
index 0000000..3de9d32
--- /dev/null
+++ b/willow/tools/willowc/include/tokenizer.hpp
@@ -0,0 +1,72 @@
+#ifndef WILLOWC_INCLUDE_TOKENIZER_HPP
+#define WILLOWC_INCLUDE_TOKENIZER_HPP
+
+#include <cstddef>
+#include <cstdint>
+#include <string_view>
+
+#include <willow/IR/Location.h>
+
+namespace willowc {
+
+/// Every kind of token the tokenizer can report.
+enum class TokenKind {
+  Function,
+  Variable,
+  Constant,
+  Type,
+  Label,
+  Inst,
+
+  Comma,
+  Semicolon,
+  LParen,
+  RParen,
+  LCurly,
+  RCurly,
+  Equals,
+  RArrow,
+  Comment,
+
+  FuncKW,
+  Eof,
+  Invalid,
+};
+
+/// A token as a pair of offsets into the source buffer plus its kind.
+struct Token {
+  std::size_t start, end;
+  TokenKind kind;
+};
+
+/// Cursor-based scanner over a source buffer.
+///
+/// The buffer is held as a std::string_view, so the caller must keep the
+/// underlying characters alive for as long as the tokenizer is used.
+class Tokenizer {
+  std::string_view buf;
+  std::size_t offset;
+
+  /// Advances the cursor by `idx` characters without bounds checking.
+  void skip(std::size_t idx = 1) { offset += idx; }
+
+  /// Returns the character under the cursor and advances by `num`.
+  /// At or past the end of the buffer, returns '\0' and does not advance.
+  char eat(std::size_t num = 1) {
+    if (offset >= buf.length())
+      return '\0';
+
+    char c = buf[offset];
+    offset += num;
+    return c;
+  }
+
+  /// Returns the character `idx` positions past the cursor, or '\0' when
+  /// that position lies outside the buffer. Never moves the cursor.
+  char peek(std::size_t idx = 0) const {
+    if (offset + idx >= buf.length())
+      return '\0';
+
+    return buf[offset + idx];
+  }
+
+  bool scan_id(bool accept_digits);
+  bool scan_dec();
+  bool scan_hex();
+  bool scan_constant();
+
+public:
+  explicit Tokenizer(std::string_view buf, std::size_t offset = 0)
+      : buf{buf}, offset{offset} {}
+
+  /// Scans and returns the next token starting at the cursor.
+  Token scan();
+
+  /// Moves the cursor to an absolute offset in the buffer. Defined inline
+  /// because no out-of-line definition exists in the sources.
+  void seek(std::uint64_t offset) {
+    this->offset = static_cast<std::size_t>(offset);
+  }
+};
+
+} // namespace willowc
+
+#endif // WILLOWC_INCLUDE_TOKENIZER_HPP
diff --git a/willow/tools/willowc/lib/driver.cpp b/willow/tools/willowc/lib/driver.cpp
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/willow/tools/willowc/lib/driver.cpp
diff --git a/willow/tools/willowc/lib/parser.cpp b/willow/tools/willowc/lib/parser.cpp
new file mode 100644
index 0000000..becc171
--- /dev/null
+++ b/willow/tools/willowc/lib/parser.cpp
@@ -0,0 +1,9 @@
+#include <parser.hpp>
+
+namespace willowc {
+
+// Parsing is not implemented yet: report failure rather than falling off the
+// end of a value-returning function. The definition is qualified so that it
+// implements the member declared in parser.hpp instead of introducing an
+// unrelated free function.
+std::optional<std::unique_ptr<ModuleAST>> Parser::parse() {
+  return std::nullopt;
+}
+
+}
diff --git a/willow/tools/willowc/lib/sourcemanager.cpp b/willow/tools/willowc/lib/sourcemanager.cpp
new file mode 100644
index 0000000..e2a8e72
--- /dev/null
+++ b/willow/tools/willowc/lib/sourcemanager.cpp
@@ -0,0 +1,41 @@
+#include <filesystem>
+
+#include <fstream>
+#include <sourcemanager.hpp>
+
+namespace willowc {
+
+// Loads the file named by `_path` into memory and registers it.
+//
+// The path is canonicalised first; it must name an existing regular file.
+// The contents are read in binary mode into a buffer one byte larger than
+// the file, so the stored data always ends in a '\0' terminator.
+//
+// Returns the id of the new table entry, or std::nullopt if any filesystem
+// query fails, the file cannot be opened, or fewer bytes than expected are
+// read.
+std::optional<FileID> SourceManager::addFile(std::string_view _path) {
+  std::error_code ec;
+
+  const std::filesystem::path uncanonical_path{_path};
+  auto path = std::filesystem::weakly_canonical(uncanonical_path, ec);
+  if (ec)
+    return std::nullopt; // `return false` would yield a valid-looking id 0
+
+  std::string display_path = path.make_preferred().string();
+
+  if (!std::filesystem::exists(path, ec) || ec)
+    return std::nullopt;
+
+  if (!std::filesystem::is_regular_file(path, ec) || ec)
+    return std::nullopt;
+
+  const auto reported_size = std::filesystem::file_size(path, ec);
+  if (ec)
+    return std::nullopt;
+  const auto filesize = static_cast<std::size_t>(reported_size);
+
+  std::ifstream f{display_path, std::ios::binary};
+  if (!f)
+    return std::nullopt;
+
+  // make_unique<char[]> value-initialises, so the extra byte is the '\0'.
+  auto buf = std::make_unique<char[]>(filesize + 1);
+  f.read(buf.get(), static_cast<std::streamsize>(filesize));
+  if (f.gcount() != static_cast<std::streamsize>(filesize))
+    return std::nullopt; // short read: do not register a partial file
+
+  const auto id = static_cast<FileID>(file_table.size());
+  file_table.push_back(File{std::move(display_path), std::move(buf)});
+
+  return id;
+}
+
+} // namespace willowc
diff --git a/willow/tools/willowc/lib/tokenizer.cpp b/willow/tools/willowc/lib/tokenizer.cpp
new file mode 100644
index 0000000..0c1f917
--- /dev/null
+++ b/willow/tools/willowc/lib/tokenizer.cpp
@@ -0,0 +1,176 @@
+#include <tokenizer.hpp>
+
+namespace willowc {
+
+// True for the four blank characters the tokenizer skips: space, tab,
+// line feed and carriage return.
+static inline bool is_space(unsigned char c) {
+  switch (c) {
+  case ' ':
+  case '\t':
+  case '\n':
+  case '\r':
+    return true;
+  default:
+    return false;
+  }
+}
+// True for the ASCII decimal digits '0' through '9'.
+static inline bool is_digit(unsigned char c) {
+  if (c < '0')
+    return false;
+  return c <= '9';
+}
+// True for ASCII hexadecimal digits: 0-9, a-f and A-F.
+static inline bool is_xdigit(unsigned char c) {
+  if (is_digit(c))
+    return true;
+
+  // Setting bit 0x20 maps 'A'..'F' onto 'a'..'f' and leaves 'a'..'f' as is.
+  const unsigned char folded = static_cast<unsigned char>(c | 0x20);
+  return folded >= 'a' && folded <= 'f';
+}
+// True for the ASCII letters a-z and A-Z.
+static inline bool is_alpha(unsigned char c) {
+  const bool lower = c >= 'a' && c <= 'z';
+  const bool upper = c >= 'A' && c <= 'Z';
+  return lower || upper;
+}
+
+// True when `c` may begin an identifier: an ASCII letter or one of
+// '$', '.', '_' and '-'.
+static inline bool valid_id_start(int c) {
+  switch (c) {
+  case '$':
+  case '.':
+  case '_':
+  case '-':
+    return true;
+  default:
+    return is_alpha(c);
+  }
+}
+
+// Consumes an identifier at the cursor.
+//
+// With `accept_digits` set, a leading digit starts a purely numeric
+// identifier and only the run of digits is consumed. Otherwise the first
+// character must satisfy valid_id_start(), after which any mix of
+// identifier-start characters and digits is consumed.
+//
+// Returns true if at least one character was consumed as an identifier,
+// false (leaving the cursor untouched) if none can start here.
+bool Tokenizer::scan_id(bool accept_digits = true) {
+  const char first = peek();
+
+  if (accept_digits && is_digit(first)) {
+    // if it starts with a digit, must be all digits
+    while (is_digit(peek()))
+      skip();
+    return true;
+  }
+
+  if (!valid_id_start(first))
+    return false;
+
+  // Use the local is_digit: <cctype> is not included here and its isdigit
+  // is undefined for negative char values.
+  while (valid_id_start(peek()) || is_digit(peek()))
+    skip();
+
+  return true;
+}
+
+// Scans the next token at the cursor and returns its [start, end) offsets
+// together with its kind.
+//
+// Leading whitespace is skipped before the start offset is recorded, so the
+// token range covers only the token's own characters. Every result except
+// Eof consumes at least one character, so a caller looping until Eof always
+// makes progress, even across Invalid tokens.
+Token Tokenizer::scan() {
+  while (is_space(peek()))
+    skip();
+
+  const std::size_t start = offset;
+
+  const TokenKind k = [&] {
+    switch (peek()) {
+    case '@':
+      skip();
+      // function names may not be purely numeric
+      return scan_id(false) ? TokenKind::Function : TokenKind::Invalid;
+    case '%':
+      skip();
+      return scan_id() ? TokenKind::Variable : TokenKind::Invalid;
+    case '^':
+      skip();
+      return scan_id() ? TokenKind::Label : TokenKind::Invalid;
+    case ',':
+      skip();
+      return TokenKind::Comma;
+    case ';':
+      skip();
+      return TokenKind::Semicolon;
+    case '(':
+      skip();
+      return TokenKind::LParen;
+    case ')':
+      skip();
+      return TokenKind::RParen;
+    case '{':
+      skip();
+      return TokenKind::LCurly;
+    case '}':
+      skip();
+      return TokenKind::RCurly;
+    case '=':
+      skip();
+      return TokenKind::Equals;
+    case '-': {
+      if (peek(1) == '>') {
+        skip(2);
+        return TokenKind::RArrow;
+      }
+      // scan_constant() consumes the sign itself, so negative literals obey
+      // the same rules as positive ones (hex allowed, no leading zeros).
+      if (is_digit(peek(1)))
+        return scan_constant() ? TokenKind::Constant : TokenKind::Invalid;
+
+      skip(); // consume the stray '-' so the caller makes progress
+      return TokenKind::Invalid;
+    }
+    case '/': {
+      skip();
+      if (peek() != '/')
+        return TokenKind::Invalid;
+
+      // line comment: consume through the newline or to end of input
+      skip();
+      char c = eat();
+      while (c != '\0' && c != '\n')
+        c = eat();
+
+      return TokenKind::Comment;
+    }
+    case '\0':
+      return TokenKind::Eof;
+    default: {
+      if (is_digit(peek()))
+        return scan_constant() ? TokenKind::Constant : TokenKind::Invalid;
+
+      // A type is 'i' followed directly by digits. Look ahead before
+      // consuming, so a word that merely begins with 'i' is still scanned
+      // whole as an instruction below.
+      if (peek() == 'i' && is_digit(peek(1))) {
+        skip();
+        scan_dec();
+        return TokenKind::Type;
+      }
+
+      if (is_alpha(peek())) {
+        skip();
+        while (is_alpha(peek()) || is_digit(peek()) || peek() == '.')
+          skip();
+        return TokenKind::Inst;
+      }
+
+      skip(); // consume the unrecognised character
+      return TokenKind::Invalid;
+    }
+    }
+  }();
+
+  return Token{start, offset, k};
+}
+
+// Consumes one or more decimal digits at the cursor.
+// Returns false, consuming nothing, when the cursor is not on a digit.
+bool Tokenizer::scan_dec() {
+  if (!is_digit(peek()))
+    return false;
+
+  do {
+    skip();
+  } while (is_digit(peek()));
+
+  return true;
+}
+
+// Consumes one or more hexadecimal digits at the cursor.
+// Returns false, consuming nothing, when the cursor is not on a hex digit.
+bool Tokenizer::scan_hex() {
+  bool consumed_any = false;
+
+  for (; is_xdigit(peek()); skip())
+    consumed_any = true;
+
+  return consumed_any;
+}
+
+// Consumes a numeric literal at the cursor: an optional '-', then either a
+// lone "0", "0x" followed by hex digits, or a decimal number that does not
+// begin with '0'.
+//
+// Returns false when no valid literal is found; characters consumed before
+// the failure was detected are not restored.
+bool Tokenizer::scan_constant() {
+  if (peek() == '-')
+    skip();
+
+  if (!is_digit(peek()))
+    return false;
+
+  if (peek() != '0')
+    return scan_dec();
+
+  skip(); // the leading '0'
+
+  // a '0' directly followed by another digit is rejected
+  if (is_digit(peek()))
+    return false;
+
+  if (peek() != 'x')
+    return true; // 0
+
+  skip(); // the 'x'
+  return scan_hex();
+}
+
+} // namespace willowc
diff --git a/willow/tools/willowc/main.cpp b/willow/tools/willowc/main.cpp
new file mode 100644
index 0000000..237c8ce
--- /dev/null
+++ b/willow/tools/willowc/main.cpp
@@ -0,0 +1 @@
+// Placeholder entry point; does nothing and exits successfully.
+int main() { return 0; }