summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStefan Weigl-Bosker <stefan@s00.xyz>2026-02-03 14:59:53 -0500
committerGitHub <noreply@github.com>2026-02-03 14:59:53 -0500
commitadd95b14f74e6dbe04a6efe98ff0f20424930b73 (patch)
tree13ce413ee4190a4c8f8743c7740aaa8d04353f14
parentc5b2905c5a64433f8519531a77d3acc42d881f17 (diff)
downloadcompiler-add95b14f74e6dbe04a6efe98ff0f20424930b73.tar.gz
[willow]: initial frontend work, unit tests (#8)dev/stefan
-rw-r--r--BUILD.bazel2
-rw-r--r--MODULE.bazel1
-rw-r--r--MODULE.bazel.lock12
-rw-r--r--nix/flake.nix3
-rw-r--r--willow/BUILD.bazel18
-rw-r--r--willow/include/willow/ADT/IList.h76
-rw-r--r--willow/include/willow/IR/BasicBlock.h24
-rw-r--r--willow/include/willow/IR/Constant.h16
-rw-r--r--willow/include/willow/IR/Context.h7
-rw-r--r--willow/include/willow/IR/Function.h7
-rw-r--r--willow/include/willow/IR/IRBuilder.h3
-rw-r--r--willow/include/willow/IR/Instruction.h28
-rw-r--r--willow/include/willow/IR/Location.h2
-rw-r--r--willow/include/willow/IR/Types.h23
-rw-r--r--willow/include/willow/IR/Value.h4
-rw-r--r--willow/lib/IR/Instruction.cpp18
-rw-r--r--willow/lib/IR/Value.cpp27
-rw-r--r--willow/tools/willowc/BUILD.bazel27
-rw-r--r--willow/tools/willowc/include/ast.hpp62
-rw-r--r--willow/tools/willowc/include/expr.hpp4
-rw-r--r--willow/tools/willowc/include/parser.hpp33
-rw-r--r--willow/tools/willowc/include/sourcemanager.hpp26
-rw-r--r--willow/tools/willowc/include/tokenizer.hpp72
-rw-r--r--willow/tools/willowc/lib/driver.cpp0
-rw-r--r--willow/tools/willowc/lib/parser.cpp9
-rw-r--r--willow/tools/willowc/lib/sourcemanager.cpp41
-rw-r--r--willow/tools/willowc/lib/tokenizer.cpp176
-rw-r--r--willow/tools/willowc/main.cpp1
-rw-r--r--willow/unittest/BUILD.bazel6
-rw-r--r--willow/unittest/ir/BUILD.bazel16
-rw-r--r--willow/unittest/ir/VerifierTest.cpp51
-rw-r--r--willow/unittest/willow_unittest.bzl8
32 files changed, 767 insertions, 36 deletions
diff --git a/BUILD.bazel b/BUILD.bazel
index 6cbf83c..59c5d8f 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -5,5 +5,7 @@ refresh_compile_commands(
targets = {
"//willow:willow": "",
+ "//willow:willowc": "",
+ "//willow/unittest/ir:verifier": "",
},
)
diff --git a/MODULE.bazel b/MODULE.bazel
index 4e9fd6e..a42cd9e 100644
--- a/MODULE.bazel
+++ b/MODULE.bazel
@@ -10,6 +10,7 @@ git_override(
remote = "https://github.com/hedronvision/bazel-compile-commands-extractor.git",
commit = "abb61a688167623088f8768cc9264798df6a9d10",
)
+bazel_dep(name = "catch2", version = "3.12.0")
bazel_dep(name = "willow", version = "0.0.1")
diff --git a/MODULE.bazel.lock b/MODULE.bazel.lock
index aafa0b4..9883741 100644
--- a/MODULE.bazel.lock
+++ b/MODULE.bazel.lock
@@ -1,5 +1,5 @@
{
- "lockFileVersion": 24,
+ "lockFileVersion": 18,
"registryFileHashes": {
"https://bcr.bazel.build/bazel_registry.json": "8a28e4aff06ee60aed2a8c281907fb8bcbf3b753c91fb5a5c57da3215d5b3497",
"https://bcr.bazel.build/modules/abseil-cpp/20210324.2/MODULE.bazel": "7cd0312e064fde87c8d1cd79ba06c876bd23630c83466e9500321be55c96ace2",
@@ -32,9 +32,12 @@
"https://bcr.bazel.build/modules/bazel_skylib/1.6.1/MODULE.bazel": "8fdee2dbaace6c252131c00e1de4b165dc65af02ea278476187765e1a617b917",
"https://bcr.bazel.build/modules/bazel_skylib/1.7.0/MODULE.bazel": "0db596f4563de7938de764cc8deeabec291f55e8ec15299718b93c4423e9796d",
"https://bcr.bazel.build/modules/bazel_skylib/1.7.1/MODULE.bazel": "3120d80c5861aa616222ec015332e5f8d3171e062e3e804a2a0253e1be26e59b",
- "https://bcr.bazel.build/modules/bazel_skylib/1.7.1/source.json": "f121b43eeefc7c29efbd51b83d08631e2347297c95aac9764a701f2a6a2bb953",
+ "https://bcr.bazel.build/modules/bazel_skylib/1.9.0/MODULE.bazel": "72997b29dfd95c3fa0d0c48322d05590418edef451f8db8db5509c57875fb4b7",
+ "https://bcr.bazel.build/modules/bazel_skylib/1.9.0/source.json": "7ad77c1e8c1b84222d9b3f3cae016a76639435744c19330b0b37c0a3c9da7dc0",
"https://bcr.bazel.build/modules/buildozer/7.1.2/MODULE.bazel": "2e8dd40ede9c454042645fd8d8d0cd1527966aa5c919de86661e62953cd73d84",
"https://bcr.bazel.build/modules/buildozer/7.1.2/source.json": "c9028a501d2db85793a6996205c8de120944f50a0d570438fcae0457a5f9d1f8",
+ "https://bcr.bazel.build/modules/catch2/3.12.0/MODULE.bazel": "4df354bebac2f8798b2d61b317b6f6b238174065dce26229e67ce97a5f19fbf7",
+ "https://bcr.bazel.build/modules/catch2/3.12.0/source.json": "77b2afdd0ead4cdd3e78f75ad3ddfe6c91e199da0bf26b209bff5bd992c0ecaf",
"https://bcr.bazel.build/modules/google_benchmark/1.8.2/MODULE.bazel": "a70cf1bba851000ba93b58ae2f6d76490a9feb74192e57ab8e8ff13c34ec50cb",
"https://bcr.bazel.build/modules/googletest/1.11.0/MODULE.bazel": "3a83f095183f66345ca86aa13c58b59f9f94a2f81999c093d4eeaa2d262d12f4",
"https://bcr.bazel.build/modules/googletest/1.14.0.bcr.1/MODULE.bazel": "22c31a561553727960057361aa33bf20fb2e98584bc4fec007906e27053f80c6",
@@ -140,7 +143,7 @@
"moduleExtensions": {
"@@rules_kotlin+//src/main/starlark/core/repositories:bzlmod_setup.bzl%rules_kotlin_extensions": {
"general": {
- "bzlTransitiveDigest": "rL/34P1aFDq2GqVC2zCFgQ8nTuOC6ziogocpvG50Qz8=",
+ "bzlTransitiveDigest": "OlvsB0HsvxbR8ZN+J9Vf00X/+WVz/Y/5Xrq2LgcVfdo=",
"usagesDigest": "QI2z8ZUR+mqtbwsf2fLqYdJAkPOHdOV+tF2yVAUgRzw=",
"recordedFileInputs": {},
"recordedDirentsInputs": {},
@@ -202,6 +205,5 @@
]
}
}
- },
- "facts": {}
+ }
}
diff --git a/nix/flake.nix b/nix/flake.nix
index 91ec511..fa570f3 100644
--- a/nix/flake.nix
+++ b/nix/flake.nix
@@ -41,9 +41,10 @@
ninja
clang-tools
+ clang
z3
- gtest
+ catch2_3
pkg-config
lit
diff --git a/willow/BUILD.bazel b/willow/BUILD.bazel
index 8726cbd..b38911e 100644
--- a/willow/BUILD.bazel
+++ b/willow/BUILD.bazel
@@ -1,13 +1,25 @@
+load("@rules_cc//cc:defs.bzl", "cc_library")
+
cc_library(
name = "willow",
- srcs = [
- "lib/IR/TypeContext.cpp",
- ],
+ srcs = glob([
+ "lib/**/*.cpp",
+ ]),
hdrs = glob([
"include/willow/**/*.h",
]),
+ copts = [
+ "-std=c++23",
+ "-Wall",
+ ],
strip_include_prefix = "include",
visibility = ["//visibility:public"],
deps = [
],
)
+
+alias(
+ name = "willowc",
+ actual = "//willow/tools/willowc:willowc",
+ visibility = ["//visibility:public"],
+)
diff --git a/willow/include/willow/ADT/IList.h b/willow/include/willow/ADT/IList.h
new file mode 100644
index 0000000..0c55ad5
--- /dev/null
+++ b/willow/include/willow/ADT/IList.h
@@ -0,0 +1,76 @@
+#ifndef WILLOW_INCLUDE_ADT_ILIST_H
+#define WILLOW_INCLUDE_ADT_ILIST_H
+
+#include <type_traits>
+
+namespace willow {
+
+template <typename T>
+class IList;
+
+/// An instrusive list node
+template <typename T>
+class IListTrait {
+ friend class IList<T>;
+
+public:
+ IListTrait() = default;
+ IListTrait(const IListTrait &) = delete;
+ IListTrait &operator=(const IListTrait &) = delete;
+ IListTrait(IListTrait &&) = delete;
+ IListTrait &operator=(IListTrait &&) = delete;
+
+ T *getNext() const { return static_cast<T *>(next); }
+ T *getNext() { return static_cast<T *>(next); }
+ T *getPrev() const { return static_cast<T *>(prev); }
+ T *getPrev() { return static_cast<T *>(prev); }
+
+ void insertAfter(IListTrait<T> &node) {
+ auto old = this->next;
+ this->next = &node;
+ node->prev = this;
+ node->next = old;
+ };
+
+ void insertBefore(IListTrait<T> &node) {
+ auto old = this->prev;
+ this->prev = node;
+ node->next = this;
+ node->prev = old;
+ };
+
+ /// Erase the node from the list. Return a pointer to the unliked node
+ T *removeFromList() {
+ prev->next = next;
+ next->prev = prev;
+ prev = nullptr;
+ next = nullptr;
+ return static_cast<T *>(this);
+ }
+
+private:
+ IListTrait<T> *prev = nullptr;
+ IListTrait<T> *next = nullptr;
+};
+
+/// Instrusive list
+template <typename T>
+class IList {
+ static_assert(std::is_base_of_v<IListTrait<T>, T>,
+ "T must implement IListTrait");
+ using node = IListTrait<T>;
+
+public:
+ IList() : dummyBegin{nullptr, &dummyEnd}, dummyEnd{&dummyBegin, dummyEnd} {}
+ IList(const IList &) = delete;
+ IList(IList &&) = delete;
+
+private:
+ // yes we could save 16 bytes, no i dont really care
+ node dummyBegin;
+ node dummyEnd;
+};
+
+} // namespace willow
+
+#endif // WILLOW_INCLUDE_ADT_ILIST_H
diff --git a/willow/include/willow/IR/BasicBlock.h b/willow/include/willow/IR/BasicBlock.h
index c18dfc2..5db8538 100644
--- a/willow/include/willow/IR/BasicBlock.h
+++ b/willow/include/willow/IR/BasicBlock.h
@@ -70,14 +70,14 @@ public:
std::unordered_map<BasicBlock*, size_t>& preds() { return predecessors; }
const std::unordered_map<BasicBlock*, size_t>& preds() const { return predecessors; }
- inline void addPred(BasicBlock *bb) {
+ void addPred(BasicBlock *bb) {
auto [it, inserted] = predecessors.try_emplace(bb, 1);
if (!inserted)
it->second += 1;
}
- inline void delPred(BasicBlock *bb) {
+ void delPred(BasicBlock *bb) {
auto it = preds().find(bb);
it->second -= 1;
@@ -87,21 +87,35 @@ public:
}
};
-Instruction *BasicBlock::trailer() {
+inline Instruction *BasicBlock::trailer() {
if (empty())
return nullptr;
else
return body.back().get();
}
-Instruction *BasicBlock::leader() {
+inline Instruction *BasicBlock::leader() {
if (empty())
return nullptr;
else
return body.back().get();
}
-Instruction *BasicBlock::addInstruction(std::unique_ptr<Instruction> inst) {
+inline const Instruction *BasicBlock::trailer() const {
+ if (empty())
+ return nullptr;
+ else
+ return body.back().get();
+}
+
+inline const Instruction *BasicBlock::leader() const {
+ if (empty())
+ return nullptr;
+ else
+ return body.back().get();
+}
+
+inline Instruction *BasicBlock::addInstruction(std::unique_ptr<Instruction> inst) {
Instruction *p = inst.get();
p->setParent(this);
body.push_back(std::move(inst));
diff --git a/willow/include/willow/IR/Constant.h b/willow/include/willow/IR/Constant.h
index d93d65d..4e2a3d7 100644
--- a/willow/include/willow/IR/Constant.h
+++ b/willow/include/willow/IR/Constant.h
@@ -1,6 +1,7 @@
#ifndef WILLOW_INCLUDE_IR_CONSTANT_H
#define WILLOW_INCLUDE_IR_CONSTANT_H
+#include <utility>
#include <willow/IR/Value.h>
namespace willow {
@@ -89,4 +90,19 @@ public:
} // namespace willow
+// TODO lol
+inline std::ostream& operator<<(std::ostream &os, const willow::Constant& c) {
+ if (c.isPoison())
+ return os << "poison";
+
+ if (c.isUndef())
+ return os << "undef";
+
+ if (c.isInt()) {
+ return os << static_cast<const willow::ConstantInt*>(&c)->getSExtVal();
+ }
+
+ std::unreachable();
+}
+
#endif // WILLOW_INCLUDE_IR_CONSTANT_H
diff --git a/willow/include/willow/IR/Context.h b/willow/include/willow/IR/Context.h
index 3f4f2ba..eda0241 100644
--- a/willow/include/willow/IR/Context.h
+++ b/willow/include/willow/IR/Context.h
@@ -17,6 +17,13 @@ class WillowContext {
public:
TypeContext &types() { return typectx; }
ConstantPool &constants() { return constantpool; }
+
+ template<typename ...Args>
+ [[nodiscard]]
+ Module *addModule(Args&&...args) {
+ modules.push_back(std::make_unique<Module>(std::forward<Args>(args)...));
+ return modules.back().get();
+ }
};
} // namespace willow
diff --git a/willow/include/willow/IR/Function.h b/willow/include/willow/IR/Function.h
index 4be4de7..43a301b 100644
--- a/willow/include/willow/IR/Function.h
+++ b/willow/include/willow/IR/Function.h
@@ -38,6 +38,9 @@ public:
: Value(ValueKind::Function, std::move(name), fty), parent(parent),
params(std::move(params)) {}
+ Function(std::string name, Module *parent, Type fty)
+ : Value(ValueKind::Function, std::move(name), fty), parent(parent) {}
+
/// \return Parent module or nullptr.
Module *getParent() { return parent; }
const Module *getParent() const { return parent; }
@@ -112,14 +115,14 @@ public:
}
};
-const BasicBlock *Function::entryBlock() const {
+inline const BasicBlock *Function::entryBlock() const {
if (blocks.empty())
return nullptr;
else
return blocks.front().get();
}
-BasicBlock *Function::entryBlock() {
+inline BasicBlock *Function::entryBlock() {
if (blocks.empty())
return nullptr;
else
diff --git a/willow/include/willow/IR/IRBuilder.h b/willow/include/willow/IR/IRBuilder.h
index 635c41b..6036e33 100644
--- a/willow/include/willow/IR/IRBuilder.h
+++ b/willow/include/willow/IR/IRBuilder.h
@@ -1,6 +1,7 @@
#ifndef WILLOW_INCLUDE_IR_IR_BUILDER_H
#define WILLOW_INCLUDE_IR_IR_BUILDER_H
+#include <willow/IR/Context.h>
#include <willow/IR/BasicBlock.h>
#include <willow/IR/Function.h>
#include <willow/IR/Instruction.h>
@@ -9,6 +10,8 @@ namespace willow {
/// Helper for constructing and modifiying IR.
class IRBuilder {
+ WillowContext &ctx;
+ const Module &mod;
BasicBlock *block = nullptr;
public:
diff --git a/willow/include/willow/IR/Instruction.h b/willow/include/willow/IR/Instruction.h
index a74c409..1198546 100644
--- a/willow/include/willow/IR/Instruction.h
+++ b/willow/include/willow/IR/Instruction.h
@@ -25,7 +25,11 @@ class Function;
class Successors {
friend class Instruction;
std::array<BasicBlock *, 2> data;
- uint8_t n = 0;
+ uint8_t n;
+
+ Successors() : data{}, n{0} {}
+ explicit Successors(BasicBlock *a) : data{a}, n{1} {}
+ Successors(BasicBlock *a, BasicBlock *b) : data{a, b}, n{2} {}
public:
auto begin() { return data.data(); }
@@ -198,6 +202,8 @@ constexpr std::string_view Instruction::opcodeName(Instruction::Opcode op) {
case Alloca:
return "alloca";
}
+
+ std::unreachable();
}
} // namespace willow
@@ -212,11 +218,19 @@ struct std::formatter<willow::Instruction::Opcode> {
}
};
-// std::ostream& operator<<(std::ostream &os, const willow::Instruction &inst) {
-// auto vty = inst.getType();
-//
-// // int add %i %
-// os << vty << " " << inst.opcode() << " "
-// }
+inline std::ostream& operator<<(std::ostream &os, const willow::Instruction::Opcode op) {
+ return os << willow::Instruction::opcodeName(op);
+}
+
+inline std::ostream& operator<<(std::ostream &os, const willow::Instruction &inst) {
+ auto vty = inst.getType();
+
+ os << vty << " " << willow::Instruction::opcodeName(inst.opcode());
+
+ for (auto *operand : inst.getOperands())
+ os << " " << operand;
+
+ return os;
+}
#endif // WILLOW_INCLUDE_IR_INSTRUCTION_H
diff --git a/willow/include/willow/IR/Location.h b/willow/include/willow/IR/Location.h
index e0ce838..c3c241d 100644
--- a/willow/include/willow/IR/Location.h
+++ b/willow/include/willow/IR/Location.h
@@ -25,7 +25,7 @@ struct std::formatter<willow::Location> {
}
};
-std::ostream &operator<<(std::ostream &os, const willow::Location &loc) {
+inline std::ostream &operator<<(std::ostream &os, const willow::Location &loc) {
return os << std::format("{}", loc);
}
diff --git a/willow/include/willow/IR/Types.h b/willow/include/willow/IR/Types.h
index 9a4f868..40bb098 100644
--- a/willow/include/willow/IR/Types.h
+++ b/willow/include/willow/IR/Types.h
@@ -4,6 +4,7 @@
#include <cstddef>
#include <format>
#include <functional>
+#include <utility>
#include <willow/Util/Hashing.h>
@@ -118,23 +119,25 @@ public:
};
constexpr std::string_view primitiveTypeName(TypeID tid) {
+ using namespace std::string_view_literals;
using enum TypeID;
switch (tid) {
case TypeID::Void:
- return "void";
+ return "void"sv;
case TypeID::BasicBlock:
- return "label";
+ return "label"sv;
case TypeID::Int:
- return "int";
+ return "int"sv;
case TypeID::Function:
- return "function";
+ return "function"sv;
case TypeID::Ptr:
- return "ptr";
+ return "ptr"sv;
case TypeID::Struct:
- return "struct";
+ return "struct"sv;
case TypeID::Vector:
- return "vector";
+ return "vector"sv;
}
+ std::unreachable();
}
class BasicBlockTypeImpl : public TypeImpl {
@@ -162,10 +165,10 @@ public:
struct Hash {
size_t operator()(const Key &k) const noexcept {
std::size_t seed = 0;
- willow::hashing::combine(seed, k.ret);
+ willow::hashing::combine(seed, k.ret.getImpl());
willow::hashing::combine(seed, k.params.size());
for (const auto &ty : k.params)
- willow::hashing::combine(seed, ty);
+ willow::hashing::combine(seed, ty.getImpl());
return seed;
}
@@ -204,7 +207,7 @@ struct std::formatter<willow::Type> {
}
};
-std::ostream &operator<<(std::ostream &os, willow::Type ty) {
+inline std::ostream &operator<<(std::ostream &os, willow::Type ty) {
return os << std::format("{}", ty);
}
diff --git a/willow/include/willow/IR/Value.h b/willow/include/willow/IR/Value.h
index c219788..650e773 100644
--- a/willow/include/willow/IR/Value.h
+++ b/willow/include/willow/IR/Value.h
@@ -7,11 +7,13 @@
#include <string>
#include <string_view>
#include <unordered_map>
+#include <ostream>
namespace willow {
class Instruction;
class BasicBlock;
+class Constant;
enum class ValueKind {
Instruction, ///< the identified result of an instruction
@@ -82,4 +84,6 @@ public:
} // namespace willow
+inline std::ostream &operator<<(std::ostream &os, const willow::Value &v);
+
#endif // WILLOW_INCLUDE_IR_VALUE_H
diff --git a/willow/lib/IR/Instruction.cpp b/willow/lib/IR/Instruction.cpp
index 587736d..a6e0c9b 100644
--- a/willow/lib/IR/Instruction.cpp
+++ b/willow/lib/IR/Instruction.cpp
@@ -1,4 +1,5 @@
#include <willow/IR/Instruction.h>
+#include <willow/IR/Instructions.h>
namespace willow {
@@ -33,6 +34,19 @@ bool Instruction::isTerminatorOp(Opcode op) {
}
}
-Successors Instruction::succs();
-
+Successors Instruction::succs() {
+ using enum Opcode;
+ switch (op) {
+ case Jmp: {
+ auto inst = static_cast<JmpInst *>(this);
+ return Successors{inst->getTarget()};
+ }
+ case Br: {
+ auto inst = static_cast<BrInst *>(this);
+ return Successors{inst->getTrueTarget(), inst->getFalseTarget()};
+ }
+ default:
+ return Successors{};
+ }
+}
}; // namespace willow
diff --git a/willow/lib/IR/Value.cpp b/willow/lib/IR/Value.cpp
new file mode 100644
index 0000000..13e029f
--- /dev/null
+++ b/willow/lib/IR/Value.cpp
@@ -0,0 +1,27 @@
+#include <willow/IR/Value.h>
+#include <willow/IR/Constant.h>
+#include <ostream>
+
+std::ostream &operator<<(std::ostream &os, const willow::Value &v) {
+ using willow::ValueKind;
+ auto ty = v.getType();
+ if (!v.isVoid())
+ os << ty << " ";
+
+ switch (v.getValueKind()) {
+ case ValueKind::Parameter:
+ [[fallthrough]];
+ case ValueKind::Instruction: {
+ return os << "%" << v.getName();
+ }
+ case ValueKind::BasicBlock: {
+ return os << "^" << v.getName();
+ }
+ case ValueKind::Function: {
+ return os << "@" << v.getName();
+ }
+ case ValueKind::Constant: {
+ return os << *static_cast<const willow::Constant*>(&v);
+ }
+ }
+}
diff --git a/willow/tools/willowc/BUILD.bazel b/willow/tools/willowc/BUILD.bazel
index e69de29..708de13 100644
--- a/willow/tools/willowc/BUILD.bazel
+++ b/willow/tools/willowc/BUILD.bazel
@@ -0,0 +1,27 @@
+load("@rules_cc//cc:defs.bzl", "cc_library", "cc_binary")
+
+cc_library(
+ name = "willowc_lib",
+ srcs = glob([
+ "lib/*.cpp",
+ ]),
+ hdrs = glob([
+ "include/*.hpp",
+ ]),
+ copts = [
+ "-std=c++23",
+ "-Wall",
+ ],
+ deps = ["//willow"],
+ strip_include_prefix = "include",
+ visibility = ["//visibility:public"],
+)
+
+cc_binary(
+ name = "willowc",
+ srcs = [
+ "main.cpp",
+ ],
+ deps = [":willowc_lib"],
+ visibility = ["//visibility:public"],
+)
diff --git a/willow/tools/willowc/include/ast.hpp b/willow/tools/willowc/include/ast.hpp
new file mode 100644
index 0000000..8c59067
--- /dev/null
+++ b/willow/tools/willowc/include/ast.hpp
@@ -0,0 +1,62 @@
+#ifndef WILLOWC_INCLUDE_AST_HPP
+#define WILLOWC_INCLUDE_AST_HPP
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include <willow/IR/Instructions.h>
+#include <willow/IR/Types.h>
+
+#include <tokenizer.hpp>
+
+namespace willowc {
+
+using Opcode = willow::Instruction::Opcode;
+using TokenIndex = std::size_t;
+
+// this is like willow::ValueKind, but treats groups all ssa values into 'Value'
+// (because they can't be differentiated by syntax alone)
+enum class ExprKind { Constant, BasicBlock, Function, Value };
+
+struct ExprAST {
+ ExprKind kind;
+
+ std::string name;
+ // token??
+};
+
+struct InstAST {
+ Opcode op;
+ std::string name;
+
+ std::vector<std::unique_ptr<ExprAST>> args;
+};
+
+struct BlockAST {
+ std::string label;
+ std::vector<std::unique_ptr<InstAST>> body;
+};
+
+struct ParameterAST {
+ std::string name;
+ willow::Type type;
+};
+
+struct FunctionDeclAST {
+ std::string name;
+ std::vector<std::unique_ptr<ParameterAST>> parameters;
+ std::string returntype;
+
+ std::vector<std::unique_ptr<BlockAST>> body;
+ // TODO: movable symbol table
+};
+
+struct ModuleAST {
+ std::vector<std::unique_ptr<FunctionDeclAST>> Functions;
+ // TODO: imports, symbol table
+};
+
+}; // namespace willowc
+
+#endif // WILLOWC_INCLUDE_AST_HPP
diff --git a/willow/tools/willowc/include/expr.hpp b/willow/tools/willowc/include/expr.hpp
new file mode 100644
index 0000000..15d2985
--- /dev/null
+++ b/willow/tools/willowc/include/expr.hpp
@@ -0,0 +1,4 @@
+#ifndef WILLOWC_INCLUDE_EXPR_HPP
+#define WILLOWC_INCLUDE_EXPR_HPP
+
+#endif // WILLOWC_INCLUDE_EXPR_HPP
diff --git a/willow/tools/willowc/include/parser.hpp b/willow/tools/willowc/include/parser.hpp
new file mode 100644
index 0000000..825dfdd
--- /dev/null
+++ b/willow/tools/willowc/include/parser.hpp
@@ -0,0 +1,33 @@
+#ifndef WILLOWC_INCLUDE_PARSER_HPP
+#define WILLOWC_INCLUDE_PARSER_HPP
+
+#include <tokenizer.hpp>
+#include <ast.hpp>
+
+#include <optional>
+#include <memory>
+#include <vector>
+
+namespace willowc {
+
+class Parser {
+ std::string_view buf;
+
+ std::vector<TokenKind> kinds;
+ std::vector<std::size_t> starts;
+ Tokenizer tokenizer;
+
+ std::size_t pos;
+
+public:
+ Parser(std::string_view buf) : buf(buf), tokenizer(buf) {}
+
+ std::optional<std::unique_ptr<ModuleAST>> parse();
+
+ TokenKind kind() const { return kinds[pos]; }
+ std::size_t start() const { return starts[pos]; }
+};
+
+} // namespace willowc
+
+#endif // WILLOWC_INCLUDE_PARSER_HPP
diff --git a/willow/tools/willowc/include/sourcemanager.hpp b/willow/tools/willowc/include/sourcemanager.hpp
new file mode 100644
index 0000000..a526e48
--- /dev/null
+++ b/willow/tools/willowc/include/sourcemanager.hpp
@@ -0,0 +1,26 @@
+#ifndef WILLOWC_INCLUDE_SOURCEMANAGER_HPP
+#define WILLOWC_INCLUDE_SOURCEMANAGER_HPP
+
+#include <filesystem>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace willowc {
+
+using FileID = std::uint32_t;
+
+class SourceManager {
+struct File {
+ std::string path;
+ std::unique_ptr<char[]> buf;
+};
+public:
+ std::optional<FileID> addFile(std::string_view path);
+private:
+ std::vector<File> file_table;
+};
+
+} // namespace willowc
+
+#endif // WILLOWC_INCLUDE_SOURCEMANAGER_HPP
diff --git a/willow/tools/willowc/include/tokenizer.hpp b/willow/tools/willowc/include/tokenizer.hpp
new file mode 100644
index 0000000..3de9d32
--- /dev/null
+++ b/willow/tools/willowc/include/tokenizer.hpp
@@ -0,0 +1,72 @@
+#ifndef WILLOWC_INCLUDE_TOKENIZER_HPP
+#define WILLOWC_INCLUDE_TOKENIZER_HPP
+
+#include <willow/IR/Location.h>
+
+namespace willowc {
+
+enum class TokenKind {
+ Function,
+ Variable,
+ Constant,
+ Type,
+ Label,
+ Inst,
+
+ Comma,
+ Semicolon,
+ LParen,
+ RParen,
+ LCurly,
+ RCurly,
+ Equals,
+ RArrow,
+ Comment,
+
+ FuncKW,
+ Eof,
+ Invalid,
+};
+
+struct Token {
+ std::size_t start, end;
+ TokenKind kind;
+};
+
+class Tokenizer {
+ std::string_view buf;
+ std::size_t offset;
+
+ void skip(std::size_t idx = 1) { offset += idx; }
+
+ char eat(std::size_t num = 1) {
+ if (offset >= buf.length())
+ return '\0';
+
+ char c = buf[offset];
+ offset += num;
+ return c;
+ }
+
+ char peek(std::size_t idx = 0) {
+ if (offset + idx >= buf.length())
+ return '\0';
+
+ return buf[offset + idx];
+ }
+
+ bool scan_id(bool accept_digits);
+ bool scan_dec();
+ bool scan_hex();
+ bool scan_constant();
+public:
+ explicit Tokenizer(std::string_view buf, std::size_t offset = 0)
+ : buf{buf}, offset{offset} {}
+
+ Token scan();
+ void seek(uint64_t offset);
+};
+
+} // namespace willowc
+
+#endif // WILLOWC_INCLUDE_TOKENIZER_HPP
diff --git a/willow/tools/willowc/lib/driver.cpp b/willow/tools/willowc/lib/driver.cpp
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/willow/tools/willowc/lib/driver.cpp
diff --git a/willow/tools/willowc/lib/parser.cpp b/willow/tools/willowc/lib/parser.cpp
new file mode 100644
index 0000000..becc171
--- /dev/null
+++ b/willow/tools/willowc/lib/parser.cpp
@@ -0,0 +1,9 @@
+#include <parser.hpp>
+
+namespace willowc {
+
+std::optional<std::unique_ptr<ModuleAST>> parse() {
+
+}
+
+}
diff --git a/willow/tools/willowc/lib/sourcemanager.cpp b/willow/tools/willowc/lib/sourcemanager.cpp
new file mode 100644
index 0000000..e2a8e72
--- /dev/null
+++ b/willow/tools/willowc/lib/sourcemanager.cpp
@@ -0,0 +1,41 @@
+#include <filesystem>
+
+#include <fstream>
+#include <sourcemanager.hpp>
+
+namespace willowc {
+
+std::optional<FileID> SourceManager::addFile(std::string_view _path) {
+ std::error_code ec;
+
+ std::filesystem::path uncanonical_path{_path};
+ auto path = std::filesystem::weakly_canonical(uncanonical_path, ec);
+ if (ec) {
+ return false;
+ }
+ std::string display_path = path.make_preferred();
+
+ if (!std::filesystem::exists(path, ec) || ec)
+ return std::nullopt;
+
+ if (!std::filesystem::is_regular_file(path, ec) || ec)
+ return std::nullopt;
+
+ std::size_t filesize = std::filesystem::file_size(path, ec);
+ if (ec)
+ return std::nullopt;
+
+ std::ifstream f{display_path, std::ios::binary};
+ if (!f)
+ return std::nullopt;
+
+ auto buf = std::make_unique<char[]>(filesize);
+ f.read(buf.get(), filesize);
+
+ const FileID id = file_table.size();
+ file_table.push_back(File{std::move(display_path), std::move(buf)});
+
+ return id;
+}
+
+} // namespace willowc
diff --git a/willow/tools/willowc/lib/tokenizer.cpp b/willow/tools/willowc/lib/tokenizer.cpp
new file mode 100644
index 0000000..0c1f917
--- /dev/null
+++ b/willow/tools/willowc/lib/tokenizer.cpp
@@ -0,0 +1,176 @@
+#include <tokenizer.hpp>
+
+namespace willowc {
+
+static inline bool is_space(unsigned char c) {
+ return c == ' ' || c == '\t' || c == '\n' || c == '\r';
+}
+static inline bool is_digit(unsigned char c) { return c >= '0' && c <= '9'; }
+static inline bool is_xdigit(unsigned char c) {
+ return is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
+}
+static inline bool is_alpha(unsigned char c) {
+ unsigned char x = static_cast<unsigned char>(c | 0x20);
+ return x >= 'a' && x <= 'z';
+}
+
+static inline bool valid_id_start(int c) {
+ return is_alpha(c) || c == '$' || c == '.' || c == '_' || c == '-';
+}
+
+bool Tokenizer::scan_id(bool accept_digits = true) {
+ char c = peek();
+
+ if (accept_digits && is_digit(c)) {
+ // if it starts with a digit, must be all digits
+ while (is_digit(peek()))
+ skip();
+ return true;
+ }
+
+ if (!valid_id_start(c))
+ return false;
+
+ while (valid_id_start(peek()) || isdigit(peek()))
+ skip();
+
+ return true;
+}
+
+Token Tokenizer::scan() {
+ std::size_t start = this->offset;
+
+ while (isspace(peek()))
+ skip();
+
+ TokenKind k = [&] {
+ switch (peek()) {
+ case '@':
+ skip();
+ if (scan_id(false))
+ return TokenKind::Function;
+ return TokenKind::Invalid;
+ case '%':
+ skip();
+ if (scan_id())
+ return TokenKind::Variable;
+ return TokenKind::Invalid;
+ case '^':
+ skip();
+ if (scan_id())
+ return TokenKind::Label;
+ return TokenKind::Invalid;
+ case ',':
+ skip();
+ return TokenKind::Comma;
+ case ';':
+ skip();
+ return TokenKind::Semicolon;
+ case '(':
+ skip();
+ return TokenKind::LParen;
+ case ')':
+ skip();
+ return TokenKind::RParen;
+ case '{':
+ skip();
+ return TokenKind::LCurly;
+ case '}':
+ skip();
+ return TokenKind::RCurly;
+ case '=':
+ skip();
+ return TokenKind::Equals;
+ case '-': {
+ if (peek(1) == '>') {
+ skip(2);
+ return TokenKind::RArrow;
+ }
+ if (isdigit(peek(1))) {
+ skip();
+ if (scan_dec())
+ return TokenKind::Constant;
+ }
+ return TokenKind::Invalid;
+ }
+ case '/': {
+ skip();
+ if (peek() != '/')
+ return TokenKind::Invalid;
+
+ skip();
+ char c = eat();
+ while (c != '\0' && c != '\n')
+ c = eat();
+
+ return TokenKind::Comment;
+ }
+ case '\0':
+ return TokenKind::Eof;
+ default: {
+ if (is_digit(peek()))
+ return scan_constant() ? TokenKind::Constant : TokenKind::Invalid;
+
+ if (peek() == 'i') {
+ skip();
+ if (scan_dec())
+ return TokenKind::Type;
+ }
+
+ if (isalpha(peek())) {
+ skip();
+ while (isalnum(peek()) || peek() == '.')
+ skip();
+ return TokenKind::Inst;
+ }
+
+ return TokenKind::Invalid;
+ }
+ }
+ }();
+
+ return Token{start, offset, k};
+}
+
+bool Tokenizer::scan_dec() {
+ if (!is_digit(peek()))
+ return false;
+ skip();
+ while (is_digit(peek()))
+ skip();
+
+ return true;
+}
+
+bool Tokenizer::scan_hex() {
+ if (!is_xdigit(peek()))
+ return false;
+ skip();
+ while (is_xdigit(peek()))
+ skip();
+
+ return true;
+}
+
+bool Tokenizer::scan_constant() {
+ if (peek() == '-')
+ skip();
+
+ if (peek() == '0') {
+ skip();
+ if (is_digit(peek()))
+ return false;
+ if (peek() == 'x') {
+ skip();
+ return scan_hex();
+ } else {
+ return true; // 0
+ }
+ } else if (is_digit(peek())) {
+ return scan_dec();
+ }
+
+ return false;
+}
+
+} // namespace willowc
diff --git a/willow/tools/willowc/main.cpp b/willow/tools/willowc/main.cpp
new file mode 100644
index 0000000..237c8ce
--- /dev/null
+++ b/willow/tools/willowc/main.cpp
@@ -0,0 +1 @@
+int main() {}
diff --git a/willow/unittest/BUILD.bazel b/willow/unittest/BUILD.bazel
new file mode 100644
index 0000000..8f1c55d
--- /dev/null
+++ b/willow/unittest/BUILD.bazel
@@ -0,0 +1,6 @@
+test_suite(
+ name = "unittest",
+ tests = [
+ "//willow/unittest/ir:ir_tests",
+ ]
+)
diff --git a/willow/unittest/ir/BUILD.bazel b/willow/unittest/ir/BUILD.bazel
new file mode 100644
index 0000000..b41dfcd
--- /dev/null
+++ b/willow/unittest/ir/BUILD.bazel
@@ -0,0 +1,16 @@
+cc_test(
+ name = "verifier",
+ srcs = ["VerifierTest.cpp"],
+ deps = [
+ "//willow",
+ "@catch2//:catch2_main"
+ ],
+ tags = ["ir"]
+)
+
+test_suite(
+ name = "ir_tests",
+ tests = [
+ ":verifier"
+ ],
+)
diff --git a/willow/unittest/ir/VerifierTest.cpp b/willow/unittest/ir/VerifierTest.cpp
new file mode 100644
index 0000000..959d72a
--- /dev/null
+++ b/willow/unittest/ir/VerifierTest.cpp
@@ -0,0 +1,51 @@
+#include <catch2/catch_test_macros.hpp>
+
+#include <willow/IR/Context.h>
+#include <willow/IR/Module.h>
+#include <willow/IR/Verifier.h>
+#include <willow/IR/Diagnostic.h>
+#include <willow/IR/DiagnosticEngine.h>
+#include <willow/IR/Function.h>
+
+using namespace willow;
+
+TEST_CASE("valid modules", "[verifier]") {
+ WillowContext ctx;
+ std::vector<Diagnostic> diags;
+ DiagnosticEngine eng(
+ [&](Diagnostic d) { diags.push_back(std::move(d)); });
+
+ auto &m = *ctx.addModule("test");
+ SECTION("empty module") {
+ REQUIRE(succeeded(verifyModule(ctx, m, eng)));
+ REQUIRE(diags.empty());
+ }
+}
+
+TEST_CASE("valid function", "[verifier]") {
+ WillowContext ctx;
+ std::vector<Diagnostic> diags;
+ DiagnosticEngine eng(
+ [&](Diagnostic d) { diags.push_back(std::move(d)); });
+
+ auto &m = *ctx.addModule("test");
+
+ Type fty = ctx.types().FunctionType(ctx.types().VoidType(), {});
+ auto &fn = *m.emplaceFunction("fn", &m, fty);
+
+ REQUIRE(succeeded(verifyFunction(ctx, fn, eng)));
+ REQUIRE(diags.empty());
+}
+
+TEST_CASE("invalid basic block", "[verifier]") {
+ WillowContext ctx;
+ std::vector<Diagnostic> diags;
+ DiagnosticEngine eng(
+ [&](Diagnostic d) { diags.push_back(std::move(d)); });
+
+ auto &m = *ctx.addModule("test");
+
+ Type fty = ctx.types().FunctionType(ctx.types().VoidType(), {});
+ auto &fn = *m.emplaceFunction("fn", &m, fty);
+ // TODO
+}
diff --git a/willow/unittest/willow_unittest.bzl b/willow/unittest/willow_unittest.bzl
new file mode 100644
index 0000000..f1bb102
--- /dev/null
+++ b/willow/unittest/willow_unittest.bzl
@@ -0,0 +1,8 @@
+def willow_unittest(name, srcs, deps = [], tags = [], **kwargs):
+ native.cc_test(
+ name = name,
+ srcs = srcs,
+ deps = deps + ["@catch2//:catch2_main"],
+ tags = tags,
+ **kwargs
+ )