diff options
| author | Stefan Weigl-Bosker <stefan@s00.xyz> | 2026-02-03 14:59:53 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-02-03 14:59:53 -0500 |
| commit | add95b14f74e6dbe04a6efe98ff0f20424930b73 (patch) | |
| tree | 13ce413ee4190a4c8f8743c7740aaa8d04353f14 | |
| parent | c5b2905c5a64433f8519531a77d3acc42d881f17 (diff) | |
| download | compiler-dev/stefan.tar.gz | |
[willow]: initial frontend work, unit tests (#8)dev/stefan
32 files changed, 767 insertions, 36 deletions
diff --git a/BUILD.bazel b/BUILD.bazel index 6cbf83c..59c5d8f 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -5,5 +5,7 @@ refresh_compile_commands( targets = { "//willow:willow": "", + "//willow:willowc": "", + "//willow/unittest/ir:verifier": "", }, ) diff --git a/MODULE.bazel b/MODULE.bazel index 4e9fd6e..a42cd9e 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -10,6 +10,7 @@ git_override( remote = "https://github.com/hedronvision/bazel-compile-commands-extractor.git", commit = "abb61a688167623088f8768cc9264798df6a9d10", ) +bazel_dep(name = "catch2", version = "3.12.0") bazel_dep(name = "willow", version = "0.0.1") diff --git a/MODULE.bazel.lock b/MODULE.bazel.lock index aafa0b4..9883741 100644 --- a/MODULE.bazel.lock +++ b/MODULE.bazel.lock @@ -1,5 +1,5 @@ { - "lockFileVersion": 24, + "lockFileVersion": 18, "registryFileHashes": { "https://bcr.bazel.build/bazel_registry.json": "8a28e4aff06ee60aed2a8c281907fb8bcbf3b753c91fb5a5c57da3215d5b3497", "https://bcr.bazel.build/modules/abseil-cpp/20210324.2/MODULE.bazel": "7cd0312e064fde87c8d1cd79ba06c876bd23630c83466e9500321be55c96ace2", @@ -32,9 +32,12 @@ "https://bcr.bazel.build/modules/bazel_skylib/1.6.1/MODULE.bazel": "8fdee2dbaace6c252131c00e1de4b165dc65af02ea278476187765e1a617b917", "https://bcr.bazel.build/modules/bazel_skylib/1.7.0/MODULE.bazel": "0db596f4563de7938de764cc8deeabec291f55e8ec15299718b93c4423e9796d", "https://bcr.bazel.build/modules/bazel_skylib/1.7.1/MODULE.bazel": "3120d80c5861aa616222ec015332e5f8d3171e062e3e804a2a0253e1be26e59b", - "https://bcr.bazel.build/modules/bazel_skylib/1.7.1/source.json": "f121b43eeefc7c29efbd51b83d08631e2347297c95aac9764a701f2a6a2bb953", + "https://bcr.bazel.build/modules/bazel_skylib/1.9.0/MODULE.bazel": "72997b29dfd95c3fa0d0c48322d05590418edef451f8db8db5509c57875fb4b7", + "https://bcr.bazel.build/modules/bazel_skylib/1.9.0/source.json": "7ad77c1e8c1b84222d9b3f3cae016a76639435744c19330b0b37c0a3c9da7dc0", "https://bcr.bazel.build/modules/buildozer/7.1.2/MODULE.bazel": "2e8dd40ede9c454042645fd8d8d0cd1527966aa5c919de86661e62953cd73d84", "https://bcr.bazel.build/modules/buildozer/7.1.2/source.json": "c9028a501d2db85793a6996205c8de120944f50a0d570438fcae0457a5f9d1f8", + "https://bcr.bazel.build/modules/catch2/3.12.0/MODULE.bazel": "4df354bebac2f8798b2d61b317b6f6b238174065dce26229e67ce97a5f19fbf7", + "https://bcr.bazel.build/modules/catch2/3.12.0/source.json": "77b2afdd0ead4cdd3e78f75ad3ddfe6c91e199da0bf26b209bff5bd992c0ecaf", "https://bcr.bazel.build/modules/google_benchmark/1.8.2/MODULE.bazel": "a70cf1bba851000ba93b58ae2f6d76490a9feb74192e57ab8e8ff13c34ec50cb", "https://bcr.bazel.build/modules/googletest/1.11.0/MODULE.bazel": "3a83f095183f66345ca86aa13c58b59f9f94a2f81999c093d4eeaa2d262d12f4", "https://bcr.bazel.build/modules/googletest/1.14.0.bcr.1/MODULE.bazel": "22c31a561553727960057361aa33bf20fb2e98584bc4fec007906e27053f80c6", @@ -140,7 +143,7 @@ "moduleExtensions": { "@@rules_kotlin+//src/main/starlark/core/repositories:bzlmod_setup.bzl%rules_kotlin_extensions": { "general": { - "bzlTransitiveDigest": "rL/34P1aFDq2GqVC2zCFgQ8nTuOC6ziogocpvG50Qz8=", + "bzlTransitiveDigest": "OlvsB0HsvxbR8ZN+J9Vf00X/+WVz/Y/5Xrq2LgcVfdo=", "usagesDigest": "QI2z8ZUR+mqtbwsf2fLqYdJAkPOHdOV+tF2yVAUgRzw=", "recordedFileInputs": {}, "recordedDirentsInputs": {}, @@ -202,6 +205,5 @@ ] } } - }, - "facts": {} + } } diff --git a/nix/flake.nix b/nix/flake.nix index 91ec511..fa570f3 100644 --- a/nix/flake.nix +++ b/nix/flake.nix @@ -41,9 +41,10 @@ ninja clang-tools + clang z3 - gtest + catch2_3 pkg-config lit diff --git a/willow/BUILD.bazel b/willow/BUILD.bazel index 8726cbd..b38911e 100644 --- a/willow/BUILD.bazel +++ b/willow/BUILD.bazel @@ -1,13 +1,25 @@ +load("@rules_cc//cc:defs.bzl", "cc_library") + cc_library( name = "willow", - srcs = [ - "lib/IR/TypeContext.cpp", - ], + srcs = glob([ + "lib/**/*.cpp", + ]), hdrs = glob([ "include/willow/**/*.h", ]), + copts = [ + "-std=c++23", + "-Wall", + ], strip_include_prefix = "include", visibility = ["//visibility:public"], deps = [ ], ) + +alias( + name = "willowc", + actual = "//willow/tools/willowc:willowc", + visibility = ["//visibility:public"], +) diff --git a/willow/include/willow/ADT/IList.h b/willow/include/willow/ADT/IList.h new file mode 100644 index 0000000..0c55ad5 --- /dev/null +++ b/willow/include/willow/ADT/IList.h @@ -0,0 +1,76 @@ +#ifndef WILLOW_INCLUDE_ADT_ILIST_H +#define WILLOW_INCLUDE_ADT_ILIST_H + +#include <type_traits> + +namespace willow { + +template <typename T> +class IList; + +/// An instrusive list node +template <typename T> +class IListTrait { + friend class IList<T>; + +public: + IListTrait() = default; + IListTrait(const IListTrait &) = delete; + IListTrait &operator=(const IListTrait &) = delete; + IListTrait(IListTrait &&) = delete; + IListTrait &operator=(IListTrait &&) = delete; + + T *getNext() const { return static_cast<T *>(next); } + T *getNext() { return static_cast<T *>(next); } + T *getPrev() const { return static_cast<T *>(prev); } + T *getPrev() { return static_cast<T *>(prev); } + + void insertAfter(IListTrait<T> &node) { + auto old = this->next; + this->next = &node; + node->prev = this; + node->next = old; + }; + + void insertBefore(IListTrait<T> &node) { + auto old = this->prev; + this->prev = node; + node->next = this; + node->prev = old; + }; + + /// Erase the node from the list. Return a pointer to the unliked node + T *removeFromList() { + prev->next = next; + next->prev = prev; + prev = nullptr; + next = nullptr; + return static_cast<T *>(this); + } + +private: + IListTrait<T> *prev = nullptr; + IListTrait<T> *next = nullptr; +}; + +/// Instrusive list +template <typename T> +class IList { + static_assert(std::is_base_of_v<IListTrait<T>, T>, + "T must implement IListTrait"); + using node = IListTrait<T>; + +public: + IList() : dummyBegin{nullptr, &dummyEnd}, dummyEnd{&dummyBegin, dummyEnd} {} + IList(const IList &) = delete; + IList(IList &&) = delete; + +private: + // yes we could save 16 bytes, no i dont really care + node dummyBegin; + node dummyEnd; +}; + +} // namespace willow + +#endif // WILLOW_INCLUDE_ADT_ILIST_H diff --git a/willow/include/willow/IR/BasicBlock.h b/willow/include/willow/IR/BasicBlock.h index c18dfc2..5db8538 100644 --- a/willow/include/willow/IR/BasicBlock.h +++ b/willow/include/willow/IR/BasicBlock.h @@ -70,14 +70,14 @@ public: std::unordered_map<BasicBlock*, size_t>& preds() { return predecessors; } const std::unordered_map<BasicBlock*, size_t>& preds() const { return predecessors; } - inline void addPred(BasicBlock *bb) { + void addPred(BasicBlock *bb) { auto [it, inserted] = predecessors.try_emplace(bb, 1); if (!inserted) it->second += 1; } - inline void delPred(BasicBlock *bb) { + void delPred(BasicBlock *bb) { auto it = preds().find(bb); it->second -= 1; @@ -87,21 +87,35 @@ public: } }; -Instruction *BasicBlock::trailer() { +inline Instruction *BasicBlock::trailer() { if (empty()) return nullptr; else return body.back().get(); } -Instruction *BasicBlock::leader() { +inline Instruction *BasicBlock::leader() { if (empty()) return nullptr; else return body.back().get(); } -Instruction *BasicBlock::addInstruction(std::unique_ptr<Instruction> inst) { +inline const Instruction *BasicBlock::trailer() const { + if (empty()) + return nullptr; + else + return body.back().get(); +} + +inline const Instruction *BasicBlock::leader() const { + if (empty()) + return nullptr; + else + return body.back().get(); +} + +inline Instruction *BasicBlock::addInstruction(std::unique_ptr<Instruction> inst) { Instruction *p = inst.get(); p->setParent(this); body.push_back(std::move(inst)); diff --git a/willow/include/willow/IR/Constant.h b/willow/include/willow/IR/Constant.h index d93d65d..4e2a3d7 100644 --- a/willow/include/willow/IR/Constant.h +++ b/willow/include/willow/IR/Constant.h @@ -1,6 +1,7 @@ #ifndef WILLOW_INCLUDE_IR_CONSTANT_H #define WILLOW_INCLUDE_IR_CONSTANT_H +#include <utility> #include <willow/IR/Value.h> namespace willow { @@ -89,4 +90,19 @@ public: } // namespace willow +// TODO lol +inline std::ostream& operator<<(std::ostream &os, const willow::Constant& c) { + if (c.isPoison()) + return os << "poison"; + + if (c.isUndef()) + return os << "undef"; + + if (c.isInt()) { + return os << static_cast<const willow::ConstantInt*>(&c)->getSExtVal(); + } + + std::unreachable(); +} + #endif // WILLOW_INCLUDE_IR_CONSTANT_H diff --git a/willow/include/willow/IR/Context.h b/willow/include/willow/IR/Context.h index 3f4f2ba..eda0241 100644 --- a/willow/include/willow/IR/Context.h +++ b/willow/include/willow/IR/Context.h @@ -17,6 +17,13 @@ class WillowContext { public: TypeContext &types() { return typectx; } ConstantPool &constants() { return constantpool; } + + template<typename ...Args> + [[nodiscard]] + Module *addModule(Args&&...args) { + modules.push_back(std::make_unique<Module>(std::forward<Args>(args)...)); + return modules.back().get(); + } }; } // namespace willow diff --git a/willow/include/willow/IR/Function.h b/willow/include/willow/IR/Function.h index 4be4de7..43a301b 100644 --- a/willow/include/willow/IR/Function.h +++ b/willow/include/willow/IR/Function.h @@ -38,6 +38,9 @@ public: : Value(ValueKind::Function, std::move(name), fty), parent(parent), params(std::move(params)) {} + Function(std::string name, Module *parent, Type fty) + : Value(ValueKind::Function, std::move(name), fty), parent(parent) {} + /// \return Parent module or nullptr. Module *getParent() { return parent; } const Module *getParent() const { return parent; } @@ -112,14 +115,14 @@ public: } }; -const BasicBlock *Function::entryBlock() const { +inline const BasicBlock *Function::entryBlock() const { if (blocks.empty()) return nullptr; else return blocks.front().get(); } -BasicBlock *Function::entryBlock() { +inline BasicBlock *Function::entryBlock() { if (blocks.empty()) return nullptr; else diff --git a/willow/include/willow/IR/IRBuilder.h b/willow/include/willow/IR/IRBuilder.h index 635c41b..6036e33 100644 --- a/willow/include/willow/IR/IRBuilder.h +++ b/willow/include/willow/IR/IRBuilder.h @@ -1,6 +1,7 @@ #ifndef WILLOW_INCLUDE_IR_IR_BUILDER_H #define WILLOW_INCLUDE_IR_IR_BUILDER_H +#include <willow/IR/Context.h> #include <willow/IR/BasicBlock.h> #include <willow/IR/Function.h> #include <willow/IR/Instruction.h> @@ -9,6 +10,8 @@ namespace willow { /// Helper for constructing and modifiying IR. class IRBuilder { + WillowContext &ctx; + const Module &mod; BasicBlock *block = nullptr; public: diff --git a/willow/include/willow/IR/Instruction.h b/willow/include/willow/IR/Instruction.h index a74c409..1198546 100644 --- a/willow/include/willow/IR/Instruction.h +++ b/willow/include/willow/IR/Instruction.h @@ -25,7 +25,11 @@ class Function; class Successors { friend class Instruction; std::array<BasicBlock *, 2> data; - uint8_t n = 0; + uint8_t n; + + Successors() : data{}, n{0} {} + explicit Successors(BasicBlock *a) : data{a}, n{1} {} + Successors(BasicBlock *a, BasicBlock *b) : data{a, b}, n{2} {} public: auto begin() { return data.data(); } @@ -198,6 +202,8 @@ constexpr std::string_view Instruction::opcodeName(Instruction::Opcode op) { case Alloca: return "alloca"; } + + std::unreachable(); } } // namespace willow @@ -212,11 +218,19 @@ struct std::formatter<willow::Instruction::Opcode> { } }; -// std::ostream& operator<<(std::ostream &os, const willow::Instruction &inst) { -// auto vty = inst.getType(); -// -// // int add %i % -// os << vty << " " << inst.opcode() << " " -// } +inline std::ostream& operator<<(std::ostream &os, const willow::Instruction::Opcode op) { + return os << willow::Instruction::opcodeName(op); +} + +inline std::ostream& operator<<(std::ostream &os, const willow::Instruction &inst) { + auto vty = inst.getType(); + + os << vty << " " << willow::Instruction::opcodeName(inst.opcode()); + + for (auto *operand : inst.getOperands()) + os << " " << operand; + + return os; +} #endif // WILLOW_INCLUDE_IR_INSTRUCTION_H diff --git a/willow/include/willow/IR/Location.h b/willow/include/willow/IR/Location.h index e0ce838..c3c241d 100644 --- a/willow/include/willow/IR/Location.h +++ b/willow/include/willow/IR/Location.h @@ -25,7 +25,7 @@ struct std::formatter<willow::Location> { } }; -std::ostream &operator<<(std::ostream &os, const willow::Location &loc) { +inline std::ostream &operator<<(std::ostream &os, const willow::Location &loc) { return os << std::format("{}", loc); } diff --git a/willow/include/willow/IR/Types.h b/willow/include/willow/IR/Types.h index 9a4f868..40bb098 100644 --- a/willow/include/willow/IR/Types.h +++ b/willow/include/willow/IR/Types.h @@ -4,6 +4,7 @@ #include <cstddef> #include <format> #include <functional> +#include <utility> #include <willow/Util/Hashing.h> @@ -118,23 +119,25 @@ public: }; constexpr std::string_view primitiveTypeName(TypeID tid) { + using namespace std::string_view_literals; using enum TypeID; switch (tid) { case TypeID::Void: - return "void"; + return "void"sv; case TypeID::BasicBlock: - return "label"; + return "label"sv; case TypeID::Int: - return "int"; + return "int"sv; case TypeID::Function: - return "function"; + return "function"sv; case TypeID::Ptr: - return "ptr"; + return "ptr"sv; case TypeID::Struct: - return "struct"; + return "struct"sv; case TypeID::Vector: - return "vector"; + return "vector"sv; } + std::unreachable(); } class BasicBlockTypeImpl : public TypeImpl { @@ -162,10 +165,10 @@ public: struct Hash { size_t operator()(const Key &k) const noexcept { std::size_t seed = 0; - willow::hashing::combine(seed, k.ret); + willow::hashing::combine(seed, k.ret.getImpl()); willow::hashing::combine(seed, k.params.size()); for (const auto &ty : k.params) - willow::hashing::combine(seed, ty); + willow::hashing::combine(seed, ty.getImpl()); return seed; } @@ -204,7 +207,7 @@ struct std::formatter<willow::Type> { } }; -std::ostream &operator<<(std::ostream &os, willow::Type ty) { +inline std::ostream &operator<<(std::ostream &os, willow::Type ty) { return os << std::format("{}", ty); } diff --git a/willow/include/willow/IR/Value.h b/willow/include/willow/IR/Value.h index c219788..650e773 100644 --- a/willow/include/willow/IR/Value.h +++ b/willow/include/willow/IR/Value.h @@ -7,11 +7,13 @@ #include <string> #include <string_view> #include <unordered_map> +#include <ostream> namespace willow { class Instruction; class BasicBlock; +class Constant; enum class ValueKind { Instruction, ///< the identified result of an instruction @@ -82,4 +84,6 @@ public: } // namespace willow +inline std::ostream &operator<<(std::ostream &os, const willow::Value &v); + #endif // WILLOW_INCLUDE_IR_VALUE_H diff --git a/willow/lib/IR/Instruction.cpp b/willow/lib/IR/Instruction.cpp index 587736d..a6e0c9b 100644 --- a/willow/lib/IR/Instruction.cpp +++ b/willow/lib/IR/Instruction.cpp @@ -1,4 +1,5 @@ #include <willow/IR/Instruction.h> +#include <willow/IR/Instructions.h> namespace willow { @@ -33,6 +34,19 @@ bool Instruction::isTerminatorOp(Opcode op) { } } -Successors Instruction::succs(); - +Successors Instruction::succs() { + using enum Opcode; + switch (op) { + case Jmp: { + auto inst = static_cast<JmpInst *>(this); + return Successors{inst->getTarget()}; + } + case Br: { + auto inst = static_cast<BrInst *>(this); + return Successors{inst->getTrueTarget(), inst->getFalseTarget()}; + } + default: + return Successors{}; + } +} }; // namespace willow diff --git a/willow/lib/IR/Value.cpp b/willow/lib/IR/Value.cpp new file mode 100644 index 0000000..13e029f --- /dev/null +++ b/willow/lib/IR/Value.cpp @@ -0,0 +1,27 @@ +#include <willow/IR/Value.h> +#include <willow/IR/Constant.h> +#include <ostream> + +std::ostream &operator<<(std::ostream &os, const willow::Value &v) { + using willow::ValueKind; + auto ty = v.getType(); + if (!v.isVoid()) + os << ty << " "; + + switch (v.getValueKind()) { + case ValueKind::Parameter: + [[fallthrough]]; + case ValueKind::Instruction: { + return os << "%" << v.getName(); + } + case ValueKind::BasicBlock: { + return os << "^" << v.getName(); + } + case ValueKind::Function: { + return os << "@" << v.getName(); + } + case ValueKind::Constant: { + return os << *static_cast<const willow::Constant*>(&v); + } + } +} diff --git a/willow/tools/willowc/BUILD.bazel b/willow/tools/willowc/BUILD.bazel index e69de29..708de13 100644 --- a/willow/tools/willowc/BUILD.bazel +++ b/willow/tools/willowc/BUILD.bazel @@ -0,0 +1,27 @@ +load("@rules_cc//cc:defs.bzl", "cc_library", "cc_binary") + +cc_library( + name = "willowc_lib", + srcs = glob([ + "lib/*.cpp", + ]), + hdrs = glob([ + "include/*.hpp", + ]), + copts = [ + "-std=c++23", + "-Wall", + ], + deps = ["//willow"], + strip_include_prefix = "include", + visibility = ["//visibility:public"], +) + +cc_binary( + name = "willowc", + srcs = [ + "main.cpp", + ], + deps = [":willowc_lib"], + visibility = ["//visibility:public"], +) diff --git a/willow/tools/willowc/include/ast.hpp b/willow/tools/willowc/include/ast.hpp new file mode 100644 index 0000000..8c59067 --- /dev/null +++ b/willow/tools/willowc/include/ast.hpp @@ -0,0 +1,62 @@ +#ifndef WILLOWC_INCLUDE_AST_HPP +#define WILLOWC_INCLUDE_AST_HPP + +#include <cstdint> +#include <memory> +#include <vector> + +#include <willow/IR/Instructions.h> +#include <willow/IR/Types.h> + +#include <tokenizer.hpp> + +namespace willowc { + +using Opcode = willow::Instruction::Opcode; +using TokenIndex = std::size_t; + +// this is like willow::ValueKind, but treats groups all ssa values into 'Value' +// (because they can't be differentiated by syntax alone) +enum class ExprKind { Constant, BasicBlock, Function, Value }; + +struct ExprAST { + ExprKind kind; + + std::string name; + // token?? +}; + +struct InstAST { + Opcode op; + std::string name; + + std::vector<std::unique_ptr<ExprAST>> args; +}; + +struct BlockAST { + std::string label; + std::vector<std::unique_ptr<InstAST>> body; +}; + +struct ParameterAST { + std::string name; + willow::Type type; +}; + +struct FunctionDeclAST { + std::string name; + std::vector<std::unique_ptr<ParameterAST>> parameters; + std::string returntype; + + std::vector<std::unique_ptr<BlockAST>> body; + // TODO: movable symbol table +}; + +struct ModuleAST { + std::vector<std::unique_ptr<FunctionDeclAST>> Functions; + // TODO: imports, symbol table +}; + +}; // namespace willowc + +#endif // WILLOWC_INCLUDE_AST_HPP diff --git a/willow/tools/willowc/include/expr.hpp b/willow/tools/willowc/include/expr.hpp new file mode 100644 index 0000000..15d2985 --- /dev/null +++ b/willow/tools/willowc/include/expr.hpp @@ -0,0 +1,4 @@ +#ifndef WILLOWC_INCLUDE_EXPR_HPP +#define WILLOWC_INCLUDE_EXPR_HPP + +#endif // WILLOWC_INCLUDE_EXPR_HPP diff --git a/willow/tools/willowc/include/parser.hpp b/willow/tools/willowc/include/parser.hpp new file mode 100644 index 0000000..825dfdd --- /dev/null +++ b/willow/tools/willowc/include/parser.hpp @@ -0,0 +1,33 @@ +#ifndef WILLOWC_INCLUDE_PARSER_HPP +#define WILLOWC_INCLUDE_PARSER_HPP + +#include <tokenizer.hpp> +#include <ast.hpp> + +#include <optional> +#include <memory> +#include <vector> + +namespace willowc { + +class Parser { + std::string_view buf; + + std::vector<TokenKind> kinds; + std::vector<std::size_t> starts; + Tokenizer tokenizer; + + std::size_t pos; + +public: + Parser(std::string_view buf) : buf(buf), tokenizer(buf) {} + + std::optional<std::unique_ptr<ModuleAST>> parse(); + + TokenKind kind() const { return kinds[pos]; } + std::size_t start() const { return starts[pos]; } +}; + +} // namespace willowc + +#endif // WILLOWC_INCLUDE_PARSER_HPP diff --git a/willow/tools/willowc/include/sourcemanager.hpp b/willow/tools/willowc/include/sourcemanager.hpp new file mode 100644 index 0000000..a526e48 --- /dev/null +++ b/willow/tools/willowc/include/sourcemanager.hpp @@ -0,0 +1,26 @@ +#ifndef WILLOWC_INCLUDE_SOURCEMANAGER_HPP +#define WILLOWC_INCLUDE_SOURCEMANAGER_HPP + +#include <filesystem> +#include <memory> +#include <string> +#include <vector> + +namespace willowc { + +using FileID = std::uint32_t; + +class SourceManager { +struct File { + std::string path; + std::unique_ptr<char[]> buf; +}; +public: + std::optional<FileID> addFile(std::string_view path); +private: + std::vector<File> file_table; +}; + +} // namespace willowc + +#endif // WILLOWC_INCLUDE_SOURCEMANAGER_HPP diff --git a/willow/tools/willowc/include/tokenizer.hpp b/willow/tools/willowc/include/tokenizer.hpp new file mode 100644 index 0000000..3de9d32 --- /dev/null +++ b/willow/tools/willowc/include/tokenizer.hpp @@ -0,0 +1,72 @@ +#ifndef WILLOWC_INCLUDE_TOKENIZER_HPP +#define WILLOWC_INCLUDE_TOKENIZER_HPP + +#include <willow/IR/Location.h> + +namespace willowc { + +enum class TokenKind { + Function, + Variable, + Constant, + Type, + Label, + Inst, + + Comma, + Semicolon, + LParen, + RParen, + LCurly, + RCurly, + Equals, + RArrow, + Comment, + + FuncKW, + Eof, + Invalid, +}; + +struct Token { + std::size_t start, end; + TokenKind kind; +}; + +class Tokenizer { + std::string_view buf; + std::size_t offset; + + void skip(std::size_t idx = 1) { offset += idx; } + + char eat(std::size_t num = 1) { + if (offset >= buf.length()) + return '\0'; + + char c = buf[offset]; + offset += num; + return c; + } + + char peek(std::size_t idx = 0) { + if (offset + idx >= buf.length()) + return '\0'; + + return buf[offset + idx]; + } + + bool scan_id(bool accept_digits); + bool scan_dec(); + bool scan_hex(); + bool scan_constant(); +public: + explicit Tokenizer(std::string_view buf, std::size_t offset = 0) + : buf{buf}, offset{offset} {} + + Token scan(); + void seek(uint64_t offset); +}; + +} // namespace willowc + +#endif // WILLOWC_INCLUDE_TOKENIZER_HPP diff --git a/willow/tools/willowc/lib/driver.cpp b/willow/tools/willowc/lib/driver.cpp new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/willow/tools/willowc/lib/driver.cpp diff --git a/willow/tools/willowc/lib/parser.cpp b/willow/tools/willowc/lib/parser.cpp new file mode 100644 index 0000000..becc171 --- /dev/null +++ b/willow/tools/willowc/lib/parser.cpp @@ -0,0 +1,9 @@ +#include <parser.hpp> + +namespace willowc { + +std::optional<std::unique_ptr<ModuleAST>> parse() { + +} + +} diff --git a/willow/tools/willowc/lib/sourcemanager.cpp b/willow/tools/willowc/lib/sourcemanager.cpp new file mode 100644 index 0000000..e2a8e72 --- /dev/null +++ b/willow/tools/willowc/lib/sourcemanager.cpp @@ -0,0 +1,41 @@ +#include <filesystem> + +#include <fstream> +#include <sourcemanager.hpp> + +namespace willowc { + +std::optional<FileID> SourceManager::addFile(std::string_view _path) { + std::error_code ec; + + std::filesystem::path uncanonical_path{_path}; + auto path = std::filesystem::weakly_canonical(uncanonical_path, ec); + if (ec) { + return false; + } + std::string display_path = path.make_preferred(); + + if (!std::filesystem::exists(path, ec) || ec) + return std::nullopt; + + if (!std::filesystem::is_regular_file(path, ec) || ec) + return std::nullopt; + + std::size_t filesize = std::filesystem::file_size(path, ec); + if (ec) + return std::nullopt; + + std::ifstream f{display_path, std::ios::binary}; + if (!f) + return std::nullopt; + + auto buf = std::make_unique<char[]>(filesize); + f.read(buf.get(), filesize); + + const FileID id = file_table.size(); + file_table.push_back(File{std::move(display_path), std::move(buf)}); + + return id; +} + +} // namespace willowc diff --git a/willow/tools/willowc/lib/tokenizer.cpp b/willow/tools/willowc/lib/tokenizer.cpp new file mode 100644 index 0000000..0c1f917 --- /dev/null +++ b/willow/tools/willowc/lib/tokenizer.cpp @@ -0,0 +1,176 @@ +#include <tokenizer.hpp> + +namespace willowc { + +static inline bool is_space(unsigned char c) { + return c == ' ' || c == '\t' || c == '\n' || c == '\r'; +} +static inline bool is_digit(unsigned char c) { return c >= '0' && c <= '9'; } +static inline bool is_xdigit(unsigned char c) { + return is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); +} +static inline bool is_alpha(unsigned char c) { + unsigned char x = static_cast<unsigned char>(c | 0x20); + return x >= 'a' && x <= 'z'; +} + +static inline bool valid_id_start(int c) { + return is_alpha(c) || c == '$' || c == '.' || c == '_' || c == '-'; +} + +bool Tokenizer::scan_id(bool accept_digits = true) { + char c = peek(); + + if (accept_digits && is_digit(c)) { + // if it starts with a digit, must be all digits + while (is_digit(peek())) + skip(); + return true; + } + + if (!valid_id_start(c)) + return false; + + while (valid_id_start(peek()) || isdigit(peek())) + skip(); + + return true; +} + +Token Tokenizer::scan() { + std::size_t start = this->offset; + + while (isspace(peek())) + skip(); + + TokenKind k = [&] { + switch (peek()) { + case '@': + skip(); + if (scan_id(false)) + return TokenKind::Function; + return TokenKind::Invalid; + case '%': + skip(); + if (scan_id()) + return TokenKind::Variable; + return TokenKind::Invalid; + case '^': + skip(); + if (scan_id()) + return TokenKind::Label; + return TokenKind::Invalid; + case ',': + skip(); + return TokenKind::Comma; + case ';': + skip(); + return TokenKind::Semicolon; + case '(': + skip(); + return TokenKind::LParen; + case ')': + skip(); + return TokenKind::RParen; + case '{': + skip(); + return TokenKind::LCurly; + case '}': + skip(); + return TokenKind::RCurly; + case '=': + skip(); + return TokenKind::Equals; + case '-': { + if (peek(1) == '>') { + skip(2); + return TokenKind::RArrow; + } + if (isdigit(peek(1))) { + skip(); + if (scan_dec()) + return TokenKind::Constant; + } + return TokenKind::Invalid; + } + case '/': { + skip(); + if (peek() != '/') + return TokenKind::Invalid; + + skip(); + char c = eat(); + while (c != '\0' && c != '\n') + c = eat(); + + return TokenKind::Comment; + } + case '\0': + return TokenKind::Eof; + default: { + if (is_digit(peek())) + return scan_constant() ? TokenKind::Constant : TokenKind::Invalid; + + if (peek() == 'i') { + skip(); + if (scan_dec()) + return TokenKind::Type; + } + + if (isalpha(peek())) { + skip(); + while (isalnum(peek()) || peek() == '.') + skip(); + return TokenKind::Inst; + } + + return TokenKind::Invalid; + } + } + }(); + + return Token{start, offset, k}; +} + +bool Tokenizer::scan_dec() { + if (!is_digit(peek())) + return false; + skip(); + while (is_digit(peek())) + skip(); + + return true; +} + +bool Tokenizer::scan_hex() { + if (!is_xdigit(peek())) + return false; + skip(); + while (is_xdigit(peek())) + skip(); + + return true; +} + +bool Tokenizer::scan_constant() { + if (peek() == '-') + skip(); + + if (peek() == '0') { + skip(); + if (is_digit(peek())) + return false; + if (peek() == 'x') { + skip(); + return scan_hex(); + } else { + return true; // 0 + } + } else if (is_digit(peek())) { + return scan_dec(); + } + + return false; +} + +} // namespace willowc diff --git a/willow/tools/willowc/main.cpp b/willow/tools/willowc/main.cpp new file mode 100644 index 0000000..237c8ce --- /dev/null +++ b/willow/tools/willowc/main.cpp @@ -0,0 +1 @@ +int main() {} diff --git a/willow/unittest/BUILD.bazel b/willow/unittest/BUILD.bazel new file mode 100644 index 0000000..8f1c55d --- /dev/null +++ b/willow/unittest/BUILD.bazel @@ -0,0 +1,6 @@ +test_suite( + name = "unittest", + tests = [ + "//willow/unittest/ir:ir_tests", + ] +) diff --git a/willow/unittest/ir/BUILD.bazel b/willow/unittest/ir/BUILD.bazel new file mode 100644 index 0000000..b41dfcd --- /dev/null +++ b/willow/unittest/ir/BUILD.bazel @@ -0,0 +1,16 @@ +cc_test( + name = "verifier", + srcs = ["VerifierTest.cpp"], + deps = [ + "//willow", + "@catch2//:catch2_main" + ], + tags = ["ir"] +) + +test_suite( + name = "ir_tests", + tests = [ + ":verifier" + ], +) diff --git a/willow/unittest/ir/VerifierTest.cpp b/willow/unittest/ir/VerifierTest.cpp new file mode 100644 index 0000000..959d72a --- /dev/null +++ b/willow/unittest/ir/VerifierTest.cpp @@ -0,0 +1,51 @@ +#include <catch2/catch_test_macros.hpp> + +#include <willow/IR/Context.h> +#include <willow/IR/Module.h> +#include <willow/IR/Verifier.h> +#include <willow/IR/Diagnostic.h> +#include <willow/IR/DiagnosticEngine.h> +#include <willow/IR/Function.h> + +using namespace willow; + +TEST_CASE("valid modules", "[verifier]") { + WillowContext ctx; + std::vector<Diagnostic> diags; + DiagnosticEngine eng( + [&](Diagnostic d) { diags.push_back(std::move(d)); }); + + auto &m = *ctx.addModule("test"); + SECTION("empty module") { + REQUIRE(succeeded(verifyModule(ctx, m, eng))); + REQUIRE(diags.empty()); + } +} + +TEST_CASE("valid function", "[verifier]") { + WillowContext ctx; + std::vector<Diagnostic> diags; + DiagnosticEngine eng( + [&](Diagnostic d) { diags.push_back(std::move(d)); }); + + auto &m = *ctx.addModule("test"); + + Type fty = ctx.types().FunctionType(ctx.types().VoidType(), {}); + auto &fn = *m.emplaceFunction("fn", &m, fty); + + REQUIRE(succeeded(verifyFunction(ctx, fn, eng))); + REQUIRE(diags.empty()); +} + +TEST_CASE("invalid basic block", "[verifier]") { + WillowContext ctx; + std::vector<Diagnostic> diags; + DiagnosticEngine eng( + [&](Diagnostic d) { diags.push_back(std::move(d)); }); + + auto &m = *ctx.addModule("test"); + + Type fty = ctx.types().FunctionType(ctx.types().VoidType(), {}); + auto &fn = *m.emplaceFunction("fn", &m, fty); + // TODO +} diff --git a/willow/unittest/willow_unittest.bzl b/willow/unittest/willow_unittest.bzl new file mode 100644 index 0000000..f1bb102 --- /dev/null +++ b/willow/unittest/willow_unittest.bzl @@ -0,0 +1,8 @@ +def willow_unittest(name, srcs, deps = [], tags = [], **kwargs): + native.cc_test( + name = name, + srcs = srcs, + deps = deps + ["@catch2//:catch2_main"], + tags = tags, + **kwargs + ) |