From add95b14f74e6dbe04a6efe98ff0f20424930b73 Mon Sep 17 00:00:00 2001 From: Stefan Weigl-Bosker Date: Tue, 3 Feb 2026 14:59:53 -0500 Subject: [willow]: initial frontend work, unit tests (#8) --- willow/BUILD.bazel | 18 ++- willow/include/willow/ADT/IList.h | 76 +++++++++++ willow/include/willow/IR/BasicBlock.h | 24 +++- willow/include/willow/IR/Constant.h | 16 +++ willow/include/willow/IR/Context.h | 7 + willow/include/willow/IR/Function.h | 7 +- willow/include/willow/IR/IRBuilder.h | 3 + willow/include/willow/IR/Instruction.h | 28 +++- willow/include/willow/IR/Location.h | 2 +- willow/include/willow/IR/Types.h | 23 ++-- willow/include/willow/IR/Value.h | 4 + willow/lib/IR/Instruction.cpp | 18 ++- willow/lib/IR/Value.cpp | 27 ++++ willow/tools/willowc/BUILD.bazel | 27 ++++ willow/tools/willowc/include/ast.hpp | 62 +++++++++ willow/tools/willowc/include/expr.hpp | 4 + willow/tools/willowc/include/parser.hpp | 33 +++++ willow/tools/willowc/include/sourcemanager.hpp | 26 ++++ willow/tools/willowc/include/tokenizer.hpp | 72 ++++++++++ willow/tools/willowc/lib/driver.cpp | 0 willow/tools/willowc/lib/parser.cpp | 9 ++ willow/tools/willowc/lib/sourcemanager.cpp | 41 ++++++ willow/tools/willowc/lib/tokenizer.cpp | 176 +++++++++++++++++++++++++ willow/tools/willowc/main.cpp | 1 + willow/unittest/BUILD.bazel | 6 + willow/unittest/ir/BUILD.bazel | 16 +++ willow/unittest/ir/VerifierTest.cpp | 51 +++++++ willow/unittest/willow_unittest.bzl | 8 ++ 28 files changed, 755 insertions(+), 30 deletions(-) create mode 100644 willow/include/willow/ADT/IList.h create mode 100644 willow/lib/IR/Value.cpp create mode 100644 willow/tools/willowc/include/ast.hpp create mode 100644 willow/tools/willowc/include/expr.hpp create mode 100644 willow/tools/willowc/include/parser.hpp create mode 100644 willow/tools/willowc/include/sourcemanager.hpp create mode 100644 willow/tools/willowc/include/tokenizer.hpp create mode 100644 willow/tools/willowc/lib/driver.cpp create mode 100644 willow/tools/willowc/lib/parser.cpp create mode 100644 willow/tools/willowc/lib/sourcemanager.cpp create mode 100644 willow/tools/willowc/lib/tokenizer.cpp create mode 100644 willow/tools/willowc/main.cpp create mode 100644 willow/unittest/BUILD.bazel create mode 100644 willow/unittest/ir/BUILD.bazel create mode 100644 willow/unittest/ir/VerifierTest.cpp create mode 100644 willow/unittest/willow_unittest.bzl (limited to 'willow') diff --git a/willow/BUILD.bazel b/willow/BUILD.bazel index 8726cbd..b38911e 100644 --- a/willow/BUILD.bazel +++ b/willow/BUILD.bazel @@ -1,13 +1,25 @@ +load("@rules_cc//cc:defs.bzl", "cc_library") + cc_library( name = "willow", - srcs = [ - "lib/IR/TypeContext.cpp", - ], + srcs = glob([ + "lib/**/*.cpp", + ]), hdrs = glob([ "include/willow/**/*.h", ]), + copts = [ + "-std=c++23", + "-Wall", + ], strip_include_prefix = "include", visibility = ["//visibility:public"], deps = [ ], ) + +alias( + name = "willowc", + actual = "//willow/tools/willowc:willowc", + visibility = ["//visibility:public"], +) diff --git a/willow/include/willow/ADT/IList.h b/willow/include/willow/ADT/IList.h new file mode 100644 index 0000000..0c55ad5 --- /dev/null +++ b/willow/include/willow/ADT/IList.h @@ -0,0 +1,76 @@ +#ifndef WILLOW_INCLUDE_ADT_ILIST_H +#define WILLOW_INCLUDE_ADT_ILIST_H + +#include + +namespace willow { + +template +class IList; + +/// An instrusive list node +template +class IListTrait { + friend class IList; + +public: + IListTrait() = default; + IListTrait(const IListTrait &) = delete; + IListTrait &operator=(const IListTrait &) = delete; + IListTrait(IListTrait &&) = delete; + IListTrait &operator=(IListTrait &&) = delete; + + T *getNext() const { return static_cast(next); } + T *getNext() { return static_cast(next); } + T *getPrev() const { return static_cast(prev); } + T *getPrev() { return static_cast(prev); } + + void insertAfter(IListTrait &node) { + auto old = this->next; + this->next = &node; + node->prev = this; + node->next = old; + }; + + void insertBefore(IListTrait &node) { + auto old = this->prev; + this->prev = node; + node->next = this; + node->prev = old; + }; + + /// Erase the node from the list. Return a pointer to the unliked node + T *removeFromList() { + prev->next = next; + next->prev = prev; + prev = nullptr; + next = nullptr; + return static_cast(this); + } + +private: + IListTrait *prev = nullptr; + IListTrait *next = nullptr; +}; + +/// Instrusive list +template +class IList { + static_assert(std::is_base_of_v, T>, + "T must implement IListTrait"); + using node = IListTrait; + +public: + IList() : dummyBegin{nullptr, &dummyEnd}, dummyEnd{&dummyBegin, dummyEnd} {} + IList(const IList &) = delete; + IList(IList &&) = delete; + +private: + // yes we could save 16 bytes, no i dont really care + node dummyBegin; + node dummyEnd; +}; + +} // namespace willow + +#endif // WILLOW_INCLUDE_ADT_ILIST_H diff --git a/willow/include/willow/IR/BasicBlock.h b/willow/include/willow/IR/BasicBlock.h index c18dfc2..5db8538 100644 --- a/willow/include/willow/IR/BasicBlock.h +++ b/willow/include/willow/IR/BasicBlock.h @@ -70,14 +70,14 @@ public: std::unordered_map& preds() { return predecessors; } const std::unordered_map& preds() const { return predecessors; } - inline void addPred(BasicBlock *bb) { + void addPred(BasicBlock *bb) { auto [it, inserted] = predecessors.try_emplace(bb, 1); if (!inserted) it->second += 1; } - inline void delPred(BasicBlock *bb) { + void delPred(BasicBlock *bb) { auto it = preds().find(bb); it->second -= 1; @@ -87,21 +87,35 @@ public: } }; -Instruction *BasicBlock::trailer() { +inline Instruction *BasicBlock::trailer() { if (empty()) return nullptr; else return body.back().get(); } -Instruction *BasicBlock::leader() { +inline Instruction *BasicBlock::leader() { if (empty()) return nullptr; else return body.back().get(); } -Instruction *BasicBlock::addInstruction(std::unique_ptr inst) { +inline const Instruction *BasicBlock::trailer() const { + if (empty()) + return nullptr; + else + return body.back().get(); +} + +inline const Instruction *BasicBlock::leader() const { + if (empty()) + return nullptr; + else + return body.back().get(); +} + +inline Instruction *BasicBlock::addInstruction(std::unique_ptr inst) { Instruction *p = inst.get(); p->setParent(this); body.push_back(std::move(inst)); diff --git a/willow/include/willow/IR/Constant.h b/willow/include/willow/IR/Constant.h index d93d65d..4e2a3d7 100644 --- a/willow/include/willow/IR/Constant.h +++ b/willow/include/willow/IR/Constant.h @@ -1,6 +1,7 @@ #ifndef WILLOW_INCLUDE_IR_CONSTANT_H #define WILLOW_INCLUDE_IR_CONSTANT_H +#include #include namespace willow { @@ -89,4 +90,19 @@ public: } // namespace willow +// TODO lol +inline std::ostream& operator<<(std::ostream &os, const willow::Constant& c) { + if (c.isPoison()) + return os << "poison"; + + if (c.isUndef()) + return os << "undef"; + + if (c.isInt()) { + return os << static_cast(&c)->getSExtVal(); + } + + std::unreachable(); +} + #endif // WILLOW_INCLUDE_IR_CONSTANT_H diff --git a/willow/include/willow/IR/Context.h b/willow/include/willow/IR/Context.h index 3f4f2ba..eda0241 100644 --- a/willow/include/willow/IR/Context.h +++ b/willow/include/willow/IR/Context.h @@ -17,6 +17,13 @@ class WillowContext { public: TypeContext &types() { return typectx; } ConstantPool &constants() { return constantpool; } + + template + [[nodiscard]] + Module *addModule(Args&&...args) { + modules.push_back(std::make_unique(std::forward(args)...)); + return modules.back().get(); + } }; } // namespace willow diff --git a/willow/include/willow/IR/Function.h b/willow/include/willow/IR/Function.h index 4be4de7..43a301b 100644 --- a/willow/include/willow/IR/Function.h +++ b/willow/include/willow/IR/Function.h @@ -38,6 +38,9 @@ public: : Value(ValueKind::Function, std::move(name), fty), parent(parent), params(std::move(params)) {} + Function(std::string name, Module *parent, Type fty) + : Value(ValueKind::Function, std::move(name), fty), parent(parent) {} + /// \return Parent module or nullptr. Module *getParent() { return parent; } const Module *getParent() const { return parent; } @@ -112,14 +115,14 @@ public: } }; -const BasicBlock *Function::entryBlock() const { +inline const BasicBlock *Function::entryBlock() const { if (blocks.empty()) return nullptr; else return blocks.front().get(); } -BasicBlock *Function::entryBlock() { +inline BasicBlock *Function::entryBlock() { if (blocks.empty()) return nullptr; else diff --git a/willow/include/willow/IR/IRBuilder.h b/willow/include/willow/IR/IRBuilder.h index 635c41b..6036e33 100644 --- a/willow/include/willow/IR/IRBuilder.h +++ b/willow/include/willow/IR/IRBuilder.h @@ -1,6 +1,7 @@ #ifndef WILLOW_INCLUDE_IR_IR_BUILDER_H #define WILLOW_INCLUDE_IR_IR_BUILDER_H +#include #include #include #include @@ -9,6 +10,8 @@ namespace willow { /// Helper for constructing and modifiying IR. class IRBuilder { + WillowContext &ctx; + const Module &mod; BasicBlock *block = nullptr; public: diff --git a/willow/include/willow/IR/Instruction.h b/willow/include/willow/IR/Instruction.h index a74c409..1198546 100644 --- a/willow/include/willow/IR/Instruction.h +++ b/willow/include/willow/IR/Instruction.h @@ -25,7 +25,11 @@ class Function; class Successors { friend class Instruction; std::array data; - uint8_t n = 0; + uint8_t n; + + Successors() : data{}, n{0} {} + explicit Successors(BasicBlock *a) : data{a}, n{1} {} + Successors(BasicBlock *a, BasicBlock *b) : data{a, b}, n{2} {} public: auto begin() { return data.data(); } @@ -198,6 +202,8 @@ constexpr std::string_view Instruction::opcodeName(Instruction::Opcode op) { case Alloca: return "alloca"; } + + std::unreachable(); } } // namespace willow @@ -212,11 +218,19 @@ struct std::formatter { } }; -// std::ostream& operator<<(std::ostream &os, const willow::Instruction &inst) { -// auto vty = inst.getType(); -// -// // int add %i % -// os << vty << " " << inst.opcode() << " " -// } +inline std::ostream& operator<<(std::ostream &os, const willow::Instruction::Opcode op) { + return os << willow::Instruction::opcodeName(op); +} + +inline std::ostream& operator<<(std::ostream &os, const willow::Instruction &inst) { + auto vty = inst.getType(); + + os << vty << " " << willow::Instruction::opcodeName(inst.opcode()); + + for (auto *operand : inst.getOperands()) + os << " " << operand; + + return os; +} #endif // WILLOW_INCLUDE_IR_INSTRUCTION_H diff --git a/willow/include/willow/IR/Location.h b/willow/include/willow/IR/Location.h index e0ce838..c3c241d 100644 --- a/willow/include/willow/IR/Location.h +++ b/willow/include/willow/IR/Location.h @@ -25,7 +25,7 @@ struct std::formatter { } }; -std::ostream &operator<<(std::ostream &os, const willow::Location &loc) { +inline std::ostream &operator<<(std::ostream &os, const willow::Location &loc) { return os << std::format("{}", loc); } diff --git a/willow/include/willow/IR/Types.h b/willow/include/willow/IR/Types.h index 9a4f868..40bb098 100644 --- a/willow/include/willow/IR/Types.h +++ b/willow/include/willow/IR/Types.h @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -118,23 +119,25 @@ public: }; constexpr std::string_view primitiveTypeName(TypeID tid) { + using namespace std::string_view_literals; using enum TypeID; switch (tid) { case TypeID::Void: - return "void"; + return "void"sv; case TypeID::BasicBlock: - return "label"; + return "label"sv; case TypeID::Int: - return "int"; + return "int"sv; case TypeID::Function: - return "function"; + return "function"sv; case TypeID::Ptr: - return "ptr"; + return "ptr"sv; case TypeID::Struct: - return "struct"; + return "struct"sv; case TypeID::Vector: - return "vector"; + return "vector"sv; } + std::unreachable(); } class BasicBlockTypeImpl : public TypeImpl { @@ -162,10 +165,10 @@ public: struct Hash { size_t operator()(const Key &k) const noexcept { std::size_t seed = 0; - willow::hashing::combine(seed, k.ret); + willow::hashing::combine(seed, k.ret.getImpl()); willow::hashing::combine(seed, k.params.size()); for (const auto &ty : k.params) - willow::hashing::combine(seed, ty); + willow::hashing::combine(seed, ty.getImpl()); return seed; } @@ -204,7 +207,7 @@ struct std::formatter { } }; -std::ostream &operator<<(std::ostream &os, willow::Type ty) { +inline std::ostream &operator<<(std::ostream &os, willow::Type ty) { return os << std::format("{}", ty); } diff --git a/willow/include/willow/IR/Value.h b/willow/include/willow/IR/Value.h index c219788..650e773 100644 --- a/willow/include/willow/IR/Value.h +++ b/willow/include/willow/IR/Value.h @@ -7,11 +7,13 @@ #include #include #include +#include namespace willow { class Instruction; class BasicBlock; +class Constant; enum class ValueKind { Instruction, ///< the identified result of an instruction @@ -82,4 +84,6 @@ public: } // namespace willow +inline std::ostream &operator<<(std::ostream &os, const willow::Value &v); + #endif // WILLOW_INCLUDE_IR_VALUE_H diff --git a/willow/lib/IR/Instruction.cpp b/willow/lib/IR/Instruction.cpp index 587736d..a6e0c9b 100644 --- a/willow/lib/IR/Instruction.cpp +++ b/willow/lib/IR/Instruction.cpp @@ -1,4 +1,5 @@ #include +#include namespace willow { @@ -33,6 +34,19 @@ bool Instruction::isTerminatorOp(Opcode op) { } } -Successors Instruction::succs(); - +Successors Instruction::succs() { + using enum Opcode; + switch (op) { + case Jmp: { + auto inst = static_cast(this); + return Successors{inst->getTarget()}; + } + case Br: { + auto inst = static_cast(this); + return Successors{inst->getTrueTarget(), inst->getFalseTarget()}; + } + default: + return Successors{}; + } +} }; // namespace willow diff --git a/willow/lib/IR/Value.cpp b/willow/lib/IR/Value.cpp new file mode 100644 index 0000000..13e029f --- /dev/null +++ b/willow/lib/IR/Value.cpp @@ -0,0 +1,27 @@ +#include +#include +#include + +std::ostream &operator<<(std::ostream &os, const willow::Value &v) { + using willow::ValueKind; + auto ty = v.getType(); + if (!v.isVoid()) + os << ty << " "; + + switch (v.getValueKind()) { + case ValueKind::Parameter: + [[fallthrough]]; + case ValueKind::Instruction: { + return os << "%" << v.getName(); + } + case ValueKind::BasicBlock: { + return os << "^" << v.getName(); + } + case ValueKind::Function: { + return os << "@" << v.getName(); + } + case ValueKind::Constant: { + return os << *static_cast(&v); + } + } +} diff --git a/willow/tools/willowc/BUILD.bazel b/willow/tools/willowc/BUILD.bazel index e69de29..708de13 100644 --- a/willow/tools/willowc/BUILD.bazel +++ b/willow/tools/willowc/BUILD.bazel @@ -0,0 +1,27 @@ +load("@rules_cc//cc:defs.bzl", "cc_library", "cc_binary") + +cc_library( + name = "willowc_lib", + srcs = glob([ + "lib/*.cpp", + ]), + hdrs = glob([ + "include/*.hpp", + ]), + copts = [ + "-std=c++23", + "-Wall", + ], + deps = ["//willow"], + strip_include_prefix = "include", + visibility = ["//visibility:public"], +) + +cc_binary( + name = "willowc", + srcs = [ + "main.cpp", + ], + deps = [":willowc_lib"], + visibility = ["//visibility:public"], +) diff --git a/willow/tools/willowc/include/ast.hpp b/willow/tools/willowc/include/ast.hpp new file mode 100644 index 0000000..8c59067 --- /dev/null +++ b/willow/tools/willowc/include/ast.hpp @@ -0,0 +1,62 @@ +#ifndef WILLOWC_INCLUDE_AST_HPP +#define WILLOWC_INCLUDE_AST_HPP + +#include +#include +#include + +#include +#include + +#include + +namespace willowc { + +using Opcode = willow::Instruction::Opcode; +using TokenIndex = std::size_t; + +// this is like willow::ValueKind, but treats groups all ssa values into 'Value' +// (because they can't be differentiated by syntax alone) +enum class ExprKind { Constant, BasicBlock, Function, Value }; + +struct ExprAST { + ExprKind kind; + + std::string name; + // token?? +}; + +struct InstAST { + Opcode op; + std::string name; + + std::vector> args; +}; + +struct BlockAST { + std::string label; + std::vector> body; +}; + +struct ParameterAST { + std::string name; + willow::Type type; +}; + +struct FunctionDeclAST { + std::string name; + std::vector> parameters; + std::string returntype; + + std::vector> body; + // TODO: movable symbol table +}; + +struct ModuleAST { + std::vector> Functions; + // TODO: imports, symbol table +}; + +}; // namespace willowc + +#endif // WILLOWC_INCLUDE_AST_HPP diff --git a/willow/tools/willowc/include/expr.hpp b/willow/tools/willowc/include/expr.hpp new file mode 100644 index 0000000..15d2985 --- /dev/null +++ b/willow/tools/willowc/include/expr.hpp @@ -0,0 +1,4 @@ +#ifndef WILLOWC_INCLUDE_EXPR_HPP +#define WILLOWC_INCLUDE_EXPR_HPP + +#endif // WILLOWC_INCLUDE_EXPR_HPP diff --git a/willow/tools/willowc/include/parser.hpp b/willow/tools/willowc/include/parser.hpp new file mode 100644 index 0000000..825dfdd --- /dev/null +++ b/willow/tools/willowc/include/parser.hpp @@ -0,0 +1,33 @@ +#ifndef WILLOWC_INCLUDE_PARSER_HPP +#define WILLOWC_INCLUDE_PARSER_HPP + +#include +#include + +#include +#include +#include + +namespace willowc { + +class Parser { + std::string_view buf; + + std::vector kinds; + std::vector starts; + Tokenizer tokenizer; + + std::size_t pos; + +public: + Parser(std::string_view buf) : buf(buf), tokenizer(buf) {} + + std::optional> parse(); + + TokenKind kind() const { return kinds[pos]; } + std::size_t start() const { return starts[pos]; } +}; + +} // namespace willowc + +#endif // WILLOWC_INCLUDE_PARSER_HPP diff --git a/willow/tools/willowc/include/sourcemanager.hpp b/willow/tools/willowc/include/sourcemanager.hpp new file mode 100644 index 0000000..a526e48 --- /dev/null +++ b/willow/tools/willowc/include/sourcemanager.hpp @@ -0,0 +1,26 @@ +#ifndef WILLOWC_INCLUDE_SOURCEMANAGER_HPP +#define WILLOWC_INCLUDE_SOURCEMANAGER_HPP + +#include +#include +#include +#include + +namespace willowc { + +using FileID = std::uint32_t; + +class SourceManager { +struct File { + std::string path; + std::unique_ptr buf; +}; +public: + std::optional addFile(std::string_view path); +private: + std::vector file_table; +}; + +} // namespace willowc + +#endif // WILLOWC_INCLUDE_SOURCEMANAGER_HPP diff --git a/willow/tools/willowc/include/tokenizer.hpp b/willow/tools/willowc/include/tokenizer.hpp new file mode 100644 index 0000000..3de9d32 --- /dev/null +++ b/willow/tools/willowc/include/tokenizer.hpp @@ -0,0 +1,72 @@ +#ifndef WILLOWC_INCLUDE_TOKENIZER_HPP +#define WILLOWC_INCLUDE_TOKENIZER_HPP + +#include + +namespace willowc { + +enum class TokenKind { + Function, + Variable, + Constant, + Type, + Label, + Inst, + + Comma, + Semicolon, + LParen, + RParen, + LCurly, + RCurly, + Equals, + RArrow, + Comment, + + FuncKW, + Eof, + Invalid, +}; + +struct Token { + std::size_t start, end; + TokenKind kind; +}; + +class Tokenizer { + std::string_view buf; + std::size_t offset; + + void skip(std::size_t idx = 1) { offset += idx; } + + char eat(std::size_t num = 1) { + if (offset >= buf.length()) + return '\0'; + + char c = buf[offset]; + offset += num; + return c; + } + + char peek(std::size_t idx = 0) { + if (offset + idx >= buf.length()) + return '\0'; + + return buf[offset + idx]; + } + + bool scan_id(bool accept_digits); + bool scan_dec(); + bool scan_hex(); + bool scan_constant(); +public: + explicit Tokenizer(std::string_view buf, std::size_t offset = 0) + : buf{buf}, offset{offset} {} + + Token scan(); + void seek(uint64_t offset); +}; + +} // namespace willowc + +#endif // WILLOWC_INCLUDE_TOKENIZER_HPP diff --git a/willow/tools/willowc/lib/driver.cpp b/willow/tools/willowc/lib/driver.cpp new file mode 100644 index 0000000..e69de29 diff --git a/willow/tools/willowc/lib/parser.cpp b/willow/tools/willowc/lib/parser.cpp new file mode 100644 index 0000000..becc171 --- /dev/null +++ b/willow/tools/willowc/lib/parser.cpp @@ -0,0 +1,9 @@ +#include + +namespace willowc { + +std::optional> parse() { + +} + +} diff --git a/willow/tools/willowc/lib/sourcemanager.cpp b/willow/tools/willowc/lib/sourcemanager.cpp new file mode 100644 index 0000000..e2a8e72 --- /dev/null +++ b/willow/tools/willowc/lib/sourcemanager.cpp @@ -0,0 +1,41 @@ +#include + +#include +#include + +namespace willowc { + +std::optional SourceManager::addFile(std::string_view _path) { + std::error_code ec; + + std::filesystem::path uncanonical_path{_path}; + auto path = std::filesystem::weakly_canonical(uncanonical_path, ec); + if (ec) { + return false; + } + std::string display_path = path.make_preferred(); + + if (!std::filesystem::exists(path, ec) || ec) + return std::nullopt; + + if (!std::filesystem::is_regular_file(path, ec) || ec) + return std::nullopt; + + std::size_t filesize = std::filesystem::file_size(path, ec); + if (ec) + return std::nullopt; + + std::ifstream f{display_path, std::ios::binary}; + if (!f) + return std::nullopt; + + auto buf = std::make_unique(filesize); + f.read(buf.get(), filesize); + + const FileID id = file_table.size(); + file_table.push_back(File{std::move(display_path), std::move(buf)}); + + return id; +} + +} // namespace willowc diff --git a/willow/tools/willowc/lib/tokenizer.cpp b/willow/tools/willowc/lib/tokenizer.cpp new file mode 100644 index 0000000..0c1f917 --- /dev/null +++ b/willow/tools/willowc/lib/tokenizer.cpp @@ -0,0 +1,176 @@ +#include + +namespace willowc { + +static inline bool is_space(unsigned char c) { + return c == ' ' || c == '\t' || c == '\n' || c == '\r'; +} +static inline bool is_digit(unsigned char c) { return c >= '0' && c <= '9'; } +static inline bool is_xdigit(unsigned char c) { + return is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); +} +static inline bool is_alpha(unsigned char c) { + unsigned char x = static_cast(c | 0x20); + return x >= 'a' && x <= 'z'; +} + +static inline bool valid_id_start(int c) { + return is_alpha(c) || c == '$' || c == '.' || c == '_' || c == '-'; +} + +bool Tokenizer::scan_id(bool accept_digits = true) { + char c = peek(); + + if (accept_digits && is_digit(c)) { + // if it starts with a digit, must be all digits + while (is_digit(peek())) + skip(); + return true; + } + + if (!valid_id_start(c)) + return false; + + while (valid_id_start(peek()) || isdigit(peek())) + skip(); + + return true; +} + +Token Tokenizer::scan() { + std::size_t start = this->offset; + + while (isspace(peek())) + skip(); + + TokenKind k = [&] { + switch (peek()) { + case '@': + skip(); + if (scan_id(false)) + return TokenKind::Function; + return TokenKind::Invalid; + case '%': + skip(); + if (scan_id()) + return TokenKind::Variable; + return TokenKind::Invalid; + case '^': + skip(); + if (scan_id()) + return TokenKind::Label; + return TokenKind::Invalid; + case ',': + skip(); + return TokenKind::Comma; + case ';': + skip(); + return TokenKind::Semicolon; + case '(': + skip(); + return TokenKind::LParen; + case ')': + skip(); + return TokenKind::RParen; + case '{': + skip(); + return TokenKind::LCurly; + case '}': + skip(); + return TokenKind::RCurly; + case '=': + skip(); + return TokenKind::Equals; + case '-': { + if (peek(1) == '>') { + skip(2); + return TokenKind::RArrow; + } + if (isdigit(peek(1))) { + skip(); + if (scan_dec()) + return TokenKind::Constant; + } + return TokenKind::Invalid; + } + case '/': { + skip(); + if (peek() != '/') + return TokenKind::Invalid; + + skip(); + char c = eat(); + while (c != '\0' && c != '\n') + c = eat(); + + return TokenKind::Comment; + } + case '\0': + return TokenKind::Eof; + default: { + if (is_digit(peek())) + return scan_constant() ? TokenKind::Constant : TokenKind::Invalid; + + if (peek() == 'i') { + skip(); + if (scan_dec()) + return TokenKind::Type; + } + + if (isalpha(peek())) { + skip(); + while (isalnum(peek()) || peek() == '.') + skip(); + return TokenKind::Inst; + } + + return TokenKind::Invalid; + } + } + }(); + + return Token{start, offset, k}; +} + +bool Tokenizer::scan_dec() { + if (!is_digit(peek())) + return false; + skip(); + while (is_digit(peek())) + skip(); + + return true; +} + +bool Tokenizer::scan_hex() { + if (!is_xdigit(peek())) + return false; + skip(); + while (is_xdigit(peek())) + skip(); + + return true; +} + +bool Tokenizer::scan_constant() { + if (peek() == '-') + skip(); + + if (peek() == '0') { + skip(); + if (is_digit(peek())) + return false; + if (peek() == 'x') { + skip(); + return scan_hex(); + } else { + return true; // 0 + } + } else if (is_digit(peek())) { + return scan_dec(); + } + + return false; +} + +} // namespace willowc diff --git a/willow/tools/willowc/main.cpp b/willow/tools/willowc/main.cpp new file mode 100644 index 0000000..237c8ce --- /dev/null +++ b/willow/tools/willowc/main.cpp @@ -0,0 +1 @@ +int main() {} diff --git a/willow/unittest/BUILD.bazel b/willow/unittest/BUILD.bazel new file mode 100644 index 0000000..8f1c55d --- /dev/null +++ b/willow/unittest/BUILD.bazel @@ -0,0 +1,6 @@ +test_suite( + name = "unittest", + tests = [ + "//willow/unittest/ir:ir_tests", + ] +) diff --git a/willow/unittest/ir/BUILD.bazel b/willow/unittest/ir/BUILD.bazel new file mode 100644 index 0000000..b41dfcd --- /dev/null +++ b/willow/unittest/ir/BUILD.bazel @@ -0,0 +1,16 @@ +cc_test( + name = "verifier", + srcs = ["VerifierTest.cpp"], + deps = [ + "//willow", + "@catch2//:catch2_main" + ], + tags = ["ir"] +) + +test_suite( + name = "ir_tests", + tests = [ + ":verifier" + ], +) diff --git a/willow/unittest/ir/VerifierTest.cpp b/willow/unittest/ir/VerifierTest.cpp new file mode 100644 index 0000000..959d72a --- /dev/null +++ b/willow/unittest/ir/VerifierTest.cpp @@ -0,0 +1,51 @@ +#include + +#include +#include +#include +#include +#include +#include + +using namespace willow; + +TEST_CASE("valid modules", "[verifier]") { + WillowContext ctx; + std::vector diags; + DiagnosticEngine eng( + [&](Diagnostic d) { diags.push_back(std::move(d)); }); + + auto &m = *ctx.addModule("test"); + SECTION("empty module") { + REQUIRE(succeeded(verifyModule(ctx, m, eng))); + REQUIRE(diags.empty()); + } +} + +TEST_CASE("valid function", "[verifier]") { + WillowContext ctx; + std::vector diags; + DiagnosticEngine eng( + [&](Diagnostic d) { diags.push_back(std::move(d)); }); + + auto &m = *ctx.addModule("test"); + + Type fty = ctx.types().FunctionType(ctx.types().VoidType(), {}); + auto &fn = *m.emplaceFunction("fn", &m, fty); + + REQUIRE(succeeded(verifyFunction(ctx, fn, eng))); + REQUIRE(diags.empty()); +} + +TEST_CASE("invalid basic block", "[verifier]") { + WillowContext ctx; + std::vector diags; + DiagnosticEngine eng( + [&](Diagnostic d) { diags.push_back(std::move(d)); }); + + auto &m = *ctx.addModule("test"); + + Type fty = ctx.types().FunctionType(ctx.types().VoidType(), {}); + auto &fn = *m.emplaceFunction("fn", &m, fty); + // TODO +} diff --git a/willow/unittest/willow_unittest.bzl b/willow/unittest/willow_unittest.bzl new file mode 100644 index 0000000..f1bb102 --- /dev/null +++ b/willow/unittest/willow_unittest.bzl @@ -0,0 +1,8 @@ +def willow_unittest(name, srcs, deps = [], tags = [], **kwargs): + native.cc_test( + name = name, + srcs = srcs, + deps = deps + ["@catch2//:catch2_main"], + tags = tags, + **kwargs + ) -- cgit v1.2.3