From add95b14f74e6dbe04a6efe98ff0f20424930b73 Mon Sep 17 00:00:00 2001
From: Stefan Weigl-Bosker
Date: Tue, 3 Feb 2026 14:59:53 -0500
Subject: [willow]: initial frontend work, unit tests (#8)

---
 willow/tools/willowc/include/ast.hpp           | 62 ++++++++++++++++++++++
 willow/tools/willowc/include/expr.hpp          |  4 ++
 willow/tools/willowc/include/parser.hpp        | 33 ++++++++++++
 willow/tools/willowc/include/sourcemanager.hpp | 26 ++++++++++
 willow/tools/willowc/include/tokenizer.hpp     | 72 ++++++++++++++++++++++++++
 5 files changed, 197 insertions(+)
 create mode 100644 willow/tools/willowc/include/ast.hpp
 create mode 100644 willow/tools/willowc/include/expr.hpp
 create mode 100644 willow/tools/willowc/include/parser.hpp
 create mode 100644 willow/tools/willowc/include/sourcemanager.hpp
 create mode 100644 willow/tools/willowc/include/tokenizer.hpp

(limited to 'willow/tools/willowc/include')

diff --git a/willow/tools/willowc/include/ast.hpp b/willow/tools/willowc/include/ast.hpp
new file mode 100644
index 0000000..8c59067
--- /dev/null
+++ b/willow/tools/willowc/include/ast.hpp
@@ -0,0 +1,62 @@
+#ifndef WILLOWC_INCLUDE_AST_HPP
+#define WILLOWC_INCLUDE_AST_HPP
+
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+
+namespace willowc {
+
+using Opcode = willow::Instruction::Opcode;
+using TokenIndex = std::size_t;
+
+// this is like willow::ValueKind, but treats groups all ssa values into 'Value'
+// (because they can't be differentiated by syntax alone)
+enum class ExprKind { Constant, BasicBlock, Function, Value };
+
+struct ExprAST {
+  ExprKind kind;
+
+  std::string name;
+  // token??
+};
+
+struct InstAST {
+  Opcode op;
+  std::string name;
+
+  std::vector> args;
+};
+
+struct BlockAST {
+  std::string label;
+  std::vector> body;
+};
+
+struct ParameterAST {
+  std::string name;
+  willow::Type type;
+};
+
+struct FunctionDeclAST {
+  std::string name;
+  std::vector> parameters;
+  std::string returntype;
+
+  std::vector> body;
+  // TODO: movable symbol table
+};
+
+struct ModuleAST {
+  std::vector> Functions;
+  // TODO: imports, symbol table
+};
+
+}; // namespace willowc
+
+#endif // WILLOWC_INCLUDE_AST_HPP
diff --git a/willow/tools/willowc/include/expr.hpp b/willow/tools/willowc/include/expr.hpp
new file mode 100644
index 0000000..15d2985
--- /dev/null
+++ b/willow/tools/willowc/include/expr.hpp
@@ -0,0 +1,4 @@
+#ifndef WILLOWC_INCLUDE_EXPR_HPP
+#define WILLOWC_INCLUDE_EXPR_HPP
+
+#endif // WILLOWC_INCLUDE_EXPR_HPP
diff --git a/willow/tools/willowc/include/parser.hpp b/willow/tools/willowc/include/parser.hpp
new file mode 100644
index 0000000..825dfdd
--- /dev/null
+++ b/willow/tools/willowc/include/parser.hpp
@@ -0,0 +1,33 @@
+#ifndef WILLOWC_INCLUDE_PARSER_HPP
+#define WILLOWC_INCLUDE_PARSER_HPP
+
+#include
+#include
+
+#include
+#include
+#include
+
+namespace willowc {
+
+class Parser {
+  std::string_view buf;
+
+  std::vector kinds;
+  std::vector starts;
+  Tokenizer tokenizer;
+
+  std::size_t pos;
+
+public:
+  Parser(std::string_view buf) : buf(buf), tokenizer(buf) {}
+
+  std::optional> parse();
+
+  TokenKind kind() const { return kinds[pos]; }
+  std::size_t start() const { return starts[pos]; }
+};
+
+} // namespace willowc
+
+#endif // WILLOWC_INCLUDE_PARSER_HPP
diff --git a/willow/tools/willowc/include/sourcemanager.hpp b/willow/tools/willowc/include/sourcemanager.hpp
new file mode 100644
index 0000000..a526e48
--- /dev/null
+++ b/willow/tools/willowc/include/sourcemanager.hpp
@@ -0,0 +1,26 @@
+#ifndef WILLOWC_INCLUDE_SOURCEMANAGER_HPP
+#define WILLOWC_INCLUDE_SOURCEMANAGER_HPP
+
+#include
+#include
+#include
+#include
+
+namespace willowc {
+
+using FileID = std::uint32_t;
+
+class SourceManager {
+struct File {
+  std::string path;
+  std::unique_ptr buf;
+};
+public:
+  std::optional addFile(std::string_view path);
+private:
+  std::vector file_table;
+};
+
+} // namespace willowc
+
+#endif // WILLOWC_INCLUDE_SOURCEMANAGER_HPP
diff --git a/willow/tools/willowc/include/tokenizer.hpp b/willow/tools/willowc/include/tokenizer.hpp
new file mode 100644
index 0000000..3de9d32
--- /dev/null
+++ b/willow/tools/willowc/include/tokenizer.hpp
@@ -0,0 +1,72 @@
+#ifndef WILLOWC_INCLUDE_TOKENIZER_HPP
+#define WILLOWC_INCLUDE_TOKENIZER_HPP
+
+#include
+
+namespace willowc {
+
+enum class TokenKind {
+  Function,
+  Variable,
+  Constant,
+  Type,
+  Label,
+  Inst,
+
+  Comma,
+  Semicolon,
+  LParen,
+  RParen,
+  LCurly,
+  RCurly,
+  Equals,
+  RArrow,
+  Comment,
+
+  FuncKW,
+  Eof,
+  Invalid,
+};
+
+struct Token {
+  std::size_t start, end;
+  TokenKind kind;
+};
+
+class Tokenizer {
+  std::string_view buf;
+  std::size_t offset;
+
+  void skip(std::size_t idx = 1) { offset += idx; }
+
+  char eat(std::size_t num = 1) {
+    if (offset >= buf.length())
+      return '\0';
+
+    char c = buf[offset];
+    offset += num;
+    return c;
+  }
+
+  char peek(std::size_t idx = 0) {
+    if (offset + idx >= buf.length())
+      return '\0';
+
+    return buf[offset + idx];
+  }
+
+  bool scan_id(bool accept_digits);
+  bool scan_dec();
+  bool scan_hex();
+  bool scan_constant();
+public:
+  explicit Tokenizer(std::string_view buf, std::size_t offset = 0)
+      : buf{buf}, offset{offset} {}
+
+  Token scan();
+  void seek(uint64_t offset);
+};
+
+} // namespace willowc
+
+#endif // WILLOWC_INCLUDE_TOKENIZER_HPP
--
cgit v1.2.3