From 8f98dc579af1993ec85bd849656c4835b4039dd6 Mon Sep 17 00:00:00 2001 From: Stefan Weigl-Bosker Date: Mon, 23 Feb 2026 22:18:22 -0500 Subject: [willow]: frontend plumbing (#13) ... --- willow/tools/willowc/include/compiler.hpp | 40 ++++++++++++++++++ willow/tools/willowc/include/driver.hpp | 10 +++++ willow/tools/willowc/include/parser.hpp | 32 ++++++++------ willow/tools/willowc/include/sourcemanager.hpp | 39 ++++++++++++++++- willow/tools/willowc/include/tokenizer.hpp | 58 ++++++++++++++++++++++++++ 5 files changed, 165 insertions(+), 14 deletions(-) create mode 100644 willow/tools/willowc/include/compiler.hpp create mode 100644 willow/tools/willowc/include/driver.hpp (limited to 'willow/tools/willowc/include') diff --git a/willow/tools/willowc/include/compiler.hpp b/willow/tools/willowc/include/compiler.hpp new file mode 100644 index 0000000..96ca480 --- /dev/null +++ b/willow/tools/willowc/include/compiler.hpp @@ -0,0 +1,40 @@ +#ifndef WILLOWC_INCLUDE_COMPILER_HPP +#define WILLOWC_INCLUDE_COMPILER_HPP + +#include + +#include +#include + +namespace willowc { + +class Compiler { +public: + struct Options { + std::string filename; + bool use_stdin; + willow::Severity log_level; + }; + Compiler(); + Compiler(const Compiler &) = delete; + Compiler &operator=(const Compiler &) = delete; + + willow::LogicalResult addSourceFile(const std::string &path); + willow::LogicalResult addStdIn(); + // TODO + void run(); + void compile(FileID file); + + void setLogLevel(willow::Severity sev) { log_level_ = sev; } + size_t numFiles() { return sourcemanager_.numFiles(); } +private: + SourceManager sourcemanager_; + willow::Severity log_level_; + willow::DiagnosticEngine diagnostic_engine_; + + void emitDiagnostic(const willow::Diagnostic &d); +}; + +}; // namespace willowc + +#endif // WILLOWC_INCLUDE_COMPILER_HPP diff --git a/willow/tools/willowc/include/driver.hpp b/willow/tools/willowc/include/driver.hpp new file mode 100644 index 0000000..028adc7 --- /dev/null +++ b/willow/tools/willowc/include/driver.hpp @@ -0,0 +1,10 @@ +#ifndef WILLOWC_INCLUDE_DRIVER_HPP +#define WILLOWC_INCLUDE_DRIVER_HPP + +namespace willowc { + +int willowc_main(int argc, char **argv); + +} + +#endif // WILLOWC_INCLUDE_DRIVER_HPP diff --git a/willow/tools/willowc/include/parser.hpp b/willow/tools/willowc/include/parser.hpp index 825dfdd..59f6bfb 100644 --- a/willow/tools/willowc/include/parser.hpp +++ b/willow/tools/willowc/include/parser.hpp @@ -1,31 +1,39 @@ #ifndef WILLOWC_INCLUDE_PARSER_HPP #define WILLOWC_INCLUDE_PARSER_HPP -#include #include +#include +#include + +#include -#include #include +#include #include namespace willowc { class Parser { - std::string_view buf; +public: + Parser(SourceManager::File &f, willow::DiagnosticEngine &diagnostic_engine) + : file_(f), tokenizer_(f.buf.get()), + diagnostic_engine_(diagnostic_engine) {} - std::vector kinds; - std::vector starts; - Tokenizer tokenizer; + std::optional> run(); - std::size_t pos; + TokenKind kind() const { return kinds_[pos_]; } + std::size_t start() const { return starts_[pos_]; } -public: - Parser(std::string_view buf) : buf(buf), tokenizer(buf) {} +private: + willow::DiagnosticBuilder emitParserError(Token t, willow::Severity severity); - std::optional> parse(); + SourceManager::File &file_; + std::vector kinds_; + std::vector starts_; + Tokenizer tokenizer_; + std::size_t pos_; + willow::DiagnosticEngine &diagnostic_engine_; - TokenKind kind() const { return kinds[pos]; } - std::size_t start() const { return starts[pos]; } }; } // namespace willowc diff --git a/willow/tools/willowc/include/sourcemanager.hpp b/willow/tools/willowc/include/sourcemanager.hpp index a526e48..25d4128 100644 --- a/willow/tools/willowc/include/sourcemanager.hpp +++ b/willow/tools/willowc/include/sourcemanager.hpp @@ -1,24 +1,59 @@ #ifndef WILLOWC_INCLUDE_SOURCEMANAGER_HPP #define WILLOWC_INCLUDE_SOURCEMANAGER_HPP -#include #include #include +#include +#include #include +#include + +#include namespace willowc { using FileID = std::uint32_t; +class LineCache { +public: +private: + std::unordered_map cache_; +}; + class SourceManager { +public: struct File { std::string path; + // TODO: remove this, but find something better than using the abs path + std::string display_path; std::unique_ptr buf; + size_t size; + + willow::Location getLoc(std::size_t offset); + std::map linecache_; + + File(std::string path, std::string display_path, std::unique_ptr buf, + size_t size) + : path(std::move(path)), display_path(std::move(display_path)), + buf(std::move(buf)), size(size), linecache_{{{0, 1}}} {} }; -public: + std::optional addFile(std::string_view path); + std::optional addStdIn(); + std::optional getFileID(const std::string& path); + std::string_view getBuf(FileID file) const { + const File &f = file_table.at(file); + return std::string_view(f.buf.get(), f.size); + } + File &getFile(FileID id) { return file_table.at(id); } + + const std::vector& files() const { return file_table; } + std::vector& files() { return file_table; } + + size_t numFiles() { return file_table.size(); } private: std::vector file_table; + std::unordered_map ids; }; } // namespace willowc diff --git a/willow/tools/willowc/include/tokenizer.hpp b/willow/tools/willowc/include/tokenizer.hpp index 3de9d32..47577ab 100644 --- a/willow/tools/willowc/include/tokenizer.hpp +++ b/willow/tools/willowc/include/tokenizer.hpp @@ -1,6 +1,7 @@ #ifndef WILLOWC_INCLUDE_TOKENIZER_HPP #define WILLOWC_INCLUDE_TOKENIZER_HPP +#include #include namespace willowc { @@ -14,6 +15,7 @@ enum class TokenKind { Inst, Comma, + Colon, Semicolon, LParen, RParen, @@ -37,6 +39,8 @@ class Tokenizer { std::string_view buf; std::size_t offset; + friend class Parser; + void skip(std::size_t idx = 1) { offset += idx; } char eat(std::size_t num = 1) { @@ -55,6 +59,8 @@ class Tokenizer { return buf[offset + idx]; } + void recover(); + bool scan_id(bool accept_digits); bool scan_dec(); bool scan_hex(); @@ -67,6 +73,58 @@ public: void seek(uint64_t offset); }; +constexpr std::string_view TokenKindName(TokenKind t) { + switch (t) { + case TokenKind::Function: + return "Function"; + case TokenKind::Variable: + return "Variable"; + case TokenKind::Constant: + return "Constant"; + case TokenKind::Type: + return "Type"; + case TokenKind::Label: + return "Label"; + case TokenKind::Inst: + return "Inst"; + case TokenKind::Comma: + return "Comma"; + case TokenKind::Colon: + return "Colon"; + case TokenKind::Semicolon: + return "Semicolon"; + case TokenKind::LParen: + return "LParen"; + case TokenKind::RParen: + return "RParen"; + case TokenKind::LCurly: + return "LCurly"; + case TokenKind::RCurly: + return "RCurly"; + case TokenKind::Equals: + return "Equals"; + case TokenKind::RArrow: + return "RArrow"; + case TokenKind::Comment: + return "Comment"; + case TokenKind::FuncKW: + return "FuncKW"; + case TokenKind::Eof: + return "Eof"; + case TokenKind::Invalid: + return "Invalid"; + } + std::unreachable(); +} + } // namespace willowc +template <> +struct std::formatter { + constexpr auto parse(std::format_parse_context &ctx) { return ctx.begin(); } + constexpr auto format(const willowc::TokenKind t, std::format_context &ctx) const { + return std::format_to(ctx.out(), "{}", TokenKindName(t)); + } +}; + #endif // WILLOWC_INCLUDE_TOKENIZER_HPP -- cgit v1.2.3