diff options
| author | Stefan Weigl-Bosker <stefan@s00.xyz> | 2026-02-23 22:18:22 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-02-23 22:18:22 -0500 |
| commit | 8f98dc579af1993ec85bd849656c4835b4039dd6 (patch) | |
| tree | 3ee45620d83b209c1c11248afc9ab83ffcf39691 | |
| parent | c2d4209f85f46cc91163bc47cc43db252c94acf6 (diff) | |
| download | compiler-8f98dc579af1993ec85bd849656c4835b4039dd6.tar.gz | |
[willow]: frontend plumbing (#13)
...
| -rw-r--r-- | .bazelversion | 1 | ||||
| -rw-r--r-- | BUILD.bazel | 1 | ||||
| -rw-r--r-- | MODULE.bazel | 2 | ||||
| -rw-r--r-- | MODULE.bazel.lock | 2 | ||||
| -rw-r--r-- | nix/flake.nix | 3 | ||||
| -rw-r--r-- | willow/include/willow/IR/Diagnostic.h | 34 | ||||
| -rw-r--r-- | willow/include/willow/IR/Location.h | 5 | ||||
| -rw-r--r-- | willow/include/willow/Util/Color.h | 219 | ||||
| -rw-r--r-- | willow/tools/willowc/BUILD.bazel | 9 | ||||
| -rw-r--r-- | willow/tools/willowc/include/compiler.hpp | 40 | ||||
| -rw-r--r-- | willow/tools/willowc/include/driver.hpp | 10 | ||||
| -rw-r--r-- | willow/tools/willowc/include/parser.hpp | 32 | ||||
| -rw-r--r-- | willow/tools/willowc/include/sourcemanager.hpp | 39 | ||||
| -rw-r--r-- | willow/tools/willowc/include/tokenizer.hpp | 58 | ||||
| -rw-r--r-- | willow/tools/willowc/lib/compiler.cpp | 66 | ||||
| -rw-r--r-- | willow/tools/willowc/lib/driver.cpp | 76 | ||||
| -rw-r--r-- | willow/tools/willowc/lib/parser.cpp | 21 | ||||
| -rw-r--r-- | willow/tools/willowc/lib/sourcemanager.cpp | 75 | ||||
| -rw-r--r-- | willow/tools/willowc/lib/tokenizer.cpp | 67 | ||||
| -rw-r--r-- | willow/tools/willowc/main.cpp | 9 |
20 files changed, 734 insertions, 35 deletions
diff --git a/.bazelversion b/.bazelversion new file mode 100644 index 0000000..e7fdef7 --- /dev/null +++ b/.bazelversion @@ -0,0 +1 @@ +8.4.2 diff --git a/BUILD.bazel b/BUILD.bazel index 59c5d8f..60088b1 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -6,6 +6,5 @@ refresh_compile_commands( targets = { "//willow:willow": "", "//willow:willowc": "", - "//willow/unittest/ir:verifier": "", }, ) diff --git a/MODULE.bazel b/MODULE.bazel index a42cd9e..914ce88 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -11,7 +11,7 @@ git_override( commit = "abb61a688167623088f8768cc9264798df6a9d10", ) bazel_dep(name = "catch2", version = "3.12.0") - +bazel_dep(name = "argparse", version = "3.2.0") bazel_dep(name = "willow", version = "0.0.1") local_path_override( diff --git a/MODULE.bazel.lock b/MODULE.bazel.lock index 9883741..2dc0b66 100644 --- a/MODULE.bazel.lock +++ b/MODULE.bazel.lock @@ -10,6 +10,8 @@ "https://bcr.bazel.build/modules/abseil-cpp/20230802.1/MODULE.bazel": "fa92e2eb41a04df73cdabeec37107316f7e5272650f81d6cc096418fe647b915", "https://bcr.bazel.build/modules/abseil-cpp/20240116.1/MODULE.bazel": "37bcdb4440fbb61df6a1c296ae01b327f19e9bb521f9b8e26ec854b6f97309ed", "https://bcr.bazel.build/modules/abseil-cpp/20240116.1/source.json": "9be551b8d4e3ef76875c0d744b5d6a504a27e3ae67bc6b28f46415fd2d2957da", + "https://bcr.bazel.build/modules/argparse/3.2.0/MODULE.bazel": "74f2686fba48acea8164eb256e0f1d61230254c2bc042c9269a2d48047a6d2eb", + "https://bcr.bazel.build/modules/argparse/3.2.0/source.json": "6c5c6818f5d79475dd3346822f96047696ccb4fdf5f4d03c594607df900e0da4", "https://bcr.bazel.build/modules/bazel_features/1.1.1/MODULE.bazel": "27b8c79ef57efe08efccbd9dd6ef70d61b4798320b8d3c134fd571f78963dbcd", "https://bcr.bazel.build/modules/bazel_features/1.11.0/MODULE.bazel": "f9382337dd5a474c3b7d334c2f83e50b6eaedc284253334cf823044a26de03e8", "https://bcr.bazel.build/modules/bazel_features/1.15.0/MODULE.bazel": "d38ff6e517149dc509406aca0db3ad1efdd890a85e049585b7234d04238e2a4d", diff --git a/nix/flake.nix b/nix/flake.nix index fa570f3..6d151c6 100644 --- a/nix/flake.nix +++ b/nix/flake.nix @@ -26,8 +26,7 @@ devShells.default = pkgs.mkShell { name = "compiler stuff"; packages = with pkgs; [ - # clang-tools - # bazelisk + argparse bazel_8 bazel-buildtools nix diff --git a/willow/include/willow/IR/Diagnostic.h b/willow/include/willow/IR/Diagnostic.h index 68483d9..137c537 100644 --- a/willow/include/willow/IR/Diagnostic.h +++ b/willow/include/willow/IR/Diagnostic.h @@ -2,10 +2,12 @@ #define WILLOW_INCLUDE_IR_DIAGNOSTIC_H #include <willow/IR/Location.h> +#include <willow/Util/Color.h> #include <string> #include <vector> #include <ostream> +#include <utility> namespace willow { @@ -18,6 +20,38 @@ struct Diagnostic { std::vector<Diagnostic> notes; }; +constexpr termcolor::TextStyle getSeverityColor(Severity sev) { + using namespace termcolor; + switch (sev) { + case Severity::Debug: + return TextStyle{AnsiColor::Green, Emphasis::Bold}; + case Severity::Remark: + return TextStyle{AnsiColor::Cyan, Emphasis::Bold}; + case Severity::Warning: + return TextStyle{AnsiColor::Magenta, Emphasis::Bold}; + case Severity::Error: + return TextStyle{AnsiColor::Red, Emphasis::Bold}; + default: + std::unreachable(); + } +} + +constexpr std::string_view getSeverityName(Severity sev) { + using namespace std::string_view_literals; + switch (sev) { + case Severity::Debug: + return "debug"sv; + case Severity::Remark: + return "info"sv; + case Severity::Warning: + return "warning"sv; + case Severity::Error: + return "error"sv; + default: + std::unreachable(); + } +} + } // namespace willow std::ostream &operator<<(std::ostream &os, const willow::Diagnostic &diagnostic); diff --git a/willow/include/willow/IR/Location.h b/willow/include/willow/IR/Location.h index c3c241d..0db7c58 100644 --- a/willow/include/willow/IR/Location.h +++ b/willow/include/willow/IR/Location.h @@ -9,8 +9,9 @@ namespace willow { /// A source location struct Location { std::string_view filename; - int line; - int col; + uint32_t line; + uint32_t col; + size_t offset; }; } // namespace willow diff --git a/willow/include/willow/Util/Color.h b/willow/include/willow/Util/Color.h new file mode 100644 index 0000000..d7c7840 --- /dev/null +++ b/willow/include/willow/Util/Color.h @@ -0,0 +1,219 @@ +#ifndef WILLOW_INCLUDE_UTIL_COLOR_H +#define WILLOW_INCLUDE_UTIL_COLOR_H + +#include <cassert> +#include <cstddef> +#include <format> +#include <stdint.h> +#include <string_view> + +/// \file Utilities for printing pretty terminal output +/// +/// This is heavily inspired by fmtlib include/fmt/color.h + +namespace willow { + +namespace termcolor { + +enum class AnsiColor : uint8_t { + None = 0, + Black = 30, + Red, + Green, + Yellow, + Blue, + Magenta, + Cyan, + White, + Default = 39, + BrightBlack = 90, + BrightRed, + BrightGreen, + BrightYellow, + BrightBlue, + BrightMagenta, + BrightCyan, + BrightWhite +}; + +enum class Emphasis : uint8_t { + None = 0, + Bold = 1, + Faint = 1 << 1, + Italic = 1 << 2, + Underline = 1 << 3, + Blink = 1 << 4, + Reverse = 1 << 5, + Conceal = 1 << 6, + Strikethrough = 1 << 7, +}; + +constexpr Emphasis operator|(Emphasis a, Emphasis b) noexcept { + return static_cast<Emphasis>(static_cast<uint8_t>(a) | + static_cast<uint8_t>(b)); +} +constexpr Emphasis &operator|=(Emphasis &a, Emphasis b) noexcept { + return a = (a | b); +} +constexpr bool has(Emphasis a, Emphasis b) { + return (static_cast<uint8_t>(a) & static_cast<uint8_t>(b)) != 0; +} + +struct TextStyle { + AnsiColor fg = AnsiColor::None; + AnsiColor bg = AnsiColor::None; + Emphasis emph = Emphasis::None; + + explicit constexpr TextStyle() = default; + constexpr TextStyle(AnsiColor fg, AnsiColor bg = AnsiColor::None, + Emphasis emph = Emphasis::None) noexcept + : fg(fg), bg(bg) {} + constexpr TextStyle(AnsiColor fg, Emphasis emph = Emphasis::None) noexcept + : fg(fg), emph(emph) {} + + constexpr bool has_foreground() const noexcept { + return fg != AnsiColor::None; + } + + constexpr bool has_background() const noexcept { + return bg != AnsiColor::None; + } + + constexpr bool has_emphasis() const noexcept { + return emph != Emphasis::None; + } + + // special sentinel value for a full reset + constexpr bool is_reset() const noexcept { + return fg == AnsiColor::None && bg == AnsiColor::None && + emph == Emphasis::None; + } +}; + +template <typename CharT> +struct AnsiColorEscape { + constexpr AnsiColorEscape(AnsiColor text_color, bool is_bg = false) noexcept { + if (is_bg) + text_color = + static_cast<AnsiColor>(static_cast<uint8_t>(text_color) + 10); + + buffer[size++] = static_cast<CharT>('\x1b'); + buffer[size++] = static_cast<CharT>('['); + + uint8_t value = static_cast<uint8_t>(text_color); + + // some bg colors need 3 digits + if (value >= 100u) { + buffer[size++] = static_cast<CharT>('1'); + value %= 100u; + } + buffer[size++] = static_cast<CharT>('0' + value / 10u); + buffer[size++] = static_cast<CharT>('0' + value % 10u); + + buffer[size++] = static_cast<CharT>('m'); + } + + constexpr AnsiColorEscape(Emphasis em) noexcept { + uint8_t em_codes[NUM_EMPHASES] = {}; + if (has_emphasis(em, Emphasis::Bold)) + em_codes[0] = 1; + if (has_emphasis(em, Emphasis::Faint)) + em_codes[1] = 2; + if (has_emphasis(em, Emphasis::Italic)) + em_codes[2] = 3; + if (has_emphasis(em, Emphasis::Underline)) + em_codes[3] = 4; + if (has_emphasis(em, Emphasis::Blink)) + em_codes[4] = 5; + if (has_emphasis(em, Emphasis::Reverse)) + em_codes[5] = 7; + if (has_emphasis(em, Emphasis::Conceal)) + em_codes[6] = 8; + if (has_emphasis(em, Emphasis::Strikethrough)) + em_codes[7] = 9; + + buffer[size++] = static_cast<CharT>('\x1b'); + buffer[size++] = static_cast<CharT>('['); + + for (unsigned char em_code : em_codes) { + if (!em_code) + continue; + buffer[size++] = static_cast<CharT>('0' + em_code); + buffer[size++] = static_cast<CharT>(';'); + } + + buffer[size - 1] = static_cast<CharT>('m'); + } + + constexpr operator const char *() const noexcept { return buffer; } + constexpr operator std::string_view() const noexcept { + return std::string_view(buffer, size); + } + + constexpr const CharT *begin() const noexcept { return buffer; } + constexpr const CharT *end() const noexcept { return buffer + size; } + +private: + static constexpr size_t NUM_EMPHASES = 8; + CharT buffer[7u + 4u * NUM_EMPHASES]; + size_t size = 0; + + static constexpr bool has_emphasis(Emphasis em, Emphasis mask) noexcept { + return static_cast<uint8_t>(em) & static_cast<uint8_t>(mask); + } +}; + +template <typename CharT> +AnsiColorEscape<CharT> make_foreground_color(AnsiColor c) { + return AnsiColorEscape<CharT>(c); +} + +template <typename CharT> +AnsiColorEscape<CharT> make_background_color(AnsiColor c) { + return AnsiColorEscape<CharT>(c, true); +} + +template <typename CharT> +AnsiColorEscape<CharT> make_emphasis(Emphasis c) { + return AnsiColorEscape<CharT>(c); +} + +}; // namespace termcolor + +}; // namespace willow + +template <typename CharT> +struct std::formatter<willow::termcolor::TextStyle, CharT> { + constexpr auto parse(const std::format_parse_context &ctx) { + auto it = ctx.begin(); + assert(it == ctx.end() || *it == '}'); + return it; + } + template <class FormatContext> + auto format(const willow::termcolor::TextStyle &style, + FormatContext &ctx) const { + auto out = ctx.out(); + if (style.is_reset()) + return std::format_to(out, "{}", "\x1b[0m"); + + if (style.has_emphasis()) { + auto emphasis = willow::termcolor::make_emphasis<CharT>(style.emph); + out = std::format_to( + out, "{}", static_cast<std::basic_string_view<CharT>>(emphasis)); + } + if (style.has_foreground()) { + auto fg = willow::termcolor::make_foreground_color<CharT>(style.fg); + out = std::format_to(out, "{}", + static_cast<std::basic_string_view<CharT>>(fg)); + } + if (style.has_background()) { + auto bg = willow::termcolor::make_background_color<CharT>(style.bg); + out = std::format_to(out, "{}", + static_cast<std::basic_string_view<CharT>>(bg)); + } + + return out; + } +}; + +#endif // WILLOW_INCLUDE_UTIL_COLOR_H diff --git a/willow/tools/willowc/BUILD.bazel b/willow/tools/willowc/BUILD.bazel index 708de13..01e5c56 100644 --- a/willow/tools/willowc/BUILD.bazel +++ b/willow/tools/willowc/BUILD.bazel @@ -12,7 +12,10 @@ cc_library( "-std=c++23", "-Wall", ], - deps = ["//willow"], + deps = [ + "//willow", + "@argparse//:argparse" + ], strip_include_prefix = "include", visibility = ["//visibility:public"], ) @@ -22,6 +25,8 @@ cc_binary( srcs = [ "main.cpp", ], - deps = [":willowc_lib"], + deps = [ + ":willowc_lib", + ], visibility = ["//visibility:public"], ) diff --git a/willow/tools/willowc/include/compiler.hpp b/willow/tools/willowc/include/compiler.hpp new file mode 100644 index 0000000..96ca480 --- /dev/null +++ b/willow/tools/willowc/include/compiler.hpp @@ -0,0 +1,40 @@ +#ifndef WILLOWC_INCLUDE_COMPILER_HPP +#define WILLOWC_INCLUDE_COMPILER_HPP + +#include <sourcemanager.hpp> + +#include <willow/IR/DiagnosticEngine.h> +#include <string> + +namespace willowc { + +class Compiler { +public: + struct Options { + std::string filename; + bool use_stdin; + willow::Severity log_level; + }; + Compiler(); + Compiler(const Compiler &) = delete; + Compiler &operator=(const Compiler &) = delete; + + willow::LogicalResult addSourceFile(const std::string &path); + willow::LogicalResult addStdIn(); + // TODO + void run(); + void compile(FileID file); + + void setLogLevel(willow::Severity sev) { log_level_ = sev; } + size_t numFiles() { return sourcemanager_.numFiles(); } +private: + SourceManager sourcemanager_; + willow::Severity log_level_; + willow::DiagnosticEngine diagnostic_engine_; + + void emitDiagnostic(const willow::Diagnostic &d); +}; + +}; // namespace willowc + +#endif // WILLOWC_INCLUDE_COMPILER_HPP diff --git a/willow/tools/willowc/include/driver.hpp b/willow/tools/willowc/include/driver.hpp new file mode 100644 index 0000000..028adc7 --- /dev/null +++ b/willow/tools/willowc/include/driver.hpp @@ -0,0 +1,10 @@ +#ifndef WILLOWC_INCLUDE_DRIVER_HPP +#define WILLOWC_INCLUDE_DRIVER_HPP + +namespace willowc { + +int willowc_main(int argc, char **argv); + +} + +#endif // WILLOWC_INCLUDE_DRIVER_HPP diff --git a/willow/tools/willowc/include/parser.hpp b/willow/tools/willowc/include/parser.hpp index 825dfdd..59f6bfb 100644 --- a/willow/tools/willowc/include/parser.hpp +++ b/willow/tools/willowc/include/parser.hpp @@ -1,31 +1,39 @@ #ifndef WILLOWC_INCLUDE_PARSER_HPP #define WILLOWC_INCLUDE_PARSER_HPP -#include <tokenizer.hpp> #include <ast.hpp> +#include <sourcemanager.hpp> +#include <tokenizer.hpp> + +#include <willow/IR/DiagnosticEngine.h> -#include <optional> #include <memory> +#include <optional> #include <vector> namespace willowc { class Parser { - std::string_view buf; +public: + Parser(SourceManager::File &f, willow::DiagnosticEngine &diagnostic_engine) + : file_(f), tokenizer_(f.buf.get()), + diagnostic_engine_(diagnostic_engine) {} - std::vector<TokenKind> kinds; - std::vector<std::size_t> starts; - Tokenizer tokenizer; + std::optional<std::unique_ptr<ModuleAST>> run(); - std::size_t pos; + TokenKind kind() const { return kinds_[pos_]; } + std::size_t start() const { return starts_[pos_]; } -public: - Parser(std::string_view buf) : buf(buf), tokenizer(buf) {} +private: + willow::DiagnosticBuilder emitParserError(Token t, willow::Severity severity); - std::optional<std::unique_ptr<ModuleAST>> parse(); + SourceManager::File &file_; + std::vector<TokenKind> kinds_; + std::vector<std::size_t> starts_; + Tokenizer tokenizer_; + std::size_t pos_; + willow::DiagnosticEngine &diagnostic_engine_; - TokenKind kind() const { return kinds[pos]; } - std::size_t start() const { return starts[pos]; } }; } // namespace willowc diff --git a/willow/tools/willowc/include/sourcemanager.hpp b/willow/tools/willowc/include/sourcemanager.hpp index a526e48..25d4128 100644 --- a/willow/tools/willowc/include/sourcemanager.hpp +++ b/willow/tools/willowc/include/sourcemanager.hpp @@ -1,24 +1,59 @@ #ifndef WILLOWC_INCLUDE_SOURCEMANAGER_HPP #define WILLOWC_INCLUDE_SOURCEMANAGER_HPP -#include <filesystem> #include <memory> #include <string> +#include <unordered_map> +#include <map> #include <vector> +#include <optional> + +#include <willow/IR/Location.h> namespace willowc { using FileID = std::uint32_t; +class LineCache { +public: +private: + std::unordered_map<std::size_t, std::size_t> cache_; +}; + class SourceManager { +public: struct File { std::string path; + // TODO: remove this, but find something better than using the abs path + std::string display_path; std::unique_ptr<char[]> buf; + size_t size; + + willow::Location getLoc(std::size_t offset); + std::map<std::size_t, uint32_t> linecache_; + + File(std::string path, std::string display_path, std::unique_ptr<char[]> buf, + size_t size) + : path(std::move(path)), display_path(std::move(display_path)), + buf(std::move(buf)), size(size), linecache_{{{0, 1}}} {} }; -public: + std::optional<FileID> addFile(std::string_view path); + std::optional<FileID> addStdIn(); + std::optional<FileID> getFileID(const std::string& path); + std::string_view getBuf(FileID file) const { + const File &f = file_table.at(file); + return std::string_view(f.buf.get(), f.size); + } + File &getFile(FileID id) { return file_table.at(id); } + + const std::vector<File>& files() const { return file_table; } + std::vector<File>& files() { return file_table; } + + size_t numFiles() { return file_table.size(); } private: std::vector<File> file_table; + std::unordered_map<std::string, FileID> ids; }; } // namespace willowc diff --git a/willow/tools/willowc/include/tokenizer.hpp b/willow/tools/willowc/include/tokenizer.hpp index 3de9d32..47577ab 100644 --- a/willow/tools/willowc/include/tokenizer.hpp +++ b/willow/tools/willowc/include/tokenizer.hpp @@ -1,6 +1,7 @@ #ifndef WILLOWC_INCLUDE_TOKENIZER_HPP #define WILLOWC_INCLUDE_TOKENIZER_HPP +#include <utility> #include <willow/IR/Location.h> namespace willowc { @@ -14,6 +15,7 @@ enum class TokenKind { Inst, Comma, + Colon, Semicolon, LParen, RParen, @@ -37,6 +39,8 @@ class Tokenizer { std::string_view buf; std::size_t offset; + friend class Parser; + void skip(std::size_t idx = 1) { offset += idx; } char eat(std::size_t num = 1) { @@ -55,6 +59,8 @@ class Tokenizer { return buf[offset + idx]; } + void recover(); + bool scan_id(bool accept_digits); bool scan_dec(); bool scan_hex(); @@ -67,6 +73,58 @@ public: void seek(uint64_t offset); }; +constexpr std::string_view TokenKindName(TokenKind t) { + switch (t) { + case TokenKind::Function: + return "Function"; + case TokenKind::Variable: + return "Variable"; + case TokenKind::Constant: + return "Constant"; + case TokenKind::Type: + return "Type"; + case TokenKind::Label: + return "Label"; + case TokenKind::Inst: + return "Inst"; + case TokenKind::Comma: + return "Comma"; + case TokenKind::Colon: + return "Colon"; + case TokenKind::Semicolon: + return "Semicolon"; + case TokenKind::LParen: + return "LParen"; + case TokenKind::RParen: + return "RParen"; + case TokenKind::LCurly: + return "LCurly"; + case TokenKind::RCurly: + return "RCurly"; + case TokenKind::Equals: + return "Equals"; + case TokenKind::RArrow: + return "RArrow"; + case TokenKind::Comment: + return "Comment"; + case TokenKind::FuncKW: + return "FuncKW"; + case TokenKind::Eof: + return "Eof"; + case TokenKind::Invalid: + return "Invalid"; + } + std::unreachable(); +} + } // namespace willowc +template <> +struct std::formatter<willowc::TokenKind> { + constexpr auto parse(std::format_parse_context &ctx) { return ctx.begin(); } + constexpr auto format(const willowc::TokenKind t, std::format_context &ctx) const { + return std::format_to(ctx.out(), "{}", TokenKindName(t)); + } +}; + #endif // WILLOWC_INCLUDE_TOKENIZER_HPP diff --git a/willow/tools/willowc/lib/compiler.cpp b/willow/tools/willowc/lib/compiler.cpp new file mode 100644 index 0000000..b933f65 --- /dev/null +++ b/willow/tools/willowc/lib/compiler.cpp @@ -0,0 +1,66 @@ +#include <compiler.hpp> + +#include <willow/IR/Diagnostic.h> +#include <willow/IR/Location.h> +#include <willow/Util/Color.h> + +#include <parser.hpp> + +#include <iostream> +#include <print> + +namespace willowc { + +Compiler::Compiler() + : sourcemanager_(), log_level_(willow::Severity::Error), + diagnostic_engine_([this](const willow::Diagnostic &d) { emitDiagnostic(d); }) {} + +void Compiler::run() { + assert(sourcemanager_.numFiles() == 1); + Compiler::compile(0); +} + +void Compiler::compile(FileID file) { + Parser parser{sourcemanager_.getFile(file), diagnostic_engine_}; + + auto x = parser.run(); +} + +void Compiler::emitDiagnostic(const willow::Diagnostic &d) { + using namespace willow::termcolor; + if (log_level_ > d.severity) + return; + + if (d.location) { + std::print(std::cerr, "{}{}: ", TextStyle{AnsiColor::None, Emphasis::Bold}, + d.location.value()); + } + + std::print(std::cerr, "{}{}: {}{}{}\n", willow::getSeverityColor(d.severity), + willow::getSeverityName(d.severity), + TextStyle{AnsiColor::Default, Emphasis::Bold}, d.message, TextStyle{}); + // TODO: trace +} + +willow::LogicalResult Compiler::addSourceFile(const std::string &path) { + std::optional<FileID> maybe_source_file = sourcemanager_.addFile(path); + if (!maybe_source_file) { + std::println(std::cerr, "error: failed to open input file '{}'", path); + return willow::failure(); + } + + return willow::success(); +} + +willow::LogicalResult Compiler::addStdIn() { + std::optional<FileID> maybestdin = sourcemanager_.addStdIn(); + + if (!maybestdin) { + std::println(std::cerr, "error: failed to read from stdin"); + return willow::failure(); + } + + return willow::success(); +} + +}; // namespace willowc diff --git a/willow/tools/willowc/lib/driver.cpp b/willow/tools/willowc/lib/driver.cpp index e69de29..1962da4 100644 --- a/willow/tools/willowc/lib/driver.cpp +++ b/willow/tools/willowc/lib/driver.cpp @@ -0,0 +1,76 @@ +#include <print> +#include <compiler.hpp> +#include <driver.hpp> + +#include <argparse/argparse.hpp> + +namespace willowc { + +int willowc_main(int argc, char **argv) { + argparse::ArgumentParser cl("willowc"); + cl.set_prefix_chars("-"); + cl.set_assign_chars("="); + + willowc::Compiler compiler; + + auto &ll = cl.add_mutually_exclusive_group(); + ll.add_argument("-Lerror") + .help("only emit diagnostics on error (default)") + .default_value(true) + .implicit_value(true) + .action( + [&](const auto &) { compiler.setLogLevel(willow::Severity::Error); }); + ll.add_argument("-Lwarn") + .help("emit warning messages") + .default_value(false) + .implicit_value(true) + .action([&](const auto &) { + compiler.setLogLevel(willow::Severity::Warning); + }); + ll.add_argument("-Linfo") + .help("emit remarks") + .default_value(false) + .implicit_value(true) + .action([&](const auto &) { + compiler.setLogLevel(willow::Severity::Remark); + }); + ll.add_argument("-Ldebug") + .help("emit all diagnostics") + .default_value(false) + .implicit_value(true) + .action( + [&](const auto &) { compiler.setLogLevel(willow::Severity::Debug); }); + + cl.add_argument("<file>").help("input file path").required().nargs(1); + + if (argc < 2) { + std::cerr << cl; + return 1; + } + + try { + cl.parse_args(argc, argv); + } catch (std::exception &e) { + std::println("error: {}", e.what()); + } + + std::string f; + try { + f = cl.get("<file>"); + } catch (std::exception &e) { + std::println("error: {}", e.what()); + return 1; + } + + if (f == "-") { + if (willow::failed(compiler.addStdIn())) + return 1; + } else if (willow::failed(compiler.addSourceFile(f))) + return 1; + + compiler.run(); + + return 0; +} + +} // namespace willowc diff --git a/willow/tools/willowc/lib/parser.cpp b/willow/tools/willowc/lib/parser.cpp index becc171..e658909 100644 --- a/willow/tools/willowc/lib/parser.cpp +++ b/willow/tools/willowc/lib/parser.cpp @@ -2,8 +2,27 @@ namespace willowc { -std::optional<std::unique_ptr<ModuleAST>> parse() { +std::optional<std::unique_ptr<ModuleAST>> Parser::run() { + bool failed = false; + while (true) { + Token t = tokenizer_.scan(); + if (t.kind == TokenKind::Eof) { + willow::emit(diagnostic_engine_, willow::Severity::Debug) + << std::format("[ <eof> :: {} : ({}, {}) ]", t.kind, t.start, t.end); + break; + } + willow::emit(diagnostic_engine_, willow::Severity::Debug) + << std::format("[ '{}' :: {} : ({}, {}) ]", + tokenizer_.buf.substr(t.start, t.end - t.start), t.kind, + t.start, t.end); + } + return std::nullopt; } +willow::DiagnosticBuilder Parser::emitParserError(Token t, willow::Severity severity) { + willow::Location loc = file_.getLoc(t.start); + return willow::DiagnosticBuilder(diagnostic_engine_, severity, loc); } + +} // namespace willowc diff --git a/willow/tools/willowc/lib/sourcemanager.cpp b/willow/tools/willowc/lib/sourcemanager.cpp index e2a8e72..a30c76e 100644 --- a/willow/tools/willowc/lib/sourcemanager.cpp +++ b/willow/tools/willowc/lib/sourcemanager.cpp @@ -1,6 +1,9 @@ +#include <cassert> #include <filesystem> - #include <fstream> +#include <iostream> +#include <string_view> + #include <sourcemanager.hpp> namespace willowc { @@ -11,9 +14,14 @@ std::optional<FileID> SourceManager::addFile(std::string_view _path) { std::filesystem::path uncanonical_path{_path}; auto path = std::filesystem::weakly_canonical(uncanonical_path, ec); if (ec) { - return false; + return std::nullopt; } - std::string display_path = path.make_preferred(); + + std::filesystem::path display_path = path; + display_path.make_preferred(); + std::string display = display_path.string(); + + assert(!getFileID(display)); if (!std::filesystem::exists(path, ec) || ec) return std::nullopt; @@ -33,9 +41,68 @@ std::optional<FileID> SourceManager::addFile(std::string_view _path) { f.read(buf.get(), filesize); const FileID id = file_table.size(); - file_table.push_back(File{std::move(display_path), std::move(buf)}); + file_table.emplace_back(std::move(display), std::string(_path), std::move(buf), filesize); + ids[file_table.back().path] = id; return id; } +std::optional<FileID> SourceManager::getFileID(const std::string &path) { + auto it = ids.find(path); + + if (it != ids.end()) + return it->second; + + return std::nullopt; +} + +std::optional<FileID> SourceManager::addStdIn() { + std::string content{std::istreambuf_iterator<char>(std::cin), + std::istreambuf_iterator<char>()}; + + if (std::cin.bad()) + return std::nullopt; + + std::size_t size = content.size(); + auto buf = std::make_unique<char[]>(size); + const FileID id = file_table.size(); + file_table.emplace_back("<stdin>", "<stdin>", std::move(buf), size); + ids[file_table.back().path] = id; + return id; +} + +willow::Location SourceManager::File::getLoc(std::size_t offset) { + size_t line_start = offset; + while (line_start != 0) { + if (this->buf[--line_start] != '\n') + continue; + + line_start++; + break; + } + + uint32_t col = offset - line_start + 1; + auto it = linecache_.find(line_start); + + if (it != linecache_.end()) + return willow::Location{display_path, it->second, col, offset}; + + auto back = linecache_.rbegin(); + auto i = back->first; + auto line = back->second; + assert(i < line_start); + for (; i < line_start; i++) { + if (buf[i] == '\n') { + line = line + 1; + size_t next_start = i + 1; + + if (next_start <= size) + linecache_.insert({next_start, ++line}); + } + } + linecache_.insert({line_start, line}); + return willow::Location{ + display_path, line, static_cast<uint32_t>(offset - line_start + 1), offset}; +} + } // namespace willowc diff --git a/willow/tools/willowc/lib/tokenizer.cpp b/willow/tools/willowc/lib/tokenizer.cpp index 0c1f917..7ad28a6 100644 --- a/willow/tools/willowc/lib/tokenizer.cpp +++ b/willow/tools/willowc/lib/tokenizer.cpp @@ -38,11 +38,11 @@ bool Tokenizer::scan_id(bool accept_digits = true) { } Token Tokenizer::scan() { - std::size_t start = this->offset; - - while (isspace(peek())) + while (is_space(peek())) skip(); + std::size_t start = this->offset; + TokenKind k = [&] { switch (peek()) { case '@': @@ -63,6 +63,9 @@ Token Tokenizer::scan() { case ',': skip(); return TokenKind::Comma; + case ':': + skip(); + return TokenKind::Colon; case ';': skip(); return TokenKind::Semicolon; @@ -99,9 +102,8 @@ Token Tokenizer::scan() { return TokenKind::Invalid; skip(); - char c = eat(); - while (c != '\0' && c != '\n') - c = eat(); + while (peek() != '\0' && peek() != '\n') + skip(); return TokenKind::Comment; } @@ -124,12 +126,20 @@ Token Tokenizer::scan() { return TokenKind::Inst; } + skip(); return TokenKind::Invalid; } } }(); - return Token{start, offset, k}; + Token t{start, offset, k}; + if (t.kind == TokenKind::Invalid) { + if (t.start == t.end) + t.end++; + recover(); + } + + return t; } bool Tokenizer::scan_dec() { @@ -173,4 +183,47 @@ bool Tokenizer::scan_constant() { return false; } +void Tokenizer::recover() { + auto is_boundary = [&](char c) { + switch (c) { + case ' ': + [[fallthrough]]; + case '\n': + [[fallthrough]]; + case '\t': + [[fallthrough]]; + case '\\': + [[fallthrough]]; + case ',': + [[fallthrough]]; + case '%': + [[fallthrough]]; + case '@': + [[fallthrough]]; + case ':': + [[fallthrough]]; + case ';': + [[fallthrough]]; + case '(': + [[fallthrough]]; + case ')': + [[fallthrough]]; + case '{': + [[fallthrough]]; + case '}': + [[fallthrough]]; + case '=': + [[fallthrough]]; + case '\0': + return true; + default: + return false; + } + }; + + while (!is_boundary(peek())) { + skip(); + } +} + } // namespace willowc diff --git a/willow/tools/willowc/main.cpp b/willow/tools/willowc/main.cpp index 237c8ce..e5049a7 100644 --- a/willow/tools/willowc/main.cpp +++ b/willow/tools/willowc/main.cpp @@ -1 +1,8 @@ -int main() {} +#include <compiler.hpp> +#include <driver.hpp> + +int main(int argc, char **argv) { + willowc::willowc_main(argc, argv); + + return 0; +} |