summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Stefan Weigl-Bosker <stefan@s00.xyz> 2026-02-23 22:18:22 -0500
committer: GitHub <noreply@github.com> 2026-02-23 22:18:22 -0500
commit: 8f98dc579af1993ec85bd849656c4835b4039dd6 (patch)
tree: 3ee45620d83b209c1c11248afc9ab83ffcf39691
parent: c2d4209f85f46cc91163bc47cc43db252c94acf6 (diff)
download: compiler-8f98dc579af1993ec85bd849656c4835b4039dd6.tar.gz
[willow]: frontend plumbing (#13)
...
-rw-r--r--.bazelversion1
-rw-r--r--BUILD.bazel1
-rw-r--r--MODULE.bazel2
-rw-r--r--MODULE.bazel.lock2
-rw-r--r--nix/flake.nix3
-rw-r--r--willow/include/willow/IR/Diagnostic.h34
-rw-r--r--willow/include/willow/IR/Location.h5
-rw-r--r--willow/include/willow/Util/Color.h219
-rw-r--r--willow/tools/willowc/BUILD.bazel9
-rw-r--r--willow/tools/willowc/include/compiler.hpp40
-rw-r--r--willow/tools/willowc/include/driver.hpp10
-rw-r--r--willow/tools/willowc/include/parser.hpp32
-rw-r--r--willow/tools/willowc/include/sourcemanager.hpp39
-rw-r--r--willow/tools/willowc/include/tokenizer.hpp58
-rw-r--r--willow/tools/willowc/lib/compiler.cpp66
-rw-r--r--willow/tools/willowc/lib/driver.cpp76
-rw-r--r--willow/tools/willowc/lib/parser.cpp21
-rw-r--r--willow/tools/willowc/lib/sourcemanager.cpp75
-rw-r--r--willow/tools/willowc/lib/tokenizer.cpp67
-rw-r--r--willow/tools/willowc/main.cpp9
20 files changed, 734 insertions, 35 deletions
diff --git a/.bazelversion b/.bazelversion
new file mode 100644
index 0000000..e7fdef7
--- /dev/null
+++ b/.bazelversion
@@ -0,0 +1 @@
+8.4.2
diff --git a/BUILD.bazel b/BUILD.bazel
index 59c5d8f..60088b1 100644
--- a/BUILD.bazel
+++ b/BUILD.bazel
@@ -6,6 +6,5 @@ refresh_compile_commands(
targets = {
"//willow:willow": "",
"//willow:willowc": "",
- "//willow/unittest/ir:verifier": "",
},
)
diff --git a/MODULE.bazel b/MODULE.bazel
index a42cd9e..914ce88 100644
--- a/MODULE.bazel
+++ b/MODULE.bazel
@@ -11,7 +11,7 @@ git_override(
commit = "abb61a688167623088f8768cc9264798df6a9d10",
)
bazel_dep(name = "catch2", version = "3.12.0")
-
+bazel_dep(name = "argparse", version = "3.2.0")
bazel_dep(name = "willow", version = "0.0.1")
local_path_override(
diff --git a/MODULE.bazel.lock b/MODULE.bazel.lock
index 9883741..2dc0b66 100644
--- a/MODULE.bazel.lock
+++ b/MODULE.bazel.lock
@@ -10,6 +10,8 @@
"https://bcr.bazel.build/modules/abseil-cpp/20230802.1/MODULE.bazel": "fa92e2eb41a04df73cdabeec37107316f7e5272650f81d6cc096418fe647b915",
"https://bcr.bazel.build/modules/abseil-cpp/20240116.1/MODULE.bazel": "37bcdb4440fbb61df6a1c296ae01b327f19e9bb521f9b8e26ec854b6f97309ed",
"https://bcr.bazel.build/modules/abseil-cpp/20240116.1/source.json": "9be551b8d4e3ef76875c0d744b5d6a504a27e3ae67bc6b28f46415fd2d2957da",
+ "https://bcr.bazel.build/modules/argparse/3.2.0/MODULE.bazel": "74f2686fba48acea8164eb256e0f1d61230254c2bc042c9269a2d48047a6d2eb",
+ "https://bcr.bazel.build/modules/argparse/3.2.0/source.json": "6c5c6818f5d79475dd3346822f96047696ccb4fdf5f4d03c594607df900e0da4",
"https://bcr.bazel.build/modules/bazel_features/1.1.1/MODULE.bazel": "27b8c79ef57efe08efccbd9dd6ef70d61b4798320b8d3c134fd571f78963dbcd",
"https://bcr.bazel.build/modules/bazel_features/1.11.0/MODULE.bazel": "f9382337dd5a474c3b7d334c2f83e50b6eaedc284253334cf823044a26de03e8",
"https://bcr.bazel.build/modules/bazel_features/1.15.0/MODULE.bazel": "d38ff6e517149dc509406aca0db3ad1efdd890a85e049585b7234d04238e2a4d",
diff --git a/nix/flake.nix b/nix/flake.nix
index fa570f3..6d151c6 100644
--- a/nix/flake.nix
+++ b/nix/flake.nix
@@ -26,8 +26,7 @@
devShells.default = pkgs.mkShell {
name = "compiler stuff";
packages = with pkgs; [
- # clang-tools
- # bazelisk
+ argparse
bazel_8
bazel-buildtools
nix
diff --git a/willow/include/willow/IR/Diagnostic.h b/willow/include/willow/IR/Diagnostic.h
index 68483d9..137c537 100644
--- a/willow/include/willow/IR/Diagnostic.h
+++ b/willow/include/willow/IR/Diagnostic.h
@@ -2,10 +2,12 @@
#define WILLOW_INCLUDE_IR_DIAGNOSTIC_H
#include <willow/IR/Location.h>
+#include <willow/Util/Color.h>
#include <string>
#include <vector>
#include <ostream>
+#include <utility>
namespace willow {
@@ -18,6 +20,38 @@ struct Diagnostic {
std::vector<Diagnostic> notes;
};
+/// Returns the terminal style used for the label of a diagnostic of severity
+/// \p sev: a per-severity foreground colour, always combined with bold.
+/// Passing a value outside the four handled severities is undefined behaviour.
+constexpr termcolor::TextStyle getSeverityColor(Severity sev) {
+  using termcolor::AnsiColor;
+  using termcolor::Emphasis;
+  using termcolor::TextStyle;
+
+  auto label_color = AnsiColor::None;
+  switch (sev) {
+  case Severity::Debug:
+    label_color = AnsiColor::Green;
+    break;
+  case Severity::Remark:
+    label_color = AnsiColor::Cyan;
+    break;
+  case Severity::Warning:
+    label_color = AnsiColor::Magenta;
+    break;
+  case Severity::Error:
+    label_color = AnsiColor::Red;
+    break;
+  default:
+    std::unreachable();
+  }
+  return TextStyle{label_color, Emphasis::Bold};
+}
+
+/// Returns the lower-case label printed for a diagnostic of severity \p sev.
+/// Note that Severity::Remark is labelled "info". Passing a value outside the
+/// four handled severities is undefined behaviour.
+constexpr std::string_view getSeverityName(Severity sev) {
+  if (sev == Severity::Debug)
+    return std::string_view{"debug"};
+  if (sev == Severity::Remark)
+    return std::string_view{"info"};
+  if (sev == Severity::Warning)
+    return std::string_view{"warning"};
+  if (sev == Severity::Error)
+    return std::string_view{"error"};
+
+  std::unreachable();
+}
+
} // namespace willow
std::ostream &operator<<(std::ostream &os, const willow::Diagnostic &diagnostic);
diff --git a/willow/include/willow/IR/Location.h b/willow/include/willow/IR/Location.h
index c3c241d..0db7c58 100644
--- a/willow/include/willow/IR/Location.h
+++ b/willow/include/willow/IR/Location.h
@@ -9,8 +9,9 @@ namespace willow {
/// A source location
struct Location {
std::string_view filename;
-  int line;
-  int col;
+  // Line and column as produced by SourceManager::File::getLoc are 1-based:
+  // the first byte of a file is reported as line 1, column 1.
+  uint32_t line;
+  uint32_t col;
+  // Offset into the file's buffer that this location was computed from.
+  size_t offset;
};
} // namespace willow
diff --git a/willow/include/willow/Util/Color.h b/willow/include/willow/Util/Color.h
new file mode 100644
index 0000000..d7c7840
--- /dev/null
+++ b/willow/include/willow/Util/Color.h
@@ -0,0 +1,219 @@
+#ifndef WILLOW_INCLUDE_UTIL_COLOR_H
+#define WILLOW_INCLUDE_UTIL_COLOR_H
+
+#include <cassert>
+#include <cstddef>
+#include <format>
+#include <stdint.h>
+#include <string_view>
+
+/// \file Utilities for printing pretty terminal output
+///
+/// This is heavily inspired by fmtlib include/fmt/color.h
+
+namespace willow {
+
+namespace termcolor {
+
+/// Colour codes as used in ANSI SGR escape sequences. The enumerator values
+/// are the foreground codes; AnsiColorEscape adds 10 to obtain the matching
+/// background code. `None` means "no colour specified".
+enum class AnsiColor : uint8_t {
+  None = 0,
+
+  // standard colours
+  Black = 30,
+  Red = 31,
+  Green = 32,
+  Yellow = 33,
+  Blue = 34,
+  Magenta = 35,
+  Cyan = 36,
+  White = 37,
+  Default = 39,
+
+  // bright variants
+  BrightBlack = 90,
+  BrightRed = 91,
+  BrightGreen = 92,
+  BrightYellow = 93,
+  BrightBlue = 94,
+  BrightMagenta = 95,
+  BrightCyan = 96,
+  BrightWhite = 97
+};
+
+/// Bit flags for text emphasis; each enumerator occupies one bit so that
+/// several emphases can be combined in a single value.
+enum class Emphasis : uint8_t {
+  None = 0x00,
+  Bold = 0x01,
+  Faint = 0x02,
+  Italic = 0x04,
+  Underline = 0x08,
+  Blink = 0x10,
+  Reverse = 0x20,
+  Conceal = 0x40,
+  Strikethrough = 0x80,
+};
+
+/// Combines two emphasis sets into one containing the flags of both.
+constexpr Emphasis operator|(Emphasis a, Emphasis b) noexcept {
+  const auto lhs_bits = static_cast<uint8_t>(a);
+  const auto rhs_bits = static_cast<uint8_t>(b);
+  return static_cast<Emphasis>(lhs_bits | rhs_bits);
+}
+/// Adds the flags of \p b to \p a in place and returns \p a.
+constexpr Emphasis &operator|=(Emphasis &a, Emphasis b) noexcept {
+  a = a | b;
+  return a;
+}
+/// Returns true when \p a and \p b share at least one emphasis flag.
+constexpr bool has(Emphasis a, Emphasis b) {
+  const uint8_t shared = static_cast<uint8_t>(a) & static_cast<uint8_t>(b);
+  return shared != 0;
+}
+
+/// A foreground colour, a background colour and a set of emphasis flags.
+///
+/// A default-constructed style (all three fields `None`) is the sentinel for
+/// a full reset of terminal attributes; see is_reset().
+struct TextStyle {
+  AnsiColor fg = AnsiColor::None;
+  AnsiColor bg = AnsiColor::None;
+  Emphasis emph = Emphasis::None;
+
+  explicit constexpr TextStyle() = default;
+
+  /// Colour-first constructor. Also handles the single-argument form
+  /// `TextStyle(fg)`.
+  constexpr TextStyle(AnsiColor fg, AnsiColor bg = AnsiColor::None,
+                      Emphasis emph = Emphasis::None) noexcept
+      : fg(fg), bg(bg), emph(emph) {}
+
+  /// Foreground colour plus emphasis, no background. `emph` deliberately has
+  /// no default here: with one, `TextStyle(fg)` would be ambiguous between
+  /// the two constructors.
+  constexpr TextStyle(AnsiColor fg, Emphasis emph) noexcept
+      : fg(fg), emph(emph) {}
+
+  constexpr bool has_foreground() const noexcept {
+    return fg != AnsiColor::None;
+  }
+
+  constexpr bool has_background() const noexcept {
+    return bg != AnsiColor::None;
+  }
+
+  constexpr bool has_emphasis() const noexcept {
+    return emph != Emphasis::None;
+  }
+
+  // special sentinel value for a full reset
+  constexpr bool is_reset() const noexcept {
+    return !has_foreground() && !has_background() && !has_emphasis();
+  }
+};
+
+/// Fixed-size buffer holding one ANSI SGR escape sequence
+/// ("ESC [ <codes> m") that selects either a colour or a set of emphases.
+///
+/// The buffer is zero-initialised and never completely filled, so its
+/// contents are always NUL-terminated and the pointer conversion yields a
+/// valid C string.
+template <typename CharT>
+struct AnsiColorEscape {
+  /// Builds the sequence selecting \p text_color as the foreground colour, or
+  /// as the background colour when \p is_bg is true (background codes are
+  /// the foreground codes plus 10).
+  constexpr AnsiColorEscape(AnsiColor text_color, bool is_bg = false) noexcept {
+    if (is_bg)
+      text_color =
+          static_cast<AnsiColor>(static_cast<uint8_t>(text_color) + 10);
+
+    buffer[size++] = static_cast<CharT>('\x1b');
+    buffer[size++] = static_cast<CharT>('[');
+
+    uint8_t value = static_cast<uint8_t>(text_color);
+
+    // bright background colours (100-107) need three digits
+    if (value >= 100u) {
+      buffer[size++] = static_cast<CharT>('1');
+      value %= 100u;
+    }
+    buffer[size++] = static_cast<CharT>('0' + value / 10u);
+    buffer[size++] = static_cast<CharT>('0' + value % 10u);
+
+    buffer[size++] = static_cast<CharT>('m');
+  }
+
+  /// Builds one sequence enabling every emphasis flag set in \p em, with the
+  /// individual codes separated by ';'. An empty set yields an empty
+  /// sequence rather than a malformed "ESC m".
+  constexpr AnsiColorEscape(Emphasis em) noexcept {
+    // SGR parameter for each Emphasis bit, lowest bit first.
+    constexpr uint8_t sgr_codes[NUM_EMPHASES] = {1, 2, 3, 4, 5, 7, 8, 9};
+
+    buffer[size++] = static_cast<CharT>('\x1b');
+    buffer[size++] = static_cast<CharT>('[');
+
+    for (size_t bit = 0; bit < NUM_EMPHASES; ++bit) {
+      if (!has_emphasis(em, static_cast<Emphasis>(1u << bit)))
+        continue;
+      buffer[size++] = static_cast<CharT>('0' + sgr_codes[bit]);
+      buffer[size++] = static_cast<CharT>(';');
+    }
+
+    if (size == 2) {
+      // Nothing was appended after "ESC [": discard the prefix as well.
+      buffer[0] = CharT{};
+      buffer[1] = CharT{};
+      size = 0;
+      return;
+    }
+
+    // the trailing ';' becomes the terminating 'm'
+    buffer[size - 1] = static_cast<CharT>('m');
+  }
+
+  constexpr operator const CharT *() const noexcept { return buffer; }
+  constexpr operator std::basic_string_view<CharT>() const noexcept {
+    return std::basic_string_view<CharT>(buffer, size);
+  }
+
+  constexpr const CharT *begin() const noexcept { return buffer; }
+  constexpr const CharT *end() const noexcept { return buffer + size; }
+
+private:
+  static constexpr size_t NUM_EMPHASES = 8;
+  // Large enough for "ESC [" plus a two-character entry per emphasis, with
+  // room to spare; zero-initialised so the contents stay NUL-terminated.
+  CharT buffer[7u + 4u * NUM_EMPHASES] = {};
+  size_t size = 0;
+
+  static constexpr bool has_emphasis(Emphasis em, Emphasis mask) noexcept {
+    return (static_cast<uint8_t>(em) & static_cast<uint8_t>(mask)) != 0;
+  }
+};
+
+/// Returns the escape sequence that selects \p c as the foreground colour.
+template <typename CharT>
+AnsiColorEscape<CharT> make_foreground_color(AnsiColor c) {
+  constexpr bool as_background = false;
+  return AnsiColorEscape<CharT>(c, as_background);
+}
+
+/// Returns the escape sequence that selects \p c as the background colour.
+template <typename CharT>
+AnsiColorEscape<CharT> make_background_color(AnsiColor c) {
+  constexpr bool as_background = true;
+  return AnsiColorEscape<CharT>(c, as_background);
+}
+
+/// Returns the escape sequence that enables the emphasis flags in \p c.
+template <typename CharT>
+AnsiColorEscape<CharT> make_emphasis(Emphasis c) {
+  AnsiColorEscape<CharT> escape(c);
+  return escape;
+}
+
+}; // namespace termcolor
+
+}; // namespace willow
+
+/// std::format support for TextStyle: formatting a style writes the escape
+/// sequences that switch the terminal to it. Only the empty format spec
+/// ("{}") is accepted.
+template <typename CharT>
+struct std::formatter<willow::termcolor::TextStyle, CharT> {
+  constexpr auto parse(const std::basic_format_parse_context<CharT> &ctx) {
+    auto it = ctx.begin();
+    assert(it == ctx.end() || *it == '}');
+    return it;
+  }
+
+  /// Writes, in this order, the emphasis, foreground and background
+  /// sequences that \p style specifies. A style for which is_reset() holds
+  /// writes the full-reset sequence "ESC [ 0 m" instead.
+  template <class FormatContext>
+  auto format(const willow::termcolor::TextStyle &style,
+              FormatContext &ctx) const {
+    namespace tc = willow::termcolor;
+
+    auto out = ctx.out();
+    // Copy the characters directly instead of re-entering std::format_to:
+    // this works for any CharT and needs no string_view conversion.
+    const auto write = [&out](const auto &sequence) {
+      for (const CharT c : sequence)
+        *out++ = c;
+    };
+
+    if (style.is_reset()) {
+      constexpr CharT reset[] = {
+          static_cast<CharT>('\x1b'), static_cast<CharT>('['),
+          static_cast<CharT>('0'), static_cast<CharT>('m')};
+      write(reset);
+      return out;
+    }
+
+    if (style.has_emphasis())
+      write(tc::make_emphasis<CharT>(style.emph));
+    if (style.has_foreground())
+      write(tc::make_foreground_color<CharT>(style.fg));
+    if (style.has_background())
+      write(tc::make_background_color<CharT>(style.bg));
+
+    return out;
+  }
+};
+
+#endif // WILLOW_INCLUDE_UTIL_COLOR_H
diff --git a/willow/tools/willowc/BUILD.bazel b/willow/tools/willowc/BUILD.bazel
index 708de13..01e5c56 100644
--- a/willow/tools/willowc/BUILD.bazel
+++ b/willow/tools/willowc/BUILD.bazel
@@ -12,7 +12,10 @@ cc_library(
"-std=c++23",
"-Wall",
],
- deps = ["//willow"],
+ deps = [
+ "//willow",
+ "@argparse//:argparse"
+ ],
strip_include_prefix = "include",
visibility = ["//visibility:public"],
)
@@ -22,6 +25,8 @@ cc_binary(
srcs = [
"main.cpp",
],
- deps = [":willowc_lib"],
+ deps = [
+ ":willowc_lib",
+ ],
visibility = ["//visibility:public"],
)
diff --git a/willow/tools/willowc/include/compiler.hpp b/willow/tools/willowc/include/compiler.hpp
new file mode 100644
index 0000000..96ca480
--- /dev/null
+++ b/willow/tools/willowc/include/compiler.hpp
@@ -0,0 +1,40 @@
+#ifndef WILLOWC_INCLUDE_COMPILER_HPP
+#define WILLOWC_INCLUDE_COMPILER_HPP
+
+#include <sourcemanager.hpp>
+
+#include <willow/IR/DiagnosticEngine.h>
+#include <string>
+
+namespace willowc {
+
+/// Top-level compiler object: owns the source manager, the active log level
+/// and the diagnostic engine whose diagnostics it prints.
+///
+/// Instances cannot be copied.
+class Compiler {
+public:
+  // NOTE(review): nothing in this change reads or writes Options.
+  struct Options {
+    std::string filename;
+    bool use_stdin;
+    willow::Severity log_level;
+  };
+  /// Starts with log level Severity::Error and routes the diagnostic
+  /// engine's output to emitDiagnostic().
+  Compiler();
+  Compiler(const Compiler &) = delete;
+  Compiler &operator=(const Compiler &) = delete;
+
+  /// Registers the file at \p path with the source manager. Prints an error
+  /// to stderr and returns failure when the file cannot be added.
+  willow::LogicalResult addSourceFile(const std::string &path);
+  /// Registers standard input as a source. Prints an error to stderr and
+  /// returns failure when it cannot be read.
+  willow::LogicalResult addStdIn();
+  // TODO
+  /// Compiles the single registered file; asserts that exactly one file has
+  /// been added.
+  void run();
+  /// Runs the parser over the file identified by \p file.
+  void compile(FileID file);
+
+  /// Sets the minimum severity a diagnostic needs in order to be printed.
+  void setLogLevel(willow::Severity sev) { log_level_ = sev; }
+  /// Number of sources registered so far.
+  size_t numFiles() { return sourcemanager_.numFiles(); }
+private:
+  SourceManager sourcemanager_;
+  willow::Severity log_level_;
+  willow::DiagnosticEngine diagnostic_engine_;
+
+  /// Prints \p d to stderr unless its severity is below the log level.
+  void emitDiagnostic(const willow::Diagnostic &d);
+};
+
+}; // namespace willowc
+
+#endif // WILLOWC_INCLUDE_COMPILER_HPP
diff --git a/willow/tools/willowc/include/driver.hpp b/willow/tools/willowc/include/driver.hpp
new file mode 100644
index 0000000..028adc7
--- /dev/null
+++ b/willow/tools/willowc/include/driver.hpp
@@ -0,0 +1,10 @@
+#ifndef WILLOWC_INCLUDE_DRIVER_HPP
+#define WILLOWC_INCLUDE_DRIVER_HPP
+
+namespace willowc {
+
+/// Command-line entry point of willowc: parses \p argv, registers the input
+/// (a file path, or "-" for standard input) and runs the compiler.
+/// Returns 0 on success and 1 when the input cannot be determined or loaded.
+int willowc_main(int argc, char **argv);
+
+}
+
+#endif // WILLOWC_INCLUDE_DRIVER_HPP
diff --git a/willow/tools/willowc/include/parser.hpp b/willow/tools/willowc/include/parser.hpp
index 825dfdd..59f6bfb 100644
--- a/willow/tools/willowc/include/parser.hpp
+++ b/willow/tools/willowc/include/parser.hpp
@@ -1,31 +1,39 @@
#ifndef WILLOWC_INCLUDE_PARSER_HPP
#define WILLOWC_INCLUDE_PARSER_HPP
-#include <tokenizer.hpp>
#include <ast.hpp>
+#include <sourcemanager.hpp>
+#include <tokenizer.hpp>
+
+#include <willow/IR/DiagnosticEngine.h>
-#include <optional>
#include <memory>
+#include <optional>
#include <vector>
namespace willowc {
class Parser {
-  std::string_view buf;
+public:
+  /// Creates a parser over the contents of \p f that reports through
+  /// \p diagnostic_engine. Both references must outlive the parser.
+  ///
+  /// The tokenizer is given an explicitly sized view of the buffer, so it
+  /// does not depend on the buffer being NUL-terminated.
+  Parser(SourceManager::File &f, willow::DiagnosticEngine &diagnostic_engine)
+      : file_(f), tokenizer_(std::string_view(f.buf.get(), f.size)),
+        diagnostic_engine_(diagnostic_engine) {}
-  std::vector<TokenKind> kinds;
-  std::vector<std::size_t> starts;
-  Tokenizer tokenizer;
+  /// Runs the parser over the whole file.
+  std::optional<std::unique_ptr<ModuleAST>> run();
-  std::size_t pos;
+  /// Kind / start offset of the token at the current position.
+  TokenKind kind() const { return kinds_[pos_]; }
+  std::size_t start() const { return starts_[pos_]; }
-public:
-  Parser(std::string_view buf) : buf(buf), tokenizer(buf) {}
+private:
+  /// Starts a diagnostic of the given severity located at the start of \p t.
+  willow::DiagnosticBuilder emitParserError(Token t, willow::Severity severity);
-  std::optional<std::unique_ptr<ModuleAST>> parse();
+  SourceManager::File &file_;
+  std::vector<TokenKind> kinds_;
+  std::vector<std::size_t> starts_;
+  Tokenizer tokenizer_;
+  // index into kinds_/starts_; starts at the first token
+  std::size_t pos_ = 0;
+  willow::DiagnosticEngine &diagnostic_engine_;
-  TokenKind kind() const { return kinds[pos]; }
-  std::size_t start() const { return starts[pos]; }
};
} // namespace willowc
diff --git a/willow/tools/willowc/include/sourcemanager.hpp b/willow/tools/willowc/include/sourcemanager.hpp
index a526e48..25d4128 100644
--- a/willow/tools/willowc/include/sourcemanager.hpp
+++ b/willow/tools/willowc/include/sourcemanager.hpp
@@ -1,24 +1,59 @@
#ifndef WILLOWC_INCLUDE_SOURCEMANAGER_HPP
#define WILLOWC_INCLUDE_SOURCEMANAGER_HPP
-#include <filesystem>
#include <memory>
#include <string>
+#include <unordered_map>
+#include <map>
#include <vector>
+#include <optional>
+
+#include <willow/IR/Location.h>
namespace willowc {
using FileID = std::uint32_t;
+// NOTE(review): placeholder - this class has no public interface and nothing
+// in this change uses it; SourceManager::File keeps its own `linecache_` map.
+class LineCache {
+public:
+private:
+  std::unordered_map<std::size_t, std::size_t> cache_;
+};
+
class SourceManager {
+public:
struct File {
std::string path;
+    // TODO: remove this, but find something better than using the abs path
+    std::string display_path;
std::unique_ptr<char[]> buf;
+    // number of content bytes in `buf`
+    size_t size;
+
+    /// Translates a byte offset into `buf` into a line/column location.
+    willow::Location getLoc(std::size_t offset);
+    // Maps the offset at which a line starts to that line's 1-based number;
+    // seeded with "offset 0 is line 1" and extended by getLoc().
+    std::map<std::size_t, uint32_t> linecache_;
+
+    File(std::string path, std::string display_path, std::unique_ptr<char[]> buf,
+         size_t size)
+        : path(std::move(path)), display_path(std::move(display_path)),
+          buf(std::move(buf)), size(size), linecache_{{{0, 1}}} {}
};
-public:
+
std::optional<FileID> addFile(std::string_view path);
+  /// Reads standard input into a new entry named "<stdin>".
+  std::optional<FileID> addStdIn();
+  /// Looks up the id registered under \p path; std::nullopt if unknown.
+  std::optional<FileID> getFileID(const std::string& path);
+  /// Contents of \p file as a view of exactly `size` bytes. Throws
+  /// std::out_of_range for an unknown id.
+  std::string_view getBuf(FileID file) const {
+    const File &f = file_table.at(file);
+    return std::string_view(f.buf.get(), f.size);
+  }
+  /// Entry for \p id. Throws std::out_of_range for an unknown id.
+  File &getFile(FileID id) { return file_table.at(id); }
+
+  const std::vector<File>& files() const { return file_table; }
+  std::vector<File>& files() { return file_table; }
+
+  /// Number of registered sources; const so it is usable on a const manager.
+  size_t numFiles() const { return file_table.size(); }
private:
std::vector<File> file_table;
+  // FileID of every entry, keyed by its `path`
+  std::unordered_map<std::string, FileID> ids;
};
} // namespace willowc
diff --git a/willow/tools/willowc/include/tokenizer.hpp b/willow/tools/willowc/include/tokenizer.hpp
index 3de9d32..47577ab 100644
--- a/willow/tools/willowc/include/tokenizer.hpp
+++ b/willow/tools/willowc/include/tokenizer.hpp
@@ -1,6 +1,7 @@
#ifndef WILLOWC_INCLUDE_TOKENIZER_HPP
#define WILLOWC_INCLUDE_TOKENIZER_HPP
+#include <utility>
#include <willow/IR/Location.h>
namespace willowc {
@@ -14,6 +15,7 @@ enum class TokenKind {
Inst,
Comma,
+ Colon,
Semicolon,
LParen,
RParen,
@@ -37,6 +39,8 @@ class Tokenizer {
std::string_view buf;
std::size_t offset;
+ friend class Parser;
+
void skip(std::size_t idx = 1) { offset += idx; }
char eat(std::size_t num = 1) {
@@ -55,6 +59,8 @@ class Tokenizer {
return buf[offset + idx];
}
+ void recover();
+
bool scan_id(bool accept_digits);
bool scan_dec();
bool scan_hex();
@@ -67,6 +73,58 @@ public:
void seek(uint64_t offset);
};
+/// Returns the spelling of the enumerator \p t, e.g. "LParen" for
+/// TokenKind::LParen. Passing a value that is not a TokenKind enumerator is
+/// undefined behaviour.
+constexpr std::string_view TokenKindName(TokenKind t) {
+  using enum TokenKind;
+  // clang-format off
+  switch (t) {
+  case Function:  return "Function";
+  case Variable:  return "Variable";
+  case Constant:  return "Constant";
+  case Type:      return "Type";
+  case Label:     return "Label";
+  case Inst:      return "Inst";
+  case Comma:     return "Comma";
+  case Colon:     return "Colon";
+  case Semicolon: return "Semicolon";
+  case LParen:    return "LParen";
+  case RParen:    return "RParen";
+  case LCurly:    return "LCurly";
+  case RCurly:    return "RCurly";
+  case Equals:    return "Equals";
+  case RArrow:    return "RArrow";
+  case Comment:   return "Comment";
+  case FuncKW:    return "FuncKW";
+  case Eof:       return "Eof";
+  case Invalid:   return "Invalid";
+  }
+  // clang-format on
+  std::unreachable();
+}
+
} // namespace willowc
+/// std::format support for TokenKind: a token kind is formatted as its
+/// enumerator name (see TokenKindName). Parsing is inherited from the
+/// string_view formatter, so the standard string format specs (width, fill,
+/// alignment, ...) are accepted in addition to plain "{}".
+template <>
+struct std::formatter<willowc::TokenKind> : std::formatter<std::string_view> {
+  auto format(const willowc::TokenKind t, std::format_context &ctx) const {
+    return std::formatter<std::string_view>::format(willowc::TokenKindName(t),
+                                                    ctx);
+  }
+};
+
#endif // WILLOWC_INCLUDE_TOKENIZER_HPP
diff --git a/willow/tools/willowc/lib/compiler.cpp b/willow/tools/willowc/lib/compiler.cpp
new file mode 100644
index 0000000..b933f65
--- /dev/null
+++ b/willow/tools/willowc/lib/compiler.cpp
@@ -0,0 +1,66 @@
+#include <compiler.hpp>
+
+#include <willow/IR/Diagnostic.h>
+#include <willow/IR/Location.h>
+#include <willow/Util/Color.h>
+
+#include <parser.hpp>
+
+#include <iostream>
+#include <print>
+
+namespace willowc {
+
+/// Sets the default log level (errors only) and installs a diagnostic
+/// handler that forwards every diagnostic to emitDiagnostic().
+Compiler::Compiler()
+    : sourcemanager_{}, log_level_{willow::Severity::Error},
+      diagnostic_engine_([this](const willow::Diagnostic &diag) {
+        this->emitDiagnostic(diag);
+      }) {}
+
+/// Compiles the one registered source file. Exactly one file must have been
+/// added beforehand (checked by assertion).
+void Compiler::run() {
+  assert(sourcemanager_.numFiles() == 1);
+
+  const FileID only_file = 0;
+  compile(only_file);
+}
+
+/// Runs the parser over \p file. The parse result is currently discarded.
+void Compiler::compile(FileID file) {
+  SourceManager::File &source = sourcemanager_.getFile(file);
+  Parser parser{source, diagnostic_engine_};
+
+  auto parsed = parser.run();
+}
+
+/// Prints \p d to stderr as "<location>: <severity>: <message>", where the
+/// location part is only present if the diagnostic carries one. Diagnostics
+/// whose severity is below the current log level are dropped.
+void Compiler::emitDiagnostic(const willow::Diagnostic &d) {
+  using willow::termcolor::AnsiColor;
+  using willow::termcolor::Emphasis;
+  using willow::termcolor::TextStyle;
+
+  if (log_level_ > d.severity)
+    return;
+
+  if (d.location) {
+    const TextStyle location_style{AnsiColor::None, Emphasis::Bold};
+    std::print(std::cerr, "{}{}: ", location_style, d.location.value());
+  }
+
+  const TextStyle label_style = willow::getSeverityColor(d.severity);
+  const TextStyle message_style{AnsiColor::Default, Emphasis::Bold};
+  const TextStyle reset{};
+  std::print(std::cerr, "{}{}: {}{}{}\n", label_style,
+             willow::getSeverityName(d.severity), message_style, d.message,
+             reset);
+  // TODO: trace
+}
+
+/// Registers the file at \p path with the source manager. On failure an
+/// error naming the path is printed to stderr and failure is returned.
+willow::LogicalResult Compiler::addSourceFile(const std::string &path) {
+  if (sourcemanager_.addFile(path).has_value())
+    return willow::success();
+
+  std::println(std::cerr, "error: failed to open input file '{}'", path);
+  return willow::failure();
+}
+
+/// Registers standard input as a source. On failure an error is printed to
+/// stderr and failure is returned.
+willow::LogicalResult Compiler::addStdIn() {
+  if (sourcemanager_.addStdIn().has_value())
+    return willow::success();
+
+  std::println(std::cerr, "error: failed to read from stdin");
+  return willow::failure();
+}
+
+}; // namespace willowc
diff --git a/willow/tools/willowc/lib/driver.cpp b/willow/tools/willowc/lib/driver.cpp
index e69de29..1962da4 100644
--- a/willow/tools/willowc/lib/driver.cpp
+++ b/willow/tools/willowc/lib/driver.cpp
@@ -0,0 +1,76 @@
+#include <print>
+#include <compiler.hpp>
+#include <driver.hpp>
+
+#include <argparse/argparse.hpp>
+
+namespace willowc {
+
+/// Command-line entry point of willowc.
+///
+/// Accepts one of the mutually exclusive log-level flags (-Lerror, -Lwarn,
+/// -Linfo, -Ldebug) and a single input path; the path "-" selects standard
+/// input. All error messages are written to stderr.
+///
+/// \returns 0 on success; 1 when no arguments are given (usage is printed),
+/// when the command line is invalid, or when the input cannot be loaded.
+int willowc_main(int argc, char **argv) {
+  argparse::ArgumentParser cl("willowc");
+  cl.set_prefix_chars("-");
+  cl.set_assign_chars("=");
+
+  willowc::Compiler compiler;
+
+  // The flag actions run while the arguments are parsed and set the level
+  // directly on the compiler.
+  auto &ll = cl.add_mutually_exclusive_group();
+  ll.add_argument("-Lerror")
+      .help("only emit diagnostics on error (default)")
+      .default_value(true)
+      .implicit_value(true)
+      .action(
+          [&](const auto &) { compiler.setLogLevel(willow::Severity::Error); });
+  ll.add_argument("-Lwarn")
+      .help("emit warning messages")
+      .default_value(false)
+      .implicit_value(true)
+      .action([&](const auto &) {
+        compiler.setLogLevel(willow::Severity::Warning);
+      });
+  ll.add_argument("-Linfo")
+      .help("emit remarks")
+      .default_value(false)
+      .implicit_value(true)
+      .action([&](const auto &) {
+        compiler.setLogLevel(willow::Severity::Remark);
+      });
+  ll.add_argument("-Ldebug")
+      .help("emit all diagnostics")
+      .default_value(false)
+      .implicit_value(true)
+      .action(
+          [&](const auto &) { compiler.setLogLevel(willow::Severity::Debug); });
+
+  cl.add_argument("<file>").help("input file path").required().nargs(1);
+
+  if (argc < 2) {
+    std::cerr << cl;
+    return 1;
+  }
+
+  try {
+    cl.parse_args(argc, argv);
+  } catch (const std::exception &e) {
+    // An invalid command line is fatal: do not go on with a half-parsed one.
+    std::println(std::cerr, "error: {}", e.what());
+    return 1;
+  }
+
+  std::string f;
+  try {
+    f = cl.get("<file>");
+  } catch (const std::exception &e) {
+    std::println(std::cerr, "error: {}", e.what());
+    return 1;
+  }
+
+  if (f == "-") {
+    if (willow::failed(compiler.addStdIn()))
+      return 1;
+  } else if (willow::failed(compiler.addSourceFile(f))) {
+    return 1;
+  }
+
+  compiler.run();
+
+  return 0;
+}
+
+} // namespace willowc
diff --git a/willow/tools/willowc/lib/parser.cpp b/willow/tools/willowc/lib/parser.cpp
index becc171..e658909 100644
--- a/willow/tools/willowc/lib/parser.cpp
+++ b/willow/tools/willowc/lib/parser.cpp
@@ -2,8 +2,27 @@
namespace willowc {
-std::optional<std::unique_ptr<ModuleAST>> parse() {
+/// Scans the whole file and reports every token as a Debug diagnostic of the
+/// form "[ '<text>' :: <kind> : (<start>, <end>) ]"; scanning stops after the
+/// Eof token has been reported.
+///
+/// No AST is built yet, so the result is always std::nullopt.
+std::optional<std::unique_ptr<ModuleAST>> Parser::run() {
+  while (true) {
+    Token t = tokenizer_.scan();
+    if (t.kind == TokenKind::Eof) {
+      willow::emit(diagnostic_engine_, willow::Severity::Debug)
+          << std::format("[ <eof> :: {} : ({}, {}) ]", t.kind, t.start, t.end);
+      break;
+    }
+    willow::emit(diagnostic_engine_, willow::Severity::Debug)
+        << std::format("[ '{}' :: {} : ({}, {}) ]",
+                       tokenizer_.buf.substr(t.start, t.end - t.start), t.kind,
+                       t.start, t.end);
+  }
+  return std::nullopt;
}
+/// Starts a diagnostic of the given \p severity whose location is the start
+/// of token \p t within the file being parsed.
+willow::DiagnosticBuilder Parser::emitParserError(Token t, willow::Severity severity) {
+  willow::Location token_loc{file_.getLoc(t.start)};
+  return willow::DiagnosticBuilder(diagnostic_engine_, severity, token_loc);
}
+
+} // namespace willowc
diff --git a/willow/tools/willowc/lib/sourcemanager.cpp b/willow/tools/willowc/lib/sourcemanager.cpp
index e2a8e72..a30c76e 100644
--- a/willow/tools/willowc/lib/sourcemanager.cpp
+++ b/willow/tools/willowc/lib/sourcemanager.cpp
@@ -1,6 +1,9 @@
+#include <cassert>
#include <filesystem>
-
#include <fstream>
+#include <iostream>
+#include <string_view>
+
#include <sourcemanager.hpp>
namespace willowc {
@@ -11,9 +14,14 @@ std::optional<FileID> SourceManager::addFile(std::string_view _path) {
std::filesystem::path uncanonical_path{_path};
auto path = std::filesystem::weakly_canonical(uncanonical_path, ec);
if (ec) {
- return false;
+ return std::nullopt;
}
- std::string display_path = path.make_preferred();
+
+ std::filesystem::path display_path = path;
+ display_path.make_preferred();
+ std::string display = display_path.string();
+
+ assert(!getFileID(display));
if (!std::filesystem::exists(path, ec) || ec)
return std::nullopt;
@@ -33,9 +41,68 @@ std::optional<FileID> SourceManager::addFile(std::string_view _path) {
f.read(buf.get(), filesize);
const FileID id = file_table.size();
- file_table.push_back(File{std::move(display_path), std::move(buf)});
+ file_table.emplace_back(std::move(display), std::string(_path), std::move(buf), filesize);
+ ids[file_table.back().path] = id;
return id;
}
+/// Returns the id registered under \p path, or std::nullopt when no entry
+/// with that path exists.
+std::optional<FileID> SourceManager::getFileID(const std::string &path) {
+  if (const auto found = ids.find(path); found != ids.end())
+    return found->second;
+
+  return std::nullopt;
+}
+
+/// Reads standard input to its end and stores the contents as a new entry
+/// whose path and display path are both "<stdin>".
+///
+/// \returns the id of the new entry, or std::nullopt when the stream reports
+/// an unrecoverable error.
+std::optional<FileID> SourceManager::addStdIn() {
+  std::string content{std::istreambuf_iterator<char>(std::cin),
+                      std::istreambuf_iterator<char>()};
+
+  if (std::cin.bad())
+    return std::nullopt;
+
+  const std::size_t size = content.size();
+  // One extra, value-initialised byte keeps the buffer NUL-terminated for any
+  // consumer that treats buf.get() as a C string; `size` still counts only
+  // the content bytes.
+  auto buf = std::make_unique<char[]>(size + 1);
+  content.copy(buf.get(), size);
+
+  const FileID id = file_table.size();
+  file_table.emplace_back("<stdin>", "<stdin>", std::move(buf), size);
+  ids[file_table.back().path] = id;
+  return id;
+}
+
+/// Translates the byte \p offset into this file's buffer into a location
+/// with 1-based line and column numbers.
+///
+/// Line numbers are resolved through `linecache_`, which maps the offset of
+/// each known line start to its line number. On a miss the buffer is scanned
+/// forward from the last cached line start, caching every line start found
+/// on the way, so each part of the file is scanned for newlines only once.
+///
+/// \p offset must not exceed the file size (checked by assertion).
+willow::Location SourceManager::File::getLoc(std::size_t offset) {
+  assert(offset <= size);
+
+  // Walk back to the first character of the line containing `offset`.
+  std::size_t line_start = offset;
+  while (line_start != 0 && buf[line_start - 1] != '\n')
+    --line_start;
+
+  const auto col = static_cast<uint32_t>(offset - line_start + 1);
+
+  if (const auto hit = linecache_.find(line_start); hit != linecache_.end())
+    return willow::Location{display_path, hit->second, col, offset};
+
+  // Every line start up to the last cached one is in the cache, so a miss
+  // means the wanted line lies beyond it.
+  const auto last = linecache_.rbegin();
+  std::size_t i = last->first;
+  uint32_t line = last->second;
+  assert(i < line_start);
+
+  for (; i < line_start; ++i) {
+    if (buf[i] != '\n')
+      continue;
+    // A newline at i means the next line starts at i + 1. The counter is
+    // advanced exactly once per newline.
+    ++line;
+    linecache_.emplace_hint(linecache_.end(), i + 1, line);
+  }
+
+  // buf[line_start - 1] is a newline, so the loop has just cached
+  // `line_start` and `line` is its line number.
+  return willow::Location{display_path, line, col, offset};
+}
+
} // namespace willowc
diff --git a/willow/tools/willowc/lib/tokenizer.cpp b/willow/tools/willowc/lib/tokenizer.cpp
index 0c1f917..7ad28a6 100644
--- a/willow/tools/willowc/lib/tokenizer.cpp
+++ b/willow/tools/willowc/lib/tokenizer.cpp
@@ -38,11 +38,11 @@ bool Tokenizer::scan_id(bool accept_digits = true) {
}
Token Tokenizer::scan() {
- std::size_t start = this->offset;
-
- while (isspace(peek()))
+ while (is_space(peek()))
skip();
+ std::size_t start = this->offset;
+
TokenKind k = [&] {
switch (peek()) {
case '@':
@@ -63,6 +63,9 @@ Token Tokenizer::scan() {
case ',':
skip();
return TokenKind::Comma;
+ case ':':
+ skip();
+ return TokenKind::Colon;
case ';':
skip();
return TokenKind::Semicolon;
@@ -99,9 +102,8 @@ Token Tokenizer::scan() {
return TokenKind::Invalid;
skip();
- char c = eat();
- while (c != '\0' && c != '\n')
- c = eat();
+ while (peek() != '\0' && peek() != '\n')
+ skip();
return TokenKind::Comment;
}
@@ -124,12 +126,20 @@ Token Tokenizer::scan() {
return TokenKind::Inst;
}
+ skip();
return TokenKind::Invalid;
}
}
}();
- return Token{start, offset, k};
+ Token t{start, offset, k};
+ if (t.kind == TokenKind::Invalid) {
+ if (t.start == t.end)
+ t.end++;
+ recover();
+ }
+
+ return t;
}
bool Tokenizer::scan_dec() {
@@ -173,4 +183,47 @@ bool Tokenizer::scan_constant() {
return false;
}
+/// Error recovery after an invalid token: advances the tokenizer until the
+/// next character is a boundary character (whitespace, a backslash, one of
+/// the punctuation/sigil characters listed below, or NUL), so that scanning
+/// can resume there.
+void Tokenizer::recover() {
+  const auto at_boundary = [](char c) -> bool {
+    switch (c) {
+    // whitespace
+    case ' ':
+    case '\n':
+    case '\t':
+    // punctuation and sigils
+    case '\\':
+    case ',':
+    case '%':
+    case '@':
+    case ':':
+    case ';':
+    case '(':
+    case ')':
+    case '{':
+    case '}':
+    case '=':
+    // NUL
+    case '\0':
+      return true;
+    default:
+      return false;
+    }
+  };
+
+  for (;;) {
+    if (at_boundary(peek()))
+      break;
+    skip();
+  }
+}
+
} // namespace willowc
diff --git a/willow/tools/willowc/main.cpp b/willow/tools/willowc/main.cpp
index 237c8ce..e5049a7 100644
--- a/willow/tools/willowc/main.cpp
+++ b/willow/tools/willowc/main.cpp
@@ -1 +1,8 @@
-int main() {}
+#include <compiler.hpp>
+#include <driver.hpp>
+
+/// Program entry point: hands the command line to the driver and uses the
+/// driver's return value as the process exit status, so failures are visible
+/// to the calling shell or build system.
+int main(int argc, char **argv) {
+  return willowc::willowc_main(argc, argv);
+}