summaryrefslogtreecommitdiff
path: root/willow/tools/willowc/lib
diff options
context:
space:
mode:
author Stefan Weigl-Bosker <stefan@s00.xyz> 2026-02-03 14:59:53 -0500
committer GitHub <noreply@github.com> 2026-02-03 14:59:53 -0500
commit add95b14f74e6dbe04a6efe98ff0f20424930b73 (patch)
tree 13ce413ee4190a4c8f8743c7740aaa8d04353f14 /willow/tools/willowc/lib
parent c5b2905c5a64433f8519531a77d3acc42d881f17 (diff)
download compiler-add95b14f74e6dbe04a6efe98ff0f20424930b73.tar.gz
[willow]: initial frontend work, unit tests (#8) dev/stefan
Diffstat (limited to 'willow/tools/willowc/lib')
-rw-r--r-- willow/tools/willowc/lib/driver.cpp 0
-rw-r--r-- willow/tools/willowc/lib/parser.cpp 9
-rw-r--r-- willow/tools/willowc/lib/sourcemanager.cpp 41
-rw-r--r-- willow/tools/willowc/lib/tokenizer.cpp 176
4 files changed, 226 insertions, 0 deletions
diff --git a/willow/tools/willowc/lib/driver.cpp b/willow/tools/willowc/lib/driver.cpp
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/willow/tools/willowc/lib/driver.cpp
diff --git a/willow/tools/willowc/lib/parser.cpp b/willow/tools/willowc/lib/parser.cpp
new file mode 100644
index 0000000..becc171
--- /dev/null
+++ b/willow/tools/willowc/lib/parser.cpp
@@ -0,0 +1,9 @@
+#include <parser.hpp>
+
+namespace willowc {
+
+// Parses a module and returns its AST.
+//
+// Not implemented yet: always returns std::nullopt so that callers see a
+// well-defined "no result" instead of the undefined behaviour of flowing off
+// the end of a non-void function.
+std::optional<std::unique_ptr<ModuleAST>> parse() {
+  // TODO: implement the parser.
+  return std::nullopt;
+}
+
+}
diff --git a/willow/tools/willowc/lib/sourcemanager.cpp b/willow/tools/willowc/lib/sourcemanager.cpp
new file mode 100644
index 0000000..e2a8e72
--- /dev/null
+++ b/willow/tools/willowc/lib/sourcemanager.cpp
@@ -0,0 +1,41 @@
+#include <filesystem>
+
+#include <fstream>
+#include <sourcemanager.hpp>
+
+namespace willowc {
+
+// Loads the file at `_path` into memory and appends it to `file_table`.
+//
+// The path is normalised with std::filesystem::weakly_canonical and must name
+// an existing regular file whose contents can be read completely.
+//
+// Returns the index of the new `file_table` entry, or std::nullopt when the
+// path cannot be resolved, does not name a regular file, or the file cannot be
+// opened and fully read.
+std::optional<FileID> SourceManager::addFile(std::string_view _path) {
+  std::error_code ec;
+
+  std::filesystem::path uncanonical_path{_path};
+  auto path = std::filesystem::weakly_canonical(uncanonical_path, ec);
+  if (ec) {
+    // Must be nullopt: `return false` would build an engaged optional and
+    // report a failed lookup as a valid file id.
+    return std::nullopt;
+  }
+  // .string() instead of the implicit conversion, which only yields a
+  // std::string on platforms whose native path character type is char.
+  std::string display_path = path.make_preferred().string();
+
+  if (!std::filesystem::exists(path, ec) || ec)
+    return std::nullopt;
+
+  if (!std::filesystem::is_regular_file(path, ec) || ec)
+    return std::nullopt;
+
+  std::size_t filesize = std::filesystem::file_size(path, ec);
+  if (ec)
+    return std::nullopt;
+
+  std::ifstream f{display_path, std::ios::binary};
+  if (!f)
+    return std::nullopt;
+
+  // make_unique<char[]> value-initialises the array, so the one extra byte is
+  // a trailing '\0'. NOTE(review): the tokenizer treats '\0' as end of input
+  // and File is built without a length here - confirm consumers rely on the
+  // terminator.
+  auto buf = std::make_unique<char[]>(filesize + 1);
+
+  // A short read (file truncated after file_size, I/O error) sets failbit;
+  // do not register a partially filled buffer.
+  if (!f.read(buf.get(), static_cast<std::streamsize>(filesize)))
+    return std::nullopt;
+
+  const FileID id = file_table.size();
+  file_table.push_back(File{std::move(display_path), std::move(buf)});
+
+  return id;
+}
+
+} // namespace willowc
diff --git a/willow/tools/willowc/lib/tokenizer.cpp b/willow/tools/willowc/lib/tokenizer.cpp
new file mode 100644
index 0000000..0c1f917
--- /dev/null
+++ b/willow/tools/willowc/lib/tokenizer.cpp
@@ -0,0 +1,176 @@
+#include <tokenizer.hpp>
+
+namespace willowc {
+
+// True for the four whitespace bytes the tokenizer recognises: space, tab,
+// line feed and carriage return.
+static inline bool is_space(unsigned char c) {
+  switch (c) {
+  case ' ':
+  case '\t':
+  case '\n':
+  case '\r':
+    return true;
+  default:
+    return false;
+  }
+}
+// True for the ASCII decimal digits '0' through '9'.
+static inline bool is_digit(unsigned char c) {
+  const bool below = c < '0';
+  const bool above = c > '9';
+  return !(below || above);
+}
+// True for ASCII hexadecimal digits: 0-9, a-f and A-F.
+static inline bool is_xdigit(unsigned char c) {
+  if (is_digit(c))
+    return true;
+
+  // Setting bit 0x20 maps 'A'..'F' onto 'a'..'f', so one range check covers
+  // both cases.
+  const unsigned char folded = static_cast<unsigned char>(c | 0x20);
+  return folded >= 'a' && folded <= 'f';
+}
+// True for ASCII letters a-z and A-Z.
+static inline bool is_alpha(unsigned char c) {
+  const bool lower = c >= 'a' && c <= 'z';
+  const bool upper = c >= 'A' && c <= 'Z';
+  return lower || upper;
+}
+
+// True when `c` may begin an identifier: an ASCII letter or one of
+// '$', '.', '_' and '-'.
+static inline bool valid_id_start(int c) {
+  switch (c) {
+  case '$':
+  case '.':
+  case '_':
+  case '-':
+    return true;
+  default:
+    return is_alpha(c);
+  }
+}
+
+// Consumes an identifier at the current position.
+//
+// When `accept_digits` is true and the input starts with a digit, the
+// identifier is the run of decimal digits that follows. Otherwise it must
+// start with a valid_id_start() character and continues over such characters
+// and decimal digits.
+//
+// Returns true if an identifier was consumed, false (consuming nothing) if the
+// current character cannot start one.
+bool Tokenizer::scan_id(bool accept_digits = true) {
+  char c = peek();
+
+  if (accept_digits && is_digit(c)) {
+    // if it starts with a digit, only the run of digits is taken
+    while (is_digit(peek()))
+      skip();
+    return true;
+  }
+
+  if (!valid_id_start(c))
+    return false;
+
+  // Use the file-local is_digit rather than ::isdigit: passing a plain char
+  // with a negative value to the <cctype> functions is undefined behaviour.
+  while (valid_id_start(peek()) || is_digit(peek()))
+    skip();
+
+  return true;
+}
+
+// Scans the next token from the input.
+//
+// Leading whitespace (as defined by is_space) is skipped and is not part of
+// the returned token. The result is Token{start, offset, kind}, where `start`
+// is the offset of the token's first character and `offset` is the position
+// after scanning.
+//
+// Every token kind other than Eof consumes at least one character, so a caller
+// that keeps scanning after an Invalid token still makes progress.
+Token Tokenizer::scan() {
+  // The file-local classifiers are used throughout instead of the <cctype>
+  // functions, which have undefined behaviour for negative plain-char values.
+  while (is_space(peek()))
+    skip();
+
+  // Taken after the whitespace so the token span covers only the token.
+  std::size_t start = this->offset;
+
+  TokenKind k = [&] {
+    switch (peek()) {
+    case '@':
+      skip();
+      // Function names may not be purely numeric.
+      if (scan_id(false))
+        return TokenKind::Function;
+      return TokenKind::Invalid;
+    case '%':
+      skip();
+      if (scan_id())
+        return TokenKind::Variable;
+      return TokenKind::Invalid;
+    case '^':
+      skip();
+      if (scan_id())
+        return TokenKind::Label;
+      return TokenKind::Invalid;
+    case ',':
+      skip();
+      return TokenKind::Comma;
+    case ';':
+      skip();
+      return TokenKind::Semicolon;
+    case '(':
+      skip();
+      return TokenKind::LParen;
+    case ')':
+      skip();
+      return TokenKind::RParen;
+    case '{':
+      skip();
+      return TokenKind::LCurly;
+    case '}':
+      skip();
+      return TokenKind::RCurly;
+    case '=':
+      skip();
+      return TokenKind::Equals;
+    case '-': {
+      if (peek(1) == '>') {
+        skip(2);
+        return TokenKind::RArrow;
+      }
+      // scan_constant() consumes the sign itself, so negative literals follow
+      // the same hex and leading-zero rules as positive ones.
+      if (is_digit(peek(1)))
+        return scan_constant() ? TokenKind::Constant : TokenKind::Invalid;
+
+      skip(); // consume the stray '-' so the caller makes progress
+      return TokenKind::Invalid;
+    }
+    case '/': {
+      skip();
+      if (peek() != '/')
+        return TokenKind::Invalid;
+
+      skip();
+      // The comment runs to the end of the line; stop in front of the
+      // terminating '\0' so the offset never moves past the end of input.
+      while (peek() != '\0' && peek() != '\n')
+        skip();
+      if (peek() == '\n')
+        skip(); // the newline belongs to the comment token
+
+      return TokenKind::Comment;
+    }
+    case '\0':
+      return TokenKind::Eof;
+    default: {
+      if (is_digit(peek()))
+        return scan_constant() ? TokenKind::Constant : TokenKind::Invalid;
+
+      // 'i' immediately followed by digits is a type token (e.g. i32). Look
+      // ahead before consuming so a bare `i` or `i.x` still scans as an
+      // instruction name below.
+      if (peek() == 'i' && is_digit(peek(1))) {
+        skip();
+        return scan_dec() ? TokenKind::Type : TokenKind::Invalid;
+      }
+
+      if (is_alpha(peek())) {
+        skip();
+        while (is_alpha(peek()) || is_digit(peek()) || peek() == '.')
+          skip();
+        return TokenKind::Inst;
+      }
+
+      skip(); // consume the unrecognised character so the caller makes progress
+      return TokenKind::Invalid;
+    }
+    }
+  }();
+
+  return Token{start, offset, k};
+}
+
+// Consumes a run of one or more decimal digits.
+// Returns false, consuming nothing, if the current character is not a digit.
+bool Tokenizer::scan_dec() {
+  bool any = false;
+  for (; is_digit(peek()); skip())
+    any = true;
+  return any;
+}
+
+// Consumes a run of one or more hexadecimal digits.
+// Returns false, consuming nothing, if the current character is not one.
+bool Tokenizer::scan_hex() {
+  if (!is_xdigit(peek()))
+    return false;
+
+  do {
+    skip();
+  } while (is_xdigit(peek()));
+
+  return true;
+}
+
+// Consumes an integer constant: an optional '-', then either a lone 0, `0x`
+// followed by hex digits, or a decimal number that does not start with 0.
+//
+// Returns true when a well-formed constant was consumed. On failure the
+// characters examined so far (sign, leading 0, `0x`) remain consumed.
+bool Tokenizer::scan_constant() {
+  if (peek() == '-')
+    skip();
+
+  if (!is_digit(peek()))
+    return false;
+
+  // Anything that does not start with 0 is a plain decimal number.
+  if (peek() != '0')
+    return scan_dec();
+
+  skip();
+
+  // A 0 followed by another digit is a leading zero, which is rejected.
+  if (is_digit(peek()))
+    return false;
+
+  // A 0 not followed by 'x' is the constant zero on its own.
+  if (peek() != 'x')
+    return true;
+
+  skip();
+  return scan_hex();
+}
+
+} // namespace willowc