diff options
Diffstat (limited to 'willow/tools')
| -rw-r--r-- | willow/tools/willowc/BUILD.bazel | 27 | ||||
| -rw-r--r-- | willow/tools/willowc/include/ast.hpp | 62 | ||||
| -rw-r--r-- | willow/tools/willowc/include/expr.hpp | 4 | ||||
| -rw-r--r-- | willow/tools/willowc/include/parser.hpp | 33 | ||||
| -rw-r--r-- | willow/tools/willowc/include/sourcemanager.hpp | 26 | ||||
| -rw-r--r-- | willow/tools/willowc/include/tokenizer.hpp | 72 | ||||
| -rw-r--r-- | willow/tools/willowc/lib/driver.cpp | 0 | ||||
| -rw-r--r-- | willow/tools/willowc/lib/parser.cpp | 9 | ||||
| -rw-r--r-- | willow/tools/willowc/lib/sourcemanager.cpp | 41 | ||||
| -rw-r--r-- | willow/tools/willowc/lib/tokenizer.cpp | 176 | ||||
| -rw-r--r-- | willow/tools/willowc/main.cpp | 1 |
11 files changed, 451 insertions, 0 deletions
diff --git a/willow/tools/willowc/BUILD.bazel b/willow/tools/willowc/BUILD.bazel
index e69de29..708de13 100644
--- a/willow/tools/willowc/BUILD.bazel
+++ b/willow/tools/willowc/BUILD.bazel
@@ -0,0 +1,27 @@
+load("@rules_cc//cc:defs.bzl", "cc_library", "cc_binary")
+
+cc_library(
+    name = "willowc_lib",
+    srcs = glob([
+        "lib/*.cpp",
+    ]),
+    hdrs = glob([
+        "include/*.hpp",
+    ]),
+    copts = [
+        "-std=c++23",
+        "-Wall",
+    ],
+    deps = ["//willow"],
+    strip_include_prefix = "include",
+    visibility = ["//visibility:public"],
+)
+
+cc_binary(
+    name = "willowc",
+    srcs = [
+        "main.cpp",
+    ],
+    deps = [":willowc_lib"],
+    visibility = ["//visibility:public"],
+)
diff --git a/willow/tools/willowc/include/ast.hpp b/willow/tools/willowc/include/ast.hpp
new file mode 100644
index 0000000..8c59067
--- /dev/null
+++ b/willow/tools/willowc/include/ast.hpp
@@ -0,0 +1,62 @@
+#ifndef WILLOWC_INCLUDE_AST_HPP
+#define WILLOWC_INCLUDE_AST_HPP
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <willow/IR/Instructions.h>
+#include <willow/IR/Types.h>
+
+#include <tokenizer.hpp>
+
+namespace willowc {
+
+using Opcode = willow::Instruction::Opcode;
+using TokenIndex = std::size_t;
+
+// This is like willow::ValueKind, but groups all SSA values into 'Value'
+// (because they can't be differentiated by syntax alone).
+enum class ExprKind { Constant, BasicBlock, Function, Value };
+
+struct ExprAST { // one instruction argument (see InstAST::args)
+  ExprKind kind;
+  std::string name;
+  // TODO: also record the originating token (TokenIndex)?
+};
+
+struct InstAST {
+  Opcode op;
+  std::string name; // NOTE(review): presumably the result name -- confirm
+  std::vector<std::unique_ptr<ExprAST>> args;
+};
+
+struct BlockAST { // a labelled sequence of instructions
+  std::string label;
+  std::vector<std::unique_ptr<InstAST>> body;
+};
+
+struct ParameterAST {
+  std::string name;
+  willow::Type type;
+};
+
+struct FunctionDeclAST {
+  std::string name;
+  std::vector<std::unique_ptr<ParameterAST>> parameters;
+  std::string returntype; // kept as text here, unlike ParameterAST::type
+
+  std::vector<std::unique_ptr<BlockAST>> body;
+  // TODO: movable symbol table
+};
+
+struct ModuleAST {
+  std::vector<std::unique_ptr<FunctionDeclAST>> Functions;
+  // TODO: imports, symbol table
+};
+
+} // namespace willowc
+
+#endif // WILLOWC_INCLUDE_AST_HPP
diff --git a/willow/tools/willowc/include/expr.hpp b/willow/tools/willowc/include/expr.hpp
new file mode 100644
index 0000000..15d2985
--- /dev/null
+++ b/willow/tools/willowc/include/expr.hpp
@@ -0,0 +1,4 @@
+#ifndef WILLOWC_INCLUDE_EXPR_HPP
+#define WILLOWC_INCLUDE_EXPR_HPP
+
+#endif // WILLOWC_INCLUDE_EXPR_HPP
diff --git a/willow/tools/willowc/include/parser.hpp b/willow/tools/willowc/include/parser.hpp
new file mode 100644
index 0000000..825dfdd
--- /dev/null
+++ b/willow/tools/willowc/include/parser.hpp
@@ -0,0 +1,33 @@
+#ifndef WILLOWC_INCLUDE_PARSER_HPP
+#define WILLOWC_INCLUDE_PARSER_HPP
+
+#include <tokenizer.hpp>
+#include <ast.hpp>
+
+#include <cstddef>
+#include <memory>
+#include <optional>
+#include <string_view>
+#include <vector>
+
+namespace willowc {
+
+class Parser {
+  std::string_view buf; // source text; not owned, must outlive the parser
+  std::vector<TokenKind> kinds;    // token kinds, indexed by pos
+  std::vector<std::size_t> starts; // token start offsets, same indexing
+  Tokenizer tokenizer;
+  std::size_t pos = 0; // index of the current token; never left indeterminate
+
+public:
+  Parser(std::string_view buf) : buf(buf), tokenizer(buf) {}
+
+  std::optional<std::unique_ptr<ModuleAST>> parse();
+  // Current token's kind / start offset. Precondition: pos < kinds.size().
+  TokenKind kind() const { return kinds[pos]; }
+  std::size_t start() const { return starts[pos]; }
+};
+
+} // namespace willowc
+
+#endif // WILLOWC_INCLUDE_PARSER_HPP
diff --git a/willow/tools/willowc/include/sourcemanager.hpp b/willow/tools/willowc/include/sourcemanager.hpp
new file mode 100644
index 
0000000..a526e48
--- /dev/null
+++ b/willow/tools/willowc/include/sourcemanager.hpp
@@ -0,0 +1,26 @@
+#ifndef WILLOWC_INCLUDE_SOURCEMANAGER_HPP
+#define WILLOWC_INCLUDE_SOURCEMANAGER_HPP
+
+#include <cstdint>
+#include <filesystem>
+#include <memory>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <vector>
+
+namespace willowc {
+using FileID = std::uint32_t; // index into SourceManager's file table
+
+class SourceManager {
+  struct File {
+    std::string path;            // canonicalised path of the file
+    std::unique_ptr<char[]> buf; // file contents
+  };
+  std::vector<File> file_table;
+public:
+  std::optional<FileID> addFile(std::string_view path); // nullopt on failure
+};
+} // namespace willowc
+
+#endif // WILLOWC_INCLUDE_SOURCEMANAGER_HPP
diff --git a/willow/tools/willowc/include/tokenizer.hpp b/willow/tools/willowc/include/tokenizer.hpp
new file mode 100644
index 0000000..3de9d32
--- /dev/null
+++ b/willow/tools/willowc/include/tokenizer.hpp
@@ -0,0 +1,72 @@
+#ifndef WILLOWC_INCLUDE_TOKENIZER_HPP
+#define WILLOWC_INCLUDE_TOKENIZER_HPP
+
+#include <cstddef>
+#include <cstdint>
+#include <string_view>
+
+#include <willow/IR/Location.h>
+
+namespace willowc {
+
+enum class TokenKind {
+  Function, // '@' + identifier
+  Variable, // '%' + identifier
+  Constant, // integer literal
+  Type,     // 'i' + decimal digits
+  Label,    // '^' + identifier
+  Inst,     // bare word
+
+  Comma,
+  Semicolon,
+  LParen,
+  RParen,
+  LCurly,
+  RCurly,
+  Equals,
+  RArrow,
+  Comment, // "//" through the end of the line
+
+  FuncKW, // NOTE(review): Tokenizer::scan() does not produce this yet
+  Eof,
+  Invalid,
+};
+
+struct Token {
+  std::size_t start, end; // offsets into the scanned buffer
+  TokenKind kind;
+};
+
+class Tokenizer {
+  std::string_view buf; // text being scanned; not owned
+  std::size_t offset;   // cursor: index of the next unread character
+  // Advances the cursor; may overshoot the end, which peek()/eat() tolerate.
+  void skip(std::size_t idx = 1) { offset += idx; }
+  // Returns the current character and advances by num; '\0' once at the end.
+  char eat(std::size_t num = 1) {
+    if (offset >= buf.length())
+      return '\0';
+    char c = buf[offset];
+    offset += num;
+    return c;
+  }
+  // Returns the character idx positions ahead, or '\0' past the end.
+  char peek(std::size_t idx = 0) const {
+    return offset + idx < buf.length() ? buf[offset + idx] : '\0';
+  }
+
+  bool scan_id(bool accept_digits);
+  bool scan_dec();
+  bool scan_hex();
+  bool scan_constant();
+public:
+  explicit Tokenizer(std::string_view buf, std::size_t offset = 0)
+      : buf{buf}, offset{offset} {}
+
+  Token scan();
+  void seek(std::uint64_t offset) { this->offset = offset; }
+};
+
+} // namespace willowc
+
+#endif // WILLOWC_INCLUDE_TOKENIZER_HPP
diff --git 
a/willow/tools/willowc/lib/driver.cpp b/willow/tools/willowc/lib/driver.cpp
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/willow/tools/willowc/lib/driver.cpp
diff --git a/willow/tools/willowc/lib/parser.cpp b/willow/tools/willowc/lib/parser.cpp
new file mode 100644
index 0000000..becc171
--- /dev/null
+++ b/willow/tools/willowc/lib/parser.cpp
@@ -0,0 +1,9 @@
+#include <parser.hpp>
+
+namespace willowc {
+// Defined as a member of Parser (not a free function); a stub for now.
+std::optional<std::unique_ptr<ModuleAST>> Parser::parse() {
+  return std::nullopt; // TODO: implement; falling off the end would be UB
+}
+
+} // namespace willowc
diff --git a/willow/tools/willowc/lib/sourcemanager.cpp b/willow/tools/willowc/lib/sourcemanager.cpp
new file mode 100644
index 0000000..e2a8e72
--- /dev/null
+++ b/willow/tools/willowc/lib/sourcemanager.cpp
@@ -0,0 +1,41 @@
+#include <filesystem>
+#include <fstream>
+#include <optional>
+#include <sourcemanager.hpp>
+
+namespace willowc {
+
+// Loads the file at `_path` into memory and appends it to file_table.
+// Returns the new entry's FileID, or std::nullopt when the path cannot be
+// canonicalised, is not an existing regular file, or cannot be read in full.
+std::optional<FileID> SourceManager::addFile(std::string_view _path) {
+  namespace fs = std::filesystem;
+  std::error_code ec;
+  auto path = fs::weakly_canonical(fs::path{_path}, ec);
+  if (ec)
+    return std::nullopt; // a bool here would convert to a *valid* FileID
+  std::string display_path = path.make_preferred().string();
+
+  if (!fs::exists(path, ec) || ec)
+    return std::nullopt;
+  if (!fs::is_regular_file(path, ec) || ec)
+    return std::nullopt;
+
+  std::size_t filesize = fs::file_size(path, ec);
+  if (ec)
+    return std::nullopt;
+
+  std::ifstream f{display_path, std::ios::binary};
+  if (!f)
+    return std::nullopt;
+
+  // File keeps no length, so add one zero byte to NUL-terminate the contents.
+  auto buf = std::make_unique<char[]>(filesize + 1);
+  if (!f.read(buf.get(), static_cast<std::streamsize>(filesize)))
+    return std::nullopt;
+  const auto id = static_cast<FileID>(file_table.size());
+  file_table.push_back(File{std::move(display_path), std::move(buf)});
+  return id;
+}
+
+} // namespace willowc
diff --git a/willow/tools/willowc/lib/tokenizer.cpp b/willow/tools/willowc/lib/tokenizer.cpp
new file mode 100644
index 0000000..0c1f917
--- /dev/null
+++ b/willow/tools/willowc/lib/tokenizer.cpp
@@ -0,0 +1,176 @@
+#include <tokenizer.hpp>
+
+namespace willowc {
+
+// ASCII-only classifiers. Taking unsigned char keeps bytes >= 0x80 defined
+// (they are rejected), unlike passing a plain char to the <cctype> functions.
+static inline bool is_space(unsigned char c) {
+  return c == ' ' || (c >= '\t' && c <= '\r'); // \t \n \v \f \r
+}
+static inline bool is_digit(unsigned char c) { return c >= '0' && c <= '9'; }
+static inline bool is_xdigit(unsigned char c) {
+  return is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
+}
+static inline bool is_alpha(unsigned char c) {
+  unsigned char x = static_cast<unsigned char>(c | 0x20); // fold to lower case
+  return x >= 'a' && x <= 'z';
+}
+
+// Characters allowed at the start of (and inside) an identifier.
+static inline bool valid_id_start(int c) {
+  return is_alpha(c) || c == '$' || c == '.' || c == '_' || c == '-';
+}
+
+// Consumes an identifier at the cursor and reports whether one was found.
+// With accept_digits, an all-digit name is accepted as well.
+bool Tokenizer::scan_id(bool accept_digits = true) {
+  char c = peek();
+  if (accept_digits && is_digit(c)) {
+    // if it starts with a digit, must be all digits
+    while (is_digit(peek()))
+      skip();
+    return true;
+  }
+
+  if (!valid_id_start(c))
+    return false;
+
+  while (valid_id_start(peek()) || is_digit(peek()))
+    skip();
+  return true;
+}
+
+// Skips whitespace and scans one token spanning [start, end) in the buffer.
+// Except for Eof, a token is never empty, so repeated calls always progress.
+Token Tokenizer::scan() {
+  while (is_space(peek()))
+    skip();
+
+  std::size_t start = this->offset; // taken after the whitespace, not before
+
+  TokenKind k = [&] {
+    switch (peek()) {
+    case '@':
+      skip();
+      if (scan_id(false))
+        return TokenKind::Function;
+      return TokenKind::Invalid;
+    case '%':
+      skip();
+      if (scan_id())
+        return TokenKind::Variable;
+      return TokenKind::Invalid;
+    case '^':
+      skip();
+      if (scan_id())
+        return TokenKind::Label;
+      return TokenKind::Invalid;
+    case ',':
+      skip();
+      return TokenKind::Comma;
+    case ';':
+      skip();
+      return TokenKind::Semicolon;
+    case '(':
+      skip();
+      return TokenKind::LParen;
+    case ')':
+      skip();
+      return TokenKind::RParen;
+    case '{':
+      skip();
+      return TokenKind::LCurly;
+    case '}':
+      skip();
+      return TokenKind::RCurly;
+    case '=':
+      skip();
+      return TokenKind::Equals;
+    case '-': {
+      if (peek(1) == '>') {
+        skip(2);
+        return TokenKind::RArrow;
+      }
+      // scan_constant() consumes the sign itself, so negative literals obey
+      // the same rules (hex, no leading zeros) as positive ones.
+      if (is_digit(peek(1)))
+        return scan_constant() ? TokenKind::Constant : TokenKind::Invalid;
+      skip(); // lone '-': consume it so the Invalid token is not empty
+      return TokenKind::Invalid;
+    }
+    case '/': {
+      skip();
+      if (peek() != '/')
+        return TokenKind::Invalid;
+      // Line comment: consume up to and including the newline (or the end).
+      skip();
+      char c = eat();
+      while (c != '\0' && c != '\n')
+        c = eat();
+      return TokenKind::Comment;
+    }
+    case '\0':
+      return TokenKind::Eof;
+    default: {
+      if (is_digit(peek()))
+        return scan_constant() ? TokenKind::Constant : TokenKind::Invalid;
+
+      if (peek() == 'i') { // 'i' followed by digits is a type
+        skip();
+        if (scan_dec())
+          return TokenKind::Type;
+      }
+
+      if (is_alpha(peek())) {
+        skip();
+        while (is_alpha(peek()) || is_digit(peek()) || peek() == '.')
+          skip();
+        return TokenKind::Inst;
+      }
+      if (offset == start)
+        skip(); // never hand back an empty Invalid token
+      return TokenKind::Invalid;
+    }
+    }
+  }();
+
+  return Token{start, offset, k};
+}
+
+// Consumes a run of decimal digits; false (nothing consumed) if there is none.
+bool Tokenizer::scan_dec() {
+  if (!is_digit(peek()))
+    return false;
+  while (is_digit(peek()))
+    skip();
+  return true;
+}
+
+// Consumes a run of hex digits; false (nothing consumed) if there is none.
+bool Tokenizer::scan_hex() {
+  if (!is_xdigit(peek()))
+    return false;
+  while (is_xdigit(peek()))
+    skip();
+  return true;
+}
+
+// Consumes an integer literal: an optional '-', then "0", "0x" + hex digits,
+// or a decimal number that does not start with '0'.
+bool Tokenizer::scan_constant() {
+  if (peek() == '-')
+    skip();
+
+  if (peek() != '0')
+    return scan_dec();
+
+  skip();
+  if (is_digit(peek()))
+    return false; // leading zeros are rejected
+  if (peek() != 'x')
+    return true; // plain 0
+  skip();
+  return scan_hex();
+}
+
+} // namespace willowc
diff --git a/willow/tools/willowc/main.cpp b/willow/tools/willowc/main.cpp
new file mode 100644
index 0000000..237c8ce
--- /dev/null
+++ b/willow/tools/willowc/main.cpp
@@ -0,0 +1 @@
+int main() {}