// Provenance: willow/tools/willowc/include/tokenizer.hpp as introduced by
// commit add95b14f74e6dbe04a6efe98ff0f20424930b73
// (Stefan Weigl-Bosker, Tue, 3 Feb 2026 14:59:53 -0500),
// "[willow]: initial frontend work, unit tests (#8)".

#ifndef WILLOWC_INCLUDE_TOKENIZER_HPP
#define WILLOWC_INCLUDE_TOKENIZER_HPP

#include <cstddef>     // std::size_t
#include <cstdint>     // uint64_t (Tokenizer::seek)
#include <string_view> // std::string_view

namespace willowc {

/// Classification attached to every Token produced by Tokenizer::scan().
///
/// NOTE(review): the exact lexical rule behind each kind lives in the
/// tokenizer implementation, which is not part of this header; the grouping
/// comments below are inferred from the enumerator names only.
enum class TokenKind {
  // Named / valued entities.
  Function,
  Variable,
  Constant,
  Type,
  Label,
  Inst,

  // Punctuation and trivia.
  Comma,
  Semicolon,
  LParen,
  RParen,
  LCurly,
  RCurly,
  Equals,
  RArrow,
  Comment,

  // Keywords and sentinels.
  FuncKW,
  Eof,
  Invalid,
};

/// A lexeme described by a pair of positions plus its classification.
///
/// NOTE(review): `start`/`end` are presumably offsets into the Tokenizer's
/// buffer forming a half-open range [start, end) -- confirm against
/// Tokenizer::scan().
struct Token {
  std::size_t start;
  std::size_t end;
  TokenKind kind;
};

/// Cursor-based scanner over a borrowed character buffer.
///
/// The tokenizer stores a `std::string_view`, i.e. it does not own the text;
/// the viewed storage must outlive the Tokenizer.
class Tokenizer {
  std::string_view buf;
  std::size_t offset;

  /// Advances the cursor by `idx` characters without reading anything.
  /// No bounds check is performed; eat()/peek() treat a cursor at or past
  /// the end of the buffer as end of input.
  void skip(std::size_t idx = 1) { offset += idx; }

  /// Returns the character under the cursor and then advances the cursor by
  /// `num`. When the cursor is already at or past the end of the buffer,
  /// returns '\0' and leaves the cursor untouched.
  char eat(std::size_t num = 1) {
    if (offset >= buf.length())
      return '\0';

    char c = buf[offset];
    offset += num;
    return c;
  }

  /// Returns the character `idx` positions ahead of the cursor without
  /// consuming it, or '\0' when that position lies outside the buffer.
  char peek(std::size_t idx = 0) const {
    // Compare against the remaining length instead of forming
    // `offset + idx`, which could wrap around for very large values.
    if (offset >= buf.length() || idx >= buf.length() - offset)
      return '\0';

    return buf[offset + idx];
  }

  // Scanning helpers; defined out of line.
  bool scan_id(bool accept_digits);
  bool scan_dec();
  bool scan_hex();
  bool scan_constant();

public:
  /// Creates a tokenizer over `buf` with the cursor initially at `offset`.
  explicit Tokenizer(std::string_view buf, std::size_t offset = 0)
      : buf{buf}, offset{offset} {}

  /// Produces a Token; defined out of line.
  Token scan();

  /// Repositions the tokenizer; defined out of line.
  /// NOTE(review): presumably sets the cursor to the absolute position
  /// `offset` -- confirm against the definition.
  void seek(uint64_t offset);
};

} // namespace willowc

#endif // WILLOWC_INCLUDE_TOKENIZER_HPP