diff options
| author | Stefan Weigl-Bosker <stefan@s00.xyz> | 2026-02-03 14:59:53 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-02-03 14:59:53 -0500 |
| commit | add95b14f74e6dbe04a6efe98ff0f20424930b73 (patch) | |
| tree | 13ce413ee4190a4c8f8743c7740aaa8d04353f14 /willow/tools/willowc/include/tokenizer.hpp | |
| parent | c5b2905c5a64433f8519531a77d3acc42d881f17 (diff) | |
| download | compiler-dev/stefan.tar.gz | |
[willow]: initial frontend work, unit tests (#8) dev/stefan
Diffstat (limited to 'willow/tools/willowc/include/tokenizer.hpp')
| -rw-r--r-- | willow/tools/willowc/include/tokenizer.hpp | 72 |
1 file changed, 72 insertions, 0 deletions
diff --git a/willow/tools/willowc/include/tokenizer.hpp b/willow/tools/willowc/include/tokenizer.hpp new file mode 100644 index 0000000..3de9d32 --- /dev/null +++ b/willow/tools/willowc/include/tokenizer.hpp @@ -0,0 +1,72 @@ +#ifndef WILLOWC_INCLUDE_TOKENIZER_HPP +#define WILLOWC_INCLUDE_TOKENIZER_HPP + +#include <willow/IR/Location.h> + +namespace willowc { + +enum class TokenKind { + Function, + Variable, + Constant, + Type, + Label, + Inst, + + Comma, + Semicolon, + LParen, + RParen, + LCurly, + RCurly, + Equals, + RArrow, + Comment, + + FuncKW, + Eof, + Invalid, +}; + +struct Token { + std::size_t start, end; + TokenKind kind; +}; + +class Tokenizer { + std::string_view buf; + std::size_t offset; + + void skip(std::size_t idx = 1) { offset += idx; } + + char eat(std::size_t num = 1) { + if (offset >= buf.length()) + return '\0'; + + char c = buf[offset]; + offset += num; + return c; + } + + char peek(std::size_t idx = 0) { + if (offset + idx >= buf.length()) + return '\0'; + + return buf[offset + idx]; + } + + bool scan_id(bool accept_digits); + bool scan_dec(); + bool scan_hex(); + bool scan_constant(); +public: + explicit Tokenizer(std::string_view buf, std::size_t offset = 0) + : buf{buf}, offset{offset} {} + + Token scan(); + void seek(uint64_t offset); +}; + +} // namespace willowc + +#endif // WILLOWC_INCLUDE_TOKENIZER_HPP |