#ifndef WILLOWC_INCLUDE_TOKENIZER_HPP
#define WILLOWC_INCLUDE_TOKENIZER_HPP

#include <cstddef>
#include <cstdint>
#include <format>
#include <string_view>
#include <utility>

namespace willowc {

/// Category assigned to each token produced by the Tokenizer.
///
/// The exact lexical form of each kind is decided by the scanning routines,
/// which are defined outside this header.
enum class TokenKind {
  Function,
  Variable,
  Constant,
  Type,
  Label,
  Inst,
  Comma,
  Colon,
  Semicolon,
  LParen,
  RParen,
  LCurly,
  RCurly,
  Equals,
  RArrow,
  Comment,
  FuncKW,
  Eof,
  Invalid,
};

/// A token described by a pair of positions plus its kind.
///
/// NOTE(review): `start`/`end` are presumably offsets into the tokenizer's
/// buffer; whether `end` is inclusive or one-past-the-end is determined by
/// Tokenizer::scan, which is not visible here -- confirm before relying on it.
struct Token {
  std::size_t start;
  std::size_t end;
  TokenKind kind;
};

/// Scans tokens out of a character buffer.
///
/// The buffer is held as a std::string_view, i.e. it is NOT owned: the caller
/// must keep the underlying storage alive for as long as the Tokenizer is used.
class Tokenizer {
  std::string_view buf; // non-owning view of the text being scanned
  std::size_t offset;   // index into `buf` of the next unread character

  friend class Parser;

  /// Advances the cursor by `idx` characters. No bounds check is performed;
  /// eat() and peek() tolerate a cursor that has moved past the end.
  void skip(std::size_t idx = 1) { offset += idx; }

  /// Returns the character at the cursor and then advances the cursor by
  /// `num`. If the cursor is already at or past the end of the buffer,
  /// returns '\0' and leaves the cursor untouched.
  char eat(std::size_t num = 1) {
    if (offset >= buf.length())
      return '\0';
    char c = buf[offset];
    offset += num;
    return c;
  }

  /// Returns the character `idx` positions ahead of the cursor without
  /// consuming anything, or '\0' when that position lies outside the buffer.
  char peek(std::size_t idx = 0) {
    if (offset + idx >= buf.length())
      return '\0';
    return buf[offset + idx];
  }

  // The helpers below are only declared here; their behaviour is defined in
  // the implementation file and is not documented from this header.
  void recover();
  bool scan_id(bool accept_digits);
  bool scan_dec();
  bool scan_hex();
  bool scan_constant();

public:
  /// Creates a tokenizer over `buf`, starting at position `offset`.
  explicit Tokenizer(std::string_view buf, std::size_t offset = 0)
      : buf{buf}, offset{offset} {}

  /// Produces a Token (defined in the implementation file).
  Token scan();

  /// Repositions the tokenizer (defined in the implementation file).
  void seek(std::uint64_t offset);
};

/// Returns the enumerator name of `t` as text, e.g. TokenKind::Comma yields
/// "Comma".
///
/// Passing a value that is not one of the declared enumerators reaches
/// std::unreachable(), which is undefined behaviour.
constexpr std::string_view TokenKindName(TokenKind t) {
  switch (t) {
  case TokenKind::Function:
    return "Function";
  case TokenKind::Variable:
    return "Variable";
  case TokenKind::Constant:
    return "Constant";
  case TokenKind::Type:
    return "Type";
  case TokenKind::Label:
    return "Label";
  case TokenKind::Inst:
    return "Inst";
  case TokenKind::Comma:
    return "Comma";
  case TokenKind::Colon:
    return "Colon";
  case TokenKind::Semicolon:
    return "Semicolon";
  case TokenKind::LParen:
    return "LParen";
  case TokenKind::RParen:
    return "RParen";
  case TokenKind::LCurly:
    return "LCurly";
  case TokenKind::RCurly:
    return "RCurly";
  case TokenKind::Equals:
    return "Equals";
  case TokenKind::RArrow:
    return "RArrow";
  case TokenKind::Comment:
    return "Comment";
  case TokenKind::FuncKW:
    return "FuncKW";
  case TokenKind::Eof:
    return "Eof";
  case TokenKind::Invalid:
    return "Invalid";
  }
  std::unreachable();
}

} // namespace willowc

/// std::format support for willowc::TokenKind: formats the kind as the text
/// returned by willowc::TokenKindName. No format specifiers are interpreted;
/// parse() returns the start of the spec unchanged.
template <> struct std::formatter<willowc::TokenKind> {
  constexpr auto parse(std::format_parse_context &ctx) { return ctx.begin(); }

  constexpr auto format(const willowc::TokenKind t,
                        std::format_context &ctx) const {
    return std::format_to(ctx.out(), "{}", willowc::TokenKindName(t));
  }
};

#endif // WILLOWC_INCLUDE_TOKENIZER_HPP