#include namespace willowc { static inline bool is_space(unsigned char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\r'; } static inline bool is_digit(unsigned char c) { return c >= '0' && c <= '9'; } static inline bool is_xdigit(unsigned char c) { return is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); } static inline bool is_alpha(unsigned char c) { unsigned char x = static_cast(c | 0x20); return x >= 'a' && x <= 'z'; } static inline bool valid_id_start(int c) { return is_alpha(c) || c == '$' || c == '.' || c == '_' || c == '-'; } bool Tokenizer::scan_id(bool accept_digits = true) { char c = peek(); if (accept_digits && is_digit(c)) { // if it starts with a digit, must be all digits while (is_digit(peek())) skip(); return true; } if (!valid_id_start(c)) return false; while (valid_id_start(peek()) || isdigit(peek())) skip(); return true; } Token Tokenizer::scan() { std::size_t start = this->offset; while (isspace(peek())) skip(); TokenKind k = [&] { switch (peek()) { case '@': skip(); if (scan_id(false)) return TokenKind::Function; return TokenKind::Invalid; case '%': skip(); if (scan_id()) return TokenKind::Variable; return TokenKind::Invalid; case '^': skip(); if (scan_id()) return TokenKind::Label; return TokenKind::Invalid; case ',': skip(); return TokenKind::Comma; case ';': skip(); return TokenKind::Semicolon; case '(': skip(); return TokenKind::LParen; case ')': skip(); return TokenKind::RParen; case '{': skip(); return TokenKind::LCurly; case '}': skip(); return TokenKind::RCurly; case '=': skip(); return TokenKind::Equals; case '-': { if (peek(1) == '>') { skip(2); return TokenKind::RArrow; } if (isdigit(peek(1))) { skip(); if (scan_dec()) return TokenKind::Constant; } return TokenKind::Invalid; } case '/': { skip(); if (peek() != '/') return TokenKind::Invalid; skip(); char c = eat(); while (c != '\0' && c != '\n') c = eat(); return TokenKind::Comment; } case '\0': return TokenKind::Eof; default: { if (is_digit(peek())) return scan_constant() ? TokenKind::Constant : TokenKind::Invalid; if (peek() == 'i') { skip(); if (scan_dec()) return TokenKind::Type; } if (isalpha(peek())) { skip(); while (isalnum(peek()) || peek() == '.') skip(); return TokenKind::Inst; } return TokenKind::Invalid; } } }(); return Token{start, offset, k}; } bool Tokenizer::scan_dec() { if (!is_digit(peek())) return false; skip(); while (is_digit(peek())) skip(); return true; } bool Tokenizer::scan_hex() { if (!is_xdigit(peek())) return false; skip(); while (is_xdigit(peek())) skip(); return true; } bool Tokenizer::scan_constant() { if (peek() == '-') skip(); if (peek() == '0') { skip(); if (is_digit(peek())) return false; if (peek() == 'x') { skip(); return scan_hex(); } else { return true; // 0 } } else if (is_digit(peek())) { return scan_dec(); } return false; } } // namespace willowc