// Tokenizer interface for willowc: token kinds, the Token record and the Tokenizer class.
#ifndef WILLOWC_INCLUDE_TOKENIZER_HPP
#define WILLOWC_INCLUDE_TOKENIZER_HPP
#include <cstddef>
#include <cstdint>
#include <format>
#include <string_view>
#include <utility>

#include <willow/IR/Location.h>
namespace willowc {
/// Kinds of token that a Token can carry.
///
/// Declaration order fixes the underlying values (Function == 0 through
/// Invalid == 21). TokenKindName has one case per enumerator and must be kept
/// in sync when kinds are added.
enum class TokenKind {
  // Named entities and literals.
  // NOTE(review): the exact lexical form of each kind is decided by the
  // scanner implementation, which is not part of this header.
  Function,
  Variable,
  Constant,
  Type,
  Label,
  Inst,

  // Punctuation; presumably , : ; ( ) { } = * < > and -> in that order
  // (confirm against the scanner).
  Comma,
  Colon,
  Semicolon,
  LParen,
  RParen,
  LCurly,
  RCurly,
  Equals,
  Star,
  LTriangle,
  RTriangle,
  RArrow,

  // Remaining kinds.
  Comment,
  FuncKW,  // presumably the function-introducing keyword (confirm)
  Eof,     // presumably end of input (confirm)
  Invalid,
};
/// A lexed token: a kind plus the span it covers.
///
/// NOTE(review): start and end are presumably offsets into the buffer being
/// tokenized; whether end is inclusive or one-past-the-end is not visible in
/// this header. Confirm against the scanner implementation.
struct Token {
  std::size_t start;
  std::size_t end;
  TokenKind kind;
};
/// Scanner over a borrowed character buffer.
///
/// The text is held as a std::string_view, so the tokenizer never owns it; the
/// caller has to keep the underlying buffer alive while the tokenizer is used.
class Tokenizer {
  std::string_view buf; ///< Text being scanned (non-owning view).
  std::size_t offset;   ///< Index into buf of the next unread character.

  // Parser is granted access to the private members below.
  friend class Parser;

  /// Moves the cursor forward by idx characters. No bounds check is made, so
  /// the cursor may end up past the end of the buffer.
  void skip(std::size_t idx = 1) {
    offset += idx;
  }

  /// Returns the character under the cursor, then advances the cursor by num.
  /// When the cursor is already at or past the end of the buffer, returns
  /// '\0' and leaves the cursor untouched.
  char eat(std::size_t num = 1) {
    if (offset < buf.length()) {
      const char current = buf[offset];
      offset += num;
      return current;
    }
    return '\0';
  }

  /// Returns the character idx positions ahead of the cursor without moving
  /// it, or '\0' when that position is at or past the end of the buffer.
  char peek(std::size_t idx = 0) {
    const std::size_t target = offset + idx;
    return target < buf.length() ? buf[target] : '\0';
  }

  // Scanning helpers. They are defined out of line, so their exact contracts
  // (including what the bool results mean) are not visible in this header.
  void recover();
  bool scan_id(bool accept_digits);
  bool scan_dec();
  bool scan_hex();
  bool scan_constant();

public:
  /// Creates a tokenizer over buf with the cursor placed at offset
  /// (the start of the buffer by default).
  explicit constexpr Tokenizer(std::string_view buf, std::size_t offset = 0)
      : buf(buf), offset(offset) {}

  /// Defined out of line; presumably scans and returns the next token
  /// (confirm in the implementation).
  Token scan();

  /// Defined out of line; presumably repositions the cursor to offset
  /// (confirm in the implementation).
  void seek(uint64_t offset);
};
/// Returns the enumerator's identifier as text, e.g. TokenKind::Comma yields
/// "Comma".
///
/// The switch has one case per enumerator and deliberately no default. A value
/// outside the enumeration falls through to std::unreachable(), which is
/// undefined behaviour.
constexpr std::string_view TokenKindName(TokenKind t) {
  using enum TokenKind;
  switch (t) {
  case Function:  return "Function";
  case Variable:  return "Variable";
  case Constant:  return "Constant";
  case Type:      return "Type";
  case Label:     return "Label";
  case Inst:      return "Inst";
  case Comma:     return "Comma";
  case Colon:     return "Colon";
  case Semicolon: return "Semicolon";
  case LParen:    return "LParen";
  case RParen:    return "RParen";
  case LCurly:    return "LCurly";
  case RCurly:    return "RCurly";
  case Equals:    return "Equals";
  case Star:      return "Star";
  case LTriangle: return "LTriangle";
  case RTriangle: return "RTriangle";
  case RArrow:    return "RArrow";
  case Comment:   return "Comment";
  case FuncKW:    return "FuncKW";
  case Eof:       return "Eof";
  case Invalid:   return "Invalid";
  }
  std::unreachable();
}
} // namespace willowc
/// std::format support for willowc::TokenKind: a kind is rendered as the text
/// returned by willowc::TokenKindName.
template <>
struct std::formatter<willowc::TokenKind> {
  /// Consumes nothing from the format spec and returns its start, so only the
  /// plain "{}" placeholder is supported.
  constexpr auto parse(std::format_parse_context &ctx) { return ctx.begin(); }

  /// Writes the kind's name to the context's output iterator and returns the
  /// advanced iterator.
  ///
  /// Templated on the context type because the Formatter requirements expect
  /// format() to accept any basic_format_context, not only std::format_context.
  /// Deliberately not constexpr: std::format_to is not a constexpr function
  /// (as of C++23), so the specifier could never be honoured.
  template <class FormatContext>
  auto format(const willowc::TokenKind t, FormatContext &ctx) const {
    return std::format_to(ctx.out(), "{}", willowc::TokenKindName(t));
  }
};
#endif // WILLOWC_INCLUDE_TOKENIZER_HPP
|