// NOTE(review): the header names of this file's #include directives are
// missing from this copy of the source. The standard headers below are the
// ones the visible code uses. Still to be restored: the test-framework header
// that provides TEST_CASE / REQUIRE (presumably Catch2 - confirm), and the
// project headers that declare willowc::Tokenizer / Token / TokenKind and
// willow::termcolor::TextStyle / AnsiColor / Emphasis.
#include <array>
#include <cstddef>
#include <iostream>
#include <print>
#include <span>
#include <string_view>

using namespace willowc;
using namespace willow::termcolor;

/// Scans `buffer` with a Tokenizer and compares the kind of each produced
/// token, in order, against `args`.
///
/// Comparison stops at the first Eof token or once every entry of `args` has
/// been checked, whichever happens first. On the first mismatch a diagnostic
/// is written to std::cerr: the expected and actual kinds, the source line
/// containing the offending token, and a caret underline beneath the token.
///
/// @param buffer Source text handed to the tokenizer.
/// @param args   Expected token kinds, in scan order.
/// @return false on the first mismatching token kind, true otherwise.
bool tokenizer_test(std::string_view buffer, std::span<const TokenKind> args) {
  Tokenizer tokenizer(buffer);
  size_t token_index = 0;

  while (true) {
    Token t = tokenizer.scan();
    if (token_index >= args.size())
      break;

    TokenKind expected = args[token_index++];
    if (t.kind != expected) {
      // Offset of the first character of the line that contains the token:
      // walk backwards from the token start to just past the previous '\n'.
      size_t line_start = [&] {
        size_t ls = t.start;
        if (ls > buffer.size())
          ls = buffer.size(); // keep the index inside the buffer
        for (; ls > 0; ls--) {
          if (buffer[ls - 1] == '\n') {
            break;
          }
        }
        return ls;
      }();

      // Offset of the '\n' that ends that line, or the buffer end if the
      // token sits on the last line.
      size_t line_end = [&]() {
        auto p = buffer.find('\n', t.start);
        return (p == std::string_view::npos) ? buffer.size() : p;
      }();

      std::println(std::cerr);
      std::println(std::cerr, "{}FAIL:{} expected '{}', got '{}'{}",
                   TextStyle{AnsiColor::Red, Emphasis::Bold},
                   TextStyle{AnsiColor::Default, AnsiColor::Default}, expected,
                   t.kind,
                   TextStyle{AnsiColor::None, AnsiColor::None, Emphasis::None});
      std::println(std::cerr, "{}",
                   buffer.substr(line_start, line_end - line_start));

      // Pad up to the token's column, then underline the token itself.
      for (size_t i = line_start; i < t.start; i++)
        std::print(std::cerr, " ");
      std::print(std::cerr, "{}", TextStyle{AnsiColor::Red, Emphasis::Bold});
      for (size_t i = t.start; i < t.end; i++)
        std::print(std::cerr, "^");
      // Finish the underline on std::cerr (the rest of the diagnostic goes
      // there too) and emit the same style value the FAIL line ends with, so
      // the red/bold underline style does not carry over into later output.
      std::println(std::cerr, "{}",
                   TextStyle{AnsiColor::None, AnsiColor::None, Emphasis::None});
      return false;
    }

    if (t.kind == willowc::TokenKind::Eof)
      break;
  }

  return true;
}

// Two complete function definitions: a straight-line body containing a line
// comment, and a multi-block body with labels, branches and a pointer type.
TEST_CASE("basic code", "[tokenizer]") {
  using namespace std::string_literals;
  using enum TokenKind;

  REQUIRE(tokenizer_test(
      R"(
func @add(%a: i32, %b: i32) -> i32 {
  %c: i32 = add %a, %b;
  // return the sum of %a and %b
  return %c;
})",
      // clang-format off
      std::array{FuncKW, Function, LParen, Variable, Colon, Type, Comma, Variable, Colon, Type, RParen, RArrow, Type, LCurly,
                 Variable, Colon, Type, Equals, Inst, Variable, Comma, Variable, Semicolon,
                 Comment,
                 Inst, Variable, Semicolon,
                 RCurly, Eof}));
  // clang-format on

  REQUIRE(tokenizer_test(
      R"(
func @slt(%a: i32, %b: i32) -> i32 {
^entry:
  %retval: *i32 = alloca i32;
  %pred: i1 = lt %a, %b;
  br %pred, ^lt, ^ge;
^lt:
  store %a, %retval;
  jmp ^ret;
^gt:
  store %b, %retval;
  jmp ^ret;
^ret:
  %r: i32 = load %retval;
  return %r;
})",
      // clang-format off
      std::array{
        FuncKW, Function, LParen, Variable, Colon, Type, Comma, Variable, Colon, Type, RParen, RArrow, Type, LCurly,
        Label, Colon,                                                           // ^entry:
        Variable, Colon, Star, Type, Equals, Inst, Type, Semicolon,             // %retval: *i32 = alloca i32;
        Variable, Colon, Type, Equals, Inst, Variable, Comma, Variable, Semicolon, // %pred: i1 = lt %a, %b;
        Inst, Variable, Comma, Label, Comma, Label, Semicolon,                  // br %pred, ^lt, ^ge;
        Label, Colon,                                                           // ^lt:
        Inst, Variable, Comma, Variable, Semicolon,                             // store %a, %retval;
        Inst, Label, Semicolon,                                                 // jmp ^ret;
        Label, Colon,                                                           // ^gt:
        Inst, Variable, Comma, Variable, Semicolon,                             // store %b, %retval;
        Inst, Label, Semicolon,                                                 // jmp ^ret;
        Label, Colon,                                                           // ^ret:
        Variable, Colon, Type, Equals, Inst, Variable, Semicolon,               // %r: i32 = load %retval;
        Inst, Variable, Semicolon,                                              // return %r;
        RCurly, Eof
      }));
  // clang-format on
}

// Zero, negative, multi-digit decimal and hexadecimal literals are each
// expected to scan as a single Constant token.
TEST_CASE("constants", "[tokenizer]") {
  using namespace std::string_literals;
  using enum TokenKind;

  REQUIRE(tokenizer_test(
      R"(
func @c() -> i32 {
  %a: i32 = const 0;
  %b: i64 = const -7;
  %c: i64 = const 123;
  %d: i32 = const 0x2a;
}
)",
      // clang-format off
      std::array{FuncKW, Function, LParen, RParen, RArrow, Type, LCurly,
                 Variable, Colon, Type, Equals, Inst, Constant, Semicolon,
                 Variable, Colon, Type, Equals, Inst, Constant, Semicolon,
                 Variable, Colon, Type, Equals, Inst, Constant, Semicolon,
                 Variable, Colon, Type, Equals, Inst, Constant, Semicolon,
                 RCurly, Eof}));
  // clang-format on
}

// Function and variable names containing '-', '.', '_', '$' and digits are
// each expected to scan as one Function / Variable token.
TEST_CASE("identifiers", "[identifiers]") {
  using namespace std::string_literals;
  using enum TokenKind;

  REQUIRE(tokenizer_test(
      R"(
func @-foo.bar_baz-0(%$a0: i32, %_tmp_1: i32) -> i32 {
  ret %_tmp_1;
}
)",
      // clang-format off
      std::array{
        FuncKW, Function, LParen, Variable, Colon, Type, Comma, Variable, Colon, Type, RParen, RArrow, Type, LCurly,
        Inst, Variable, Semicolon,
        RCurly, Eof
      }));
  // clang-format on
}

// Small inputs: empty text, a comment running to the end of the input, and
// malformed fragments that are expected to produce an Invalid token.
TEST_CASE("edge cases", "[tokenizer]") {
  using namespace std::string_literals;
  using enum TokenKind;

  REQUIRE(tokenizer_test(R"()", std::array{Eof}));
  REQUIRE(tokenizer_test(R"(@foo // hi!)", std::array{Function, Comment, Eof}));
  REQUIRE(tokenizer_test(R"(--5)", std::array{Invalid, Eof}));
  REQUIRE(tokenizer_test(R"(/ %foo)", std::array{Invalid, Variable, Eof}));
  REQUIRE(tokenizer_test(R"(^:)", std::array{Invalid, Colon, Eof}));
}

// Checks which token kinds are expected to follow a malformed token.
TEST_CASE("invalid token recovery", "[tokenizer]") {
  using namespace std::string_literals;
  using enum TokenKind;

  REQUIRE(tokenizer_test(R"(----xyz)", std::array{Invalid, Eof}));
  REQUIRE(tokenizer_test(R"(^^foo:)", std::array{Invalid, Label, Colon, Eof}));
  REQUIRE(tokenizer_test(R"(%13a %foo)",
                         std::array{Variable, Inst, Variable, Eof}));
}