diff options
| author | sweiglbosker <stefan@s00.xyz> | 2026-02-24 13:04:50 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-02-24 13:04:50 -0500 |
| commit | 9d386221c9d6265f8ab85b42fcb93b4a0cafbb54 (patch) | |
| tree | 317c3f56776538eae9980ad93bd16432d396470b /willow/tools/willowc/unittest | |
| parent | 4b005e4a6e646c0b2788fc261097cdca2a93696c (diff) | |
| download | compiler-9d386221c9d6265f8ab85b42fcb93b4a0cafbb54.tar.gz | |
Diffstat (limited to 'willow/tools/willowc/unittest')
| -rw-r--r-- | willow/tools/willowc/unittest/BUILD.bazel | 16 | ||||
| -rw-r--r-- | willow/tools/willowc/unittest/TokenizerTest.cpp | 197 |
2 files changed, 213 insertions, 0 deletions
diff --git a/willow/tools/willowc/unittest/BUILD.bazel b/willow/tools/willowc/unittest/BUILD.bazel
new file mode 100644
index 0000000..141bf82
--- /dev/null
+++ b/willow/tools/willowc/unittest/BUILD.bazel
@@ -0,0 +1,16 @@
+test_suite(
+    name = "unittest",
+    tests = [
+        ":tokenizer"
+    ],
+)
+
+cc_test(
+    name = "tokenizer",
+    srcs = ["TokenizerTest.cpp"],
+    deps = [
+        "//willow/tools/willowc:willowc_lib",
+        "@catch2//:catch2_main"
+    ],
+    tags = ["tokenizer"]
+)
diff --git a/willow/tools/willowc/unittest/TokenizerTest.cpp b/willow/tools/willowc/unittest/TokenizerTest.cpp
new file mode 100644
index 0000000..d085b1d
--- /dev/null
+++ b/willow/tools/willowc/unittest/TokenizerTest.cpp
@@ -0,0 +1,197 @@
+#include <catch2/catch_test_macros.hpp>
+
+#include <iostream>
+#include <parser.hpp>
+#include <print>
+#include <span>
+#include <tokenizer.hpp>
+#include <willow/Util/Color.h>
+
+using namespace willowc;
+using namespace willow::termcolor;
+
+// Scans `buffer` with the willowc Tokenizer and checks that the emitted
+// token kinds match `args` in order. On the first mismatch it prints a
+// colorized diagnostic to stderr — the offending source line with a caret
+// run under the bad token — and returns false. Returns true once every
+// expected kind (normally terminated by Eof) has been matched.
+bool tokenizer_test(std::string_view buffer, std::span<const TokenKind> args) {
+  Tokenizer tokenizer(buffer);
+
+  size_t token_index = 0;
+  while (true) {
+    Token t = tokenizer.scan();
+
+    // All expected kinds consumed; anything the tokenizer produces past
+    // this point is deliberately not checked.
+    if (token_index >= args.size())
+      break;
+
+    TokenKind expected = args[token_index++];
+    if (t.kind != expected) {
+      // Offset of the start of the line containing the offending token
+      // (scan backwards from t.start to the previous '\n').
+      size_t line_start = [&] {
+        size_t ls = t.start;
+        if (ls > buffer.size())
+          ls = buffer.size();
+        for (; ls > 0; ls--) {
+          if (buffer[ls - 1] == '\n') {
+            break;
+          }
+        }
+        return ls;
+      }();
+      // Offset one past the end of that line (end of buffer on last line).
+      size_t line_end = [&]() {
+        auto p = buffer.find('\n', t.start);
+        return (p == std::string_view::npos) ? buffer.size() : p;
+      }();
+
+      std::println(std::cerr);
+      std::println(std::cerr, "{}FAIL:{} expected '{}', got '{}'{}",
+                   willow::termcolor::TextStyle{AnsiColor::Red, Emphasis::Bold},
+                   TextStyle{AnsiColor::Default, AnsiColor::Default}, expected,
+                   t.kind,
+                   TextStyle{AnsiColor::None, AnsiColor::None, Emphasis::None});
+      std::println(std::cerr, "{}",
+                   buffer.substr(line_start, line_end - line_start));
+      for (size_t i = line_start; i < t.start; i++)
+        std::print(std::cerr, " ");
+
+      std::print(std::cerr, "{}", TextStyle{AnsiColor::Red, Emphasis::Bold});
+      for (size_t i = t.start; i < t.end; i++)
+        std::print(std::cerr, "^");
+      // Reset the style and terminate the caret line on stderr. (Previously
+      // this was a bare std::println() — the newline went to stdout and the
+      // red/bold style was left active in the terminal.)
+      std::println(std::cerr, "{}",
+                   TextStyle{AnsiColor::None, AnsiColor::None, Emphasis::None});
+
+      return false;
+    }
+
+    if (t.kind == willowc::TokenKind::Eof)
+      break;
+  }
+
+  return true;
+}
+
+TEST_CASE("basic code", "[tokenizer]") {
+  using namespace std::string_literals;
+  using enum TokenKind;
+
+  REQUIRE(tokenizer_test(
+      R"(
+      func @add(%a: i32, %b: i32) -> i32 {
+        %c: i32 = add %a, %b;
+        // return the sum of %a and %b
+        return %c;
+      })",
+      // clang-format off
+      std::array{FuncKW, Function, LParen, Variable, Colon, Type, Comma, Variable, Colon, Type, RParen, RArrow, Type, LCurly,
+                 Variable, Colon, Type, Equals, Inst, Variable, Comma, Variable, Semicolon,
+                 Comment,
+                 Inst, Variable, Semicolon,
+                 RCurly, Eof}));
+  // clang-format on
+
+  REQUIRE(tokenizer_test(
+      R"(
+      func @slt(%a: i32, %b: i32) -> i32 {
+      ^entry:
+        %retval: *i32 = alloca i32;
+        %pred: i1 = lt %a, %b;
+        br %pred, ^lt, ^ge;
+      ^lt:
+        store %a, %retval;
+        jmp ^ret;
+      ^gt:
+        store %b, %retval;
+        jmp ^ret;
+      ^ret:
+        %r: i32 = load %retval;
+        return %r;
+      })",
+      // clang-format off
+      std::array{
+        FuncKW, Function, LParen, Variable, Colon, Type, Comma, Variable, Colon, Type, RParen, RArrow, Type, LCurly,
+        Label, Colon, // ^entry:
+
+        Variable, Colon, Star, Type, Equals, Inst, Type, Semicolon, // %retval: *i32 = alloca i32;
+        Variable, Colon, Type, Equals, Inst, Variable, Comma, Variable, Semicolon, // %pred: i1 = lt %a, %b;
+
+        Inst, Variable, Comma, Label, Comma, Label, Semicolon, // br %pred, ^lt, ^ge;
+
+        Label, Colon, // ^lt:
+        Inst, Variable, Comma, Variable, Semicolon, // store %a, %retval;
+        Inst, Label, Semicolon, // jmp ^ret;
+
+        Label, Colon, // ^gt:
+        Inst, Variable, Comma, Variable, Semicolon, // store %b, %retval;
+        Inst, Label, Semicolon, // jmp ^ret;
+
+        Label, Colon, // ^ret:
+        Variable, Colon, Type, Equals, Inst, Variable, Semicolon, // %r: i32 = load %retval;
+        Inst, Variable, Semicolon, // return %r;
+        RCurly, Eof
+      }));
+
+  // clang-format on
+}
+
+TEST_CASE("constants", "[tokenizer]") {
+  using namespace std::string_literals;
+  using enum TokenKind;
+
+  REQUIRE(tokenizer_test(
+      R"(
+      func @c() -> i32 {
+        %a: i32 = const 0;
+        %b: i64 = const -7;
+        %c: i64 = const 123;
+        %d: i32 = const 0x2a;
+      }
+      )",
+      // clang-format off
+      std::array{FuncKW, Function, LParen, RParen, RArrow, Type, LCurly,
+                 Variable, Colon, Type, Equals, Inst, Constant, Semicolon,
+                 Variable, Colon, Type, Equals, Inst, Constant, Semicolon,
+                 Variable, Colon, Type, Equals, Inst, Constant, Semicolon,
+                 Variable, Colon, Type, Equals, Inst, Constant, Semicolon,
+                 RCurly, Eof}));
+  // clang-format on
+}
+
+// NOTE: tag fixed from "[identifiers]" to "[tokenizer]" so this case is
+// included in [tokenizer]-filtered runs like every other case in this file.
+TEST_CASE("identifiers", "[tokenizer]") {
+  using namespace std::string_literals;
+  using enum TokenKind;
+
+  REQUIRE(tokenizer_test(
+      R"(
+      func @-foo.bar_baz-0(%$a0: i32, %_tmp_1: i32) -> i32 {
+        ret %_tmp_1;
+      }
+      )",
+      // clang-format off
+      std::array{
+        FuncKW, Function, LParen, Variable, Colon, Type, Comma, Variable, Colon, Type, RParen, RArrow, Type, LCurly,
+        Inst, Variable, Semicolon,
+        RCurly, Eof
+      }));
+  // clang-format on
+}
+
+TEST_CASE("edge cases", "[tokenizer]") {
+  using namespace std::string_literals;
+  using enum TokenKind;
+
+  REQUIRE(tokenizer_test(R"()", std::array{Eof}));
+
+  REQUIRE(tokenizer_test(R"(@foo // hi!)", std::array{Function, Comment, Eof}));
+
+  REQUIRE(tokenizer_test(R"(--5)", std::array{Invalid, Eof}));
+
+  REQUIRE(tokenizer_test(R"(/ %foo)", std::array{Invalid, Variable, Eof}));
+
+  REQUIRE(tokenizer_test(R"(^:)", std::array{Invalid, Colon, Eof}));
+}
+
+TEST_CASE("invalid token recovery", "[tokenizer]") {
+  using namespace std::string_literals;
+  using enum TokenKind;
+
+  REQUIRE(tokenizer_test(R"(----xyz)", std::array{Invalid, Eof}));
+
+  REQUIRE(tokenizer_test(R"(^^foo:)", std::array{Invalid, Label, Colon, Eof}));
+
+  REQUIRE(tokenizer_test(R"(%13a %foo)",
+                         std::array{Variable, Inst, Variable, Eof}));
+}