Skip to content

Commit

Permalink
Adding UTF-16 tests; #583
Browse files Browse the repository at this point in the history
  • Loading branch information
the-moisrex committed Dec 18, 2024
1 parent 1919d3a commit 8f32441
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 4 deletions.
43 changes: 43 additions & 0 deletions tests/unicode_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,23 @@ namespace {
return utf8_str;
}

/// Test helper: encode a UTF-32 string as UTF-16 one code point at a time.
///
/// Every code point is appended twice: once through the reference
/// `old_impl::append` and once through `webpp::unicode::checked::append`.
/// After each step the two buffers are compared with EXPECT_EQ, so the first
/// code point on which the two implementations disagree is reported.
///
/// @param utf32_str  the code points to encode
/// @return the UTF-16 string built by `webpp::unicode::checked::append`
/// @throws webpp::stl::invalid_argument when `checked::append` rejects a code point
std::u16string utf32_to_utf16(std::u32string const& utf32_str) {
    std::u16string utf16_str;
    // A code point takes at most two UTF-16 code units (a surrogate pair),
    // so twice the input length is an upper bound for the output size.
    utf16_str.reserve(utf32_str.length() * 2);

    std::u16string test_str; // output of the reference implementation
    for (char32_t const code_point : utf32_str) {
        old_impl::append(test_str, code_point);
        if (!webpp::unicode::checked::append(utf16_str, code_point)) {
            throw webpp::stl::invalid_argument("Invalid code point");
        }

        // Compare after every append so a mismatch is pinned to one code point.
        EXPECT_EQ(utf16_str, test_str);
    }

    return utf16_str;
}

constexpr char32_t utf8_to_utf32(std::u8string_view const input) {
char32_t codepoint = 0;

Expand Down Expand Up @@ -6656,6 +6673,14 @@ namespace {
return oss.str();
}

/// Debug helper: dump a UTF-16 string as a sequence of `\x<hex>` escapes,
/// one escape per code unit, using lowercase hexadecimal without padding.
/// An empty input yields an empty string.
[[maybe_unused]] std::string u32ToString(std::u16string const& hexString) {
    std::ostringstream rendered;
    rendered << std::hex; // the base flag is sticky, so set it once up front
    for (char16_t const unit : hexString) {
        // Widen to an integer so the stream prints a number, not a character.
        auto const value = static_cast<std::uint32_t>(unit);
        rendered << "\\x" << value;
    }
    return rendered.str();
}

[[maybe_unused]] std::string u32ToString(std::string const& hexString) {
std::ostringstream oss;
for (auto const codePoint : hexString) {
Expand Down Expand Up @@ -6763,6 +6788,10 @@ TEST(Unicode, NormalizationTests) {
u8string const nfc8 = utf32_to_utf8(nfc);
u8string const nfd8 = utf32_to_utf8(nfd);

u16string const source16 = utf32_to_utf16(source);
u16string const nfc16 = utf32_to_utf16(nfc);
u16string const nfd16 = utf32_to_utf16(nfd);

EXPECT_EQ(nfd, toNFD(source))
<< " Source: " << u32ToString(source) << "\n NFD: " << u32ToString(nfd)
<< "\n NFC: " << u32ToString(nfc) << "\n line: " << line << "\n index: " << test_index
Expand Down Expand Up @@ -6791,6 +6820,20 @@ TEST(Unicode, NormalizationTests) {
<< "\n Decomposed: " << u32ToString(canonical_decomposed<std::u32string>(source));
}

{
    // UTF-16 variant of the normalization checks: normalizing the UTF-16
    // encoded source must yield the UTF-16 encoded expected NFD / NFC forms.
    // The failure message dumps the UTF-32 inputs plus the UTF-16 source
    // (labelled "Source16") so the failing test-file line can be reproduced.
    EXPECT_EQ(nfd16, toNFD(source16))
      << " Source: " << u32ToString(source) << " Source16: " << u32ToString(source16)
      << "\n NFD: " << u32ToString(nfd) << "\n NFC: " << u32ToString(nfc) << "\n line: " << line
      << "\n index: " << test_index
      << "\n Decomposed: " << u32ToString(canonical_decomposed<std::u32string>(source));

    EXPECT_EQ(nfc16, toNFC(source16))
      << " Source: " << u32ToString(source) << " Source16: " << u32ToString(source16)
      << "\n NFD: " << u32ToString(nfd) << "\n NFC: " << u32ToString(nfc) << "\n line: " << line
      << "\n index: " << test_index
      << "\n Decomposed: " << u32ToString(canonical_decomposed<std::u32string>(source));
}

check_idempotent(source, nfc, nfd);

if constexpr (enable_utf8_composition_tests) {
Expand Down
4 changes: 2 additions & 2 deletions webpp/unicode/unicode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ namespace webpp::unicode {
static constexpr u32 replacement_char = 0x0000'FFFD;

/// Basic Multilingual Plane (BMP)
template <UTF32 u32 = char32_t>
static constexpr u32 max_bmp = 0x0000'FFFF;
// Constant 0xFFFF typed as T (char32_t unless specified otherwise).
// NOTE(review): T is constrained by `UTF`, which also accepts the narrower
// UTF8 / UTF16 unit types; 0xFFFF presumably does not fit a 1-byte unit type,
// so confirm this is never instantiated with one.
template <UTF T = char32_t>
static constexpr T max_bmp = 0x0000'FFFF;

/// Constant 0x10FFFF typed as the requested UTF-32 unit type (char32_t by default).
/// Presumably the largest code point that UTF-16 can encode - TODO confirm intent.
template <UTF32 u32 = char32_t>
static constexpr u32 max_utf16 = 0x0010'FFFF;
Expand Down
7 changes: 5 additions & 2 deletions webpp/unicode/unicode_concepts.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,13 @@ namespace webpp::unicode {
concept UTF16 = (sizeof(T) == sizeof(char16_t));

template <typename T>
concept WChar = (sizeof(T) >= sizeof(char32_t));
concept WChar = (sizeof(T) >= sizeof(wchar_t));

template <typename T>
concept UTF32 = WChar<T>;
concept UTF32 = (sizeof(T) >= sizeof(wchar_t));

/// Umbrella concept: satisfied by any type that models UTF8, UTF16 or UTF32.
/// NOTE(review): the size-based sub-concepts may overlap on platforms where
/// wchar_t is as small as char16_t - confirm that is acceptable for callers.
template <typename T>
concept UTF = UTF8<T> || UTF16<T> || UTF32<T>;

} // namespace webpp::unicode

Expand Down

0 comments on commit 8f32441

Please sign in to comment.