From f1eb06a54750cf02f50a0ac7d5a150e20e0ed6ac Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 17 Jun 2022 22:11:40 +0200 Subject: [PATCH 1/3] Extend test coverage of TestDbcsWriteRead --- src/host/ft_host/CJK_DbcsTests.cpp | 2337 ++++++++++++---------------- 1 file changed, 1007 insertions(+), 1330 deletions(-) diff --git a/src/host/ft_host/CJK_DbcsTests.cpp b/src/host/ft_host/CJK_DbcsTests.cpp index ed6d1c877ea..4e2fd3a9d1e 100644 --- a/src/host/ft_host/CJK_DbcsTests.cpp +++ b/src/host/ft_host/CJK_DbcsTests.cpp @@ -5,11 +5,33 @@ #include #include #include -#include -#define ENGLISH_US_CP 437u #define JAPANESE_CP 932u +// CHAR_INFO's .Char member is a union of a wchar_t UnicodeChar and char AsciiChar. +// If they share the same offsetof we can write the lower byte of the former to +// overwrite the latter, while ensuring that the high byte is properly cleared to 0. +static_assert(offsetof(CHAR_INFO, Char.UnicodeChar) == offsetof(CHAR_INFO, Char.AsciiChar)); + +template +constexpr CHAR_INFO makeCharInfo(T ch, WORD attr) +{ + CHAR_INFO info{}; + // If T is a char, it'll be a signed integer, whereas UnicodeChar is an unsigned one. + // A negative char like -1 would then result in a wchar_t of 0xffff instead of the expected 0xff. + // Casting ch to a unsigned integer first prevents such "sign extension". + info.Char.UnicodeChar = static_cast(til::as_unsigned(ch)); + info.Attributes = attr; + return info; +} + +using CharInfoPattern = std::array; + +// These two are the same strings but in different encodings. +// Both strings are exactly 16 "cells" wide which matches the size of CharInfoPattern. 
+static constexpr std::string_view dbcsInput{ "Q\x82\xA2\x82\xa9\x82\xc8ZYXWVUT\x82\xc9" }; // Shift-JIS (Codepage 932) +static constexpr std::wstring_view unicodeInput{ L"QいかなZYXWVUTに" }; // Regular UTF-16 + using namespace WEX::Logging; using WEX::TestExecution::TestData; using namespace WEX::Common; @@ -30,198 +52,57 @@ namespace DbcsWriteRead ReadConsoleOutputCharacterFunc = 1 }; - void TestRunner(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - _In_opt_ WORD* const pwAttrOverride, + enum UnicodeMode + { + Ascii = 0, + UnicodeSingle, + UnicodeDoubled, + }; + + void TestRunner(_In_opt_ WORD* const pwAttrOverride, const bool fUseTrueType, const DbcsWriteRead::WriteMode WriteMode, - const bool fWriteInUnicode, + const UnicodeMode fWriteInUnicode, const DbcsWriteRead::ReadMode ReadMode, const bool fReadWithUnicode); - bool Setup(_In_ unsigned int uiCodePage, - _In_ bool fIsTrueType, + bool Setup(_In_ bool fIsTrueType, _Out_ HANDLE* const phOut, _Out_ WORD* const pwAttributes); void SendOutput(const HANDLE hOut, - _In_ const unsigned int uiCodePage, const WriteMode WriteMode, - const bool fIsUnicode, - _In_ PCSTR pszTestString, + const UnicodeMode fIsUnicode, const WORD wAttr); void RetrieveOutput(const HANDLE hOut, const DbcsWriteRead::ReadMode ReadMode, const bool fReadUnicode, - _Out_writes_(cChars) CHAR_INFO* const rgChars, - const SHORT cChars); - - void Verify(_In_reads_(cExpected) CHAR_INFO* const rgExpected, - const size_t cExpected, - _In_reads_(cExpected) CHAR_INFO* const rgActual); - - void PrepExpected(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - const DbcsWriteRead::WriteMode WriteMode, - const bool fWriteWithUnicode, - const bool fIsTrueTypeFont, - const DbcsWriteRead::ReadMode ReadMode, - const bool fReadWithUnicode, - _Outptr_result_buffer_(*pcExpected) CHAR_INFO** const ppciExpected, - _Out_ size_t* const pcExpected); - - void PrepReadConsoleOutput(_In_ const 
unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - const DbcsWriteRead::WriteMode WriteMode, - const bool fWriteWithUnicode, - const bool fIsTrueTypeFont, - const bool fReadWithUnicode, - _Inout_updates_all_(cExpectedNeeded) CHAR_INFO* const rgciExpected, - const size_t cExpectedNeeded); - - void PrepReadConsoleOutputCharacter(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - const DbcsWriteRead::WriteMode WriteMode, - const bool fWriteWithUnicode, - const bool fIsTrueTypeFont, - const bool fReadWithUnicode, - _Inout_updates_all_(cExpectedNeeded) CHAR_INFO* const rgciExpected, - const size_t cExpectedNeeded); - - namespace PrepPattern - { - // There are 14 different patterns that result from the various combinations of our APIs. - // These patterns are simply recognized based on the existing v1 console behavior and generated - // here as a black box test to maintain compatibility based on the variations in API usage. - // It can be assumed that calling this pattern means that the combinations of APIs used for the test - // resulted in output that looks like this pattern on the v1 console. - // - // All patterns will be documented with their sample before and afters above the comment. - // We will use *KI* to represent a Japanese Hiragana character that is romanized and - // no * to represent US ASCII text. - // - // We don't store the Hiragana directly in this file because Visual Studio and Git fight over the - // proper encoding of UTF-8. 
- - // 1 - void SpacePaddedDedupeW(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected); - - // 2 - void SpacePaddedDedupeTruncatedW(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected); - - // 3 - void NullPaddedDedupeW(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected); - - // 4 - void DoubledWNegativeOneTrailing(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected); - - // 5 - void DoubledW(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected); - - // 6 - void A(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected); - - // 7 - void AStompsWNegativeOnePatternTruncateSpacePadded(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected); - - // 8 - void AOnDoubledWNegativeOneTrailing(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected); - - // 9 - void AOnDoubledW(_In_ 
const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected); - - // 10 - void WNullCoverAChar(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected); - - // 11 - void WSpaceFill(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected); - - // 12 - void ACoverAttrSpacePaddedDedupeTruncatedW(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected); - - // 13 - void SpacePaddedDedupeA(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected); - - // 14 - void TrueTypeCharANullWithAttrs(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected); - }; + CharInfoPattern& rgChars); + + void Verify(const CharInfoPattern& rgExpected, + const CharInfoPattern& rgActual); + + void PrepExpected( + const WORD wAttrWritten, + const DbcsWriteRead::WriteMode WriteMode, + const DbcsWriteRead::UnicodeMode fWriteWithUnicode, + const bool fIsTrueTypeFont, + const DbcsWriteRead::ReadMode ReadMode, + const bool fReadWithUnicode, + CharInfoPattern& expected); + + const CharInfoPattern& PrepReadConsoleOutput( + const DbcsWriteRead::WriteMode WriteMode, + const UnicodeMode fWriteWithUnicode, + const bool fIsTrueTypeFont, + 
const bool fReadWithUnicode); + + const CharInfoPattern& PrepReadConsoleOutputCharacter( + const DbcsWriteRead::WriteMode WriteMode, + const UnicodeMode fWriteWithUnicode, + const bool fIsTrueTypeFont, + const bool fReadWithUnicode); }; class DbcsTests @@ -237,10 +118,9 @@ class DbcsTests TEST_METHOD(TestMultibyteInputRetrieval); BEGIN_TEST_METHOD(TestDbcsWriteRead) - TEST_METHOD_PROPERTY(L"Data:uiCodePage", L"{437, 932}") TEST_METHOD_PROPERTY(L"Data:fUseTrueTypeFont", L"{true, false}") TEST_METHOD_PROPERTY(L"Data:WriteMode", L"{0, 1, 2, 3}") - TEST_METHOD_PROPERTY(L"Data:fWriteInUnicode", L"{true, false}") + TEST_METHOD_PROPERTY(L"Data:fWriteInUnicode", L"{0, 1, 2}") TEST_METHOD_PROPERTY(L"Data:ReadMode", L"{0, 1}") TEST_METHOD_PROPERTY(L"Data:fReadInUnicode", L"{true, false}") END_TEST_METHOD() @@ -281,16 +161,15 @@ bool DbcsTests::DbcsTestSetup() return true; } -bool DbcsWriteRead::Setup(_In_ unsigned int uiCodePage, - _In_ bool fIsTrueType, +bool DbcsWriteRead::Setup(_In_ bool fIsTrueType, _Out_ HANDLE* const phOut, _Out_ WORD* const pwAttributes) { const auto hOut = GetStdOutputHandle(); // Ensure that the console is set into the appropriate codepage for the test - VERIFY_WIN32_BOOL_SUCCEEDED_RETURN(SetConsoleCP(uiCodePage)); - VERIFY_WIN32_BOOL_SUCCEEDED_RETURN(SetConsoleOutputCP(uiCodePage)); + VERIFY_WIN32_BOOL_SUCCEEDED_RETURN(SetConsoleCP(JAPANESE_CP)); + VERIFY_WIN32_BOOL_SUCCEEDED_RETURN(SetConsoleOutputCP(JAPANESE_CP)); // Now set up the font. Many of these APIs are oddly dependent on font, so set as appropriate. 
CONSOLE_FONT_INFOEX cfiex = { 0 }; @@ -306,16 +185,7 @@ bool DbcsWriteRead::Setup(_In_ unsigned int uiCodePage, } else { - switch (uiCodePage) - { - case JAPANESE_CP: - wcscpy_s(cfiex.FaceName, L"MS Gothic"); - break; - case ENGLISH_US_CP: - wcscpy_s(cfiex.FaceName, L"Consolas"); - break; - } - + wcscpy_s(cfiex.FaceName, L"MS Gothic"); cfiex.dwFontSize.Y = 16; } @@ -359,41 +229,10 @@ bool DbcsWriteRead::Setup(_In_ unsigned int uiCodePage, } void DbcsWriteRead::SendOutput(const HANDLE hOut, - _In_ const unsigned int uiCodePage, const DbcsWriteRead::WriteMode WriteMode, - const bool fIsUnicode, - _In_ PCSTR pszTestString, + const UnicodeMode fIsUnicode, const WORD wAttr) { - // DBCS is very dependent on knowing the byte length in the original codepage of the input text. - // Save off the original length of the string so we know what its A length was. - const auto cTestString = (SHORT)strlen(pszTestString); - - // If we're in Unicode mode, we will need to translate the test string to Unicode before passing into the console - PWSTR pwszTestString = nullptr; - if (fIsUnicode) - { - // Use double-call pattern to find space to allocate, allocate it, then convert. - const auto icchNeeded = MultiByteToWideChar(uiCodePage, 0, pszTestString, -1, nullptr, 0); - - pwszTestString = new WCHAR[icchNeeded]; - VERIFY_IS_NOT_NULL(pwszTestString); - - const auto iRes = MultiByteToWideChar(uiCodePage, 0, pszTestString, -1, pwszTestString, icchNeeded); - CheckLastErrorZeroFail(iRes, L"MultiByteToWideChar"); - } - - // Calculate the number of cells/characters/calls we will need to fill with our input depending on the mode. - SHORT cChars = 0; - if (fIsUnicode) - { - cChars = (SHORT)wcslen(pwszTestString); - } - else - { - cChars = cTestString; - } - // These parameters will be used to print out the written rectangle if we used the console APIs (not the CRT APIs) // This information will be stored and printed out at the very end after we move the cursor off of the text we just printed. 
// The cursor auto-moves for CRT, but we have to manually move it for some of the Console APIs. @@ -402,6 +241,7 @@ void DbcsWriteRead::SendOutput(const HANDLE hOut, SMALL_RECT srWritten = { 0 }; auto fUseDwordWritten = false; + DWORD dwWrittenExpected = 0; DWORD dwWritten = 0; switch (WriteMode) @@ -423,16 +263,16 @@ void DbcsWriteRead::SendOutput(const HANDLE hOut, // Write each character in the string individually out through the CRT if (fIsUnicode) { - for (SHORT i = 0; i < cChars; i++) + for (const auto& ch : unicodeInput) { - putwchar(pwszTestString[i]); + putwchar(ch); } } else { - for (SHORT i = 0; i < cChars; i++) + for (const auto& ch : dbcsInput) { - putchar(pszTestString[i]); + putchar(ch); } } break; @@ -441,30 +281,45 @@ void DbcsWriteRead::SendOutput(const HANDLE hOut, { // If we're going to be using WriteConsoleOutput, we need to create up a nice // CHAR_INFO buffer to pass into the method containing the string and possibly attributes - auto rgChars = new CHAR_INFO[cChars]; - VERIFY_IS_NOT_NULL(rgChars); + std::vector rgChars; + rgChars.reserve(dbcsInput.size()); - for (SHORT i = 0; i < cChars; i++) + switch (fIsUnicode) { - rgChars[i].Attributes = wAttr; - - if (fIsUnicode) + case UnicodeMode::UnicodeSingle: + for (const auto& ch : unicodeInput) { - rgChars[i].Char.UnicodeChar = pwszTestString[i]; + rgChars.push_back(makeCharInfo(ch, wAttr)); } - else + break; + case UnicodeMode::UnicodeDoubled: + for (const auto& ch : unicodeInput) { - // Ensure the top half of the union is filled with 0 for comparison purposes later. - rgChars[i].Char.UnicodeChar = 0; - rgChars[i].Char.AsciiChar = pszTestString[i]; + // For the sake of this test we're going to simply assume that any non-ASCII character is wide. 
+ if (ch < 0x80) + { + rgChars.push_back(makeCharInfo(ch, wAttr)); + } + else + { + rgChars.push_back(makeCharInfo(ch, wAttr | COMMON_LVB_LEADING_BYTE)); + rgChars.push_back(makeCharInfo(ch, wAttr | COMMON_LVB_TRAILING_BYTE)); + } } + break; + default: + for (const auto& ch : dbcsInput) + { + rgChars.push_back(makeCharInfo(ch, wAttr)); + } + break; } // This is the stated size of the buffer we're passing. // This console API can treat the buffer as a 2D array. We're only doing 1 dimension so the Y is 1 and the X is the number of CHAR_INFO characters. COORD coordBufferSize = { 0 }; coordBufferSize.Y = 1; - coordBufferSize.X = cChars; + coordBufferSize.X = gsl::narrow(rgChars.size()); // We want to write to the coordinate 0,0 of the buffer. The test setup function has blanked out that line. COORD coordBufferTarget = { 0 }; @@ -473,27 +328,25 @@ void DbcsWriteRead::SendOutput(const HANDLE hOut, SMALL_RECT srWriteRegion = { 0 }; // Since we could have full-width characters, we have to "allow" the console to write up to the entire A string length (up to double the W length) - srWriteRegion.Right = cTestString - 1; + srWriteRegion.Right = gsl::narrow(dbcsInput.size()) - 1; // Save the expected written rectangle for comparison after the call srWrittenExpected = { 0 }; - srWrittenExpected.Right = cChars - 1; // we expect that the written report will be the number of characters inserted, not the size of buffer consumed + srWrittenExpected.Right = coordBufferSize.X - 1; // we expect that the written report will be the number of characters inserted, not the size of buffer consumed // NOTE: Don't VERIFY these calls or we will overwrite the text in the buffer with the log message. 
if (fIsUnicode) { - WriteConsoleOutputW(hOut, rgChars, coordBufferSize, coordBufferTarget, &srWriteRegion); + WriteConsoleOutputW(hOut, rgChars.data(), coordBufferSize, coordBufferTarget, &srWriteRegion); } else { - WriteConsoleOutputA(hOut, rgChars, coordBufferSize, coordBufferTarget, &srWriteRegion); + WriteConsoleOutputA(hOut, rgChars.data(), coordBufferSize, coordBufferTarget, &srWriteRegion); } // Save write region so we can print it out after we move the cursor out of the way srWritten = srWriteRegion; fUseRectWritten = true; - - delete[] rgChars; break; } case DbcsWriteRead::WriteMode::WriteConsoleOutputCharacterFunc: @@ -502,11 +355,13 @@ void DbcsWriteRead::SendOutput(const HANDLE hOut, if (fIsUnicode) { - WriteConsoleOutputCharacterW(hOut, pwszTestString, cChars, coordBufferTarget, &dwWritten); + dwWrittenExpected = gsl::narrow(unicodeInput.size()); + WriteConsoleOutputCharacterW(hOut, unicodeInput.data(), dwWrittenExpected, coordBufferTarget, &dwWritten); } else { - WriteConsoleOutputCharacterA(hOut, pszTestString, cChars, coordBufferTarget, &dwWritten); + dwWrittenExpected = gsl::narrow(dbcsInput.size()); + WriteConsoleOutputCharacterA(hOut, dbcsInput.data(), dwWrittenExpected, coordBufferTarget, &dwWritten); } fUseDwordWritten = true; @@ -516,11 +371,13 @@ void DbcsWriteRead::SendOutput(const HANDLE hOut, { if (fIsUnicode) { - WriteConsoleW(hOut, pwszTestString, cChars, &dwWritten, nullptr); + dwWrittenExpected = gsl::narrow(unicodeInput.size()); + WriteConsoleW(hOut, unicodeInput.data(), dwWrittenExpected, &dwWritten, nullptr); } else { - WriteConsoleA(hOut, pszTestString, cChars, &dwWritten, nullptr); + dwWrittenExpected = gsl::narrow(dbcsInput.size()); + WriteConsoleA(hOut, dbcsInput.data(), dwWrittenExpected, &dwWritten, nullptr); } fUseDwordWritten = true; @@ -530,12 +387,6 @@ void DbcsWriteRead::SendOutput(const HANDLE hOut, VERIFY_FAIL(L"Unsupported write mode."); } - // Free memory if appropriate (if we had to convert A to W) - if (nullptr != 
pwszTestString) - { - delete[] pwszTestString; - } - // Move the cursor down a line in case log info prints out. COORD coordSetCursor = { 0 }; coordSetCursor.Y = 1; @@ -551,1143 +402,999 @@ void DbcsWriteRead::SendOutput(const HANDLE hOut, else if (fUseDwordWritten) { Log::Comment(NoThrowString().Format(L"Chars Written: %d", dwWritten)); - VERIFY_ARE_EQUAL((DWORD)cChars, dwWritten); - } -} - -// 3 -// From Input String: "Q(Hiragana I)(Hiragana KA)(Hiragana NA)ZYXWVUT(Hiragana NI) -// With Default Attribute 0x7 (before writing) and Applied Attribute 0x29 (written with text) -// ... -// Receive Output Table: -// attr | wchar (char) | symbol -// ------------------------------------ -// 0x029 | 0x0051 (0x51) | Q -// 0x029 | 0x3044 (0x44) | Hiragana I -// 0x029 | 0x304B (0x4B) | Hiragana KA -// 0x029 | 0x306A (0x6A) | Hiragana NA -// 0x029 | 0x005A (0x5A) | Z -// 0x029 | 0x0059 (0x59) | Y -// 0x029 | 0x0058 (0x58) | X -// 0x029 | 0x0057 (0x57) | W -// 0x029 | 0x0056 (0x56) | V -// 0x029 | 0x0055 (0x55) | U -// 0x029 | 0x0054 (0x54) | T -// 0x029 | 0x306B (0x6B) | Hiragana NI -// 0x000 | 0x0000 (0x00) | -// 0x000 | 0x0000 (0x00) | -// 0x000 | 0x0000 (0x00) | -// 0x000 | 0x0000 (0x00) | -// ... -// "Null Padded" means any unused data in the buffer will be filled with null and null attribute. -// "Dedupe" means that any full-width characters in the buffer (despite being stored doubled inside the buffer) -// will be returned as single copies. -// "W" means that we intend Unicode data to be browsed in the resulting struct (even though wchar and char are unioned.) 
-void DbcsWriteRead::PrepPattern::NullPaddedDedupeW(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD /*wAttrOriginal*/, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected) -{ - Log::Comment(L"Pattern 3"); - const auto iwchNeeded = MultiByteToWideChar(uiCodePage, 0, pszTestData, -1, nullptr, 0); - auto pwszTestData = new wchar_t[iwchNeeded]; - VERIFY_IS_NOT_NULL(pwszTestData); - const auto iSuccess = MultiByteToWideChar(uiCodePage, 0, pszTestData, -1, pwszTestData, iwchNeeded); - CheckLastErrorZeroFail(iSuccess, L"MultiByteToWideChar"); - - const auto cWideTestData = wcslen(pwszTestData); - VERIFY_IS_GREATER_THAN_OR_EQUAL(cExpected, cWideTestData); - - for (size_t i = 0; i < cWideTestData; i++) - { - const auto pciCurrent = &pciExpected[i]; - const auto wch = pwszTestData[i]; - - pciCurrent->Attributes = wAttrWritten; - pciCurrent->Char.UnicodeChar = wch; - } - - delete[] pwszTestData; -} - -// 1 -// From Input String: "Q(Hiragana I)(Hiragana KA)(Hiragana NA)ZYXWVUT(Hiragana NI) -// With Default Attribute 0x7 (before writing) and Applied Attribute 0x29 (written with text) -// ... -// Receive Output Table: -// attr | wchar (char) | symbol -// ------------------------------------ -// 0x029 | 0x0051 (0x51) | Q -// 0x029 | 0x3044 (0x44) | Hiragana I -// 0x029 | 0x304B (0x4B) | Hiragana KA -// 0x029 | 0x306A (0x6A) | Hiragana NA -// 0x029 | 0x005A (0x5A) | Z -// 0x029 | 0x0059 (0x59) | Y -// 0x029 | 0x0058 (0x58) | X -// 0x029 | 0x0057 (0x57) | W -// 0x029 | 0x0056 (0x56) | V -// 0x029 | 0x0055 (0x55) | U -// 0x029 | 0x0054 (0x54) | T -// 0x029 | 0x306B (0x6B) | Hiragana NI -// 0x007 | 0x0020 (0x20) | -// 0x007 | 0x0020 (0x20) | -// 0x007 | 0x0020 (0x20) | -// 0x007 | 0x0020 (0x20) | -// ... -// "Space Padded" means any unused data in the buffer will be filled with spaces and the default attribute. 
-// "Dedupe" means that any full-width characters in the buffer (despite being stored doubled inside the buffer) -// will be returned as single copies. -// "W" means that we intend Unicode data to be browsed in the resulting struct (even though wchar and char are unioned.) -void DbcsWriteRead::PrepPattern::SpacePaddedDedupeW(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected) -{ - Log::Comment(L"Pattern 1"); - DbcsWriteRead::PrepPattern::NullPaddedDedupeW(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, pciExpected, cExpected); - - for (size_t i = 0; i < cExpected; i++) - { - const auto pciCurrent = &pciExpected[i]; - - if (0 == pciCurrent->Attributes && 0 == pciCurrent->Char.UnicodeChar) - { - pciCurrent->Attributes = wAttrOriginal; - pciCurrent->Char.UnicodeChar = L'\x20'; - } - } -} - -// 2 -// From Input String: "Q(Hiragana I)(Hiragana KA)(Hiragana NA)ZYXWVUT(Hiragana NI) -// With Default Attribute 0x7 (before writing) and Applied Attribute 0x29 (written with text) -// ... -// Receive Output Table: -// attr | wchar (char) | symbol -// ------------------------------------ -// 0x029 | 0x0051 (0x51) | Q -// 0x029 | 0x3044 (0x44) | Hiragana I -// 0x029 | 0x304B (0x4B) | Hiragana KA -// 0x029 | 0x306A (0x6A) | Hiragana NA -// 0x029 | 0x005A (0x5A) | Z -// 0x029 | 0x0059 (0x59) | Y -// 0x029 | 0x0058 (0x58) | X -// 0x029 | 0x0057 (0x57) | W -// 0x029 | 0x0056 (0x56) | V -// 0x007 | 0x0020 (0x20) | -// 0x007 | 0x0020 (0x20) | -// 0x007 | 0x0020 (0x20) | -// 0x007 | 0x0020 (0x20) | -// 0x000 | 0x0000 (0x00) | -// 0x000 | 0x0000 (0x00) | -// 0x000 | 0x0000 (0x00) | -// ... -// "Space Padded" means most of the unused data in the buffer will be filled with spaces and the default attribute. 
-// "Dedupe" means that any full-width characters in the buffer (despite being stored doubled inside the buffer) -// will be returned as single copies. -// "W" means that we intend Unicode data to be browsed in the resulting struct (even though wchar and char are unioned.) -// "Truncated" means that this pattern trims off some of the end of the buffer with NULLs. -void DbcsWriteRead::PrepPattern::SpacePaddedDedupeTruncatedW(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected) -{ - Log::Comment(L"Pattern 2"); - - const auto iwchNeeded = MultiByteToWideChar(uiCodePage, 0, pszTestData, -1, nullptr, 0); - auto pwszTestData = new wchar_t[iwchNeeded]; - VERIFY_IS_NOT_NULL(pwszTestData); - const auto iSuccess = MultiByteToWideChar(uiCodePage, 0, pszTestData, -1, pwszTestData, iwchNeeded); - CheckLastErrorZeroFail(iSuccess, L"MultiByteToWideChar"); - - const auto cWideData = wcslen(pwszTestData); - - // The maximum number of columns the console will consume is the number of wide characters there are in the string. - // This is whether or not the characters themselves are halfwidth or fullwidth (1 col or 2 col respectively.) - // This means that for 4 wide characters that are halfwidth (1 col), the console will copy out all 4 of them. - // For 4 wide characters that are fullwidth (2 col each), the console will copy out 2 of them (because it will count each fullwidth as 2 when filling) - // For a mixed string that is something like half, full, half (4 columns, 3 wchars), we will receive half, full (3 columns worth) and truncate the last half. 
- - const auto cMaxColumns = cWideData; - size_t iColumnsConsumed = 0; - - size_t iNarrow = 0; - size_t iWide = 0; - size_t iExpected = 0; - - size_t iNulls = 0; - - while (iColumnsConsumed < cMaxColumns) - { - const auto pciCurrent = &pciExpected[iExpected]; - const auto chCurrent = pszTestData[iWide]; - const auto wchCurrent = pwszTestData[iWide]; - - pciCurrent->Attributes = wAttrWritten; - pciCurrent->Char.UnicodeChar = wchCurrent; - - if (IsDBCSLeadByteEx(uiCodePage, chCurrent)) - { - iColumnsConsumed += 2; - iNarrow += 2; - iNulls++; - } - else - { - iColumnsConsumed++; - iNarrow++; - } - - iWide++; - iExpected++; - } - - // Fill remaining with spaces and original attribute - while (iExpected < cExpected - iNulls) - { - const auto pciCurrent = &pciExpected[iExpected]; - pciCurrent->Attributes = wAttrOriginal; - pciCurrent->Char.UnicodeChar = L'\x20'; - - iExpected++; - } - - delete[] pwszTestData; -} - -// 13 -// From Input String: "Q(Hiragana I)(Hiragana KA)(Hiragana NA)ZYXWVUT(Hiragana NI) -// With Default Attribute 0x7 (before writing) and Applied Attribute 0x29 (written with text) -// ... -// Receive Output Table: -// attr | wchar (char) | symbol -// ------------------------------------ -// 0x029 | 0x0051 (0x51) | Q -// 0x129 | 0x0082 (0x82) | Hiragana I Shift-JIS Codepage 932 Lead Byte -// 0x229 | 0x00A2 (0xA2) | Hiragana I Shift-JIS Codepage 932 Trail Byte -// 0x129 | 0x0082 (0x82) | Hiragana KA Shift-JIS Codepage 932 Lead Byte -// 0x229 | 0x00A9 (0xA9) | Hiragana KA Shift-JIS Codepage 932 Trail Byte -// 0x129 | 0x0082 (0x82) | Hiragana NA Shift-JIS Codepage 932 Lead Byte -// 0x229 | 0x00C8 (0xC8) | Hiragana NA Shift-JIS Codepage 932 Trail Byte -// 0x029 | 0x005A (0x5A) | Z -// 0x029 | 0x0059 (0x59) | Y -// 0x029 | 0x0058 (0x58) | X -// 0x029 | 0x0057 (0x57) | W -// 0x029 | 0x0056 (0x56) | V -// 0x007 | 0x0020 (0x20) | -// 0x007 | 0x0020 (0x20) | -// 0x007 | 0x0020 (0x20) | -// 0x007 | 0x0020 (0x20) | -// ... 
-// "Space Padded" means most of the unused data in the buffer will be filled with spaces and the default attribute. -// "Dedupe" means that any full-width characters in the buffer (despite being stored doubled inside the buffer) -// will be returned as single copies. -// "A" means that we intend in-codepage (char) data to be browsed in the resulting struct (even though wchar and char are unioned.) -void DbcsWriteRead::PrepPattern::SpacePaddedDedupeA(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected) -{ - Log::Comment(L"Pattern 13"); - - const auto iwchNeeded = MultiByteToWideChar(uiCodePage, 0, pszTestData, -1, nullptr, 0); - auto pwszTestData = new wchar_t[iwchNeeded]; - VERIFY_IS_NOT_NULL(pwszTestData); - const auto iSuccess = MultiByteToWideChar(uiCodePage, 0, pszTestData, -1, pwszTestData, iwchNeeded); - CheckLastErrorZeroFail(iSuccess, L"MultiByteToWideChar"); - - const auto cWideData = wcslen(pwszTestData); - - // The maximum number of columns the console will consume is the number of wide characters there are in the string. - // This is whether or not the characters themselves are halfwidth or fullwidth (1 col or 2 col respectively.) - // This means that for 4 wide characters that are halfwidth (1 col), the console will copy out all 4 of them. - // For 4 wide characters that are fullwidth (2 col each), the console will copy out 2 of them (because it will count each fullwidth as 2 when filling) - // For a mixed string that is something like half, full, half (4 columns, 3 wchars), we will receive half, full (3 columns worth) and truncate the last half. 
- - const auto cMaxColumns = cWideData; - - auto fIsNextTrailing = false; - size_t i = 0; - for (; i < cMaxColumns; i++) - { - const auto pciCurrent = &pciExpected[i]; - const auto chCurrent = pszTestData[i]; - - pciCurrent->Attributes = wAttrWritten; - pciCurrent->Char.AsciiChar = chCurrent; - - if (IsDBCSLeadByteEx(uiCodePage, chCurrent)) - { - pciCurrent->Attributes |= COMMON_LVB_LEADING_BYTE; - fIsNextTrailing = true; - } - else if (fIsNextTrailing) - { - pciCurrent->Attributes |= COMMON_LVB_TRAILING_BYTE; - fIsNextTrailing = false; - } - } - - // Fill remaining with spaces and original attribute - while (i < cExpected) - { - const auto pciCurrent = &pciExpected[i]; - pciCurrent->Attributes = wAttrOriginal; - pciCurrent->Char.UnicodeChar = L'\x20'; - - i++; - } - - delete[] pwszTestData; -} - -// 5 -// From Input String: "Q(Hiragana I)(Hiragana KA)(Hiragana NA)ZYXWVUT(Hiragana NI) -// With Default Attribute 0x7 (before writing) and Applied Attribute 0x29 (written with text) -// ... -// Receive Output Table: -// attr | wchar (char) | symbol -// ------------------------------------ -// 0x029 | 0x0051 (0x51) | Q -// 0x129 | 0x3044 (0x44) | Hiragana I -// 0x229 | 0x3044 (0x44) | Hiragana I -// 0x129 | 0x304B (0x4B) | Hiragana KA -// 0x229 | 0x304B (0x4B) | Hiragana KA -// 0x129 | 0x306A (0x6A) | Hiragana NA -// 0x229 | 0x306A (0x6A) | Hiragana NA -// 0x029 | 0x005A (0x5A) | Z -// 0x029 | 0x0059 (0x59) | Y -// 0x029 | 0x0058 (0x58) | X -// 0x029 | 0x0057 (0x57) | W -// 0x029 | 0x0056 (0x56) | V -// 0x029 | 0x0055 (0x55) | U -// 0x029 | 0x0054 (0x54) | T -// 0x129 | 0x306B (0x6B) | Hiragana NI -// 0x229 | 0x306B (0x6B) | Hiragana NI -// ... -// "Doubled" means that any full-width characters in the buffer are returned twice with a leading and trailing byte marker. -// "W" means that we intend Unicode data to be browsed in the resulting struct (even though wchar and char are unioned.) 
-void DbcsWriteRead::PrepPattern::DoubledW(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD /*wAttrOriginal*/, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected) -{ - Log::Comment(L"Pattern 5"); - const auto cTestData = strlen(pszTestData); - VERIFY_IS_GREATER_THAN_OR_EQUAL(cExpected, cTestData); - - const auto iwchNeeded = MultiByteToWideChar(uiCodePage, 0, pszTestData, -1, nullptr, 0); - auto pwszTestData = new wchar_t[iwchNeeded]; - VERIFY_IS_NOT_NULL(pwszTestData); - const auto iSuccess = MultiByteToWideChar(uiCodePage, 0, pszTestData, -1, pwszTestData, iwchNeeded); - CheckLastErrorZeroFail(iSuccess, L"MultiByteToWideChar"); - - size_t iWide = 0; - auto wchRepeat = L'\0'; - auto fIsNextTrailing = false; - for (size_t i = 0; i < cTestData; i++) - { - const auto pciCurrent = &pciExpected[i]; - const auto chTest = pszTestData[i]; - const auto wchCopy = pwszTestData[iWide]; - - pciCurrent->Attributes = wAttrWritten; - - if (IsDBCSLeadByteEx(uiCodePage, chTest)) - { - pciCurrent->Char.UnicodeChar = wchCopy; - iWide++; - - pciCurrent->Attributes |= COMMON_LVB_LEADING_BYTE; - - wchRepeat = wchCopy; - fIsNextTrailing = true; - } - else if (fIsNextTrailing) - { - pciCurrent->Char.UnicodeChar = wchRepeat; - - pciCurrent->Attributes |= COMMON_LVB_TRAILING_BYTE; - - fIsNextTrailing = false; - } - else - { - pciCurrent->Char.UnicodeChar = wchCopy; - iWide++; - } - } - - delete[] pwszTestData; -} - -// 4 -// From Input String: "Q(Hiragana I)(Hiragana KA)(Hiragana NA)ZYXWVUT(Hiragana NI) -// With Default Attribute 0x7 (before writing) and Applied Attribute 0x29 (written with text) -// ... 
-// Receive Output Table: -// attr | wchar (char) | symbol -// ------------------------------------ -// 0x029 | 0x0051 (0x51) | Q -// 0x129 | 0x3044 (0x44) | Hiragana I -// 0x229 | 0xFFFF (0xFF) | Invalid Unicode Character -// 0x129 | 0x304B (0x4B) | Hiragana KA -// 0x229 | 0xFFFF (0xFF) | Invalid Unicode Character -// 0x129 | 0x306A (0x6A) | Hiragana NA -// 0x229 | 0xFFFF (0xFF) | Invalid Unicode Character -// 0x029 | 0x005A (0x5A) | Z -// 0x029 | 0x0059 (0x59) | Y -// 0x029 | 0x0058 (0x58) | X -// 0x029 | 0x0057 (0x57) | W -// 0x029 | 0x0056 (0x56) | V -// 0x029 | 0x0055 (0x55) | U -// 0x029 | 0x0054 (0x54) | T -// 0x129 | 0x306B (0x6B) | Hiragana NI -// 0x229 | 0xFFFF (0xFF) | Invalid Unicode Character -// ... -// "Doubled" means that any full-width characters in the buffer are returned twice with a leading and trailing byte marker. -// "W" means that we intend Unicode data to be browsed in the resulting struct (even though wchar and char are unioned.) -// "NegativeOneTrailing" means that all trailing bytes have their character replaced with the value -1 or 0xFFFF -void DbcsWriteRead::PrepPattern::DoubledWNegativeOneTrailing(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected) -{ - Log::Comment(L"Pattern 4"); - DbcsWriteRead::PrepPattern::DoubledW(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, pciExpected, cExpected); - - for (size_t i = 0; i < cExpected; i++) - { - auto pciCurrent = &pciExpected[i]; - - if (WI_IsFlagSet(pciCurrent->Attributes, COMMON_LVB_TRAILING_BYTE)) - { - pciCurrent->Char.UnicodeChar = 0xFFFF; - } + VERIFY_ARE_EQUAL(dwWrittenExpected, dwWritten); } } -// 7 -// From Input String: "Q(Hiragana I)(Hiragana KA)(Hiragana NA)ZYXWVUT(Hiragana NI) -// With Default Attribute 0x7 (before writing) and Applied Attribute 0x29 (written with text) -// ... 
-// Receive Output Table: -// attr | wchar (char) | symbol -// ------------------------------------ -// 0x029 | 0x0051 (0x51) | Q -// 0x129 | 0x3082 (0x82) | Hiragana I Unicode 0x3044 with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. -// 0x229 | 0xFFA2 (0xA2) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA2 -// 0x129 | 0x3082 (0x82) | Hiragana KA Unicode 0x304B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. -// 0x229 | 0xFFA9 (0xA9) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA9 -// 0x129 | 0x3082 (0x82) | Hiragana NA 0x306A with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. -// 0x229 | 0xFFC8 (0xC8) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC8 -// 0x029 | 0x005A (0x5A) | Z -// 0x029 | 0x0059 (0x59) | Y -// 0x029 | 0x0058 (0x58) | X -// 0x029 | 0x0057 (0x57) | W -// 0x029 | 0x0056 (0x56) | V -// 0x007 | 0x0020 (0x20) | -// 0x007 | 0x0020 (0x20) | -// 0x007 | 0x0020 (0x20) | -// 0x007 | 0x0020 (0x20) | -// ... -// "AStompsW" means that the Unicode characters were fit into the result buffer first, then the Multibyte conversion -// was written over the top of the lower byte. This makes an invalid Unicode character, but can be understood -// as in-codepage from the char portion of the union. -// "NegativeOnePattern" means that every trailing byte started as -1 or 0xFFFF -// "TruncateSpacePadded" means that we only allowed ourselves to return as many characters as is in the unicode length -// of the string and then filled the rest of the buffer after that with spaces. 
-void DbcsWriteRead::PrepPattern::AStompsWNegativeOnePatternTruncateSpacePadded(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected) +namespace PrepPattern { - Log::Comment(L"Pattern 7"); - DbcsWriteRead::PrepPattern::DoubledWNegativeOneTrailing(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, pciExpected, cExpected); - - // Stomp all A portions of the structure from the existing pattern with the A characters - const auto cTestData = strlen(pszTestData); - for (size_t i = 0; i < cTestData; i++) - { - const auto pciCurrent = &pciExpected[i]; - pciCurrent->Char.AsciiChar = pszTestData[i]; - } - - // Now truncate down and space fill the space based on the max column count. - const auto iwchNeeded = MultiByteToWideChar(uiCodePage, 0, pszTestData, -1, nullptr, 0); - auto pwszTestData = new wchar_t[iwchNeeded]; - VERIFY_IS_NOT_NULL(pwszTestData); - const auto iSuccess = MultiByteToWideChar(uiCodePage, 0, pszTestData, -1, pwszTestData, iwchNeeded); - CheckLastErrorZeroFail(iSuccess, L"MultiByteToWideChar"); - - const auto cWideData = wcslen(pwszTestData); - - // The maximum number of columns the console will consume is the number of wide characters there are in the string. - // This is whether or not the characters themselves are halfwidth or fullwidth (1 col or 2 col respectively.) - // This means that for 4 wide characters that are halfwidth (1 col), the console will copy out all 4 of them. - // For 4 wide characters that are fullwidth (2 col each), the console will copy out 2 of them (because it will count each fullwidth as 2 when filling) - // For a mixed string that is something like half, full, half (4 columns, 3 wchars), we will receive half, full (3 columns worth) and truncate the last half. 
- - const auto cMaxColumns = cWideData; - - for (auto i = cMaxColumns; i < cExpected; i++) - { - const auto pciCurrent = &pciExpected[i]; - pciCurrent->Char.UnicodeChar = L'\x20'; - pciCurrent->Attributes = wAttrOriginal; - } + static constexpr WORD zeroed = 0x0000; + static constexpr WORD white = FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE; + // If the lower byte in our test data is 0xff it indicates that it's "flexible" + // and supposed to be replaced with whatever color attributes were written. + // The upper byte contains leading/trailing flags we're testing for. + static constexpr WORD colored = 0x00ff; - delete[] pwszTestData; -} - -// 6 -// From Input String: "Q(Hiragana I)(Hiragana KA)(Hiragana NA)ZYXWVUT(Hiragana NI) -// With Default Attribute 0x7 (before writing) and Applied Attribute 0x29 (written with text) -// ... -// Receive Output Table: -// attr | wchar (char) | symbol -// ------------------------------------ -// 0x029 | 0x0051 (0x51) | Q -// 0x129 | 0x0082 (0x82) | Hiragana I Shift-JIS Codepage 932 Lead Byte -// 0x229 | 0x00A2 (0xA2) | Hiragana I Shift-JIS Codepage 932 Trail Byte -// 0x129 | 0x0082 (0x82) | Hiragana KA Shift-JIS Codepage 932 Lead Byte -// 0x229 | 0x00A9 (0xA9) | Hiragana KA Shift-JIS Codepage 932 Trail Byte -// 0x129 | 0x0082 (0x82) | Hiragana NA Shift-JIS Codepage 932 Lead Byte -// 0x229 | 0x00C8 (0xC8) | Hiragana NA Shift-JIS Codepage 932 Trail Byte -// 0x029 | 0x005A (0x5A) | Z -// 0x029 | 0x0059 (0x59) | Y -// 0x029 | 0x0058 (0x58) | X -// 0x029 | 0x0057 (0x57) | W -// 0x029 | 0x0056 (0x56) | V -// 0x029 | 0x0055 (0x55) | U -// 0x029 | 0x0054 (0x54) | T -// 0x129 | 0x0082 (0x82) | Hiragana NI Shift-JIS Codepage 932 Lead Byte -// 0x229 | 0x00C9 (0xC9) | Hiragana NI Shift-JIS Codepage 932 Trail Byte -// ... -// "A" means that we intend in-codepage (char) data to be browsed in the resulting struct. -// This one returns pretty much exactly as expected. 
-void DbcsWriteRead::PrepPattern::A(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD /*wAttrOriginal*/, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected) -{ - Log::Comment(L"Pattern 6"); - const auto cTestData = strlen(pszTestData); - VERIFY_IS_GREATER_THAN_OR_EQUAL(cExpected, cTestData); + static constexpr WORD leading = COMMON_LVB_LEADING_BYTE; + static constexpr WORD trailing = COMMON_LVB_TRAILING_BYTE; - auto fIsNextTrailing = false; - for (size_t i = 0; i < cTestData; i++) + constexpr void replaceColorPlaceholders(CharInfoPattern& pattern, WORD attr) { - const auto pciCurrent = &pciExpected[i]; - const auto ch = pszTestData[i]; - - pciCurrent->Attributes = wAttrWritten; - pciCurrent->Char.AsciiChar = ch; - - if (IsDBCSLeadByteEx(uiCodePage, ch)) + for (auto& info : pattern) { - pciCurrent->Attributes |= COMMON_LVB_LEADING_BYTE; - fIsNextTrailing = true; - } - else if (fIsNextTrailing) - { - pciCurrent->Attributes |= COMMON_LVB_TRAILING_BYTE; - fIsNextTrailing = false; + if ((info.Attributes & colored) == colored) + { + info.Attributes &= 0xff00 | attr; + } } } -} -// 10 -// From Input String: "Q(Hiragana I)(Hiragana KA)(Hiragana NA)ZYXWVUT(Hiragana NI) -// With Default Attribute 0x7 (before writing) and Applied Attribute 0x29 (written with text) -// ... -// Receive Output Table: -// attr | wchar (char) | symbol -// ------------------------------------ -// 0x029 | 0x0051 (0x51) | Q -// 0x129 | 0x3044 (0x44) | Hiragana I -// 0x229 | 0x304B (0x4B) | Hiragana KA -// 0x129 | 0x306A (0x6A) | Hiragana NA -// 0x229 | 0x005A (0x5A) | Z -// 0x129 | 0x0059 (0x59) | Y -// 0x229 | 0x0058 (0x58) | X -// 0x029 | 0x0057 (0x57) | W -// 0x029 | 0x0056 (0x56) | V -// 0x029 | 0x0055 (0x55) | U -// 0x029 | 0x0054 (0x54) | T -// 0x029 | 0x306B (0x6B) | Hiragana NI -// 0x029 | 0x0000 (0x00) | -// 0x029 | 0x0000 (0x00) | -// 0x129 | 0x0000 (0x00) | -// 0x229 | 0x0000 (0x00) | -// ... 
-// "Null" means any unused data in the buffer will be filled with null and null attribute. -// "CoverAChar" means that the attributes belong to the A version of the call, but we've placed de-duped W characters over the top. -// "W" means that we intend Unicode data to be browsed in the resulting struct (even though wchar and char are unioned.) -void DbcsWriteRead::PrepPattern::WNullCoverAChar(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected) -{ - Log::Comment(L"Pattern 10"); - DbcsWriteRead::PrepPattern::A(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, pciExpected, cExpected); - - const auto iwchNeeded = MultiByteToWideChar(uiCodePage, 0, pszTestData, -1, nullptr, 0); - auto pwszTestData = new wchar_t[iwchNeeded]; - VERIFY_IS_NOT_NULL(pwszTestData); - const auto iSuccess = MultiByteToWideChar(uiCodePage, 0, pszTestData, -1, pwszTestData, iwchNeeded); - CheckLastErrorZeroFail(iSuccess, L"MultiByteToWideChar"); - const auto cWideData = wcslen(pwszTestData); - - size_t i = 0; - for (; i < cWideData; i++) - { - pciExpected[i].Char.UnicodeChar = pwszTestData[i]; - } - - for (; i < cExpected; i++) - { - pciExpected[i].Char.UnicodeChar = L'\0'; - } - - delete[] pwszTestData; -} - -// 11 -// From Input String: "Q(Hiragana I)(Hiragana KA)(Hiragana NA)ZYXWVUT(Hiragana NI) -// With Default Attribute 0x7 (before writing) and Applied Attribute 0x29 (written with text) -// ... 
-// Receive Output Table: -// attr | wchar (char) | symbol -// ------------------------------------ -// 0x029 | 0x0051 (0x51) | Q -// 0x029 | 0x3044 (0x44) | Hiragana I -// 0x029 | 0x304B (0x4B) | Hiragana KA -// 0x029 | 0x306A (0x6A) | Hiragana NA -// 0x029 | 0x005A (0x5A) | Z -// 0x029 | 0x0059 (0x59) | Y -// 0x029 | 0x0058 (0x58) | X -// 0x029 | 0x0057 (0x57) | W -// 0x029 | 0x0056 (0x56) | V -// 0x029 | 0x0055 (0x55) | U -// 0x029 | 0x0054 (0x54) | T -// 0x029 | 0x306B (0x6B) | Hiragana NI -// 0x007 | 0x0020 (0x20) | -// 0x007 | 0x0020 (0x20) | -// 0x007 | 0x0020 (0x20) | -// 0x007 | 0x0020 (0x20) | -// ... -// "Space fill" means any unused data in the buffer will be filled with space and default attribute -// "W" means that we intend Unicode data to be browsed in the resulting struct (even though wchar and char are unioned.) -void DbcsWriteRead::PrepPattern::WSpaceFill(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected) -{ - Log::Comment(L"Pattern 11"); - DbcsWriteRead::PrepPattern::WNullCoverAChar(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, pciExpected, cExpected); - - const auto iwchNeeded = MultiByteToWideChar(uiCodePage, 0, pszTestData, -1, nullptr, 0); - auto pwszTestData = new wchar_t[iwchNeeded]; - VERIFY_IS_NOT_NULL(pwszTestData); - const auto iSuccess = MultiByteToWideChar(uiCodePage, 0, pszTestData, -1, pwszTestData, iwchNeeded); - CheckLastErrorZeroFail(iSuccess, L"MultiByteToWideChar"); - const auto cWideData = wcslen(pwszTestData); - - size_t i = 0; - for (; i < cWideData; i++) - { - pciExpected[i].Attributes = wAttrWritten; - } + // Receive Output Table: + // attr | wchar (char) | symbol + // ------------------------------------ + // 0x029 | 0x0051 (0x51) | Q + // 0x029 | 0x3044 (0x44) | Hiragana I + // 0x029 | 0x304B (0x4B) | Hiragana KA + // 0x029 | 0x306A (0x6A) | Hiragana NA + // 
0x029 | 0x005A (0x5A) | Z + // 0x029 | 0x0059 (0x59) | Y + // 0x029 | 0x0058 (0x58) | X + // 0x029 | 0x0057 (0x57) | W + // 0x029 | 0x0056 (0x56) | V + // 0x029 | 0x0055 (0x55) | U + // 0x029 | 0x0054 (0x54) | T + // 0x029 | 0x306B (0x6B) | Hiragana NI + // 0x000 | 0x0000 (0x00) | + // 0x000 | 0x0000 (0x00) | + // 0x000 | 0x0000 (0x00) | + // 0x000 | 0x0000 (0x00) | + // ... + // "Null Padded" means any unused data in the buffer will be filled with null and null attribute. + // "Dedupe" means that any full-width characters in the buffer (despite being stored doubled inside the buffer) + // will be returned as single copies. + // "W" means that we intend Unicode data to be browsed in the resulting struct (even though wchar and char are unioned.) + static constexpr CharInfoPattern NullPaddedDedupeW{ + makeCharInfo(0x0051, colored), + makeCharInfo(0x3044, colored), + makeCharInfo(0x304b, colored), + makeCharInfo(0x306a, colored), + makeCharInfo(0x005a, colored), + makeCharInfo(0x0059, colored), + makeCharInfo(0x0058, colored), + makeCharInfo(0x0057, colored), + makeCharInfo(0x0056, colored), + makeCharInfo(0x0055, colored), + makeCharInfo(0x0054, colored), + makeCharInfo(0x306b, colored), + makeCharInfo(0x0000, zeroed), + makeCharInfo(0x0000, zeroed), + makeCharInfo(0x0000, zeroed), + makeCharInfo(0x0000, zeroed), + }; - for (; i < cExpected; i++) - { - pciExpected[i].Char.UnicodeChar = L'\x20'; - pciExpected[i].Attributes = wAttrOriginal; - } + // Receive Output Table: + // attr | wchar (char) | symbol + // ------------------------------------ + // 0x029 | 0x0051 (0x51) | Q + // 0x029 | 0x3044 (0x44) | Hiragana I + // 0x029 | 0x304B (0x4B) | Hiragana KA + // 0x029 | 0x306A (0x6A) | Hiragana NA + // 0x029 | 0x005A (0x5A) | Z + // 0x029 | 0x0059 (0x59) | Y + // 0x029 | 0x0058 (0x58) | X + // 0x029 | 0x0057 (0x57) | W + // 0x029 | 0x0056 (0x56) | V + // 0x029 | 0x0055 (0x55) | U + // 0x029 | 0x0054 (0x54) | T + // 0x029 | 0x306B (0x6B) | Hiragana NI + // 0x007 | 0x0020 
(0x20) | + // 0x007 | 0x0020 (0x20) | + // 0x007 | 0x0020 (0x20) | + // 0x007 | 0x0020 (0x20) | + // ... + // "Space Padded" means any unused data in the buffer will be filled with spaces and the default attribute. + // "Dedupe" means that any full-width characters in the buffer (despite being stored doubled inside the buffer) + // will be returned as single copies. + // "W" means that we intend Unicode data to be browsed in the resulting struct (even though wchar and char are unioned.) + static constexpr CharInfoPattern SpacePaddedDedupeW{ + makeCharInfo(0x0051, colored), + makeCharInfo(0x3044, colored), + makeCharInfo(0x304b, colored), + makeCharInfo(0x306a, colored), + makeCharInfo(0x005a, colored), + makeCharInfo(0x0059, colored), + makeCharInfo(0x0058, colored), + makeCharInfo(0x0057, colored), + makeCharInfo(0x0056, colored), + makeCharInfo(0x0055, colored), + makeCharInfo(0x0054, colored), + makeCharInfo(0x306b, colored), + makeCharInfo(0x0020, white), + makeCharInfo(0x0020, white), + makeCharInfo(0x0020, white), + makeCharInfo(0x0020, white), + }; - delete[] pwszTestData; -} + // Receive Output Table: + // attr | wchar (char) | symbol + // ------------------------------------ + // 0x029 | 0x0051 (0x51) | Q + // 0x029 | 0x3044 (0x44) | Hiragana I + // 0x029 | 0x304B (0x4B) | Hiragana KA + // 0x029 | 0x306A (0x6A) | Hiragana NA + // 0x029 | 0x005A (0x5A) | Z + // 0x029 | 0x0059 (0x59) | Y + // 0x029 | 0x0058 (0x58) | X + // 0x029 | 0x0057 (0x57) | W + // 0x029 | 0x0056 (0x56) | V + // 0x007 | 0x0020 (0x20) | + // 0x007 | 0x0020 (0x20) | + // 0x007 | 0x0020 (0x20) | + // 0x007 | 0x0020 (0x20) | + // 0x000 | 0x0000 (0x00) | + // 0x000 | 0x0000 (0x00) | + // 0x000 | 0x0000 (0x00) | + // ... + // "Space Padded" means most of the unused data in the buffer will be filled with spaces and the default attribute. + // "Dedupe" means that any full-width characters in the buffer (despite being stored doubled inside the buffer) + // will be returned as single copies. 
+ // "W" means that we intend Unicode data to be browsed in the resulting struct (even though wchar and char are unioned.) + // "Truncated" means that this pattern trims off some of the end of the buffer with NULLs. + static constexpr CharInfoPattern SpacePaddedDedupeTruncatedW{ + makeCharInfo(0x0051, colored), + makeCharInfo(0x3044, colored), + makeCharInfo(0x304b, colored), + makeCharInfo(0x306a, colored), + makeCharInfo(0x005a, colored), + makeCharInfo(0x0059, colored), + makeCharInfo(0x0058, colored), + makeCharInfo(0x0057, colored), + makeCharInfo(0x0056, colored), + makeCharInfo(0x0020, white), + makeCharInfo(0x0020, white), + makeCharInfo(0x0020, white), + makeCharInfo(0x0020, white), + makeCharInfo(0x0000, zeroed), + makeCharInfo(0x0000, zeroed), + makeCharInfo(0x0000, zeroed), + }; -//8 -// From Input String: "Q(Hiragana I)(Hiragana KA)(Hiragana NA)ZYXWVUT(Hiragana NI) -// With Default Attribute 0x7 (before writing) and Applied Attribute 0x29 (written with text) -// ... -// Receive Output Table: -// attr | wchar (char) | symbol -// ------------------------------------ -// 0x029 | 0x0051 (0x51) | Q -// 0x129 | 0x3082 (0x82) | Hiragana I Unicode 0x3044 with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. -// 0x229 | 0xFFA2 (0xA2) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA2 -// 0x129 | 0x3082 (0x82) | Hiragana KA Unicode 0x304B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. -// 0x229 | 0xFFA9 (0xA9) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA9 -// 0x129 | 0x3082 (0x82) | Hiragana NA 0x306A with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. 
-// 0x229 | 0xFFC8 (0xC8) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC8 -// 0x029 | 0x005A (0x5A) | Z -// 0x029 | 0x0059 (0x59) | Y -// 0x029 | 0x0058 (0x58) | X -// 0x029 | 0x0057 (0x57) | W -// 0x029 | 0x0056 (0x56) | V -// 0x029 | 0x0055 (0x55) | U -// 0x029 | 0x0054 (0x54) | T -// 0x129 | 0x3082 (0x30) | Hiragana NI 0x306B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. -// 0x229 | 0xFFC9 (0xC9) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC9 -// ... -// "AOn" means that the Unicode characters were fit into the result buffer first, then the Multibyte conversion -// was written over the top of the lower byte. This makes an invalid Unicode character, but can be understood -// as in-codepage from the char portion of the union. -// "DoubledW" means that the full-width Unicode characters were inserted twice into the buffer (and marked lead/trailing) -// "NegativeOneTrailing" means that every trailing byte started as -1 or 0xFFFF -void DbcsWriteRead::PrepPattern::AOnDoubledWNegativeOneTrailing(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected) -{ - Log::Comment(L"Pattern 8"); + // Receive Output Table: + // attr | wchar (char) | symbol + // ------------------------------------ + // 0x029 | 0x0051 (0x51) | Q + // 0x029 | 0x3044 (0x44) | Hiragana I + // 0x029 | 0x3044 (0x44) | Hiragana I + // 0x029 | 0x304B (0x4B) | Hiragana KA + // 0x029 | 0x304B (0x4B) | Hiragana KA + // 0x029 | 0x306A (0x6A) | Hiragana NA + // 0x029 | 0x306A (0x6A) | Hiragana NA + // 0x029 | 0x005A (0x5A) | Z + // 0x029 | 0x0059 (0x59) | Y + // 0x029 | 0x0058 (0x58) | X + // 0x000 | 0x0000 (0x00) | + // 0x000 | 0x0000 (0x00) | + // 0x000 | 0x0000 (0x00) | + // 0x000 | 0x0000 (0x00) | + // 0x000 | 0x0000 (0x00) | 
+ // 0x000 | 0x0000 (0x00) | + // ... + // "Doubled" means that any full-width characters in the buffer are returned twice. + // "Truncated" means that this pattern trims off some of the end of the buffer with NULLs. + // "W" means that we intend Unicode data to be browsed in the resulting struct (even though wchar and char are unioned.) + static constexpr CharInfoPattern DoubledTruncatedW{ + makeCharInfo(0x0051, colored), + makeCharInfo(0x3044, colored), + makeCharInfo(0x3044, colored), + makeCharInfo(0x304b, colored), + makeCharInfo(0x304b, colored), + makeCharInfo(0x306a, colored), + makeCharInfo(0x306a, colored), + makeCharInfo(0x005a, colored), + makeCharInfo(0x0059, colored), + makeCharInfo(0x0058, colored), + makeCharInfo(0x0000, zeroed), + makeCharInfo(0x0000, zeroed), + makeCharInfo(0x0000, zeroed), + makeCharInfo(0x0000, zeroed), + makeCharInfo(0x0000, zeroed), + makeCharInfo(0x0000, zeroed), + }; - DbcsWriteRead::PrepPattern::DoubledWNegativeOneTrailing(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, pciExpected, cExpected); + // Receive Output Table: + // attr | wchar (char) | symbol + // ------------------------------------ + // 0x029 | 0x0051 (0x51) | Q + // 0x129 | 0x0082 (0x82) | Hiragana I Shift-JIS Codepage 932 Lead Byte + // 0x229 | 0x00A2 (0xA2) | Hiragana I Shift-JIS Codepage 932 Trail Byte + // 0x129 | 0x0082 (0x82) | Hiragana KA Shift-JIS Codepage 932 Lead Byte + // 0x229 | 0x00A9 (0xA9) | Hiragana KA Shift-JIS Codepage 932 Trail Byte + // 0x129 | 0x0082 (0x82) | Hiragana NA Shift-JIS Codepage 932 Lead Byte + // 0x229 | 0x00C8 (0xC8) | Hiragana NA Shift-JIS Codepage 932 Trail Byte + // 0x029 | 0x005A (0x5A) | Z + // 0x029 | 0x0059 (0x59) | Y + // 0x029 | 0x0058 (0x58) | X + // 0x029 | 0x0057 (0x57) | W + // 0x029 | 0x0056 (0x56) | V + // 0x007 | 0x0020 (0x20) | + // 0x007 | 0x0020 (0x20) | + // 0x007 | 0x0020 (0x20) | + // 0x007 | 0x0020 (0x20) | + // ... 
+ // "Space Padded" means most of the unused data in the buffer will be filled with spaces and the default attribute. + // "Dedupe" means that any full-width characters in the buffer (despite being stored doubled inside the buffer) + // will be returned as single copies. + // "A" means that we intend in-codepage (char) data to be browsed in the resulting struct (even though wchar and char are unioned.) + static constexpr CharInfoPattern SpacePaddedDedupeA{ + makeCharInfo(0x0051, colored), + makeCharInfo(0x0082, colored | leading), + makeCharInfo(0x00a2, colored | trailing), + makeCharInfo(0x0082, colored | leading), + makeCharInfo(0x00a9, colored | trailing), + makeCharInfo(0x0082, colored | leading), + makeCharInfo(0x00c8, colored | trailing), + makeCharInfo(0x005a, colored), + makeCharInfo(0x0059, colored), + makeCharInfo(0x0058, colored), + makeCharInfo(0x0057, colored), + makeCharInfo(0x0056, colored), + makeCharInfo(0x0020, white), + makeCharInfo(0x0020, white), + makeCharInfo(0x0020, white), + makeCharInfo(0x0020, white), + }; - // Stomp all A portions of the structure from the existing pattern with the A characters - const auto cTestData = strlen(pszTestData); - VERIFY_IS_GREATER_THAN_OR_EQUAL(cExpected, cTestData); - for (size_t i = 0; i < cTestData; i++) - { - const auto pciCurrent = &pciExpected[i]; - pciCurrent->Char.AsciiChar = pszTestData[i]; - } -} + // Receive Output Table: + // attr | wchar (char) | symbol + // ------------------------------------ + // 0x029 | 0x0051 (0x51) | Q + // 0x129 | 0x3044 (0x44) | Hiragana I + // 0x229 | 0x3044 (0x44) | Hiragana I + // 0x129 | 0x304B (0x4B) | Hiragana KA + // 0x229 | 0x304B (0x4B) | Hiragana KA + // 0x129 | 0x306A (0x6A) | Hiragana NA + // 0x229 | 0x306A (0x6A) | Hiragana NA + // 0x029 | 0x005A (0x5A) | Z + // 0x029 | 0x0059 (0x59) | Y + // 0x029 | 0x0058 (0x58) | X + // 0x029 | 0x0057 (0x57) | W + // 0x029 | 0x0056 (0x56) | V + // 0x029 | 0x0055 (0x55) | U + // 0x029 | 0x0054 (0x54) | T + // 0x129 | 0x306B 
(0x6B) | Hiragana NI + // 0x229 | 0x306B (0x6B) | Hiragana NI + // ... + // "Doubled" means that any full-width characters in the buffer are returned twice with a leading and trailing byte marker. + // "W" means that we intend Unicode data to be browsed in the resulting struct (even though wchar and char are unioned.) + static constexpr CharInfoPattern DoubledW{ + makeCharInfo(0x0051, colored), + makeCharInfo(0x3044, colored | leading), + makeCharInfo(0x3044, colored | trailing), + makeCharInfo(0x304b, colored | leading), + makeCharInfo(0x304b, colored | trailing), + makeCharInfo(0x306a, colored | leading), + makeCharInfo(0x306a, colored | trailing), + makeCharInfo(0x005a, colored), + makeCharInfo(0x0059, colored), + makeCharInfo(0x0058, colored), + makeCharInfo(0x0057, colored), + makeCharInfo(0x0056, colored), + makeCharInfo(0x0055, colored), + makeCharInfo(0x0054, colored), + makeCharInfo(0x306b, colored | leading), + makeCharInfo(0x306b, colored | trailing), + }; -// 9 -// From Input String: "Q(Hiragana I)(Hiragana KA)(Hiragana NA)ZYXWVUT(Hiragana NI) -// With Default Attribute 0x7 (before writing) and Applied Attribute 0x29 (written with text) -// ... -// Receive Output Table: -// attr | wchar (char) | symbol -// ------------------------------------ -// 0x029 | 0x0051 (0x51) | Q -// 0x129 | 0x3082 (0x82) | Hiragana I Unicode 0x3044 with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. -// 0x229 | 0x30A2 (0xA2) | Hiragana I Unicode 0x3044 with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA2 -// 0x129 | 0x3082 (0x82) | Hiragana KA Unicode 0x304B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. -// 0x229 | 0x30A9 (0xA9) | Hiragana KA Unicode 0x304B with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA9 -// 0x129 | 0x3082 (0x82) | Hiragana NA 0x306A with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. 
-// 0x229 | 0x39C8 (0xC8) | Hiragana NA 0x306A with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC8 -// 0x029 | 0x005A (0x5A) | Z -// 0x029 | 0x0059 (0x59) | Y -// 0x029 | 0x0058 (0x58) | X -// 0x029 | 0x0057 (0x57) | W -// 0x029 | 0x0056 (0x56) | V -// 0x029 | 0x0055 (0x55) | U -// 0x029 | 0x0054 (0x54) | T -// 0x129 | 0x3082 (0x30) | Hiragana NI 0x306B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. -// 0x229 | 0x30C9 (0xC9) | Hiragana NI 0x306B with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC9 -// ... -// "AOn" means that the Unicode characters were fit into the result buffer first, then the Multibyte conversion -// was written over the top of the lower byte. This makes an invalid Unicode character, but can be understood -// as in-codepage from the char portion of the union. -// "DoubledW" means that the full-width Unicode characters were inserted twice into the buffer (and marked lead/trailing) -// "NegativeOneTrailing" means that every trailing byte started as -1 or 0xFFFF -void DbcsWriteRead::PrepPattern::AOnDoubledW(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected) -{ - Log::Comment(L"Pattern 9"); + // Receive Output Table: + // attr | wchar (char) | symbol + // ------------------------------------ + // 0x029 | 0x0051 (0x51) | Q + // 0x129 | 0x3044 (0x44) | Hiragana I + // 0x229 | 0xFFFF (0xFF) | Invalid Unicode Character + // 0x129 | 0x304B (0x4B) | Hiragana KA + // 0x229 | 0xFFFF (0xFF) | Invalid Unicode Character + // 0x129 | 0x306A (0x6A) | Hiragana NA + // 0x229 | 0xFFFF (0xFF) | Invalid Unicode Character + // 0x029 | 0x005A (0x5A) | Z + // 0x029 | 0x0059 (0x59) | Y + // 0x029 | 0x0058 (0x58) | X + // 0x029 | 0x0057 (0x57) | W + // 0x029 | 0x0056 (0x56) | V + // 0x029 | 0x0055 (0x55) | U + // 0x029 | 0x0054 (0x54) | T + // 0x129 | 0x306B 
(0x6B) | Hiragana NI + // 0x229 | 0xFFFF (0xFF) | Invalid Unicode Character + // ... + // "Doubled" means that any full-width characters in the buffer are returned twice with a leading and trailing byte marker. + // "W" means that we intend Unicode data to be browsed in the resulting struct (even though wchar and char are unioned.) + // "NegativeOneTrailing" means that all trailing bytes have their character replaced with the value -1 or 0xFFFF + static constexpr CharInfoPattern DoubledWNegativeOneTrailing{ + makeCharInfo(0x0051, colored), + makeCharInfo(0x3044, colored | leading), + makeCharInfo(0xffff, colored | trailing), + makeCharInfo(0x304b, colored | leading), + makeCharInfo(0xffff, colored | trailing), + makeCharInfo(0x306a, colored | leading), + makeCharInfo(0xffff, colored | trailing), + makeCharInfo(0x005a, colored), + makeCharInfo(0x0059, colored), + makeCharInfo(0x0058, colored), + makeCharInfo(0x0057, colored), + makeCharInfo(0x0056, colored), + makeCharInfo(0x0055, colored), + makeCharInfo(0x0054, colored), + makeCharInfo(0x306b, colored | leading), + makeCharInfo(0xffff, colored | trailing), + }; - DbcsWriteRead::PrepPattern::DoubledW(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, pciExpected, cExpected); + // Receive Output Table: + // attr | wchar (char) | symbol + // ------------------------------------ + // 0x029 | 0x0051 (0x51) | Q + // 0x129 | 0x3082 (0x82) | Hiragana I Unicode 0x3044 with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. + // 0x229 | 0xFFA2 (0xA2) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA2 + // 0x129 | 0x3082 (0x82) | Hiragana KA Unicode 0x304B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. 
+ // 0x229 | 0xFFA9 (0xA9) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA9 + // 0x129 | 0x3082 (0x82) | Hiragana NA 0x306A with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. + // 0x229 | 0xFFC8 (0xC8) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC8 + // 0x029 | 0x005A (0x5A) | Z + // 0x029 | 0x0059 (0x59) | Y + // 0x029 | 0x0058 (0x58) | X + // 0x029 | 0x0057 (0x57) | W + // 0x029 | 0x0056 (0x56) | V + // 0x007 | 0x0020 (0x20) | + // 0x007 | 0x0020 (0x20) | + // 0x007 | 0x0020 (0x20) | + // 0x007 | 0x0020 (0x20) | + // ... + // "AStompsW" means that the Unicode characters were fit into the result buffer first, then the Multibyte conversion + // was written over the top of the lower byte. This makes an invalid Unicode character, but can be understood + // as in-codepage from the char portion of the union. + // "NegativeOnePattern" means that every trailing byte started as -1 or 0xFFFF + // "TruncateSpacePadded" means that we only allowed ourselves to return as many characters as is in the unicode length + // of the string and then filled the rest of the buffer after that with spaces. 
+ static constexpr CharInfoPattern AStompsWNegativeOnePatternTruncateSpacePadded{ + makeCharInfo(0x0051, colored), + makeCharInfo(0x3082, colored | leading), + makeCharInfo(0xffa2, colored | trailing), + makeCharInfo(0x3082, colored | leading), + makeCharInfo(0xffa9, colored | trailing), + makeCharInfo(0x3082, colored | leading), + makeCharInfo(0xffc8, colored | trailing), + makeCharInfo(0x005a, colored), + makeCharInfo(0x0059, colored), + makeCharInfo(0x0058, colored), + makeCharInfo(0x0057, colored), + makeCharInfo(0x0056, colored), + makeCharInfo(0x0020, white), + makeCharInfo(0x0020, white), + makeCharInfo(0x0020, white), + makeCharInfo(0x0020, white), + }; - // Stomp all A portions of the structure from the existing pattern with the A characters - const auto cTestData = strlen(pszTestData); - VERIFY_IS_GREATER_THAN_OR_EQUAL(cExpected, cTestData); - for (size_t i = 0; i < cTestData; i++) - { - const auto pciCurrent = &pciExpected[i]; - pciCurrent->Char.AsciiChar = pszTestData[i]; - } -} + // Receive Output Table: + // attr | wchar (char) | symbol + // ------------------------------------ + // 0x029 | 0x0051 (0x51) | Q + // 0x129 | 0x0082 (0x82) | Hiragana I Shift-JIS Codepage 932 Lead Byte + // 0x229 | 0x00A2 (0xA2) | Hiragana I Shift-JIS Codepage 932 Trail Byte + // 0x129 | 0x0082 (0x82) | Hiragana KA Shift-JIS Codepage 932 Lead Byte + // 0x229 | 0x00A9 (0xA9) | Hiragana KA Shift-JIS Codepage 932 Trail Byte + // 0x129 | 0x0082 (0x82) | Hiragana NA Shift-JIS Codepage 932 Lead Byte + // 0x229 | 0x00C8 (0xC8) | Hiragana NA Shift-JIS Codepage 932 Trail Byte + // 0x029 | 0x005A (0x5A) | Z + // 0x029 | 0x0059 (0x59) | Y + // 0x029 | 0x0058 (0x58) | X + // 0x029 | 0x0057 (0x57) | W + // 0x029 | 0x0056 (0x56) | V + // 0x029 | 0x0055 (0x55) | U + // 0x029 | 0x0054 (0x54) | T + // 0x129 | 0x0082 (0x82) | Hiragana NI Shift-JIS Codepage 932 Lead Byte + // 0x229 | 0x00C9 (0xC9) | Hiragana NI Shift-JIS Codepage 932 Trail Byte + // ... 
+ // "A" means that we intend in-codepage (char) data to be browsed in the resulting struct. + // This one returns pretty much exactly as expected. + static constexpr CharInfoPattern A{ + makeCharInfo(0x0051, colored), + makeCharInfo(0x0082, colored | leading), + makeCharInfo(0x00a2, colored | trailing), + makeCharInfo(0x0082, colored | leading), + makeCharInfo(0x00a9, colored | trailing), + makeCharInfo(0x0082, colored | leading), + makeCharInfo(0x00c8, colored | trailing), + makeCharInfo(0x005a, colored), + makeCharInfo(0x0059, colored), + makeCharInfo(0x0058, colored), + makeCharInfo(0x0057, colored), + makeCharInfo(0x0056, colored), + makeCharInfo(0x0055, colored), + makeCharInfo(0x0054, colored), + makeCharInfo(0x0082, colored | leading), + makeCharInfo(0x00c9, colored | trailing), + }; -// 12 -// From Input String: "Q(Hiragana I)(Hiragana KA)(Hiragana NA)ZYXWVUT(Hiragana NI) -// With Default Attribute 0x7 (before writing) and Applied Attribute 0x29 (written with text) -// ... -// Receive Output Table: -// attr | wchar (char) | symbol -// ------------------------------------ -// 0x029 | 0x0051 (0x51) | Q -// 0x129 | 0x3044 (0x44) | Hiragana I -// 0x229 | 0x304B (0x4B) | Hiragana KA -// 0x129 | 0x306A (0x6A) | Hiragana NA -// 0x229 | 0x005A (0x5A) | Z -// 0x129 | 0x0059 (0x59) | Y -// 0x229 | 0x0058 (0x58) | X -// 0x029 | 0x0057 (0x57) | W -// 0x029 | 0x0056 (0x56) | V -// 0x029 | 0x0020 (0x20) | -// 0x029 | 0x0020 (0x20) | -// 0x029 | 0x0020 (0x20) | -// 0x007 | 0x0020 (0x20) | -// 0x007 | 0x0000 (0x00) | -// 0x007 | 0x0000 (0x00) | -// 0x007 | 0x0000 (0x00) | -// ... -// "Space Padded" means most of the unused data in the buffer will be filled with spaces and the default attribute. -// "Dedupe" means that any full-width characters in the buffer (despite being stored doubled inside the buffer) -// will be returned as single copies. -// "W" means that we intend Unicode data to be browsed in the resulting struct (even though wchar and char are unioned.) 
-// "Truncated" means that this pattern trims off some of the end of the buffer with NULLs. -// "A Cover Attr" means that after all the other operations, we will finally run through and cover up the attributes -// again with what they would have been for multi-byte data (leading and trailing flags) -void DbcsWriteRead::PrepPattern::ACoverAttrSpacePaddedDedupeTruncatedW(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected) -{ - Log::Comment(L"Pattern 12"); - DbcsWriteRead::PrepPattern::SpacePaddedDedupeTruncatedW(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, pciExpected, cExpected); - - const auto iwchNeeded = MultiByteToWideChar(uiCodePage, 0, pszTestData, -1, nullptr, 0); - auto pwszTestData = new wchar_t[iwchNeeded]; - VERIFY_IS_NOT_NULL(pwszTestData); - const auto iSuccess = MultiByteToWideChar(uiCodePage, 0, pszTestData, -1, pwszTestData, iwchNeeded); - CheckLastErrorZeroFail(iSuccess, L"MultiByteToWideChar"); - const auto cWideData = wcslen(pwszTestData); - - size_t i = 0; - auto fIsNextTrailing = false; - for (; i < cWideData; i++) - { - pciExpected[i].Attributes = wAttrWritten; + // Receive Output Table: + // attr | wchar (char) | symbol + // ------------------------------------ + // 0x029 | 0x0051 (0x51) | Q + // 0x129 | 0x0082 (0x82) | Hiragana I Shift-JIS Codepage 932 Lead Byte + // 0x229 | 0x00A2 (0xA2) | Hiragana I Shift-JIS Codepage 932 Trail Byte + // 0x129 | 0x0082 (0x82) | Hiragana I Shift-JIS Codepage 932 Lead Byte + // 0x229 | 0x00A2 (0xA2) | Hiragana I Shift-JIS Codepage 932 Trail Byte + // 0x129 | 0x0082 (0x82) | Hiragana KA Shift-JIS Codepage 932 Lead Byte + // 0x229 | 0x00A9 (0xA9) | Hiragana KA Shift-JIS Codepage 932 Trail Byte + // 0x129 | 0x0082 (0x82) | Hiragana KA Shift-JIS Codepage 932 Lead Byte + // 0x229 | 0x00A9 (0xA9) | Hiragana KA Shift-JIS Codepage 932 Trail Byte + // 0x129 
| 0x0082 (0x82) | Hiragana NA Shift-JIS Codepage 932 Lead Byte + // 0x229 | 0x00C8 (0xC8) | Hiragana NA Shift-JIS Codepage 932 Trail Byte + // 0x129 | 0x0082 (0x82) | Hiragana NA Shift-JIS Codepage 932 Lead Byte + // 0x229 | 0x00C8 (0xC8) | Hiragana NA Shift-JIS Codepage 932 Trail Byte + // 0x029 | 0x005A (0x5A) | Z + // 0x029 | 0x0059 (0x59) | Y + // 0x029 | 0x0058 (0x58) | X + // ... + // "Doubled" means that any full-width characters in the buffer are returned twice. + // "A" means that we intend in-codepage (char) data to be browsed in the resulting struct. + static constexpr CharInfoPattern DoubledA{ + makeCharInfo(0x0051, colored), + makeCharInfo(0x0082, colored | leading), + makeCharInfo(0x00a2, colored | trailing), + makeCharInfo(0x0082, colored | leading), + makeCharInfo(0x00a2, colored | trailing), + makeCharInfo(0x0082, colored | leading), + makeCharInfo(0x00a9, colored | trailing), + makeCharInfo(0x0082, colored | leading), + makeCharInfo(0x00a9, colored | trailing), + makeCharInfo(0x0082, colored | leading), + makeCharInfo(0x00c8, colored | trailing), + makeCharInfo(0x0082, colored | leading), + makeCharInfo(0x00c8, colored | trailing), + makeCharInfo(0x005a, colored), + makeCharInfo(0x0059, colored), + makeCharInfo(0x0058, colored), + }; - if (IsDBCSLeadByteEx(uiCodePage, pszTestData[i])) - { - pciExpected[i].Attributes |= COMMON_LVB_LEADING_BYTE; - fIsNextTrailing = true; - } - else if (fIsNextTrailing) - { - pciExpected[i].Attributes |= COMMON_LVB_TRAILING_BYTE; - fIsNextTrailing = false; - } - } + // Receive Output Table: + // attr | wchar (char) | symbol + // ------------------------------------ + // 0x029 | 0x0051 (0x51) | Q + // 0x129 | 0x3044 (0x44) | Hiragana I + // 0x229 | 0x304B (0x4B) | Hiragana KA + // 0x129 | 0x306A (0x6A) | Hiragana NA + // 0x229 | 0x005A (0x5A) | Z + // 0x129 | 0x0059 (0x59) | Y + // 0x229 | 0x0058 (0x58) | X + // 0x029 | 0x0057 (0x57) | W + // 0x029 | 0x0056 (0x56) | V + // 0x029 | 0x0055 (0x55) | U + // 0x029 | 0x0054 
(0x54) | T + // 0x029 | 0x306B (0x6B) | Hiragana NI + // 0x029 | 0x0000 (0x00) | + // 0x029 | 0x0000 (0x00) | + // 0x129 | 0x0000 (0x00) | + // 0x229 | 0x0000 (0x00) | + // ... + // "Null" means any unused data in the buffer will be filled with null. + // "CoverAChar" means that the attributes belong to the A version of the call, but we've placed de-duped W characters over the top. + // "W" means that we intend Unicode data to be browsed in the resulting struct (even though wchar and char are unioned.) + static constexpr CharInfoPattern WNullCoverAChar{ + makeCharInfo(0x0051, colored), + makeCharInfo(0x3044, colored | leading), + makeCharInfo(0x304b, colored | trailing), + makeCharInfo(0x306a, colored | leading), + makeCharInfo(0x005a, colored | trailing), + makeCharInfo(0x0059, colored | leading), + makeCharInfo(0x0058, colored | trailing), + makeCharInfo(0x0057, colored), + makeCharInfo(0x0056, colored), + makeCharInfo(0x0055, colored), + makeCharInfo(0x0054, colored), + makeCharInfo(0x306b, colored), + makeCharInfo(0x0000, colored), + makeCharInfo(0x0000, colored), + makeCharInfo(0x0000, colored | leading), + makeCharInfo(0x0000, colored | trailing), + }; - for (; i < cExpected; i++) - { - pciExpected[i].Attributes = wAttrOriginal; - } + // Receive Output Table: + // attr | wchar (char) | symbol + // ------------------------------------ + // 0x029 | 0x0051 (0x51) | Q + // 0x129 | 0x3044 (0x44) | Hiragana I + // 0x229 | 0x3044 (0x44) | Hiragana I + // 0x129 | 0x304B (0x4B) | Hiragana KA + // 0x229 | 0x304B (0x4B) | Hiragana KA + // 0x129 | 0x306A (0x6A) | Hiragana NA + // 0x229 | 0x306A (0x6A) | Hiragana NA + // 0x129 | 0x005A (0x5A) | Z + // 0x229 | 0x0059 (0x59) | Y + // 0x129 | 0x0058 (0x58) | X + // 0x229 | 0x0000 (0x00) | + // 0x129 | 0x0000 (0x00) | + // 0x229 | 0x0000 (0x00) | + // 0x029 | 0x0000 (0x00) | + // 0x029 | 0x0000 (0x00) | + // 0x029 | 0x0000 (0x00) | + // ... + // "Doubled" means that any full-width characters in the buffer are returned twice. 
+ // "Truncated" means that this pattern trims off some of the end of the buffer with NULLs. + // "W" means that we intend Unicode data to be browsed in the resulting struct (even though wchar and char are unioned.) + static constexpr CharInfoPattern DoubledTruncatedCoverAChar{ + makeCharInfo(0x0051, colored), + makeCharInfo(0x3044, colored | leading), + makeCharInfo(0x3044, colored | trailing), + makeCharInfo(0x304b, colored | leading), + makeCharInfo(0x304b, colored | trailing), + makeCharInfo(0x306a, colored | leading), + makeCharInfo(0x306a, colored | trailing), + makeCharInfo(0x005a, colored | leading), + makeCharInfo(0x0059, colored | trailing), + makeCharInfo(0x0058, colored | leading), + makeCharInfo(0x0000, colored | trailing), + makeCharInfo(0x0000, colored | leading), + makeCharInfo(0x0000, colored | trailing), + makeCharInfo(0x0000, colored), + makeCharInfo(0x0000, colored), + makeCharInfo(0x0000, colored), + }; - delete[] pwszTestData; -} + // Receive Output Table: + // attr | wchar (char) | symbol + // ------------------------------------ + // 0x029 | 0x0051 (0x51) | Q + // 0x129 | 0x3082 (0x82) | Hiragana I Unicode 0x3044 with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. + // 0x229 | 0xFFA2 (0xA2) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA2 + // 0x129 | 0x3082 (0x82) | Hiragana KA Unicode 0x304B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. + // 0x229 | 0xFFA9 (0xA9) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA9 + // 0x129 | 0x3082 (0x82) | Hiragana NA 0x306A with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. 
+ // 0x229 | 0xFFC8 (0xC8) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC8 + // 0x029 | 0x005A (0x5A) | Z + // 0x029 | 0x0059 (0x59) | Y + // 0x029 | 0x0058 (0x58) | X + // 0x029 | 0x0057 (0x57) | W + // 0x029 | 0x0056 (0x56) | V + // 0x029 | 0x0055 (0x55) | U + // 0x029 | 0x0054 (0x54) | T + // 0x129 | 0x3082 (0x82) | Hiragana NI 0x306B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. + // 0x229 | 0xFFC9 (0xC9) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC9 + // ... + // "AOn" means that the Unicode characters were fit into the result buffer first, then the Multibyte conversion + // was written over the top of the lower byte. This makes an invalid Unicode character, but can be understood + // as in-codepage from the char portion of the union. + // "DoubledW" means that the full-width Unicode characters were inserted twice into the buffer (and marked lead/trailing) + // "NegativeOneTrailing" means that every trailing byte started as -1 or 0xFFFF + static constexpr CharInfoPattern AOnDoubledWNegativeOneTrailing{ + makeCharInfo(0x0051, colored), + makeCharInfo(0x3082, colored | leading), + makeCharInfo(0xffa2, colored | trailing), + makeCharInfo(0x3082, colored | leading), + makeCharInfo(0xffa9, colored | trailing), + makeCharInfo(0x3082, colored | leading), + makeCharInfo(0xffc8, colored | trailing), + makeCharInfo(0x005a, colored), + makeCharInfo(0x0059, colored), + makeCharInfo(0x0058, colored), + makeCharInfo(0x0057, colored), + makeCharInfo(0x0056, colored), + makeCharInfo(0x0055, colored), + makeCharInfo(0x0054, colored), + makeCharInfo(0x3082, colored | leading), + makeCharInfo(0xffc9, colored | trailing), + }; -// 14 -// From Input String: "Q(Hiragana I)(Hiragana KA)(Hiragana NA)ZYXWVUT(Hiragana NI) -// With Default Attribute 0x7 (before writing) and Applied Attribute 0x29 (written with text) -// ...
-// Receive Output Table: -// attr | wchar (char) | symbol -// ------------------------------------ -// 0x029 | 0x0000 (0x00) | -// 0x029 | 0x0000 (0x00) | -// 0x029 | 0x0000 (0x00) | -// 0x029 | 0x0000 (0x00) | -// 0x029 | 0x0000 (0x00) | -// 0x029 | 0x0000 (0x00) | -// 0x029 | 0x0000 (0x00) | -// 0x029 | 0x0000 (0x00) | -// 0x029 | 0x0000 (0x00) | -// 0x029 | 0x0000 (0x00) | -// 0x029 | 0x0000 (0x00) | -// 0x029 | 0x0000 (0x00) | -// 0x007 | 0x0000 (0x00) | -// 0x007 | 0x0000 (0x00) | -// 0x007 | 0x0000 (0x00) | -// 0x007 | 0x0000 (0x00) | -// ... -// "Space Padded" means most of the unused data in the buffer will be filled with spaces and the default attribute. -// "Dedupe" means that any full-width characters in the buffer (despite being stored doubled inside the buffer) -// will be returned as single copies. -// "W" means that we intend Unicode data to be browsed in the resulting struct (even though wchar and char are unioned.) -// "Truncated" means that this pattern trims off some of the end of the buffer with NULLs. 
-// "A Cover Attr" means that after all the other operations, we will finally run through and cover up the attributes -// again with what they would have been for multi-byte data (leading and trailing flags) -void DbcsWriteRead::PrepPattern::TrueTypeCharANullWithAttrs(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - _Inout_updates_all_(cExpected) CHAR_INFO* const pciExpected, - const size_t cExpected) -{ - Log::Comment(L"Pattern 14"); - const auto iwchNeeded = MultiByteToWideChar(uiCodePage, 0, pszTestData, -1, nullptr, 0); - auto pwszTestData = new wchar_t[iwchNeeded]; - VERIFY_IS_NOT_NULL(pwszTestData); - const auto iSuccess = MultiByteToWideChar(uiCodePage, 0, pszTestData, -1, pwszTestData, iwchNeeded); - CheckLastErrorZeroFail(iSuccess, L"MultiByteToWideChar"); - const auto cWideData = wcslen(pwszTestData); - - // Fill the number of columns worth of wide characters with the write attribute. The rest get the original attribute. - size_t i; - for (i = 0; i < cWideData; i++) - { - pciExpected[i].Attributes = wAttrWritten; - } + // Receive Output Table: + // attr | wchar (char) | symbol + // ------------------------------------ + // 0x029 | 0x0051 (0x51) | Q + // 0x129 | 0x3082 (0x82) | Hiragana I Unicode 0x3044 with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. + // 0x229 | 0xFFA2 (0xA2) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA2 + // 0x129 | 0x3082 (0x82) | Hiragana I Unicode 0x3044 with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. + // 0x229 | 0xFFA2 (0xA2) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA2 + // 0x129 | 0x3082 (0x82) | Hiragana KA Unicode 0x304B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. 
+ // 0x229 | 0xFFA9 (0xA9) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA9 + // 0x129 | 0x3082 (0x82) | Hiragana KA Unicode 0x304B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. + // 0x229 | 0xFFA9 (0xA9) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA9 + // 0x129 | 0x3082 (0x82) | Hiragana NA 0x306A with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. + // 0x229 | 0xFFC8 (0xC8) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC8 + // 0x129 | 0x3082 (0x82) | Hiragana NA 0x306A with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. + // 0x229 | 0xFFC8 (0xC8) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC8 + // 0x029 | 0x005A (0x5A) | Z + // 0x029 | 0x0059 (0x59) | Y + // 0x029 | 0x0058 (0x58) | X + // ... + // "AOn" means that the Unicode characters were fit into the result buffer first, then the Multibyte conversion + // was written over the top of the lower byte. This makes an invalid Unicode character, but can be understood + // as in-codepage from the char portion of the union. 
+ // "DoubledW" means that the full-width Unicode characters were inserted twice into the buffer (and marked lead/trailing) + // "NegativeOneTrailing" means that every trailing byte started as -1 or 0xFFFF + static constexpr CharInfoPattern AOnDoubleDoubledWNegativeOneTrailing{ + makeCharInfo(0x0051, colored), + makeCharInfo(0x3082, colored | leading), + makeCharInfo(0xffa2, colored | trailing), + makeCharInfo(0x3082, colored | leading), + makeCharInfo(0xffa2, colored | trailing), + makeCharInfo(0x3082, colored | leading), + makeCharInfo(0xffa9, colored | trailing), + makeCharInfo(0x3082, colored | leading), + makeCharInfo(0xffa9, colored | trailing), + makeCharInfo(0x3082, colored | leading), + makeCharInfo(0xffc8, colored | trailing), + makeCharInfo(0x3082, colored | leading), + makeCharInfo(0xffc8, colored | trailing), + makeCharInfo(0x005a, colored), + makeCharInfo(0x0059, colored), + makeCharInfo(0x0058, colored), + }; - for (; i < cExpected; i++) - { - pciExpected[i].Attributes = wAttrOriginal; - } + // Receive Output Table: + // attr | wchar (char) | symbol + // ------------------------------------ + // 0x029 | 0x0051 (0x51) | Q + // 0x129 | 0x3082 (0x82) | Hiragana I Unicode 0x3044 with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. + // 0x229 | 0x30A2 (0xA2) | Hiragana I Unicode 0x3044 with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA2 + // 0x129 | 0x3082 (0x82) | Hiragana KA Unicode 0x304B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. + // 0x229 | 0x30A9 (0xA9) | Hiragana KA Unicode 0x304B with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA9 + // 0x129 | 0x3082 (0x82) | Hiragana NA 0x306A with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. 
+ // 0x229 | 0x30C8 (0xC8) | Hiragana NA 0x306A with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC8 + // 0x029 | 0x005A (0x5A) | Z + // 0x029 | 0x0059 (0x59) | Y + // 0x029 | 0x0058 (0x58) | X + // 0x029 | 0x0057 (0x57) | W + // 0x029 | 0x0056 (0x56) | V + // 0x029 | 0x0055 (0x55) | U + // 0x029 | 0x0054 (0x54) | T + // 0x129 | 0x3082 (0x82) | Hiragana NI 0x306B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. + // 0x229 | 0x30C9 (0xC9) | Hiragana NI 0x306B with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC9 + // ... + // "AOn" means that the Unicode characters were fit into the result buffer first, then the Multibyte conversion + // was written over the top of the lower byte. This makes an invalid Unicode character, but can be understood + // as in-codepage from the char portion of the union. + // "DoubledW" means that the full-width Unicode characters were inserted twice into the buffer (and marked lead/trailing) + // Unlike the "NegativeOneTrailing" patterns, the trailing cells here keep the 0x30 high byte of the Unicode character instead of 0xFF. + static constexpr CharInfoPattern AOnDoubledW{ + makeCharInfo(0x0051, colored), + makeCharInfo(0x3082, colored | leading), + makeCharInfo(0x30a2, colored | trailing), + makeCharInfo(0x3082, colored | leading), + makeCharInfo(0x30a9, colored | trailing), + makeCharInfo(0x3082, colored | leading), + makeCharInfo(0x30c8, colored | trailing), + makeCharInfo(0x005a, colored), + makeCharInfo(0x0059, colored), + makeCharInfo(0x0058, colored), + makeCharInfo(0x0057, colored), + makeCharInfo(0x0056, colored), + makeCharInfo(0x0055, colored), + makeCharInfo(0x0054, colored), + makeCharInfo(0x3082, colored | leading), + makeCharInfo(0x30c9, colored | trailing), + }; - // For characters, if the string contained NO double-byte characters, it will return.
Otherwise, it won't return due to - // a long standing bug in the console's way it calls RtlUnicodeToOemN - const auto cTestData = strlen(pszTestData); - if (cWideData == cTestData) - { - for (i = 0; i < cTestData; i++) - { - pciExpected[i].Char.AsciiChar = pszTestData[i]; - } - } + // Receive Output Table: + // attr | wchar (char) | symbol + // ------------------------------------ + // 0x029 | 0x0051 (0x51) | Q + // 0x129 | 0x3044 (0x44) | Hiragana I + // 0x229 | 0x304B (0x4B) | Hiragana KA + // 0x129 | 0x306A (0x6A) | Hiragana NA + // 0x229 | 0x005A (0x5A) | Z + // 0x129 | 0x0059 (0x59) | Y + // 0x229 | 0x0058 (0x58) | X + // 0x029 | 0x0057 (0x57) | W + // 0x029 | 0x0056 (0x56) | V + // 0x029 | 0x0020 (0x20) | + // 0x029 | 0x0020 (0x20) | + // 0x029 | 0x0020 (0x20) | + // 0x007 | 0x0020 (0x20) | + // 0x007 | 0x0000 (0x00) | + // 0x007 | 0x0000 (0x00) | + // 0x007 | 0x0000 (0x00) | + // ... + // "Space Padded" means most of the unused data in the buffer will be filled with spaces and the default attribute. + // "Dedupe" means that any full-width characters in the buffer (despite being stored doubled inside the buffer) + // will be returned as single copies. + // "W" means that we intend Unicode data to be browsed in the resulting struct (even though wchar and char are unioned.) + // "Truncated" means that this pattern trims off some of the end of the buffer with NULLs. 
+ // "A Cover Attr" means that after all the other operations, we will finally run through and cover up the attributes + // again with what they would have been for multi-byte data (leading and trailing flags) + static constexpr CharInfoPattern ACoverAttrSpacePaddedDedupeTruncatedW{ + makeCharInfo(0x0051, colored), + makeCharInfo(0x3044, colored | leading), + makeCharInfo(0x304b, colored | trailing), + makeCharInfo(0x306a, colored | leading), + makeCharInfo(0x005a, colored | trailing), + makeCharInfo(0x0059, colored | leading), + makeCharInfo(0x0058, colored | trailing), + makeCharInfo(0x0057, colored), + makeCharInfo(0x0056, colored), + makeCharInfo(0x0020, colored), + makeCharInfo(0x0020, colored), + makeCharInfo(0x0020, colored), + makeCharInfo(0x0020, white), + makeCharInfo(0x0000, white), + makeCharInfo(0x0000, white), + makeCharInfo(0x0000, white), + }; - delete[] pwszTestData; + // Receive Output Table: + // attr | wchar (char) | symbol + // ------------------------------------ + // 0x029 | 0x0000 (0x00) | + // 0x029 | 0x0000 (0x00) | + // 0x029 | 0x0000 (0x00) | + // 0x029 | 0x0000 (0x00) | + // 0x029 | 0x0000 (0x00) | + // 0x029 | 0x0000 (0x00) | + // 0x029 | 0x0000 (0x00) | + // 0x029 | 0x0000 (0x00) | + // 0x029 | 0x0000 (0x00) | + // 0x029 | 0x0000 (0x00) | + // 0x029 | 0x0000 (0x00) | + // 0x029 | 0x0000 (0x00) | + // 0x007 | 0x0000 (0x00) | + // 0x007 | 0x0000 (0x00) | + // 0x007 | 0x0000 (0x00) | + // 0x007 | 0x0000 (0x00) | + // ... + // "TrueType" means that this is the pattern we expect to read back when a TrueType font is selected. + // "CharANull" means that the in-codepage (char) data is not returned at all: every character cell reads back as NUL. + // This is believed to be caused by a long standing bug in the way the console calls RtlUnicodeToOemN + // whenever the written string contains double-byte characters. + // "WithAttrs" means that the attributes are still returned: the written attribute covers as many cells as the
+ // input string has Unicode characters (12), and the remaining cells keep the default attribute. + // No leading/trailing flags are set anywhere in this pattern. + static constexpr CharInfoPattern TrueTypeCharANullWithAttrs{ + makeCharInfo(0x0000, colored), + makeCharInfo(0x0000, colored), + makeCharInfo(0x0000, colored), + makeCharInfo(0x0000, colored), + makeCharInfo(0x0000, colored), + makeCharInfo(0x0000, colored), + makeCharInfo(0x0000, colored), + makeCharInfo(0x0000, colored), + makeCharInfo(0x0000, colored), + makeCharInfo(0x0000, colored), + makeCharInfo(0x0000, colored), + makeCharInfo(0x0000, colored), + makeCharInfo(0x0000, white), + makeCharInfo(0x0000, white), + makeCharInfo(0x0000, white), + makeCharInfo(0x0000, white), + }; } -void DbcsWriteRead::PrepReadConsoleOutput(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - const DbcsWriteRead::WriteMode WriteMode, - const bool fWriteWithUnicode, - const bool fIsTrueTypeFont, - const bool fReadWithUnicode, - _Inout_updates_all_(cExpectedNeeded) CHAR_INFO* const rgciExpected, - const size_t cExpectedNeeded) +const CharInfoPattern& DbcsWriteRead::PrepReadConsoleOutput( + const DbcsWriteRead::WriteMode WriteMode, + const UnicodeMode fWriteWithUnicode, + const bool fIsTrueTypeFont, + const bool fReadWithUnicode) { switch (WriteMode) { case DbcsWriteRead::WriteMode::WriteConsoleOutputFunc: - { - // If we wrote with WriteConsoleOutput*, things are going to be munged depending on the font and the A/W status of both the write and the read. - if (!fReadWithUnicode) + switch (fWriteWithUnicode) { - // If we read it back with the A functions, the font might matter. - // We will get different results dependent on whether the original text was written with the W or A method.
- if (fWriteWithUnicode) + case UnicodeMode::UnicodeSingle: + if (fReadWithUnicode) + { + if (fIsTrueTypeFont) + { + // When written with WriteConsoleOutputW and read back with ReadConsoleOutputW when the font is TrueType, + // we will get a deduplicated set of Unicode characters with no lead/trailing markings and space padded at the end. + return PrepPattern::SpacePaddedDedupeW; + } + else + { + // When written with WriteConsoleOutputW and read back with ReadConsoleOutputW when the font is Raster, + // we will get a deduplicated set of Unicode characters with no lead/trailing markings and space padded at the end... + // ... except something weird happens with truncation (TODO figure out what) + return PrepPattern::SpacePaddedDedupeTruncatedW; + } + } + else { if (fIsTrueTypeFont) { // When written with WriteConsoleOutputW and read back with ReadConsoleOutputA under TT font, we will get a deduplicated // set of Unicode characters (YES. Unicode characters despite calling the A API to read back) that is space padded out // There will be no lead/trailing markings. - DbcsWriteRead::PrepPattern::SpacePaddedDedupeW(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, rgciExpected, cExpectedNeeded); + return PrepPattern::SpacePaddedDedupeW; } else { // When written with WriteConsoleOutputW and read back with ReadConsoleOutputA under Raster font, we will get the // double-byte sequences stomped on top of a Unicode filled CHAR_INFO structure that used -1 for trailing bytes. - DbcsWriteRead::PrepPattern::AStompsWNegativeOnePatternTruncateSpacePadded(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, rgciExpected, cExpectedNeeded); + return PrepPattern::AStompsWNegativeOnePatternTruncateSpacePadded; } } - else + break; + case UnicodeMode::UnicodeDoubled: + if (fReadWithUnicode) { - // When written with WriteConsoleOutputA and read back with ReadConsoleOutputA, - // we will get back the double-byte sequences appropriately labeled with leading/trailing bytes. 
- //DbcsWriteRead::PrepPattern::A(pszTestData, wAttrOriginal, wAttrWritten, rgciExpected, cExpectedNeeded); - DbcsWriteRead::PrepPattern::AOnDoubledWNegativeOneTrailing(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, rgciExpected, cExpectedNeeded); + if (fIsTrueTypeFont) + { + // In a TrueType font, we will get back Unicode characters doubled up and marked with leading and trailing bytes. + return PrepPattern::DoubledW; + } + else + { + // We get the same as SpacePaddedDedupeTruncatedW above, but due to the unicode chars being doubled, we get DoubledTruncatedW. + return PrepPattern::DoubledTruncatedW; + } } - } - else - { - // If we read it back with the W functions, both the font and the original write mode (A vs. W) matter - if (fIsTrueTypeFont) + else { - if (fWriteWithUnicode) + if (fIsTrueTypeFont) { - // When written with WriteConsoleOutputW and read back with ReadConsoleOutputW when the font is TrueType, - // we will get a deduplicated set of Unicode characters with no lead/trailing markings and space padded at the end. - DbcsWriteRead::PrepPattern::SpacePaddedDedupeW(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, rgciExpected, cExpectedNeeded); + // In a TrueType font, we will get back Unicode characters doubled up and marked with leading and trailing bytes. + return PrepPattern::AOnDoubledW; } else { - // When written with WriteConsoleOutputW and read back with ReadConsoleOutputA when the font is TrueType, - // we will get back Unicode characters doubled up and marked with leading and trailing bytes... - // ... except all the trailing bytes character values will be -1. - DbcsWriteRead::PrepPattern::DoubledWNegativeOneTrailing(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, rgciExpected, cExpectedNeeded); + // When written with WriteConsoleOutputW and read back with ReadConsoleOutputA under Raster font, we will get the + // double-byte sequences stomped on top of a Unicode filled CHAR_INFO structure that used -1 for trailing bytes. 
+ return PrepPattern::AOnDoubleDoubledWNegativeOneTrailing; } } - else + break; + default: + if (fReadWithUnicode) { - if (fWriteWithUnicode) + if (fIsTrueTypeFont) { - // When written with WriteConsoleOutputW and read back with ReadConsoleOutputW when the font is Raster, - // we will get a deduplicated set of Unicode characters with no lead/trailing markings and space padded at the end... - // ... except something weird happens with truncation (TODO figure out what) - DbcsWriteRead::PrepPattern::SpacePaddedDedupeTruncatedW(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, rgciExpected, cExpectedNeeded); + // When written with WriteConsoleOutputA and read back with ReadConsoleOutputW when the font is TrueType, + // we will get back Unicode characters doubled up and marked with leading and trailing bytes... + // ... except all the trailing bytes character values will be -1. + return PrepPattern::DoubledWNegativeOneTrailing; } else { // When written with WriteConsoleOutputA and read back with ReadConsoleOutputW when the font is Raster, // we will get back de-duplicated Unicode characters with no lead / trail markings.The extra array space will remain null. - DbcsWriteRead::PrepPattern::NullPaddedDedupeW(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, rgciExpected, cExpectedNeeded); + return PrepPattern::NullPaddedDedupeW; } } + else + { + // When written with WriteConsoleOutputA and read back with ReadConsoleOutputA, + // we will get back the double-byte sequences appropriately labeled with leading/trailing bytes. + return PrepPattern::AOnDoubledWNegativeOneTrailing; + } + break; } break; - } case DbcsWriteRead::WriteMode::CrtWrite: case DbcsWriteRead::WriteMode::WriteConsoleOutputCharacterFunc: case DbcsWriteRead::WriteMode::WriteConsoleFunc: - { // Writing with the CRT down here. - if (!fReadWithUnicode) - { - // If we wrote with the CRT and are reading with A functions, the font doesn't matter. 
- // We will always get back the double-byte sequences appropriately labeled with leading/trailing bytes. - //DbcsWriteRead::PrepPattern::(pszTestData, wAttrOriginal, wAttrWritten, rgciExpected, cExpectedNeeded); - DbcsWriteRead::PrepPattern::AOnDoubledW(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, rgciExpected, cExpectedNeeded); - } - else + if (fReadWithUnicode) { // If we wrote with the CRT and are reading back with the W functions, the font does matter. if (fIsTrueTypeFont) { // In a TrueType font, we will get back Unicode characters doubled up and marked with leading and trailing bytes. - DbcsWriteRead::PrepPattern::DoubledW(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, rgciExpected, cExpectedNeeded); + return PrepPattern::DoubledW; } else { // In a Raster font, we will get back de-duplicated Unicode characters with no lead/trail markings. The extra array space will remain null. - DbcsWriteRead::PrepPattern::NullPaddedDedupeW(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, rgciExpected, cExpectedNeeded); + return PrepPattern::NullPaddedDedupeW; } } + else + { + // If we wrote with the CRT and are reading with A functions, the font doesn't matter. + // We will always get back the double-byte sequences appropriately labeled with leading/trailing bytes. 
+ return PrepPattern::AOnDoubledW; + } break; - } default: VERIFY_FAIL(L"Unsupported write mode"); + std::terminate(); } } -void DbcsWriteRead::PrepReadConsoleOutputCharacter(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, - const DbcsWriteRead::WriteMode WriteMode, - const bool fWriteWithUnicode, - const bool fIsTrueTypeFont, - const bool fReadWithUnicode, - _Inout_updates_all_(cExpectedNeeded) CHAR_INFO* const rgciExpected, - const size_t cExpectedNeeded) +const CharInfoPattern& DbcsWriteRead::PrepReadConsoleOutputCharacter( + const DbcsWriteRead::WriteMode WriteMode, + const UnicodeMode fWriteWithUnicode, + const bool fIsTrueTypeFont, + const bool fReadWithUnicode) { - if (DbcsWriteRead::WriteMode::WriteConsoleOutputFunc == WriteMode && fWriteWithUnicode) + if (DbcsWriteRead::WriteMode::WriteConsoleOutputFunc == WriteMode) { - if (fIsTrueTypeFont) + switch (fWriteWithUnicode) { + case UnicodeMode::UnicodeSingle: + if (fReadWithUnicode) + { + if (fIsTrueTypeFont) + { + return PrepPattern::SpacePaddedDedupeW; + } + else + { + return PrepPattern::ACoverAttrSpacePaddedDedupeTruncatedW; + } + } + else + { + if (fIsTrueTypeFont) + { + return PrepPattern::TrueTypeCharANullWithAttrs; + } + else + { + return PrepPattern::SpacePaddedDedupeA; + } + } + break; + case UnicodeMode::UnicodeDoubled: if (fReadWithUnicode) { - DbcsWriteRead::PrepPattern::WSpaceFill(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, rgciExpected, cExpectedNeeded); + if (fIsTrueTypeFont) + { + return PrepPattern::WNullCoverAChar; + } + else + { + return PrepPattern::DoubledTruncatedCoverAChar; + } } else { - DbcsWriteRead::PrepPattern::TrueTypeCharANullWithAttrs(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, rgciExpected, cExpectedNeeded); + if (fIsTrueTypeFont) + { + return PrepPattern::A; + } + else + { + return PrepPattern::DoubledA; + } } - } - else - { + break; + default: if (fReadWithUnicode) { - 
DbcsWriteRead::PrepPattern::ACoverAttrSpacePaddedDedupeTruncatedW(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, rgciExpected, cExpectedNeeded); + return PrepPattern::WNullCoverAChar; } else { - DbcsWriteRead::PrepPattern::SpacePaddedDedupeA(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, rgciExpected, cExpectedNeeded); + return PrepPattern::A; } + break; } } else { - if (!fReadWithUnicode) + if (fReadWithUnicode) { - DbcsWriteRead::PrepPattern::A(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, rgciExpected, cExpectedNeeded); + return PrepPattern::WNullCoverAChar; } else { - DbcsWriteRead::PrepPattern::WNullCoverAChar(uiCodePage, pszTestData, wAttrOriginal, wAttrWritten, rgciExpected, cExpectedNeeded); + return PrepPattern::A; } } } -void DbcsWriteRead::PrepExpected(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - const WORD wAttrOriginal, - const WORD wAttrWritten, +void DbcsWriteRead::PrepExpected(const WORD wAttrWritten, const DbcsWriteRead::WriteMode WriteMode, - const bool fWriteWithUnicode, + const DbcsWriteRead::UnicodeMode fWriteWithUnicode, const bool fIsTrueTypeFont, const DbcsWriteRead::ReadMode ReadMode, const bool fReadWithUnicode, - _Outptr_result_buffer_(*pcExpected) CHAR_INFO** const ppciExpected, - _Out_ size_t* const pcExpected) + CharInfoPattern& expected) { - // We will expect to read back one CHAR_INFO for every A character we sent to the console using the assumption above. - // We expect that reading W characters will always be less than or equal to that. - const auto cExpectedNeeded = strlen(pszTestData); - - // Allocate and zero out the space so comparisons don't fail from garbage bytes. 
- auto rgciExpected = new CHAR_INFO[cExpectedNeeded]; - VERIFY_IS_NOT_NULL(rgciExpected); - ZeroMemory(rgciExpected, sizeof(CHAR_INFO) * cExpectedNeeded); - switch (ReadMode) { case DbcsWriteRead::ReadMode::ReadConsoleOutputFunc: { - DbcsWriteRead::PrepReadConsoleOutput(uiCodePage, - pszTestData, - wAttrOriginal, - wAttrWritten, - WriteMode, - fWriteWithUnicode, - fIsTrueTypeFont, - fReadWithUnicode, - rgciExpected, - cExpectedNeeded); + expected = DbcsWriteRead::PrepReadConsoleOutput(WriteMode, fWriteWithUnicode, fIsTrueTypeFont, fReadWithUnicode); break; } case DbcsWriteRead::ReadMode::ReadConsoleOutputCharacterFunc: { - DbcsWriteRead::PrepReadConsoleOutputCharacter(uiCodePage, - pszTestData, - wAttrOriginal, - wAttrWritten, - WriteMode, - fWriteWithUnicode, - fIsTrueTypeFont, - fReadWithUnicode, - rgciExpected, - cExpectedNeeded); + expected = DbcsWriteRead::PrepReadConsoleOutputCharacter(WriteMode, fWriteWithUnicode, fIsTrueTypeFont, fReadWithUnicode); break; } default: @@ -1697,16 +1404,13 @@ void DbcsWriteRead::PrepExpected(_In_ const unsigned int uiCodePage, } } - // Return the expected array and the length that should be used for comparison at the end of the test. - *ppciExpected = rgciExpected; - *pcExpected = cExpectedNeeded; + PrepPattern::replaceColorPlaceholders(expected, wAttrWritten); } void DbcsWriteRead::RetrieveOutput(const HANDLE hOut, const DbcsWriteRead::ReadMode ReadMode, const bool fReadUnicode, - _Out_writes_(cChars) CHAR_INFO* const rgChars, - const SHORT cChars) + CharInfoPattern& rgChars) { COORD coordBufferTarget = { 0 }; @@ -1717,21 +1421,21 @@ void DbcsWriteRead::RetrieveOutput(const HANDLE hOut, // Since we wrote (in SendOutput function) to the 0,0 line, we need to read back the same width from that line. 
COORD coordBufferSize = { 0 }; coordBufferSize.Y = 1; - coordBufferSize.X = cChars; + coordBufferSize.X = gsl::narrow(rgChars.size()); SMALL_RECT srReadRegion = { 0 }; // inclusive rectangle (bottom and right are INSIDE the read area. usually are exclusive.) - srReadRegion.Right = cChars - 1; + srReadRegion.Right = coordBufferSize.X - 1; // return value for read region shouldn't change const auto srReadRegionExpected = srReadRegion; if (!fReadUnicode) { - VERIFY_WIN32_BOOL_SUCCEEDED_RETURN(ReadConsoleOutputA(hOut, rgChars, coordBufferSize, coordBufferTarget, &srReadRegion)); + VERIFY_WIN32_BOOL_SUCCEEDED_RETURN(ReadConsoleOutputA(hOut, rgChars.data(), coordBufferSize, coordBufferTarget, &srReadRegion)); } else { - VERIFY_WIN32_BOOL_SUCCEEDED_RETURN(ReadConsoleOutputW(hOut, rgChars, coordBufferSize, coordBufferTarget, &srReadRegion)); + VERIFY_WIN32_BOOL_SUCCEEDED_RETURN(ReadConsoleOutputW(hOut, rgChars.data(), coordBufferSize, coordBufferTarget, &srReadRegion)); } Log::Comment(NoThrowString().Format(L"ReadRegion T: %d L: %d B: %d R: %d", srReadRegion.Top, srReadRegion.Left, srReadRegion.Bottom, srReadRegion.Right)); @@ -1740,6 +1444,7 @@ void DbcsWriteRead::RetrieveOutput(const HANDLE hOut, } case DbcsWriteRead::ReadMode::ReadConsoleOutputCharacterFunc: { + const auto cChars = gsl::narrow(rgChars.size()); DWORD dwRead = 0; if (!fReadUnicode) { @@ -1786,12 +1491,11 @@ void DbcsWriteRead::RetrieveOutput(const HANDLE hOut, } } -void DbcsWriteRead::Verify(_In_reads_(cExpected) CHAR_INFO* const rgExpected, - const size_t cExpected, - _In_reads_(cExpected) CHAR_INFO* const rgActual) +void DbcsWriteRead::Verify(const CharInfoPattern& rgExpected, + const CharInfoPattern& rgActual) { // We will walk through for the number of CHAR_INFOs expected. - for (size_t i = 0; i < cExpected; i++) + for (size_t i = 0; i < rgExpected.size(); i++) { // Uncomment these lines for help debugging the verification. 
/* @@ -1804,12 +1508,10 @@ void DbcsWriteRead::Verify(_In_reads_(cExpected) CHAR_INFO* const rgExpected, } } -void DbcsWriteRead::TestRunner(_In_ const unsigned int uiCodePage, - _In_ PCSTR pszTestData, - _In_opt_ WORD* const pwAttrOverride, +void DbcsWriteRead::TestRunner(_In_opt_ WORD* const pwAttrOverride, const bool fUseTrueType, const DbcsWriteRead::WriteMode WriteMode, - const bool fWriteInUnicode, + const UnicodeMode fWriteInUnicode, const DbcsWriteRead::ReadMode ReadMode, const bool fReadWithUnicode) { @@ -1818,14 +1520,12 @@ void DbcsWriteRead::TestRunner(_In_ const unsigned int uiCodePage, // used by default in the buffer (set during clearing as well). HANDLE hOut; WORD wAttributes; - if (!DbcsWriteRead::Setup(uiCodePage, fUseTrueType, &hOut, &wAttributes)) + if (!DbcsWriteRead::Setup(fUseTrueType, &hOut, &wAttributes)) { // If we can't set up (setup will detect systems where this test cannot operate) then return early. return; } - const auto wAttrOriginal = wAttributes; - // Some tests might want to override the colors applied to ensure both parts of the CHAR_INFO union // work for methods that support sending that union. (i.e. not the CRT path) if (nullptr != pwAttrOverride) @@ -1833,38 +1533,24 @@ void DbcsWriteRead::TestRunner(_In_ const unsigned int uiCodePage, wAttributes = *pwAttrOverride; } - // The console bases the space it walks for DBCS conversions on the length of the A version of the text. - // Store that length now so we have it for our read/write operations. - const auto cTestData = strlen(pszTestData); - // Write the string under test into the appropriate WRITE API for this test. - DbcsWriteRead::SendOutput(hOut, uiCodePage, WriteMode, fWriteInUnicode, pszTestData, wAttributes); + DbcsWriteRead::SendOutput(hOut, WriteMode, fWriteInUnicode, wAttributes); // Prepare the array of CHAR_INFO structs that we expect to receive back when we will call read in a moment. 
// This can vary based on font, unicode/non-unicode (when reading AND writing), and codepage. - CHAR_INFO* pciExpected; - size_t cExpected; - DbcsWriteRead::PrepExpected(uiCodePage, pszTestData, wAttrOriginal, wAttributes, WriteMode, fWriteInUnicode, fUseTrueType, ReadMode, fReadWithUnicode, &pciExpected, &cExpected); + CharInfoPattern pciExpected; + DbcsWriteRead::PrepExpected(wAttributes, WriteMode, fWriteInUnicode, fUseTrueType, ReadMode, fReadWithUnicode, pciExpected); // Now call the appropriate READ API for this test. - auto pciActual = new CHAR_INFO[cTestData]; - VERIFY_IS_NOT_NULL(pciActual); - ZeroMemory(pciActual, sizeof(CHAR_INFO) * cTestData); - DbcsWriteRead::RetrieveOutput(hOut, ReadMode, fReadWithUnicode, pciActual, (SHORT)cTestData); + CharInfoPattern pciActual{}; + DbcsWriteRead::RetrieveOutput(hOut, ReadMode, fReadWithUnicode, pciActual); // Loop through and verify that our expected array matches what was actually returned by the given API. - DbcsWriteRead::Verify(pciExpected, cExpected, pciActual); - - // Free allocated structures - delete[] pciActual; - delete[] pciExpected; + DbcsWriteRead::Verify(pciExpected, pciActual); } void DbcsTests::TestDbcsWriteRead() { - unsigned int uiCodePage; - VERIFY_SUCCEEDED(TestData::TryGetValue(L"uiCodePage", uiCodePage)); - bool fUseTrueTypeFont; VERIFY_SUCCEEDED(TestData::TryGetValue(L"fUseTrueTypeFont", fUseTrueTypeFont)); @@ -1872,8 +1558,9 @@ void DbcsTests::TestDbcsWriteRead() VERIFY_SUCCEEDED(TestData::TryGetValue(L"WriteMode", iWriteMode)); auto WriteMode = (DbcsWriteRead::WriteMode)iWriteMode; - bool fWriteInUnicode; - VERIFY_SUCCEEDED(TestData::TryGetValue(L"fWriteInUnicode", fWriteInUnicode)); + int iWriteInUnicode; + VERIFY_SUCCEEDED(TestData::TryGetValue(L"fWriteInUnicode", iWriteInUnicode)); + auto fWriteInUnicode = (DbcsWriteRead::UnicodeMode)iWriteInUnicode; int iReadMode; VERIFY_SUCCEEDED(TestData::TryGetValue(L"ReadMode", iReadMode)); @@ -1882,6 +1569,12 @@ void DbcsTests::TestDbcsWriteRead() 
bool fReadInUnicode; VERIFY_SUCCEEDED(TestData::TryGetValue(L"fReadInUnicode", fReadInUnicode)); + // UnicodeDoubled is only relevant for WriteConsoleOutputW + if (fWriteInUnicode == DbcsWriteRead::UnicodeMode::UnicodeDoubled && WriteMode != DbcsWriteRead::WriteMode::WriteConsoleOutputFunc) + { + return; + } + auto pwszWriteMode = L""; switch (WriteMode) { @@ -1914,31 +1607,17 @@ void DbcsTests::TestDbcsWriteRead() VERIFY_FAIL(L"Read mode not supported"); } - auto testInfo = NoThrowString().Format(L"\r\n\r\n\r\nUse '%ls' font. Write with %ls '%ls'. Check Read with %ls '%ls' API. Use %d codepage.\r\n", + auto testInfo = NoThrowString().Format(L"\r\n\r\n\r\nUse '%s' font. Write with %s '%s'%s. Check Read with %s '%s' API. Use %d codepage.\r\n", fUseTrueTypeFont ? L"TrueType" : L"Raster", pwszWriteMode, fWriteInUnicode ? L"W" : L"A", + fWriteInUnicode == DbcsWriteRead::UnicodeMode::UnicodeDoubled ? L" (doubled)" : L"", pwszReadMode, fReadInUnicode ? L"W" : L"A", - uiCodePage); + JAPANESE_CP); Log::Comment(testInfo); - auto pszTestData = ""; - switch (uiCodePage) - { - case ENGLISH_US_CP: - pszTestData = "QWERTYUIOP"; - break; - case JAPANESE_CP: - // Q (Hiragana I) (Hiragana KA) (Hiragana NA) Z Y X W V U T (Hiragana NI) in Shift-JIS (Codepage 932) - pszTestData = "Q\x82\xA2\x82\xa9\x82\xc8ZYXWVUT\x82\xc9"; - break; - default: - VERIFY_FAIL(L"No test data for this codepage"); - break; - } - WORD wAttributes = 0; if (WriteMode == 1) @@ -1947,9 +1626,7 @@ void DbcsTests::TestDbcsWriteRead() wAttributes = FOREGROUND_BLUE | FOREGROUND_INTENSITY | BACKGROUND_GREEN; } - DbcsWriteRead::TestRunner(uiCodePage, - pszTestData, - wAttributes != 0 ? &wAttributes : nullptr, + DbcsWriteRead::TestRunner(wAttributes != 0 ? 
&wAttributes : nullptr, fUseTrueTypeFont, WriteMode, fWriteInUnicode, From 444d44f816e2a6c743353b41b7c3653b8191483c Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 17 Jun 2022 22:20:07 +0200 Subject: [PATCH 2/3] Introduce breaking changes to ReadConsoleOutput --- src/host/directio.cpp | 130 ++++++------- src/host/ft_host/CJK_DbcsTests.cpp | 292 ++++++----------------------- 2 files changed, 115 insertions(+), 307 deletions(-) diff --git a/src/host/directio.cpp b/src/host/directio.cpp index 4933aec9ab5..38be8e8838b 100644 --- a/src/host/directio.cpp +++ b/src/host/directio.cpp @@ -27,8 +27,6 @@ using Microsoft::Console::Interactivity::ServiceLocator; class CONSOLE_INFORMATION; -#define UNICODE_DBCS_PADDING 0xffff - // Routine Description: // - converts non-unicode InputEvents to unicode InputEvents // Arguments: @@ -531,61 +529,59 @@ void EventsToUnicode(_Inout_ std::deque>& inEvents, { try { - std::vector tempBuffer(buffer.begin(), buffer.end()); - const auto size = rectangle.Dimensions(); - auto tempIter = tempBuffer.cbegin(); auto outIter = buffer.begin(); - for (til::CoordType i = 0; i < size.Y; i++) + for (til::CoordType i = 0; i < size.Y; ++i) { - for (til::CoordType j = 0; j < size.X; j++) + for (til::CoordType j = 0; j < size.X; ++j, ++outIter) { + auto& in1 = *outIter; + + // If .AsciiChar and .UnicodeChar have the same offset (since they're a union), + // we can just write the latter with a byte-sized value to set the former + // _and_ simultaneously clear the upper byte of .UnicodeChar to 0. Nice! + static_assert(offsetof(CHAR_INFO, Char.AsciiChar) == offsetof(CHAR_INFO, Char.UnicodeChar)); + // Any time we see the lead flag, we presume there will be a trailing one following it. // Giving us two bytes of space (one per cell in the ascii part of the character union) // to fill with whatever this Unicode character converts into. 
- if (WI_IsFlagSet(tempIter->Attributes, COMMON_LVB_LEADING_BYTE)) + if (WI_IsFlagSet(in1.Attributes, COMMON_LVB_LEADING_BYTE)) { // As long as we're not looking at the exact last column of the buffer... if (j < size.X - 1) { // Walk forward one because we're about to consume two cells. - j++; + ++j; + ++outIter; + + auto& in2 = *outIter; // Try to convert the unicode character (2 bytes) in the leading cell to the codepage. - CHAR AsciiDbcs[2] = { 0 }; - auto NumBytes = gsl::narrow(sizeof(AsciiDbcs)); - NumBytes = ConvertToOem(codepage, &tempIter->Char.UnicodeChar, 1, &AsciiDbcs[0], NumBytes); + CHAR AsciiDbcs[2]{}; + ConvertToOem(codepage, &in1.Char.UnicodeChar, 1, &AsciiDbcs[0], 2); // Fill the 1 byte (AsciiChar) portion of the leading and trailing cells with each of the bytes returned. - outIter->Char.AsciiChar = AsciiDbcs[0]; - outIter->Attributes = tempIter->Attributes; - outIter++; - tempIter++; - outIter->Char.AsciiChar = AsciiDbcs[1]; - outIter->Attributes = tempIter->Attributes; - outIter++; - tempIter++; + // We have to be bit careful here not to directly write the CHARs, because CHARs are signed whereas wchar_t isn't + // and we don't want any sign-extension. We want a 1:1 copy instead, so cast it to an unsigned char first. + in1.Char.UnicodeChar = til::bit_cast(AsciiDbcs[0]); + in2.Char.UnicodeChar = til::bit_cast(AsciiDbcs[1]); } else { // When we're in the last column with only a leading byte, we can't return that without a trailing. // Instead, replace the output data with just a space and clear all flags. 
- outIter->Char.AsciiChar = UNICODE_SPACE; - outIter->Attributes = tempIter->Attributes; - WI_ClearAllFlags(outIter->Attributes, COMMON_LVB_SBCSDBCS); - outIter++; - tempIter++; + in1.Char.UnicodeChar = UNICODE_SPACE; + WI_ClearAllFlags(in1.Attributes, COMMON_LVB_SBCSDBCS); } } - else if (WI_AreAllFlagsClear(tempIter->Attributes, COMMON_LVB_SBCSDBCS)) + else if (WI_AreAllFlagsClear(in1.Attributes, COMMON_LVB_SBCSDBCS)) { // If there are no leading/trailing pair flags, then we only have 1 ascii byte to try to fit the // 2 byte UTF-16 character into. Give it a go. - ConvertToOem(codepage, &tempIter->Char.UnicodeChar, 1, &outIter->Char.AsciiChar, 1); - outIter->Attributes = tempIter->Attributes; - outIter++; - tempIter++; + CHAR asciiChar{}; + ConvertToOem(codepage, &in1.Char.UnicodeChar, 1, &asciiChar, 1); + in1.Char.UnicodeChar = til::bit_cast(asciiChar); } } } @@ -615,58 +611,57 @@ void EventsToUnicode(_Inout_ std::deque>& inEvents, const auto size = rectangle.Dimensions(); auto outIter = buffer.begin(); - for (til::CoordType i = 0; i < size.Y; i++) + for (til::CoordType i = 0; i < size.Y; ++i) { - for (til::CoordType j = 0; j < size.X; j++) + for (til::CoordType j = 0; j < size.X; ++j, ++outIter) { // Clear lead/trailing flags. We'll determine it for ourselves versus the given codepage. - WI_ClearAllFlags(outIter->Attributes, COMMON_LVB_SBCSDBCS); + auto& in1 = *outIter; + WI_ClearAllFlags(in1.Attributes, COMMON_LVB_SBCSDBCS); // If the 1 byte given is a lead in this codepage, we likely need two cells for the width. - if (IsDBCSLeadByteConsole(outIter->Char.AsciiChar, &gci.OutputCPInfo)) + if (IsDBCSLeadByteConsole(in1.Char.AsciiChar, &gci.OutputCPInfo)) { // If we're not on the last column, we have two cells to use. if (j < size.X - 1) { // Mark we're consuming two cells. - j++; + ++outIter; + ++j; + + // Just as above - clear the flags, as we're setting them ourselves. 
+ auto& in2 = *outIter; + WI_ClearAllFlags(in2.Attributes, COMMON_LVB_SBCSDBCS); // Grab the lead/trailing byte pair from this cell and the next one forward. CHAR AsciiDbcs[2]; - AsciiDbcs[0] = outIter->Char.AsciiChar; - AsciiDbcs[1] = (outIter + 1)->Char.AsciiChar; + AsciiDbcs[0] = in1.Char.AsciiChar; + AsciiDbcs[1] = in2.Char.AsciiChar; // Convert it to UTF-16. - WCHAR UnicodeDbcs[2]; - ConvertOutputToUnicode(codepage, &AsciiDbcs[0], 2, &UnicodeDbcs[0], 2); + wchar_t wch = UNICODE_SPACE; + ConvertOutputToUnicode(codepage, &AsciiDbcs[0], 2, &wch, 1); // Store the actual character in the first available position. - outIter->Char.UnicodeChar = UnicodeDbcs[0]; - WI_ClearAllFlags(outIter->Attributes, COMMON_LVB_SBCSDBCS); - WI_SetFlag(outIter->Attributes, COMMON_LVB_LEADING_BYTE); - outIter++; + in1.Char.UnicodeChar = wch; + WI_SetFlag(in1.Attributes, COMMON_LVB_LEADING_BYTE); // Put a padding character in the second position. - outIter->Char.UnicodeChar = UNICODE_DBCS_PADDING; - WI_ClearAllFlags(outIter->Attributes, COMMON_LVB_SBCSDBCS); - WI_SetFlag(outIter->Attributes, COMMON_LVB_TRAILING_BYTE); - outIter++; + in2.Char.UnicodeChar = wch; + WI_SetFlag(in2.Attributes, COMMON_LVB_TRAILING_BYTE); } else { // If we were on the last column, put in a space. - outIter->Char.UnicodeChar = UNICODE_SPACE; - WI_ClearAllFlags(outIter->Attributes, COMMON_LVB_SBCSDBCS); - outIter++; + in1.Char.UnicodeChar = UNICODE_SPACE; } } else { // If it's not detected as a lead byte of a pair, then just convert it in place and move on. 
- auto c = outIter->Char.AsciiChar; - - ConvertOutputToUnicode(codepage, &c, 1, &outIter->Char.UnicodeChar, 1); - outIter++; + wchar_t wch = UNICODE_SPACE; + ConvertOutputToUnicode(codepage, &in1.Char.AsciiChar, 1, &wch, 1); + in1.Char.UnicodeChar = wch; } } } @@ -679,7 +674,7 @@ void EventsToUnicode(_Inout_ std::deque>& inEvents, [[nodiscard]] static std::vector _ConvertCellsToMungedW(gsl::span buffer, const Viewport& rectangle) { std::vector result; - result.reserve(buffer.size() * 2); // we estimate we'll need up to double the cells if they all expand. + result.reserve(buffer.size()); const auto size = rectangle.Dimensions(); auto bufferIter = buffer.begin(); @@ -689,12 +684,11 @@ void EventsToUnicode(_Inout_ std::deque>& inEvents, for (til::CoordType j = 0; j < size.X; j++) { // Prepare a candidate charinfo on the output side copying the colors but not the lead/trail information. - CHAR_INFO candidate; - candidate.Attributes = bufferIter->Attributes; + auto candidate = *bufferIter; WI_ClearAllFlags(candidate.Attributes, COMMON_LVB_SBCSDBCS); // If the glyph we're given is full width, it needs to take two cells. - if (IsGlyphFullWidth(bufferIter->Char.UnicodeChar)) + if (IsGlyphFullWidth(candidate.Char.UnicodeChar)) { // If we're not on the final cell of the row... if (j < size.X - 1) @@ -703,14 +697,11 @@ void EventsToUnicode(_Inout_ std::deque>& inEvents, j++; // Fill one cell with a copy of the color and character marked leading - candidate.Char.UnicodeChar = bufferIter->Char.UnicodeChar; WI_SetFlag(candidate.Attributes, COMMON_LVB_LEADING_BYTE); result.push_back(candidate); // Fill a second cell with a copy of the color marked trailing and a padding character. 
- candidate.Char.UnicodeChar = UNICODE_DBCS_PADDING; - candidate.Attributes = bufferIter->Attributes; - WI_ClearAllFlags(candidate.Attributes, COMMON_LVB_SBCSDBCS); + WI_ClearFlag(candidate.Attributes, COMMON_LVB_LEADING_BYTE); WI_SetFlag(candidate.Attributes, COMMON_LVB_TRAILING_BYTE); } else @@ -719,17 +710,12 @@ void EventsToUnicode(_Inout_ std::deque>& inEvents, candidate.Char.UnicodeChar = UNICODE_SPACE; } } - else - { - // If we're not full-width, we're half-width. Just copy the character over. - candidate.Char.UnicodeChar = bufferIter->Char.UnicodeChar; - } // Push our candidate in. result.push_back(candidate); // Advance to read the next item. - bufferIter++; + ++bufferIter; } } return result; @@ -743,8 +729,8 @@ void EventsToUnicode(_Inout_ std::deque>& inEvents, try { const auto& gci = ServiceLocator::LocateGlobals().getConsoleInformation(); - const auto& storageBuffer = context.GetActiveBuffer(); - const auto storageSize = storageBuffer.GetBufferSize().Dimensions(); + const auto& storageBuffer = context.GetActiveBuffer().GetTextBuffer(); + const auto storageSize = storageBuffer.GetSize().Dimensions(); const auto targetSize = requestRectangle.Dimensions(); @@ -802,11 +788,11 @@ void EventsToUnicode(_Inout_ std::deque>& inEvents, // Copy the data into position... *targetIter = gci.AsCharInfo(*sourceIter); // ... and advance the read iterator. - sourceIter++; + ++sourceIter; } // Always advance the write iterator, we might have skipped it due to clipping. 
- targetIter++; + ++targetIter; // Increment the target targetPos.X++; diff --git a/src/host/ft_host/CJK_DbcsTests.cpp b/src/host/ft_host/CJK_DbcsTests.cpp index 4e2fd3a9d1e..9958838fbed 100644 --- a/src/host/ft_host/CJK_DbcsTests.cpp +++ b/src/host/ft_host/CJK_DbcsTests.cpp @@ -515,6 +515,49 @@ namespace PrepPattern makeCharInfo(0x0020, white), }; + // Receive Output Table: + // attr | wchar (char) | symbol + // ------------------------------------ + // 0x029 | 0x0051 (0x51) | Q + // 0x029 | 0x0000 (0x00) | + // 0x029 | 0x0000 (0x00) | + // 0x029 | 0x0000 (0x00) | + // 0x029 | 0x005A (0x5A) | Z + // 0x029 | 0x0059 (0x59) | Y + // 0x029 | 0x0058 (0x58) | X + // 0x029 | 0x0057 (0x57) | W + // 0x029 | 0x0056 (0x56) | V + // 0x029 | 0x0055 (0x55) | U + // 0x029 | 0x0054 (0x54) | T + // 0x029 | 0x0000 (0x00) | + // 0x007 | 0x0020 (0x20) | + // 0x007 | 0x0020 (0x20) | + // 0x007 | 0x0020 (0x20) | + // 0x007 | 0x0020 (0x20) | + // ... + // "Space Padded" means any unused data in the buffer will be filled with spaces and the default attribute. + // "Dedupe" means that any full-width characters in the buffer will be returned as single copies. + // But due to the target being a DBCS character set that can't represent these in a single char, it's null. 
+ // "A" means that we intend in-codepage (char) data to be browsed in the resulting struct + static constexpr CharInfoPattern SpacePaddedDedupeInvalidA{ + makeCharInfo(0x0051, colored), + makeCharInfo(0x0000, colored), + makeCharInfo(0x0000, colored), + makeCharInfo(0x0000, colored), + makeCharInfo(0x005a, colored), + makeCharInfo(0x0059, colored), + makeCharInfo(0x0058, colored), + makeCharInfo(0x0057, colored), + makeCharInfo(0x0056, colored), + makeCharInfo(0x0055, colored), + makeCharInfo(0x0054, colored), + makeCharInfo(0x0000, colored), + makeCharInfo(0x0020, white), + makeCharInfo(0x0020, white), + makeCharInfo(0x0020, white), + makeCharInfo(0x0020, white), + }; + // Receive Output Table: // attr | wchar (char) | symbol // ------------------------------------ @@ -685,92 +728,6 @@ namespace PrepPattern makeCharInfo(0x306b, colored | trailing), }; - // Receive Output Table: - // attr | wchar (char) | symbol - // ------------------------------------ - // 0x029 | 0x0051 (0x51) | Q - // 0x129 | 0x3044 (0x44) | Hiragana I - // 0x229 | 0xFFFF (0xFF) | Invalid Unicode Character - // 0x129 | 0x304B (0x4B) | Hiragana KA - // 0x229 | 0xFFFF (0xFF) | Invalid Unicode Character - // 0x129 | 0x306A (0x6A) | Hiragana NA - // 0x229 | 0xFFFF (0xFF) | Invalid Unicode Character - // 0x029 | 0x005A (0x5A) | Z - // 0x029 | 0x0059 (0x59) | Y - // 0x029 | 0x0058 (0x58) | X - // 0x029 | 0x0057 (0x57) | W - // 0x029 | 0x0056 (0x56) | V - // 0x029 | 0x0055 (0x55) | U - // 0x029 | 0x0054 (0x54) | T - // 0x129 | 0x306B (0x6B) | Hiragana NI - // 0x229 | 0xFFFF (0xFF) | Invalid Unicode Character - // ... - // "Doubled" means that any full-width characters in the buffer are returned twice with a leading and trailing byte marker. - // "W" means that we intend Unicode data to be browsed in the resulting struct (even though wchar and char are unioned.) 
- // "NegativeOneTrailing" means that all trailing bytes have their character replaced with the value -1 or 0xFFFF - static constexpr CharInfoPattern DoubledWNegativeOneTrailing{ - makeCharInfo(0x0051, colored), - makeCharInfo(0x3044, colored | leading), - makeCharInfo(0xffff, colored | trailing), - makeCharInfo(0x304b, colored | leading), - makeCharInfo(0xffff, colored | trailing), - makeCharInfo(0x306a, colored | leading), - makeCharInfo(0xffff, colored | trailing), - makeCharInfo(0x005a, colored), - makeCharInfo(0x0059, colored), - makeCharInfo(0x0058, colored), - makeCharInfo(0x0057, colored), - makeCharInfo(0x0056, colored), - makeCharInfo(0x0055, colored), - makeCharInfo(0x0054, colored), - makeCharInfo(0x306b, colored | leading), - makeCharInfo(0xffff, colored | trailing), - }; - - // Receive Output Table: - // attr | wchar (char) | symbol - // ------------------------------------ - // 0x029 | 0x0051 (0x51) | Q - // 0x129 | 0x3082 (0x82) | Hiragana I Unicode 0x3044 with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0xFFA2 (0xA2) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA2 - // 0x129 | 0x3082 (0x82) | Hiragana KA Unicode 0x304B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0xFFA9 (0xA9) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA9 - // 0x129 | 0x3082 (0x82) | Hiragana NA 0x306A with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0xFFC8 (0xC8) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC8 - // 0x029 | 0x005A (0x5A) | Z - // 0x029 | 0x0059 (0x59) | Y - // 0x029 | 0x0058 (0x58) | X - // 0x029 | 0x0057 (0x57) | W - // 0x029 | 0x0056 (0x56) | V - // 0x007 | 0x0020 (0x20) | - // 0x007 | 0x0020 (0x20) | - // 0x007 | 0x0020 (0x20) | - // 0x007 | 0x0020 (0x20) | - // ... 
- // "AStompsW" means that the Unicode characters were fit into the result buffer first, then the Multibyte conversion - // was written over the top of the lower byte. This makes an invalid Unicode character, but can be understood - // as in-codepage from the char portion of the union. - // "NegativeOnePattern" means that every trailing byte started as -1 or 0xFFFF - // "TruncateSpacePadded" means that we only allowed ourselves to return as many characters as is in the unicode length - // of the string and then filled the rest of the buffer after that with spaces. - static constexpr CharInfoPattern AStompsWNegativeOnePatternTruncateSpacePadded{ - makeCharInfo(0x0051, colored), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffa2, colored | trailing), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffa9, colored | trailing), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffc8, colored | trailing), - makeCharInfo(0x005a, colored), - makeCharInfo(0x0059, colored), - makeCharInfo(0x0058, colored), - makeCharInfo(0x0057, colored), - makeCharInfo(0x0056, colored), - makeCharInfo(0x0020, white), - makeCharInfo(0x0020, white), - makeCharInfo(0x0020, white), - makeCharInfo(0x0020, white), - }; // Receive Output Table: // attr | wchar (char) | symbol @@ -938,138 +895,6 @@ namespace PrepPattern makeCharInfo(0x0000, colored), }; - // Receive Output Table: - // attr | wchar (char) | symbol - // ------------------------------------ - // 0x029 | 0x0051 (0x51) | Q - // 0x129 | 0x3082 (0x82) | Hiragana I Unicode 0x3044 with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0xFFA2 (0xA2) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA2 - // 0x129 | 0x3082 (0x82) | Hiragana KA Unicode 0x304B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. 
- // 0x229 | 0xFFA9 (0xA9) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA9 - // 0x129 | 0x3082 (0x82) | Hiragana NA 0x306A with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0xFFC8 (0xC8) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC8 - // 0x029 | 0x005A (0x5A) | Z - // 0x029 | 0x0059 (0x59) | Y - // 0x029 | 0x0058 (0x58) | X - // 0x029 | 0x0057 (0x57) | W - // 0x029 | 0x0056 (0x56) | V - // 0x029 | 0x0055 (0x55) | U - // 0x029 | 0x0054 (0x54) | T - // 0x129 | 0x3082 (0x30) | Hiragana NI 0x306B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0xFFC9 (0xC9) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC9 - // ... - // "AOn" means that the Unicode characters were fit into the result buffer first, then the Multibyte conversion - // was written over the top of the lower byte. This makes an invalid Unicode character, but can be understood - // as in-codepage from the char portion of the union. 
- // "DoubledW" means that the full-width Unicode characters were inserted twice into the buffer (and marked lead/trailing) - // "NegativeOneTrailing" means that every trailing byte started as -1 or 0xFFFF - static constexpr CharInfoPattern AOnDoubledWNegativeOneTrailing{ - makeCharInfo(0x0051, colored), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffa2, colored | trailing), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffa9, colored | trailing), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffc8, colored | trailing), - makeCharInfo(0x005a, colored), - makeCharInfo(0x0059, colored), - makeCharInfo(0x0058, colored), - makeCharInfo(0x0057, colored), - makeCharInfo(0x0056, colored), - makeCharInfo(0x0055, colored), - makeCharInfo(0x0054, colored), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffc9, colored | trailing), - }; - - // Receive Output Table: - // attr | wchar (char) | symbol - // ------------------------------------ - // 0x029 | 0x0051 (0x51) | Q - // 0x129 | 0x3082 (0x82) | Hiragana I Unicode 0x3044 with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0xFFA2 (0xA2) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA2 - // 0x129 | 0x3082 (0x82) | Hiragana I Unicode 0x3044 with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0xFFA2 (0xA2) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA2 - // 0x129 | 0x3082 (0x82) | Hiragana KA Unicode 0x304B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0xFFA9 (0xA9) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA9 - // 0x129 | 0x3082 (0x82) | Hiragana KA Unicode 0x304B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. 
- // 0x229 | 0xFFA9 (0xA9) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA9 - // 0x129 | 0x3082 (0x82) | Hiragana NA 0x306A with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0xFFC8 (0xC8) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC8 - // 0x129 | 0x3082 (0x82) | Hiragana NA 0x306A with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0xFFC8 (0xC8) | Invalid Unicode Character 0xFFFF with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC8 - // 0x029 | 0x005A (0x5A) | Z - // 0x029 | 0x0059 (0x59) | Y - // 0x029 | 0x0058 (0x58) | X - // ... - // "AOn" means that the Unicode characters were fit into the result buffer first, then the Multibyte conversion - // was written over the top of the lower byte. This makes an invalid Unicode character, but can be understood - // as in-codepage from the char portion of the union. 
- // "DoubledW" means that the full-width Unicode characters were inserted twice into the buffer (and marked lead/trailing) - // "NegativeOneTrailing" means that every trailing byte started as -1 or 0xFFFF - static constexpr CharInfoPattern AOnDoubleDoubledWNegativeOneTrailing{ - makeCharInfo(0x0051, colored), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffa2, colored | trailing), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffa2, colored | trailing), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffa9, colored | trailing), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffa9, colored | trailing), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffc8, colored | trailing), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0xffc8, colored | trailing), - makeCharInfo(0x005a, colored), - makeCharInfo(0x0059, colored), - makeCharInfo(0x0058, colored), - }; - - // Receive Output Table: - // attr | wchar (char) | symbol - // ------------------------------------ - // 0x029 | 0x0051 (0x51) | Q - // 0x129 | 0x3082 (0x82) | Hiragana I Unicode 0x3044 with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0x30A2 (0xA2) | Hiragana I Unicode 0x3044 with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA2 - // 0x129 | 0x3082 (0x82) | Hiragana KA Unicode 0x304B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0x30A9 (0xA9) | Hiragana KA Unicode 0x304B with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xA9 - // 0x129 | 0x3082 (0x82) | Hiragana NA 0x306A with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. 
- // 0x229 | 0x39C8 (0xC8) | Hiragana NA 0x306A with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC8 - // 0x029 | 0x005A (0x5A) | Z - // 0x029 | 0x0059 (0x59) | Y - // 0x029 | 0x0058 (0x58) | X - // 0x029 | 0x0057 (0x57) | W - // 0x029 | 0x0056 (0x56) | V - // 0x029 | 0x0055 (0x55) | U - // 0x029 | 0x0054 (0x54) | T - // 0x129 | 0x3082 (0x30) | Hiragana NI 0x306B with the lower byte covered by Shift-JIS Codepage 932 Lead Byte 0x82. - // 0x229 | 0x30C9 (0xC9) | Hiragana NI 0x306B with the lower byte covered by Shift-JIS Codepage 932 Trail Byte 0xC9 - // ... - // "AOn" means that the Unicode characters were fit into the result buffer first, then the Multibyte conversion - // was written over the top of the lower byte. This makes an invalid Unicode character, but can be understood - // as in-codepage from the char portion of the union. - // "DoubledW" means that the full-width Unicode characters were inserted twice into the buffer (and marked lead/trailing) - // "NegativeOneTrailing" means that every trailing byte started as -1 or 0xFFFF - static constexpr CharInfoPattern AOnDoubledW{ - makeCharInfo(0x0051, colored), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0x30a2, colored | trailing), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0x30a9, colored | trailing), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0x30c8, colored | trailing), - makeCharInfo(0x005a, colored), - makeCharInfo(0x0059, colored), - makeCharInfo(0x0058, colored), - makeCharInfo(0x0057, colored), - makeCharInfo(0x0056, colored), - makeCharInfo(0x0055, colored), - makeCharInfo(0x0054, colored), - makeCharInfo(0x3082, colored | leading), - makeCharInfo(0x30c9, colored | trailing), - }; - // Receive Output Table: // attr | wchar (char) | symbol // ------------------------------------ @@ -1195,16 +1020,15 @@ const CharInfoPattern& DbcsWriteRead::PrepReadConsoleOutput( { if (fIsTrueTypeFont) { - // When written with WriteConsoleOutputW and read back 
with ReadConsoleOutputA under TT font, we will get a deduplicated - // set of Unicode characters (YES. Unicode characters despite calling the A API to read back) that is space padded out - // There will be no lead/trailing markings. - return PrepPattern::SpacePaddedDedupeW; + // Normally this would be SpacePaddedDedupeA (analogous to the SpacePaddedDedupeW above), but since the narrow + // unicode chars can't be represented as narrow DBCS (since those don't exist) we get SpacePaddedDedupeInvalidA. + return PrepPattern::SpacePaddedDedupeInvalidA; } else { // When written with WriteConsoleOutputW and read back with ReadConsoleOutputA under Raster font, we will get the // double-byte sequences stomped on top of a Unicode filled CHAR_INFO structure that used -1 for trailing bytes. - return PrepPattern::AStompsWNegativeOnePatternTruncateSpacePadded; + return PrepPattern::SpacePaddedDedupeA; } } break; @@ -1227,13 +1051,13 @@ const CharInfoPattern& DbcsWriteRead::PrepReadConsoleOutput( if (fIsTrueTypeFont) { // In a TrueType font, we will get back Unicode characters doubled up and marked with leading and trailing bytes. - return PrepPattern::AOnDoubledW; + return PrepPattern::A; } else { - // When written with WriteConsoleOutputW and read back with ReadConsoleOutputA under Raster font, we will get the - // double-byte sequences stomped on top of a Unicode filled CHAR_INFO structure that used -1 for trailing bytes. - return PrepPattern::AOnDoubleDoubledWNegativeOneTrailing; + // When written with WriteConsoleOutputW and read back with ReadConsoleOutputA under Raster font, + // we will get the double-byte sequences doubled up, because each narrow cell is written as a DBCS separately. 
+ return PrepPattern::DoubledA; } } break; @@ -1242,10 +1066,9 @@ const CharInfoPattern& DbcsWriteRead::PrepReadConsoleOutput( { if (fIsTrueTypeFont) { - // When written with WriteConsoleOutputA and read back with ReadConsoleOutputW when the font is TrueType, - // we will get back Unicode characters doubled up and marked with leading and trailing bytes... - // ... except all the trailing bytes character values will be -1. - return PrepPattern::DoubledWNegativeOneTrailing; + // When written with WriteConsoleOutputA and read back with ReadConsoleOutputW when the font is TrueType, + // we will get back Unicode characters doubled up and marked with leading and trailing bytes. + return PrepPattern::DoubledW; } else { @@ -1258,7 +1081,7 @@ const CharInfoPattern& DbcsWriteRead::PrepReadConsoleOutput( { // When written with WriteConsoleOutputA and read back with ReadConsoleOutputA, // we will get back the double-byte sequences appropriately labeled with leading/trailing bytes. - return PrepPattern::AOnDoubledWNegativeOneTrailing; + return PrepPattern::A; } break; } @@ -1285,7 +1108,7 @@ const CharInfoPattern& DbcsWriteRead::PrepReadConsoleOutput( { // If we wrote with the CRT and are reading with A functions, the font doesn't matter. // We will always get back the double-byte sequences appropriately labeled with leading/trailing bytes. 
- return PrepPattern::AOnDoubledW; + return PrepPattern::A; } break; default: @@ -1911,8 +1734,7 @@ void DbcsTests::TestDbcsBisectWriteCellsBeginA() const auto originalReadRegion = readRegion; CHAR_INFO readCell; - CHAR_INFO expectedCell; - expectedCell.Char.UnicodeChar = L'\xffff'; + CHAR_INFO expectedCell{}; expectedCell.Char.AsciiChar = originalCell.Char.AsciiChar; expectedCell.Attributes = originalCell.Attributes; WI_ClearAllFlags(expectedCell.Attributes, COMMON_LVB_LEADING_BYTE | COMMON_LVB_TRAILING_BYTE); From 85b186897fe47f109932802a0bfdd7e1525dad02 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 15 Jul 2022 22:34:19 +0200 Subject: [PATCH 3/3] Fix formatting --- src/host/ft_host/CJK_DbcsTests.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/host/ft_host/CJK_DbcsTests.cpp b/src/host/ft_host/CJK_DbcsTests.cpp index 9958838fbed..e3091d17f28 100644 --- a/src/host/ft_host/CJK_DbcsTests.cpp +++ b/src/host/ft_host/CJK_DbcsTests.cpp @@ -728,7 +728,6 @@ namespace PrepPattern makeCharInfo(0x306b, colored | trailing), }; - // Receive Output Table: // attr | wchar (char) | symbol // ------------------------------------