Skip to content

Commit

Permalink
Encode Win32 error messages in UTF-8. (#3987)
Browse files Browse the repository at this point in the history
Currently, Win32 error messages are retrieved using the `FormatMessageA`
API, which returns them in ANSI encoding. This results in corrupted
error messages when the computer's language is other than English.

With this PR, we use `FormatMessageW` which returns the message in
UTF-16 encoding, which then gets converted to UTF-8 and stored in the
encoding-agnostic `std::string`.

__Before__

![image](https://user-images.githubusercontent.com/12659251/226977534-68f56838-6e9c-4e04-92ce-ae0824ef7c56.png)

__After__

![image](https://user-images.githubusercontent.com/12659251/226977191-6c024bc4-99c3-4d91-b28b-5a3407038da4.png)

__After, if the console's output mode is set to UTF-8__

![image](https://user-images.githubusercontent.com/12659251/226976565-6f0670f7-9f4b-4cd0-ab53-b542f79522cf.png)

The C# API (at least, don't know about the others) needs to be updated
to support incoming UTF-8 strings from the Core.

IIRC, POSIX works in UTF-8 by default so it doesn't need any changes,
right?

[SC-26131](https://app.shortcut.com/tiledb-inc/story/26131/win32-vfs-error-messages-in-greek-are-corrupted)

---
TYPE: IMPROVEMENT
DESC: Encode Win32 error messages in UTF-8.

---------

Co-authored-by: KiterLuc <[email protected]>
(cherry picked from commit b8ec44f)
  • Loading branch information
teo-tsirpanis authored and github-actions[bot] committed Oct 31, 2023
1 parent f5a3e5e commit 3a0d1c5
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 20 deletions.
2 changes: 2 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,8 @@ add_executable(

add_dependencies(tiledb_unit tiledb_test_support_lib)

target_compile_options(tiledb_unit PRIVATE "$<$<CXX_COMPILER_ID:MSVC>:/utf-8>")

# We want tests to continue as normal even as the API is changing,
# so don't warn for deprecations, since they'll be escalated to errors.
if (NOT MSVC)
Expand Down
34 changes: 34 additions & 0 deletions test/src/unit-win-filesystem.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
#include "tiledb/sm/filesystem/path_win.h"
#include "tiledb/sm/filesystem/win.h"

#include <Windows.h>

using namespace tiledb::common;
using namespace tiledb::sm;

Expand Down Expand Up @@ -285,4 +287,36 @@ TEST_CASE_METHOD(
Crypto::MD5_DIGEST_BYTES) == 0);
}

// Uses RAII to temporarily change the Win32 thread UI language.
class ChangeThreadUILanguage {
public:
ChangeThreadUILanguage(LANGID langid) {
old_langid_ = ::GetThreadUILanguage();
::SetThreadUILanguage(langid);
}
~ChangeThreadUILanguage() {
::SetThreadUILanguage(old_langid_);
}

private:
LANGID old_langid_;
};

// This test requires the Greek language pack to be installed.
TEST_CASE("Test UTF-8 error messages", "[.hide][windows][utf8-msgs]") {
// Change the thread UI language to Greek, to test that an error message with
// Unicode characters is received correctly.
ChangeThreadUILanguage change_langid(
MAKELANGID(LANG_GREEK, SUBLANG_GREEK_GREECE));

Win win;
REQUIRE(win.init(Config()).ok());
// NUL is a special file on Windows; deleting it should always fail.
Status st = win.remove_file("NUL");
REQUIRE(!st.ok());
auto message = st.message();
auto expected = u8"Δεν επιτρέπεται η πρόσβαση."; // Access denied.
REQUIRE(message.find((char*)expected) != std::string::npos);
}

#endif // _WIN32
50 changes: 30 additions & 20 deletions tiledb/sm/filesystem/win.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,33 +31,33 @@
*/
#ifdef _WIN32

#include "win.h"
#include "path_win.h"
#include "tiledb/common/common.h"
#include "tiledb/common/filesystem/directory_entry.h"
#include "tiledb/common/heap_memory.h"
#include "tiledb/common/logger.h"
#include "tiledb/common/scoped_executor.h"
#include "tiledb/common/stdx_string.h"
#include "tiledb/sm/misc/constants.h"
#include "tiledb/sm/misc/tdb_math.h"
#include "tiledb/sm/misc/utils.h"
#include "uri.h"

#if !defined(NOMINMAX)
#define NOMINMAX // suppress definition of min/max macros in Windows headers
#endif
#include <Shlwapi.h>
#include <Windows.h>
#include <strsafe.h>
#include <wininet.h> // For INTERNET_MAX_URL_LENGTH
#include <algorithm>
#include <cassert>
#include <codecvt>
#include <fstream>
#include <iostream>
#include <locale>
#include <sstream>
#include <string_view>

#include "path_win.h"
#include "tiledb/common/common.h"
#include "tiledb/common/filesystem/directory_entry.h"
#include "tiledb/common/heap_memory.h"
#include "tiledb/common/logger.h"
#include "tiledb/common/scoped_executor.h"
#include "tiledb/common/stdx_string.h"
#include "tiledb/sm/misc/constants.h"
#include "tiledb/sm/misc/tdb_math.h"
#include "tiledb/sm/misc/utils.h"
#include "uri.h"
#include "win.h"

using namespace tiledb::common;
using tiledb::common::filesystem::directory_entry;

Expand All @@ -67,22 +67,32 @@ namespace sm {
namespace {
/** Returns the last Windows error message string. */
std::string get_last_error_msg_desc(decltype(GetLastError()) gle) {
LPVOID lpMsgBuf = nullptr;
if (FormatMessage(
LPWSTR lpMsgBuf = nullptr;
// FormatMessageW allocates a buffer that must be freed with LocalFree.
if (FormatMessageW(
FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
FORMAT_MESSAGE_IGNORE_INSERTS,
NULL,
gle,
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
(LPTSTR)&lpMsgBuf,
// By passing zero as the language ID, Windows will try the following
// languages in order:
// * Language neutral
// * Thread LANGID, based on the thread's locale value
// * User default LANGID, based on the user's default locale value
// * System default LANGID, based on the system default locale value
// * US English
0,
(LPWSTR)&lpMsgBuf,
0,
NULL) == 0) {
if (lpMsgBuf) {
LocalFree(lpMsgBuf);
}
return "unknown error";
}
std::string msg(reinterpret_cast<char*>(lpMsgBuf));
// Convert to UTF-8.
std::string msg =
std::wstring_convert<std::codecvt_utf8<wchar_t>>{}.to_bytes(lpMsgBuf);
LocalFree(lpMsgBuf);
return msg;
}
Expand Down

0 comments on commit 3a0d1c5

Please sign in to comment.