From 665747a83a9cfc8c16edb66c8de229d216a45de6 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Fri, 1 Nov 2024 22:23:39 +0100 Subject: [PATCH 1/4] Add patch tool --- src/BUILD | 28 +++++++-- src/minimize_jdk.sh | 13 +++- src/patch_java_manifest_for_utf8.cc | 94 +++++++++++++++++++++++++++++ src/test/shell/bazel/BUILD | 1 + 4 files changed, 131 insertions(+), 5 deletions(-) create mode 100644 src/patch_java_manifest_for_utf8.cc diff --git a/src/BUILD b/src/BUILD index 1830c0e07990a8..56065a0ed14cce 100644 --- a/src/BUILD +++ b/src/BUILD @@ -179,20 +179,38 @@ filegroup( visibility = ["//src/test/shell/bazel:__pkg__"], ) +cc_binary( + name = "patch_java_manifest_for_utf8", + srcs = ["patch_java_manifest_for_utf8.cc"], + tags = ["manual"], + target_compatible_with = ["@platforms//os:windows"], +) + sh_binary( name = "minimize_jdk", srcs = ["minimize_jdk.sh"], + data = select({ + "@platforms//os:windows": [ + ":patch_java_manifest_for_utf8", + ], + "//conditions:default": [], + }), + deps = [ + "@bazel_tools//tools/bash/runfiles", + ], ) genrule( name = "embedded_jdk_minimal", srcs = [ ":embedded_jdk_vanilla", - ":minimize_jdk.sh", ":jdeps_modules.golden", ], outs = ["minimal_jdk.zip"], - cmd = "$(location :minimize_jdk.sh) $(location :embedded_jdk_vanilla) $(location :jdeps_modules.golden) $(OUTS)", + cmd = "$(location :minimize_jdk) $(location :embedded_jdk_vanilla) $(location :jdeps_modules.golden) $(OUTS)", + tools = [ + ":minimize_jdk", + ], visibility = ["//src/test/shell/bazel:__pkg__"], ) @@ -200,11 +218,13 @@ genrule( name = "embedded_jdk_allmodules", srcs = [ ":embedded_jdk_vanilla", - ":minimize_jdk.sh", ":jdeps_modules.golden", ], outs = ["allmodules_jdk.zip"], - cmd = "$(location :minimize_jdk.sh) --allmodules $(location :embedded_jdk_vanilla) $(location :jdeps_modules.golden) $(OUTS)", + cmd = "$(location :minimize_jdk) --allmodules $(location :embedded_jdk_vanilla) $(location :jdeps_modules.golden) $(OUTS)", + tools = [ + ":minimize_jdk", + ], 
visibility = ["//src/test/shell/bazel:__pkg__"], ) diff --git a/src/minimize_jdk.sh b/src/minimize_jdk.sh index afeeb65a1c64fb..06e0ad17438015 100755 --- a/src/minimize_jdk.sh +++ b/src/minimize_jdk.sh @@ -17,7 +17,17 @@ # This script creates from the full JDK a minimized version that only contains # the specified JDK modules. -set -euo pipefail +# --- begin runfiles.bash initialization v3 --- +# Copy-pasted from the Bazel Bash runfiles library v3. +set -uo pipefail; set +e; f=bazel_tools/tools/bash/runfiles/runfiles.bash +# shellcheck disable=SC1090 +source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \ + source "$0.runfiles/$f" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \ + source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \ + { echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e +# --- end runfiles.bash initialization v3 --- if [ "$1" == "--allmodules" ]; then shift @@ -54,6 +64,7 @@ if [[ "$UNAME" =~ msys_nt* ]]; then ./bin/jlink --module-path ./jmods/ --add-modules "$modules" \ --vm=server --strip-debug --no-man-pages \ --output reduced + "$(rlocation "io_bazel/src/patch_java_manifest_for_utf8.exe")" reduced/bin/java.exe cp $DOCS legal/java.base/ASSEMBLY_EXCEPTION \ reduced/ # These are necessary for --host_jvm_debug to work. diff --git a/src/patch_java_manifest_for_utf8.cc b/src/patch_java_manifest_for_utf8.cc new file mode 100644 index 00000000000000..3e3aa1d0936d42 --- /dev/null +++ b/src/patch_java_manifest_for_utf8.cc @@ -0,0 +1,94 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include + +#define WIN32_LEAN_AND_MEAN +#include + +// This program patches the app manifest of the java.exe launcher to force its +// active code page to UTF-8. This is necessary because the launcher sets +// sun.jnu.encoding to the system code page, which by default is a legacy +// code page such as Cp1252 on Windows. +int wmain(int argc, wchar_t *argv[]) { + if (argc != 2) { + fwprintf(stderr, L"Usage: %ls \n", argv[0]); + return 1; + } + + // Read the app manifest (aka side-by-side or fusion manifest) from the + // executable, which requires loading it as a "module". + HMODULE exe = LoadLibraryExW(argv[1], nullptr, LOAD_LIBRARY_AS_DATAFILE); + if (!exe) { + fwprintf(stderr, L"Error loading file %ls\n", argv[1]); + return 1; + } + HRSRC manifest_resource = FindResourceA(exe, MAKEINTRESOURCE(1), RT_MANIFEST); + if (!manifest_resource) { + fwprintf(stderr, L"Resource not found.\n"); + return 1; + } + HGLOBAL manifest_handle = LoadResource(exe, manifest_resource); + if (!manifest_handle) { + fwprintf(stderr, L"Error loading resource.\n"); + return 1; + } + LPVOID manifest_data = LockResource(manifest_handle); + if (!manifest_data) { + fwprintf(stderr, L"Error locking resource.\n"); + return 1; + } + DWORD manifest_len = SizeofResource(exe, manifest_resource); + std::string manifest((char *) manifest_data, manifest_len); + UnlockResource(manifest_handle); + FreeResource(manifest_handle); + FreeLibrary(exe); + + // Insert the activeCodePage element into the manifest at the end of the + // windowsSettings element. 
+ // https://github.com/openjdk/jdk/blob/29882bfe7b7e76446a96862cd0a5e81c7e054415/src/java.base/windows/native/launcher/java.manifest#L43 + std::size_t insert_pos = manifest.find(""); + if (insert_pos == std::wstring::npos) { + fwprintf(stderr, L"End tag not found in manifest:\n%s", manifest.c_str()); + return 1; + } + std::string new_manifest = manifest.substr(0, insert_pos) + + "UTF-8" + + manifest.substr(insert_pos); + + // Write back the modified app manifest. + HANDLE update_handle = BeginUpdateResourceW(argv[1], false); + if (!update_handle) { + fwprintf(stderr, L"Error opening file for update.\n"); + return 1; + } + if (!UpdateResourceA(update_handle, + MAKEINTRESOURCE(1), + RT_MANIFEST, + MAKELANGID(LANG_NEUTRAL, SUBLANG_NEUTRAL), + const_cast(new_manifest.c_str()), + new_manifest.size())) { + fwprintf(stderr, L"Error updating resource.\n"); + return 1; + } + if (!EndUpdateResourceW(update_handle, false)) { + fwprintf(stderr, L"Error finalizing update.\n"); + return 1; + } + + return 0; +} diff --git a/src/test/shell/bazel/BUILD b/src/test/shell/bazel/BUILD index 256c8f4ae4043d..4295ef2c8bf9df 100644 --- a/src/test/shell/bazel/BUILD +++ b/src/test/shell/bazel/BUILD @@ -726,6 +726,7 @@ sh_test( genquery( name = "embedded_tools_deps", expression = "kind(\"cc_(binary|library)\", deps(//src:embedded_tools_jdk_allmodules_srcs))", + opts = ["--notool_deps"], scope = ["//src:embedded_tools_jdk_allmodules_srcs"], ) From 6557f73c67bac51a055bb46ec1787209d117943f Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Mon, 4 Nov 2024 10:15:03 +0100 Subject: [PATCH 2/4] Fix arguments --- src/patch_java_manifest_for_utf8.cc | 32 ++++++++++++++++++----------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/patch_java_manifest_for_utf8.cc b/src/patch_java_manifest_for_utf8.cc index 3e3aa1d0936d42..0a3b6d9ce597e3 100644 --- a/src/patch_java_manifest_for_utf8.cc +++ b/src/patch_java_manifest_for_utf8.cc @@ -21,9 +21,17 @@ #include // This program patches 
the app manifest of the java.exe launcher to force its -// active code page to UTF-8. This is necessary because the launcher sets -// sun.jnu.encoding to the system code page, which by default is a legacy -// code page such as Cp1252 on Windows. +// active code page to UTF-8 on Windows 10 version 1903 and later. +// https://learn.microsoft.com/en-us/windows/apps/design/globalizing/use-utf8-code-page#set-a-process-code-page-to-utf-8 +// +// This is necessary because the launcher sets sun.jnu.encoding to the system +// code page, which by default is a legacy code page such as Cp1252 on Windows. +// This causes the JVM to be unable to interact with files whose paths contain +// Unicode characters not representable in the system code page, as well as +// command-line arguments and environment variables containing such characters. +// +// How the code page is used in the libjava.dll code: +// https://github.com/openjdk/jdk/blob/e7f0bf11ff0e89b6b156d5e88ca3771c706aa46a/src/java.base/windows/native/libjava/java_props_md.c#L63-L65 int wmain(int argc, wchar_t *argv[]) { if (argc != 2) { fwprintf(stderr, L"Usage: %ls \n", argv[0]); @@ -34,22 +42,22 @@ int wmain(int argc, wchar_t *argv[]) { // executable, which requires loading it as a "module". 
HMODULE exe = LoadLibraryExW(argv[1], nullptr, LOAD_LIBRARY_AS_DATAFILE); if (!exe) { - fwprintf(stderr, L"Error loading file %ls\n", argv[1]); + fwprintf(stderr, L"Error loading file %ls: %d\n", argv[1], GetLastError()); return 1; } HRSRC manifest_resource = FindResourceA(exe, MAKEINTRESOURCE(1), RT_MANIFEST); if (!manifest_resource) { - fwprintf(stderr, L"Resource not found.\n"); + fwprintf(stderr, L"Resource not found: %d\n", GetLastError()); return 1; } HGLOBAL manifest_handle = LoadResource(exe, manifest_resource); if (!manifest_handle) { - fwprintf(stderr, L"Error loading resource.\n"); + fwprintf(stderr, L"Error loading resource: %d\n", GetLastError()); return 1; } LPVOID manifest_data = LockResource(manifest_handle); if (!manifest_data) { - fwprintf(stderr, L"Error locking resource.\n"); + fwprintf(stderr, L"Error locking resource: %d\n", GetLastError()); return 1; } DWORD manifest_len = SizeofResource(exe, manifest_resource); @@ -63,7 +71,7 @@ int wmain(int argc, wchar_t *argv[]) { // https://github.com/openjdk/jdk/blob/29882bfe7b7e76446a96862cd0a5e81c7e054415/src/java.base/windows/native/launcher/java.manifest#L43 std::size_t insert_pos = manifest.find(""); if (insert_pos == std::wstring::npos) { - fwprintf(stderr, L"End tag not found in manifest:\n%s", manifest.c_str()); + fwprintf(stderr, L"End tag not found in manifest:\n%hs", manifest.c_str()); return 1; } std::string new_manifest = manifest.substr(0, insert_pos) + @@ -73,20 +81,20 @@ int wmain(int argc, wchar_t *argv[]) { // Write back the modified app manifest. 
HANDLE update_handle = BeginUpdateResourceW(argv[1], false); if (!update_handle) { - fwprintf(stderr, L"Error opening file for update.\n"); + fwprintf(stderr, L"Error opening file %ls for update: %d\n", argv[1], GetLastError()); return 1; } if (!UpdateResourceA(update_handle, - MAKEINTRESOURCE(1), RT_MANIFEST, + MAKEINTRESOURCE(1), MAKELANGID(LANG_NEUTRAL, SUBLANG_NEUTRAL), const_cast(new_manifest.c_str()), new_manifest.size())) { - fwprintf(stderr, L"Error updating resource.\n"); + fwprintf(stderr, L"Error updating resource: %d\n", GetLastError()); return 1; } if (!EndUpdateResourceW(update_handle, false)) { - fwprintf(stderr, L"Error finalizing update.\n"); + fwprintf(stderr, L"Error finalizing update: %d\n", GetLastError()); return 1; } From 6fe3c50ebc121aceb0b48385ea4913f82013c090 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Mon, 4 Nov 2024 14:25:18 +0100 Subject: [PATCH 3/4] Add comment --- src/BUILD | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/BUILD b/src/BUILD index 56065a0ed14cce..a166f26f88dd92 100644 --- a/src/BUILD +++ b/src/BUILD @@ -179,6 +179,9 @@ filegroup( visibility = ["//src/test/shell/bazel:__pkg__"], ) +# This program patches the app manifest of the java.exe launcher to force its +# active code page to UTF-8 on Windows 1903 and later, which is required for +# proper support of Unicode characters outside the system code page. 
cc_binary( name = "patch_java_manifest_for_utf8", srcs = ["patch_java_manifest_for_utf8.cc"], From e2b26e933439a0079bb8f56aea50d72a7673c127 Mon Sep 17 00:00:00 2001 From: Fabian Meumertzheim Date: Mon, 4 Nov 2024 14:55:45 +0100 Subject: [PATCH 4/4] Add reference --- src/BUILD | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/BUILD b/src/BUILD index a166f26f88dd92..8d5bbb61ad6bd0 100644 --- a/src/BUILD +++ b/src/BUILD @@ -182,6 +182,8 @@ filegroup( # This program patches the app manifest of the java.exe launcher to force its # active code page to UTF-8 on Windows 1903 and later, which is required for # proper support of Unicode characters outside the system code page. +# The JDK currently (as of JDK 23) doesn't support this natively: +# https://mail.openjdk.org/pipermail/core-libs-dev/2024-November/133773.html cc_binary( name = "patch_java_manifest_for_utf8", srcs = ["patch_java_manifest_for_utf8.cc"],