diff --git a/src/main/cpp/BUILD b/src/main/cpp/BUILD index b27812d17e255e..dd3bc4039771c7 100644 --- a/src/main/cpp/BUILD +++ b/src/main/cpp/BUILD @@ -143,7 +143,12 @@ cc_binary( cc_library( name = "option_processor", - srcs = ["option_processor.cc"], + srcs = [ + "option_processor.cc", + ] + select({ + "//src/conditions:windows": ["option_processor_windows.cc"], + "//conditions:default": ["option_processor_unix.cc"], + }), hdrs = [ "option_processor.h", "option_processor-internal.h", @@ -179,6 +184,7 @@ cc_library( "//src/main/cpp/util", "//src/main/cpp/util:blaze_exit_code", "//src/main/cpp/util:logging", + "@abseil-cpp//absl/strings", ], ) diff --git a/src/main/cpp/blaze.cc b/src/main/cpp/blaze.cc index 046b50f6ccca9c..5cb8d5b281b0e7 100644 --- a/src/main/cpp/blaze.cc +++ b/src/main/cpp/blaze.cc @@ -371,7 +371,9 @@ static vector GetServerExeArgs(const blaze_util::Path &jvm_path, } result.push_back(java_library_path.str()); - // Force use of latin1 for file names. + // TODO: Investigate whether this still has any effect. File name encoding is + // governed by sun.jnu.encoding in JDKs with JEP 400, which can't be + // influenced by setting a property. result.push_back("-Dfile.encoding=ISO-8859-1"); // Force into the root locale to ensure consistent behavior of string // operations across machines (e.g. in the tr_TR locale, capital ASCII 'I' diff --git a/src/main/cpp/blaze_util_windows.cc b/src/main/cpp/blaze_util_windows.cc index 1407743879fc3f..a7d536bd4e1849 100644 --- a/src/main/cpp/blaze_util_windows.cc +++ b/src/main/cpp/blaze_util_windows.cc @@ -12,9 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN -#endif #include #include @@ -937,14 +935,15 @@ void CreateSecureOutputRoot(const blaze_util::Path& path) { } string GetEnv(const string& name) { - DWORD size = ::GetEnvironmentVariableA(name.c_str(), nullptr, 0); + std::wstring wname = blaze_util::CstringToWstring(name); + DWORD size = ::GetEnvironmentVariableW(wname.c_str(), nullptr, 0); if (size == 0) { return string(); // unset or empty envvar } - unique_ptr value(new char[size]); - ::GetEnvironmentVariableA(name.c_str(), value.get(), size); - return string(value.get()); + unique_ptr value(new WCHAR[size]); + ::GetEnvironmentVariableW(wname.c_str(), value.get(), size); + return blaze_util::WstringToCstring(value.get()); } string GetPathEnv(const string& name) { @@ -970,11 +969,14 @@ bool ExistsEnv(const string& name) { } void SetEnv(const string& name, const string& value) { - // _putenv_s both calls ::SetEnvionmentVariableA and updates environ(5). - _putenv_s(name.c_str(), value.c_str()); + ::SetEnvironmentVariableW(blaze_util::CstringToWstring(name).c_str(), + blaze_util::CstringToWstring(value).c_str()); } -void UnsetEnv(const string& name) { SetEnv(name, ""); } +void UnsetEnv(const string& name) { + ::SetEnvironmentVariableW(blaze_util::CstringToWstring(name).c_str(), + nullptr); +} bool WarnIfStartedFromDesktop() { // GetConsoleProcessList returns: diff --git a/src/main/cpp/main.cc b/src/main/cpp/main.cc index 0ea770a44ba967..b5ffaf301d3e21 100644 --- a/src/main/cpp/main.cc +++ b/src/main/cpp/main.cc @@ -19,9 +19,10 @@ #include "src/main/cpp/blaze_util_platform.h" #include "src/main/cpp/option_processor.h" #include "src/main/cpp/startup_options.h" +#include "src/main/cpp/util/strings.h" #include "src/main/cpp/workspace_layout.h" -int main(int argc, char **argv) { +int main_impl(int argc, char **argv) { uint64_t start_time = blaze::GetMillisecondsMonotonic(); std::unique_ptr workspace_layout( new blaze::WorkspaceLayout()); @@ -32,3 +33,23 @@ int main(int argc, char **argv) { std::move(startup_options)), start_time); } + +#ifdef _WIN32 +// Define wmain to support Unicode command line arguments on Windows +// regardless of the current code page. +int wmain(int argc, wchar_t **argv) { + std::vector args; + for (int i = 0; i < argc; ++i) { + args.push_back(blaze_util::WstringToCstring(argv[i])); + } + std::vector c_args; + for (const std::string &arg : args) { + c_args.push_back(const_cast(arg.c_str())); + } + c_args.push_back(nullptr); + // Account for the null terminator. + return main_impl(c_args.size() - 1, c_args.data()); +} +#else +int main(int argc, char **argv) { return main_impl(argc, argv); } +#endif diff --git a/src/main/cpp/option_processor-internal.h b/src/main/cpp/option_processor-internal.h index e1b9e7defd8313..d368dcb6c1ce95 100644 --- a/src/main/cpp/option_processor-internal.h +++ b/src/main/cpp/option_processor-internal.h @@ -58,6 +58,8 @@ std::string FindRcAlongsideBinary(const std::string& cwd, blaze_exit_code::ExitCode ParseErrorToExitCode(RcFile::ParseError parse_error); +std::vector GetProcessedEnv(); + } // namespace internal } // namespace blaze diff --git a/src/main/cpp/option_processor.cc b/src/main/cpp/option_processor.cc index 9fb942277771fa..f68b135b535982 100644 --- a/src/main/cpp/option_processor.cc +++ b/src/main/cpp/option_processor.cc @@ -39,6 +39,11 @@ extern char **environ; #endif +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#include +#endif + namespace blaze { using std::map; @@ -48,7 +53,6 @@ using std::vector; constexpr char WorkspaceLayout::WorkspacePrefix[]; static constexpr const char* kRcBasename = ".bazelrc"; -static std::vector GetProcessedEnv(); OptionProcessor::OptionProcessor( const WorkspaceLayout* workspace_layout, @@ -512,8 +516,8 @@ blaze_exit_code::ExitCode OptionProcessor::ParseOptions( return parse_startup_options_exit_code; } - blazerc_and_env_command_args_ = - GetBlazercAndEnvCommandArgs(cwd, rc_file_ptrs, GetProcessedEnv()); + blazerc_and_env_command_args_ = GetBlazercAndEnvCommandArgs( + cwd, rc_file_ptrs, blaze::internal::GetProcessedEnv()); return blaze_exit_code::SUCCESS; } @@ -583,70 +587,6 @@ blaze_exit_code::ExitCode OptionProcessor::ParseStartupOptions( return startup_options_->ProcessArgs(rcstartup_flags, error); } -static bool IsValidEnvName(const char* p) { -#if defined(_WIN32) || defined(__CYGWIN__) - for (; *p && *p != '='; ++p) { - if (!((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || - (*p >= '0' && *p <= '9') || *p == '_' || *p == '(' || *p == ')')) { - return false; - } - } -#endif - return true; -} - -#if defined(_WIN32) -static void PreprocessEnvString(string* env_str) { - static constexpr const char* vars_to_uppercase[] = {"PATH", "SYSTEMROOT", - "SYSTEMDRIVE", - "TEMP", "TEMPDIR", "TMP"}; - - int pos = env_str->find_first_of('='); - if (pos == string::npos) return; - - string name = env_str->substr(0, pos); - // We do not care about locale. All variable names are ASCII. - std::transform(name.begin(), name.end(), name.begin(), ::toupper); - if (std::find(std::begin(vars_to_uppercase), std::end(vars_to_uppercase), - name) != std::end(vars_to_uppercase)) { - env_str->assign(name + "=" + env_str->substr(pos + 1)); - } -} - -#elif defined(__CYGWIN__) // not defined(_WIN32) - -static void PreprocessEnvString(string* env_str) { - int pos = env_str->find_first_of('='); - if (pos == string::npos) return; - string name = env_str->substr(0, pos); - if (name == "PATH") { - env_str->assign("PATH=" + env_str->substr(pos + 1)); - } else if (name == "TMP") { - // A valid Windows path "c:/foo" is also a valid Unix path list of - // ["c", "/foo"] so must use ConvertPath here. See GitHub issue #1684. - env_str->assign("TMP=" + blaze_util::ConvertPath(env_str->substr(pos + 1))); - } -} - -#else // Non-Windows platforms. - -static void PreprocessEnvString(const string* env_str) { - // do nothing. -} -#endif // defined(_WIN32) - -static std::vector GetProcessedEnv() { - std::vector processed_env; - for (char** env = environ; *env != nullptr; env++) { - string env_str(*env); - if (IsValidEnvName(*env)) { - PreprocessEnvString(&env_str); - processed_env.push_back(std::move(env_str)); - } - } - return processed_env; -} - // IMPORTANT: The options added here do not come from the user. In order for // their source to be correctly tracked, the options must either be passed // as --default_override=0, 0 being "client", or must be listed in diff --git a/src/main/cpp/option_processor_unix.cc b/src/main/cpp/option_processor_unix.cc new file mode 100644 index 00000000000000..36c590d15802d8 --- /dev/null +++ b/src/main/cpp/option_processor_unix.cc @@ -0,0 +1,35 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "src/main/cpp/option_processor-internal.h" + +// On OSX, there apparently is no header that defines this. +#ifndef environ +extern char** environ; +#endif + +namespace blaze::internal { + +std::vector GetProcessedEnv() { + std::vector processed_env; + for (char** env = environ; *env != nullptr; env++) { + processed_env.emplace_back(*env); + } + return processed_env; +} + +} // namespace blaze::internal diff --git a/src/main/cpp/option_processor_windows.cc b/src/main/cpp/option_processor_windows.cc new file mode 100644 index 00000000000000..ce5b037cf26c0d --- /dev/null +++ b/src/main/cpp/option_processor_windows.cc @@ -0,0 +1,94 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#define WIN32_LEAN_AND_MEAN +#include + +#include +#include +#include + +#include "absl/strings/ascii.h" +#include "src/main/cpp/option_processor-internal.h" +#include "src/main/cpp/util/strings.h" + +namespace blaze::internal { + +#if defined(__CYGWIN__) + +static void PreprocessEnvString(std::string* env_str) { + int pos = env_str->find_first_of('='); + if (pos == string::npos) { + return; + } + std::string name = env_str->substr(0, pos); + if (name == "PATH") { + env_str->assign("PATH=" + env_str->substr(pos + 1)); + } else if (name == "TMP") { + // A valid Windows path "c:/foo" is also a valid Unix path list of + // ["c", "/foo"] so must use ConvertPath here. See GitHub issue #1684. + env_str->assign("TMP=" + blaze_util::ConvertPath(env_str->substr(pos + 1))); + } +} + +#else // not defined(__CYGWIN__) + +static void PreprocessEnvString(std::string* env_str) { + static constexpr const char* vars_to_uppercase[] = { + "PATH", "SYSTEMROOT", "SYSTEMDRIVE", "TEMP", "TEMPDIR", "TMP"}; + + std::size_t pos = env_str->find_first_of('='); + if (pos == std::string::npos) { + return; + } + + std::string name = absl::AsciiStrToUpper(env_str->substr(0, pos)); + if (std::find(std::begin(vars_to_uppercase), std::end(vars_to_uppercase), + name) != std::end(vars_to_uppercase)) { + env_str->assign(name + "=" + env_str->substr(pos + 1)); + } +} + +#endif // defined(__CYGWIN__) + +static bool IsValidEnvName(std::string_view s) { + std::string_view name = s.substr(0, s.find('=')); + return std::all_of(name.begin(), name.end(), [](char c) { + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || + (c >= '0' && c <= '9') || c == '_' || c == '(' || c == ')'; + }); +} + +// Use GetEnvironmentStringsW to get the environment variables to support +// Unicode regardless of the current code page. +std::vector GetProcessedEnv() { + std::vector processed_env; + wchar_t* env = GetEnvironmentStringsW(); + if (env == nullptr) { + return processed_env; + } + + for (wchar_t* p = env; *p != L'\0'; p += wcslen(p) + 1) { + std::string env_str = blaze_util::WstringToCstring(p); + if (IsValidEnvName(env_str)) { + PreprocessEnvString(&env_str); + processed_env.push_back(std::move(env_str)); + } + } + + FreeEnvironmentStringsW(env); + return processed_env; +} + +} // namespace blaze::internal diff --git a/src/main/java/com/google/devtools/build/lib/BUILD b/src/main/java/com/google/devtools/build/lib/BUILD index 429e4a3c59399e..71deae4ac94099 100644 --- a/src/main/java/com/google/devtools/build/lib/BUILD +++ b/src/main/java/com/google/devtools/build/lib/BUILD @@ -325,7 +325,6 @@ java_library( "//src/main/java/com/google/devtools/build/lib/actions:file_metadata", "//src/main/java/com/google/devtools/build/lib/actions:important_output_handler", "//src/main/java/com/google/devtools/build/lib/actions:localhost_capacity", - "//src/main/java/com/google/devtools/build/lib/actions:middleman_type", "//src/main/java/com/google/devtools/build/lib/actions:package_roots", "//src/main/java/com/google/devtools/build/lib/actions:resource_manager", "//src/main/java/com/google/devtools/build/lib/actions:runfiles_tree", @@ -478,6 +477,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/util:process", "//src/main/java/com/google/devtools/build/lib/util:shallow_object_size_computer", "//src/main/java/com/google/devtools/build/lib/util:string", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/util/io", "//src/main/java/com/google/devtools/build/lib/util/io:io-proto", "//src/main/java/com/google/devtools/build/lib/util/io:out-err", diff --git a/src/main/java/com/google/devtools/build/lib/actions/BUILD b/src/main/java/com/google/devtools/build/lib/actions/BUILD index 1bcd190f718c6d..562006f07e784b 100644 --- a/src/main/java/com/google/devtools/build/lib/actions/BUILD +++ b/src/main/java/com/google/devtools/build/lib/actions/BUILD @@ -196,7 +196,6 @@ java_library( "//src/main/java/com/google/devtools/build/lib/util:filetype", "//src/main/java/com/google/devtools/build/lib/util:os", "//src/main/java/com/google/devtools/build/lib/util:shell_escaper", - "//src/main/java/com/google/devtools/build/lib/util:string", "//src/main/java/com/google/devtools/build/lib/util:var_int", "//src/main/java/com/google/devtools/build/lib/util/io", "//src/main/java/com/google/devtools/build/lib/vfs", diff --git a/src/main/java/com/google/devtools/build/lib/actions/CommandLines.java b/src/main/java/com/google/devtools/build/lib/actions/CommandLines.java index 665667ee13449f..9a489fc99c16bc 100644 --- a/src/main/java/com/google/devtools/build/lib/actions/CommandLines.java +++ b/src/main/java/com/google/devtools/build/lib/actions/CommandLines.java @@ -28,7 +28,6 @@ import com.google.protobuf.ByteString; import java.io.IOException; import java.io.OutputStream; -import java.nio.charset.Charset; import java.util.ArrayList; import java.util.List; import java.util.UUID; @@ -134,8 +133,7 @@ ExpandedCommandLines expand( new ParamFileActionInput( paramFileExecPath, ParameterFile.flagsOnly(chunk.arguments()), - paramFileInfo.getFileType(), - paramFileInfo.getCharset())); + paramFileInfo.getFileType())); for (String positionalArg : ParameterFile.nonFlags(chunk.arguments())) { arguments.add(positionalArg); cmdLineLength += positionalArg.length() + 1; @@ -143,10 +141,7 @@ ExpandedCommandLines expand( } else { paramFiles.add( new ParamFileActionInput( - paramFileExecPath, - chunk.arguments(), - paramFileInfo.getFileType(), - paramFileInfo.getCharset())); + paramFileExecPath, chunk.arguments(), paramFileInfo.getFileType())); } } } @@ -224,22 +219,17 @@ public static final class ParamFileActionInput extends VirtualActionInput { private final PathFragment paramFileExecPath; private final Iterable arguments; private final ParameterFileType type; - private final Charset charset; public ParamFileActionInput( - PathFragment paramFileExecPath, - Iterable arguments, - ParameterFileType type, - Charset charset) { + PathFragment paramFileExecPath, Iterable arguments, ParameterFileType type) { this.paramFileExecPath = paramFileExecPath; this.arguments = arguments; this.type = type; - this.charset = charset; } @Override public void writeTo(OutputStream out) throws IOException { - ParameterFile.writeParameterFile(out, arguments, type, charset); + ParameterFile.writeParameterFile(out, arguments, type); } @Override @@ -285,7 +275,6 @@ private static void addParamFileInfoToFingerprint( fingerprint.addUUID(PARAM_FILE_UUID); fingerprint.addString(paramFileInfo.getFlagFormatString()); fingerprint.addString(paramFileInfo.getFileType().toString()); - fingerprint.addString(paramFileInfo.getCharset().toString()); } public static Builder builder() { diff --git a/src/main/java/com/google/devtools/build/lib/actions/ParamFileInfo.java b/src/main/java/com/google/devtools/build/lib/actions/ParamFileInfo.java index ab8b4e515cbb2e..23211736fe5d15 100644 --- a/src/main/java/com/google/devtools/build/lib/actions/ParamFileInfo.java +++ b/src/main/java/com/google/devtools/build/lib/actions/ParamFileInfo.java @@ -14,14 +14,11 @@ package com.google.devtools.build.lib.actions; -import static java.nio.charset.StandardCharsets.ISO_8859_1; - import com.google.common.base.Preconditions; import com.google.common.collect.Interner; import com.google.devtools.build.lib.actions.ParameterFile.ParameterFileType; import com.google.devtools.build.lib.concurrent.BlazeInterners; import com.google.errorprone.annotations.CanIgnoreReturnValue; -import java.nio.charset.Charset; import java.util.Objects; import javax.annotation.concurrent.Immutable; @@ -32,7 +29,6 @@ @Immutable public final class ParamFileInfo { private final ParameterFileType fileType; - private final Charset charset; private final String flagFormatString; private final boolean always; private final boolean flagsOnly; @@ -42,7 +38,6 @@ public final class ParamFileInfo { private ParamFileInfo(Builder builder) { this.fileType = Preconditions.checkNotNull(builder.fileType); - this.charset = Preconditions.checkNotNull(builder.charset); this.flagFormatString = Preconditions.checkNotNull(builder.flagFormatString); this.always = builder.always; this.flagsOnly = builder.flagsOnly; @@ -53,11 +48,6 @@ public ParameterFileType getFileType() { return fileType; } - /** Returns the charset. */ - public Charset getCharset() { - return charset; - } - /** Returns the format string for the params filename on the command line (typically "@%s"). */ public String getFlagFormatString() { return flagFormatString; @@ -78,7 +68,7 @@ public boolean flagsOnly() { @Override public int hashCode() { - return Objects.hash(charset, flagFormatString, fileType, always); + return Objects.hash(flagFormatString, fileType, always); } @Override @@ -90,7 +80,6 @@ public boolean equals(Object obj) { return false; } return fileType.equals(other.fileType) - && charset.equals(other.charset) && flagFormatString.equals(other.flagFormatString) && always == other.always && flagsOnly == other.flagsOnly; @@ -103,7 +92,6 @@ public static Builder builder(ParameterFileType parameterFileType) { /** Builder for a ParamFileInfo. */ public static class Builder { private final ParameterFileType fileType; - private Charset charset = ISO_8859_1; private String flagFormatString = "@%s"; private boolean always; private boolean flagsOnly; @@ -112,13 +100,6 @@ private Builder(ParameterFileType fileType) { this.fileType = fileType; } - /** Sets the encoding to write the parameter file with. */ - @CanIgnoreReturnValue - public Builder setCharset(Charset charset) { - this.charset = charset; - return this; - } - /** * Sets a format string to use for the flag that is passed to original command. * diff --git a/src/main/java/com/google/devtools/build/lib/actions/ParameterFile.java b/src/main/java/com/google/devtools/build/lib/actions/ParameterFile.java index d4dc55197f1336..3e4d35340065ef 100644 --- a/src/main/java/com/google/devtools/build/lib/actions/ParameterFile.java +++ b/src/main/java/com/google/devtools/build/lib/actions/ParameterFile.java @@ -13,47 +13,38 @@ // limitations under the License. package com.google.devtools.build.lib.actions; -import static java.nio.charset.StandardCharsets.ISO_8859_1; -import static java.nio.charset.StandardCharsets.UTF_8; - import com.google.common.collect.Iterables; import com.google.devtools.build.lib.unsafe.StringUnsafe; import com.google.devtools.build.lib.util.FileType; import com.google.devtools.build.lib.util.GccParamFileEscaper; import com.google.devtools.build.lib.util.ShellEscaper; -import com.google.devtools.build.lib.util.StringUtil; import com.google.devtools.build.lib.util.WindowsParamFileEscaper; import com.google.devtools.build.lib.vfs.PathFragment; import java.io.BufferedOutputStream; import java.io.IOException; import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.nio.ByteBuffer; -import java.nio.CharBuffer; -import java.nio.charset.Charset; -import java.nio.charset.CharsetEncoder; /** - * Support for parameter file generation (as used by gcc and other tools, e.g. - * {@code gcc @param_file}. Note that the parameter file needs to be explicitly - * deleted after use. Different tools require different parameter file formats, - * which can be selected via the {@link ParameterFileType} enum. + * Support for parameter file generation (as used by gcc and other tools, e.g. {@code + * gcc @param_file}. Note that the parameter file needs to be explicitly deleted after use. + * Different tools require different parameter file formats, which can be selected via the {@link + * ParameterFileType} enum. * - *

The default charset is ISO-8859-1 (latin1). This also has to match the - * expectation of the tool. + *

The default charset is ISO-8859-1 (latin1). This also has to match the expectation of the + * tool. * - *

Don't use this class for new code. Use the ParameterFileWriteAction - * instead! + *

Don't use this class for new code. Use the ParameterFileWriteAction instead! */ public class ParameterFile { + public static final StringUnsafe STRING_UNSAFE = StringUnsafe.getInstance(); + /** Different styles of parameter files. */ public enum ParameterFileType { /** - * A parameter file with every parameter on a separate line. This format - * cannot handle newlines in parameters. It is currently used for most - * tools, but may not be interpreted correctly if parameters contain - * white space or other special characters. It should be avoided for new + * A parameter file with every parameter on a separate line. This format cannot handle newlines + * in parameters. It is currently used for most tools, but may not be interpreted correctly if + * parameters contain white space or other special characters. It should be avoided for new * development. */ UNQUOTED, @@ -81,119 +72,38 @@ public enum ParameterFileType { public static final FileType PARAMETER_FILE = FileType.of(".params"); - /** - * Creates a parameter file with the given parameters. - */ - private ParameterFile() { - } - /** - * Derives an path from a given path by appending ".params". - */ + /** Creates a parameter file with the given parameters. */ + private ParameterFile() {} + + /** Derives a path from a given path by appending ".params". */ public static PathFragment derivePath(PathFragment original) { return derivePath(original, "2"); } - /** - * Derives an path from a given path by appending ".params". - */ + /** Derives a path from a given path by appending ".params". */ public static PathFragment derivePath(PathFragment original, String flavor) { return original.replaceName(original.getBaseName() + "-" + flavor + ".params"); } /** Writes an argument list to a parameter file. */ public static void writeParameterFile( - OutputStream out, Iterable arguments, ParameterFileType type, Charset charset) - throws IOException { + OutputStream out, Iterable arguments, ParameterFileType type) throws IOException { OutputStream bufferedOut = new BufferedOutputStream(out); switch (type) { - case SHELL_QUOTED -> writeContent(bufferedOut, ShellEscaper.escapeAll(arguments), charset); - case GCC_QUOTED -> - writeContent(bufferedOut, GccParamFileEscaper.escapeAll(arguments), charset); - case UNQUOTED -> writeContent(bufferedOut, arguments, charset); - case WINDOWS -> - writeContent(bufferedOut, WindowsParamFileEscaper.escapeAll(arguments), charset); - } - } - - private static void writeContent( - OutputStream outputStream, Iterable arguments, Charset charset) throws IOException { - if (charset.equals(ISO_8859_1)) { - writeContentLatin1(outputStream, arguments); - } else if (charset.equals(UTF_8)) { - writeContentUtf8(outputStream, arguments); - } else { - // Generic charset support - OutputStreamWriter out = new OutputStreamWriter(outputStream, charset); - for (String line : arguments) { - out.write(line); - out.write('\n'); - } - out.flush(); + case SHELL_QUOTED -> writeContent(bufferedOut, ShellEscaper.escapeAll(arguments)); + case GCC_QUOTED -> writeContent(bufferedOut, GccParamFileEscaper.escapeAll(arguments)); + case UNQUOTED -> writeContent(bufferedOut, arguments); + case WINDOWS -> writeContent(bufferedOut, WindowsParamFileEscaper.escapeAll(arguments)); } } - /** - * Fast LATIN-1 path that avoids GC overhead. This takes advantage of the fact that strings are - * encoded as either LATIN-1 or UTF-16 under JDK9+. When LATIN-1 we can simply copy the byte - * buffer, when UTF-16 we can fail loudly. - */ - private static void writeContentLatin1(OutputStream outputStream, Iterable arguments) + private static void writeContent(OutputStream out, Iterable arguments) throws IOException { - StringUnsafe stringUnsafe = StringUnsafe.getInstance(); for (String line : arguments) { - if (stringUnsafe.getCoder(line) == StringUnsafe.LATIN1) { - byte[] bytes = stringUnsafe.getByteArray(line); - outputStream.write(bytes); - } else { - // Error case, encode with '?' characters - ByteBuffer encodedBytes = ISO_8859_1.encode(CharBuffer.wrap(line)); - outputStream.write( - encodedBytes.array(), - encodedBytes.arrayOffset(), - encodedBytes.arrayOffset() + encodedBytes.limit()); - } - outputStream.write('\n'); - } - outputStream.flush(); - } - - /** - * Fast UTF-8 path that tries to coder GC overhead. This takes advantage of the fact that strings - * are encoded as either LATIN-1 or UTF-16 under JDK9+. When LATIN-1 we can check if the buffer is - * ASCII and copy that directly (since this is both valid LATIN-1 and UTF-8), in all other cases - * we must re-encode. - */ - private static void writeContentUtf8(OutputStream outputStream, Iterable arguments) - throws IOException { - CharsetEncoder encoder = UTF_8.newEncoder(); - StringUnsafe stringUnsafe = StringUnsafe.getInstance(); - for (String line : arguments) { - byte[] bytes = stringUnsafe.getByteArray(line); - if (stringUnsafe.getCoder(line) == StringUnsafe.LATIN1 && isAscii(bytes)) { - outputStream.write(bytes); - } else if (!StringUtil.reencodeInternalToExternal(line).equals(line)) { - // We successfully decoded line from utf8 - meaning it was already encoded as utf8. - // We do not want to double-encode. - outputStream.write(bytes); - } else { - ByteBuffer encodedBytes = encoder.encode(CharBuffer.wrap(line)); - outputStream.write( - encodedBytes.array(), - encodedBytes.arrayOffset(), - encodedBytes.arrayOffset() + encodedBytes.limit()); - } - outputStream.write('\n'); - } - outputStream.flush(); - } - - private static boolean isAscii(byte[] latin1Bytes) { - boolean hiBitSet = false; - int n = latin1Bytes.length; - for (int i = 0; i < n; ++i) { - hiBitSet |= ((latin1Bytes[i] & 0x80) != 0); + out.write(STRING_UNSAFE.getInternalStringBytes(line)); + out.write('\n'); } - return !hiBitSet; + out.flush(); } /** Criterion shared by {@link #flagsOnly} and {@link #nonFlags}. */ diff --git a/src/main/java/com/google/devtools/build/lib/analysis/BUILD b/src/main/java/com/google/devtools/build/lib/analysis/BUILD index acb4ef8fae0e10..c127673cd5e46a 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/BUILD +++ b/src/main/java/com/google/devtools/build/lib/analysis/BUILD @@ -435,6 +435,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/util:os", "//src/main/java/com/google/devtools/build/lib/util:shell_escaper", "//src/main/java/com/google/devtools/build/lib/util:string", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/util/io:out-err", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", diff --git a/src/main/java/com/google/devtools/build/lib/analysis/TargetCompleteEvent.java b/src/main/java/com/google/devtools/build/lib/analysis/TargetCompleteEvent.java index 40feee187b3dcf..d9c1c0fe8dd900 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/TargetCompleteEvent.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/TargetCompleteEvent.java @@ -16,8 +16,6 @@ import static com.google.devtools.build.lib.analysis.config.BuildConfigurationValue.configurationId; import static com.google.devtools.build.lib.buildeventstream.TestFileNameConstants.BASELINE_COVERAGE; -import static java.nio.charset.StandardCharsets.ISO_8859_1; -import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -63,7 +61,7 @@ import com.google.devtools.build.lib.skyframe.ConfiguredTargetKey; import com.google.devtools.build.lib.util.DetailedExitCode; import com.google.devtools.build.lib.util.DetailedExitCode.DetailedExitCodeComparator; -import com.google.devtools.build.lib.util.OS; +import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; import com.google.devtools.build.skyframe.SkyValue; @@ -366,26 +364,22 @@ public static BuildEventStreamProtos.File newFileFromArtifact( CompletionContext completionContext, @Nullable String uri) { if (name == null) { - name = artifact.getRootRelativePath().getRelative(relPath).getPathString(); - if (OS.getCurrent() != OS.WINDOWS) { - // TODO(b/36360490): Unix file names are currently always Latin-1 strings, even if they - // contain UTF-8 bytes. Protobuf specifies string fields to contain UTF-8 and passing a - // "Latin-1 with UTF-8 bytes" string will lead to double-encoding the bytes with the high - // bit set. Until we address the pervasive use of "Latin-1 with UTF-8 bytes" throughout - // Bazel (eg. by standardizing on UTF-8 on Unix systems) we will need to silently swap out - // the encoding at the protobuf library boundary. Windows does not suffer from this issue - // due to the corresponding OS APIs supporting UTF-16. - name = new String(name.getBytes(ISO_8859_1), UTF_8); - } + name = + StringEncoding.internalToUnicode( + artifact.getRootRelativePath().getRelative(relPath).getPathString()); } File.Builder file = File.newBuilder() .setName(name) - .addAllPathPrefix(artifact.getRoot().getExecPath().segments()); + .addAllPathPrefix( + Iterables.transform( + artifact.getRoot().getExecPath().segments(), + StringEncoding::internalToUnicode)); FileArtifactValue fileArtifactValue = completionContext.getFileArtifactValue(artifact); if (fileArtifactValue instanceof UnresolvedSymlinkArtifactValue) { file.setSymlinkTargetPath( - ((UnresolvedSymlinkArtifactValue) fileArtifactValue).getSymlinkTarget()); + StringEncoding.internalToUnicode( + ((UnresolvedSymlinkArtifactValue) fileArtifactValue).getSymlinkTarget())); } else if (fileArtifactValue != null && fileArtifactValue.getType().exists()) { byte[] digest = fileArtifactValue.getDigest(); if (digest != null) { @@ -394,7 +388,7 @@ public static BuildEventStreamProtos.File newFileFromArtifact( file.setLength(fileArtifactValue.getSize()); } if (uri != null) { - file.setUri(uri); + file.setUri(StringEncoding.internalToUnicode(uri)); } return file.build(); } diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/FileWriteAction.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/FileWriteAction.java index 6fa43fb7c230ed..df3598c0d936e8 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/actions/FileWriteAction.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/FileWriteAction.java @@ -291,11 +291,7 @@ public String getFileContents() { throw new IllegalStateException(e); } - try { - return StringUnsafe.getInstance().newInstance(uncompressedBytes, coder); - } catch (ReflectiveOperationException e) { - throw new IllegalStateException(e); - } + return StringUnsafe.getInstance().newInstance(uncompressedBytes, coder); } @Override diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/ParameterFileWriteAction.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/ParameterFileWriteAction.java index 06cf671fdbb7dd..393aadb5a821e6 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/actions/ParameterFileWriteAction.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/ParameterFileWriteAction.java @@ -117,7 +117,7 @@ public Iterable getArguments() public String getStringContents() throws CommandLineExpansionException, InterruptedException, IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); - ParameterFile.writeParameterFile(out, getArguments(), type, ISO_8859_1); + ParameterFile.writeParameterFile(out, getArguments(), type); return out.toString(ISO_8859_1); } @@ -164,7 +164,7 @@ private static class ParamFileWriter implements DeterministicWriter { @Override public void writeOutputFile(OutputStream out) throws IOException { - ParameterFile.writeParameterFile(out, arguments, type, ISO_8859_1); + ParameterFile.writeParameterFile(out, arguments, type); } } diff --git a/src/main/java/com/google/devtools/build/lib/analysis/starlark/Args.java b/src/main/java/com/google/devtools/build/lib/analysis/starlark/Args.java index 42a7f049d765a6..9de947314d9b19 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/starlark/Args.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/starlark/Args.java @@ -31,7 +31,6 @@ import com.google.devtools.build.lib.starlarkbuildapi.CommandLineArgsApi; import com.google.devtools.build.lib.supplier.InterruptibleSupplier; import com.google.errorprone.annotations.CanIgnoreReturnValue; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -183,10 +182,7 @@ public ParamFileInfo getParamFileInfo() { @Override public CommandLineArgsApi addArgument( - Object argNameOrValue, - Object value, - Object format, - StarlarkThread thread) + Object argNameOrValue, Object value, Object format, StarlarkThread thread) throws EvalException { throw Starlark.errorf("cannot modify frozen value"); } @@ -250,6 +246,7 @@ private static class MutableArgs extends Args implements StarlarkValue, Mutabili private final List> potentialDirectoryArtifacts = new ArrayList<>(); private final Set directoryArtifacts = new HashSet<>(); + /** * If true, flag names and values will be grouped with '=', e.g. * @@ -286,12 +283,11 @@ public ParamFileInfo getParamFileInfo() { if (flagFormatString == null) { return null; } else { - return ParamFileInfo.builder(getParameterFileType()) - .setFlagFormatString(flagFormatString) - .setUseAlways(alwaysUseParamFile) - .setCharset(StandardCharsets.UTF_8) - .setFlagsOnly(flagPerLine) - .build(); + ParamFileInfo.Builder builder = + ParamFileInfo.builder(getParameterFileType()) + .setFlagFormatString(flagFormatString) + .setUseAlways(alwaysUseParamFile); + return builder.setFlagsOnly(flagPerLine).build(); } } @@ -623,6 +619,5 @@ public ImmutableSet getDirectoryArtifacts() { potentialDirectoryArtifacts.clear(); return ImmutableSet.copyOf(directoryArtifacts); } - } } diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/BUILD b/src/main/java/com/google/devtools/build/lib/bazel/repository/BUILD index 30b5d051d1d556..cd8e322476d33f 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/BUILD +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/BUILD @@ -38,7 +38,6 @@ java_library( "//src/main/java/com/google/devtools/build/lib/packages", "//src/main/java/com/google/devtools/build/lib/packages/semantics", "//src/main/java/com/google/devtools/build/lib/repository:repository_events", - "//src/main/java/com/google/devtools/build/lib/rules:repository/repo_recorded_input", "//src/main/java/com/google/devtools/build/lib/rules:repository/repository_directory_value", "//src/main/java/com/google/devtools/build/lib/rules:repository/repository_function", "//src/main/java/com/google/devtools/build/lib/rules:repository/resolved_file_value", @@ -46,9 +45,9 @@ java_library( "//src/main/java/com/google/devtools/build/lib/rules:repository/workspace_configured_target_factory", "//src/main/java/com/google/devtools/build/lib/skyframe:precomputed_value", "//src/main/java/com/google/devtools/build/lib/util", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", - "//src/main/java/com/google/devtools/build/skyframe", "//src/main/java/com/google/devtools/build/skyframe:skyframe-objects", "//src/main/java/com/google/devtools/common/options", "//src/main/java/net/starlark/java/eval", diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/CompressedTarFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/CompressedTarFunction.java index c32a423940e0a1..6abede93ea0b83 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/CompressedTarFunction.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/CompressedTarFunction.java @@ -16,11 +16,11 @@ import static com.google.devtools.build.lib.bazel.repository.StripPrefixedPath.maybeDeprefixSymlink; import static java.nio.charset.StandardCharsets.ISO_8859_1; -import static java.nio.charset.StandardCharsets.UTF_8; import com.google.auto.service.AutoService; import com.google.common.io.ByteStreams; import com.google.devtools.build.lib.bazel.repository.DecompressorValue.Decompressor; +import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; @@ -168,10 +168,9 @@ public Path decompress(DecompressorDescriptor descriptor) */ private static String toRawBytesString(String name) { // Marked strings are already encoded in ISO-8859-1. Other strings originate from PAX headers - // and are thus encoded in UTF-8, which we decode to the raw bytes and then re-encode trivially - // in ISO-8859-1. + // and are thus Unicode. return MarkedIso88591Charset.getRawBytesStringIfMarked(name) - .orElseGet(() -> new String(name.getBytes(UTF_8), ISO_8859_1)); + .orElseGet(() -> StringEncoding.unicodeToInternal(name)); } /** A provider of {@link MarkedIso88591Charset}s. */ diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorValue.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorValue.java index 3aca056e2c8f39..f97d2492fb503f 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorValue.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorValue.java @@ -14,9 +14,6 @@ package com.google.devtools.build.lib.bazel.repository; -import static java.nio.charset.StandardCharsets.ISO_8859_1; -import static java.nio.charset.StandardCharsets.UTF_8; - import com.google.common.annotations.VisibleForTesting; import com.google.devtools.build.lib.rules.repository.RepositoryFunction.RepositoryFunctionException; import com.google.devtools.build.lib.vfs.Path; @@ -66,11 +63,7 @@ public static Optional maybeMakePrefixSuggestion(PathFragment pathFragme if (!pathFragment.isMultiSegment()) { return Optional.empty(); } - String rawFirstSegment = pathFragment.getSegment(0); - // Users can only specify prefixes from Starlark, which is planned to use UTF-8 for all - // strings, but currently still collects the raw bytes in a latin-1 string. We thus - // optimistically decode the raw bytes with UTF-8 here for display purposes. - return Optional.of(new String(rawFirstSegment.getBytes(ISO_8859_1), UTF_8)); + return Optional.of(pathFragment.getSegment(0)); } } diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/UrlRewriter.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/UrlRewriter.java index 5d10613febbbcb..8e24c5333e6b2e 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/UrlRewriter.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/UrlRewriter.java @@ -34,13 +34,14 @@ import com.google.devtools.build.lib.vfs.Path; import java.io.BufferedReader; import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; import java.io.Reader; import java.io.StringReader; import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; -import java.nio.file.Files; import java.util.Base64; import java.util.Collection; import java.util.HashMap; @@ -103,7 +104,9 @@ public static UrlRewriter getDownloaderUrlRewriter( String.format("Unable to find downloader config file %s", configPath)); } - try (BufferedReader reader = Files.newBufferedReader(actualConfigPath.getPathFile().toPath())) { + try (InputStream inputStream = actualConfigPath.getInputStream(); + Reader inputStreamReader = new InputStreamReader(inputStream); + Reader reader = new BufferedReader(inputStreamReader)) { return new UrlRewriter(log, configPath, reader); } catch (IOException e) { throw new UrlRewriterParseException(e.getMessage()); @@ -111,9 +114,8 @@ public static UrlRewriter getDownloaderUrlRewriter( } /** - * Rewrites {@code urls} using the configuration provided to {@link - * #getDownloaderUrlRewriter(String, Reporter)}. The returned list of URLs may be empty if the - * configuration used blocks all the input URLs. + * Rewrites {@code urls} using the configuration provided to {@link #getDownloaderUrlRewriter}. + * The returned list of URLs may be empty if the configuration used blocks all the input URLs. * * @param urls The input list of {@link URL}s. May be empty. * @return The amended lists of URLs. diff --git a/src/main/java/com/google/devtools/build/lib/query2/BUILD b/src/main/java/com/google/devtools/build/lib/query2/BUILD index 646517cb8f0290..80e8e5803ca36d 100644 --- a/src/main/java/com/google/devtools/build/lib/query2/BUILD +++ b/src/main/java/com/google/devtools/build/lib/query2/BUILD @@ -59,7 +59,6 @@ java_library( "//src/main/java/com/google/devtools/build/lib/analysis/config/output:configuration_for_output", "//src/main/java/com/google/devtools/build/lib/analysis/config/output:fragment_for_output", "//src/main/java/com/google/devtools/build/lib/analysis/config/output:fragment_options_for_output", - "//src/main/java/com/google/devtools/build/lib/analysis/producers", "//src/main/java/com/google/devtools/build/lib/bugreport", "//src/main/java/com/google/devtools/build/lib/buildeventstream", "//src/main/java/com/google/devtools/build/lib/buildeventstream/proto:build_event_stream_java_proto", @@ -118,7 +117,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/util:command", "//src/main/java/com/google/devtools/build/lib/util:detailed_exit_code", "//src/main/java/com/google/devtools/build/lib/util:shell_escaper", - "//src/main/java/com/google/devtools/build/lib/util:string", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", "//src/main/java/com/google/devtools/build/skyframe", diff --git a/src/main/java/com/google/devtools/build/lib/query2/aquery/ActionGraphTextOutputFormatterCallback.java b/src/main/java/com/google/devtools/build/lib/query2/aquery/ActionGraphTextOutputFormatterCallback.java index 1e557686996817..0a97cb982f5d23 100644 --- a/src/main/java/com/google/devtools/build/lib/query2/aquery/ActionGraphTextOutputFormatterCallback.java +++ b/src/main/java/com/google/devtools/build/lib/query2/aquery/ActionGraphTextOutputFormatterCallback.java @@ -15,7 +15,7 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.devtools.build.lib.query2.aquery.AqueryUtils.getActionInputs; -import static com.google.devtools.build.lib.util.StringUtil.reencodeInternalToExternal; +import static com.google.devtools.build.lib.util.StringEncoding.internalToUnicode; import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.collect.ImmutableList; @@ -207,7 +207,7 @@ private void writeAction(ActionAnalysisMetadata action, PrintStream printStream) .append(" Inputs: [") .append( inputs.toList().stream() - .map(input -> escapeBytestringUtf8(input.getExecPathString())) + .map(input -> internalToEscapedUnicode(input.getExecPathString())) .sorted() .collect(Collectors.joining(", "))) .append("]\n"); @@ -218,7 +218,7 @@ private void writeAction(ActionAnalysisMetadata action, PrintStream printStream) action.getOutputs().stream() .map( output -> - escapeBytestringUtf8( + internalToEscapedUnicode( output.isTreeArtifact() ? output.getExecPathString() + " (TreeArtifact)" : output.getExecPathString())) @@ -239,7 +239,7 @@ private void writeAction(ActionAnalysisMetadata action, PrintStream printStream) Streams.stream(fixedEnvironment) .map( environmentVariable -> - escapeBytestringUtf8( + internalToEscapedUnicode( environmentVariable.getKey() + "=" + environmentVariable.getValue())) @@ -257,7 +257,7 @@ private void writeAction(ActionAnalysisMetadata action, PrintStream printStream) /* prettyPrintArgs= */ true, ((CommandAction) action) .getArguments().stream() - .map(a -> escapeBytestringUtf8(a)) + .map(a -> internalToEscapedUnicode(a)) .collect(toImmutableList()), /* environment= */ null, /* environmentVariablesToClear= */ null, @@ -351,24 +351,20 @@ private Map getParamFileNameToContentMap() { } /** - * Decode a bytestring that might contain UTF-8, and escape any characters outside the basic - * printable ASCII range. - * - *

This function is intended for human consumption in debug output that needs to be durable - * against unusual encoding settings, and does not guarantee that the escaping process is - * reverseable. + * Convert an internal string (see {@link com.google.devtools.build.lib.util.StringEncoding}) to a + * Unicode string with any character outside the basic printable ASCII range escaped. * *

Characters other than printable ASCII but within the Basic Multilingual Plane are formatted * with `\\uXXXX`. Characters outside the BMP are formatted as `\\UXXXXXXXX`. */ - public static String escapeBytestringUtf8(String maybeUtf8) { - if (maybeUtf8.chars().allMatch(c -> c >= 0x20 && c < 0x7F)) { - return maybeUtf8; + public static String internalToEscapedUnicode(String internal) { + if (internal.chars().allMatch(c -> c >= 0x20 && c < 0x7F)) { + return internal; } - final String decoded = reencodeInternalToExternal(maybeUtf8); - final StringBuilder sb = new StringBuilder(decoded.length() * 8); - decoded + final String unicode = internalToUnicode(internal); + final StringBuilder sb = new StringBuilder(unicode.length() * 8); + unicode .codePoints() .forEach( c -> { diff --git a/src/main/java/com/google/devtools/build/lib/remote/BUILD b/src/main/java/com/google/devtools/build/lib/remote/BUILD index cef5c7b29e203f..37e221e88dd010 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/BUILD +++ b/src/main/java/com/google/devtools/build/lib/remote/BUILD @@ -117,7 +117,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/util:detailed_exit_code", "//src/main/java/com/google/devtools/build/lib/util:exit_code", "//src/main/java/com/google/devtools/build/lib/util:os", - "//src/main/java/com/google/devtools/build/lib/util:string", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/util:temp_path_generator", "//src/main/java/com/google/devtools/build/lib/util/io", "//src/main/java/com/google/devtools/build/lib/util/io:out-err", diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionService.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionService.java index ff6012bbbfac6c..2ee998fccebfba 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionService.java +++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionService.java @@ -27,8 +27,8 @@ import static com.google.devtools.build.lib.remote.util.Utils.grpcAwareErrorMessage; import static com.google.devtools.build.lib.remote.util.Utils.shouldUploadLocalResultsToRemoteCache; import static com.google.devtools.build.lib.remote.util.Utils.waitForBulkTransfer; -import static com.google.devtools.build.lib.util.StringUtil.reencodeExternalToInternal; -import static com.google.devtools.build.lib.util.StringUtil.reencodeInternalToExternal; +import static com.google.devtools.build.lib.util.StringEncoding.internalToUnicode; +import static com.google.devtools.build.lib.util.StringEncoding.unicodeToInternal; import static java.util.Collections.min; import build.bazel.remote.execution.v2.Action; @@ -262,8 +262,7 @@ private Command buildCommand( if (useOutputPaths) { var outputPaths = new ArrayList(); for (ActionInput output : outputs) { - String pathString = - reencodeInternalToExternal(remotePathResolver.localPathToOutputPath(output)); + String pathString = internalToUnicode(remotePathResolver.localPathToOutputPath(output)); outputPaths.add(pathString); } Collections.sort(outputPaths); @@ -272,8 +271,7 @@ private Command buildCommand( var outputFiles = new ArrayList(); var outputDirectories = new ArrayList(); for (ActionInput output : outputs) { - String pathString = - reencodeInternalToExternal(remotePathResolver.localPathToOutputPath(output)); + String pathString = internalToUnicode(remotePathResolver.localPathToOutputPath(output)); if (output.isDirectory()) { outputDirectories.add(pathString); } else { @@ -298,15 +296,15 @@ private Command buildCommand( OS executionOs = ConstraintConstants.getOsFromConstraints(executionPlatform.constraints()); arg = OsPathPolicy.of(executionOs).postProcessPathStringForExecution(arg); } - command.addArguments(reencodeInternalToExternal(arg)); + command.addArguments(internalToUnicode(arg)); } // Sorting the environment pairs by variable name. TreeSet variables = new TreeSet<>(env.keySet()); for (String var : variables) { command .addEnvironmentVariablesBuilder() - .setName(reencodeInternalToExternal(var)) - .setValue(reencodeInternalToExternal(env.get(var))); + .setName(internalToUnicode(var)) + .setValue(internalToUnicode(env.get(var))); } return command.setWorkingDirectory(remotePathResolver.getWorkingDirectory()).build(); @@ -930,12 +928,12 @@ private ListenableFuture downloadFile( ListenableFuture future = combinedCache.downloadFile( context, - reencodeInternalToExternal(remotePathResolver.localPathToOutputPath(file.path())), + internalToUnicode(remotePathResolver.localPathToOutputPath(file.path())), tmpPath, file.digest(), new CombinedCache.DownloadProgressReporter( progressStatusListener, - reencodeInternalToExternal(remotePathResolver.localPathToOutputPath(file.path())), + internalToUnicode(remotePathResolver.localPathToOutputPath(file.path())), file.digest().getSizeBytes())); return transform(future, (d) -> file, directExecutor()); } catch (IOException e) { @@ -1175,7 +1173,7 @@ static ActionResultMetadata parseActionResultMetadata( for (OutputDirectory dir : result.getOutputDirectoriesList()) { var outputPath = dir.getPath(); dirMetadataDownloads.put( - remotePathResolver.outputPathToLocalPath(reencodeExternalToInternal(outputPath)), + remotePathResolver.outputPathToLocalPath(unicodeToInternal(outputPath)), Futures.transformAsync( combinedCache.downloadBlob(context, outputPath, dir.getTreeDigest()), (treeBytes) -> @@ -1201,8 +1199,7 @@ static ActionResultMetadata parseActionResultMetadata( ImmutableMap.Builder files = ImmutableMap.builder(); for (OutputFile outputFile : result.getOutputFilesList()) { Path localPath = - remotePathResolver.outputPathToLocalPath( - reencodeExternalToInternal(outputFile.getPath())); + remotePathResolver.outputPathToLocalPath(unicodeToInternal(outputFile.getPath())); files.put( localPath, new FileMetadata( @@ -1220,8 +1217,8 @@ static ActionResultMetadata parseActionResultMetadata( result.getOutputSymlinksList()); for (var symlink : outputSymlinks) { var localPath = - remotePathResolver.outputPathToLocalPath(reencodeExternalToInternal(symlink.getPath())); - var target = PathFragment.create(reencodeExternalToInternal(symlink.getTarget())); + remotePathResolver.outputPathToLocalPath(unicodeToInternal(symlink.getPath())); + var target = PathFragment.create(unicodeToInternal(symlink.getTarget())); var existingMetadata = symlinkMap.get(localPath); if (existingMetadata != null) { if (!target.equals(existingMetadata.target())) { @@ -1632,7 +1629,7 @@ public SpawnResult waitForAndReuseOutputs(RemoteAction action, LocalExecution pr .toList(); try { for (String output : outputPathsList) { - String reencodedOutput = reencodeExternalToInternal(output); + String reencodedOutput = unicodeToInternal(output); Path sourcePath = previousExecution.action.getRemotePathResolver().outputPathToLocalPath(reencodedOutput); ActionInput outputArtifact = previousOutputs.get(sourcePath); @@ -1938,7 +1935,7 @@ public RemoteActionResult executeRemotely( PathFragment inMemoryOutputPath = getInMemoryOutputPath(action.getSpawn()); if (inMemoryOutputPath != null) { requestBuilder.addInlineOutputFiles( - reencodeInternalToExternal( + internalToUnicode( action.getRemotePathResolver().localPathToOutputPath(inMemoryOutputPath))); } diff --git a/src/main/java/com/google/devtools/build/lib/remote/UploadManifest.java b/src/main/java/com/google/devtools/build/lib/remote/UploadManifest.java index 7de3cd96adb71e..a51c9126c59915 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/UploadManifest.java +++ b/src/main/java/com/google/devtools/build/lib/remote/UploadManifest.java @@ -21,7 +21,7 @@ import static com.google.devtools.build.lib.remote.util.RxFutures.toSingle; import static com.google.devtools.build.lib.remote.util.RxUtils.mergeBulkTransfer; import static com.google.devtools.build.lib.remote.util.RxUtils.toTransferResult; -import static com.google.devtools.build.lib.util.StringUtil.reencodeInternalToExternal; +import static com.google.devtools.build.lib.util.StringEncoding.internalToUnicode; import static java.util.Comparator.comparing; import static java.util.Comparator.naturalOrder; import static java.util.Comparator.reverseOrder; @@ -326,8 +326,8 @@ public Digest getStderrDigest() { private void addFileSymbolicLink(Path file, PathFragment target) { OutputSymlink outputSymlink = OutputSymlink.newBuilder() - .setPath(reencodeInternalToExternal(remotePathResolver.localPathToOutputPath(file))) - .setTarget(reencodeInternalToExternal(target.toString())) + .setPath(internalToUnicode(remotePathResolver.localPathToOutputPath(file))) + .setTarget(internalToUnicode(target.toString())) .build(); result.addOutputFileSymlinks(outputSymlink); result.addOutputSymlinks(outputSymlink); @@ -336,8 +336,8 @@ private void addFileSymbolicLink(Path file, PathFragment target) { private void addDirectorySymbolicLink(Path file, PathFragment target) { OutputSymlink outputSymlink = OutputSymlink.newBuilder() - .setPath(reencodeInternalToExternal(remotePathResolver.localPathToOutputPath(file))) - .setTarget(reencodeInternalToExternal(target.toString())) + .setPath(internalToUnicode(remotePathResolver.localPathToOutputPath(file))) + .setTarget(internalToUnicode(target.toString())) .build(); result.addOutputDirectorySymlinks(outputSymlink); result.addOutputSymlinks(outputSymlink); @@ -346,7 +346,7 @@ private void addDirectorySymbolicLink(Path file, PathFragment target) { private void addFile(Digest digest, Path file) { result .addOutputFilesBuilder() - .setPath(reencodeInternalToExternal(remotePathResolver.localPathToOutputPath(file))) + .setPath(internalToUnicode(remotePathResolver.localPathToOutputPath(file))) .setDigest(digest) .setIsExecutable(true); @@ -549,7 +549,7 @@ private void addDirectory(Path dir) throws ExecException, IOException, Interrupt result .addOutputDirectoriesBuilder() - .setPath(reencodeInternalToExternal(remotePathResolver.localPathToOutputPath(dir))) + .setPath(internalToUnicode(remotePathResolver.localPathToOutputPath(dir))) .setTreeDigest(treeDigest) .setIsTopologicallySorted(true); diff --git a/src/main/java/com/google/devtools/build/lib/remote/merkletree/BUILD b/src/main/java/com/google/devtools/build/lib/remote/merkletree/BUILD index f84e7973dff118..d3f4df2f51c53d 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/merkletree/BUILD +++ b/src/main/java/com/google/devtools/build/lib/remote/merkletree/BUILD @@ -22,7 +22,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/profiler", "//src/main/java/com/google/devtools/build/lib/remote:scrubber", "//src/main/java/com/google/devtools/build/lib/remote/util", - "//src/main/java/com/google/devtools/build/lib/util:string", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", "//third_party:guava", diff --git a/src/main/java/com/google/devtools/build/lib/remote/merkletree/MerkleTree.java b/src/main/java/com/google/devtools/build/lib/remote/merkletree/MerkleTree.java index 7cace2695d63b5..b3e965986c3ad9 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/merkletree/MerkleTree.java +++ b/src/main/java/com/google/devtools/build/lib/remote/merkletree/MerkleTree.java @@ -13,7 +13,7 @@ // limitations under the License. package com.google.devtools.build.lib.remote.merkletree; -import static com.google.devtools.build.lib.util.StringUtil.reencodeInternalToExternal; +import static com.google.devtools.build.lib.util.StringEncoding.internalToUnicode; import build.bazel.remote.execution.v2.Digest; import build.bazel.remote.execution.v2.Directory; @@ -398,7 +398,7 @@ private static MerkleTree buildMerkleTree( private static FileNode buildProto(DirectoryTree.FileNode file) { var node = FileNode.newBuilder() - .setName(reencodeInternalToExternal(file.getPathSegment())) + .setName(internalToUnicode(file.getPathSegment())) .setDigest(file.getDigest()) .setIsExecutable(file.isExecutable()); if (file.isToolInput()) { @@ -409,15 +409,15 @@ private static FileNode buildProto(DirectoryTree.FileNode file) { private static DirectoryNode buildProto(String baseName, MerkleTree dir) { return DirectoryNode.newBuilder() - .setName(reencodeInternalToExternal(baseName)) + .setName(internalToUnicode(baseName)) .setDigest(dir.getRootDigest()) .build(); } private static SymlinkNode buildProto(DirectoryTree.SymlinkNode symlink) { return SymlinkNode.newBuilder() - .setName(reencodeInternalToExternal(symlink.getPathSegment())) - .setTarget(reencodeInternalToExternal(symlink.getTarget())) + .setName(internalToUnicode(symlink.getPathSegment())) + .setTarget(internalToUnicode(symlink.getTarget())) .build(); } diff --git a/src/main/java/com/google/devtools/build/lib/rules/cpp/CppCompileAction.java b/src/main/java/com/google/devtools/build/lib/rules/cpp/CppCompileAction.java index 8ba94f289cbaeb..80becd5609ec3b 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/cpp/CppCompileAction.java +++ b/src/main/java/com/google/devtools/build/lib/rules/cpp/CppCompileAction.java @@ -16,7 +16,6 @@ import static com.google.common.base.MoreObjects.firstNonNull; import static com.google.common.collect.ImmutableSet.toImmutableSet; import static com.google.devtools.build.lib.actions.ActionAnalysisMetadata.mergeMaps; -import static java.nio.charset.StandardCharsets.ISO_8859_1; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.CharMatcher; @@ -99,7 +98,6 @@ import com.google.protobuf.ByteString; import java.io.IOException; import java.io.InputStream; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -283,12 +281,7 @@ public class CppCompileAction extends AbstractAction implements IncludeScannable Preconditions.checkNotNull(additionalIncludeScanningRoots); this.compileCommandLine = buildCommandLine( - sourceFile, - coptsFilter, - actionName, - dotdFile, - featureConfiguration, - variables); + sourceFile, coptsFilter, actionName, dotdFile, featureConfiguration, variables); this.executionInfo = executionInfo; this.actionName = actionName; this.featureConfiguration = featureConfiguration; @@ -519,7 +512,7 @@ public NestedSet discoverInputs(ActionExecutionContext actionExecution "failed to generate compile command for rule '%s: %s", getOwner().getLabel(), e.getMessage()); DetailedExitCode code = createDetailedExitCode(message, Code.COMMAND_GENERATION_FAILURE); - throw new ActionExecutionException(message, this, /*catastrophe=*/ false, code); + throw new ActionExecutionException(message, this, /* catastrophe= */ false, code); } commandLineKey = computeCommandLineKey(options); ImmutableList systemIncludeDirs = getSystemIncludeDirs(options); @@ -913,10 +906,7 @@ public Sequence getStarlarkArgs() { ParamFileInfo paramFileInfo = null; if (cppConfiguration().useArgsParamsFile()) { paramFileInfo = - ParamFileInfo.builder(ParameterFileType.GCC_QUOTED) - .setCharset(ISO_8859_1) - .setUseAlways(true) - .build(); + ParamFileInfo.builder(ParameterFileType.GCC_QUOTED).setUseAlways(true).build(); } CommandLineAndParamFileInfo commandLineAndParamFileInfo = new CommandLineAndParamFileInfo(commandLine, paramFileInfo); @@ -1356,15 +1346,14 @@ public ActionResult execute(ActionExecutionContext actionExecutionContext) paramFilePath, compileCommandLine.getCompilerOptions(getOverwrittenVariables(), pathMapper), // TODO(b/132888308): Support MSVC, which has its own method of escaping strings. - ParameterFileType.GCC_QUOTED, - StandardCharsets.ISO_8859_1); + ParameterFileType.GCC_QUOTED); } catch (CommandLineExpansionException e) { String message = String.format( "failed to generate compile command for rule '%s: %s", getOwner().getLabel(), e.getMessage()); DetailedExitCode code = createDetailedExitCode(message, Code.COMMAND_GENERATION_FAILURE); - throw new ActionExecutionException(message, this, /*catastrophe=*/ false, code); + throw new ActionExecutionException(message, this, /* catastrophe= */ false, code); } } @@ -1604,7 +1593,7 @@ Spawn createSpawn(Path execRoot, Map clientEnv, PathMapper pathM "failed to generate compile command for rule '%s: %s", getOwner().getLabel(), e.getMessage()); DetailedExitCode code = createDetailedExitCode(message, Code.COMMAND_GENERATION_FAILURE); - throw new ActionExecutionException(message, this, /*catastrophe=*/ false, code); + throw new ActionExecutionException(message, this, /* catastrophe= */ false, code); } } @@ -1768,7 +1757,7 @@ public NestedSet getInputFilesForExtraAction( "failed to generate compile environment variables for rule '%s: %s", getOwner().getLabel(), e.getMessage()); DetailedExitCode code = createDetailedExitCode(message, Code.COMMAND_GENERATION_FAILURE); - throw new ActionExecutionException(message, this, /*catastrophe=*/ false, code); + throw new ActionExecutionException(message, this, /* catastrophe= */ false, code); } } diff --git a/src/main/java/com/google/devtools/build/lib/rules/java/JavaCompileAction.java b/src/main/java/com/google/devtools/build/lib/rules/java/JavaCompileAction.java index 6081e0b393447a..cd684286e93dbf 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/java/JavaCompileAction.java +++ b/src/main/java/com/google/devtools/build/lib/rules/java/JavaCompileAction.java @@ -18,7 +18,6 @@ import static com.google.common.collect.ImmutableSet.toImmutableSet; import static com.google.common.flogger.LazyArgs.lazy; import static com.google.devtools.build.lib.actions.ActionAnalysisMetadata.mergeMaps; -import static java.nio.charset.StandardCharsets.ISO_8859_1; import static java.util.concurrent.TimeUnit.SECONDS; import static java.util.stream.Collectors.joining; @@ -107,10 +106,7 @@ public final class JavaCompileAction extends AbstractAction implements CommandAc private static final UUID GUID = UUID.fromString("e423747c-2827-49e6-b961-f6c08c10bb51"); private static final ParamFileInfo PARAM_FILE_INFO = - ParamFileInfo.builder(ParameterFile.ParameterFileType.UNQUOTED) - .setCharset(ISO_8859_1) - .setUseAlways(true) - .build(); + ParamFileInfo.builder(ParameterFile.ParameterFileType.UNQUOTED).setUseAlways(true).build(); enum CompilationType { JAVAC("Javac"), diff --git a/src/main/java/com/google/devtools/build/lib/rules/java/JavaHeaderCompileAction.java b/src/main/java/com/google/devtools/build/lib/rules/java/JavaHeaderCompileAction.java index 478380780fcf89..0994cdd3837a68 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/java/JavaHeaderCompileAction.java +++ b/src/main/java/com/google/devtools/build/lib/rules/java/JavaHeaderCompileAction.java @@ -20,7 +20,6 @@ import static com.google.devtools.build.lib.actions.ParameterFile.ParameterFileType.UNQUOTED; import static com.google.devtools.build.lib.packages.ExecGroup.DEFAULT_EXEC_GROUP_NAME; import static com.google.devtools.build.lib.rules.java.JavaCompileActionBuilder.UTF8_ENVIRONMENT; -import static java.nio.charset.StandardCharsets.ISO_8859_1; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -173,8 +172,7 @@ public static Builder newBuilder(RuleContext ruleContext) { /** Builder for {@link JavaHeaderCompileAction}. */ public static final class Builder { - private static final ParamFileInfo PARAM_FILE_INFO = - ParamFileInfo.builder(UNQUOTED).setCharset(ISO_8859_1).build(); + private static final ParamFileInfo PARAM_FILE_INFO = ParamFileInfo.builder(UNQUOTED).build(); private final RuleContext ruleContext; diff --git a/src/main/java/com/google/devtools/build/lib/runtime/BlazeRuntime.java b/src/main/java/com/google/devtools/build/lib/runtime/BlazeRuntime.java index bc922cd1eb27e2..25d5111554c0a7 100644 --- a/src/main/java/com/google/devtools/build/lib/runtime/BlazeRuntime.java +++ b/src/main/java/com/google/devtools/build/lib/runtime/BlazeRuntime.java @@ -17,6 +17,7 @@ import static com.google.common.base.Preconditions.checkState; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.devtools.build.lib.util.DetailedExitCode.DetailedExitCodeComparator.chooseMoreImportantWithFirstIfTie; +import static java.nio.charset.StandardCharsets.ISO_8859_1; import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.annotations.VisibleForTesting; @@ -95,6 +96,7 @@ import com.google.devtools.build.lib.util.LoggingUtil; import com.google.devtools.build.lib.util.Pair; import com.google.devtools.build.lib.util.ProcessUtils; +import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.util.TestType; import com.google.devtools.build.lib.util.ThreadUtils; import com.google.devtools.build.lib.util.io.OutErr; @@ -116,12 +118,12 @@ import com.google.devtools.common.options.OptionsProvider; import com.google.devtools.common.options.TriState; import com.google.errorprone.annotations.CanIgnoreReturnValue; +import com.google.protobuf.ByteString; import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.io.PrintStream; import java.lang.reflect.Type; -import java.nio.charset.StandardCharsets; import java.time.Duration; import java.util.ArrayList; import java.util.Arrays; @@ -841,6 +843,8 @@ public BuildOptions createBuildOptions(OptionsProvider optionsProvider) { */ @SuppressWarnings("SystemExitOutsideMain") public static void main(Iterable> moduleClasses, String[] args) { + // Transform args into Bazel's internal string representation. + args = Arrays.stream(args).map(StringEncoding::platformToInternal).toArray(String[]::new); setupUncaughtHandlerAtStartup(args); List modules = createModules(moduleClasses); // blaze.cc will put --batch first if the user set it. @@ -1063,23 +1067,29 @@ private static int batchMain(Iterable modules, String[] args) { shutdownDone = true; signalHandler.uninstall(); ExecRequest request = result.getExecRequest(); + String[] argv = new String[request.getArgvCount()]; for (int i = 0; i < argv.length; i++) { - argv[i] = request.getArgv(i).toString(StandardCharsets.ISO_8859_1); + argv[i] = internalBytesToPlatformString(request.getArgv(i)); } - - String workingDirectory = request.getWorkingDirectory().toString(StandardCharsets.ISO_8859_1); + String workingDirectory = internalBytesToPlatformString(request.getWorkingDirectory()); try { ProcessBuilder process = new ProcessBuilder().command(argv).directory(new File(workingDirectory)).inheritIO(); + for (int i = 0; i < request.getEnvironmentVariableToClearCount(); i++) { + process + .environment() + .remove(internalBytesToPlatformString(request.getEnvironmentVariableToClear(i))); + } + for (int i = 0; i < request.getEnvironmentVariableCount(); i++) { EnvironmentVariable variable = request.getEnvironmentVariable(i); process .environment() .put( - variable.getName().toString(StandardCharsets.ISO_8859_1), - variable.getValue().toString(StandardCharsets.ISO_8859_1)); + internalBytesToPlatformString(variable.getName()), + internalBytesToPlatformString(variable.getValue())); } return process.start().waitFor(); @@ -1795,4 +1805,8 @@ private static int readPidFile(Path pidFile) throws AbruptExitException { new IOException(e)); } } + + private static String internalBytesToPlatformString(ByteString bytes) { + return StringEncoding.internalToPlatform(bytes.toString(ISO_8859_1)); + } } diff --git a/src/main/java/com/google/devtools/build/lib/runtime/commands/info/BUILD b/src/main/java/com/google/devtools/build/lib/runtime/commands/info/BUILD index 9d98d7740179a8..38a050635262be 100644 --- a/src/main/java/com/google/devtools/build/lib/runtime/commands/info/BUILD +++ b/src/main/java/com/google/devtools/build/lib/runtime/commands/info/BUILD @@ -33,6 +33,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/util:detailed_exit_code", "//src/main/java/com/google/devtools/build/lib/util:heap_offset_helper", "//src/main/java/com/google/devtools/build/lib/util:string", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/worker:worker_process_metrics", "//src/main/java/com/google/devtools/build/skyframe", diff --git a/src/main/java/com/google/devtools/build/lib/runtime/commands/info/JavaHomeInfoItem.java b/src/main/java/com/google/devtools/build/lib/runtime/commands/info/JavaHomeInfoItem.java index 7a147b7e32c0a7..45443f355ab0e5 100644 --- a/src/main/java/com/google/devtools/build/lib/runtime/commands/info/JavaHomeInfoItem.java +++ b/src/main/java/com/google/devtools/build/lib/runtime/commands/info/JavaHomeInfoItem.java @@ -18,6 +18,7 @@ import com.google.devtools.build.lib.analysis.config.BuildConfigurationValue; import com.google.devtools.build.lib.runtime.CommandEnvironment; import com.google.devtools.build.lib.runtime.InfoItem; +import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.vfs.Path; /** Info item for the location of the Java runtime. */ @@ -29,7 +30,7 @@ public JavaHomeInfoItem() { @Override public byte[] get( Supplier configurationSupplier, CommandEnvironment env) { - String javaHome = System.getProperty("java.home"); + String javaHome = StringEncoding.platformToInternal(System.getProperty("java.home")); if (javaHome == null) { return print("unknown"); } diff --git a/src/main/java/com/google/devtools/build/lib/sandbox/BUILD b/src/main/java/com/google/devtools/build/lib/sandbox/BUILD index 8e91491513c643..38364120b0aa97 100644 --- a/src/main/java/com/google/devtools/build/lib/sandbox/BUILD +++ b/src/main/java/com/google/devtools/build/lib/sandbox/BUILD @@ -285,6 +285,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/exec/local", "//src/main/java/com/google/devtools/build/lib/shell", "//src/main/java/com/google/devtools/build/lib/util:os", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", "//third_party:guava", diff --git a/src/main/java/com/google/devtools/build/lib/sandbox/DarwinSandboxedSpawnRunner.java b/src/main/java/com/google/devtools/build/lib/sandbox/DarwinSandboxedSpawnRunner.java index ef49aeeabee595..70d69d0fb39116 100644 --- a/src/main/java/com/google/devtools/build/lib/sandbox/DarwinSandboxedSpawnRunner.java +++ b/src/main/java/com/google/devtools/build/lib/sandbox/DarwinSandboxedSpawnRunner.java @@ -34,6 +34,7 @@ import com.google.devtools.build.lib.shell.CommandException; import com.google.devtools.build.lib.shell.CommandResult; import com.google.devtools.build.lib.util.OS; +import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.vfs.FileSystem; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; @@ -171,7 +172,7 @@ private static ImmutableSet getAlwaysWritableDirs(FileSystem fs) // {@link AbstractSandboxSpawnRunner#getWritableDirs}. // ~/Library/Caches and ~/Library/Logs need to be writable (cf. issue #2231). - Path homeDir = fs.getPath(System.getProperty("user.home")); + Path homeDir = fs.getPath(StringEncoding.platformToInternal(System.getProperty("user.home"))); addPathToSetIfExists(writableDirs, homeDir.getRelative("Library/Caches")); addPathToSetIfExists(writableDirs, homeDir.getRelative("Library/Logs")); diff --git a/src/main/java/com/google/devtools/build/lib/server/GrpcServerImpl.java b/src/main/java/com/google/devtools/build/lib/server/GrpcServerImpl.java index 03ad8b8f36aeaf..f97e88ffb78391 100644 --- a/src/main/java/com/google/devtools/build/lib/server/GrpcServerImpl.java +++ b/src/main/java/com/google/devtools/build/lib/server/GrpcServerImpl.java @@ -14,6 +14,8 @@ package com.google.devtools.build.lib.server; +import static java.nio.charset.StandardCharsets.ISO_8859_1; + import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.base.Strings; @@ -138,7 +140,6 @@ public static GrpcServerImpl create( slowInterruptMessageSuffix); } - @VisibleForTesting enum StreamType { STDOUT, @@ -565,11 +566,10 @@ private void executeCommand(RunRequest request, BlockingStreamObserver> startupOptions = ImmutableList.builder(); for (StartupOption option : request.getStartupOptionsList()) { // UTF-8 won't do because we want to be able to pass arbitrary binary strings. - // Not that the internals of Bazel handle that correctly, but why not make at least this - // little part correct? - startupOptions.add(new Pair<>( - option.getSource().toString(StandardCharsets.ISO_8859_1), - option.getOption().toString(StandardCharsets.ISO_8859_1))); + startupOptions.add( + new Pair<>( + platformBytesToInternalString(option.getSource()), + platformBytesToInternalString(option.getOption()))); } commandManager.preemptEligibleCommands(); @@ -595,12 +595,11 @@ private void executeCommand(RunRequest request, BlockingStreamObserver args = request.getArgList().stream() - .map(arg -> arg.toString(StandardCharsets.ISO_8859_1)) - .collect(ImmutableList.toImmutableList()); + // Transform args into Bazel's internal string representation. + ImmutableList args = + request.getArgList().stream() + .map(GrpcServerImpl::platformBytesToInternalString) + .collect(ImmutableList.toImmutableList()); InvocationPolicy policy = InvocationPolicyParser.parsePolicy(request.getInvocationPolicy()); logger.atInfo().log("%s", SafeRequestLogging.getRequestLogString(args)); @@ -746,4 +745,8 @@ private static FailureDetail createFailureDetail(String message, GrpcServer.Code .setGrpcServer(GrpcServer.newBuilder().setCode(detailedCode)) .build(); } + + private static String platformBytesToInternalString(ByteString bytes) { + return bytes.toString(ISO_8859_1); + } } diff --git a/src/main/java/com/google/devtools/build/lib/shell/BUILD b/src/main/java/com/google/devtools/build/lib/shell/BUILD index a6bd924ba0cbb7..bde1611db6dcb7 100644 --- a/src/main/java/com/google/devtools/build/lib/shell/BUILD +++ b/src/main/java/com/google/devtools/build/lib/shell/BUILD @@ -20,7 +20,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/jni", "//src/main/java/com/google/devtools/build/lib/util:describable_execution_unit", "//src/main/java/com/google/devtools/build/lib/util:os", - "//src/main/java/com/google/devtools/build/lib/util:string", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", "//src/main/java/com/google/devtools/build/lib/windows:processes", @@ -47,6 +47,7 @@ bootstrap_java_library( "//src/main/java/com/google/devtools/build/lib/util:filetype", "//src/main/java/com/google/devtools/build/lib/util:os", "//src/main/java/com/google/devtools/build/lib/util:string", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", "//src/main/java/com/google/devtools/build/lib/windows:processes", "//third_party:auto_value-jars", diff --git a/src/main/java/com/google/devtools/build/lib/shell/JavaSubprocessFactory.java b/src/main/java/com/google/devtools/build/lib/shell/JavaSubprocessFactory.java index 7f93c64fc0b6a3..e3fd943b557c6c 100644 --- a/src/main/java/com/google/devtools/build/lib/shell/JavaSubprocessFactory.java +++ b/src/main/java/com/google/devtools/build/lib/shell/JavaSubprocessFactory.java @@ -16,14 +16,12 @@ import com.google.common.collect.Lists; import com.google.devtools.build.lib.shell.SubprocessBuilder.StreamAction; -import com.google.devtools.build.lib.util.StringUtil; +import com.google.devtools.build.lib.util.StringEncoding; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.lang.ProcessBuilder.Redirect; -import java.util.List; -import java.util.Objects; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; @@ -158,33 +156,18 @@ private synchronized Process start(ProcessBuilder builder) throws IOException { @Override public Subprocess create(SubprocessBuilder params) throws IOException { ProcessBuilder builder = new ProcessBuilder(); - // On JDK 19 and newer, java.lang.ProcessImpl#start encodes argv and the environment using - // sun.jnu.encoding, so if sun.jnu.encoding is set to UTF-8, our argv needs to be UTF-8. (Note - // that on some platforms, for example on macOS, sun.jnu.encoding is hard-coded in the JVM as - // UTF-8.) - boolean reencodeToUtf8 = - Runtime.version().feature() >= 19 - && Objects.equals(System.getProperty("sun.jnu.encoding"), "UTF-8"); - List argv = params.getArgv(); - if (reencodeToUtf8) { - argv = Lists.transform(argv, StringUtil::reencodeInternalToExternal); - } - builder.command(argv); + builder.command(Lists.transform(params.getArgv(), StringEncoding::internalToPlatform)); if (params.getEnv() != null) { builder.environment().clear(); - if (reencodeToUtf8) { - params - .getEnv() - .forEach( - (key, value) -> - builder - .environment() - .put( - StringUtil.reencodeInternalToExternal(key), - StringUtil.reencodeInternalToExternal(value))); - } else { - builder.environment().putAll(params.getEnv()); - } + params + .getEnv() + .forEach( + (key, value) -> + builder + .environment() + .put( + StringEncoding.internalToPlatform(key), + StringEncoding.internalToPlatform(value))); } builder.redirectOutput(getRedirect(params.getStdout(), params.getStdoutFile())); diff --git a/src/main/java/com/google/devtools/build/lib/shell/WindowsSubprocessFactory.java b/src/main/java/com/google/devtools/build/lib/shell/WindowsSubprocessFactory.java index d55ec5eca101ec..fe65683cdf0dc6 100644 --- a/src/main/java/com/google/devtools/build/lib/shell/WindowsSubprocessFactory.java +++ b/src/main/java/com/google/devtools/build/lib/shell/WindowsSubprocessFactory.java @@ -15,7 +15,9 @@ package com.google.devtools.build.lib.shell; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; import com.google.devtools.build.lib.shell.SubprocessBuilder.StreamAction; +import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.vfs.PathFragment; import com.google.devtools.build.lib.windows.WindowsProcesses; import java.io.File; @@ -32,7 +34,7 @@ public class WindowsSubprocessFactory implements SubprocessFactory { @Override public Subprocess create(SubprocessBuilder builder) throws IOException { - List argv = builder.getArgv(); + List argv = Lists.transform(builder.getArgv(), StringEncoding::internalToPlatform); // DO NOT quote argv0, createProcess will do it for us. String argv0 = processArgv0(argv.get(0)); @@ -155,7 +157,11 @@ private static byte[] convertEnvToNative(Map envMap) { // be an error, we have ignore them here. continue; } - result.append(entry.getKey() + "=" + entry.getValue() + "\0"); + result + .append(StringEncoding.internalToPlatform(entry.getKey())) + .append("=") + .append(StringEncoding.internalToPlatform(entry.getValue())) + .append("\0"); } result.append("\0"); diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/BUILD b/src/main/java/com/google/devtools/build/lib/skyframe/BUILD index b8d012d3f20e06..8395fb3f4af978 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/BUILD +++ b/src/main/java/com/google/devtools/build/lib/skyframe/BUILD @@ -1741,6 +1741,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/cmdline", "//src/main/java/com/google/devtools/build/lib/jni", "//src/main/java/com/google/devtools/build/lib/util:os", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", "//src/main/java/com/google/devtools/common/options", diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/LocalDiffAwareness.java b/src/main/java/com/google/devtools/build/lib/skyframe/LocalDiffAwareness.java index b4e472971d811d..0332accc8c807e 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/LocalDiffAwareness.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/LocalDiffAwareness.java @@ -20,6 +20,7 @@ import com.google.common.collect.ImmutableSet; import com.google.devtools.build.lib.cmdline.IgnoredSubdirectories; import com.google.devtools.build.lib.util.OS; +import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.vfs.ModifiedFileSet; import com.google.devtools.build.lib.vfs.PathFragment; import com.google.devtools.build.lib.vfs.Root; @@ -29,7 +30,6 @@ import com.google.devtools.common.options.OptionsBase; import com.google.devtools.common.options.OptionsProvider; import java.io.IOException; -import java.nio.file.FileSystems; import java.nio.file.Path; import java.util.Set; import javax.annotation.Nullable; @@ -104,12 +104,14 @@ public DiffAwareness maybeCreate( return null; } } + Path watchRoot = + Path.of(StringEncoding.internalToPlatform(resolvedPathEntryFragment.getPathString())); // On OSX uses FsEvents due to https://bugs.openjdk.java.net/browse/JDK-7133447 if (OS.getCurrent() == OS.DARWIN) { - return new MacOSXFsEventsDiffAwareness(resolvedPathEntryFragment.toString()); + return new MacOSXFsEventsDiffAwareness(watchRoot); } - return new WatchServiceDiffAwareness(resolvedPathEntryFragment.toString(), ignoredPaths); + return new WatchServiceDiffAwareness(watchRoot, ignoredPaths); } } @@ -132,10 +134,10 @@ public static boolean areInSequence(SequentialView oldView, SequentialView newVi private int numGetCurrentViewCalls = 0; /** Root directory to watch. This is an absolute path. */ - protected final Path watchRootPath; + protected final Path watchRoot; - protected LocalDiffAwareness(String watchRoot) { - this.watchRootPath = FileSystems.getDefault().getPath(watchRoot); + protected LocalDiffAwareness(Path watchRoot) { + this.watchRoot = watchRoot; } /** @@ -198,12 +200,13 @@ public ModifiedFileSet getDiff(@Nullable View oldView, View newView) ModifiedFileSet.Builder resultBuilder = ModifiedFileSet.builder(); for (Path modifiedPath : newSequentialView.modifiedAbsolutePaths) { - if (!modifiedPath.startsWith(watchRootPath)) { + if (!modifiedPath.startsWith(watchRoot)) { throw new BrokenDiffAwarenessException( - String.format("%s is not under %s", modifiedPath, watchRootPath)); + String.format("%s is not under %s", modifiedPath, watchRoot)); } PathFragment relativePath = - PathFragment.create(watchRootPath.relativize(modifiedPath).toString()); + PathFragment.create( + StringEncoding.platformToInternal(watchRoot.relativize(modifiedPath).toString())); if (!relativePath.isEmpty()) { resultBuilder.modify(relativePath); } diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/MacOSXFsEventsDiffAwareness.java b/src/main/java/com/google/devtools/build/lib/skyframe/MacOSXFsEventsDiffAwareness.java index d8c5d175d735d7..5e4c7dd6be50de 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/MacOSXFsEventsDiffAwareness.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/MacOSXFsEventsDiffAwareness.java @@ -14,6 +14,8 @@ package com.google.devtools.build.lib.skyframe; +import static java.nio.charset.StandardCharsets.UTF_8; + import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSet; import com.google.devtools.build.lib.jni.JniLoader; @@ -41,25 +43,24 @@ public final class MacOSXFsEventsDiffAwareness extends LocalDiffAwareness { private boolean opened; /** - * Watch changes on the file system under watchRoot with a granularity of - * delay seconds. + * Watch changes on the file system under watchRoot with a granularity of delay + * seconds. */ - MacOSXFsEventsDiffAwareness(String watchRoot, double latency) { + MacOSXFsEventsDiffAwareness(Path watchRoot, double latency) { super(watchRoot); this.latency = latency; } - /** - * Watch changes on the file system under watchRoot with a granularity of 5ms. - */ - MacOSXFsEventsDiffAwareness(String watchRoot) { + /** Watch changes on the file system under watchRoot with a granularity of 5ms. */ + MacOSXFsEventsDiffAwareness(Path watchRoot) { this(watchRoot, 0.005); } /** - * Helper function to start the watch of paths, called by the constructor. + * Helper function to start the watch of paths, which is expected to be an array of + * byte arrays containing the UTF-8 bytes of the paths to watch, called by the constructor. */ - private native void create(String[] paths, double latency); + private native void create(byte[][] paths, double latency); /** * Runs the main loop to listen for fsevents. @@ -76,7 +77,7 @@ private void init() { // TODO(jmmv): This can break if the user interrupts as anywhere in this function. Preconditions.checkState(!opened); opened = true; - create(new String[] {watchRootPath.toAbsolutePath().toString()}, latency); + create(new byte[][] {watchRoot.toAbsolutePath().toString().getBytes(UTF_8)}, latency); // Start a thread that just contains the OS X run loop. CountDownLatch listening = new CountDownLatch(1); @@ -110,10 +111,10 @@ public void close() { /** * JNI code returning the list of absolute path modified since last call. * - * @return the list of paths modified since the last call, or null if we can't precisely tell what - * changed + * @return the array of paths (in the form of byte arrays containing the UTF-8 representation) + * modified since the last call, or null if we can't precisely tell what changed */ - private native String[] poll(); + private native byte[][] poll(); static { boolean loadJniWorked = false; @@ -147,13 +148,13 @@ public View getCurrentView(OptionsProvider options) return EVERYTHING_MODIFIED; } Preconditions.checkState(!closed); - String[] polledPaths = poll(); + byte[][] polledPaths = poll(); if (polledPaths == null) { return EVERYTHING_MODIFIED; } else { ImmutableSet.Builder paths = ImmutableSet.builder(); - for (String path : polledPaths) { - paths.add(Paths.get(path)); + for (byte[] pathBytes : polledPaths) { + paths.add(Paths.get(new String(pathBytes, UTF_8))); } return newView(paths.build()); } diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/WatchServiceDiffAwareness.java b/src/main/java/com/google/devtools/build/lib/skyframe/WatchServiceDiffAwareness.java index 8e4d1ba0a832b7..9e56401e7c83d2 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/WatchServiceDiffAwareness.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/WatchServiceDiffAwareness.java @@ -24,7 +24,6 @@ import com.google.devtools.common.options.OptionsProvider; import java.io.IOException; import java.nio.file.ClosedWatchServiceException; -import java.nio.file.FileSystems; import java.nio.file.FileVisitResult; import java.nio.file.Files; import java.nio.file.LinkOption; @@ -57,7 +56,7 @@ public final class WatchServiceDiffAwareness extends LocalDiffAwareness { private final IgnoredSubdirectories ignoredPaths; - WatchServiceDiffAwareness(String watchRoot, IgnoredSubdirectories ignoredPaths) { + WatchServiceDiffAwareness(Path watchRoot, IgnoredSubdirectories ignoredPaths) { super(watchRoot); this.ignoredPaths = ignoredPaths; } @@ -65,7 +64,7 @@ public final class WatchServiceDiffAwareness extends LocalDiffAwareness { private void init() { Preconditions.checkState(watchService == null); try { - watchService = FileSystems.getDefault().newWatchService(); + watchService = watchRoot.getFileSystem().newWatchService(); } catch (IOException ignored) { // According to the docs, this can never happen with the default file system provider. } @@ -125,7 +124,7 @@ public View getCurrentView(OptionsProvider options) throws BrokenDiffAwarenessEx Set modifiedAbsolutePaths; if (isFirstCall()) { try { - registerSubDirectories(watchRootPath); + registerSubDirectories(watchRoot); } catch (IOException e) { close(); throw new BrokenDiffAwarenessException( @@ -243,7 +242,7 @@ private Set collectChanges() throws BrokenDiffAwarenessException, IOExcept } if (watchKeyToDirBiMap.isEmpty()) { // No more directories to watch, something happened the root directory being watched. - throw new IOException("Root directory " + watchRootPath + " became inaccessible."); + throw new IOException("Root directory " + watchRoot + " became inaccessible."); } Set changedPaths = new HashSet<>(); diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/UnsafeStringCodec.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/UnsafeStringCodec.java index 6af65f15b584ca..d7984df74ba123 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/UnsafeStringCodec.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/UnsafeStringCodec.java @@ -23,7 +23,6 @@ import com.google.protobuf.CodedInputStream; import com.google.protobuf.CodedOutputStream; import java.io.IOException; -import java.util.Arrays; /** * A high-performance {@link ObjectCodec} for {@link String} objects specialized for Strings in @@ -39,7 +38,7 @@ public final class UnsafeStringCodec extends LeafObjectCodec { */ private static final UnsafeStringCodec INSTANCE = new UnsafeStringCodec(); - private final StringUnsafe stringUnsafe = StringUnsafe.getInstance(); + private static final StringUnsafe STRING_UNSAFE = StringUnsafe.getInstance(); public static UnsafeStringCodec stringCodec() { return INSTANCE; @@ -53,8 +52,8 @@ public Class getEncodedClass() { @Override public void serialize(LeafSerializationContext context, String obj, CodedOutputStream codedOut) throws SerializationException, IOException { - byte coder = stringUnsafe.getCoder(obj); - byte[] value = stringUnsafe.getByteArray(obj); + byte coder = STRING_UNSAFE.getCoder(obj); + byte[] value = STRING_UNSAFE.getByteArray(obj); // Optimize for the case that coder == 0, in which case we can just write the length here, // potentially using just one byte. If coder != 0, we'll use 4 bytes, but that's vanishingly // rare. @@ -80,11 +79,6 @@ public String deserialize(LeafDeserializationContext context, CodedInputStream c length = -length; } byte[] value = codedIn.readRawBytes(length); - try { - return stringUnsafe.newInstance(value, coder); - } catch (ReflectiveOperationException e) { - throw new SerializationException( - "Could not instantiate string: " + Arrays.toString(value) + ", " + coder, e); - } + return STRING_UNSAFE.newInstance(value, coder); } } diff --git a/src/main/java/com/google/devtools/build/lib/unix/BUILD b/src/main/java/com/google/devtools/build/lib/unix/BUILD index ef10bd5464aae9..01c02f601588fe 100644 --- a/src/main/java/com/google/devtools/build/lib/unix/BUILD +++ b/src/main/java/com/google/devtools/build/lib/unix/BUILD @@ -23,9 +23,9 @@ java_library( "//src/main/java/com/google/devtools/build/lib/profiler", "//src/main/java/com/google/devtools/build/lib/util:blocker", "//src/main/java/com/google/devtools/build/lib/util:os", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", - "//third_party:flogger", "//third_party:guava", "//third_party:jsr305", ], diff --git a/src/main/java/com/google/devtools/build/lib/unix/NativePosixFiles.java b/src/main/java/com/google/devtools/build/lib/unix/NativePosixFiles.java index 6f584505ff8c0f..40aa88eb3dcc93 100644 --- a/src/main/java/com/google/devtools/build/lib/unix/NativePosixFiles.java +++ b/src/main/java/com/google/devtools/build/lib/unix/NativePosixFiles.java @@ -15,12 +15,10 @@ package com.google.devtools.build.lib.unix; import com.google.common.annotations.VisibleForTesting; -import com.google.common.flogger.GoogleLogger; import com.google.devtools.build.lib.jni.JniLoader; import com.google.devtools.build.lib.util.Blocker; import java.io.FileNotFoundException; import java.io.IOException; -import java.util.logging.LogManager; /** * Utility methods for access to UNIX filesystem calls not exposed by the Java @@ -31,23 +29,6 @@ public final class NativePosixFiles { private NativePosixFiles() {} static { - if (!java.nio.charset.Charset.defaultCharset().name().equals("ISO-8859-1")) { - // Defer the Logger call, so we don't deadlock if this is called from Logger - // initialization code. - new Thread( - () -> { - // wait (if necessary) until the logging system is initialized - synchronized (LogManager.getLogManager()) { - } - @SuppressWarnings("FloggerRequiredModifiers") - GoogleLogger logger = GoogleLogger.forEnclosingClass(); - logger.atFine().log( - "WARNING: Default character set is not latin1; java.io.File and" - + " com.google.devtools.build.lib.unix.FilesystemUtils will represent" - + " some filenames differently."); - }) - .start(); - } JniLoader.loadJni(); initJNIClasses(); } diff --git a/src/main/java/com/google/devtools/build/lib/unix/UnixFileSystem.java b/src/main/java/com/google/devtools/build/lib/unix/UnixFileSystem.java index 173804917d8b41..6110d53204219e 100644 --- a/src/main/java/com/google/devtools/build/lib/unix/UnixFileSystem.java +++ b/src/main/java/com/google/devtools/build/lib/unix/UnixFileSystem.java @@ -22,6 +22,7 @@ import com.google.devtools.build.lib.unix.NativePosixFiles.Dirents; import com.google.devtools.build.lib.unix.NativePosixFiles.ReadTypes; import com.google.devtools.build.lib.util.Blocker; +import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.vfs.AbstractFileSystemWithCustomStat; import com.google.devtools.build.lib.vfs.DigestHashFunction; import com.google.devtools.build.lib.vfs.Dirent; @@ -34,7 +35,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.nio.charset.StandardCharsets; +import java.nio.file.Paths; import java.util.Arrays; import java.util.Collection; import java.util.List; @@ -525,27 +526,19 @@ protected void deleteTreesBelow(PathFragment dir) throws IOException { } } - private static File createJavaIoFile(PathFragment path) { - final String pathStr = path.getPathString(); - if (pathStr.chars().allMatch(c -> c < 128)) { - return new File(pathStr); - } - - // Paths returned from NativePosixFiles are Strings containing raw bytes from the filesystem. - // Java's IO subsystem expects paths to be encoded per the `sun.jnu.encoding` setting. This - // is difficult to handle generically, but we can special-case the most common case (UTF-8). - if ("UTF-8".equals(System.getProperty("sun.jnu.encoding"))) { - final byte[] pathBytes = pathStr.getBytes(StandardCharsets.ISO_8859_1); - return new File(new String(pathBytes, StandardCharsets.UTF_8)); - } + @Override + protected File getIoFile(PathFragment path) { + return new File(StringEncoding.internalToPlatform(path.getPathString())); + } - // This will probably fail but not much that can be done without migrating to `java.nio.Files`. - return new File(pathStr); + @Override + protected java.nio.file.Path getNioPath(PathFragment path) { + return Paths.get(StringEncoding.internalToPlatform(path.getPathString())); } @Override protected InputStream createFileInputStream(PathFragment path) throws IOException { - return new FileInputStream(createJavaIoFile(path)); + return new FileInputStream(StringEncoding.internalToPlatform(path.getPathString())); } protected OutputStream createFileOutputStream(PathFragment path, boolean append) @@ -636,7 +629,7 @@ public synchronized void write(byte[] b, int off, int len) throws IOException { private static final class ProfiledNativeFileOutputStream extends NativeFileOutputStream { private final String name; - public ProfiledNativeFileOutputStream(int fd, String name) throws FileNotFoundException { + public ProfiledNativeFileOutputStream(int fd, String name) { super(fd); this.name = name; } diff --git a/src/main/java/com/google/devtools/build/lib/unsafe/BUILD b/src/main/java/com/google/devtools/build/lib/unsafe/BUILD index 2790b744f56410..c5bf8c4cbc2414 100644 --- a/src/main/java/com/google/devtools/build/lib/unsafe/BUILD +++ b/src/main/java/com/google/devtools/build/lib/unsafe/BUILD @@ -22,5 +22,8 @@ java_library( add_opens = [ "java.base/java.lang", ], - deps = [":unsafe-provider"], + deps = [ + ":unsafe-provider", + "//third_party:guava", + ], ) diff --git a/src/main/java/com/google/devtools/build/lib/unsafe/StringUnsafe.java b/src/main/java/com/google/devtools/build/lib/unsafe/StringUnsafe.java index 2db411d53d55d8..fd7eb4afb8e7a8 100644 --- a/src/main/java/com/google/devtools/build/lib/unsafe/StringUnsafe.java +++ b/src/main/java/com/google/devtools/build/lib/unsafe/StringUnsafe.java @@ -15,8 +15,12 @@ import static com.google.devtools.build.lib.unsafe.UnsafeProvider.unsafe; +import com.google.common.base.Ascii; +import java.lang.invoke.MethodHandle; +import java.lang.invoke.MethodHandles; import java.lang.reflect.Constructor; import java.lang.reflect.Field; +import java.lang.reflect.Method; import java.util.Arrays; /** @@ -35,6 +39,20 @@ public final class StringUnsafe { private static final StringUnsafe INSTANCE = new StringUnsafe(); + private static final MethodHandle HAS_NEGATIVES; + + static { + try { + Class stringCoding = Class.forName("java.lang.StringCoding"); + Method hasNegatives = + stringCoding.getDeclaredMethod("hasNegatives", byte[].class, int.class, int.class); + hasNegatives.setAccessible(true); + HAS_NEGATIVES = MethodHandles.lookup().unreflect(hasNegatives); + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException e) { + throw new IllegalStateException(e); + } + } + private final Constructor constructor; private final long valueOffset; private final long coderOffset; @@ -78,8 +96,60 @@ public byte[] getByteArray(String obj) { return (byte[]) unsafe().getObject(obj, valueOffset); } - /** Constructs a new string from a byte array and coder. */ - public String newInstance(byte[] bytes, byte coder) throws ReflectiveOperationException { - return constructor.newInstance(bytes, coder); + /** + * Return the internal byte array of a String using Bazel's internal encoding (see {@link + * com.google.devtools.build.lib.util.StringEncoding}). + * + *

Use of this is unsafe. The representation may change from one JDK version to the next. + * Ensure you do not mutate this byte array in any way. + */ + public byte[] getInternalStringBytes(String obj) { + // This is both a performance optimization and a correctness check: internal strings must + // always be coded in Latin-1, otherwise they have been constructed out of a non-ASCII string + // that hasn't been converted to internal encoding. + if (getCoder(obj) != LATIN1) { + // Truncation is ASCII only and thus doesn't change the encoding. + String truncatedString = Ascii.truncate(obj, 1000, "..."); + throw new IllegalArgumentException( + "Expected internal string with Latin-1 coder, got: %s (%s)" + .formatted(truncatedString, Arrays.toString(getByteArray(truncatedString)))); + } + return getByteArray(obj); + } + + /** Returns whether the string is ASCII-only. */ + public boolean isAscii(String obj) { + // This implementation uses java.lang.StringCoding#hasNegatives, which is implemented as a JVM + // intrinsic. On a machine with 512-bit SIMD registers, this is 5x as fast as a naive loop + // over getByteArray(obj), which in turn is 5x as fast as obj.chars().anyMatch(c -> c > 0x7F) in + // a JMH benchmark. + + if (getCoder(obj) != LATIN1) { + // Latin-1 is a superset of ASCII, so we must have non-ASCII characters. + return false; + } + byte[] bytes = getByteArray(obj); + try { + return !(boolean) HAS_NEGATIVES.invokeExact(bytes, 0, bytes.length); + } catch (Throwable t) { + // hasNegatives doesn't throw. + throw new IllegalStateException(t); + } + } + + /** + * Constructs a new string from a byte array and coder. + * + *

The new string shares the byte array instance, which must not be modified after calling this + * method. + */ + public String newInstance(byte[] bytes, byte coder) { + try { + return constructor.newInstance(bytes, coder); + } catch (ReflectiveOperationException e) { + // The constructor never throws and has been made accessible, so this is not expected. + throw new IllegalStateException( + "Could not instantiate string: " + Arrays.toString(bytes) + ", " + coder, e); + } } } diff --git a/src/main/java/com/google/devtools/build/lib/util/BUILD b/src/main/java/com/google/devtools/build/lib/util/BUILD index 71246af4dade18..0ef5ea4fa72e7e 100644 --- a/src/main/java/com/google/devtools/build/lib/util/BUILD +++ b/src/main/java/com/google/devtools/build/lib/util/BUILD @@ -46,6 +46,7 @@ java_library( name = "file_system_lock", srcs = ["FileSystemLock.java"], deps = [ + ":string_encoding", "//src/main/java/com/google/devtools/build/lib/vfs", "//third_party:guava", ], @@ -317,6 +318,16 @@ java_library( ], ) +java_library( + name = "string_encoding", + srcs = ["StringEncoding.java"], + deps = [ + ":os", + "//src/main/java/com/google/devtools/build/lib/unsafe:string", + "//third_party:guava", + ], +) + java_library( name = "string", srcs = [ diff --git a/src/main/java/com/google/devtools/build/lib/util/FileSystemLock.java b/src/main/java/com/google/devtools/build/lib/util/FileSystemLock.java index c5023cd90559d2..96817e1bd1a4f2 100644 --- a/src/main/java/com/google/devtools/build/lib/util/FileSystemLock.java +++ b/src/main/java/com/google/devtools/build/lib/util/FileSystemLock.java @@ -13,14 +13,12 @@ // limitations under the License. package com.google.devtools.build.lib.util; -import static java.nio.charset.StandardCharsets.ISO_8859_1; import com.google.common.annotations.VisibleForTesting; import com.google.devtools.build.lib.vfs.Path; import java.io.IOException; import java.nio.channels.FileChannel; import java.nio.channels.FileLock; -import java.nio.charset.Charset; import java.nio.file.StandardOpenOption; /** @@ -62,7 +60,7 @@ private static FileSystemLock get(Path path, boolean shared) throws IOException FileChannel channel = FileChannel.open( // Correctly handle non-ASCII paths by converting from the internal string encoding. - java.nio.file.Path.of(getPathStringForJavaIo(path)), + java.nio.file.Path.of(StringEncoding.internalToPlatform(path.getPathString())), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); @@ -75,12 +73,6 @@ private static FileSystemLock get(Path path, boolean shared) throws IOException return new FileSystemLock(channel, lock); } - private static String getPathStringForJavaIo(Path path) { - return new String( - path.getPathString().getBytes(ISO_8859_1), - Charset.forName(System.getProperty("sun.jnu.encoding"), ISO_8859_1)); - } - @VisibleForTesting boolean isShared() { return lock.isShared(); diff --git a/src/main/java/com/google/devtools/build/lib/util/StringEncoding.java b/src/main/java/com/google/devtools/build/lib/util/StringEncoding.java new file mode 100644 index 00000000000000..aeea4784ef807a --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/util/StringEncoding.java @@ -0,0 +1,177 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.devtools.build.lib.util; + +import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.nio.charset.StandardCharsets.UTF_8; + +import com.google.common.base.Preconditions; +import com.google.devtools.build.lib.unsafe.StringUnsafe; +import java.lang.reflect.Field; +import java.nio.charset.Charset; + +/** + * Utility functions for reencoding strings between Bazel's internal raw byte encoding and regular + * Java strings. + * + *

Bazel needs to support the following two setups: + * + *

    + *
  • File paths, command-line arguments, environment variables, BUILD and .bzl files are all + * encoded in UTF-8, on Linux, macOS or Windows. + *
  • File paths, command-line arguments, environment variables, BUILD and .bzl files are all + * encoded in some consistent encoding, on Linux and with the en_US.ISO-8859-1 locale + * available on the host (legacy setup). In particular, this setup allows any byte sequence to + * appear in a file path and be referenced in a BUILD file. + *
+ * + *

Bazel achieves this by forcing an en_US.ISO-8859-1 locale on Unix when available, which due to + * the byte-based nature of Unix APIs allows all Java (N)IO functions to treat strings as raw byte + * sequences (a Latin-1 character is equivalent to an unconstrained byte value). On macOS, where the + * JVM forces UTF-8 encoding for any kind of system interaction, as well as on Windows, where system + * APIs are all restricted to valid Unicode strings, Bazel has to reencode strings to Unicode before + * passing them to the JVM (and vice versa). Since BUILD and .bzl files are always read into Latin-1 + * strings (file encodings are not forced by the JVM) and are assumed to be encoded in UTF-8 (unless + * the Latin-1 locale is available), Bazel has to reencode the strings to UTF-8 so that they match + * up with the Starlark contents of these files (e.g. file paths mentioned in a BUILD file). + * + *

While allowing the user a great deal of flexibility, this requires great care when {@link + * String}s are passed into or out of Bazel via Java standard library functions or external APIs. + * The following three different types of strings need to be distinguished as if they were different + * Java types: + * + *

    + *
  • Internal strings: All strings retained by Bazel and used in its inner layers are expected + * to be raw byte sequences stored in Latin-1 {@link String}s. With Java's compact string + * representation, this means that the Latin-1 bytes are stored directly in the internal byte + * array {@link String#value} and the {@link String#coder} is {@link String#LATIN1}. + *
  • Unicode strings: Regular Java strings, which are always Unicode. A common example is a + * {@code string} field in a protobuf message. + *
  • Platform strings: Strings that are passed to or returned from Java (N)IO functions or as + * command-line arguments or environment variables to the {@code java} binary at startup or + * processes started via {@link java.lang.ProcessBuilder}. These strings are encoded and + * decoded by the JVM according to its default native encoding, which is given by the {@code + * sun.jnu.encoding} system property. With the current JDK version (21), this is: + *
      + *
    • UTF-8 on macOS; + *
    • determined by the active code page on Windows (Cp1252 on US Windows, can be set to + * UTF-8 by the user); + *
    • determined by the current locale on Linux (forced to en_US.ISO-8859-1 by the client + * if available, otherwise usually UTF-8); + *
    • determined by the current locale on OpenBSD, which is always UTF-8. + *
    + * As a result, there are two cases to consider: + *
      + *
    • On Linux with a Latin-1 locale, platform strings are identical to internal strings + * and Java (N)IO functions can be used to operate with Unix API on a raw byte level. + *
    • In all other cases, platform strings are a subset of Unicode strings. + *
    + *
+ * + *

The static methods in this class efficiently reencode {@link String}s between these three + * "types". Crucially, since ASCII strings are encoded identically in ISO-8859-1 and UTF-8, such + * strings do not need to be reencoded. + */ +public final class StringEncoding { + + static { + try { + Field compactStrings = String.class.getDeclaredField("COMPACT_STRINGS"); + compactStrings.setAccessible(true); + Preconditions.checkState( + (boolean) compactStrings.get(null), "Bazel requires -XX:-CompactStrings"); + } catch (NoSuchFieldException | IllegalAccessException e) { + throw new IllegalStateException(e); + } + } + + /** + * Transforms an internal string into a platform string as efficiently as possible. + * + *

See the class documentation for more information on the different types of strings. + */ + public static String internalToPlatform(String s) { + return needsReencodeForPlatform(s) + ? new String(STRING_UNSAFE.getInternalStringBytes(s), UTF_8) + : s; + } + + /** + * Transforms a platform string into an internal string as efficiently as possible. + * + *

See the class documentation for more information on the different types of strings. + */ + public static String platformToInternal(String s) { + return needsReencodeForPlatform(s) + ? STRING_UNSAFE.newInstance(s.getBytes(UTF_8), StringUnsafe.LATIN1) + : s; + } + + /** + * Transforms an internal string into a Unicode string as efficiently as possible. + * + *

See the class documentation for more information on the different types of strings. + */ + public static String internalToUnicode(String s) { + return needsReencodeForUnicode(s) + ? new String(STRING_UNSAFE.getInternalStringBytes(s), UTF_8) + : s; + } + + /** + * Transforms a Unicode string into an internal string as efficiently as possible. + * + *

See the class documentation for more information on the different types of strings. + */ + public static String unicodeToInternal(String s) { + return needsReencodeForUnicode(s) + ? STRING_UNSAFE.newInstance(s.getBytes(UTF_8), StringUnsafe.LATIN1) + : s; + } + + private static final StringUnsafe STRING_UNSAFE = StringUnsafe.getInstance(); + + /** + * The {@link Charset} with which the JVM encodes any strings passed to or returned from Java + * (N)IO functions, command-line arguments or environment variables. + */ + private static final boolean SUN_JNU_ENCODING_IS_ISO_8859_1 = + Charset.forName(System.getProperty("sun.jnu.encoding")).equals(ISO_8859_1); + + /** + * This only exists for RemoteWorker, which uses JavaIoFileSystem with Unicode strings and thus + * shouldn't be subject to any reencoding. + */ + private static final boolean BAZEL_UNICODE_STRINGS = + Boolean.getBoolean("bazel.internal.UnicodeStrings"); + + private static boolean needsReencodeForPlatform(String s) { + if (SUN_JNU_ENCODING_IS_ISO_8859_1 && OS.getCurrent() == OS.LINUX) { + // In this case, platform strings encode raw bytes and are thus identical to internal strings. + return false; + } + // Otherwise, platform strings are a subset of Unicode strings. + return needsReencodeForUnicode(s); + } + + private static boolean needsReencodeForUnicode(String s) { + if (BAZEL_UNICODE_STRINGS) { + return false; + } + return !STRING_UNSAFE.isAscii(s); + } + + private StringEncoding() {} +} diff --git a/src/main/java/com/google/devtools/build/lib/util/StringUtil.java b/src/main/java/com/google/devtools/build/lib/util/StringUtil.java index e2a36fe565550d..ebb250519bbef1 100644 --- a/src/main/java/com/google/devtools/build/lib/util/StringUtil.java +++ b/src/main/java/com/google/devtools/build/lib/util/StringUtil.java @@ -13,9 +13,6 @@ // limitations under the License. package com.google.devtools.build.lib.util; -import static java.nio.charset.StandardCharsets.ISO_8859_1; -import static java.nio.charset.StandardCharsets.UTF_8; - import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.Iterables; @@ -121,67 +118,5 @@ public static String ordinal(int number) { } } - /** - * Decode a String that might actually be UTF-8, in which case each input character will be - * treated as a byte. - * - *

Several Bazel subsystems, including Starlark, store bytes in `String` values where each - * `char` stores one `byte` in its lower 8 bits. This function converts its input to a `[]byte`, - * then decodes that byte array as UTF-8. - * - *

Using U+2049 (EXCLAMATION QUESTION MARK) as an example: - * - *

"\u2049".getBytes(UTF_8) == [0xE2, 0x81, 0x89] - * - *

decodeBytestringUtf8("\u00E2\u0081\u0089") == "\u2049" - * - *

The return value is suitable for passing to Protobuf string fields or printing to the - * terminal. - */ - public static String reencodeInternalToExternal(String maybeUtf8) { - if (maybeUtf8.chars().allMatch(c -> c < 128)) { - return maybeUtf8; - } - - // Try our best to get a valid Unicode string, assuming that the input - // is either UTF-8 (from Starlark or a UNIX file path) or already valid - // Unicode (from a Windows file path). - if (maybeUtf8.chars().anyMatch(c -> c > 0xFF)) { - return maybeUtf8; - } - - final byte[] utf8 = maybeUtf8.getBytes(ISO_8859_1); - final String decoded = new String(utf8, UTF_8); - - // If the input was Unicode that happens to contain only codepoints in - // the ISO-8859-1 range, then it will probably have a partial decoding - // failure. - if (decoded.chars().anyMatch(c -> c == 0xFFFD)) { - return maybeUtf8; - } - - return decoded; - } - - /** - * Encodes a String to UTF-8, then converts those UTF-8 bytes to a String by zero-extending each - * `byte` into a `char`. - * - *

Using U+2049 (EXCLAMATION QUESTION MARK) as an example: - * - *

"\u2049".getBytes(UTF_8) == [0xE2, 0x81, 0x89] - * - *

encodeBytestringUtf8("\u2049") == "\u00E2\u0081\u0089" - * - *

See {@link #reencodeInternalToExternal} for motivation. - */ - public static String reencodeExternalToInternal(String unicode) { - if (unicode.chars().allMatch(c -> c < 128)) { - return unicode; - } - final byte[] utf8 = unicode.getBytes(UTF_8); - return new String(utf8, ISO_8859_1); - } - private StringUtil() {} } diff --git a/src/main/java/com/google/devtools/build/lib/vfs/AbstractFileSystem.java b/src/main/java/com/google/devtools/build/lib/vfs/AbstractFileSystem.java index bfb84caaca628b..dd5fae93254c45 100644 --- a/src/main/java/com/google/devtools/build/lib/vfs/AbstractFileSystem.java +++ b/src/main/java/com/google/devtools/build/lib/vfs/AbstractFileSystem.java @@ -24,6 +24,7 @@ import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe; import com.google.devtools.build.lib.profiler.Profiler; import com.google.devtools.build.lib.profiler.ProfilerTask; +import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; @@ -33,7 +34,6 @@ import java.io.OutputStream; import java.nio.channels.SeekableByteChannel; import java.nio.file.Files; -import java.nio.file.Paths; import java.nio.file.StandardOpenOption; /** This class implements the FileSystem interface using direct calls to the UNIX filesystem. */ @@ -73,7 +73,7 @@ protected InputStream getInputStream(PathFragment path) throws IOException { /** Allows the mapping of PathFragment to InputStream to be overridden in subclasses. */ protected InputStream createFileInputStream(PathFragment path) throws IOException { - return new FileInputStream(path.toString()); + return new FileInputStream(getIoFile(path)); } /** Returns either normal or profiled FileInputStream. */ @@ -100,18 +100,16 @@ private InputStream createMaybeProfiledInputStream(PathFragment path) throws IOE @Override protected SeekableByteChannel createReadWriteByteChannel(PathFragment path) throws IOException { - String name = path.getPathString(); - boolean shouldProfile = profiler.isActive() && profiler.isProfiling(ProfilerTask.VFS_OPEN); long startTime = Profiler.nanoTimeMaybe(); try { // Currently, we do not proxy SeekableByteChannel for profiling reads and writes. - return Files.newByteChannel(Paths.get(name), READ_WRITE_BYTE_CHANNEL_OPEN_OPTIONS); + return Files.newByteChannel(getNioPath(path), READ_WRITE_BYTE_CHANNEL_OPEN_OPTIONS); } finally { if (shouldProfile) { - profiler.logSimpleTask(startTime, ProfilerTask.VFS_OPEN, name); + profiler.logSimpleTask(startTime, ProfilerTask.VFS_OPEN, path.toString()); } } } @@ -122,19 +120,18 @@ protected SeekableByteChannel createReadWriteByteChannel(PathFragment path) thro */ protected OutputStream createFileOutputStream(PathFragment path, boolean append, boolean internal) throws FileNotFoundException { - final String name = path.toString(); if (!internal && profiler.isActive() && (profiler.isProfiling(ProfilerTask.VFS_WRITE) || profiler.isProfiling(ProfilerTask.VFS_OPEN))) { long startTime = Profiler.nanoTimeMaybe(); try { - return new ProfiledFileOutputStream(name, append); + return new ProfiledFileOutputStream(getIoFile(path), append); } finally { - profiler.logSimpleTask(startTime, ProfilerTask.VFS_OPEN, name); + profiler.logSimpleTask(startTime, ProfilerTask.VFS_OPEN, path.toString()); } } else { - return new FileOutputStream(name, append); + return new FileOutputStream(getIoFile(path), append); } } @@ -191,11 +188,11 @@ public int read(byte[] b, int off, int len) throws IOException { } private static final class ProfiledFileOutputStream extends FileOutputStream { - private final String name; + private final File file; - public ProfiledFileOutputStream(String name, boolean append) throws FileNotFoundException { - super(name, append); - this.name = name; + public ProfiledFileOutputStream(File file, boolean append) throws FileNotFoundException { + super(file, append); + this.file = file; } @Override @@ -209,7 +206,7 @@ public void write(byte[] b, int off, int len) throws IOException { try { super.write(b, off, len); } finally { - profiler.logSimpleTask(startTime, ProfilerTask.VFS_WRITE, name); + profiler.logSimpleTask(startTime, ProfilerTask.VFS_WRITE, file.toString()); } } } diff --git a/src/main/java/com/google/devtools/build/lib/vfs/BUILD b/src/main/java/com/google/devtools/build/lib/vfs/BUILD index 0c4503f2203e93..f734c27f5bcafd 100644 --- a/src/main/java/com/google/devtools/build/lib/vfs/BUILD +++ b/src/main/java/com/google/devtools/build/lib/vfs/BUILD @@ -74,6 +74,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/skyframe/serialization:visible-for-serialization", "//src/main/java/com/google/devtools/build/lib/skyframe/serialization/autocodec", "//src/main/java/com/google/devtools/build/lib/util:filetype", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/skyframe:skyframe-objects", "//src/main/java/com/google/devtools/common/options", "//third_party:caffeine", diff --git a/src/main/java/com/google/devtools/build/lib/vfs/FileSystem.java b/src/main/java/com/google/devtools/build/lib/vfs/FileSystem.java index 7685a6ebcf95b3..5820d398fe78c6 100644 --- a/src/main/java/com/google/devtools/build/lib/vfs/FileSystem.java +++ b/src/main/java/com/google/devtools/build/lib/vfs/FileSystem.java @@ -23,6 +23,7 @@ import com.google.common.io.ByteSource; import com.google.common.io.CharStreams; import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe; +import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; @@ -824,4 +825,21 @@ protected abstract void createFSDependentHardLink( */ protected void prefetchPackageAsync(PathFragment path, int maxDirs) {} + /** + * Returns a {@link File} object for the given path. This method is only supported by file system + * implementations that are backed by the local file system. + */ + protected File getIoFile(PathFragment path) { + throw new UnsupportedOperationException( + "getIoFile() not supported for " + getClass().getName()); + } + + /** + * Returns a {@link java.nio.file.Path} object for the given path. This method is only supported + * by file system implementations that are backed by the local file system. + */ + protected java.nio.file.Path getNioPath(PathFragment path) { + throw new UnsupportedOperationException( + "getNioPath() not supported for " + getClass().getName()); + } } diff --git a/src/main/java/com/google/devtools/build/lib/vfs/FileSystemUtils.java b/src/main/java/com/google/devtools/build/lib/vfs/FileSystemUtils.java index 9fe5d13c8deb3e..aefae38f11fba6 100644 --- a/src/main/java/com/google/devtools/build/lib/vfs/FileSystemUtils.java +++ b/src/main/java/com/google/devtools/build/lib/vfs/FileSystemUtils.java @@ -22,6 +22,7 @@ import com.google.common.io.ByteStreams; import com.google.devtools.build.lib.concurrent.ThreadSafety.ConditionallyThreadSafe; import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe; +import com.google.devtools.build.lib.util.StringEncoding; import com.google.errorprone.annotations.InlineMe; import java.io.IOException; import java.io.InputStream; @@ -245,7 +246,11 @@ public static Path getWorkingDirectory(FileSystem fs) { * 'user.dir'. This version does not require a {@link FileSystem}. */ public static PathFragment getWorkingDirectory() { - return PathFragment.create(System.getProperty("user.dir", "/")); + // System properties obtained from host are encoded using sun.jnu.encoding, so reencode them to + // the internal representation. + // https://github.com/openjdk/jdk/blob/285385247aaa262866697ed848040f05f4d94988/src/java.base/share/native/libjava/System.c#L121 + return PathFragment.create( + StringEncoding.platformToInternal(System.getProperty("user.dir", "/"))); } /** diff --git a/src/main/java/com/google/devtools/build/lib/vfs/JavaIoFileSystem.java b/src/main/java/com/google/devtools/build/lib/vfs/JavaIoFileSystem.java index cb74d3f22d2682..ae8a73d2b58c02 100644 --- a/src/main/java/com/google/devtools/build/lib/vfs/JavaIoFileSystem.java +++ b/src/main/java/com/google/devtools/build/lib/vfs/JavaIoFileSystem.java @@ -13,11 +13,13 @@ // limitations under the License. package com.google.devtools.build.lib.vfs; +import com.google.common.collect.Lists; import com.google.devtools.build.lib.clock.Clock; import com.google.devtools.build.lib.clock.JavaClock; import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe; import com.google.devtools.build.lib.profiler.Profiler; import com.google.devtools.build.lib.profiler.ProfilerTask; +import com.google.devtools.build.lib.util.StringEncoding; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; @@ -62,8 +64,9 @@ public JavaIoFileSystem(DigestHashFunction hashFunction) { this.clock = new JavaClock(); } + @Override protected File getIoFile(PathFragment path) { - return new File(path.toString()); + return new File(StringEncoding.internalToPlatform(path.getPathString())); } /** @@ -74,8 +77,9 @@ protected File getIoFile(PathFragment path) { * avoids extra allocations and does not lose track of the underlying Java filesystem, which is * useful for some in-memory filesystem implementations like JimFS. */ + @Override protected java.nio.file.Path getNioPath(PathFragment path) { - return Paths.get(path.toString()); + return Paths.get(StringEncoding.internalToPlatform(path.getPathString())); } private LinkOption[] linkOpts(boolean followSymlinks) { @@ -99,7 +103,7 @@ protected Collection getDirectoryEntries(PathFragment path) throws IOExc } finally { profiler.logSimpleTask(startTime, ProfilerTask.VFS_DIR, file.getPath()); } - return Arrays.asList(entries); + return Lists.transform(Arrays.asList(entries), StringEncoding::platformToInternal); } @Override @@ -279,7 +283,9 @@ protected void createSymbolicLink(PathFragment linkPath, PathFragment targetFrag throws IOException { java.nio.file.Path nioPath = getNioPath(linkPath); try { - Files.createSymbolicLink(nioPath, Paths.get(targetFragment.getSafePathString())); + Files.createSymbolicLink( + nioPath, + Paths.get(StringEncoding.internalToPlatform(targetFragment.getSafePathString()))); } catch (java.nio.file.FileAlreadyExistsException e) { throw new IOException(linkPath + ERR_FILE_EXISTS, e); } catch (java.nio.file.AccessDeniedException e) { @@ -295,7 +301,7 @@ protected PathFragment readSymbolicLink(PathFragment path) throws IOException { long startTime = Profiler.nanoTimeMaybe(); try { String link = Files.readSymbolicLink(nioPath).toString(); - return PathFragment.create(link); + return PathFragment.create(StringEncoding.platformToInternal(link)); } catch (java.nio.file.NotLinkException e) { throw new NotASymlinkException(path, e); } catch (java.nio.file.NoSuchFileException e) { @@ -517,8 +523,6 @@ protected FileStatus statIfFound(PathFragment path, boolean followSymlinks) { @Override protected void createFSDependentHardLink(PathFragment linkPath, PathFragment originalPath) throws IOException { - Files.createLink( - java.nio.file.Paths.get(linkPath.toString()), - java.nio.file.Paths.get(originalPath.toString())); + Files.createLink(getNioPath(linkPath), getNioPath(originalPath)); } } diff --git a/src/main/java/com/google/devtools/build/lib/vfs/PathTransformingDelegateFileSystem.java b/src/main/java/com/google/devtools/build/lib/vfs/PathTransformingDelegateFileSystem.java index 3e427af8b1ca2e..3cccd2d8de2bb7 100644 --- a/src/main/java/com/google/devtools/build/lib/vfs/PathTransformingDelegateFileSystem.java +++ b/src/main/java/com/google/devtools/build/lib/vfs/PathTransformingDelegateFileSystem.java @@ -16,6 +16,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -284,6 +285,16 @@ protected void prefetchPackageAsync(PathFragment path, int maxDirs) { delegateFs.prefetchPackageAsync(toDelegatePath(path), maxDirs); } + @Override + protected File getIoFile(PathFragment path) { + return delegateFs.getIoFile(toDelegatePath(path)); + } + + @Override + protected java.nio.file.Path getNioPath(PathFragment path) { + return delegateFs.getNioPath(toDelegatePath(path)); + } + /** Transform original path to a different one to be used with the {@code delegateFs}. */ protected abstract PathFragment toDelegatePath(PathFragment path); diff --git a/src/main/java/com/google/devtools/build/lib/windows/BUILD b/src/main/java/com/google/devtools/build/lib/windows/BUILD index 20809bbfb83094..13619da8aedb32 100644 --- a/src/main/java/com/google/devtools/build/lib/windows/BUILD +++ b/src/main/java/com/google/devtools/build/lib/windows/BUILD @@ -39,6 +39,7 @@ java_library( ":file", "//src/main/java/com/google/devtools/build/lib/concurrent", "//src/main/java/com/google/devtools/build/lib/profiler", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", "//third_party:guava", diff --git a/src/main/java/com/google/devtools/build/lib/windows/WindowsFileSystem.java b/src/main/java/com/google/devtools/build/lib/windows/WindowsFileSystem.java index 9c351785872e1a..f697b1143d0000 100644 --- a/src/main/java/com/google/devtools/build/lib/windows/WindowsFileSystem.java +++ b/src/main/java/com/google/devtools/build/lib/windows/WindowsFileSystem.java @@ -17,6 +17,7 @@ import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe; import com.google.devtools.build.lib.profiler.Profiler; import com.google.devtools.build.lib.profiler.ProfilerTask; +import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.vfs.DigestHashFunction; import com.google.devtools.build.lib.vfs.FileStatus; import com.google.devtools.build.lib.vfs.JavaIoFileSystem; @@ -53,7 +54,8 @@ public String getFileSystemType(PathFragment path) { protected boolean delete(PathFragment path) throws IOException { long startTime = Profiler.nanoTimeMaybe(); try { - return WindowsFileOperations.deletePath(path.getPathString()); + return WindowsFileOperations.deletePath( + StringEncoding.internalToPlatform(path.getPathString())); } catch (java.nio.file.DirectoryNotEmptyException e) { throw new IOException(path.getPathString() + ERR_DIRECTORY_NOT_EMPTY, e); } catch (java.nio.file.AccessDeniedException e) { @@ -77,17 +79,17 @@ protected void createSymbolicLink(PathFragment linkPath, PathFragment targetFrag ? targetFragment : linkPath.getParentDirectory().getRelative(targetFragment); try { - java.nio.file.Path link = getIoFile(linkPath).toPath(); - java.nio.file.Path target = getIoFile(targetPath).toPath(); - if (target.toFile().isDirectory()) { + File link = getIoFile(linkPath); + File target = getIoFile(targetPath); + if (target.isDirectory()) { WindowsFileOperations.createJunction(link.toString(), target.toString()); } else if (createSymbolicLinks) { WindowsFileOperations.createSymlink(link.toString(), target.toString()); - } else if (!target.toFile().exists()) { + } else if (!target.exists()) { // Still Create a dangling junction if the target doesn't exist. WindowsFileOperations.createJunction(link.toString(), target.toString()); } else { - Files.copy(target, link); + Files.copy(target.toPath(), link.toPath()); } } catch (java.nio.file.FileAlreadyExistsException e) { throw new IOException(linkPath + ERR_FILE_EXISTS, e); @@ -104,7 +106,7 @@ protected PathFragment readSymbolicLink(PathFragment path) throws IOException { WindowsFileOperations.ReadSymlinkOrJunctionResult result = WindowsFileOperations.readSymlinkOrJunction(nioPath.toString()); if (result.getStatus() == WindowsFileOperations.ReadSymlinkOrJunctionResult.Status.OK) { - return PathFragment.create(result.getResult()); + return PathFragment.create(StringEncoding.platformToInternal(result.getResult())); } if (result.getStatus() == WindowsFileOperations.ReadSymlinkOrJunctionResult.Status.NOT_A_LINK) { throw new NotASymlinkException(path); diff --git a/src/main/java/com/google/devtools/build/lib/worker/BUILD b/src/main/java/com/google/devtools/build/lib/worker/BUILD index 067c20001721df..759c7c341eda03 100644 --- a/src/main/java/com/google/devtools/build/lib/worker/BUILD +++ b/src/main/java/com/google/devtools/build/lib/worker/BUILD @@ -75,7 +75,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/exec/local", "//src/main/java/com/google/devtools/build/lib/profiler", "//src/main/java/com/google/devtools/build/lib/sandbox:sandbox_helpers", - "//src/main/java/com/google/devtools/build/lib/util:string", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/util/io", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", diff --git a/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnRunner.java b/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnRunner.java index f9bd86ba820d0b..2cfd2c3ed64851 100644 --- a/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnRunner.java +++ b/src/main/java/com/google/devtools/build/lib/worker/WorkerSpawnRunner.java @@ -15,7 +15,6 @@ package com.google.devtools.build.lib.worker; import static com.google.common.base.Preconditions.checkNotNull; -import static com.google.devtools.build.lib.util.StringUtil.reencodeInternalToExternal; import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.base.Stopwatch; @@ -58,6 +57,7 @@ import com.google.devtools.build.lib.server.FailureDetails; import com.google.devtools.build.lib.server.FailureDetails.FailureDetail; import com.google.devtools.build.lib.server.FailureDetails.Worker.Code; +import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.util.io.FileOutErr; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; @@ -277,7 +277,7 @@ private WorkRequest createWorkRequest( requestBuilder .addInputsBuilder() - .setPath(reencodeInternalToExternal(input.getExecPathString())) + .setPath(StringEncoding.internalToUnicode(input.getExecPathString())) .setDigest(digest); } if (workerOptions.workerVerbose) { diff --git a/src/main/native/BUILD b/src/main/native/BUILD index c011aef6d20c2b..0942aa368a27fb 100644 --- a/src/main/native/BUILD +++ b/src/main/native/BUILD @@ -52,6 +52,9 @@ cc_library( ], hdrs = ["latin1_jni_path.h"], includes = ["."], # For jni headers. + deps = [ + "//src/main/cpp/util:logging", + ], ) cc_library( diff --git a/src/main/native/darwin/fsevents.cc b/src/main/native/darwin/fsevents.cc index 76e933cbd07978..fb0e65a1b12ff1 100644 --- a/src/main/native/darwin/fsevents.cc +++ b/src/main/native/darwin/fsevents.cc @@ -98,18 +98,20 @@ Java_com_google_devtools_build_lib_skyframe_MacOSXFsEventsDiffAwareness_create( context.release = nullptr; context.copyDescription = nullptr; - // Create an CFArrayRef of CFStringRef from the Java array of String + // Create a CFArrayRef of CFStringRef from the Java array of UTF-8 byte + // strings. jsize length = env->GetArrayLength(paths); CFStringRef *pathsArray = new CFStringRef[length]; for (int i = 0; i < length; i++) { - jstring path = (jstring)env->GetObjectArrayElement(paths, i); - const char *pathCStr = env->GetStringUTFChars(path, nullptr); - pathsArray[i] = - CFStringCreateWithCString(nullptr, pathCStr, kCFStringEncodingUTF8); - env->ReleaseStringUTFChars(path, pathCStr); + jbyteArray path = (jbyteArray)env->GetObjectArrayElement(paths, i); + jbyte *pathBytes = env->GetByteArrayElements(path, nullptr); + jsize pathLength = env->GetArrayLength(path); + pathsArray[i] = CFStringCreateWithBytes( + nullptr, (UInt8 *)pathBytes, pathLength, kCFStringEncodingUTF8, false); + env->ReleaseByteArrayElements(path, pathBytes, JNI_ABORT); } CFArrayRef pathsToWatch = - CFArrayCreate(nullptr, (const void **)pathsArray, 1, nullptr); + CFArrayCreate(nullptr, (const void **)pathsArray, length, nullptr); delete[] pathsArray; info->stream = FSEventStreamCreate( nullptr, &FsEventsDiffAwarenessCallback, &context, pathsToWatch, @@ -160,11 +162,15 @@ Java_com_google_devtools_build_lib_skyframe_MacOSXFsEventsDiffAwareness_poll( if (info->everything_changed) { result = nullptr; } else { - jclass classString = env->FindClass("java/lang/String"); - result = env->NewObjectArray(info->paths.size(), classString, nullptr); + jclass classByteArray = env->FindClass("[B"); + result = env->NewObjectArray(info->paths.size(), classByteArray, nullptr); int i = 0; for (auto it = info->paths.begin(); it != info->paths.end(); it++, i++) { - env->SetObjectArrayElement(result, i, env->NewStringUTF(it->c_str())); + jbyteArray path = env->NewByteArray(it->size()); + env->SetByteArrayRegion(path, 0, it->size(), + reinterpret_cast(it->c_str())); + env->SetObjectArrayElement(result, i, path); + env->DeleteLocalRef(path); } } diff --git a/src/main/native/latin1_jni_path.cc b/src/main/native/latin1_jni_path.cc index 042406abeca845..950ba554310d96 100644 --- a/src/main/native/latin1_jni_path.cc +++ b/src/main/native/latin1_jni_path.cc @@ -14,8 +14,11 @@ #include "src/main/native/latin1_jni_path.h" +#include #include +#include "src/main/cpp/util/logging.h" + namespace blaze_jni { jstring NewStringLatin1(JNIEnv *env, const char *str) { @@ -39,53 +42,26 @@ jstring NewStringLatin1(JNIEnv *env, const char *str) { return result; } -static jfieldID String_coder_field; -static jfieldID String_value_field; - -static bool CompactStringsEnabled(JNIEnv *env) { - if (jclass klass = env->FindClass("java/lang/String")) { - if (jfieldID csf = env->GetStaticFieldID(klass, "COMPACT_STRINGS", "Z")) { - if (env->GetStaticBooleanField(klass, csf)) { - if ((String_coder_field = env->GetFieldID(klass, "coder", "B"))) { - if ((String_value_field = env->GetFieldID(klass, "value", "[B"))) { - return true; - } - } - } - } - } - env->ExceptionClear(); - return false; -} - char *GetStringLatin1Chars(JNIEnv *env, jstring jstr) { - jint len = env->GetStringLength(jstr); + static jclass String_class = env->FindClass("java/lang/String"); + static jfieldID String_coder_field = + env->GetFieldID(String_class, "coder", "B"); + static jfieldID String_value_field = + env->GetFieldID(String_class, "value", "[B"); - // Fast path for latin1 strings. - static bool cs_enabled = CompactStringsEnabled(env); - const int LATIN1 = 0; - if (cs_enabled && env->GetByteField(jstr, String_coder_field) == LATIN1) { - char *result = new char[len + 1]; - if (jobject jvalue = env->GetObjectField(jstr, String_value_field)) { - env->GetByteArrayRegion((jbyteArray)jvalue, 0, len, (jbyte *)result); - } - result[len] = 0; - return result; + // All path strings used in Bazel are encoded as raw bytes with a Latin1 + // coder. + if (env->GetByteField(jstr, String_coder_field) != 0) { + BAZEL_LOG(FATAL) << "Expected Latin1-encoded string"; } - - const jchar *str = env->GetStringCritical(jstr, nullptr); - if (str == nullptr) { - return nullptr; - } - + jint len = env->GetStringLength(jstr); char *result = new char[len + 1]; - for (int i = 0; i < len; i++) { - jchar unicode = str[i]; // (unsigned) - result[i] = unicode <= 0x00ff ? unicode : '?'; + if (jobject jvalue = env->GetObjectField(jstr, String_value_field)) { + env->GetByteArrayRegion((jbyteArray)jvalue, 0, len, (jbyte *)result); + } else { + return nullptr; } - result[len] = 0; - env->ReleaseStringCritical(jstr, str); return result; } diff --git a/src/test/cpp/blaze_util_windows_test.cc b/src/test/cpp/blaze_util_windows_test.cc index 7aa0f3e1470105..a6ab71c2a52a4d 100644 --- a/src/test/cpp/blaze_util_windows_test.cc +++ b/src/test/cpp/blaze_util_windows_test.cc @@ -128,7 +128,7 @@ TEST(BlazeUtilWindowsTest, TestSetEnv) { SetEnv("Bazel_TEST_Key1", "some_VALUE"); ASSERT_ENVVAR("Bazel_TEST_Key1", "some_VALUE"); SetEnv("Bazel_TEST_Key1", ""); - ASSERT_ENVVAR_UNSET("Bazel_TEST_Key1"); + ASSERT_ENVVAR("Bazel_TEST_Key1", ""); string long_string(MAX_PATH, 'a'); string long_key = string("Bazel_TEST_Key2_") + long_string; @@ -138,19 +138,19 @@ TEST(BlazeUtilWindowsTest, TestSetEnv) { SetEnv(long_key, long_value); ASSERT_ENVVAR(long_key.c_str(), long_value.c_str()); SetEnv(long_key, ""); - ASSERT_ENVVAR_UNSET(long_key.c_str()); + ASSERT_ENVVAR(long_key.c_str(), ""); } TEST(BlazeUtilWindowsTest, TestUnsetEnv) { - ASSERT_ENVVAR_UNSET("Bazel_TEST_Key1"); - SetEnv("Bazel_TEST_Key1", "some_VALUE"); - ASSERT_ENVVAR("Bazel_TEST_Key1", "some_VALUE"); - UnsetEnv("Bazel_TEST_Key1"); - ASSERT_ENVVAR_UNSET("Bazel_TEST_Key1"); + ASSERT_ENVVAR_UNSET("Bazel_TEST_Key3"); + SetEnv("Bazel_TEST_Key3", "some_VALUE"); + ASSERT_ENVVAR("Bazel_TEST_Key3", "some_VALUE"); + UnsetEnv("Bazel_TEST_Key3"); + ASSERT_ENVVAR_UNSET("Bazel_TEST_Key3"); string long_string(MAX_PATH, 'a'); - string long_key = string("Bazel_TEST_Key2_") + long_string; - string long_value = string("Bazel_TEST_Value2_") + long_string; + string long_key = string("Bazel_TEST_Key4_") + long_string; + string long_value = string("Bazel_TEST_Value4_") + long_string; ASSERT_ENVVAR_UNSET(long_key.c_str()); SetEnv(long_key, long_value); diff --git a/src/test/java/com/google/devtools/build/lib/analysis/starlark/ArgsParamFileTest.java b/src/test/java/com/google/devtools/build/lib/analysis/starlark/ArgsParamFileTest.java index 37a6a87879cbb1..7ecbd19a7df016 100644 --- a/src/test/java/com/google/devtools/build/lib/analysis/starlark/ArgsParamFileTest.java +++ b/src/test/java/com/google/devtools/build/lib/analysis/starlark/ArgsParamFileTest.java @@ -147,8 +147,7 @@ private static ImmutableList toParamFile(Args args) throws Exception { ParameterFile.writeParameterFile( outputStream, args.build(() -> RepositoryMapping.ALWAYS_FALLBACK).arguments(), - args.getParameterFileType(), - UTF_8); + args.getParameterFileType()); bytes = outputStream.toByteArray(); } try (ByteArrayInputStream inputStream = new ByteArrayInputStream(bytes); diff --git a/src/test/java/com/google/devtools/build/lib/analysis/starlark/FlagPerLineTest.java b/src/test/java/com/google/devtools/build/lib/analysis/starlark/FlagPerLineTest.java index 2e536f6297a5b7..451bd47eef5a5f 100644 --- a/src/test/java/com/google/devtools/build/lib/analysis/starlark/FlagPerLineTest.java +++ b/src/test/java/com/google/devtools/build/lib/analysis/starlark/FlagPerLineTest.java @@ -169,8 +169,7 @@ private static ImmutableList toParamFile(Args args) throws Exception { ParameterFile.writeParameterFile( outputStream, args.build(() -> RepositoryMapping.ALWAYS_FALLBACK).arguments(), - args.getParameterFileType(), - UTF_8); + args.getParameterFileType()); bytes = outputStream.toByteArray(); } try (ByteArrayInputStream inputStream = new ByteArrayInputStream(bytes); diff --git a/src/test/java/com/google/devtools/build/lib/analysis/util/BuildViewTestCase.java b/src/test/java/com/google/devtools/build/lib/analysis/util/BuildViewTestCase.java index 652bc63f92d98b..a10f16d612700f 100644 --- a/src/test/java/com/google/devtools/build/lib/analysis/util/BuildViewTestCase.java +++ b/src/test/java/com/google/devtools/build/lib/analysis/util/BuildViewTestCase.java @@ -187,6 +187,7 @@ import com.google.errorprone.annotations.ForOverride; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -465,7 +466,9 @@ protected BuildOptions createBuildOptions(String... args) } protected Target getTarget(String label) - throws NoSuchPackageException, NoSuchTargetException, LabelSyntaxException, + throws NoSuchPackageException, + NoSuchTargetException, + LabelSyntaxException, InterruptedException { return getTarget(Label.parseCanonical(label)); } @@ -697,8 +700,8 @@ public SkyFunctionName functionName() { return null; } }, - /*extendedSanityChecks=*/ false, - /*allowAnalysisFailures=*/ false, + /* extendedSanityChecks= */ false, + /* allowAnalysisFailures= */ false, reporter, env, starlarkBuiltinsValue); @@ -773,7 +776,7 @@ protected static void assertConfigurationsEqual( protected static void assertConfigurationsEqual( BuildConfigurationValue config1, BuildConfigurationValue config2) { - assertConfigurationsEqual(config1, config2, /*excludeFragmentOptions=*/ ImmutableSet.of()); + assertConfigurationsEqual(config1, config2, /* excludeFragmentOptions= */ ImmutableSet.of()); } /** @@ -920,11 +923,8 @@ protected final String paramFileStringContentsForAction(Action action) if (pair.paramFileInfo != null) { ByteArrayOutputStream out = new ByteArrayOutputStream(); ParameterFile.writeParameterFile( - out, - pair.commandLine.arguments(), - pair.paramFileInfo.getFileType(), - pair.paramFileInfo.getCharset()); - return out.toString(pair.paramFileInfo.getCharset()); + out, pair.commandLine.arguments(), pair.paramFileInfo.getFileType()); + return out.toString(StandardCharsets.ISO_8859_1); } } } @@ -1095,7 +1095,8 @@ protected ConfiguredTarget getConfiguredTarget(Label label, BuildConfigurationVa */ protected ConfiguredTargetAndData getConfiguredTargetAndData( Label label, BuildConfigurationValue config) - throws StarlarkTransition.TransitionException, InvalidConfigurationException, + throws StarlarkTransition.TransitionException, + InvalidConfigurationException, InterruptedException { return view.getConfiguredTargetAndDataForTesting(reporter, label, config); } @@ -1106,8 +1107,10 @@ protected ConfiguredTargetAndData getConfiguredTargetAndData( * config in the ConfiguredTargetAndData's ConfiguredTarget. */ public ConfiguredTargetAndData getConfiguredTargetAndData(String label) - throws LabelSyntaxException, StarlarkTransition.TransitionException, - InvalidConfigurationException, InterruptedException { + throws LabelSyntaxException, + StarlarkTransition.TransitionException, + InvalidConfigurationException, + InterruptedException { return getConfiguredTargetAndData(Label.parseCanonical(label), targetConfig); } @@ -1935,7 +1938,7 @@ protected AnalysisResult update(String target, int loadingPhaseThreads, boolean return update( ImmutableList.of(target), ImmutableList.of(), - /*keepGoing=*/ true, // value doesn't matter since we have only one target. + /* keepGoing= */ true, // value doesn't matter since we have only one target. loadingPhaseThreads, doAnalysis, new EventBus()); @@ -1976,7 +1979,7 @@ protected AnalysisResult update( loadingOptions, loadingPhaseThreads, keepGoing, - /*determineTests=*/ false); + /* determineTests= */ false); if (!doAnalysis) { // TODO(bazel-team): What's supposed to happen in this case? return null; @@ -1984,9 +1987,9 @@ protected AnalysisResult update( return view.update( loadingResult, targetConfig.getOptions(), - /*explicitTargetPatterns=*/ ImmutableSet.of(), + /* explicitTargetPatterns= */ ImmutableSet.of(), aspects, - /*aspectsParameters=*/ ImmutableMap.of(), + /* aspectsParameters= */ ImmutableMap.of(), viewOptions, keepGoing, loadingPhaseThreads, diff --git a/src/test/java/com/google/devtools/build/lib/exec/ParameterFileTest.java b/src/test/java/com/google/devtools/build/lib/exec/ParameterFileTest.java index 11b038952c45f8..4204b537b7c98b 100644 --- a/src/test/java/com/google/devtools/build/lib/exec/ParameterFileTest.java +++ b/src/test/java/com/google/devtools/build/lib/exec/ParameterFileTest.java @@ -17,6 +17,7 @@ import static com.google.common.truth.Truth.assertThat; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; import com.google.devtools.build.lib.actions.ParameterFile; import com.google.devtools.build.lib.actions.ParameterFile.ParameterFileType; import com.google.devtools.build.lib.testutil.FoundationTestCase; @@ -80,9 +81,12 @@ public void nonFlags() throws Exception { private static ImmutableList writeContent(Charset charset, Iterable content) throws Exception { ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - ParameterFile.writeParameterFile(outputStream, content, ParameterFileType.UNQUOTED, charset); - return ImmutableList.builder() - .add(new String(outputStream.toByteArray(), charset).split("\n")) - .build(); + ParameterFile.writeParameterFile( + outputStream, + Iterables.transform( + // Bazel internally represents all strings as raw bytes in ISO-8859-1. + content, s -> new String(s.getBytes(charset), StandardCharsets.ISO_8859_1)), + ParameterFileType.UNQUOTED); + return ImmutableList.builder().add(outputStream.toString(charset).split("\n")).build(); } } diff --git a/src/test/java/com/google/devtools/build/lib/exec/SpawnLogContextTestBase.java b/src/test/java/com/google/devtools/build/lib/exec/SpawnLogContextTestBase.java index 24f8f6967a8521..1f9fec8a2c7461 100644 --- a/src/test/java/com/google/devtools/build/lib/exec/SpawnLogContextTestBase.java +++ b/src/test/java/com/google/devtools/build/lib/exec/SpawnLogContextTestBase.java @@ -1565,8 +1565,7 @@ public void testParamFileInput() throws Exception { new ParamFileActionInput( PathFragment.create("foo.params"), ImmutableList.of("a", "b", "c"), - ParameterFileType.UNQUOTED, - UTF_8); + ParameterFileType.UNQUOTED); // Do not materialize the file on disk, which would be the case when running remotely. SpawnBuilder spawn = defaultSpawnBuilder().withInputs(paramFileInput); diff --git a/src/test/java/com/google/devtools/build/lib/exec/local/LocalSpawnRunnerTest.java b/src/test/java/com/google/devtools/build/lib/exec/local/LocalSpawnRunnerTest.java index 68cd9af164d7e3..32ac9d78a3060e 100644 --- a/src/test/java/com/google/devtools/build/lib/exec/local/LocalSpawnRunnerTest.java +++ b/src/test/java/com/google/devtools/build/lib/exec/local/LocalSpawnRunnerTest.java @@ -345,8 +345,7 @@ public void testParamFiles() throws Exception { new ParamFileActionInput( PathFragment.create("some/dir/params"), ImmutableList.of("--foo", "--bar"), - ParameterFileType.UNQUOTED, - UTF_8); + ParameterFileType.UNQUOTED); Spawn spawn = new SpawnBuilder("/bin/echo", "Hi!") .withInput(paramFileActionInput) diff --git a/src/test/java/com/google/devtools/build/lib/remote/BUILD b/src/test/java/com/google/devtools/build/lib/remote/BUILD index 865e62bbaf052f..8beb307d13a8ac 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/BUILD +++ b/src/test/java/com/google/devtools/build/lib/remote/BUILD @@ -125,7 +125,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/util:abrupt_exit_exception", "//src/main/java/com/google/devtools/build/lib/util:exit_code", "//src/main/java/com/google/devtools/build/lib/util:os", - "//src/main/java/com/google/devtools/build/lib/util:string", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/util:temp_path_generator", "//src/main/java/com/google/devtools/build/lib/util/io", "//src/main/java/com/google/devtools/build/lib/util/io:io-proto", diff --git a/src/test/java/com/google/devtools/build/lib/remote/RemoteExecutionServiceTest.java b/src/test/java/com/google/devtools/build/lib/remote/RemoteExecutionServiceTest.java index 09e4d8763b2c79..8947d3bc821075 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/RemoteExecutionServiceTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/RemoteExecutionServiceTest.java @@ -21,7 +21,7 @@ import static com.google.devtools.build.lib.actions.ExecutionRequirements.REMOTE_EXECUTION_INLINE_OUTPUTS; import static com.google.devtools.build.lib.remote.util.DigestUtil.toBinaryDigest; import static com.google.devtools.build.lib.remote.util.Utils.getFromFuture; -import static com.google.devtools.build.lib.util.StringUtil.reencodeExternalToInternal; +import static com.google.devtools.build.lib.util.StringEncoding.unicodeToInternal; import static com.google.devtools.build.lib.vfs.FileSystemUtils.readContent; import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertThrows; @@ -1678,7 +1678,7 @@ public void downloadOutputs_missingMandatoryOutputs_reportError() throws Excepti ImmutableSet.Builder outputs = ImmutableSet.builder(); ImmutableList expectedOutputFiles = ImmutableList.of("outputs/foo", "outputs/bar"); for (String outputFile : expectedOutputFiles) { - Path path = remotePathResolver.outputPathToLocalPath(reencodeExternalToInternal(outputFile)); + Path path = remotePathResolver.outputPathToLocalPath(unicodeToInternal(outputFile)); Artifact output = ActionsTestUtil.createArtifact(artifactRoot, path); outputs.add(output); } @@ -2553,15 +2553,13 @@ private Spawn newSpawnFromResult( ImmutableMap executionInfo, RemoteActionResult result) { ImmutableSet.Builder outputs = ImmutableSet.builder(); for (OutputFile file : result.getOutputFiles()) { - Path path = - remotePathResolver.outputPathToLocalPath(reencodeExternalToInternal(file.getPath())); + Path path = remotePathResolver.outputPathToLocalPath(unicodeToInternal(file.getPath())); Artifact output = ActionsTestUtil.createArtifact(artifactRoot, path); outputs.add(output); } for (OutputDirectory directory : result.getOutputDirectories()) { - Path path = - remotePathResolver.outputPathToLocalPath(reencodeExternalToInternal(directory.getPath())); + Path path = remotePathResolver.outputPathToLocalPath(unicodeToInternal(directory.getPath())); Artifact output = ActionsTestUtil.createTreeArtifactWithGeneratingAction( artifactRoot, path.relativeTo(execRoot)); @@ -2570,16 +2568,14 @@ private Spawn newSpawnFromResult( for (OutputSymlink fileSymlink : result.getOutputFileSymlinks()) { Path path = - remotePathResolver.outputPathToLocalPath( - reencodeExternalToInternal(fileSymlink.getPath())); + remotePathResolver.outputPathToLocalPath(unicodeToInternal(fileSymlink.getPath())); Artifact output = ActionsTestUtil.createArtifact(artifactRoot, path); outputs.add(output); } for (OutputSymlink directorySymlink : result.getOutputDirectorySymlinks()) { Path path = - remotePathResolver.outputPathToLocalPath( - reencodeExternalToInternal(directorySymlink.getPath())); + remotePathResolver.outputPathToLocalPath(unicodeToInternal(directorySymlink.getPath())); Artifact output = ActionsTestUtil.createTreeArtifactWithGeneratingAction( artifactRoot, path.relativeTo(execRoot)); @@ -2587,8 +2583,7 @@ private Spawn newSpawnFromResult( } for (OutputSymlink symlink : result.getOutputSymlinks()) { - Path path = - remotePathResolver.outputPathToLocalPath(reencodeExternalToInternal(symlink.getPath())); + Path path = remotePathResolver.outputPathToLocalPath(unicodeToInternal(symlink.getPath())); Artifact output = ActionsTestUtil.createArtifact(artifactRoot, path); outputs.add(output); } diff --git a/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerTest.java b/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerTest.java index fc319076995472..18357f6c4048a6 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/RemoteSpawnRunnerTest.java @@ -16,7 +16,6 @@ import static com.google.common.truth.Truth.assertThat; import static com.google.common.truth.extensions.proto.ProtoTruth.assertThat; import static com.google.common.util.concurrent.MoreExecutors.directExecutor; -import static java.nio.charset.StandardCharsets.ISO_8859_1; import static org.junit.Assert.assertThrows; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; @@ -1208,7 +1207,7 @@ private void testParamFilesAreMaterializedForFlag(String flag) throws Exception ImmutableList args = ImmutableList.of("--foo", "--bar"); ParamFileActionInput input = new ParamFileActionInput( - PathFragment.create("out/param_file"), args, ParameterFileType.UNQUOTED, ISO_8859_1); + PathFragment.create("out/param_file"), args, ParameterFileType.UNQUOTED); Spawn spawn = new SimpleSpawn( new FakeOwner("foo", "bar", "//dummy:label"), diff --git a/src/test/java/com/google/devtools/build/lib/sandbox/DarwinSandboxedSpawnRunnerTest.java b/src/test/java/com/google/devtools/build/lib/sandbox/DarwinSandboxedSpawnRunnerTest.java index cd6908164d1e9f..f01c488442d9e0 100644 --- a/src/test/java/com/google/devtools/build/lib/sandbox/DarwinSandboxedSpawnRunnerTest.java +++ b/src/test/java/com/google/devtools/build/lib/sandbox/DarwinSandboxedSpawnRunnerTest.java @@ -146,8 +146,7 @@ public void testSupportsParamFiles() throws Exception { new ParamFileActionInput( PathFragment.create("params/param-file"), ImmutableList.of("--foo", "--bar"), - ParameterFileType.UNQUOTED, - StandardCharsets.UTF_8)) + ParameterFileType.UNQUOTED)) .withOutput("out") .build(); FileOutErr fileOutErr = diff --git a/src/test/java/com/google/devtools/build/lib/sandbox/LinuxSandboxedSpawnRunnerTest.java b/src/test/java/com/google/devtools/build/lib/sandbox/LinuxSandboxedSpawnRunnerTest.java index 1dfbf40572d4d0..fe4e7d696612ba 100644 --- a/src/test/java/com/google/devtools/build/lib/sandbox/LinuxSandboxedSpawnRunnerTest.java +++ b/src/test/java/com/google/devtools/build/lib/sandbox/LinuxSandboxedSpawnRunnerTest.java @@ -84,8 +84,7 @@ public void exec_commandWithParamFiles_executesSuccessfully() throws Exception { new ParamFileActionInput( PathFragment.create("params/param-file"), ImmutableList.of("--foo", "--bar"), - ParameterFileType.UNQUOTED, - UTF_8)) + ParameterFileType.UNQUOTED)) .withOutput("out") .build(); SpawnExecutionContext policy = createSpawnExecutionContext(spawn); diff --git a/src/test/java/com/google/devtools/build/lib/sandbox/SandboxHelpersTest.java b/src/test/java/com/google/devtools/build/lib/sandbox/SandboxHelpersTest.java index 889aeb797a48a9..906f3c04b7fb5b 100644 --- a/src/test/java/com/google/devtools/build/lib/sandbox/SandboxHelpersTest.java +++ b/src/test/java/com/google/devtools/build/lib/sandbox/SandboxHelpersTest.java @@ -96,8 +96,7 @@ public void processInputFiles_materializesParamFile() throws Exception { new ParamFileActionInput( PathFragment.create("paramFile"), ImmutableList.of("-a", "-b"), - ParameterFileType.UNQUOTED, - UTF_8); + ParameterFileType.UNQUOTED); SandboxInputs inputs = sandboxHelpers.processInputFiles(inputMap(paramFile), execRoot); @@ -191,8 +190,7 @@ public void atomicallyWriteVirtualInput_writesParamFile() throws Exception { new ParamFileActionInput( PathFragment.create("paramFile"), ImmutableList.of("-a", "-b"), - ParameterFileType.UNQUOTED, - UTF_8); + ParameterFileType.UNQUOTED); paramFile.atomicallyWriteRelativeTo(scratch.resolve("/outputs")); diff --git a/src/test/java/com/google/devtools/build/lib/skyframe/MacOSXFsEventsDiffAwarenessTest.java b/src/test/java/com/google/devtools/build/lib/skyframe/MacOSXFsEventsDiffAwarenessTest.java index 8c65a4f1d76db3..9d23af672ba468 100644 --- a/src/test/java/com/google/devtools/build/lib/skyframe/MacOSXFsEventsDiffAwarenessTest.java +++ b/src/test/java/com/google/devtools/build/lib/skyframe/MacOSXFsEventsDiffAwarenessTest.java @@ -80,7 +80,7 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx @Before public void setUp() throws Exception { watchedPath = com.google.common.io.Files.createTempDir().getCanonicalFile().toPath(); - underTest = new MacOSXFsEventsDiffAwareness(watchedPath.toString()); + underTest = new MacOSXFsEventsDiffAwareness(watchedPath); LocalDiffAwareness.Options localDiffOptions = new LocalDiffAwareness.Options(); localDiffOptions.watchFS = true; watchFsEnabledProvider = FakeOptions.of(localDiffOptions); diff --git a/src/test/java/com/google/devtools/build/lib/testutil/BUILD b/src/test/java/com/google/devtools/build/lib/testutil/BUILD index 5430ed23bc82ac..c2016b873ffb50 100644 --- a/src/test/java/com/google/devtools/build/lib/testutil/BUILD +++ b/src/test/java/com/google/devtools/build/lib/testutil/BUILD @@ -227,6 +227,7 @@ java_library( name = "TestUtils", srcs = ["TestUtils.java"], deps = [ + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", "//third_party:jsr305", diff --git a/src/test/java/com/google/devtools/build/lib/testutil/TestUtils.java b/src/test/java/com/google/devtools/build/lib/testutil/TestUtils.java index 76a2ff25171322..e41de6bdccfb97 100644 --- a/src/test/java/com/google/devtools/build/lib/testutil/TestUtils.java +++ b/src/test/java/com/google/devtools/build/lib/testutil/TestUtils.java @@ -14,6 +14,7 @@ package com.google.devtools.build.lib.testutil; +import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.vfs.DigestHashFunction; import com.google.devtools.build.lib.vfs.Dirent; import com.google.devtools.build.lib.vfs.FileStatus; @@ -79,7 +80,10 @@ public static Path createUniqueTmpDir(FileSystem fileSystem) throws IOException fileSystem = new JavaIoFileSystem(DigestHashFunction.SHA256); } File tmpDirRoot = tmpDirRoot(); - Path path = fileSystem.getPath(tmpDirRoot.getPath()).getRelative(UUID.randomUUID().toString()); + Path path = + fileSystem + .getPath(StringEncoding.platformToInternal(tmpDirRoot.getPath())) + .getRelative(UUID.randomUUID().toString()); path.createDirectoryAndParents(); return path; } diff --git a/src/test/java/com/google/devtools/build/lib/unix/BUILD b/src/test/java/com/google/devtools/build/lib/unix/BUILD index 3e6ce82249f2e8..afded4782a3e58 100644 --- a/src/test/java/com/google/devtools/build/lib/unix/BUILD +++ b/src/test/java/com/google/devtools/build/lib/unix/BUILD @@ -46,6 +46,14 @@ java_library( java_test( name = "UnixTests", size = "medium", + env = select({ + "@platforms//os:linux": { + # Force the same locale as Bazel's client to test roundtripping of + # arbitrary byte sequences through strings. + "LC_ALL": "en_US.ISO-8859-1", + }, + "//conditions:default": {}, + }), tags = ["no_windows"], test_class = "com.google.devtools.build.lib.AllTests", runtime_deps = [ diff --git a/src/test/java/com/google/devtools/build/lib/unsafe/StringUnsafeTest.java b/src/test/java/com/google/devtools/build/lib/unsafe/StringUnsafeTest.java index 917a586762bf7e..9386b0c975ba03 100644 --- a/src/test/java/com/google/devtools/build/lib/unsafe/StringUnsafeTest.java +++ b/src/test/java/com/google/devtools/build/lib/unsafe/StringUnsafeTest.java @@ -56,4 +56,14 @@ public void testNewInstance() throws Exception { assertThat(stringUnsafe.newInstance(stringUnsafe.getByteArray(s), stringUnsafe.getCoder(s))) .isEqualTo("hello"); } + + @Test + public void testIsAscii() { + StringUnsafe stringUnsafe = StringUnsafe.getInstance(); + assertThat(stringUnsafe.isAscii("")).isTrue(); + assertThat(stringUnsafe.isAscii("hello")).isTrue(); + assertThat(stringUnsafe.isAscii("hällo")).isFalse(); + assertThat(stringUnsafe.isAscii("hållo")).isFalse(); + assertThat(stringUnsafe.isAscii("h👋llo")).isFalse(); + } } diff --git a/src/test/java/com/google/devtools/build/lib/util/BUILD b/src/test/java/com/google/devtools/build/lib/util/BUILD index 36a2e3a0f987bb..72fef2f50818f7 100644 --- a/src/test/java/com/google/devtools/build/lib/util/BUILD +++ b/src/test/java/com/google/devtools/build/lib/util/BUILD @@ -27,6 +27,7 @@ java_test( "DependencySetWindowsTest.java", "ResourceFileLoaderTest.java", "HeapOffsetHelperTest.java", + "StringEncodingTest.java", ], ), tags = [ @@ -172,3 +173,24 @@ java_test( "//third_party:truth", ], ) + +java_test( + name = "StringEncodingTest", + srcs = ["StringEncodingTest.java"], + env = select({ + "@platforms//os:linux": { + # Force the same locale as Bazel's client to test roundtripping of + # arbitrary byte sequences through strings. + "LC_ALL": "en_US.ISO-8859-1", + }, + "//conditions:default": {}, + }), + deps = [ + "//src/main/java/com/google/devtools/build/lib/unsafe:string", + "//src/main/java/com/google/devtools/build/lib/util:os", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", + "//third_party:junit4", + "//third_party:truth", + "@maven//:com_google_testparameterinjector_test_parameter_injector", + ], +) diff --git a/src/test/java/com/google/devtools/build/lib/util/StringEncodingTest.java b/src/test/java/com/google/devtools/build/lib/util/StringEncodingTest.java new file mode 100644 index 00000000000000..ae5cb8a280dd2c --- /dev/null +++ b/src/test/java/com/google/devtools/build/lib/util/StringEncodingTest.java @@ -0,0 +1,198 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.devtools.build.lib.util; + +import static com.google.common.truth.Truth.assertThat; +import static com.google.common.truth.TruthJUnit.assume; +import static com.google.devtools.build.lib.util.StringEncoding.internalToPlatform; +import static com.google.devtools.build.lib.util.StringEncoding.internalToUnicode; +import static com.google.devtools.build.lib.util.StringEncoding.platformToInternal; +import static com.google.devtools.build.lib.util.StringEncoding.unicodeToInternal; +import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.nio.charset.StandardCharsets.UTF_8; + +import com.google.devtools.build.lib.unsafe.StringUnsafe; +import com.google.testing.junit.testparameterinjector.TestParameter; +import com.google.testing.junit.testparameterinjector.TestParameterInjector; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import org.junit.Test; +import org.junit.runner.RunWith; + +@RunWith(TestParameterInjector.class) +public class StringEncodingTest { + + public static final Charset SUN_JNU_ENCODING = + Charset.forName(System.getProperty("sun.jnu.encoding")); + + @Test + public void testUnicodeToInternal() { + assertThat(unicodeToInternal("")).isSameInstanceAs(""); + assertThat(unicodeToInternal("hello")).isSameInstanceAs("hello"); + assertThat(unicodeToInternal("hällo")) + .isEqualTo(new String("hällo".getBytes(UTF_8), ISO_8859_1)); + assertThat(unicodeToInternal("hållo")) + .isEqualTo(new String("hållo".getBytes(UTF_8), ISO_8859_1)); + assertThat(unicodeToInternal("h👋llo")) + .isEqualTo(new String("h👋llo".getBytes(UTF_8), ISO_8859_1)); + } + + @Test + public void testInternalToUnicode() { + assertThat(internalToUnicode("")).isSameInstanceAs(""); + assertThat(internalToUnicode("hello")).isSameInstanceAs("hello"); + assertThat(internalToUnicode(new String("hällo".getBytes(UTF_8), ISO_8859_1))) + .isEqualTo("hällo"); + assertThat(internalToUnicode(new String("hållo".getBytes(UTF_8), ISO_8859_1))) + .isEqualTo("hållo"); + assertThat(internalToUnicode(new String("h👋llo".getBytes(UTF_8), ISO_8859_1))) + .isEqualTo("h👋llo"); + } + + @Test + public void testPlatformToInternal() { + if (SUN_JNU_ENCODING.equals(ISO_8859_1) && OS.getCurrent() == OS.LINUX) { + assertThat(platformToInternal("")).isSameInstanceAs(""); + assertThat(platformToInternal("hello")).isSameInstanceAs("hello"); + { + String s = new String("hällo".getBytes(UTF_8), ISO_8859_1); + assertThat(platformToInternal(s)).isSameInstanceAs(s); + } + { + String s = new String("hållo".getBytes(UTF_8), ISO_8859_1); + assertThat(platformToInternal(s)).isSameInstanceAs(s); + } + { + String s = new String("h👋llo".getBytes(UTF_8), ISO_8859_1); + assertThat(platformToInternal(s)).isSameInstanceAs(s); + } + { + // Not valid Unicode. + String s = new String(new byte[] {(byte) 0xFF, (byte) 0xFE, 0X01}, ISO_8859_1); + assertThat(platformToInternal(s)).isSameInstanceAs(s); + } + } else { + assertThat(platformToInternal("")).isSameInstanceAs(""); + assertThat(platformToInternal("hello")).isSameInstanceAs("hello"); + assertThat(platformToInternal("hällo")) + .isEqualTo(new String("hällo".getBytes(UTF_8), ISO_8859_1)); + assertThat(platformToInternal("hållo")) + .isEqualTo(new String("hållo".getBytes(UTF_8), ISO_8859_1)); + assertThat(platformToInternal("h👋llo")) + .isEqualTo(new String("h👋llo".getBytes(UTF_8), ISO_8859_1)); + } + } + + @Test + public void testInternalToPlatform() { + if (SUN_JNU_ENCODING.equals(ISO_8859_1) && OS.getCurrent() == OS.LINUX) { + assertThat(internalToPlatform("")).isSameInstanceAs(""); + assertThat(internalToPlatform("hello")).isSameInstanceAs("hello"); + { + String s = new String("hällo".getBytes(UTF_8), ISO_8859_1); + assertThat(internalToPlatform(s)).isSameInstanceAs(s); + } + { + String s = new String("hållo".getBytes(UTF_8), ISO_8859_1); + assertThat(internalToPlatform(s)).isSameInstanceAs(s); + } + { + String s = new String("h👋llo".getBytes(UTF_8), ISO_8859_1); + assertThat(internalToPlatform(s)).isSameInstanceAs(s); + } + { + // Not valid Unicode. + String s = new String(new byte[] {(byte) 0xFF, (byte) 0xFE, 0X01}, ISO_8859_1); + assertThat(internalToPlatform(s)).isSameInstanceAs(s); + } + } else { + assertThat(internalToPlatform("")).isSameInstanceAs(""); + assertThat(internalToPlatform("hello")).isSameInstanceAs("hello"); + assertThat(internalToPlatform(new String("hällo".getBytes(UTF_8), ISO_8859_1))) + .isEqualTo("hällo"); + assertThat(internalToPlatform(new String("hållo".getBytes(UTF_8), ISO_8859_1))) + .isEqualTo("hållo"); + assertThat(internalToPlatform(new String("h👋llo".getBytes(UTF_8), ISO_8859_1))) + .isEqualTo("h👋llo"); + } + } + + @Test + public void testPlatformToInternal_roundtrip( + @TestParameter({"ascii", "äöüÄÖÜß", "🌱", "羅勒罗勒学名"}) String s) { + assume().that(canEncode(s, SUN_JNU_ENCODING)).isTrue(); + + String internal = platformToInternal(s); + // In the internal encoding, raw bytes are encoded as Latin-1. + assertThat(StringUnsafe.getInstance().getCoder(internal)).isEqualTo(StringUnsafe.LATIN1); + String roundtripped = internalToPlatform(internal); + if (StringUnsafe.getInstance().isAscii(s)) { + assertThat(roundtripped).isSameInstanceAs(s); + } else { + assertThat(roundtripped).isEqualTo(s); + } + } + + @Test + public void testPlatformToInternal_rawBytesRoundtrip() { + // Not valid UTF-8 + byte[] rawBytes = new byte[] {0x00, 0x7F, (byte) 0x80, (byte) 0xFE, (byte) 0xFF}; + assertThat(canDecode(rawBytes, UTF_8)).isFalse(); + + // Roundtripping raw bytes through the internal encoding requires Linux and a Latin-1 locale. + assume().that(OS.getCurrent()).isEqualTo(OS.LINUX); + assume().that(SUN_JNU_ENCODING).isEqualTo(ISO_8859_1); + + String platform = new String(rawBytes, ISO_8859_1); + String internal = platformToInternal(platform); + assertThat(internal).isSameInstanceAs(platform); + String roundtripped = internalToPlatform(internal); + assertThat(roundtripped).isSameInstanceAs(internal); + } + + @Test + public void testUnicodeToInternal_roundtrip( + @TestParameter({"ascii", "äöüÄÖÜß", "🌱", "羅勒罗勒学名"}) String s) { + String internal = unicodeToInternal(s); + // In the internal encoding, raw bytes are encoded as Latin-1. + assertThat(StringUnsafe.getInstance().getCoder(internal)).isEqualTo(StringUnsafe.LATIN1); + String roundtripped = internalToUnicode(internal); + if (StringUnsafe.getInstance().isAscii(s)) { + assertThat(roundtripped).isSameInstanceAs(s); + } else { + assertThat(roundtripped).isEqualTo(s); + } + } + + private static boolean canEncode(String s, Charset charset) { + try { + charset.newEncoder().encode(CharBuffer.wrap(s)); + return true; + } catch (CharacterCodingException e) { + return false; + } + } + + private static boolean canDecode(byte[] bytes, Charset charset) { + try { + charset.newDecoder().decode(ByteBuffer.wrap(bytes)); + return true; + } catch (CharacterCodingException e) { + return false; + } + } +} diff --git a/src/test/java/com/google/devtools/build/lib/util/StringUtilTest.java b/src/test/java/com/google/devtools/build/lib/util/StringUtilTest.java index 0145a80df3b42e..c5dcf7c922fcc9 100644 --- a/src/test/java/com/google/devtools/build/lib/util/StringUtilTest.java +++ b/src/test/java/com/google/devtools/build/lib/util/StringUtilTest.java @@ -16,8 +16,6 @@ import static com.google.common.truth.Truth.assertThat; import static com.google.devtools.build.lib.util.StringUtil.joinEnglishList; import static com.google.devtools.build.lib.util.StringUtil.joinEnglishListSingleQuoted; -import static com.google.devtools.build.lib.util.StringUtil.reencodeExternalToInternal; -import static com.google.devtools.build.lib.util.StringUtil.reencodeInternalToExternal; import com.google.common.collect.ImmutableList; import java.util.Arrays; @@ -71,43 +69,4 @@ public void listItemsWithLimit() throws Exception { .toString()) .isEqualTo("begin/a, b, c ...(omitting 2 more item(s))/end"); } - - @Test - @SuppressWarnings("UnicodeEscape") - public void testReencodeInternalToExternal() throws Exception { - // Regular ASCII passes through OK. - assertThat(reencodeInternalToExternal("ascii")).isEqualTo("ascii"); - - // Valid UTF-8 is decoded. - assertThat(reencodeInternalToExternal("\u00E2\u0081\u0089")) - .isEqualTo("\u2049"); // U+2049 EXCLAMATION QUESTION MARK - assertThat(reencodeInternalToExternal("\u00f0\u009f\u008c\u00b1")) - .isEqualTo("\uD83C\uDF31"); // U+1F331 SEEDLING - - // Strings that contain characters that can't be UTF-8 are returned as-is, - // to support Windows file paths. - assertThat(reencodeInternalToExternal("\u2049")) - .isEqualTo("\u2049"); // U+2049 EXCLAMATION QUESTION MARK - assertThat(reencodeInternalToExternal("\uD83C\uDF31")) - .isEqualTo("\uD83C\uDF31"); // U+1F331 SEEDLING - - // Strings that might be either UTF-8 or Unicode are optimistically decoded, - // and returned as-is if decoding succeeded with no replacement characters. - assertThat(reencodeInternalToExternal("\u00FC\u006E\u00EF\u0063\u00F6\u0064\u00EB")) - .isEqualTo("\u00FC\u006E\u00EF\u0063\u00F6\u0064\u00EB"); // "ünïcödë" - - // A string that is both valid ISO-8859-1 and valid UTF-8 will be decoded - // as UTF-8. - assertThat(reencodeInternalToExternal("\u00C2\u00A3")) // "£" - .isEqualTo("\u00A3"); // U+00A3 POUND SIGN - } - - @Test - public void testEncodeBytestringUtf8() throws Exception { - assertThat(reencodeExternalToInternal("ascii")).isEqualTo("ascii"); - assertThat(reencodeExternalToInternal("\u2049")) // U+2049 EXCLAMATION QUESTION MARK - .isEqualTo("\u00E2\u0081\u0089"); - assertThat(reencodeExternalToInternal("\uD83C\uDF31")) // U+1F331 SEEDLING - .isEqualTo("\u00f0\u009f\u008c\u00b1"); - } } diff --git a/src/test/java/com/google/devtools/build/lib/vfs/BUILD b/src/test/java/com/google/devtools/build/lib/vfs/BUILD index 16b7572f521e5b..9929f0f7c067a0 100644 --- a/src/test/java/com/google/devtools/build/lib/vfs/BUILD +++ b/src/test/java/com/google/devtools/build/lib/vfs/BUILD @@ -35,6 +35,7 @@ java_library( deps = [ ":testutil", "//src/main/java/com/google/devtools/build/lib/util:os", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", "//third_party:junit4", @@ -84,8 +85,8 @@ java_library( name = "FileSystemTest_lib", srcs = ["FileSystemTest.java"], deps = [ - "//src/main/java/com/google/devtools/build/lib/unix", "//src/main/java/com/google/devtools/build/lib/util", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", "//src/test/java/com/google/devtools/build/lib/testutil:TestUtils", @@ -143,8 +144,8 @@ java_library( testonly = 1, srcs = ["FileSystemTest.java"], deps = [ - "//src/main/java/com/google/devtools/build/lib/unix", "//src/main/java/com/google/devtools/build/lib/util", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", "//src/test/java/com/google/devtools/build/lib/testutil:TestUtils", diff --git a/src/test/java/com/google/devtools/build/lib/vfs/FileSystemTest.java b/src/test/java/com/google/devtools/build/lib/vfs/FileSystemTest.java index 55575cb4e8ddef..19fbe04cb388ee 100644 --- a/src/test/java/com/google/devtools/build/lib/vfs/FileSystemTest.java +++ b/src/test/java/com/google/devtools/build/lib/vfs/FileSystemTest.java @@ -16,6 +16,7 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.truth.Truth.assertThat; +import static com.google.common.truth.TruthJUnit.assume; import static java.lang.Math.min; import static java.nio.charset.StandardCharsets.ISO_8859_1; import static java.nio.charset.StandardCharsets.UTF_8; @@ -25,11 +26,13 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; +import com.google.common.collect.Sets; import com.google.common.io.BaseEncoding; import com.google.devtools.build.lib.testutil.TestUtils; -import com.google.devtools.build.lib.unix.FileStatus; -import com.google.devtools.build.lib.unix.NativePosixFiles; import com.google.devtools.build.lib.util.Fingerprint; +import com.google.devtools.build.lib.util.StringEncoding; import com.google.testing.junit.testparameterinjector.TestParameter; import com.google.testing.junit.testparameterinjector.TestParameterInjector; import com.google.testing.junit.testparameterinjector.TestParameterValuesProvider; @@ -42,7 +45,10 @@ import java.nio.channels.ReadableByteChannel; import java.nio.channels.SeekableByteChannel; import java.nio.channels.WritableByteChannel; +import java.nio.charset.Charset; import java.nio.file.FileAlreadyExistsException; +import java.nio.file.Files; +import java.nio.file.attribute.PosixFilePermission; import java.time.Instant; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicBoolean; @@ -125,10 +131,6 @@ protected void destroyFileSystem(FileSystem fileSystem) throws IOException { protected abstract FileSystem getFreshFileSystem(DigestHashFunction digestHashFunction) throws IOException; - protected boolean isSymbolicLink(File file) throws IOException { - return NativePosixFiles.lstat(file.getPath()).isSymbolicLink(); - } - private static final Pattern STAT_SUBDIR_ERROR = Pattern.compile("(.*) \\(Not a directory\\)"); // Test that file is not present, using statIfFound. Base implementation throws an exception, but @@ -154,52 +156,45 @@ protected void expectNotFound(Path path) throws IOException { protected void cleanUpWorkingDirectory(Path workingPath) throws IOException { if (workingPath.exists()) { - removeEntireDirectory(workingPath.getPathFile()); // uses java.io.File! + removeEntireDirectory(workingPath.getPathFile().toPath()); // uses java.nio.file.Path! } workingPath.createDirectoryAndParents(); } /** - * This function removes an entire directory and all of its contents. - * Much like rm -rf directoryToRemove + * This function removes an entire directory and all of its contents. Much like rm -rf + * directoryToRemove + * + *

This method explicitly only uses Java APIs to interact with files to prevent any issues with + * Bazel's own file systems from leaking from one test to another. */ - protected void removeEntireDirectory(File directoryToRemove) - throws IOException { + protected void removeEntireDirectory(java.nio.file.Path directoryToRemove) throws IOException { // make sure that we do not remove anything outside the test directory Path testDirPath = testFS.getPath(getTestTmpDir()); - if (!testFS.getPath(directoryToRemove.getAbsolutePath()).startsWith(testDirPath)) { + if (!testFS.getPath(directoryToRemove.toAbsolutePath().toString()).startsWith(testDirPath)) { throw new IOException("trying to remove files outside of the testdata directory"); } // Some tests set the directories read-only and/or non-executable, so // override that: - NativePosixFiles.chmod( - directoryToRemove.getPath(), - NativePosixFiles.lstat(directoryToRemove.getPath()).getPermissions() - | FileStatus.S_IWUSR - | FileStatus.S_IXUSR); - - File[] files = directoryToRemove.listFiles(); - if (files != null) { - for (File currentFile : files) { - boolean isSymbolicLink = isSymbolicLink(currentFile); - if (!isSymbolicLink && currentFile.isDirectory()) { - removeEntireDirectory(currentFile); - } else { - if (!isSymbolicLink) { - NativePosixFiles.chmod( - currentFile.getPath(), - NativePosixFiles.lstat(currentFile.getPath()).getPermissions() - | FileStatus.S_IWUSR); - } - if (!currentFile.delete()) { - throw new IOException("Failed to delete '" + currentFile + "'"); - } - } - } + Files.setPosixFilePermissions( + directoryToRemove, + Sets.union( + Files.getPosixFilePermissions(directoryToRemove), + ImmutableSet.of(PosixFilePermission.OWNER_WRITE, PosixFilePermission.OWNER_EXECUTE))); + + java.nio.file.Path[] entries; + try (var entriesStream = Files.list(directoryToRemove)) { + entries = entriesStream.toArray(java.nio.file.Path[]::new); } - if (!directoryToRemove.delete()) { - throw new IOException("Failed to delete '" + directoryToRemove + "'"); + for (var entry : entries) { + boolean isSymbolicLink = Files.isSymbolicLink(entry); + if (!isSymbolicLink && Files.isDirectory(entry)) { + removeEntireDirectory(entry); + } else { + Files.delete(entry); + } } + Files.delete(directoryToRemove); } /** Recursively make directories readable/executable and files readable. */ @@ -1979,4 +1974,86 @@ protected boolean isHardLinked(Path a, Path b) throws IOException { return testFS.stat(a.asFragment(), false).getNodeId() == testFS.stat(b.asFragment(), false).getNodeId(); } + + @Test + public void testGetNioPath_basic() { + java.nio.file.Path javaPath = getJavaPathOrSkipIfUnsupported(xFile); + assertThat(Files.isRegularFile(javaPath)).isTrue(); + } + + @Test + public void testGetNioPath_externalUtf8() throws IOException { + assumeUtf8CompatibleEncoding(); + + // Simulates a Starlark string constant, which is read from a presumably UTF-8 encoded source + // file into Bazel's internal representation. + Path utf8File = absolutize(StringEncoding.unicodeToInternal("some_dir/入力_A_🌱.txt")); + utf8File.getParentDirectory().createDirectoryAndParents(); + FileSystemUtils.writeContent(utf8File, UTF_8, "hello 入力_A_🌱"); + + java.nio.file.Path javaPath = getJavaPathOrSkipIfUnsupported(utf8File); + assertThat(Files.isRegularFile(javaPath)).isTrue(); + assertThat(Files.readString(javaPath)).isEqualTo("hello 入力_A_🌱"); + + // Ensure that the view of the file as a directory entry is consistent with how it was created. + assertThat(utf8File.getParentDirectory().getDirectoryEntries()).containsExactly(utf8File); + } + + @Test + public void testGetNioPath_internalUtf8() throws IOException { + assumeUtf8CompatibleEncoding(); + + Path dirPath = absolutize("some_dir"); + dirPath.createDirectoryAndParents(); + + // Create a file through Java APIs. + java.nio.file.Path javaDirPath = getJavaPathOrSkipIfUnsupported(dirPath); + Files.writeString(javaDirPath.resolve(unicodeToPlatform("入力_A_🌱.txt")), "hello 入力_A_🌱"); + + // Retrieve its path through the filesystem API. + var entries = dirPath.getDirectoryEntries(); + assertThat(entries).hasSize(1); + var filePath = Iterables.getOnlyElement(entries); + assertThat(filePath.exists()).isTrue(); + + // Verify the file content through the Java APIs. + var javaFilePath = getJavaPathOrSkipIfUnsupported(filePath); + assertThat(Files.isRegularFile(javaFilePath)).isTrue(); + assertThat(Files.readString(javaFilePath)).isEqualTo("hello 入力_A_🌱"); + } + + protected java.nio.file.Path getJavaPathOrSkipIfUnsupported(Path path) { + java.nio.file.Path javaPath = null; + try { + javaPath = testFS.getNioPath(path.asFragment()); + } catch (UnsupportedOperationException e) { + // Intentionally ignored. + } + + File javaFile = null; + try { + javaFile = testFS.getIoFile(path.asFragment()); + } catch (UnsupportedOperationException e) { + // Intentionally ignored. + } + + assertThat(javaPath == null).isEqualTo(javaFile == null); + assumeTrue(javaPath != null && javaFile != null); + assertThat(javaFile.toPath()).isEqualTo(javaPath); + + return javaPath; + } + + protected static String unicodeToPlatform(String s) { + return StringEncoding.internalToPlatform(StringEncoding.unicodeToInternal(s)); + } + + protected static String platformToUnicode(String s) { + return StringEncoding.internalToUnicode(StringEncoding.platformToInternal(s)); + } + + protected static void assumeUtf8CompatibleEncoding() { + Charset sunJnuEncoding = Charset.forName(System.getProperty("sun.jnu.encoding")); + assume().that(ImmutableList.of(UTF_8, ISO_8859_1)).contains(sunJnuEncoding); + } } diff --git a/src/test/java/com/google/devtools/build/lib/vfs/SymlinkAwareFileSystemTest.java b/src/test/java/com/google/devtools/build/lib/vfs/SymlinkAwareFileSystemTest.java index 9f22a4e7cb95bc..f6cf370ce8e721 100644 --- a/src/test/java/com/google/devtools/build/lib/vfs/SymlinkAwareFileSystemTest.java +++ b/src/test/java/com/google/devtools/build/lib/vfs/SymlinkAwareFileSystemTest.java @@ -17,9 +17,11 @@ import static org.junit.Assert.assertThrows; import com.google.devtools.build.lib.util.OS; +import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.vfs.FileSystem.NotASymlinkException; import java.io.FileNotFoundException; import java.io.IOException; +import java.nio.file.Files; import java.util.Collection; import org.junit.Before; import org.junit.Test; @@ -665,4 +667,18 @@ public void testCreatingFileInLinkedDirectory() throws Exception { byte[] inputData = FileSystemUtils.readContent(child); assertThat(inputData).isEqualTo(outputData); } + + @Test + public void testUtf8Symlink() throws Exception { + assumeUtf8CompatibleEncoding(); + + String target = StringEncoding.unicodeToInternal("入力_A_🌱.target"); + Path link = absolutize(StringEncoding.unicodeToInternal("入力_A_🌱.txt")); + createSymbolicLink(link, PathFragment.create(target)); + assertThat(link.readSymbolicLink().toString()).isEqualTo(target); + + java.nio.file.Path javaPath = getJavaPathOrSkipIfUnsupported(link); + assertThat(platformToUnicode(Files.readSymbolicLink(javaPath).toString())) + .isEqualTo("入力_A_🌱.target"); + } } diff --git a/src/test/shell/bazel/bazel_determinism_test.sh b/src/test/shell/bazel/bazel_determinism_test.sh index 373eb047334b3a..beea7c3b71a999 100755 --- a/src/test/shell/bazel/bazel_determinism_test.sh +++ b/src/test/shell/bazel/bazel_determinism_test.sh @@ -61,8 +61,9 @@ function hash_outputs() { } function test_determinism() { - # Verify that Bazel can build itself under a path with spaces. - local workdir="${TEST_TMPDIR}/work dir" + # Verify that Bazel can build itself under a path with spaces and non-ASCII + # characters. + local workdir="${TEST_TMPDIR}/wo🌱rk dir" mkdir "${workdir}" || fail "Could not create work directory" cd "${workdir}" || fail "Could not change to work directory" unzip -q "${DISTFILE}" diff --git a/src/test/shell/integration/BUILD b/src/test/shell/integration/BUILD index 4342824b8c4e6a..a504ed5963d3ec 100644 --- a/src/test/shell/integration/BUILD +++ b/src/test/shell/integration/BUILD @@ -882,7 +882,10 @@ sh_test( ":test-deps", "@bazel_tools//tools/bash/runfiles", ], - tags = ["no_windows"], + tags = [ + "exclusive", + "no_windows", + ], ) sh_test( diff --git a/src/test/shell/integration/run_test.sh b/src/test/shell/integration/run_test.sh index 95d8353cc9db8b..3e8fbfddc68f8c 100755 --- a/src/test/shell/integration/run_test.sh +++ b/src/test/shell/integration/run_test.sh @@ -53,7 +53,13 @@ msys*|mingw*|cygwin*) ;; esac -add_to_bazelrc "test --notest_loasd" +if $is_windows; then + export LC_ALL=C.utf8 +elif [[ "$(uname -s)" == "Linux" ]]; then + export LC_ALL=C.UTF-8 +else + export LC_ALL=en_US.UTF-8 +fi #### HELPER FUNCTIONS ################################################## @@ -195,35 +201,124 @@ function test_script_file_generation { } function test_consistent_command_line_encoding { - # todo(aehlig): reenable: https://github.com/bazelbuild/bazel/issues/1775 - return 0 - - # TODO(bazel-team): fix bazel to have consistent encoding, also on darwin; - # see https://github.com/bazelbuild/bazel/issues/1766 - [ "$PLATFORM" != "darwin" ] || warn "test disabled on darwin, see Github issue 1766" - [ "$PLATFORM" != "darwin" ] || return 0 - - # äöüÄÖÜß in UTF8 - local arg=$(echo -e '\xC3\xA4\xC3\xB6\xC3\xBC\xC3\x84\xC3\x96\xC3\x9C\xC3\x9F') + if "$is_windows"; then + # The JVM sets sun.jnu.encoding, which is used to encode command-line + # arguments to java.exe, based on the return value of GetACP() on Windows. + # On Windows with an English locale, GetACP() returns 1252, which is a + # variant of ISO 8859-1 that can represent the characters below, but not + # the full Unicode range. + # TODO: Fix this by patching the fusion manifest of the embedded java.exe to + # force GetACP() to return 65001 (UTF-8). + # äöüÄÖÜß in UTF8 + local arg=$(echo -e '\xC3\xA4\xC3\xB6\xC3\xBC\xC3\x84\xC3\x96\xC3\x9C\xC3\x9F') + else + # äöüÄÖÜß🌱 in UTF8 + local arg=$(echo -e '\xC3\xA4\xC3\xB6\xC3\xBC\xC3\x84\xC3\x96\xC3\x9C\xC3\x9F\xF0\x9F\x8C\xB1') + fi mkdir -p foo || fail "mkdir foo failed" echo 'sh_binary(name = "foo", srcs = ["foo.sh"])' > foo/BUILD echo 'sh_test(name = "foo_test", srcs = ["foo.sh"])' >> foo/BUILD - echo 'test "$1" = "'"$arg"'"' > foo/foo.sh + cat > foo/foo.sh < output \ + bazel run //foo -- "$arg" > $TEST_log 2>&1 \ || fail "${PRODUCT_NAME} run failed." - bazel test //foo:foo_test --test_arg="$arg" \ + bazel test //foo:foo_test --test_arg="$arg" --test_output=errors \ || fail "${PRODUCT_NAME} test failed" - bazel --batch run //foo -- "$arg" > output \ + bazel --batch run //foo -- "$arg" > $TEST_log 2>&1 \ || fail "${PRODUCT_NAME} run failed (--batch)." - bazel --batch test //foo:foo_test --test_arg="$arg" \ + + bazel --batch test //foo:foo_test --test_arg="$arg" --test_output=errors \ || fail "${PRODUCT_NAME} test failed (--batch)" } +function test_consistent_env_var_encoding { + if "$is_windows"; then + # äöüÄÖÜß in UTF8 + local env=$(echo -e '\xC3\xA4\xC3\xB6\xC3\xBC\xC3\x84\xC3\x96\xC3\x9C\xC3\x9F') + else + # äöüÄÖÜß🌱 in UTF8 + local env=$(echo -e '\xC3\xA4\xC3\xB6\xC3\xBC\xC3\x84\xC3\x96\xC3\x9C\xC3\x9F\xF0\x9F\x8C\xB1') + fi + + mkdir -p foo || fail "mkdir foo failed" + cat > foo/BUILD < foo/foo_test.sh < $TEST_log 2>&1 \ + || fail "${PRODUCT_NAME} run failed." + + env INHERITED_KEY="inherited_value_$env" \ + bazel test //foo:foo_test --test_output=errors \ + || fail "${PRODUCT_NAME} test failed" + + env INHERITED_KEY="inherited_value_$env" \ + bazel --batch test //foo:foo_test --test_output=errors \ + || fail "${PRODUCT_NAME} test failed (--batch)" +} + +function test_consistent_working_directory_encoding { + if "$is_windows"; then + # äöüÄÖÜß in UTF8 + local unicode_string=$(echo -e '\xC3\xA4\xC3\xB6\xC3\xBC\xC3\x84\xC3\x96\xC3\x9C\xC3\x9F') + else + # äöüÄÖÜß🌱 in UTF8 + local unicode_string=$(echo -e '\xC3\xA4\xC3\xB6\xC3\xBC\xC3\x84\xC3\x96\xC3\x9C\xC3\x9F\xF0\x9F\x8C\xB1') + fi + + mkdir -p foo || fail "mkdir foo failed" + cat > foo/BUILD < foo/foo.sh < $TEST_log 2>&1 \ + || fail "${PRODUCT_NAME} run failed." + + bazel --batch run //foo \ + || fail "${PRODUCT_NAME} run failed (--batch)." +} + # Tests bazel run with --color=no on a failed build does not produce color. function test_no_color_on_failed_run() { mkdir -p x || fail "mkdir failed" diff --git a/src/test/shell/integration/unicode_test.sh b/src/test/shell/integration/unicode_test.sh index d4108c86ac6827..57853db9cbaf72 100755 --- a/src/test/shell/integration/unicode_test.sh +++ b/src/test/shell/integration/unicode_test.sh @@ -93,4 +93,4 @@ function test_unicode_action_run_param_file { || fail "Output not as expected" } -run_suite "Integration tests for ${PRODUCT_NAME}'s unicode i/o in actions" \ No newline at end of file +run_suite "Integration tests for ${PRODUCT_NAME}'s unicode i/o in actions" diff --git a/src/test/shell/integration/watchfs_test.sh b/src/test/shell/integration/watchfs_test.sh index db261d9a6e92c4..b173e5588a1fb9 100755 --- a/src/test/shell/integration/watchfs_test.sh +++ b/src/test/shell/integration/watchfs_test.sh @@ -41,6 +41,23 @@ source "$(rlocation "io_bazel/src/test/shell/integration_test_setup.sh")" \ cd "$TEST_TMPDIR" +case "$(uname -s | tr [:upper:] [:lower:])" in +msys*|mingw*|cygwin*) + declare -r is_windows=true + ;; +*) + declare -r is_windows=false + ;; +esac + +if $is_windows; then + export LC_ALL=C.utf8 +elif [[ "$(uname -s)" == "Linux" ]]; then + export LC_ALL=C.UTF-8 +else + export LC_ALL=en_US.UTF-8 +fi + function set_up() { cd ${WORKSPACE_DIR} } @@ -99,4 +116,44 @@ function test_both() { expect_not_log "VFS stat.*${pkg}/whocares.in" } +function test_special_chars() { + bazel info + mkdir -p empty || fail "mkdir empty" + touch empty/BUILD + bazel build --watchfs //empty/... &> "$TEST_log" || fail "Expected success." + expect_not_log "Hello, Unicode!" + + mkdir pkg || fail "mkdir pkg" + cat > pkg/BUILD << 'EOF' +print("Hello, Unicode!") + +genrule( + name = "foo", + srcs = ["foo 🌱.in"], + outs = ["foo.out"], + output_to_bindir = True, + cmd = "cp '$<' $@", +) +EOF + cat > 'pkg/foo 🌱.in' << 'EOF' +foo +EOF + + sleep 5 + bazel build --watchfs //pkg/... &> "$TEST_log" || fail "Expected success." + expect_not_log "WARNING:.*falling back to manually" + expect_log "Hello, Unicode!" + assert_contains "foo" "${PRODUCT_NAME}-bin/pkg/foo.out" + + cat > 'pkg/foo 🌱.in' << 'EOF' +bar +EOF + + sleep 5 + bazel build --watchfs //pkg/... &> "$TEST_log" || fail "Expected success." + expect_not_log "WARNING:.*falling back to manually" + expect_not_log "Hello, Unicode!" + assert_contains "bar" "${PRODUCT_NAME}-bin/pkg/foo.out" +} + run_suite "Integration tests for --watchfs." diff --git a/src/tools/remote/BUILD b/src/tools/remote/BUILD index 222fc3c473dcab..7fa6ad76dc161f 100644 --- a/src/tools/remote/BUILD +++ b/src/tools/remote/BUILD @@ -17,6 +17,10 @@ filegroup( java_binary( name = "worker", + jvm_flags = [ + # Disables functionality in StringUtil that relies on Bazel's own internal string representation. + "-Dbazel.internal.UnicodeStrings=true", + ], main_class = "com.google.devtools.build.remote.worker.RemoteWorker", visibility = ["//visibility:public"], runtime_deps = ["//src/tools/remote/src/main/java/com/google/devtools/build/remote/worker"],