Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix most Unicode encoding bugs #24010

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/main/cpp/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,12 @@ cc_binary(

cc_library(
name = "option_processor",
srcs = ["option_processor.cc"],
srcs = [
"option_processor.cc",
] + select({
"//src/conditions:windows": ["option_processor_windows.cc"],
"//conditions:default": ["option_processor_unix.cc"],
}),
hdrs = [
"option_processor.h",
"option_processor-internal.h",
Expand Down Expand Up @@ -179,6 +184,7 @@ cc_library(
"//src/main/cpp/util",
"//src/main/cpp/util:blaze_exit_code",
"//src/main/cpp/util:logging",
"@abseil-cpp//absl/strings",
],
)

Expand Down
4 changes: 3 additions & 1 deletion src/main/cpp/blaze.cc
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,9 @@ static vector<string> GetServerExeArgs(const blaze_util::Path &jvm_path,
}
result.push_back(java_library_path.str());

// Force use of latin1 for file names.
// TODO: Investigate whether this still has any effect. File name encoding is
// governed by sun.jnu.encoding in JDKs with JEP 400, which can't be
// influenced by setting a property.
result.push_back("-Dfile.encoding=ISO-8859-1");
// Force into the root locale to ensure consistent behavior of string
// operations across machines (e.g. in the tr_TR locale, capital ASCII 'I'
Expand Down
18 changes: 11 additions & 7 deletions src/main/cpp/blaze_util_windows.cc
Original file line number Diff line number Diff line change
Expand Up @@ -937,14 +937,15 @@ void CreateSecureOutputRoot(const blaze_util::Path& path) {
}

string GetEnv(const string& name) {
DWORD size = ::GetEnvironmentVariableA(name.c_str(), nullptr, 0);
std::wstring wname = blaze_util::CstringToWstring(name);
DWORD size = ::GetEnvironmentVariableW(wname.c_str(), nullptr, 0);
if (size == 0) {
return string(); // unset or empty envvar
}

unique_ptr<char[]> value(new char[size]);
::GetEnvironmentVariableA(name.c_str(), value.get(), size);
return string(value.get());
unique_ptr<WCHAR[]> value(new WCHAR[size]);
::GetEnvironmentVariableW(wname.c_str(), value.get(), size);
return blaze_util::WstringToCstring(value.get());
}

string GetPathEnv(const string& name) {
Expand All @@ -970,11 +971,14 @@ bool ExistsEnv(const string& name) {
}

void SetEnv(const string& name, const string& value) {
// _putenv_s both calls ::SetEnvionmentVariableA and updates environ(5).
_putenv_s(name.c_str(), value.c_str());
::SetEnvironmentVariableW(blaze_util::CstringToWstring(name).c_str(),
blaze_util::CstringToWstring(value).c_str());
}

void UnsetEnv(const string& name) { SetEnv(name, ""); }
void UnsetEnv(const string& name) {
::SetEnvironmentVariableW(blaze_util::CstringToWstring(name).c_str(),
nullptr);
}

bool WarnIfStartedFromDesktop() {
// GetConsoleProcessList returns:
Expand Down
23 changes: 22 additions & 1 deletion src/main/cpp/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@
#include "src/main/cpp/option_processor.h"
#include "src/main/cpp/startup_options.h"
#include "src/main/cpp/workspace_layout.h"
#include "src/main/cpp/util/strings.h"

int main(int argc, char **argv) {
int main_impl(int argc, char **argv) {
uint64_t start_time = blaze::GetMillisecondsMonotonic();
std::unique_ptr<blaze::WorkspaceLayout> workspace_layout(
new blaze::WorkspaceLayout());
Expand All @@ -32,3 +33,23 @@ int main(int argc, char **argv) {
std::move(startup_options)),
start_time);
}

#ifdef _WIN32
// Define wmain to support Unicode command line arguments on Windows
// regardless of the current code page.
int wmain(int argc, wchar_t **argv) {
std::vector<std::string> args;
for (int i = 0; i < argc; ++i) {
args.push_back(blaze_util::WstringToCstring(argv[i]));
fmeum marked this conversation as resolved.
Show resolved Hide resolved
}
std::vector<char *> c_args;
for (const std::string &arg : args) {
c_args.push_back(const_cast<char *>(arg.c_str()));
}
c_args.push_back(nullptr);
// Account for the null terminator.
return main_impl(c_args.size() - 1, c_args.data());
}
#else
int main(int argc, char **argv) { return main_impl(argc, argv); }
#endif
2 changes: 2 additions & 0 deletions src/main/cpp/option_processor-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ std::string FindRcAlongsideBinary(const std::string& cwd,

blaze_exit_code::ExitCode ParseErrorToExitCode(RcFile::ParseError parse_error);

std::vector<std::string> GetProcessedEnv();

} // namespace internal
} // namespace blaze

Expand Down
72 changes: 6 additions & 66 deletions src/main/cpp/option_processor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@
extern char **environ;
#endif

#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif

namespace blaze {

using std::map;
Expand All @@ -48,7 +53,6 @@ using std::vector;

constexpr char WorkspaceLayout::WorkspacePrefix[];
static constexpr const char* kRcBasename = ".bazelrc";
static std::vector<std::string> GetProcessedEnv();

OptionProcessor::OptionProcessor(
const WorkspaceLayout* workspace_layout,
Expand Down Expand Up @@ -513,7 +517,7 @@ blaze_exit_code::ExitCode OptionProcessor::ParseOptions(
}

blazerc_and_env_command_args_ =
GetBlazercAndEnvCommandArgs(cwd, rc_file_ptrs, GetProcessedEnv());
GetBlazercAndEnvCommandArgs(cwd, rc_file_ptrs, blaze::internal::GetProcessedEnv());
return blaze_exit_code::SUCCESS;
}

Expand Down Expand Up @@ -583,70 +587,6 @@ blaze_exit_code::ExitCode OptionProcessor::ParseStartupOptions(
return startup_options_->ProcessArgs(rcstartup_flags, error);
}

static bool IsValidEnvName(const char* p) {
#if defined(_WIN32) || defined(__CYGWIN__)
for (; *p && *p != '='; ++p) {
if (!((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
(*p >= '0' && *p <= '9') || *p == '_' || *p == '(' || *p == ')')) {
return false;
}
}
#endif
return true;
}

#if defined(_WIN32)
static void PreprocessEnvString(string* env_str) {
static constexpr const char* vars_to_uppercase[] = {"PATH", "SYSTEMROOT",
"SYSTEMDRIVE",
"TEMP", "TEMPDIR", "TMP"};

int pos = env_str->find_first_of('=');
if (pos == string::npos) return;

string name = env_str->substr(0, pos);
// We do not care about locale. All variable names are ASCII.
std::transform(name.begin(), name.end(), name.begin(), ::toupper);
if (std::find(std::begin(vars_to_uppercase), std::end(vars_to_uppercase),
name) != std::end(vars_to_uppercase)) {
env_str->assign(name + "=" + env_str->substr(pos + 1));
}
}

#elif defined(__CYGWIN__) // not defined(_WIN32)

static void PreprocessEnvString(string* env_str) {
int pos = env_str->find_first_of('=');
if (pos == string::npos) return;
string name = env_str->substr(0, pos);
if (name == "PATH") {
env_str->assign("PATH=" + env_str->substr(pos + 1));
} else if (name == "TMP") {
// A valid Windows path "c:/foo" is also a valid Unix path list of
// ["c", "/foo"] so must use ConvertPath here. See GitHub issue #1684.
env_str->assign("TMP=" + blaze_util::ConvertPath(env_str->substr(pos + 1)));
}
}

#else // Non-Windows platforms.

static void PreprocessEnvString(const string* env_str) {
// do nothing.
}
#endif // defined(_WIN32)

static std::vector<std::string> GetProcessedEnv() {
std::vector<std::string> processed_env;
for (char** env = environ; *env != nullptr; env++) {
string env_str(*env);
if (IsValidEnvName(*env)) {
PreprocessEnvString(&env_str);
processed_env.push_back(std::move(env_str));
}
}
return processed_env;
}

// IMPORTANT: The options added here do not come from the user. In order for
// their source to be correctly tracked, the options must either be passed
// as --default_override=0, 0 being "client", or must be listed in
Expand Down
34 changes: 34 additions & 0 deletions src/main/cpp/option_processor_unix.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright 2024 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "src/main/cpp/option_processor-internal.h"

#include <string>

// On OSX, there apparently is no header that defines this.
#ifndef environ
extern char **environ;
#endif

namespace blaze::internal {

std::vector<std::string> GetProcessedEnv() {
std::vector<std::string> processed_env;
for (char** env = environ; *env != nullptr; env++) {
processed_env.emplace_back(*env);
}
return processed_env;
}

}
96 changes: 96 additions & 0 deletions src/main/cpp/option_processor_windows.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright 2024 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "src/main/cpp/option_processor-internal.h"

#include <algorithm>
#include <string>
#include <string_view>

#include "absl/strings/ascii.h"
#include "src/main/cpp/util/strings.h"

#define WIN32_LEAN_AND_MEAN
#include <windows.h>

namespace blaze::internal {

#if defined(__CYGWIN__)

static void PreprocessEnvString(std::string *env_str) {
int pos = env_str->find_first_of('=');
if (pos == string::npos) {
return;
}
std::string name = env_str->substr(0, pos);
if (name == "PATH") {
env_str->assign("PATH=" + env_str->substr(pos + 1));
} else if (name == "TMP") {
// A valid Windows path "c:/foo" is also a valid Unix path list of
// ["c", "/foo"] so must use ConvertPath here. See GitHub issue #1684.
env_str->assign("TMP=" + blaze_util::ConvertPath(env_str->substr(pos + 1)));
}
}

#else // not defined(__CYGWIN__)

static void PreprocessEnvString(std::string *env_str) {
static constexpr const char *vars_to_uppercase[] = {"PATH", "SYSTEMROOT",
"SYSTEMDRIVE",
"TEMP", "TEMPDIR", "TMP"};

std::size_t pos = env_str->find_first_of('=');
if (pos == std::string::npos) {
return;
}

std::string name = absl::AsciiStrToUpper(env_str->substr(0, pos));
if (std::find(std::begin(vars_to_uppercase), std::end(vars_to_uppercase),
name) != std::end(vars_to_uppercase)) {
env_str->assign(name + "=" + env_str->substr(pos + 1));
}
}

#endif // defined(__CYGWIN__)

static bool IsValidEnvName(std::string_view s) {
std::string_view name = s.substr(0, s.find('='));
return std::all_of(name.begin(), name.end(), [](char c) {
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
(c >= '0' && c <= '9') || c == '_' || c == '(' || c == ')';
});
}

// Use GetEnvironmentStringsW to get the environment variables to support
// Unicode regardless of the current code page.
std::vector<std::string> GetProcessedEnv() {
std::vector<std::string> processed_env;
wchar_t* env = GetEnvironmentStringsW();
if (env == nullptr) {
return processed_env;
}

for (wchar_t* p = env; *p != L'\0'; p += wcslen(p) + 1) {
std::string env_str = blaze_util::WstringToCstring(p);
if (IsValidEnvName(env_str)) {
PreprocessEnvString(&env_str);
processed_env.push_back(std::move(env_str));
}
}

FreeEnvironmentStringsW(env);
return processed_env;
}

} // namespace blaze::internal
1 change: 1 addition & 0 deletions src/main/java/com/google/devtools/build/lib/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/util:process",
"//src/main/java/com/google/devtools/build/lib/util:shallow_object_size_computer",
"//src/main/java/com/google/devtools/build/lib/util:string",
"//src/main/java/com/google/devtools/build/lib/util:string_encoding",
"//src/main/java/com/google/devtools/build/lib/util/io",
"//src/main/java/com/google/devtools/build/lib/util/io:io-proto",
"//src/main/java/com/google/devtools/build/lib/util/io:out-err",
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/google/devtools/build/lib/actions/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/util:filetype",
"//src/main/java/com/google/devtools/build/lib/util:os",
"//src/main/java/com/google/devtools/build/lib/util:shell_escaper",
"//src/main/java/com/google/devtools/build/lib/util:string",
"//src/main/java/com/google/devtools/build/lib/util:string_encoding",
"//src/main/java/com/google/devtools/build/lib/util:var_int",
"//src/main/java/com/google/devtools/build/lib/util/io",
"//src/main/java/com/google/devtools/build/lib/vfs",
Expand Down
Loading
Loading