Skip to content

Commit

Permalink
First Unicode PR
Browse files Browse the repository at this point in the history
  • Loading branch information
fmeum committed Nov 1, 2024
1 parent c436f3e commit 8b3e453
Show file tree
Hide file tree
Showing 100 changed files with 1,434 additions and 812 deletions.
8 changes: 7 additions & 1 deletion src/main/cpp/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,12 @@ cc_binary(

cc_library(
name = "option_processor",
srcs = ["option_processor.cc"],
srcs = [
"option_processor.cc",
] + select({
"//src/conditions:windows": ["option_processor_windows.cc"],
"//conditions:default": ["option_processor_unix.cc"],
}),
hdrs = [
"option_processor.h",
"option_processor-internal.h",
Expand Down Expand Up @@ -179,6 +184,7 @@ cc_library(
"//src/main/cpp/util",
"//src/main/cpp/util:blaze_exit_code",
"//src/main/cpp/util:logging",
"@abseil-cpp//absl/strings",
],
)

Expand Down
4 changes: 3 additions & 1 deletion src/main/cpp/blaze.cc
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,9 @@ static vector<string> GetServerExeArgs(const blaze_util::Path &jvm_path,
}
result.push_back(java_library_path.str());

// Force use of latin1 for file names.
// TODO: Investigate whether this still has any effect. File name encoding is
// governed by sun.jnu.encoding in JDKs with JEP 400, which can't be
// influenced by setting a property.
result.push_back("-Dfile.encoding=ISO-8859-1");
// Force into the root locale to ensure consistent behavior of string
// operations across machines (e.g. in the tr_TR locale, capital ASCII 'I'
Expand Down
18 changes: 11 additions & 7 deletions src/main/cpp/blaze_util_windows.cc
Original file line number Diff line number Diff line change
Expand Up @@ -937,14 +937,15 @@ void CreateSecureOutputRoot(const blaze_util::Path& path) {
}

string GetEnv(const string& name) {
DWORD size = ::GetEnvironmentVariableA(name.c_str(), nullptr, 0);
std::wstring wname = blaze_util::CstringToWstring(name);
DWORD size = ::GetEnvironmentVariableW(wname.c_str(), nullptr, 0);
if (size == 0) {
return string(); // unset or empty envvar
}

unique_ptr<char[]> value(new char[size]);
::GetEnvironmentVariableA(name.c_str(), value.get(), size);
return string(value.get());
unique_ptr<WCHAR[]> value(new WCHAR[size]);
::GetEnvironmentVariableW(wname.c_str(), value.get(), size);
return blaze_util::WstringToCstring(value.get());
}

string GetPathEnv(const string& name) {
Expand All @@ -970,11 +971,14 @@ bool ExistsEnv(const string& name) {
}

void SetEnv(const string& name, const string& value) {
// _putenv_s both calls ::SetEnvionmentVariableA and updates environ(5).
_putenv_s(name.c_str(), value.c_str());
::SetEnvironmentVariableW(blaze_util::CstringToWstring(name).c_str(),
blaze_util::CstringToWstring(value).c_str());
}

void UnsetEnv(const string& name) { SetEnv(name, ""); }
void UnsetEnv(const string& name) {
::SetEnvironmentVariableW(blaze_util::CstringToWstring(name).c_str(),
nullptr);
}

bool WarnIfStartedFromDesktop() {
// GetConsoleProcessList returns:
Expand Down
23 changes: 22 additions & 1 deletion src/main/cpp/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@
#include "src/main/cpp/option_processor.h"
#include "src/main/cpp/startup_options.h"
#include "src/main/cpp/workspace_layout.h"
#include "src/main/cpp/util/strings.h"

int main(int argc, char **argv) {
int main_impl(int argc, char **argv) {
uint64_t start_time = blaze::GetMillisecondsMonotonic();
std::unique_ptr<blaze::WorkspaceLayout> workspace_layout(
new blaze::WorkspaceLayout());
Expand All @@ -32,3 +33,23 @@ int main(int argc, char **argv) {
std::move(startup_options)),
start_time);
}

#ifdef _WIN32
// Define wmain to support Unicode command line arguments on Windows
// regardless of the current code page.
int wmain(int argc, wchar_t **argv) {
std::vector<std::string> args;
for (int i = 0; i < argc; ++i) {
args.push_back(blaze_util::WstringToCstring(argv[i]));
}
std::vector<char *> c_args;
for (const std::string &arg : args) {
c_args.push_back(const_cast<char *>(arg.c_str()));
}
c_args.push_back(nullptr);
// Account for the null terminator.
return main_impl(c_args.size() - 1, c_args.data());
}
#else
int main(int argc, char **argv) { return main_impl(argc, argv); }
#endif
2 changes: 2 additions & 0 deletions src/main/cpp/option_processor-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ std::string FindRcAlongsideBinary(const std::string& cwd,

blaze_exit_code::ExitCode ParseErrorToExitCode(RcFile::ParseError parse_error);

std::vector<std::string> GetProcessedEnv();

} // namespace internal
} // namespace blaze

Expand Down
72 changes: 6 additions & 66 deletions src/main/cpp/option_processor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@
extern char **environ;
#endif

#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif

namespace blaze {

using std::map;
Expand All @@ -48,7 +53,6 @@ using std::vector;

constexpr char WorkspaceLayout::WorkspacePrefix[];
static constexpr const char* kRcBasename = ".bazelrc";
static std::vector<std::string> GetProcessedEnv();

OptionProcessor::OptionProcessor(
const WorkspaceLayout* workspace_layout,
Expand Down Expand Up @@ -513,7 +517,7 @@ blaze_exit_code::ExitCode OptionProcessor::ParseOptions(
}

blazerc_and_env_command_args_ =
GetBlazercAndEnvCommandArgs(cwd, rc_file_ptrs, GetProcessedEnv());
GetBlazercAndEnvCommandArgs(cwd, rc_file_ptrs, blaze::internal::GetProcessedEnv());
return blaze_exit_code::SUCCESS;
}

Expand Down Expand Up @@ -583,70 +587,6 @@ blaze_exit_code::ExitCode OptionProcessor::ParseStartupOptions(
return startup_options_->ProcessArgs(rcstartup_flags, error);
}

static bool IsValidEnvName(const char* p) {
#if defined(_WIN32) || defined(__CYGWIN__)
for (; *p && *p != '='; ++p) {
if (!((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
(*p >= '0' && *p <= '9') || *p == '_' || *p == '(' || *p == ')')) {
return false;
}
}
#endif
return true;
}

#if defined(_WIN32)
static void PreprocessEnvString(string* env_str) {
static constexpr const char* vars_to_uppercase[] = {"PATH", "SYSTEMROOT",
"SYSTEMDRIVE",
"TEMP", "TEMPDIR", "TMP"};

int pos = env_str->find_first_of('=');
if (pos == string::npos) return;

string name = env_str->substr(0, pos);
// We do not care about locale. All variable names are ASCII.
std::transform(name.begin(), name.end(), name.begin(), ::toupper);
if (std::find(std::begin(vars_to_uppercase), std::end(vars_to_uppercase),
name) != std::end(vars_to_uppercase)) {
env_str->assign(name + "=" + env_str->substr(pos + 1));
}
}

#elif defined(__CYGWIN__) // not defined(_WIN32)

static void PreprocessEnvString(string* env_str) {
int pos = env_str->find_first_of('=');
if (pos == string::npos) return;
string name = env_str->substr(0, pos);
if (name == "PATH") {
env_str->assign("PATH=" + env_str->substr(pos + 1));
} else if (name == "TMP") {
// A valid Windows path "c:/foo" is also a valid Unix path list of
// ["c", "/foo"] so must use ConvertPath here. See GitHub issue #1684.
env_str->assign("TMP=" + blaze_util::ConvertPath(env_str->substr(pos + 1)));
}
}

#else // Non-Windows platforms.

static void PreprocessEnvString(const string* env_str) {
// do nothing.
}
#endif // defined(_WIN32)

static std::vector<std::string> GetProcessedEnv() {
std::vector<std::string> processed_env;
for (char** env = environ; *env != nullptr; env++) {
string env_str(*env);
if (IsValidEnvName(*env)) {
PreprocessEnvString(&env_str);
processed_env.push_back(std::move(env_str));
}
}
return processed_env;
}

// IMPORTANT: The options added here do not come from the user. In order for
// their source to be correctly tracked, the options must either be passed
// as --default_override=0, 0 being "client", or must be listed in
Expand Down
34 changes: 34 additions & 0 deletions src/main/cpp/option_processor_unix.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright 2024 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "src/main/cpp/option_processor-internal.h"

#include <string>

// On OSX, there apparently is no header that defines this.
#ifndef environ
extern char **environ;
#endif

namespace blaze::internal {

std::vector<std::string> GetProcessedEnv() {
std::vector<std::string> processed_env;
for (char** env = environ; *env != nullptr; env++) {
processed_env.emplace_back(*env);
}
return processed_env;
}

}
96 changes: 96 additions & 0 deletions src/main/cpp/option_processor_windows.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright 2024 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "src/main/cpp/option_processor-internal.h"

#include <algorithm>
#include <string>
#include <string_view>

#include "absl/strings/ascii.h"
#include "src/main/cpp/util/strings.h"

#define WIN32_LEAN_AND_MEAN
#include <windows.h>

namespace blaze::internal {

#if defined(__CYGWIN__)

static void PreprocessEnvString(std::string *env_str) {
int pos = env_str->find_first_of('=');
if (pos == string::npos) {
return;
}
std::string name = env_str->substr(0, pos);
if (name == "PATH") {
env_str->assign("PATH=" + env_str->substr(pos + 1));
} else if (name == "TMP") {
// A valid Windows path "c:/foo" is also a valid Unix path list of
// ["c", "/foo"] so must use ConvertPath here. See GitHub issue #1684.
env_str->assign("TMP=" + blaze_util::ConvertPath(env_str->substr(pos + 1)));
}
}

#else // not defined(__CYGWIN__)

static void PreprocessEnvString(std::string *env_str) {
static constexpr const char *vars_to_uppercase[] = {"PATH", "SYSTEMROOT",
"SYSTEMDRIVE",
"TEMP", "TEMPDIR", "TMP"};

std::size_t pos = env_str->find_first_of('=');
if (pos == std::string::npos) {
return;
}

std::string name = absl::AsciiStrToUpper(env_str->substr(0, pos));
if (std::find(std::begin(vars_to_uppercase), std::end(vars_to_uppercase),
name) != std::end(vars_to_uppercase)) {
env_str->assign(name + "=" + env_str->substr(pos + 1));
}
}

#endif // defined(__CYGWIN__)

static bool IsValidEnvName(std::string_view s) {
std::string_view name = s.substr(0, s.find('='));
return std::all_of(name.begin(), name.end(), [](char c) {
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
(c >= '0' && c <= '9') || c == '_' || c == '(' || c == ')';
});
}

// Use GetEnvironmentStringsW to get the environment variables to support
// Unicode regardless of the current code page.
std::vector<std::string> GetProcessedEnv() {
std::vector<std::string> processed_env;
wchar_t* env = GetEnvironmentStringsW();
if (env == nullptr) {
return processed_env;
}

for (wchar_t* p = env; *p != L'\0'; p += wcslen(p) + 1) {
std::string env_str = blaze_util::WstringToCstring(p);
if (IsValidEnvName(env_str)) {
PreprocessEnvString(&env_str);
processed_env.push_back(std::move(env_str));
}
}

FreeEnvironmentStringsW(env);
return processed_env;
}

} // namespace blaze::internal
1 change: 1 addition & 0 deletions src/main/java/com/google/devtools/build/lib/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/util:process",
"//src/main/java/com/google/devtools/build/lib/util:shallow_object_size_computer",
"//src/main/java/com/google/devtools/build/lib/util:string",
"//src/main/java/com/google/devtools/build/lib/util:string_encoding",
"//src/main/java/com/google/devtools/build/lib/util/io",
"//src/main/java/com/google/devtools/build/lib/util/io:io-proto",
"//src/main/java/com/google/devtools/build/lib/util/io:out-err",
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/google/devtools/build/lib/actions/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/util:filetype",
"//src/main/java/com/google/devtools/build/lib/util:os",
"//src/main/java/com/google/devtools/build/lib/util:shell_escaper",
"//src/main/java/com/google/devtools/build/lib/util:string",
"//src/main/java/com/google/devtools/build/lib/util:string_encoding",
"//src/main/java/com/google/devtools/build/lib/util:var_int",
"//src/main/java/com/google/devtools/build/lib/util/io",
"//src/main/java/com/google/devtools/build/lib/vfs",
Expand Down
Loading

0 comments on commit 8b3e453

Please sign in to comment.