Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix more Unicode bugs #24182

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/main/java/com/google/devtools/build/lib/analysis/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -1466,6 +1466,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/actions:artifacts",
"//src/main/java/com/google/devtools/build/lib/actions:commandline_item",
"//src/main/java/com/google/devtools/build/lib/collect/nestedset",
"//src/main/java/com/google/devtools/build/lib/unsafe:string",
"//src/main/java/com/google/devtools/build/lib/util",
"//src/main/java/net/starlark/java/eval",
"//third_party:jsr305",
Expand All @@ -1482,6 +1483,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/actions:artifact_expander",
"//src/main/java/com/google/devtools/build/lib/actions:artifacts",
"//src/main/java/com/google/devtools/build/lib/collect/nestedset",
"//src/main/java/com/google/devtools/build/lib/unsafe:string",
"//src/main/java/com/google/devtools/build/lib/util",
"//third_party:guava",
"//third_party:jsr305",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import com.google.devtools.build.lib.packages.RuleTransitionData;
import com.google.devtools.build.lib.packages.WorkspaceFactory;
import com.google.devtools.build.lib.starlarkbuildapi.core.Bootstrap;
import com.google.devtools.build.lib.unsafe.StringUnsafe;
import com.google.devtools.build.lib.vfs.DigestHashFunction;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
Expand Down Expand Up @@ -131,7 +132,10 @@ protected synchronized byte[] getFastDigest(PathFragment path) {

@Override
protected synchronized byte[] getDigest(PathFragment path) {
return getDigestFunction().getHashFunction().hashString(path.toString(), UTF_8).asBytes();
return getDigestFunction()
.getHashFunction()
.hashBytes(StringUnsafe.getInstance().getInternalStringBytes(path.getPathString()))
.asBytes();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableSortedMap.toImmutableSortedMap;
import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Comparator.comparing;

import com.github.benmanes.caffeine.cache.Caffeine;
Expand Down Expand Up @@ -146,7 +145,7 @@ protected void computeKey(
public String getFileContents(@Nullable EventHandler eventHandler) throws IOException {
ByteArrayOutputStream stream = new ByteArrayOutputStream();
newDeterministicWriter().writeOutputFile(stream);
return stream.toString(UTF_8);
return stream.toString(ISO_8859_1);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
Expand Down Expand Up @@ -213,7 +212,7 @@ public void writeOutputFile(OutputStream out, @Nullable EventHandler eventHandle
public String getFileContents(@Nullable EventHandler eventHandler) throws IOException {
ByteArrayOutputStream stream = new ByteArrayOutputStream();
writeOutputFile(stream, eventHandler);
return stream.toString(UTF_8);
return stream.toString(ISO_8859_1);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.devtools.build.lib.actions.ActionExecutionContext;
import com.google.devtools.build.lib.actions.ActionKeyContext;
Expand All @@ -25,6 +24,7 @@
import com.google.devtools.build.lib.collect.nestedset.NestedSet;
import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder;
import com.google.devtools.build.lib.collect.nestedset.Order;
import com.google.devtools.build.lib.unsafe.StringUnsafe;
import com.google.devtools.build.lib.util.Fingerprint;
import javax.annotation.Nullable;
import net.starlark.java.eval.Tuple;
Expand All @@ -49,7 +49,8 @@ public LazyWriteNestedSetOfTupleAction(

@Override
public DeterministicWriter newDeterministicWriter(ActionExecutionContext ctx) {
return out -> out.write(getContents(delimiter).getBytes(UTF_8));
return out ->
out.write(StringUnsafe.getInstance().getInternalStringBytes(getContents(delimiter)));
}

/** Computes the Action key for this action by computing the fingerprint for the file contents. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.collect.ImmutableSet;
import com.google.devtools.build.lib.actions.ActionExecutionContext;
Expand All @@ -26,6 +25,7 @@
import com.google.devtools.build.lib.collect.nestedset.NestedSet;
import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder;
import com.google.devtools.build.lib.collect.nestedset.Order;
import com.google.devtools.build.lib.unsafe.StringUnsafe;
import com.google.devtools.build.lib.util.Fingerprint;
import java.util.function.Function;
import javax.annotation.Nullable;
Expand Down Expand Up @@ -71,7 +71,7 @@ public LazyWritePathsFileAction(

@Override
public DeterministicWriter newDeterministicWriter(ActionExecutionContext ctx) {
return out -> out.write(getContents().getBytes(UTF_8));
return out -> out.write(StringUnsafe.getInstance().getInternalStringBytes(getContents()));
}

/** Computes the Action key for this action by computing the fingerprint for the file contents. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.UTF_8;
import static java.nio.charset.StandardCharsets.ISO_8859_1;

import com.google.common.collect.ImmutableList;
import com.google.devtools.build.lib.actions.AbstractAction;
Expand Down Expand Up @@ -47,7 +47,8 @@ public ImmutableList<SpawnResult> expandTemplate(
final String expandedTemplate =
getExpandedTemplateUnsafe(
templateMetadata.template(), templateMetadata.substitutions(), ctx.getPathResolver());
DeterministicWriter deterministicWriter = out -> out.write(expandedTemplate.getBytes(UTF_8));
DeterministicWriter deterministicWriter =
out -> out.write(expandedTemplate.getBytes(ISO_8859_1));
return ctx.getContext(FileWriteActionContext.class)
.writeOutputToFile(
action,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
// limitations under the License.
package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.UTF_8;
import static java.nio.charset.StandardCharsets.ISO_8859_1;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
Expand Down Expand Up @@ -349,10 +349,13 @@ protected void afterExecute(
for (Artifact input : allInputs.toList()) {
usedInputsByMappedPath.put(pathMapper.getMappedExecPathString(input), input);
}
// Bazel encodes file system paths as raw bytes stored in a Latin-1 encoded string, so we need
// to make sure to also decode the unused input list as Latin-1.
try (BufferedReader br =
new BufferedReader(
new InputStreamReader(
getUnusedInputListInputStream(actionExecutionContext, spawnResults), UTF_8))) {
getUnusedInputListInputStream(actionExecutionContext, spawnResults),
ISO_8859_1))) {
String line;
while ((line = br.readLine()) != null) {
line = line.trim();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.ISO_8859_1;

import com.google.common.annotations.VisibleForTesting;
import com.google.devtools.build.lib.actions.Artifact;
import com.google.devtools.build.lib.actions.ArtifactPathResolver;
Expand All @@ -22,16 +24,12 @@
import com.google.devtools.build.lib.vfs.FileSystemUtils;
import com.google.devtools.build.lib.vfs.Path;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import javax.annotation.Nullable;

/** A template that contains text content, or alternatively throws an {@link IOException}. */
@Immutable // all subclasses are immutable
public abstract class Template {

static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;

/** We only allow subclasses in this file. */
private Template() {}

Expand Down Expand Up @@ -105,7 +103,8 @@ private static final class ArtifactTemplate extends Template {
public String getContent(ArtifactPathResolver resolver) throws IOException {
Path templatePath = resolver.toPath(templateArtifact);
try {
return FileSystemUtils.readContent(templatePath, DEFAULT_CHARSET);
// Bazel's internal encoding for strings is raw bytes as Latin-1
return FileSystemUtils.readContent(templatePath, ISO_8859_1);
} catch (IOException e) {
throw new IOException(
"failed to load template file '"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -977,13 +977,8 @@ public void expandTemplate(
ImmutableMap.Builder<String, Substitution> substitutionsBuilder = ImmutableMap.builder();
for (Map.Entry<String, String> substitution :
Dict.cast(substitutionsUnchecked, String.class, String.class, "substitutions").entrySet()) {
// Blaze calls ParserInput.fromLatin1 when reading BUILD files, which might
// contain UTF-8 encoded symbols as part of template substitution.
// As a quick fix, the substitution values are corrected before being passed on.
// In the long term, avoiding ParserInput.fromLatin would be a better approach.
substitutionsBuilder.put(
substitution.getKey(),
Substitution.of(substitution.getKey(), convertLatin1ToUtf8(substitution.getValue())));
substitution.getKey(), Substitution.of(substitution.getKey(), substitution.getValue()));
}
if (!Starlark.UNBOUND.equals(computedSubstitutions)) {
for (Substitution substitution : ((TemplateDict) computedSubstitutions).getAll()) {
Expand All @@ -1007,16 +1002,6 @@ public void expandTemplate(
registerAction(action);
}

/**
* Returns the proper UTF-8 representation of a String that was erroneously read using Latin1.
*
* @param latin1 Input string
* @return The input string, UTF8 encoded
*/
private static String convertLatin1ToUtf8(String latin1) {
return new String(latin1.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
}

@Override
public Args args(StarlarkThread thread) {
return Args.newArgs(thread.mutability(), getSemantics());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
// limitations under the License.
package com.google.devtools.build.lib.authandtls;

import com.google.common.base.Strings;
import java.nio.charset.Charset;
import static java.nio.charset.StandardCharsets.UTF_8;

import java.util.Base64;

/**
Expand All @@ -26,16 +26,9 @@ public final class BasicHttpAuthenticationEncoder {

private BasicHttpAuthenticationEncoder() {}

/** Encode username and password into a token with given {@link Charset}. */
public static String encode(String username, String password, Charset charset) {
StringBuilder sb = new StringBuilder();
if (!Strings.isNullOrEmpty(username)) {
sb.append(username);
}
sb.append(":");
if (!Strings.isNullOrEmpty(password)) {
sb.append(password);
}
return "Basic " + Base64.getEncoder().encodeToString(sb.toString().getBytes(charset));
/** Encode username and password into a token, encoded using UTF-8. */
public static String encode(String username, String password) {
return "Basic "
+ Base64.getEncoder().encodeToString((username + ":" + password).getBytes(UTF_8));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
// limitations under the License.
package com.google.devtools.build.lib.authandtls;

import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.auth.Credentials;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
Expand Down Expand Up @@ -58,7 +56,7 @@ public Map<String, List<String>> getRequestMetadata(URI uri) throws IOException
Credential credential = netrc.getCredential(uri.getHost());
if (credential != null) {
String token =
BasicHttpAuthenticationEncoder.encode(credential.login(), credential.password(), UTF_8);
BasicHttpAuthenticationEncoder.encode(credential.login(), credential.password());
return ImmutableMap.of("Authorization", ImmutableList.of(token));
} else {
return ImmutableMap.of();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

package com.google.devtools.build.lib.bazel.bzlmod;

import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.collect.ImmutableList;
import com.google.devtools.build.lib.actions.FileValue;
Expand All @@ -35,6 +34,7 @@
import com.google.devtools.build.skyframe.SkyKey;
import com.google.devtools.build.skyframe.SkyValue;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import javax.annotation.Nullable;
import net.starlark.java.eval.EvalException;
import net.starlark.java.eval.Mutability;
Expand Down Expand Up @@ -138,8 +138,7 @@ private void createVendorFile(Path vendorPath, Path vendorFilePath)
throws VendorFileFunctionException {
try {
vendorPath.createDirectoryAndParents();
byte[] vendorFileContents = VENDOR_FILE_HEADER.getBytes(UTF_8);
FileSystemUtils.writeContent(vendorFilePath, vendorFileContents);
FileSystemUtils.writeContent(vendorFilePath, StandardCharsets.UTF_8, VENDOR_FILE_HEADER);
} catch (IOException e) {
throw new VendorFileFunctionException(
new IOException("error creating VENDOR.bazel file", e), Transience.TRANSIENT);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
// limitations under the License.
package com.google.devtools.build.lib.bazel.repository;

import static java.nio.charset.StandardCharsets.ISO_8859_1;

import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
Expand All @@ -29,7 +31,6 @@
import java.io.File;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.LinkedHashMap;
import java.util.Map;
import net.starlark.java.eval.Printer;
Expand Down Expand Up @@ -81,7 +82,7 @@ public void afterCommand() {
for (Object resolved : resolvedValues.values()) {
resultBuilder.add(resolved);
}
try (Writer writer = Files.newWriter(new File(resolvedFile), StandardCharsets.UTF_8)) {
try (Writer writer = Files.newWriter(new File(resolvedFile), ISO_8859_1)) {
writer.write(EXPORTED_NAME + " = " + new ValuePrinter().repr(resultBuilder.build()));
} catch (IOException e) {
logger.atWarning().withCause(e).log("IO Error writing to file %s", resolvedFile);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,10 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/skyframe:precomputed_value",
"//src/main/java/com/google/devtools/build/lib/skyframe:repository_mapping_value",
"//src/main/java/com/google/devtools/build/lib/starlarkbuildapi/repository",
"//src/main/java/com/google/devtools/build/lib/unsafe:string",
"//src/main/java/com/google/devtools/build/lib/util",
"//src/main/java/com/google/devtools/build/lib/util:string",
"//src/main/java/com/google/devtools/build/lib/util:string_encoding",
"//src/main/java/com/google/devtools/build/lib/util/io:out-err",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/build/lib/vfs:pathfragment",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor;
import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor.ExecutionResult;
import com.google.devtools.build.lib.skyframe.ActionEnvironmentFunction;
import com.google.devtools.build.lib.unsafe.StringUnsafe;
import com.google.devtools.build.lib.util.OsUtils;
import com.google.devtools.build.lib.util.io.OutErr;
import com.google.devtools.build.lib.vfs.FileSystemUtils;
Expand Down Expand Up @@ -1324,23 +1325,17 @@ private static String renamedStripPrefix(String method, String stripPrefix, Stri
@Param(
name = "legacy_utf8",
named = true,
defaultValue = "True",
defaultValue = "False",
doc =
"""
Encode file content to UTF-8, true by default. Future versions will change \
the default and remove this parameter.
No-op. This parameter is deprecated and will be removed in a future version of \
Bazel.
"""),
})
public void createFile(
Object path, String content, Boolean executable, Boolean legacyUtf8, StarlarkThread thread)
throws RepositoryFunctionException, EvalException, InterruptedException {
StarlarkPath p = getPath(path);
byte[] contentBytes;
if (legacyUtf8) {
contentBytes = content.getBytes(UTF_8);
} else {
contentBytes = content.getBytes(ISO_8859_1);
}
WorkspaceRuleEvent w =
WorkspaceRuleEvent.newFileEvent(
p.toString(),
Expand All @@ -1354,7 +1349,7 @@ public void createFile(
makeDirectories(p.getPath());
p.getPath().delete();
try (OutputStream stream = p.getPath().getOutputStream()) {
stream.write(contentBytes);
stream.write(StringUnsafe.getInstance().getInternalStringBytes(content));
}
if (executable) {
p.getPath().setExecutable(true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ Builder setQuiet(boolean quiet) {

private static String toString(ByteArrayOutputStream stream) {
try {
return new String(stream.toByteArray(), UTF_8);
return stream.toString(UTF_8);
} catch (IllegalStateException e) {
return "";
}
Expand Down
Loading