Skip to content

Commit

Permalink
Fix more Unicode bugs
Browse files Browse the repository at this point in the history
* Use Latin-1 in many native file write rules for consistency with the internal encoding.
* Use Latin-1 for the resolved repository file and the JSON profile.
* Fix `unused_input_list` handling of non-ASCII characters in file names.
* Flip the `legacy_utf8` parameter of `repository_ctx.file` to `False` and make it a no-op. With the previous default, any non-ASCII characters would be written out as double encoded UTF-8, which is not a useful choice.
* Change `repository_ctx.template` to operate on raw bytes for consistency with `repository_ctx.read` and to fix substitution with non-ASCII keys/values.
* Move some usages of `UTF_8` closer to their usage site to clarify why they are correct.
* Fixes parsing of dependency files with Unicode character contents (`/showIncludes` and `.d` files)

Closes bazelbuild#24182.

PiperOrigin-RevId: 698111811
Change-Id: Ie43bab9eb5963bf81690dd8985d358f544a711c9
(cherry picked from commit 3fdec93)
  • Loading branch information
fmeum committed Nov 19, 2024
1 parent 74b473d commit a12e8af
Show file tree
Hide file tree
Showing 42 changed files with 351 additions and 293 deletions.
2 changes: 2 additions & 0 deletions src/main/java/com/google/devtools/build/lib/analysis/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -1466,6 +1466,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/actions:artifacts",
"//src/main/java/com/google/devtools/build/lib/actions:commandline_item",
"//src/main/java/com/google/devtools/build/lib/collect/nestedset",
"//src/main/java/com/google/devtools/build/lib/unsafe:string",
"//src/main/java/com/google/devtools/build/lib/util",
"//src/main/java/net/starlark/java/eval",
"//third_party:jsr305",
Expand All @@ -1482,6 +1483,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/actions:artifact_expander",
"//src/main/java/com/google/devtools/build/lib/actions:artifacts",
"//src/main/java/com/google/devtools/build/lib/collect/nestedset",
"//src/main/java/com/google/devtools/build/lib/unsafe:string",
"//src/main/java/com/google/devtools/build/lib/util",
"//third_party:guava",
"//third_party:jsr305",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import com.google.devtools.build.lib.packages.RuleTransitionData;
import com.google.devtools.build.lib.packages.WorkspaceFactory;
import com.google.devtools.build.lib.starlarkbuildapi.core.Bootstrap;
import com.google.devtools.build.lib.unsafe.StringUnsafe;
import com.google.devtools.build.lib.vfs.DigestHashFunction;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
Expand Down Expand Up @@ -131,7 +132,10 @@ protected synchronized byte[] getFastDigest(PathFragment path) {

@Override
protected synchronized byte[] getDigest(PathFragment path) {
return getDigestFunction().getHashFunction().hashString(path.toString(), UTF_8).asBytes();
return getDigestFunction()
.getHashFunction()
.hashBytes(StringUnsafe.getInstance().getInternalStringBytes(path.getPathString()))
.asBytes();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableSortedMap.toImmutableSortedMap;
import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Comparator.comparing;

import com.github.benmanes.caffeine.cache.Caffeine;
Expand Down Expand Up @@ -146,7 +145,7 @@ protected void computeKey(
public String getFileContents(@Nullable EventHandler eventHandler) throws IOException {
ByteArrayOutputStream stream = new ByteArrayOutputStream();
newDeterministicWriter().writeOutputFile(stream);
return stream.toString(UTF_8);
return stream.toString(ISO_8859_1);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
Expand Down Expand Up @@ -213,7 +212,7 @@ public void writeOutputFile(OutputStream out, @Nullable EventHandler eventHandle
public String getFileContents(@Nullable EventHandler eventHandler) throws IOException {
ByteArrayOutputStream stream = new ByteArrayOutputStream();
writeOutputFile(stream, eventHandler);
return stream.toString(UTF_8);
return stream.toString(ISO_8859_1);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.devtools.build.lib.actions.ActionExecutionContext;
import com.google.devtools.build.lib.actions.ActionKeyContext;
Expand All @@ -25,6 +24,7 @@
import com.google.devtools.build.lib.collect.nestedset.NestedSet;
import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder;
import com.google.devtools.build.lib.collect.nestedset.Order;
import com.google.devtools.build.lib.unsafe.StringUnsafe;
import com.google.devtools.build.lib.util.Fingerprint;
import javax.annotation.Nullable;
import net.starlark.java.eval.Tuple;
Expand All @@ -49,7 +49,8 @@ public LazyWriteNestedSetOfTupleAction(

@Override
public DeterministicWriter newDeterministicWriter(ActionExecutionContext ctx) {
return out -> out.write(getContents(delimiter).getBytes(UTF_8));
return out ->
out.write(StringUnsafe.getInstance().getInternalStringBytes(getContents(delimiter)));
}

/** Computes the Action key for this action by computing the fingerprint for the file contents. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.collect.ImmutableSet;
import com.google.devtools.build.lib.actions.ActionExecutionContext;
Expand All @@ -26,6 +25,7 @@
import com.google.devtools.build.lib.collect.nestedset.NestedSet;
import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder;
import com.google.devtools.build.lib.collect.nestedset.Order;
import com.google.devtools.build.lib.unsafe.StringUnsafe;
import com.google.devtools.build.lib.util.Fingerprint;
import java.util.function.Function;
import javax.annotation.Nullable;
Expand Down Expand Up @@ -71,7 +71,7 @@ public LazyWritePathsFileAction(

@Override
public DeterministicWriter newDeterministicWriter(ActionExecutionContext ctx) {
return out -> out.write(getContents().getBytes(UTF_8));
return out -> out.write(StringUnsafe.getInstance().getInternalStringBytes(getContents()));
}

/** Computes the Action key for this action by computing the fingerprint for the file contents. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.UTF_8;
import static java.nio.charset.StandardCharsets.ISO_8859_1;

import com.google.common.collect.ImmutableList;
import com.google.devtools.build.lib.actions.AbstractAction;
Expand Down Expand Up @@ -47,7 +47,8 @@ public ImmutableList<SpawnResult> expandTemplate(
final String expandedTemplate =
getExpandedTemplateUnsafe(
templateMetadata.template(), templateMetadata.substitutions(), ctx.getPathResolver());
DeterministicWriter deterministicWriter = out -> out.write(expandedTemplate.getBytes(UTF_8));
DeterministicWriter deterministicWriter =
out -> out.write(expandedTemplate.getBytes(ISO_8859_1));
return ctx.getContext(FileWriteActionContext.class)
.writeOutputToFile(
action,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
// limitations under the License.
package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.UTF_8;
import static java.nio.charset.StandardCharsets.ISO_8859_1;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
Expand Down Expand Up @@ -349,10 +349,13 @@ protected void afterExecute(
for (Artifact input : allInputs.toList()) {
usedInputsByMappedPath.put(pathMapper.getMappedExecPathString(input), input);
}
// Bazel encodes file system paths as raw bytes stored in a Latin-1 encoded string, so we need
// to make sure to also decode the unused input list as Latin-1.
try (BufferedReader br =
new BufferedReader(
new InputStreamReader(
getUnusedInputListInputStream(actionExecutionContext, spawnResults), UTF_8))) {
getUnusedInputListInputStream(actionExecutionContext, spawnResults),
ISO_8859_1))) {
String line;
while ((line = br.readLine()) != null) {
line = line.trim();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.ISO_8859_1;

import com.google.common.annotations.VisibleForTesting;
import com.google.devtools.build.lib.actions.Artifact;
import com.google.devtools.build.lib.actions.ArtifactPathResolver;
Expand All @@ -22,16 +24,12 @@
import com.google.devtools.build.lib.vfs.FileSystemUtils;
import com.google.devtools.build.lib.vfs.Path;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import javax.annotation.Nullable;

/** A template that contains text content, or alternatively throws an {@link IOException}. */
@Immutable // all subclasses are immutable
public abstract class Template {

static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;

/** We only allow subclasses in this file. */
private Template() {}

Expand Down Expand Up @@ -105,7 +103,8 @@ private static final class ArtifactTemplate extends Template {
public String getContent(ArtifactPathResolver resolver) throws IOException {
Path templatePath = resolver.toPath(templateArtifact);
try {
return FileSystemUtils.readContent(templatePath, DEFAULT_CHARSET);
// Bazel's internal encoding for strings is raw bytes as Latin-1
return FileSystemUtils.readContent(templatePath, ISO_8859_1);
} catch (IOException e) {
throw new IOException(
"failed to load template file '"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -977,13 +977,8 @@ public void expandTemplate(
ImmutableMap.Builder<String, Substitution> substitutionsBuilder = ImmutableMap.builder();
for (Map.Entry<String, String> substitution :
Dict.cast(substitutionsUnchecked, String.class, String.class, "substitutions").entrySet()) {
// Blaze calls ParserInput.fromLatin1 when reading BUILD files, which might
// contain UTF-8 encoded symbols as part of template substitution.
// As a quick fix, the substitution values are corrected before being passed on.
// In the long term, avoiding ParserInput.fromLatin would be a better approach.
substitutionsBuilder.put(
substitution.getKey(),
Substitution.of(substitution.getKey(), convertLatin1ToUtf8(substitution.getValue())));
substitution.getKey(), Substitution.of(substitution.getKey(), substitution.getValue()));
}
if (!Starlark.UNBOUND.equals(computedSubstitutions)) {
for (Substitution substitution : ((TemplateDict) computedSubstitutions).getAll()) {
Expand All @@ -1007,16 +1002,6 @@ public void expandTemplate(
registerAction(action);
}

/**
* Returns the proper UTF-8 representation of a String that was erroneously read using Latin1.
*
* @param latin1 Input string
* @return The input string, UTF8 encoded
*/
private static String convertLatin1ToUtf8(String latin1) {
return new String(latin1.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
}

@Override
public Args args(StarlarkThread thread) {
return Args.newArgs(thread.mutability(), getSemantics());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/authandtls/credentialhelper",
"//src/main/java/com/google/devtools/build/lib/concurrent",
"//src/main/java/com/google/devtools/build/lib/events",
"//src/main/java/com/google/devtools/build/lib/unsafe:string",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/common/options",
"//third_party:auth",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
// limitations under the License.
package com.google.devtools.build.lib.authandtls;

import com.google.common.base.Strings;
import java.nio.charset.Charset;
import com.google.devtools.build.lib.unsafe.StringUnsafe;
import java.util.Base64;

/**
Expand All @@ -26,16 +25,18 @@ public final class BasicHttpAuthenticationEncoder {

private BasicHttpAuthenticationEncoder() {}

/** Encode username and password into a token with given {@link Charset}. */
public static String encode(String username, String password, Charset charset) {
StringBuilder sb = new StringBuilder();
if (!Strings.isNullOrEmpty(username)) {
sb.append(username);
}
sb.append(":");
if (!Strings.isNullOrEmpty(password)) {
sb.append(password);
}
return "Basic " + Base64.getEncoder().encodeToString(sb.toString().getBytes(charset));
/**
* Encode username and password into a token.
*
* <p>username and password are expected to use Bazel's internal string encoding. The returned
* string is a regular Unicode string.
*/
public static String encode(String username, String password) {
// The raw bytes in the internal string are assumed to be UTF-8, which is the encoding used for
// basic authentication.
return "Basic "
+ Base64.getEncoder()
.encodeToString(
StringUnsafe.getInstance().getInternalStringBytes(username + ":" + password));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
// limitations under the License.
package com.google.devtools.build.lib.authandtls;

import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.auth.Credentials;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
Expand Down Expand Up @@ -58,7 +56,7 @@ public Map<String, List<String>> getRequestMetadata(URI uri) throws IOException
Credential credential = netrc.getCredential(uri.getHost());
if (credential != null) {
String token =
BasicHttpAuthenticationEncoder.encode(credential.login(), credential.password(), UTF_8);
BasicHttpAuthenticationEncoder.encode(credential.login(), credential.password());
return ImmutableMap.of("Authorization", ImmutableList.of(token));
} else {
return ImmutableMap.of();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
package com.google.devtools.build.lib.authandtls;

import static com.google.common.base.Predicates.not;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.nio.charset.StandardCharsets.ISO_8859_1;

import com.google.auto.value.AutoValue;
import com.google.common.base.Strings;
Expand Down Expand Up @@ -77,7 +77,7 @@ private static class TokenStream implements Closeable {
private final Queue<Token> tokens = new ArrayDeque<>();

TokenStream(InputStream inputStream) throws IOException {
bufferedReader = new BufferedReader(new InputStreamReader(inputStream, UTF_8));
bufferedReader = new BufferedReader(new InputStreamReader(inputStream, ISO_8859_1));
processLine();
}

Expand Down Expand Up @@ -183,8 +183,7 @@ private static Credential parseCredentialForMachine(TokenStream tokenStream, Str
while (!done && tokenStream.hasNext()) {
// Peek rather than taking next token since we probably won't process it
Token token = tokenStream.peek();
if (token instanceof ItemToken itemToken) {
String item = itemToken.item();
if (token instanceof ItemToken(String item)) {
switch (item) {
case LOGIN -> {
tokenStream.next();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/skyframe:package_lookup_function",
"//src/main/java/com/google/devtools/build/lib/skyframe:package_lookup_value",
"//src/main/java/com/google/devtools/build/lib/skyframe:precomputed_value",
"//src/main/java/com/google/devtools/build/lib/util:string_encoding",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/build/lib/vfs:pathfragment",
"//src/main/java/com/google/devtools/build/lib/vfs/inmemoryfs",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

package com.google.devtools.build.lib.bazel.bzlmod;

import static java.nio.charset.StandardCharsets.UTF_8;
import static java.nio.charset.StandardCharsets.ISO_8859_1;

import com.google.common.collect.ImmutableList;
import com.google.devtools.build.lib.actions.FileValue;
Expand All @@ -25,6 +25,7 @@
import com.google.devtools.build.lib.packages.VendorThreadContext;
import com.google.devtools.build.lib.rules.repository.RepositoryDelegatorFunction;
import com.google.devtools.build.lib.skyframe.PrecomputedValue;
import com.google.devtools.build.lib.util.StringEncoding;
import com.google.devtools.build.lib.vfs.FileSystemUtils;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.Root;
Expand Down Expand Up @@ -54,7 +55,8 @@
public class VendorFileFunction implements SkyFunction {

private static final String VENDOR_FILE_HEADER =
"""
StringEncoding.unicodeToInternal(
"""
###############################################################################
# This file is used to configure how external repositories are handled in vendor mode.
# ONLY the two following functions can be used:
Expand All @@ -67,7 +69,7 @@ public class VendorFileFunction implements SkyFunction {
# Note that Bazel will NOT update the vendored source for this repo while running vendor command
# unless it's unpinned. The user can modify and maintain the vendored source for this repo manually.
###############################################################################
""";
""");

private final BazelStarlarkEnvironment starlarkEnv;

Expand Down Expand Up @@ -138,8 +140,7 @@ private void createVendorFile(Path vendorPath, Path vendorFilePath)
throws VendorFileFunctionException {
try {
vendorPath.createDirectoryAndParents();
byte[] vendorFileContents = VENDOR_FILE_HEADER.getBytes(UTF_8);
FileSystemUtils.writeContent(vendorFilePath, vendorFileContents);
FileSystemUtils.writeContent(vendorFilePath, ISO_8859_1, VENDOR_FILE_HEADER);
} catch (IOException e) {
throw new VendorFileFunctionException(
new IOException("error creating VENDOR.bazel file", e), Transience.TRANSIENT);
Expand Down
Loading

0 comments on commit a12e8af

Please sign in to comment.