diff --git a/src/main/java/com/google/devtools/build/lib/analysis/BUILD b/src/main/java/com/google/devtools/build/lib/analysis/BUILD index c127673cd5e46a..b706fe3954e0d7 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/BUILD +++ b/src/main/java/com/google/devtools/build/lib/analysis/BUILD @@ -1466,6 +1466,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/actions:artifacts", "//src/main/java/com/google/devtools/build/lib/actions:commandline_item", "//src/main/java/com/google/devtools/build/lib/collect/nestedset", + "//src/main/java/com/google/devtools/build/lib/unsafe:string", "//src/main/java/com/google/devtools/build/lib/util", "//src/main/java/net/starlark/java/eval", "//third_party:jsr305", @@ -1482,6 +1483,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/actions:artifact_expander", "//src/main/java/com/google/devtools/build/lib/actions:artifacts", "//src/main/java/com/google/devtools/build/lib/collect/nestedset", + "//src/main/java/com/google/devtools/build/lib/unsafe:string", "//src/main/java/com/google/devtools/build/lib/util", "//third_party:guava", "//third_party:jsr305", diff --git a/src/main/java/com/google/devtools/build/lib/analysis/ConfiguredRuleClassProvider.java b/src/main/java/com/google/devtools/build/lib/analysis/ConfiguredRuleClassProvider.java index df527b31de764f..564309c487f868 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/ConfiguredRuleClassProvider.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/ConfiguredRuleClassProvider.java @@ -53,6 +53,7 @@ import com.google.devtools.build.lib.packages.RuleTransitionData; import com.google.devtools.build.lib.packages.WorkspaceFactory; import com.google.devtools.build.lib.starlarkbuildapi.core.Bootstrap; +import com.google.devtools.build.lib.unsafe.StringUnsafe; import com.google.devtools.build.lib.vfs.DigestHashFunction; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; @@ -131,7 +132,10 @@ protected synchronized byte[] getFastDigest(PathFragment path) { @Override protected synchronized byte[] getDigest(PathFragment path) { - return getDigestFunction().getHashFunction().hashString(path.toString(), UTF_8).asBytes(); + return getDigestFunction() + .getHashFunction() + .hashBytes(StringUnsafe.getInstance().getInternalStringBytes(path.getPathString())) + .asBytes(); } } diff --git a/src/main/java/com/google/devtools/build/lib/analysis/RepoMappingManifestAction.java b/src/main/java/com/google/devtools/build/lib/analysis/RepoMappingManifestAction.java index f4b724886be784..6d903571edc1ef 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/RepoMappingManifestAction.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/RepoMappingManifestAction.java @@ -16,7 +16,6 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableSortedMap.toImmutableSortedMap; import static java.nio.charset.StandardCharsets.ISO_8859_1; -import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Comparator.comparing; import com.github.benmanes.caffeine.cache.Caffeine; @@ -146,7 +145,7 @@ protected void computeKey( public String getFileContents(@Nullable EventHandler eventHandler) throws IOException { ByteArrayOutputStream stream = new ByteArrayOutputStream(); newDeterministicWriter().writeOutputFile(stream); - return stream.toString(UTF_8); + return stream.toString(ISO_8859_1); } @Override diff --git a/src/main/java/com/google/devtools/build/lib/analysis/SourceManifestAction.java b/src/main/java/com/google/devtools/build/lib/analysis/SourceManifestAction.java index c8fb12671db480..c6f7327bab50cc 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/SourceManifestAction.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/SourceManifestAction.java @@ -15,7 +15,6 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static java.nio.charset.StandardCharsets.ISO_8859_1; -import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; @@ -213,7 +212,7 @@ public void writeOutputFile(OutputStream out, @Nullable EventHandler eventHandle public String getFileContents(@Nullable EventHandler eventHandler) throws IOException { ByteArrayOutputStream stream = new ByteArrayOutputStream(); writeOutputFile(stream, eventHandler); - return stream.toString(UTF_8); + return stream.toString(ISO_8859_1); } @Override diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWriteNestedSetOfTupleAction.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWriteNestedSetOfTupleAction.java index 8958b5e2aee2ab..6bd98ea17edf37 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWriteNestedSetOfTupleAction.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWriteNestedSetOfTupleAction.java @@ -14,7 +14,6 @@ package com.google.devtools.build.lib.analysis.actions; -import static java.nio.charset.StandardCharsets.UTF_8; import com.google.devtools.build.lib.actions.ActionExecutionContext; import com.google.devtools.build.lib.actions.ActionKeyContext; @@ -25,6 +24,7 @@ import com.google.devtools.build.lib.collect.nestedset.NestedSet; import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder; import com.google.devtools.build.lib.collect.nestedset.Order; +import com.google.devtools.build.lib.unsafe.StringUnsafe; import com.google.devtools.build.lib.util.Fingerprint; import javax.annotation.Nullable; import net.starlark.java.eval.Tuple; @@ -49,7 +49,8 @@ public LazyWriteNestedSetOfTupleAction( @Override public DeterministicWriter newDeterministicWriter(ActionExecutionContext ctx) { - return out -> out.write(getContents(delimiter).getBytes(UTF_8)); + return out -> + out.write(StringUnsafe.getInstance().getInternalStringBytes(getContents(delimiter))); } /** Computes the Action key for this action by computing the fingerprint for the file contents. */ diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWritePathsFileAction.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWritePathsFileAction.java index e27d03b48eac12..2e24dc3d0d6ee1 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWritePathsFileAction.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWritePathsFileAction.java @@ -15,7 +15,6 @@ package com.google.devtools.build.lib.analysis.actions; -import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.collect.ImmutableSet; import com.google.devtools.build.lib.actions.ActionExecutionContext; @@ -26,6 +25,7 @@ import com.google.devtools.build.lib.collect.nestedset.NestedSet; import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder; import com.google.devtools.build.lib.collect.nestedset.Order; +import com.google.devtools.build.lib.unsafe.StringUnsafe; import com.google.devtools.build.lib.util.Fingerprint; import java.util.function.Function; import javax.annotation.Nullable; @@ -71,7 +71,7 @@ public LazyWritePathsFileAction( @Override public DeterministicWriter newDeterministicWriter(ActionExecutionContext ctx) { - return out -> out.write(getContents().getBytes(UTF_8)); + return out -> out.write(StringUnsafe.getInstance().getInternalStringBytes(getContents())); } /** Computes the Action key for this action by computing the fingerprint for the file contents. */ diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/LocalTemplateExpansionStrategy.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/LocalTemplateExpansionStrategy.java index 2c288480127756..93396ad2923fb9 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/actions/LocalTemplateExpansionStrategy.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/LocalTemplateExpansionStrategy.java @@ -14,7 +14,7 @@ package com.google.devtools.build.lib.analysis.actions; -import static java.nio.charset.StandardCharsets.UTF_8; +import static java.nio.charset.StandardCharsets.ISO_8859_1; import com.google.common.collect.ImmutableList; import com.google.devtools.build.lib.actions.AbstractAction; @@ -47,7 +47,8 @@ public ImmutableList expandTemplate( final String expandedTemplate = getExpandedTemplateUnsafe( templateMetadata.template(), templateMetadata.substitutions(), ctx.getPathResolver()); - DeterministicWriter deterministicWriter = out -> out.write(expandedTemplate.getBytes(UTF_8)); + DeterministicWriter deterministicWriter = + out -> out.write(expandedTemplate.getBytes(ISO_8859_1)); return ctx.getContext(FileWriteActionContext.class) .writeOutputToFile( action, diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/StarlarkAction.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/StarlarkAction.java index c6df48534c8e04..ecf92e4768fb04 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/actions/StarlarkAction.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/StarlarkAction.java @@ -13,7 +13,7 @@ // limitations under the License. package com.google.devtools.build.lib.analysis.actions; -import static java.nio.charset.StandardCharsets.UTF_8; +import static java.nio.charset.StandardCharsets.ISO_8859_1; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; @@ -349,10 +349,13 @@ protected void afterExecute( for (Artifact input : allInputs.toList()) { usedInputsByMappedPath.put(pathMapper.getMappedExecPathString(input), input); } + // Bazel encodes file system paths as raw bytes stored in a Latin-1 encoded string, so we need + // to make sure to also decode the unused input list as Latin-1. try (BufferedReader br = new BufferedReader( new InputStreamReader( - getUnusedInputListInputStream(actionExecutionContext, spawnResults), UTF_8))) { + getUnusedInputListInputStream(actionExecutionContext, spawnResults), + ISO_8859_1))) { String line; while ((line = br.readLine()) != null) { line = line.trim(); diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/Template.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/Template.java index 3cc16afd28166a..40a856c473c745 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/actions/Template.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/Template.java @@ -14,6 +14,8 @@ package com.google.devtools.build.lib.analysis.actions; +import static java.nio.charset.StandardCharsets.ISO_8859_1; + import com.google.common.annotations.VisibleForTesting; import com.google.devtools.build.lib.actions.Artifact; import com.google.devtools.build.lib.actions.ArtifactPathResolver; @@ -22,16 +24,12 @@ import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; import java.io.IOException; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; import javax.annotation.Nullable; /** A template that contains text content, or alternatively throws an {@link IOException}. */ @Immutable // all subclasses are immutable public abstract class Template { - static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8; - /** We only allow subclasses in this file. */ private Template() {} @@ -105,7 +103,8 @@ private static final class ArtifactTemplate extends Template { public String getContent(ArtifactPathResolver resolver) throws IOException { Path templatePath = resolver.toPath(templateArtifact); try { - return FileSystemUtils.readContent(templatePath, DEFAULT_CHARSET); + // Bazel's internal encoding for strings is raw bytes as Latin-1 + return FileSystemUtils.readContent(templatePath, ISO_8859_1); } catch (IOException e) { throw new IOException( "failed to load template file '" diff --git a/src/main/java/com/google/devtools/build/lib/analysis/starlark/StarlarkActionFactory.java b/src/main/java/com/google/devtools/build/lib/analysis/starlark/StarlarkActionFactory.java index b33ce8d0b7c9a1..2b944788cbf197 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/starlark/StarlarkActionFactory.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/starlark/StarlarkActionFactory.java @@ -977,13 +977,8 @@ public void expandTemplate( ImmutableMap.Builder substitutionsBuilder = ImmutableMap.builder(); for (Map.Entry substitution : Dict.cast(substitutionsUnchecked, String.class, String.class, "substitutions").entrySet()) { - // Blaze calls ParserInput.fromLatin1 when reading BUILD files, which might - // contain UTF-8 encoded symbols as part of template substitution. - // As a quick fix, the substitution values are corrected before being passed on. - // In the long term, avoiding ParserInput.fromLatin would be a better approach. substitutionsBuilder.put( - substitution.getKey(), - Substitution.of(substitution.getKey(), convertLatin1ToUtf8(substitution.getValue()))); + substitution.getKey(), Substitution.of(substitution.getKey(), substitution.getValue())); } if (!Starlark.UNBOUND.equals(computedSubstitutions)) { for (Substitution substitution : ((TemplateDict) computedSubstitutions).getAll()) { @@ -1007,16 +1002,6 @@ public void expandTemplate( registerAction(action); } - /** - * Returns the proper UTF-8 representation of a String that was erroneously read using Latin1. - * - * @param latin1 Input string - * @return The input string, UTF8 encoded - */ - private static String convertLatin1ToUtf8(String latin1) { - return new String(latin1.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8); - } - @Override public Args args(StarlarkThread thread) { return Args.newArgs(thread.mutability(), getSemantics()); diff --git a/src/main/java/com/google/devtools/build/lib/authandtls/BUILD b/src/main/java/com/google/devtools/build/lib/authandtls/BUILD index 585e980b89ec52..79bebaaa39ad03 100644 --- a/src/main/java/com/google/devtools/build/lib/authandtls/BUILD +++ b/src/main/java/com/google/devtools/build/lib/authandtls/BUILD @@ -21,6 +21,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/authandtls/credentialhelper", "//src/main/java/com/google/devtools/build/lib/concurrent", "//src/main/java/com/google/devtools/build/lib/events", + "//src/main/java/com/google/devtools/build/lib/unsafe:string", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/common/options", "//third_party:auth", diff --git a/src/main/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoder.java b/src/main/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoder.java index a4a4f453b9f7bf..244aaa75f0b93f 100644 --- a/src/main/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoder.java +++ b/src/main/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoder.java @@ -13,8 +13,7 @@ // limitations under the License. package com.google.devtools.build.lib.authandtls; -import com.google.common.base.Strings; -import java.nio.charset.Charset; +import com.google.devtools.build.lib.unsafe.StringUnsafe; import java.util.Base64; /** @@ -26,16 +25,18 @@ public final class BasicHttpAuthenticationEncoder { private BasicHttpAuthenticationEncoder() {} - /** Encode username and password into a token with given {@link Charset}. */ - public static String encode(String username, String password, Charset charset) { - StringBuilder sb = new StringBuilder(); - if (!Strings.isNullOrEmpty(username)) { - sb.append(username); - } - sb.append(":"); - if (!Strings.isNullOrEmpty(password)) { - sb.append(password); - } - return "Basic " + Base64.getEncoder().encodeToString(sb.toString().getBytes(charset)); + /** + * Encode username and password into a token. + * + *

username and password are expected to use Bazel's internal string encoding. The returned + * string is a regular Unicode string. + */ + public static String encode(String username, String password) { + // The raw bytes in the internal string are assumed to be UTF-8, which is the encoding used for + // basic authentication. + return "Basic " + + Base64.getEncoder() + .encodeToString( + StringUnsafe.getInstance().getInternalStringBytes(username + ":" + password)); } } diff --git a/src/main/java/com/google/devtools/build/lib/authandtls/NetrcCredentials.java b/src/main/java/com/google/devtools/build/lib/authandtls/NetrcCredentials.java index ee15a91b9f9422..bc40512d659a63 100644 --- a/src/main/java/com/google/devtools/build/lib/authandtls/NetrcCredentials.java +++ b/src/main/java/com/google/devtools/build/lib/authandtls/NetrcCredentials.java @@ -13,8 +13,6 @@ // limitations under the License. package com.google.devtools.build.lib.authandtls; -import static java.nio.charset.StandardCharsets.UTF_8; - import com.google.auth.Credentials; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -58,7 +56,7 @@ public Map> getRequestMetadata(URI uri) throws IOException Credential credential = netrc.getCredential(uri.getHost()); if (credential != null) { String token = - BasicHttpAuthenticationEncoder.encode(credential.login(), credential.password(), UTF_8); + BasicHttpAuthenticationEncoder.encode(credential.login(), credential.password()); return ImmutableMap.of("Authorization", ImmutableList.of(token)); } else { return ImmutableMap.of(); diff --git a/src/main/java/com/google/devtools/build/lib/authandtls/NetrcParser.java b/src/main/java/com/google/devtools/build/lib/authandtls/NetrcParser.java index 8f6de4c95dc07d..13edbf50338eb4 100644 --- a/src/main/java/com/google/devtools/build/lib/authandtls/NetrcParser.java +++ b/src/main/java/com/google/devtools/build/lib/authandtls/NetrcParser.java @@ -14,7 +14,7 @@ package com.google.devtools.build.lib.authandtls; import static com.google.common.base.Predicates.not; -import static java.nio.charset.StandardCharsets.UTF_8; +import static java.nio.charset.StandardCharsets.ISO_8859_1; import com.google.auto.value.AutoValue; import com.google.common.base.Strings; @@ -77,7 +77,7 @@ private static class TokenStream implements Closeable { private final Queue tokens = new ArrayDeque<>(); TokenStream(InputStream inputStream) throws IOException { - bufferedReader = new BufferedReader(new InputStreamReader(inputStream, UTF_8)); + bufferedReader = new BufferedReader(new InputStreamReader(inputStream, ISO_8859_1)); processLine(); } @@ -183,8 +183,7 @@ private static Credential parseCredentialForMachine(TokenStream tokenStream, Str while (!done && tokenStream.hasNext()) { // Peek rather than taking next token since we probably won't process it Token token = tokenStream.peek(); - if (token instanceof ItemToken itemToken) { - String item = itemToken.item(); + if (token instanceof ItemToken(String item)) { switch (item) { case LOGIN -> { tokenStream.next(); diff --git a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/BUILD b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/BUILD index 435274306788b6..16cb96552d6f19 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/BUILD +++ b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/BUILD @@ -259,6 +259,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/skyframe:package_lookup_function", "//src/main/java/com/google/devtools/build/lib/skyframe:package_lookup_value", "//src/main/java/com/google/devtools/build/lib/skyframe:precomputed_value", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", "//src/main/java/com/google/devtools/build/lib/vfs/inmemoryfs", diff --git a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorFileFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorFileFunction.java index 07eb48edd23d00..aaba70a5ffdf89 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorFileFunction.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorFileFunction.java @@ -14,7 +14,7 @@ package com.google.devtools.build.lib.bazel.bzlmod; -import static java.nio.charset.StandardCharsets.UTF_8; +import static java.nio.charset.StandardCharsets.ISO_8859_1; import com.google.common.collect.ImmutableList; import com.google.devtools.build.lib.actions.FileValue; @@ -25,6 +25,7 @@ import com.google.devtools.build.lib.packages.VendorThreadContext; import com.google.devtools.build.lib.rules.repository.RepositoryDelegatorFunction; import com.google.devtools.build.lib.skyframe.PrecomputedValue; +import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.Root; @@ -54,7 +55,8 @@ public class VendorFileFunction implements SkyFunction { private static final String VENDOR_FILE_HEADER = - """ + StringEncoding.unicodeToInternal( + """ ############################################################################### # This file is used to configure how external repositories are handled in vendor mode. # ONLY the two following functions can be used: @@ -67,7 +69,7 @@ public class VendorFileFunction implements SkyFunction { # Note that Bazel will NOT update the vendored source for this repo while running vendor command # unless it's unpinned. The user can modify and maintain the vendored source for this repo manually. ############################################################################### -"""; +"""); private final BazelStarlarkEnvironment starlarkEnv; @@ -138,8 +140,7 @@ private void createVendorFile(Path vendorPath, Path vendorFilePath) throws VendorFileFunctionException { try { vendorPath.createDirectoryAndParents(); - byte[] vendorFileContents = VENDOR_FILE_HEADER.getBytes(UTF_8); - FileSystemUtils.writeContent(vendorFilePath, vendorFileContents); + FileSystemUtils.writeContent(vendorFilePath, ISO_8859_1, VENDOR_FILE_HEADER); } catch (IOException e) { throw new VendorFileFunctionException( new IOException("error creating VENDOR.bazel file", e), Transience.TRANSIENT); diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/RepositoryResolvedModule.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/RepositoryResolvedModule.java index 84b9ec714d6d0b..97940ed169d1d7 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/RepositoryResolvedModule.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/RepositoryResolvedModule.java @@ -13,6 +13,8 @@ // limitations under the License. package com.google.devtools.build.lib.bazel.repository; +import static java.nio.charset.StandardCharsets.ISO_8859_1; + import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; @@ -29,7 +31,6 @@ import java.io.File; import java.io.IOException; import java.io.Writer; -import java.nio.charset.StandardCharsets; import java.util.LinkedHashMap; import java.util.Map; import net.starlark.java.eval.Printer; @@ -81,7 +82,7 @@ public void afterCommand() { for (Object resolved : resolvedValues.values()) { resultBuilder.add(resolved); } - try (Writer writer = Files.newWriter(new File(resolvedFile), StandardCharsets.UTF_8)) { + try (Writer writer = Files.newWriter(new File(resolvedFile), ISO_8859_1)) { writer.write(EXPORTED_NAME + " = " + new ValuePrinter().repr(resultBuilder.build())); } catch (IOException e) { logger.atWarning().withCause(e).log("IO Error writing to file %s", resolvedFile); diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/BUILD b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/BUILD index 015b609ec2290a..e0f2d7f6beff97 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/BUILD +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/BUILD @@ -48,8 +48,10 @@ java_library( "//src/main/java/com/google/devtools/build/lib/skyframe:precomputed_value", "//src/main/java/com/google/devtools/build/lib/skyframe:repository_mapping_value", "//src/main/java/com/google/devtools/build/lib/starlarkbuildapi/repository", + "//src/main/java/com/google/devtools/build/lib/unsafe:string", "//src/main/java/com/google/devtools/build/lib/util", "//src/main/java/com/google/devtools/build/lib/util:string", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/util/io:out-err", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java index 5d8068da7835f6..131ca7373637a1 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java @@ -57,6 +57,7 @@ import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor; import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor.ExecutionResult; import com.google.devtools.build.lib.skyframe.ActionEnvironmentFunction; +import com.google.devtools.build.lib.unsafe.StringUnsafe; import com.google.devtools.build.lib.util.OsUtils; import com.google.devtools.build.lib.util.io.OutErr; import com.google.devtools.build.lib.vfs.FileSystemUtils; @@ -1324,23 +1325,17 @@ private static String renamedStripPrefix(String method, String stripPrefix, Stri @Param( name = "legacy_utf8", named = true, - defaultValue = "True", + defaultValue = "False", doc = """ - Encode file content to UTF-8, true by default. Future versions will change \ - the default and remove this parameter. + No-op. This parameter is deprecated and will be removed in a future version of \ + Bazel. """), }) public void createFile( Object path, String content, Boolean executable, Boolean legacyUtf8, StarlarkThread thread) throws RepositoryFunctionException, EvalException, InterruptedException { StarlarkPath p = getPath(path); - byte[] contentBytes; - if (legacyUtf8) { - contentBytes = content.getBytes(UTF_8); - } else { - contentBytes = content.getBytes(ISO_8859_1); - } WorkspaceRuleEvent w = WorkspaceRuleEvent.newFileEvent( p.toString(), @@ -1354,7 +1349,7 @@ public void createFile( makeDirectories(p.getPath()); p.getPath().delete(); try (OutputStream stream = p.getPath().getOutputStream()) { - stream.write(contentBytes); + stream.write(StringUnsafe.getInstance().getInternalStringBytes(content)); } if (executable) { p.getPath().setExecutable(true); diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkExecutionResult.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkExecutionResult.java index 7bd40f94599f20..ba58776abe9431 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkExecutionResult.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkExecutionResult.java @@ -13,7 +13,7 @@ // limitations under the License. package com.google.devtools.build.lib.bazel.repository.starlark; -import static java.nio.charset.StandardCharsets.UTF_8; +import static java.nio.charset.StandardCharsets.ISO_8859_1; import com.google.common.base.Preconditions; import com.google.common.collect.Maps; @@ -38,7 +38,6 @@ import java.util.Set; import net.starlark.java.annot.StarlarkBuiltin; import net.starlark.java.annot.StarlarkMethod; -import net.starlark.java.eval.EvalException; import net.starlark.java.eval.StarlarkValue; /** @@ -182,14 +181,14 @@ Builder setQuiet(boolean quiet) { private static String toString(ByteArrayOutputStream stream) { try { - return new String(stream.toByteArray(), UTF_8); + return stream.toString(ISO_8859_1); } catch (IllegalStateException e) { return ""; } } - /** Execute the command specified by {@link #addArguments(Iterable)}. */ - StarlarkExecutionResult execute() throws EvalException, InterruptedException { + /** Execute the command specified by {@link #addArguments}. */ + StarlarkExecutionResult execute() throws InterruptedException { Preconditions.checkArgument(timeout > 0, "Timeout must be set prior to calling execute()."); Preconditions.checkArgument(!args.isEmpty(), "No command specified."); Preconditions.checkState(!executed, "Command was already executed, cannot re-use builder."); diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java index 05d6bc4a50b515..3cab859e9e31cf 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java @@ -14,6 +14,8 @@ package com.google.devtools.build.lib.bazel.repository.starlark; +import static java.nio.charset.StandardCharsets.ISO_8859_1; + import com.github.difflib.patch.PatchFailedException; import com.google.common.collect.ImmutableMap; import com.google.devtools.build.docgen.annot.DocCategory; @@ -47,7 +49,6 @@ import com.google.devtools.build.skyframe.SkyFunctionException.Transience; import java.io.IOException; import java.io.OutputStream; -import java.nio.charset.StandardCharsets; import java.nio.file.InvalidPathException; import java.util.HashMap; import java.util.Map; @@ -322,14 +323,16 @@ public void createFileFromTemplate( try { checkInOutputDirectory("write", p); makeDirectories(p.getPath()); - String tpl = FileSystemUtils.readContent(t.getPath(), StandardCharsets.UTF_8); + // Read and write files as raw bytes by using the Latin-1 encoding, which matches the encoding + // used by Bazel for strings. + String tpl = FileSystemUtils.readContent(t.getPath(), ISO_8859_1); for (Map.Entry substitution : substitutionMap.entrySet()) { tpl = StringUtilities.replaceAllLiteral(tpl, substitution.getKey(), substitution.getValue()); } p.getPath().delete(); try (OutputStream stream = p.getPath().getOutputStream()) { - stream.write(tpl.getBytes(StandardCharsets.UTF_8)); + stream.write(tpl.getBytes(ISO_8859_1)); } if (executable) { p.getPath().setExecutable(true); diff --git a/src/main/java/com/google/devtools/build/lib/profiler/JsonProfile.java b/src/main/java/com/google/devtools/build/lib/profiler/JsonProfile.java index cbb02d5e54cb13..150b7f5683158c 100644 --- a/src/main/java/com/google/devtools/build/lib/profiler/JsonProfile.java +++ b/src/main/java/com/google/devtools/build/lib/profiler/JsonProfile.java @@ -14,6 +14,8 @@ package com.google.devtools.build.lib.profiler; +import static java.nio.charset.StandardCharsets.ISO_8859_1; + import com.google.auto.value.AutoValue; import com.google.devtools.build.lib.profiler.statistics.PhaseSummaryStatistics; import com.google.gson.stream.JsonReader; @@ -24,7 +26,6 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; -import java.nio.charset.StandardCharsets; import java.time.Duration; import java.util.List; import java.util.zip.GZIPInputStream; @@ -47,8 +48,7 @@ public JsonProfile(File profileFile) throws IOException { public JsonProfile(InputStream inputStream) throws IOException { try (JsonReader reader = - new JsonReader( - new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8)))) { + new JsonReader(new BufferedReader(new InputStreamReader(inputStream, ISO_8859_1)))) { if (reader.peek() == JsonToken.BEGIN_OBJECT) { reader.beginObject(); while (reader.hasNext()) { diff --git a/src/main/java/com/google/devtools/build/lib/profiler/JsonTraceFileWriter.java b/src/main/java/com/google/devtools/build/lib/profiler/JsonTraceFileWriter.java index d73fa42380960f..2ab8881db77cd4 100644 --- a/src/main/java/com/google/devtools/build/lib/profiler/JsonTraceFileWriter.java +++ b/src/main/java/com/google/devtools/build/lib/profiler/JsonTraceFileWriter.java @@ -13,6 +13,8 @@ // limitations under the License. package com.google.devtools.build.lib.profiler; +import static java.nio.charset.StandardCharsets.ISO_8859_1; + import com.google.common.base.Preconditions; import com.google.devtools.build.lib.analysis.BlazeVersionInfo; import com.google.devtools.build.lib.profiler.Profiler.TaskData; @@ -21,7 +23,6 @@ import java.io.IOException; import java.io.OutputStream; import java.io.OutputStreamWriter; -import java.nio.charset.StandardCharsets; import java.time.Duration; import java.time.Instant; import java.util.HashMap; @@ -223,8 +224,9 @@ public void run() { try (JsonWriter writer = new JsonWriter( // The buffer size of 262144 is chosen at random. - new OutputStreamWriter( - new BufferedOutputStream(outStream, 262144), StandardCharsets.UTF_8))) { + // Bazel internally stores strings as raw bytes encoded in ISO_8859_1, so we use the + // same encoding here to also write out raw bytes. + new OutputStreamWriter(new BufferedOutputStream(outStream, 262144), ISO_8859_1))) { var startDate = Instant.now(); writer.beginObject(); writer.name("otherData"); diff --git a/src/main/java/com/google/devtools/build/lib/rules/cpp/BUILD b/src/main/java/com/google/devtools/build/lib/rules/cpp/BUILD index a6f525dc4c679d..1216a355cd5ffe 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/cpp/BUILD +++ b/src/main/java/com/google/devtools/build/lib/rules/cpp/BUILD @@ -114,6 +114,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/util:os", "//src/main/java/com/google/devtools/build/lib/util:shell_escaper", "//src/main/java/com/google/devtools/build/lib/util:string", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/util/io", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:ospathpolicy", diff --git a/src/main/java/com/google/devtools/build/lib/rules/cpp/ShowIncludesFilter.java b/src/main/java/com/google/devtools/build/lib/rules/cpp/ShowIncludesFilter.java index e6804184fb707f..46efc909977e65 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/cpp/ShowIncludesFilter.java +++ b/src/main/java/com/google/devtools/build/lib/rules/cpp/ShowIncludesFilter.java @@ -14,14 +14,17 @@ package com.google.devtools.build.lib.rules.cpp; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static java.nio.charset.StandardCharsets.ISO_8859_1; + import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; +import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.vfs.Path; import java.io.ByteArrayOutputStream; import java.io.FilterOutputStream; import java.io.IOException; import java.io.OutputStream; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -59,94 +62,37 @@ public static class FilterShowIncludesOutputStream extends FilterOutputStream { // cl.exe will print different prefix according to the locale configured for MSVC. private static final ImmutableList SHOW_INCLUDES_PREFIXES = ImmutableList.of( - new String( - new byte[] { - 78, 111, 116, 101, 58, 32, 105, 110, 99, 108, 117, 100, 105, 110, 103, 32, 102, - 105, 108, 101, 58 - }, - StandardCharsets.UTF_8), // English - new String( - new byte[] { - -26, -77, -88, -26, -124, -113, 58, 32, -27, -116, -123, -27, -112, -85, -26, -86, - -108, -26, -95, -120, 58 - }, - StandardCharsets.UTF_8), // Traditional Chinese - new String( - new byte[] { - 80, 111, 122, 110, -61, -95, 109, 107, 97, 58, 32, 86, -60, -115, 101, 116, 110, - -60, -101, 32, 115, 111, 117, 98, 111, 114, 117, 58 - }, - StandardCharsets.UTF_8), // Czech - new String( - new byte[] { - 72, 105, 110, 119, 101, 105, 115, 58, 32, 69, 105, 110, 108, 101, 115, 101, 110, - 32, 100, 101, 114, 32, 68, 97, 116, 101, 105, 58 - }, - StandardCharsets.UTF_8), // German - new String( - new byte[] { - 82, 101, 109, 97, 114, 113, 117, 101, -62, -96, 58, 32, 105, 110, 99, 108, 117, - 115, 105, 111, 110, 32, 100, 117, 32, 102, 105, 99, 104, 105, 101, 114, -62, -96, - 58 - }, - StandardCharsets.UTF_8), // French - new String( - new byte[] { - 78, 111, 116, 97, 58, 32, 102, 105, 108, 101, 32, 105, 110, 99, 108, 117, 115, 111 - }, - StandardCharsets.UTF_8), // Italian - new String( - new byte[] { - -29, -125, -95, -29, -125, -94, 58, 32, -29, -126, -92, -29, -125, -77, -29, -126, - -81, -29, -125, -85, -29, -125, -68, -29, -125, -119, 32, -29, -125, -107, -29, - -126, -95, -29, -126, -92, -29, -125, -85, 58 - }, - StandardCharsets.UTF_8), // Janpanese - new String( - new byte[] { - -20, -80, -72, -22, -77, -96, 58, 32, -19, -113, -84, -19, -107, -88, 32, -19, - -116, -116, -20, -99, -68, 58 - }, - StandardCharsets.UTF_8), // Korean - new String( - new byte[] { - 85, 119, 97, 103, 97, 58, 32, 119, 32, 116, 121, 109, 32, 112, 108, 105, 107, 117, - 58 - }, - StandardCharsets.UTF_8), // Polish - new String( - new byte[] { - 79, 98, 115, 101, 114, 118, 97, -61, -89, -61, -93, 111, 58, 32, 105, 110, 99, - 108, 117, 105, 110, 100, 111, 32, 97, 114, 113, 117, 105, 118, 111, 58 - }, - StandardCharsets.UTF_8), // Portuguese - new String( - new byte[] { - -48, -97, -47, -128, -48, -72, -48, -68, -48, -75, -47, -121, -48, -80, -48, -67, - -48, -72, -48, -75, 58, 32, -48, -78, -48, -70, -48, -69, -47, -114, -47, -121, - -48, -75, -48, -67, -48, -72, -48, -75, 32, -47, -124, -48, -80, -48, -71, -48, - -69, -48, -80, 58 - }, - StandardCharsets.UTF_8), // Russian - new String( - new byte[] { - 78, 111, 116, 58, 32, 101, 107, 108, 101, 110, 101, 110, 32, 100, 111, 115, 121, - 97, 58 - }, - StandardCharsets.UTF_8), // Turkish - new String( - new byte[] { - -26, -77, -88, -26, -124, -113, 58, 32, -27, -116, -123, -27, -112, -85, -26, - -106, -121, -28, -69, -74, 58 - }, - StandardCharsets.UTF_8), // Simplified Chinese - new String( - new byte[] { - 78, 111, 116, 97, 58, 32, 105, 110, 99, 108, 117, 115, 105, -61, -77, 110, 32, - 100, 101, 108, 32, 97, 114, 99, 104, 105, 118, 111, 58 - }, - StandardCharsets.UTF_8) // Spanish - ); + // English + "Note: including file:", + // Traditional Chinese + "注意: 包含檔案:", + // Czech + "Poznámka: Včetně souboru:", + // German + "Hinweis: Einlesen der Datei:", + // French (non-breaking spaces before the colons) + "Remarque : inclusion du fichier :", + // Italian (the missing : is intentional, this appears to be a bug in MSVC) + "Nota: file incluso", + // Japanese + "メモ: インクルード ファイル:", + // Korean + "참고: 포함 파일:", + // Polish + "Uwaga: w tym pliku:", + // Portuguese + "Observação: incluindo arquivo:", + // Russian + "Примечание: включение файла:", + // Turkish + "Not: eklenen dosya:", + // Simplified Chinese + "注意: 包含文件:", + // Spanish + "Nota: inclusión del archivo:") + .stream() + .map(StringEncoding::unicodeToInternal) + .collect(toImmutableList()); private final String sourceFileName; private boolean sawPotentialUnsupportedShowIncludesLine; // Grab everything under the execroot base so that external repository header files are covered @@ -171,7 +117,7 @@ public FilterShowIncludesOutputStream(OutputStream out, String sourceFileName) { public void write(int b) throws IOException { buffer.write(b); if (b == NEWLINE) { - String line = buffer.toString(StandardCharsets.UTF_8.name()); + String line = buffer.toString(ISO_8859_1); boolean prefixMatched = false; for (String prefix : SHOW_INCLUDES_PREFIXES) { if (line.startsWith(prefix)) { @@ -195,9 +141,7 @@ public void write(int b) throws IOException { // can use non-UTF8 encodings, which the checks above fail to detect. As this results in // incorrect incremental builds, we emit a warning if the raw byte sequence comprising the // line looks like it could be a /showIncludes line. - if (POTENTIAL_UNSUPPORTED_SHOW_INCLUDES_LINE - .matcher(buffer.toString(StandardCharsets.ISO_8859_1).trim()) - .matches()) { + if (POTENTIAL_UNSUPPORTED_SHOW_INCLUDES_LINE.matcher(line.trim()).matches()) { sawPotentialUnsupportedShowIncludesLine = true; } buffer.writeTo(out); @@ -208,7 +152,7 @@ public void write(int b) throws IOException { @Override public void flush() throws IOException { - String line = buffer.toString(StandardCharsets.UTF_8.name()); + String line = buffer.toString(ISO_8859_1); // If this line starts or could start with a prefix. boolean startingWithAnyPrefix = false; diff --git a/src/main/java/com/google/devtools/build/lib/util/BUILD b/src/main/java/com/google/devtools/build/lib/util/BUILD index 0ef5ea4fa72e7e..cda04800879ad3 100644 --- a/src/main/java/com/google/devtools/build/lib/util/BUILD +++ b/src/main/java/com/google/devtools/build/lib/util/BUILD @@ -251,6 +251,7 @@ java_library( deps = [ ":os", ":shell_escaper", + ":string_encoding", "//src/main/java/com/google/devtools/build/lib/bugreport", "//src/main/java/com/google/devtools/build/lib/concurrent", "//src/main/java/com/google/devtools/build/lib/vfs", diff --git a/src/main/java/com/google/devtools/build/lib/util/DependencySet.java b/src/main/java/com/google/devtools/build/lib/util/DependencySet.java index 524cda148e0c39..62a34a000efded 100644 --- a/src/main/java/com/google/devtools/build/lib/util/DependencySet.java +++ b/src/main/java/com/google/devtools/build/lib/util/DependencySet.java @@ -14,14 +14,16 @@ package com.google.devtools.build.lib.util; +import static java.nio.charset.StandardCharsets.ISO_8859_1; + import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Ascii; import com.google.common.base.Preconditions; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; import com.google.errorprone.annotations.CanIgnoreReturnValue; import java.io.IOException; import java.io.PrintStream; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -146,7 +148,7 @@ public DependencySet process(byte[] content) throws IOException { // keep scanning. We do this to cope with "foo.o : \" which is // valid Makefile syntax produced by the cuda compiler. if (sawTarget && w > 0) { - addDependency(new String(content, 0, w, StandardCharsets.UTF_8)); + addDependency(new String(content, 0, w, ISO_8859_1)); w = 0; } continue; @@ -160,7 +162,7 @@ public DependencySet process(byte[] content) throws IOException { // (Arguably if !sawTarget && w > 0 we should report an error, // as that suggests the .d file is malformed.) if (sawTarget && w > 0) { - addDependency(new String(content, 0, w, StandardCharsets.UTF_8)); + addDependency(new String(content, 0, w, ISO_8859_1)); } w = 0; sawTarget = false; // reset for new line @@ -174,7 +176,7 @@ public DependencySet process(byte[] content) throws IOException { case '\n': case '\r': if (w > 0) { - outputFileName = new String(content, 0, w, StandardCharsets.UTF_8); + outputFileName = new String(content, 0, w, ISO_8859_1); w = 0; sawTarget = true; } @@ -270,11 +272,7 @@ private static String translateWindowsPath(String path) { return path; } if (n >= 2 && isAsciiLetter(path.charAt(1)) && (n == 2 || path.charAt(2) == '/')) { - StringBuilder sb = new StringBuilder(path.length()); - sb.append(Character.toUpperCase(path.charAt(1))); - sb.append(":/"); - sb.append(path, 2, path.length()); - return sb.toString(); + return Ascii.toUpperCase(path.charAt(1)) + ":/" + path.substring(2); } else { String unixRoot = getUnixRoot(); return unixRoot + path; @@ -310,7 +308,7 @@ private static String getUnixRoot() { @Nullable private static String determineUnixRoot(String jvmArgName) { // Get the path from a JVM flag, if specified. - String path = System.getProperty(jvmArgName); + String path = StringEncoding.platformToInternal(System.getProperty(jvmArgName)); if (path == null) { return null; } diff --git a/src/test/java/com/google/devtools/build/lib/analysis/BUILD b/src/test/java/com/google/devtools/build/lib/analysis/BUILD index 782e30f10491f6..08a14544b91d98 100644 --- a/src/test/java/com/google/devtools/build/lib/analysis/BUILD +++ b/src/test/java/com/google/devtools/build/lib/analysis/BUILD @@ -153,6 +153,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/util:detailed_exit_code", "//src/main/java/com/google/devtools/build/lib/util:filetype", "//src/main/java/com/google/devtools/build/lib/util:os", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/util/io", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", diff --git a/src/test/java/com/google/devtools/build/lib/analysis/actions/TemplateExpansionActionTest.java b/src/test/java/com/google/devtools/build/lib/analysis/actions/TemplateExpansionActionTest.java index 15f208d16b6e82..1ebd5b08e4449f 100644 --- a/src/test/java/com/google/devtools/build/lib/analysis/actions/TemplateExpansionActionTest.java +++ b/src/test/java/com/google/devtools/build/lib/analysis/actions/TemplateExpansionActionTest.java @@ -39,6 +39,7 @@ import com.google.devtools.build.lib.exec.util.TestExecutorBuilder; import com.google.devtools.build.lib.testutil.FoundationTestCase; import com.google.devtools.build.lib.util.Fingerprint; +import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.util.io.FileOutErr; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; @@ -252,7 +253,10 @@ public void testWithSpecialCharacters() throws Exception { // scratch.overwriteFile appends a newline, so we need an additional \n here String expected = String.format("%s%s\n", SPECIAL_CHARS, SPECIAL_CHARS); - executeTemplateExpansion(expected, ImmutableList.of(Substitution.of("%key%", SPECIAL_CHARS))); + executeTemplateExpansion( + expected, + ImmutableList.of( + Substitution.of("%key%", StringEncoding.unicodeToInternal(SPECIAL_CHARS)))); } private String computeKey(TemplateExpansionAction action) throws EvalException { diff --git a/src/test/java/com/google/devtools/build/lib/authandtls/BUILD b/src/test/java/com/google/devtools/build/lib/authandtls/BUILD index d16811aae7af9b..d013c143102b22 100644 --- a/src/test/java/com/google/devtools/build/lib/authandtls/BUILD +++ b/src/test/java/com/google/devtools/build/lib/authandtls/BUILD @@ -27,6 +27,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/authandtls", "//src/main/java/com/google/devtools/build/lib/authandtls/credentialhelper", "//src/main/java/com/google/devtools/build/lib/events", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs/inmemoryfs", "//src/main/java/com/google/devtools/common/options", diff --git a/src/test/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoderTest.java b/src/test/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoderTest.java index 7873e0572d0a4e..a7586f49a9758b 100644 --- a/src/test/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoderTest.java +++ b/src/test/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoderTest.java @@ -16,6 +16,7 @@ import static com.google.common.truth.Truth.assertThat; import static java.nio.charset.StandardCharsets.UTF_8; +import com.google.devtools.build.lib.util.StringEncoding; import java.util.Base64; import org.junit.Test; import org.junit.runner.RunWith; @@ -34,13 +35,13 @@ private static String[] decode(String message) { @Test public void encode_normalUsernamePassword_outputExpected() { - String message = BasicHttpAuthenticationEncoder.encode("Aladdin", "open sesame", UTF_8); + String message = BasicHttpAuthenticationEncoder.encode("Aladdin", "open sesame"); assertThat(message).isEqualTo("Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ=="); } @Test public void encode_normalUsernamePassword_canBeDecoded() { - String message = BasicHttpAuthenticationEncoder.encode("Aladdin", "open sesame", UTF_8); + String message = BasicHttpAuthenticationEncoder.encode("Aladdin", "open sesame"); String[] usernameAndPassword = decode(message); assertThat(usernameAndPassword[0]).isEqualTo("Aladdin"); @@ -49,7 +50,7 @@ public void encode_normalUsernamePassword_canBeDecoded() { @Test public void encode_usernameContainsColon_canBeDecoded() { - String message = BasicHttpAuthenticationEncoder.encode("foo:user", "foopass", UTF_8); + String message = BasicHttpAuthenticationEncoder.encode("foo:user", "foopass"); String[] usernameAndPassword = decode(message); assertThat(usernameAndPassword[0]).isEqualTo("foo"); @@ -58,25 +59,27 @@ public void encode_usernameContainsColon_canBeDecoded() { @Test public void encode_emptyUsername_outputExpected() { - String message = BasicHttpAuthenticationEncoder.encode("", "foopass", UTF_8); + String message = BasicHttpAuthenticationEncoder.encode("", "foopass"); assertThat(message).isEqualTo("Basic OmZvb3Bhc3M="); } @Test public void encode_emptyPassword_outputExpected() { - String message = BasicHttpAuthenticationEncoder.encode("foouser", "", UTF_8); + String message = BasicHttpAuthenticationEncoder.encode("foouser", ""); assertThat(message).isEqualTo("Basic Zm9vdXNlcjo="); } @Test public void encode_emptyUsernamePassword_outputExpected() { - String message = BasicHttpAuthenticationEncoder.encode("", "", UTF_8); + String message = BasicHttpAuthenticationEncoder.encode("", ""); assertThat(message).isEqualTo("Basic Og=="); } @Test public void encode_specialCharacterUtf8_outputExpected() { - String message = BasicHttpAuthenticationEncoder.encode("test", "123\u00A3", UTF_8); + String message = + BasicHttpAuthenticationEncoder.encode( + "test", StringEncoding.unicodeToInternal("123\u00A3")); assertThat(message).isEqualTo("Basic dGVzdDoxMjPCow=="); } } diff --git a/src/test/java/com/google/devtools/build/lib/authandtls/GoogleAuthUtilsTest.java b/src/test/java/com/google/devtools/build/lib/authandtls/GoogleAuthUtilsTest.java index ec3f4e6437d3ea..ec1561e96def83 100644 --- a/src/test/java/com/google/devtools/build/lib/authandtls/GoogleAuthUtilsTest.java +++ b/src/test/java/com/google/devtools/build/lib/authandtls/GoogleAuthUtilsTest.java @@ -15,7 +15,6 @@ package com.google.devtools.build.lib.authandtls; import static com.google.common.truth.Truth.assertThat; -import static java.nio.charset.StandardCharsets.UTF_8; import com.google.auth.Credentials; import com.google.common.base.Preconditions; @@ -344,7 +343,7 @@ private static void assertRequestMetadata( Map> requestMetadata, String username, String password) { assertThat(requestMetadata.keySet()).containsExactly("Authorization"); assertThat(Iterables.getOnlyElement(requestMetadata.values())) - .containsExactly(BasicHttpAuthenticationEncoder.encode(username, password, UTF_8)); + .containsExactly(BasicHttpAuthenticationEncoder.encode(username, password)); } private static CredentialHelperProvider newCredentialHelperProvider( diff --git a/src/test/java/com/google/devtools/build/lib/authandtls/NetrcCredentialsTest.java b/src/test/java/com/google/devtools/build/lib/authandtls/NetrcCredentialsTest.java index 2cb07cdf815003..12d204f3d4e3ba 100644 --- a/src/test/java/com/google/devtools/build/lib/authandtls/NetrcCredentialsTest.java +++ b/src/test/java/com/google/devtools/build/lib/authandtls/NetrcCredentialsTest.java @@ -14,7 +14,6 @@ package com.google.devtools.build.lib.authandtls; import static com.google.common.truth.Truth.assertThat; -import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; @@ -131,6 +130,6 @@ private static void assertRequestMetadata( Map> requestMetadata, String username, String password) { assertThat(requestMetadata.keySet()).containsExactly("Authorization"); assertThat(Iterables.getOnlyElement(requestMetadata.values())) - .containsExactly(BasicHttpAuthenticationEncoder.encode(username, password, UTF_8)); + .containsExactly(BasicHttpAuthenticationEncoder.encode(username, password)); } } diff --git a/src/test/java/com/google/devtools/build/lib/bazel/bzlmod/IndexRegistryTest.java b/src/test/java/com/google/devtools/build/lib/bazel/bzlmod/IndexRegistryTest.java index 02e73b8f9ebe36..844b50249e800c 100644 --- a/src/test/java/com/google/devtools/build/lib/bazel/bzlmod/IndexRegistryTest.java +++ b/src/test/java/com/google/devtools/build/lib/bazel/bzlmod/IndexRegistryTest.java @@ -70,8 +70,7 @@ public ImmutableMap> getRecordedHashes() { } } - private final String authToken = - BasicHttpAuthenticationEncoder.encode("rinne", "rinnepass", UTF_8); + private final String authToken = BasicHttpAuthenticationEncoder.encode("rinne", "rinnepass"); private DownloadManager downloadManager; private EventRecorder eventRecorder; @Rule public final TestHttpServer server = new TestHttpServer(authToken); diff --git a/src/test/java/com/google/devtools/build/lib/bazel/repository/downloader/UrlRewriterTest.java b/src/test/java/com/google/devtools/build/lib/bazel/repository/downloader/UrlRewriterTest.java index dcc41d16e58280..9b2b8cb0a18857 100644 --- a/src/test/java/com/google/devtools/build/lib/bazel/repository/downloader/UrlRewriterTest.java +++ b/src/test/java/com/google/devtools/build/lib/bazel/repository/downloader/UrlRewriterTest.java @@ -16,7 +16,6 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.truth.Truth.assertThat; import static java.nio.charset.StandardCharsets.ISO_8859_1; -import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.fail; import com.google.auth.Credentials; @@ -440,6 +439,6 @@ private static void assertRequestMetadata( Map> requestMetadata, String username, String password) { assertThat(requestMetadata.keySet()).containsExactly("Authorization"); assertThat(Iterables.getOnlyElement(requestMetadata.values())) - .containsExactly(BasicHttpAuthenticationEncoder.encode(username, password, UTF_8)); + .containsExactly(BasicHttpAuthenticationEncoder.encode(username, password)); } } diff --git a/src/test/java/com/google/devtools/build/lib/rules/cpp/ShowIncludesFilterTest.java b/src/test/java/com/google/devtools/build/lib/rules/cpp/ShowIncludesFilterTest.java index 12c3cf88dfd440..c831070a17061b 100644 --- a/src/test/java/com/google/devtools/build/lib/rules/cpp/ShowIncludesFilterTest.java +++ b/src/test/java/com/google/devtools/build/lib/rules/cpp/ShowIncludesFilterTest.java @@ -17,9 +17,6 @@ import static com.google.common.truth.Truth.assertThat; import static java.nio.charset.StandardCharsets.UTF_8; -import com.google.devtools.build.lib.vfs.DigestHashFunction; -import com.google.devtools.build.lib.vfs.FileSystem; -import com.google.devtools.build.lib.vfs.inmemoryfs.InMemoryFileSystem; import java.io.ByteArrayOutputStream; import java.io.FilterOutputStream; import java.io.IOException; @@ -35,15 +32,12 @@ public class ShowIncludesFilterTest { private ShowIncludesFilter showIncludesFilter; private ByteArrayOutputStream output; private FilterOutputStream filterOutputStream; - private FileSystem fs; @Before public void setUpOutputStreams() throws IOException { showIncludesFilter = new ShowIncludesFilter("foo.cpp"); output = new ByteArrayOutputStream(); filterOutputStream = showIncludesFilter.getFilteredOutputStream(output); - fs = new InMemoryFileSystem(DigestHashFunction.SHA256); - fs.getPath("/out").createDirectory(); } private byte[] getBytes(String str) { diff --git a/src/test/java/com/google/devtools/build/lib/starlark/StarlarkRuleImplementationFunctionsTest.java b/src/test/java/com/google/devtools/build/lib/starlark/StarlarkRuleImplementationFunctionsTest.java index b78af6d8ed19e9..9cef7af493cba4 100644 --- a/src/test/java/com/google/devtools/build/lib/starlark/StarlarkRuleImplementationFunctionsTest.java +++ b/src/test/java/com/google/devtools/build/lib/starlark/StarlarkRuleImplementationFunctionsTest.java @@ -19,6 +19,7 @@ import static com.google.common.truth.Truth.assertWithMessage; import static com.google.devtools.build.lib.bazel.bzlmod.BzlmodTestUtil.createModuleKey; import static com.google.devtools.build.lib.skyframe.BzlLoadValue.keyForBuild; +import static java.nio.charset.StandardCharsets.ISO_8859_1; import static org.junit.Assert.assertThrows; import static org.junit.Assert.fail; @@ -68,8 +69,6 @@ import com.google.devtools.build.lib.testutil.TestConstants; import com.google.devtools.build.lib.util.Fingerprint; import com.google.devtools.build.lib.util.OsUtils; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; @@ -916,29 +915,20 @@ public void testCreateTemplateAction() throws Exception { assertThat(action.makeExecutable()).isFalse(); } - /** - * Simulates the fact that the Parser currently uses Latin1 to read BUILD files, while users - * usually write those files using UTF-8 encoding. Currently, the string-valued 'substitutions' - * parameter of the template_action function contains a hack that assumes its input is a UTF-8 - * encoded string which has been ingested as Latin 1. The hack converts the string to its - * "correct" UTF-8 value. Once Blaze starts calling {@link - * net.starlark.java.syntax.ParserInput#fromUTF8} instead of {@code fromLatin1} and the hack for - * the substitutions parameter is removed, this test will fail. - */ @Test - public void testCreateTemplateActionWithWrongEncoding() throws Exception { + public void testCreateTemplateActionUnicode() throws Exception { // The following array contains bytes that represent a string of length two when treated as // UTF-8 and a string of length four when treated as ISO-8859-1 (a.k.a. Latin 1). - byte[] bytesToDecode = {(byte) 0xC2, (byte) 0xA2, (byte) 0xC2, (byte) 0xA2}; - Charset latin1 = StandardCharsets.ISO_8859_1; - Charset utf8 = StandardCharsets.UTF_8; + String internalString = + new String(new byte[] {(byte) 0xC2, (byte) 0xA2, (byte) 0xC2, (byte) 0xA2}, ISO_8859_1); StarlarkRuleContext ruleContext = createRuleContext("//foo:foo"); setRuleContext(ruleContext); + // In production, Bazel parses Starlark as raw bytes encoded as Latin-1. ev.exec( "ruleContext.actions.expand_template(", " template = ruleContext.files.srcs[0],", " output = ruleContext.files.srcs[1],", - " substitutions = {'a': '" + new String(bytesToDecode, latin1) + "'},", + " substitutions = {'a" + internalString + "': '" + internalString + "'},", " is_executable = False)"); TemplateExpansionAction action = (TemplateExpansionAction) @@ -946,7 +936,8 @@ public void testCreateTemplateActionWithWrongEncoding() throws Exception { ruleContext.getRuleContext().getAnalysisEnvironment().getRegisteredActions()); List substitutions = action.getSubstitutions(); assertThat(substitutions).hasSize(1); - assertThat(substitutions.get(0).getValue()).isEqualTo(new String(bytesToDecode, utf8)); + assertThat(substitutions.get(0).getKey()).isEqualTo("a" + internalString); + assertThat(substitutions.get(0).getValue()).isEqualTo(internalString); } @Test diff --git a/src/test/shell/bazel/BUILD b/src/test/shell/bazel/BUILD index cc5ebafe945635..3aee8794ae5c05 100644 --- a/src/test/shell/bazel/BUILD +++ b/src/test/shell/bazel/BUILD @@ -874,7 +874,7 @@ sh_test( data = [ ":test-deps", "@bazel_tools//tools/bash/runfiles", - "@local_jdk//:jdk", + "@local_jdk//:jdk", # for remote_helpers setup_localjdk_javabase ], shard_count = 10, tags = [ diff --git a/src/test/shell/bazel/bazel_workspaces_test.sh b/src/test/shell/bazel/bazel_workspaces_test.sh index dc21ce0c7859f9..7b689800249803 100755 --- a/src/test/shell/bazel/bazel_workspaces_test.sh +++ b/src/test/shell/bazel/bazel_workspaces_test.sh @@ -579,21 +579,7 @@ function test_read() { ensure_contains_exactly 'path: ".*filefile.sh"' 2 } -function test_read_roundtrip_legacy_utf8() { - # See discussion on https://github.com/bazelbuild/bazel/pull/7309 - set_workspace_command ' - content = "echo fïlëfïlë" - repository_ctx.file("filefile.sh", content, True, legacy_utf8=True) - read_result = repository_ctx.read("filefile.sh") - - corrupted_content = "echo fïlëfïlë" - if read_result != corrupted_content: - fail("read(): expected %r, got %r" % (corrupted_content, read_result))' - - build_and_process_log --exclude_rule "repository @@local_config_cc" -} - -function test_read_roundtrip_nolegacy_utf8() { +function test_read_roundtrip_utf8() { set_workspace_command ' content = "echo fïlëfïlë" repository_ctx.file("filefile.sh", content, True, legacy_utf8=False) diff --git a/src/test/shell/bazel/starlark_repository_test.sh b/src/test/shell/bazel/starlark_repository_test.sh index 9a093466c35689..cf659feb4f3160 100755 --- a/src/test/shell/bazel/starlark_repository_test.sh +++ b/src/test/shell/bazel/starlark_repository_test.sh @@ -56,6 +56,14 @@ msys*) ;; esac +if $is_windows; then + export LC_ALL=C.utf8 +elif [[ "$(uname -s)" == "Linux" ]]; then + export LC_ALL=C.UTF-8 +else + export LC_ALL=en_US.UTF-8 +fi + source "$(rlocation "io_bazel/src/test/shell/bazel/remote_helpers.sh")" \ || { echo "remote_helpers.sh not found!" >&2; exit 1; } @@ -470,6 +478,84 @@ EOF expect_log "PWD=$repo2 TOTO=titi" } +function test_starlark_repository_unicode() { + setup_starlark_repository + + if "$is_windows"; then + # äöüÄÖÜß in UTF-8 + local unicode=$(echo -e '\xC3\xA4\xC3\xB6\xC3\xBC\xC3\x84\xC3\x96\xC3\x9C\xC3\x9F') + else + # äöüÄÖÜß🌱 in UTF-8 + local unicode=$(echo -e '\xC3\xA4\xC3\xB6\xC3\xBC\xC3\x84\xC3\x96\xC3\x9C\xC3\x9F\xF0\x9F\x8C\xB1') + fi + + tmpdir="$(mktemp -d ${TEST_TMPDIR}/test.XXXXXXXX)" + input_file="${tmpdir}/input$unicode" + echo -n "$unicode" > "${input_file}" + + cat >test.bzl <indirect%s.txt" % (UNICODE, UNICODE)] + ) + if result.return_code != 0: + fail("Incorrect return code from bash: %s != 0\n%s" % (result.return_code, result.stderr)) + + result = repository_ctx.execute([str(repository_ctx.which("bash")), "-c", "echo '%s'" % UNICODE]) + if result.return_code != 0: + fail("Incorrect return code from bash: %s != 0\n%s" % (result.return_code, result.stderr)) + if result.stdout.strip() != UNICODE: + fail("Incorrect output from bash: %s != %s\n%s" % (result.stdout.strip(), UNICODE, result.stderr)) + + result = repository_ctx.execute([str(repository_ctx.which("bash")), "-c", "echo '%s' && exit 123" % UNICODE]) + if result.return_code != 123: + fail("Incorrect return code from bash: %s != 123\n%s" % (result.return_code, result.stderr)) + if result.stdout.strip() != UNICODE: + fail("Incorrect output from bash: %s != %s\n%s" % (result.stdout.strip(), UNICODE, result.stderr)) + + repository_ctx.file("foo.txt", UNICODE) + read_content = repository_ctx.read("foo.txt") + if read_content != UNICODE: + fail("Incorrect content in foo.txt: %s != %s" % (read_content, UNICODE)) + + print("UNICODE = %s" % UNICODE) +repo = repository_rule(implementation=_impl) +EOF + + bazel build "--repo_env=INPUT_$unicode=${input_file}" @foo//:bar >& $TEST_log || fail "Failed to build" + expect_log "UNICODE = $unicode" + output_base="$(bazel info output_base)" + assert_contains "$unicode" "$output_base/external/+_repo_rules+foo/direct${unicode}.txt" + assert_contains "$unicode" "$output_base/external/+_repo_rules+foo/indirect${unicode}.txt" + assert_contains "${unicode}_replaced_${unicode}" "$output_base/external/+_repo_rules+foo/template${unicode}.txt" + + # The repo rule should not be re-run on server restart + bazel shutdown + bazel build "--repo_env=INPUT_${unicode}=${input_file}" @foo//:bar >& $TEST_log || fail "Failed to build" + expect_not_log "UNICODE" +} + function test_starlark_repository_environ() { setup_starlark_repository @@ -1946,7 +2032,7 @@ password foopass machine bar.example.org login barusername -password passbar +password passbar🌱 # following lines mix tabs and spaces machine oauthlife.com @@ -2011,7 +2097,7 @@ expected = { "https://bar.example.org/file3.tar" : { "type" : "basic", "login": "barusername", - "password" : "passbar", + "password" : "passbar🌱", }, "https://oauthlife.com/fizz/buzz/file5.tar": { "type" : "pattern", diff --git a/src/test/shell/bazel/unicode_filenames_test.sh b/src/test/shell/bazel/unicode_filenames_test.sh index 0db493480a971c..dad4f071f46a94 100755 --- a/src/test/shell/bazel/unicode_filenames_test.sh +++ b/src/test/shell/bazel/unicode_filenames_test.sh @@ -50,6 +50,13 @@ msys*|mingw*|cygwin*) ;; esac +if $is_windows; then + export LC_ALL=C.utf8 +elif [[ "$(uname -s)" == "Linux" ]]; then + export LC_ALL=C.UTF-8 +else + export LC_ALL=en_US.UTF-8 +fi #### SETUP ############################################################# @@ -91,23 +98,7 @@ function has_iso_8859_1_locale() { [[ "${charmap}" == "ISO-8859-1" ]] } -function has_utf8_locale() { - charmap="$(LC_ALL=en_US.UTF-8 locale charmap 2>/dev/null)" - [[ "${charmap}" == "UTF-8" ]] -} - function test_utf8_source_artifact() { - # Bazel relies on the JVM for filename encoding, and can only support - # UTF-8 if either a UTF-8 or ISO-8859-1 locale is available. - if ! "$is_windows"; then - if ! has_iso_8859_1_locale && ! has_utf8_locale; then - echo "Skipping test (no ISO-8859-1 or UTF-8 locale)." - echo "Available locales (need ISO-8859-1 or UTF-8):" - locale -a - return - fi - fi - unicode_filenames_test_setup touch 'pkg/srcs/regular file.txt' @@ -119,14 +110,7 @@ function test_utf8_source_artifact() { # 'pkg/srcs/\xc3\xbcn\xc3\xafc\xc3\xb6d\xc3\xab f\xc3\xafl\xc3\xab.txt' touch "$(printf '%b' 'pkg/srcs/\xc3\xbcn\xc3\xafc\xc3\xb6d\xc3\xab f\xc3\xafl\xc3\xab.txt')" - # On systems without an ISO-8859-1 locale, the environment locale must be - # the same as the file encoding. - # - # This doesn't affect systems that do have an ISO-8859-1 locale, because the - # Bazel launcher will force it to be used. - bazel shutdown - LC_ALL=en_US.UTF-8 bazel build //pkg:ls_srcs >$TEST_log 2>&1 || fail "Should build" - bazel shutdown + bazel build //pkg:ls_srcs >$TEST_log 2>&1 || fail "Should build" assert_contains "pkg/srcs/regular file.txt" bazel-bin/pkg/ls_srcs assert_contains "pkg/srcs/subdir/file.txt" bazel-bin/pkg/ls_srcs @@ -165,17 +149,6 @@ function test_traditional_encoding_source_artifact() { } function test_utf8_source_artifact_in_bep() { - # Bazel relies on the JVM for filename encoding, and can only support - # UTF-8 if either a UTF-8 or ISO-8859-1 locale is available. - if ! "$is_windows"; then - if ! has_iso_8859_1_locale && ! has_utf8_locale; then - echo "Skipping test (no ISO-8859-1 or UTF-8 locale)." - echo "Available locales (need ISO-8859-1 or UTF-8):" - locale -a - return - fi - fi - unicode_filenames_test_setup touch 'pkg/srcs/regular file.txt' @@ -187,15 +160,8 @@ function test_utf8_source_artifact_in_bep() { # 'pkg/srcs/\xc3\xbcn\xc3\xafc\xc3\xb6d\xc3\xab f\xc3\xafl\xc3\xab.txt' touch "$(printf '%b' 'pkg/srcs/\xc3\xbcn\xc3\xafc\xc3\xb6d\xc3\xab f\xc3\xafl\xc3\xab.txt')" - # On systems without an ISO-8859-1 locale, the environment locale must be - # the same as the file encoding. - # - # This doesn't affect systems that do have an ISO-8859-1 locale, because the - # Bazel launcher will force it to be used. - bazel shutdown - LC_ALL=en_US.UTF-8 bazel build --build_event_json_file="$TEST_log" \ + bazel build --build_event_json_file="$TEST_log" \ //pkg:filegroup 2>&1 || fail "Should build" - bazel shutdown expect_log '"name":"pkg/srcs/regular file.txt"' expect_log '"name":"pkg/srcs/subdir/file.txt"' @@ -203,10 +169,6 @@ function test_utf8_source_artifact_in_bep() { } function test_utf8_filename_in_java_test() { - # Intentionally do not check for available locales: Either C.UTF_8 or - # en_US.UTF-8 should exist on all CI machines - if not, we want to learn about - # this so that the Java stub template can be adapted accordingly. - touch WORKSPACE mkdir pkg @@ -233,4 +195,53 @@ EOF bazel test //pkg:Test --test_output=errors 2>$TEST_log || fail "Test should pass" } +function test_cc_dependency_with_utf8_filename() { + # TODO: Find a way to get cl.exe to output Unicode when not running in a + # console or migrate to /sourceDependencies. + if $is_windows; then + echo "Skipping test on Windows." && return + fi + + local unicode="äöüÄÖÜß🌱" + + setup_module_dot_bazel + + mkdir pkg + cat >pkg/BUILD <"pkg/${unicode}.h" < pkg/bin.cc + cat >>pkg/bin.cc < + +int main() { + std::cout << MY_STRING << std::endl; + return 0; +} +EOF + bazel run //pkg:bin >$TEST_log 2>&1 || fail "Should build" + expect_log "original" + + # Change the header file and rebuild + cat >"pkg/${unicode}.h" <$TEST_log 2>&1 || fail "Should build" + expect_log "changed" +} + run_suite "Tests for handling of Unicode filenames" diff --git a/src/test/shell/integration/starlark_dependency_pruning_test.sh b/src/test/shell/integration/starlark_dependency_pruning_test.sh index ceb84465081346..802c31a2c1e345 100755 --- a/src/test/shell/integration/starlark_dependency_pruning_test.sh +++ b/src/test/shell/integration/starlark_dependency_pruning_test.sh @@ -49,6 +49,14 @@ msys*|mingw*|cygwin*) ;; esac +if $is_windows; then + export LC_ALL=C.utf8 +elif [[ "$(uname -s)" == "Linux" ]]; then + export LC_ALL=C.UTF-8 +else + export LC_ALL=en_US.UTF-8 +fi + add_to_bazelrc "build --package_path=%workspace%" add_to_bazelrc "build --spawn_strategy=local" @@ -242,6 +250,43 @@ function test_dependency_pruning_scenario() { check_unused_content "pkg/c.input" } +function test_dependency_pruning_scenario_unicode() { + if "$is_windows"; then + # äöüÄÖÜß in UTF-8 + local unicode=$(echo -e '\xC3\xA4\xC3\xB6\xC3\xBC\xC3\x84\xC3\x96\xC3\x9C\xC3\x9F') + else + # äöüÄÖÜß🌱 in UTF-8 + local unicode=$(echo -e '\xC3\xA4\xC3\xB6\xC3\xBC\xC3\x84\xC3\x96\xC3\x9C\xC3\x9F\xF0\x9F\x8C\xB1') + fi + + # Initial build. + echo "contentD${unicode}" > "pkg/d${unicode}.input" + bazel build //pkg:output || fail "build failed" + check_output_content "contentA contentB contentC contentD${unicode}" + check_unused_content + + # Mark "d" as unused. + echo "unused" > "pkg/d${unicode}.input" + bazel build //pkg:output || fail "build failed" + check_output_content "contentA contentB contentC" + check_unused_content "pkg/d${unicode}.input" + + # Change "d" again: + # This time it should be used. But given that it was marked "unused" + # the build should not trigger: "d" should still be considered unused. + echo "newContentD${unicode}" > "pkg/d${unicode}.input" + bazel build //pkg:output || fail "build failed" + check_output_content "contentA contentB contentC" + check_unused_content "pkg/d${unicode}.input" + + # Change c: + # The build should be triggered, and the newer version of "d" should be used. + echo "unused" > pkg/c.input + bazel build //pkg:output || fail "build failed" + check_output_content "contentA contentB newContentD${unicode}" + check_unused_content "pkg/c.input" +} + # Verify that the state of the local action cache survives server shutdown. function test_unused_shutdown() { # Mark "b" as unused + initial build