diff --git a/src/main/java/com/google/devtools/build/lib/analysis/BUILD b/src/main/java/com/google/devtools/build/lib/analysis/BUILD index 6ab782847a2075..c81c808fe6a596 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/BUILD +++ b/src/main/java/com/google/devtools/build/lib/analysis/BUILD @@ -1466,6 +1466,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/actions:artifacts", "//src/main/java/com/google/devtools/build/lib/actions:commandline_item", "//src/main/java/com/google/devtools/build/lib/collect/nestedset", + "//src/main/java/com/google/devtools/build/lib/unsafe:string", "//src/main/java/com/google/devtools/build/lib/util", "//src/main/java/net/starlark/java/eval", "//third_party:jsr305", @@ -1482,6 +1483,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/actions:artifact_expander", "//src/main/java/com/google/devtools/build/lib/actions:artifacts", "//src/main/java/com/google/devtools/build/lib/collect/nestedset", + "//src/main/java/com/google/devtools/build/lib/unsafe:string", "//src/main/java/com/google/devtools/build/lib/util", "//third_party:guava", "//third_party:jsr305", diff --git a/src/main/java/com/google/devtools/build/lib/analysis/ConfiguredRuleClassProvider.java b/src/main/java/com/google/devtools/build/lib/analysis/ConfiguredRuleClassProvider.java index df527b31de764f..564309c487f868 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/ConfiguredRuleClassProvider.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/ConfiguredRuleClassProvider.java @@ -53,6 +53,7 @@ import com.google.devtools.build.lib.packages.RuleTransitionData; import com.google.devtools.build.lib.packages.WorkspaceFactory; import com.google.devtools.build.lib.starlarkbuildapi.core.Bootstrap; +import com.google.devtools.build.lib.unsafe.StringUnsafe; import com.google.devtools.build.lib.vfs.DigestHashFunction; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; @@ -131,7 +132,10 @@ protected synchronized byte[] getFastDigest(PathFragment path) { @Override protected synchronized byte[] getDigest(PathFragment path) { - return getDigestFunction().getHashFunction().hashString(path.toString(), UTF_8).asBytes(); + return getDigestFunction() + .getHashFunction() + .hashBytes(StringUnsafe.getInstance().getInternalStringBytes(path.getPathString())) + .asBytes(); } } diff --git a/src/main/java/com/google/devtools/build/lib/analysis/RepoMappingManifestAction.java b/src/main/java/com/google/devtools/build/lib/analysis/RepoMappingManifestAction.java index f4b724886be784..6d903571edc1ef 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/RepoMappingManifestAction.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/RepoMappingManifestAction.java @@ -16,7 +16,6 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableSortedMap.toImmutableSortedMap; import static java.nio.charset.StandardCharsets.ISO_8859_1; -import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Comparator.comparing; import com.github.benmanes.caffeine.cache.Caffeine; @@ -146,7 +145,7 @@ protected void computeKey( public String getFileContents(@Nullable EventHandler eventHandler) throws IOException { ByteArrayOutputStream stream = new ByteArrayOutputStream(); newDeterministicWriter().writeOutputFile(stream); - return stream.toString(UTF_8); + return stream.toString(ISO_8859_1); } @Override diff --git a/src/main/java/com/google/devtools/build/lib/analysis/SourceManifestAction.java b/src/main/java/com/google/devtools/build/lib/analysis/SourceManifestAction.java index c8fb12671db480..c6f7327bab50cc 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/SourceManifestAction.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/SourceManifestAction.java @@ -15,7 +15,6 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static java.nio.charset.StandardCharsets.ISO_8859_1; -import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; @@ -213,7 +212,7 @@ public void writeOutputFile(OutputStream out, @Nullable EventHandler eventHandle public String getFileContents(@Nullable EventHandler eventHandler) throws IOException { ByteArrayOutputStream stream = new ByteArrayOutputStream(); writeOutputFile(stream, eventHandler); - return stream.toString(UTF_8); + return stream.toString(ISO_8859_1); } @Override diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWriteNestedSetOfTupleAction.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWriteNestedSetOfTupleAction.java index 8958b5e2aee2ab..6bd98ea17edf37 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWriteNestedSetOfTupleAction.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWriteNestedSetOfTupleAction.java @@ -14,7 +14,6 @@ package com.google.devtools.build.lib.analysis.actions; -import static java.nio.charset.StandardCharsets.UTF_8; import com.google.devtools.build.lib.actions.ActionExecutionContext; import com.google.devtools.build.lib.actions.ActionKeyContext; @@ -25,6 +24,7 @@ import com.google.devtools.build.lib.collect.nestedset.NestedSet; import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder; import com.google.devtools.build.lib.collect.nestedset.Order; +import com.google.devtools.build.lib.unsafe.StringUnsafe; import com.google.devtools.build.lib.util.Fingerprint; import javax.annotation.Nullable; import net.starlark.java.eval.Tuple; @@ -49,7 +49,8 @@ public LazyWriteNestedSetOfTupleAction( @Override public DeterministicWriter newDeterministicWriter(ActionExecutionContext ctx) { - return out -> out.write(getContents(delimiter).getBytes(UTF_8)); + return out -> + out.write(StringUnsafe.getInstance().getInternalStringBytes(getContents(delimiter))); } /** Computes the Action key for this action by computing the fingerprint for the file contents. */ diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWritePathsFileAction.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWritePathsFileAction.java index e27d03b48eac12..2e24dc3d0d6ee1 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWritePathsFileAction.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWritePathsFileAction.java @@ -15,7 +15,6 @@ package com.google.devtools.build.lib.analysis.actions; -import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.collect.ImmutableSet; import com.google.devtools.build.lib.actions.ActionExecutionContext; @@ -26,6 +25,7 @@ import com.google.devtools.build.lib.collect.nestedset.NestedSet; import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder; import com.google.devtools.build.lib.collect.nestedset.Order; +import com.google.devtools.build.lib.unsafe.StringUnsafe; import com.google.devtools.build.lib.util.Fingerprint; import java.util.function.Function; import javax.annotation.Nullable; @@ -71,7 +71,7 @@ public LazyWritePathsFileAction( @Override public DeterministicWriter newDeterministicWriter(ActionExecutionContext ctx) { - return out -> out.write(getContents().getBytes(UTF_8)); + return out -> out.write(StringUnsafe.getInstance().getInternalStringBytes(getContents())); } /** Computes the Action key for this action by computing the fingerprint for the file contents. */ diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/LocalTemplateExpansionStrategy.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/LocalTemplateExpansionStrategy.java index 2c288480127756..93396ad2923fb9 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/actions/LocalTemplateExpansionStrategy.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/LocalTemplateExpansionStrategy.java @@ -14,7 +14,7 @@ package com.google.devtools.build.lib.analysis.actions; -import static java.nio.charset.StandardCharsets.UTF_8; +import static java.nio.charset.StandardCharsets.ISO_8859_1; import com.google.common.collect.ImmutableList; import com.google.devtools.build.lib.actions.AbstractAction; @@ -47,7 +47,8 @@ public ImmutableList expandTemplate( final String expandedTemplate = getExpandedTemplateUnsafe( templateMetadata.template(), templateMetadata.substitutions(), ctx.getPathResolver()); - DeterministicWriter deterministicWriter = out -> out.write(expandedTemplate.getBytes(UTF_8)); + DeterministicWriter deterministicWriter = + out -> out.write(expandedTemplate.getBytes(ISO_8859_1)); return ctx.getContext(FileWriteActionContext.class) .writeOutputToFile( action, diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/StarlarkAction.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/StarlarkAction.java index c6df48534c8e04..ecf92e4768fb04 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/actions/StarlarkAction.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/StarlarkAction.java @@ -13,7 +13,7 @@ // limitations under the License. package com.google.devtools.build.lib.analysis.actions; -import static java.nio.charset.StandardCharsets.UTF_8; +import static java.nio.charset.StandardCharsets.ISO_8859_1; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; @@ -349,10 +349,13 @@ protected void afterExecute( for (Artifact input : allInputs.toList()) { usedInputsByMappedPath.put(pathMapper.getMappedExecPathString(input), input); } + // Bazel encodes file system paths as raw bytes stored in a Latin-1 encoded string, so we need + // to make sure to also decode the unused input list as Latin-1. try (BufferedReader br = new BufferedReader( new InputStreamReader( - getUnusedInputListInputStream(actionExecutionContext, spawnResults), UTF_8))) { + getUnusedInputListInputStream(actionExecutionContext, spawnResults), + ISO_8859_1))) { String line; while ((line = br.readLine()) != null) { line = line.trim(); diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/Template.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/Template.java index 3cc16afd28166a..40a856c473c745 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/actions/Template.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/Template.java @@ -14,6 +14,8 @@ package com.google.devtools.build.lib.analysis.actions; +import static java.nio.charset.StandardCharsets.ISO_8859_1; + import com.google.common.annotations.VisibleForTesting; import com.google.devtools.build.lib.actions.Artifact; import com.google.devtools.build.lib.actions.ArtifactPathResolver; @@ -22,16 +24,12 @@ import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; import java.io.IOException; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; import javax.annotation.Nullable; /** A template that contains text content, or alternatively throws an {@link IOException}. */ @Immutable // all subclasses are immutable public abstract class Template { - static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8; - /** We only allow subclasses in this file. */ private Template() {} @@ -105,7 +103,8 @@ private static final class ArtifactTemplate extends Template { public String getContent(ArtifactPathResolver resolver) throws IOException { Path templatePath = resolver.toPath(templateArtifact); try { - return FileSystemUtils.readContent(templatePath, DEFAULT_CHARSET); + // Bazel's internal encoding for strings is raw bytes as Latin-1 + return FileSystemUtils.readContent(templatePath, ISO_8859_1); } catch (IOException e) { throw new IOException( "failed to load template file '" diff --git a/src/main/java/com/google/devtools/build/lib/analysis/starlark/StarlarkActionFactory.java b/src/main/java/com/google/devtools/build/lib/analysis/starlark/StarlarkActionFactory.java index b33ce8d0b7c9a1..2b944788cbf197 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/starlark/StarlarkActionFactory.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/starlark/StarlarkActionFactory.java @@ -977,13 +977,8 @@ public void expandTemplate( ImmutableMap.Builder substitutionsBuilder = ImmutableMap.builder(); for (Map.Entry substitution : Dict.cast(substitutionsUnchecked, String.class, String.class, "substitutions").entrySet()) { - // Blaze calls ParserInput.fromLatin1 when reading BUILD files, which might - // contain UTF-8 encoded symbols as part of template substitution. - // As a quick fix, the substitution values are corrected before being passed on. - // In the long term, avoiding ParserInput.fromLatin would be a better approach. substitutionsBuilder.put( - substitution.getKey(), - Substitution.of(substitution.getKey(), convertLatin1ToUtf8(substitution.getValue()))); + substitution.getKey(), Substitution.of(substitution.getKey(), substitution.getValue())); } if (!Starlark.UNBOUND.equals(computedSubstitutions)) { for (Substitution substitution : ((TemplateDict) computedSubstitutions).getAll()) { @@ -1007,16 +1002,6 @@ public void expandTemplate( registerAction(action); } - /** - * Returns the proper UTF-8 representation of a String that was erroneously read using Latin1. - * - * @param latin1 Input string - * @return The input string, UTF8 encoded - */ - private static String convertLatin1ToUtf8(String latin1) { - return new String(latin1.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8); - } - @Override public Args args(StarlarkThread thread) { return Args.newArgs(thread.mutability(), getSemantics()); diff --git a/src/main/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoder.java b/src/main/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoder.java index a4a4f453b9f7bf..ef76293263373d 100644 --- a/src/main/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoder.java +++ b/src/main/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoder.java @@ -13,8 +13,8 @@ // limitations under the License. package com.google.devtools.build.lib.authandtls; -import com.google.common.base.Strings; -import java.nio.charset.Charset; +import static java.nio.charset.StandardCharsets.UTF_8; + import java.util.Base64; /** @@ -26,16 +26,9 @@ public final class BasicHttpAuthenticationEncoder { private BasicHttpAuthenticationEncoder() {} - /** Encode username and password into a token with given {@link Charset}. */ - public static String encode(String username, String password, Charset charset) { - StringBuilder sb = new StringBuilder(); - if (!Strings.isNullOrEmpty(username)) { - sb.append(username); - } - sb.append(":"); - if (!Strings.isNullOrEmpty(password)) { - sb.append(password); - } - return "Basic " + Base64.getEncoder().encodeToString(sb.toString().getBytes(charset)); + /** Encode username and password into a token, encoded using UTF-8. */ + public static String encode(String username, String password) { + return "Basic " + + Base64.getEncoder().encodeToString((username + ":" + password).getBytes(UTF_8)); } } diff --git a/src/main/java/com/google/devtools/build/lib/authandtls/NetrcCredentials.java b/src/main/java/com/google/devtools/build/lib/authandtls/NetrcCredentials.java index ee15a91b9f9422..bc40512d659a63 100644 --- a/src/main/java/com/google/devtools/build/lib/authandtls/NetrcCredentials.java +++ b/src/main/java/com/google/devtools/build/lib/authandtls/NetrcCredentials.java @@ -13,8 +13,6 @@ // limitations under the License. package com.google.devtools.build.lib.authandtls; -import static java.nio.charset.StandardCharsets.UTF_8; - import com.google.auth.Credentials; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -58,7 +56,7 @@ public Map> getRequestMetadata(URI uri) throws IOException Credential credential = netrc.getCredential(uri.getHost()); if (credential != null) { String token = - BasicHttpAuthenticationEncoder.encode(credential.login(), credential.password(), UTF_8); + BasicHttpAuthenticationEncoder.encode(credential.login(), credential.password()); return ImmutableMap.of("Authorization", ImmutableList.of(token)); } else { return ImmutableMap.of(); diff --git a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorFileFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorFileFunction.java index 50f1102ed801bb..aaf92f38d770af 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorFileFunction.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorFileFunction.java @@ -14,7 +14,6 @@ package com.google.devtools.build.lib.bazel.bzlmod; -import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.collect.ImmutableList; import com.google.devtools.build.lib.actions.FileValue; @@ -35,6 +34,7 @@ import com.google.devtools.build.skyframe.SkyKey; import com.google.devtools.build.skyframe.SkyValue; import java.io.IOException; +import java.nio.charset.StandardCharsets; import javax.annotation.Nullable; import net.starlark.java.eval.EvalException; import net.starlark.java.eval.Mutability; @@ -138,8 +138,7 @@ private void createVendorFile(Path vendorPath, Path vendorFilePath) throws VendorFileFunctionException { try { vendorPath.createDirectoryAndParents(); - byte[] vendorFileContents = VENDOR_FILE_HEADER.getBytes(UTF_8); - FileSystemUtils.writeContent(vendorFilePath, vendorFileContents); + FileSystemUtils.writeContent(vendorFilePath, StandardCharsets.UTF_8, VENDOR_FILE_HEADER); } catch (IOException e) { throw new VendorFileFunctionException( new IOException("error creating VENDOR.bazel file", e), Transience.TRANSIENT); diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/RepositoryResolvedModule.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/RepositoryResolvedModule.java index 84b9ec714d6d0b..97940ed169d1d7 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/RepositoryResolvedModule.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/RepositoryResolvedModule.java @@ -13,6 +13,8 @@ // limitations under the License. package com.google.devtools.build.lib.bazel.repository; +import static java.nio.charset.StandardCharsets.ISO_8859_1; + import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; @@ -29,7 +31,6 @@ import java.io.File; import java.io.IOException; import java.io.Writer; -import java.nio.charset.StandardCharsets; import java.util.LinkedHashMap; import java.util.Map; import net.starlark.java.eval.Printer; @@ -81,7 +82,7 @@ public void afterCommand() { for (Object resolved : resolvedValues.values()) { resultBuilder.add(resolved); } - try (Writer writer = Files.newWriter(new File(resolvedFile), StandardCharsets.UTF_8)) { + try (Writer writer = Files.newWriter(new File(resolvedFile), ISO_8859_1)) { writer.write(EXPORTED_NAME + " = " + new ValuePrinter().repr(resultBuilder.build())); } catch (IOException e) { logger.atWarning().withCause(e).log("IO Error writing to file %s", resolvedFile); diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/BUILD b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/BUILD index ec879778d6d0b1..1a3e7beababe3b 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/BUILD +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/BUILD @@ -48,8 +48,10 @@ java_library( "//src/main/java/com/google/devtools/build/lib/skyframe:precomputed_value", "//src/main/java/com/google/devtools/build/lib/skyframe:repository_mapping_value", "//src/main/java/com/google/devtools/build/lib/starlarkbuildapi/repository", + "//src/main/java/com/google/devtools/build/lib/unsafe:string", "//src/main/java/com/google/devtools/build/lib/util", "//src/main/java/com/google/devtools/build/lib/util:string", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/util/io:out-err", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java index e34f582b977402..e5446fd89c4aa0 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java @@ -57,6 +57,7 @@ import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor; import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor.ExecutionResult; import com.google.devtools.build.lib.skyframe.ActionEnvironmentFunction; +import com.google.devtools.build.lib.unsafe.StringUnsafe; import com.google.devtools.build.lib.util.OsUtils; import com.google.devtools.build.lib.util.io.OutErr; import com.google.devtools.build.lib.vfs.FileSystemUtils; @@ -1324,23 +1325,17 @@ private static String renamedStripPrefix(String method, String stripPrefix, Stri @Param( name = "legacy_utf8", named = true, - defaultValue = "True", + defaultValue = "False", doc = """ - Encode file content to UTF-8, true by default. Future versions will change \ - the default and remove this parameter. + No-op. This parameter is deprecated and will be removed in a future version of \ + Bazel. """), }) public void createFile( Object path, String content, Boolean executable, Boolean legacyUtf8, StarlarkThread thread) throws RepositoryFunctionException, EvalException, InterruptedException { StarlarkPath p = getPath(path); - byte[] contentBytes; - if (legacyUtf8) { - contentBytes = content.getBytes(UTF_8); - } else { - contentBytes = content.getBytes(ISO_8859_1); - } WorkspaceRuleEvent w = WorkspaceRuleEvent.newFileEvent( p.toString(), @@ -1354,7 +1349,7 @@ public void createFile( makeDirectories(p.getPath()); p.getPath().delete(); try (OutputStream stream = p.getPath().getOutputStream()) { - stream.write(contentBytes); + stream.write(StringUnsafe.getInstance().getInternalStringBytes(content)); } if (executable) { p.getPath().setExecutable(true); diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkExecutionResult.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkExecutionResult.java index 7bd40f94599f20..f3e14370d863cd 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkExecutionResult.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkExecutionResult.java @@ -182,7 +182,7 @@ Builder setQuiet(boolean quiet) { private static String toString(ByteArrayOutputStream stream) { try { - return new String(stream.toByteArray(), UTF_8); + return stream.toString(UTF_8); } catch (IllegalStateException e) { return ""; } diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java index 6ea3f7e21786d1..5b1d08175fdf33 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java @@ -322,14 +322,16 @@ public void createFileFromTemplate( try { checkInOutputDirectory("write", p); makeDirectories(p.getPath()); - String tpl = FileSystemUtils.readContent(t.getPath(), StandardCharsets.UTF_8); + // Read and write files as raw bytes by using the Latin-1 encoding, which matches the encoding + // used by Bazel for strings. + String tpl = FileSystemUtils.readContent(t.getPath(), StandardCharsets.ISO_8859_1); for (Map.Entry substitution : substitutionMap.entrySet()) { tpl = StringUtilities.replaceAllLiteral(tpl, substitution.getKey(), substitution.getValue()); } p.getPath().delete(); try (OutputStream stream = p.getPath().getOutputStream()) { - stream.write(tpl.getBytes(StandardCharsets.UTF_8)); + stream.write(tpl.getBytes(StandardCharsets.ISO_8859_1)); } if (executable) { p.getPath().setExecutable(true); diff --git a/src/main/java/com/google/devtools/build/lib/profiler/JsonProfile.java b/src/main/java/com/google/devtools/build/lib/profiler/JsonProfile.java index cbb02d5e54cb13..1d0538fba43fa0 100644 --- a/src/main/java/com/google/devtools/build/lib/profiler/JsonProfile.java +++ b/src/main/java/com/google/devtools/build/lib/profiler/JsonProfile.java @@ -14,6 +14,8 @@ package com.google.devtools.build.lib.profiler; +import static java.nio.charset.StandardCharsets.ISO_8859_1; + import com.google.auto.value.AutoValue; import com.google.devtools.build.lib.profiler.statistics.PhaseSummaryStatistics; import com.google.gson.stream.JsonReader; diff --git a/src/main/java/com/google/devtools/build/lib/profiler/JsonTraceFileWriter.java b/src/main/java/com/google/devtools/build/lib/profiler/JsonTraceFileWriter.java index d73fa42380960f..2ab8881db77cd4 100644 --- a/src/main/java/com/google/devtools/build/lib/profiler/JsonTraceFileWriter.java +++ b/src/main/java/com/google/devtools/build/lib/profiler/JsonTraceFileWriter.java @@ -13,6 +13,8 @@ // limitations under the License. package com.google.devtools.build.lib.profiler; +import static java.nio.charset.StandardCharsets.ISO_8859_1; + import com.google.common.base.Preconditions; import com.google.devtools.build.lib.analysis.BlazeVersionInfo; import com.google.devtools.build.lib.profiler.Profiler.TaskData; @@ -21,7 +23,6 @@ import java.io.IOException; import java.io.OutputStream; import java.io.OutputStreamWriter; -import java.nio.charset.StandardCharsets; import java.time.Duration; import java.time.Instant; import java.util.HashMap; @@ -223,8 +224,9 @@ public void run() { try (JsonWriter writer = new JsonWriter( // The buffer size of 262144 is chosen at random. - new OutputStreamWriter( - new BufferedOutputStream(outStream, 262144), StandardCharsets.UTF_8))) { + // Bazel internally stores strings as raw bytes encoded in ISO_8859_1, so we use the + // same encoding here to also write out raw bytes. + new OutputStreamWriter(new BufferedOutputStream(outStream, 262144), ISO_8859_1))) { var startDate = Instant.now(); writer.beginObject(); writer.name("otherData"); diff --git a/src/main/java/com/google/devtools/build/lib/rules/cpp/ShowIncludesFilter.java b/src/main/java/com/google/devtools/build/lib/rules/cpp/ShowIncludesFilter.java index e6804184fb707f..51c9368b83918e 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/cpp/ShowIncludesFilter.java +++ b/src/main/java/com/google/devtools/build/lib/rules/cpp/ShowIncludesFilter.java @@ -171,7 +171,7 @@ public FilterShowIncludesOutputStream(OutputStream out, String sourceFileName) { public void write(int b) throws IOException { buffer.write(b); if (b == NEWLINE) { - String line = buffer.toString(StandardCharsets.UTF_8.name()); + String line = buffer.toString(StandardCharsets.UTF_8); boolean prefixMatched = false; for (String prefix : SHOW_INCLUDES_PREFIXES) { if (line.startsWith(prefix)) { diff --git a/src/test/java/com/google/devtools/build/lib/analysis/BUILD b/src/test/java/com/google/devtools/build/lib/analysis/BUILD index 782e30f10491f6..08a14544b91d98 100644 --- a/src/test/java/com/google/devtools/build/lib/analysis/BUILD +++ b/src/test/java/com/google/devtools/build/lib/analysis/BUILD @@ -153,6 +153,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/util:detailed_exit_code", "//src/main/java/com/google/devtools/build/lib/util:filetype", "//src/main/java/com/google/devtools/build/lib/util:os", + "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/util/io", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", diff --git a/src/test/java/com/google/devtools/build/lib/analysis/actions/TemplateExpansionActionTest.java b/src/test/java/com/google/devtools/build/lib/analysis/actions/TemplateExpansionActionTest.java index 15f208d16b6e82..1ebd5b08e4449f 100644 --- a/src/test/java/com/google/devtools/build/lib/analysis/actions/TemplateExpansionActionTest.java +++ b/src/test/java/com/google/devtools/build/lib/analysis/actions/TemplateExpansionActionTest.java @@ -39,6 +39,7 @@ import com.google.devtools.build.lib.exec.util.TestExecutorBuilder; import com.google.devtools.build.lib.testutil.FoundationTestCase; import com.google.devtools.build.lib.util.Fingerprint; +import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.util.io.FileOutErr; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; @@ -252,7 +253,10 @@ public void testWithSpecialCharacters() throws Exception { // scratch.overwriteFile appends a newline, so we need an additional \n here String expected = String.format("%s%s\n", SPECIAL_CHARS, SPECIAL_CHARS); - executeTemplateExpansion(expected, ImmutableList.of(Substitution.of("%key%", SPECIAL_CHARS))); + executeTemplateExpansion( + expected, + ImmutableList.of( + Substitution.of("%key%", StringEncoding.unicodeToInternal(SPECIAL_CHARS)))); } private String computeKey(TemplateExpansionAction action) throws EvalException { diff --git a/src/test/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoderTest.java b/src/test/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoderTest.java index 7873e0572d0a4e..a5fecad125126b 100644 --- a/src/test/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoderTest.java +++ b/src/test/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoderTest.java @@ -34,13 +34,13 @@ private static String[] decode(String message) { @Test public void encode_normalUsernamePassword_outputExpected() { - String message = BasicHttpAuthenticationEncoder.encode("Aladdin", "open sesame", UTF_8); + String message = BasicHttpAuthenticationEncoder.encode("Aladdin", "open sesame"); assertThat(message).isEqualTo("Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ=="); } @Test public void encode_normalUsernamePassword_canBeDecoded() { - String message = BasicHttpAuthenticationEncoder.encode("Aladdin", "open sesame", UTF_8); + String message = BasicHttpAuthenticationEncoder.encode("Aladdin", "open sesame"); String[] usernameAndPassword = decode(message); assertThat(usernameAndPassword[0]).isEqualTo("Aladdin"); @@ -49,7 +49,7 @@ public void encode_normalUsernamePassword_canBeDecoded() { @Test public void encode_usernameContainsColon_canBeDecoded() { - String message = BasicHttpAuthenticationEncoder.encode("foo:user", "foopass", UTF_8); + String message = BasicHttpAuthenticationEncoder.encode("foo:user", "foopass"); String[] usernameAndPassword = decode(message); assertThat(usernameAndPassword[0]).isEqualTo("foo"); @@ -58,25 +58,25 @@ public void encode_usernameContainsColon_canBeDecoded() { @Test public void encode_emptyUsername_outputExpected() { - String message = BasicHttpAuthenticationEncoder.encode("", "foopass", UTF_8); + String message = BasicHttpAuthenticationEncoder.encode("", "foopass"); assertThat(message).isEqualTo("Basic OmZvb3Bhc3M="); } @Test public void encode_emptyPassword_outputExpected() { - String message = BasicHttpAuthenticationEncoder.encode("foouser", "", UTF_8); + String message = BasicHttpAuthenticationEncoder.encode("foouser", ""); assertThat(message).isEqualTo("Basic Zm9vdXNlcjo="); } @Test public void encode_emptyUsernamePassword_outputExpected() { - String message = BasicHttpAuthenticationEncoder.encode("", "", UTF_8); + String message = BasicHttpAuthenticationEncoder.encode("", ""); assertThat(message).isEqualTo("Basic Og=="); } @Test public void encode_specialCharacterUtf8_outputExpected() { - String message = BasicHttpAuthenticationEncoder.encode("test", "123\u00A3", UTF_8); + String message = BasicHttpAuthenticationEncoder.encode("test", "123\u00A3"); assertThat(message).isEqualTo("Basic dGVzdDoxMjPCow=="); } } diff --git a/src/test/java/com/google/devtools/build/lib/authandtls/GoogleAuthUtilsTest.java b/src/test/java/com/google/devtools/build/lib/authandtls/GoogleAuthUtilsTest.java index ec3f4e6437d3ea..0810ebfacaf3c5 100644 --- a/src/test/java/com/google/devtools/build/lib/authandtls/GoogleAuthUtilsTest.java +++ b/src/test/java/com/google/devtools/build/lib/authandtls/GoogleAuthUtilsTest.java @@ -15,7 +15,7 @@ package com.google.devtools.build.lib.authandtls; import static com.google.common.truth.Truth.assertThat; -import static java.nio.charset.StandardCharsets.UTF_8; + import com.google.auth.Credentials; import com.google.common.base.Preconditions; @@ -344,7 +344,7 @@ private static void assertRequestMetadata( Map> requestMetadata, String username, String password) { assertThat(requestMetadata.keySet()).containsExactly("Authorization"); assertThat(Iterables.getOnlyElement(requestMetadata.values())) - .containsExactly(BasicHttpAuthenticationEncoder.encode(username, password, UTF_8)); + .containsExactly(BasicHttpAuthenticationEncoder.encode(username, password)); } private static CredentialHelperProvider newCredentialHelperProvider( diff --git a/src/test/java/com/google/devtools/build/lib/authandtls/NetrcCredentialsTest.java b/src/test/java/com/google/devtools/build/lib/authandtls/NetrcCredentialsTest.java index 2cb07cdf815003..22054333c92b98 100644 --- a/src/test/java/com/google/devtools/build/lib/authandtls/NetrcCredentialsTest.java +++ b/src/test/java/com/google/devtools/build/lib/authandtls/NetrcCredentialsTest.java @@ -14,7 +14,7 @@ package com.google.devtools.build.lib.authandtls; import static com.google.common.truth.Truth.assertThat; -import static java.nio.charset.StandardCharsets.UTF_8; + import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; @@ -131,6 +131,6 @@ private static void assertRequestMetadata( Map> requestMetadata, String username, String password) { assertThat(requestMetadata.keySet()).containsExactly("Authorization"); assertThat(Iterables.getOnlyElement(requestMetadata.values())) - .containsExactly(BasicHttpAuthenticationEncoder.encode(username, password, UTF_8)); + .containsExactly(BasicHttpAuthenticationEncoder.encode(username, password)); } } diff --git a/src/test/java/com/google/devtools/build/lib/bazel/bzlmod/IndexRegistryTest.java b/src/test/java/com/google/devtools/build/lib/bazel/bzlmod/IndexRegistryTest.java index 02e73b8f9ebe36..a2635a7cf5cb42 100644 --- a/src/test/java/com/google/devtools/build/lib/bazel/bzlmod/IndexRegistryTest.java +++ b/src/test/java/com/google/devtools/build/lib/bazel/bzlmod/IndexRegistryTest.java @@ -71,7 +71,7 @@ public ImmutableMap> getRecordedHashes() { } private final String authToken = - BasicHttpAuthenticationEncoder.encode("rinne", "rinnepass", UTF_8); + BasicHttpAuthenticationEncoder.encode("rinne", "rinnepass"); private DownloadManager downloadManager; private EventRecorder eventRecorder; @Rule public final TestHttpServer server = new TestHttpServer(authToken); diff --git a/src/test/java/com/google/devtools/build/lib/bazel/repository/downloader/UrlRewriterTest.java b/src/test/java/com/google/devtools/build/lib/bazel/repository/downloader/UrlRewriterTest.java index dcc41d16e58280..9b2b8cb0a18857 100644 --- a/src/test/java/com/google/devtools/build/lib/bazel/repository/downloader/UrlRewriterTest.java +++ b/src/test/java/com/google/devtools/build/lib/bazel/repository/downloader/UrlRewriterTest.java @@ -16,7 +16,6 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.truth.Truth.assertThat; import static java.nio.charset.StandardCharsets.ISO_8859_1; -import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.fail; import com.google.auth.Credentials; @@ -440,6 +439,6 @@ private static void assertRequestMetadata( Map> requestMetadata, String username, String password) { assertThat(requestMetadata.keySet()).containsExactly("Authorization"); assertThat(Iterables.getOnlyElement(requestMetadata.values())) - .containsExactly(BasicHttpAuthenticationEncoder.encode(username, password, UTF_8)); + .containsExactly(BasicHttpAuthenticationEncoder.encode(username, password)); } } diff --git a/src/test/java/com/google/devtools/build/lib/starlark/StarlarkRuleImplementationFunctionsTest.java b/src/test/java/com/google/devtools/build/lib/starlark/StarlarkRuleImplementationFunctionsTest.java index ad270e0ebfe260..753b62f955cd44 100644 --- a/src/test/java/com/google/devtools/build/lib/starlark/StarlarkRuleImplementationFunctionsTest.java +++ b/src/test/java/com/google/devtools/build/lib/starlark/StarlarkRuleImplementationFunctionsTest.java @@ -19,6 +19,7 @@ import static com.google.common.truth.Truth.assertWithMessage; import static com.google.devtools.build.lib.bazel.bzlmod.BzlmodTestUtil.createModuleKey; import static com.google.devtools.build.lib.skyframe.BzlLoadValue.keyForBuild; +import static java.nio.charset.StandardCharsets.ISO_8859_1; import static org.junit.Assert.assertThrows; import static org.junit.Assert.fail; @@ -68,8 +69,6 @@ import com.google.devtools.build.lib.testutil.TestConstants; import com.google.devtools.build.lib.util.Fingerprint; import com.google.devtools.build.lib.util.OsUtils; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; @@ -916,29 +915,20 @@ public void testCreateTemplateAction() throws Exception { assertThat(action.makeExecutable()).isFalse(); } - /** - * Simulates the fact that the Parser currently uses Latin1 to read BUILD files, while users - * usually write those files using UTF-8 encoding. Currently, the string-valued 'substitutions' - * parameter of the template_action function contains a hack that assumes its input is a UTF-8 - * encoded string which has been ingested as Latin 1. The hack converts the string to its - * "correct" UTF-8 value. Once Blaze starts calling {@link - * net.starlark.java.syntax.ParserInput#fromUTF8} instead of {@code fromLatin1} and the hack for - * the substitutions parameter is removed, this test will fail. - */ @Test - public void testCreateTemplateActionWithWrongEncoding() throws Exception { + public void testCreateTemplateActionUnicode() throws Exception { // The following array contains bytes that represent a string of length two when treated as // UTF-8 and a string of length four when treated as ISO-8859-1 (a.k.a. Latin 1). - byte[] bytesToDecode = {(byte) 0xC2, (byte) 0xA2, (byte) 0xC2, (byte) 0xA2}; - Charset latin1 = StandardCharsets.ISO_8859_1; - Charset utf8 = StandardCharsets.UTF_8; + String internalString = + new String(new byte[] {(byte) 0xC2, (byte) 0xA2, (byte) 0xC2, (byte) 0xA2}, ISO_8859_1); StarlarkRuleContext ruleContext = createRuleContext("//foo:foo"); setRuleContext(ruleContext); + // In production, Bazel parses Starlark as raw bytes encoded as Latin-1. ev.exec( "ruleContext.actions.expand_template(", " template = ruleContext.files.srcs[0],", " output = ruleContext.files.srcs[1],", - " substitutions = {'a': '" + new String(bytesToDecode, latin1) + "'},", + " substitutions = {'a" + internalString + "': '" + internalString + "'},", " is_executable = False)"); TemplateExpansionAction action = (TemplateExpansionAction) @@ -946,7 +936,8 @@ public void testCreateTemplateActionWithWrongEncoding() throws Exception { ruleContext.getRuleContext().getAnalysisEnvironment().getRegisteredActions()); List substitutions = action.getSubstitutions(); assertThat(substitutions).hasSize(1); - assertThat(substitutions.get(0).getValue()).isEqualTo(new String(bytesToDecode, utf8)); + assertThat(substitutions.get(0).getKey()).isEqualTo("a" + internalString); + assertThat(substitutions.get(0).getValue()).isEqualTo(internalString); } @Test diff --git a/src/test/shell/bazel/BUILD b/src/test/shell/bazel/BUILD index cc5ebafe945635..3aee8794ae5c05 100644 --- a/src/test/shell/bazel/BUILD +++ b/src/test/shell/bazel/BUILD @@ -874,7 +874,7 @@ sh_test( data = [ ":test-deps", "@bazel_tools//tools/bash/runfiles", - "@local_jdk//:jdk", + "@local_jdk//:jdk", # for remote_helpers setup_localjdk_javabase ], shard_count = 10, tags = [ diff --git a/src/test/shell/bazel/bazel_workspaces_test.sh b/src/test/shell/bazel/bazel_workspaces_test.sh index dc21ce0c7859f9..7b689800249803 100755 --- a/src/test/shell/bazel/bazel_workspaces_test.sh +++ b/src/test/shell/bazel/bazel_workspaces_test.sh @@ -579,21 +579,7 @@ function test_read() { ensure_contains_exactly 'path: ".*filefile.sh"' 2 } -function test_read_roundtrip_legacy_utf8() { - # See discussion on https://github.com/bazelbuild/bazel/pull/7309 - set_workspace_command ' - content = "echo fïlëfïlë" - repository_ctx.file("filefile.sh", content, True, legacy_utf8=True) - read_result = repository_ctx.read("filefile.sh") - - corrupted_content = "echo fïlëfïlë" - if read_result != corrupted_content: - fail("read(): expected %r, got %r" % (corrupted_content, read_result))' - - build_and_process_log --exclude_rule "repository @@local_config_cc" -} - -function test_read_roundtrip_nolegacy_utf8() { +function test_read_roundtrip_utf8() { set_workspace_command ' content = "echo fïlëfïlë" repository_ctx.file("filefile.sh", content, True, legacy_utf8=False) diff --git a/src/test/shell/bazel/starlark_repository_test.sh b/src/test/shell/bazel/starlark_repository_test.sh index ad02691cf73f4b..151ad4c29721ad 100755 --- a/src/test/shell/bazel/starlark_repository_test.sh +++ b/src/test/shell/bazel/starlark_repository_test.sh @@ -56,6 +56,14 @@ msys*) ;; esac +if $is_windows; then + export LC_ALL=C.utf8 +elif [[ "$(uname -s)" == "Linux" ]]; then + export LC_ALL=C.UTF-8 +else + export LC_ALL=en_US.UTF-8 +fi + source "$(rlocation "io_bazel/src/test/shell/bazel/remote_helpers.sh")" \ || { echo "remote_helpers.sh not found!" >&2; exit 1; } @@ -470,6 +478,84 @@ EOF expect_log "PWD=$repo2 TOTO=titi" } +function test_starlark_repository_unicode() { + setup_starlark_repository + + if "$is_windows"; then + # äöüÄÖÜß in UTF8 + local unicode=$(echo -e '\xC3\xA4\xC3\xB6\xC3\xBC\xC3\x84\xC3\x96\xC3\x9C\xC3\x9F') + else + # äöüÄÖÜß🌱 in UTF8 + local unicode=$(echo -e '\xC3\xA4\xC3\xB6\xC3\xBC\xC3\x84\xC3\x96\xC3\x9C\xC3\x9F\xF0\x9F\x8C\xB1') + fi + + tmpdir="$(mktemp -d ${TEST_TMPDIR}/test.XXXXXXXX)" + input_file="${tmpdir}/input$unicode" + echo -n "$unicode" > "${input_file}" + + cat >test.bzl <indirect%s.txt" % (UNICODE, UNICODE)] + ) + if result.return_code != 0: + fail("Incorrect return code from bash: %s != 0\n%s" % (result.return_code, result.stderr)) + + result = repository_ctx.execute([str(repository_ctx.which("bash")), "-c", "echo '%s'" % UNICODE]) + if result.return_code != 0: + fail("Incorrect return code from bash: %s != 0\n%s" % (result.return_code, result.stderr)) + if result.stdout.strip() != UNICODE: + fail("Incorrect output from bash: %s != %s\n%s" % (result.stdout.strip(), UNICODE, result.stderr)) + + result = repository_ctx.execute([str(repository_ctx.which("bash")), "-c", "echo '%s' && exit 123" % UNICODE]) + if result.return_code != 123: + fail("Incorrect return code from bash: %s != 123\n%s" % (result.return_code, result.stderr)) + if result.stdout.strip() != UNICODE: + fail("Incorrect output from bash: %s != %s\n%s" % (result.stdout.strip(), UNICODE, result.stderr)) + + repository_ctx.file("foo.txt", UNICODE) + read_content = repository_ctx.read("foo.txt") + if read_content != UNICODE: + fail("Incorrect content in foo.txt: %s != %s" % (read_content, UNICODE)) + + print("UNICODE = %s" % UNICODE) +repo = repository_rule(implementation=_impl) +EOF + + bazel build "--repo_env=INPUT_$unicode=${input_file}" @foo//:bar >& $TEST_log || fail "Failed to build" + expect_log "UNICODE = $unicode" + output_base="$(bazel info output_base)" + assert_contains "$unicode" "$output_base/external/+_repo_rules+foo/direct${unicode}.txt" + assert_contains "$unicode" "$output_base/external/+_repo_rules+foo/indirect${unicode}.txt" + assert_contains "${unicode}_replaced_${unicode}" "$output_base/external/+_repo_rules+foo/template${unicode}.txt" + + # The repo rule should not be re-run on server restart + bazel shutdown + bazel build "--repo_env=INPUT_${unicode}=${input_file}" @foo//:bar >& $TEST_log || fail "Failed to build" + expect_not_log "UNICODE" +} + function test_starlark_repository_environ() { setup_starlark_repository @@ -1947,7 +2033,7 @@ password foopass machine bar.example.org login barusername -password passbar +password passbar🌱 # following lines mix tabs and spaces machine oauthlife.com @@ -2012,7 +2098,7 @@ expected = { "https://bar.example.org/file3.tar" : { "type" : "basic", "login": "barusername", - "password" : "passbar", + "password" : "passbar🌱", }, "https://oauthlife.com/fizz/buzz/file5.tar": { "type" : "pattern", diff --git a/src/test/shell/integration/starlark_dependency_pruning_test.sh b/src/test/shell/integration/starlark_dependency_pruning_test.sh index ceb84465081346..8b6595e6c45dc3 100755 --- a/src/test/shell/integration/starlark_dependency_pruning_test.sh +++ b/src/test/shell/integration/starlark_dependency_pruning_test.sh @@ -49,6 +49,14 @@ msys*|mingw*|cygwin*) ;; esac +if $is_windows; then + export LC_ALL=C.utf8 +elif [[ "$(uname -s)" == "Linux" ]]; then + export LC_ALL=C.UTF-8 +else + export LC_ALL=en_US.UTF-8 +fi + add_to_bazelrc "build --package_path=%workspace%" add_to_bazelrc "build --spawn_strategy=local" @@ -242,6 +250,43 @@ function test_dependency_pruning_scenario() { check_unused_content "pkg/c.input" } +function test_dependency_pruning_scenario_unicode() { + if "$is_windows"; then + # äöüÄÖÜß in UTF8 + local unicode=$(echo -e '\xC3\xA4\xC3\xB6\xC3\xBC\xC3\x84\xC3\x96\xC3\x9C\xC3\x9F') + else + # äöüÄÖÜß🌱 in UTF8 + local unicode=$(echo -e '\xC3\xA4\xC3\xB6\xC3\xBC\xC3\x84\xC3\x96\xC3\x9C\xC3\x9F\xF0\x9F\x8C\xB1') + fi + + # Initial build. + echo "contentD${unicode}" > "pkg/d${unicode}.input" + bazel build //pkg:output || fail "build failed" + check_output_content "contentA contentB contentC contentD${unicode}" + check_unused_content + + # Mark "d" as unused. + echo "unused" > "pkg/d${unicode}.input" + bazel build //pkg:output || fail "build failed" + check_output_content "contentA contentB contentC" + check_unused_content "pkg/d${unicode}.input" + + # Change "d" again: + # This time it should be used. But given that it was marked "unused" + # the build should not trigger: "d" should still be considered unused. + echo "newContentD${unicode}" > "pkg/d${unicode}.input" + bazel build //pkg:output || fail "build failed" + check_output_content "contentA contentB contentC" + check_unused_content "pkg/d${unicode}.input" + + # Change c: + # The build should be triggered, and the newer version of "d" should be used. + echo "unused" > pkg/c.input + bazel build //pkg:output || fail "build failed" + check_output_content "contentA contentB newContentD${unicode}" + check_unused_content "pkg/c.input" +} + # Verify that the state of the local action cache survives server shutdown. function test_unused_shutdown() { # Mark "b" as unused + initial build