diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/BUILD b/src/main/java/com/google/devtools/build/lib/bazel/repository/BUILD
index 9236b95ccf9caf..b624864b06e4b6 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/repository/BUILD
+++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/BUILD
@@ -51,6 +51,7 @@ java_library(
"//src/main/java/com/google/devtools/common/options",
"//src/main/java/net/starlark/java/eval",
"//third_party:apache_commons_compress",
+ "//third_party:auto_service",
"//third_party:auto_value",
"//third_party:flogger",
"//third_party:guava",
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/CompressedTarFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/CompressedTarFunction.java
index f38e7d56894ac2..c32a423940e0a1 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/repository/CompressedTarFunction.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/CompressedTarFunction.java
@@ -15,8 +15,10 @@
package com.google.devtools.build.lib.bazel.repository;
import static com.google.devtools.build.lib.bazel.repository.StripPrefixedPath.maybeDeprefixSymlink;
+import static java.nio.charset.StandardCharsets.ISO_8859_1;
+import static java.nio.charset.StandardCharsets.UTF_8;
-import com.google.common.base.Optional;
+import com.google.auto.service.AutoService;
import com.google.common.io.ByteStreams;
import com.google.devtools.build.lib.bazel.repository.DecompressorValue.Decompressor;
import com.google.devtools.build.lib.vfs.FileSystemUtils;
@@ -25,16 +27,33 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.spi.CharsetProvider;
+import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.Iterator;
import java.util.Map;
+import java.util.Optional;
import java.util.Set;
+import java.util.UUID;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
/**
* Common code for unarchiving a compressed TAR file.
+ *
+ *
TAR file entries commonly use one of two formats: PAX, which uses UTF-8 encoding for all
+ * strings, and USTAR, which does not specify an encoding. This class interprets USTAR headers as
+ * latin-1, thus preserving the original bytes of the header without enforcing any particular
+ * encoding. Internally, for file system operations, all strings are converted into Bazel's internal
+ * representation of raw bytes stored as latin-1 strings.
*/
public abstract class CompressedTarFunction implements Decompressor {
protected abstract InputStream getDecompressorStream(DecompressorDescriptor descriptor)
@@ -54,20 +73,23 @@ public Path decompress(DecompressorDescriptor descriptor)
Map symlinks = new HashMap<>();
try (InputStream decompressorStream = getDecompressorStream(descriptor)) {
- TarArchiveInputStream tarStream = new TarArchiveInputStream(decompressorStream);
+ // USTAR tar headers use an unspecified encoding whereas PAX tar headers always use UTF-8.
+ // We can specify the encoding to use for USTAR headers, but the Charset used for PAX headers
+ // is fixed to UTF-8. We thus specify a custom Charset for the former so that we can
+ // distinguish between the two.
+ TarArchiveInputStream tarStream =
+ new TarArchiveInputStream(decompressorStream, MarkedIso88591Charset.NAME);
TarArchiveEntry entry;
while ((entry = tarStream.getNextTarEntry()) != null) {
- String entryName = entry.getName();
+ String entryName = toRawBytesString(entry.getName());
entryName = renameFiles.getOrDefault(entryName, entryName);
- StripPrefixedPath entryPath = StripPrefixedPath.maybeDeprefix(entryName, prefix);
+ StripPrefixedPath entryPath =
+ StripPrefixedPath.maybeDeprefix(entryName.getBytes(ISO_8859_1), prefix);
foundPrefix = foundPrefix || entryPath.foundPrefix();
if (prefix.isPresent() && !foundPrefix) {
- Optional suggestion =
- CouldNotFindPrefixException.maybeMakePrefixSuggestion(entryPath.getPathFragment());
- if (suggestion.isPresent()) {
- availablePrefixes.add(suggestion.get());
- }
+ CouldNotFindPrefixException.maybeMakePrefixSuggestion(entryPath.getPathFragment())
+ .ifPresent(availablePrefixes::add);
}
if (entryPath.skip()) {
@@ -80,8 +102,11 @@ public Path decompress(DecompressorDescriptor descriptor)
filePath.createDirectoryAndParents();
} else {
if (entry.isSymbolicLink() || entry.isLink()) {
- PathFragment targetName = PathFragment.create(entry.getLinkName());
- targetName = maybeDeprefixSymlink(targetName, prefix, descriptor.destinationPath());
+ PathFragment targetName =
+ maybeDeprefixSymlink(
+ toRawBytesString(entry.getLinkName()).getBytes(ISO_8859_1),
+ prefix,
+ descriptor.destinationPath());
if (entry.isSymbolicLink()) {
symlinks.put(filePath, targetName);
} else {
@@ -135,4 +160,100 @@ public Path decompress(DecompressorDescriptor descriptor)
return descriptor.destinationPath();
}
+
+ /**
+ * Returns a string that contains the raw bytes of the given string encoded in ISO-8859-1,
+ * assuming that the given string was encoded with either UTF-8 or the special {@link
+ * MarkedIso88591Charset}.
+ */
+ private static String toRawBytesString(String name) {
+ // Marked strings are already encoded in ISO-8859-1. Other strings originate from PAX headers
+ // and are thus encoded in UTF-8, which we decode to the raw bytes and then re-encode trivially
+ // in ISO-8859-1.
+ return MarkedIso88591Charset.getRawBytesStringIfMarked(name)
+ .orElseGet(() -> new String(name.getBytes(UTF_8), ISO_8859_1));
+ }
+
+ /** A provider of {@link MarkedIso88591Charset}s. */
+ @AutoService(CharsetProvider.class)
+ public static class MarkedIso88591CharsetProvider extends CharsetProvider {
+ private static final Charset CHARSET = new MarkedIso88591Charset();
+
+ @Override
+ public Iterator charsets() {
+ // This charset is only meant for internal use within CompressedTarFunction and thus should
+ // not be discoverable.
+ return Collections.emptyIterator();
+ }
+
+ @Override
+ public Charset charsetForName(String charsetName) {
+ return MarkedIso88591Charset.NAME.equals(charsetName) ? CHARSET : null;
+ }
+ }
+
+ /**
+ * A charset that decodes ISO-8859-1, i.e., produces a String that contains the raw decoded bytes,
+ * and appends a marker to the end of the string to indicate that it was decoded with this
+ * charset.
+ */
+ private static class MarkedIso88591Charset extends Charset {
+ // The name
+ // * must not collide with the name of any other charset.
+ // * must not appear in archive entry names by chance.
+ // * is internal to CompressedTarFunction.
+ // This is best served by a cryptographically random UUID, generated at startup.
+ private static final String NAME = UUID.randomUUID().toString();
+
+ private MarkedIso88591Charset() {
+ super(NAME, new String[0]);
+ }
+
+ public static Optional getRawBytesStringIfMarked(String s) {
+ // Check for the marker in all positions as TarArchiveInputStream manipulates the raw name in
+ // certain cases (for example, appending a '/' to directory names).
+ if (s.contains(NAME)) {
+ return Optional.of(s.replaceAll(NAME, ""));
+ }
+ return Optional.empty();
+ }
+
+ @Override
+ public CharsetDecoder newDecoder() {
+ return new CharsetDecoder(this, 1, 1) {
+ @Override
+ protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
+ // A simple unoptimized ISO-8859-1 decoder.
+ while (in.hasRemaining()) {
+ if (!out.hasRemaining()) {
+ return CoderResult.OVERFLOW;
+ }
+ out.put((char) (in.get() & 0xFF));
+ }
+ return CoderResult.UNDERFLOW;
+ }
+
+ @Override
+ protected CoderResult implFlush(CharBuffer out) {
+ // Append the marker to the end of the buffer to indicate that it was decoded with this
+ // charset.
+ if (out.remaining() < NAME.length()) {
+ return CoderResult.OVERFLOW;
+ }
+ out.put(NAME);
+ return CoderResult.UNDERFLOW;
+ }
+ };
+ }
+
+ @Override
+ public CharsetEncoder newEncoder() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean contains(Charset cs) {
+ return false;
+ }
+ }
}
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorDescriptor.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorDescriptor.java
index 2676fdba14d091..c40d5ab35f38da 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorDescriptor.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorDescriptor.java
@@ -15,10 +15,10 @@
package com.google.devtools.build.lib.bazel.repository;
import com.google.auto.value.AutoValue;
-import com.google.common.base.Optional;
import com.google.common.collect.ImmutableMap;
import com.google.devtools.build.lib.vfs.Path;
import java.util.Map;
+import java.util.Optional;
/** Description of an archive to be decompressed. */
@AutoValue
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorValue.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorValue.java
index e32a2decb18741..98ca4b4fa7e0ee 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorValue.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/DecompressorValue.java
@@ -14,14 +14,17 @@
package com.google.devtools.build.lib.bazel.repository;
+import static java.nio.charset.StandardCharsets.ISO_8859_1;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Optional;
import com.google.devtools.build.lib.rules.repository.RepositoryFunction.RepositoryFunctionException;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.devtools.build.skyframe.SkyFunctionException.Transience;
import com.google.devtools.build.skyframe.SkyValue;
import java.io.IOException;
+import java.util.Optional;
import java.util.Set;
import net.starlark.java.eval.Starlark;
@@ -59,9 +62,14 @@ private static String prepareErrorMessage(String prefix, Set availablePr
}
public static Optional maybeMakePrefixSuggestion(PathFragment pathFragment) {
- return pathFragment.isMultiSegment()
- ? Optional.of(pathFragment.getSegment(0))
- : Optional.absent();
+ if (!pathFragment.isMultiSegment()) {
+ return Optional.empty();
+ }
+ String rawFirstSegment = pathFragment.getSegment(0);
+ // Users can only specify prefixes from Starlark, which is planned to use UTF-8 for all
+ // strings, but currently still collects the raw bytes in a latin-1 string. We thus
+ // optimistically decode the raw bytes with UTF-8 here for display purposes.
+ return Optional.of(new String(rawFirstSegment.getBytes(ISO_8859_1), UTF_8));
}
}
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPath.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPath.java
index 77c5815e79a50d..5852b4d0ea5303 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPath.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPath.java
@@ -14,11 +14,13 @@
package com.google.devtools.build.lib.bazel.repository;
-import com.google.common.base.Optional;
+import static java.nio.charset.StandardCharsets.ISO_8859_1;
+
import com.google.common.base.Preconditions;
import com.google.devtools.build.lib.concurrent.ThreadSafety;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
+import java.util.Optional;
/**
* Utility class for removing a prefix from an archive's path.
@@ -36,17 +38,19 @@ public final class StripPrefixedPath {
* could cause collisions, if a zip file had one entry for bin/some-binary and another entry for
* /bin/some-binary.
*
- * Note that the prefix is stripped to move the files up one level, so if you have an entry
+ * Note that the prefix is stripped to move the files up one level, so if you have an entry
* "foo/../bar" and a prefix of "foo", the result will be "bar" not "../bar".
*/
- public static StripPrefixedPath maybeDeprefix(String entry, Optional prefix) {
+ public static StripPrefixedPath maybeDeprefix(byte[] entry, Optional prefix) {
Preconditions.checkNotNull(entry);
PathFragment entryPath = relativize(entry);
- if (!prefix.isPresent()) {
+ if (prefix.isEmpty()) {
return new StripPrefixedPath(entryPath, false, false);
}
- PathFragment prefixPath = relativize(prefix.get());
+ // Bazel parses Starlark files, which are the ultimate source of prefixes, as Latin-1
+ // (ISO-8859-1).
+ PathFragment prefixPath = relativize(prefix.get().getBytes(ISO_8859_1));
boolean found = false;
boolean skip = false;
if (entryPath.startsWith(prefixPath)) {
@@ -64,8 +68,8 @@ public static StripPrefixedPath maybeDeprefix(String entry, Optional pre
/**
* Normalize the path and, if it is absolute, make it relative (e.g., /foo/bar becomes foo/bar).
*/
- private static PathFragment relativize(String path) {
- PathFragment entryPath = PathFragment.create(path);
+ private static PathFragment relativize(byte[] path) {
+ PathFragment entryPath = createPathFragment(path);
if (entryPath.isAbsolute()) {
entryPath = entryPath.toRelative();
}
@@ -79,10 +83,10 @@ private StripPrefixedPath(PathFragment pathFragment, boolean found, boolean skip
}
public static PathFragment maybeDeprefixSymlink(
- PathFragment linkPathFragment, Optional prefix, Path root) {
- boolean wasAbsolute = linkPathFragment.isAbsolute();
+ byte[] rawTarget, Optional prefix, Path root) {
+ boolean wasAbsolute = createPathFragment(rawTarget).isAbsolute();
// Strip the prefix from the link path if set.
- linkPathFragment = maybeDeprefix(linkPathFragment.getPathString(), prefix).getPathFragment();
+ PathFragment linkPathFragment = maybeDeprefix(rawTarget, prefix).getPathFragment();
if (wasAbsolute) {
// Recover the path to an absolute path as maybeDeprefix() relativize the path
// even if the prefix is not set
@@ -103,4 +107,10 @@ public boolean skip() {
return skip;
}
+ static PathFragment createPathFragment(byte[] rawBytes) {
+ // Bazel internally represents paths as raw bytes by using the Latin-1 encoding, which has the
+ // property that (new String(bytes, ISO_8859_1)).getBytes(ISO_8859_1)) equals bytes for every
+ // byte array bytes.
+ return PathFragment.create(new String(rawBytes, ISO_8859_1));
+ }
}
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/ZipDecompressor.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/ZipDecompressor.java
index 8990e7c4c08e64..4a8f7709a5798b 100644
--- a/src/main/java/com/google/devtools/build/lib/bazel/repository/ZipDecompressor.java
+++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/ZipDecompressor.java
@@ -15,9 +15,9 @@
package com.google.devtools.build.lib.bazel.repository;
import static com.google.devtools.build.lib.bazel.repository.StripPrefixedPath.maybeDeprefixSymlink;
+import static java.nio.charset.StandardCharsets.UTF_8;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.io.ByteStreams;
import com.google.devtools.build.lib.bazel.repository.DecompressorValue.Decompressor;
@@ -29,11 +29,11 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
-import java.nio.charset.Charset;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
+import java.util.Optional;
import java.util.Set;
import javax.annotation.Nullable;
@@ -89,7 +89,8 @@ public Path decompress(DecompressorDescriptor descriptor)
for (ZipFileEntry entry : entries) {
String entryName = entry.getName();
entryName = renameFiles.getOrDefault(entryName, entryName);
- StripPrefixedPath entryPath = StripPrefixedPath.maybeDeprefix(entryName, prefix);
+ StripPrefixedPath entryPath =
+ StripPrefixedPath.maybeDeprefix(entryName.getBytes(UTF_8), prefix);
foundPrefix = foundPrefix || entryPath.foundPrefix();
if (entryPath.skip()) {
continue;
@@ -102,12 +103,9 @@ public Path decompress(DecompressorDescriptor descriptor)
Set prefixes = new HashSet<>();
for (ZipFileEntry entry : entries) {
StripPrefixedPath entryPath =
- StripPrefixedPath.maybeDeprefix(entry.getName(), Optional.absent());
- Optional suggestion =
- CouldNotFindPrefixException.maybeMakePrefixSuggestion(entryPath.getPathFragment());
- if (suggestion.isPresent()) {
- prefixes.add(suggestion.get());
- }
+ StripPrefixedPath.maybeDeprefix(entry.getName().getBytes(UTF_8), Optional.empty());
+ CouldNotFindPrefixException.maybeMakePrefixSuggestion(entryPath.getPathFragment())
+ .ifPresent(prefixes::add);
}
throw new CouldNotFindPrefixException(prefix.get(), prefixes);
}
@@ -146,17 +144,22 @@ private static void extractZipEntry(
// For symlinks, the "compressed data" is actually the target name.
int read = reader.getInputStream(entry).read(buffer);
Preconditions.checkState(read == buffer.length);
- PathFragment target = PathFragment.create(new String(buffer, Charset.defaultCharset()));
+
+ PathFragment target = StripPrefixedPath.createPathFragment(buffer);
if (target.containsUplevelReferences()) {
PathFragment pointsTo = strippedRelativePath.getParentDirectory().getRelative(target);
if (pointsTo.containsUplevelReferences()) {
- throw new IOException("Zip entries cannot refer to files outside of their directory: "
- + reader.getFilename() + " has a symlink " + strippedRelativePath + " pointing to "
- + target);
+ throw new IOException(
+ "Zip entries cannot refer to files outside of their directory: "
+ + reader.getFilename()
+ + " has a symlink "
+ + strippedRelativePath
+ + " pointing to "
+ + new String(buffer, UTF_8));
}
}
- target = maybeDeprefixSymlink(target, prefix, destinationDirectory);
- symlinks.put(outputPath, target);
+
+ symlinks.put(outputPath, maybeDeprefixSymlink(buffer, prefix, destinationDirectory));
} else {
try (InputStream input = reader.getInputStream(entry);
OutputStream output = outputPath.getOutputStream()) {
diff --git a/src/test/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPathTest.java b/src/test/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPathTest.java
index df1ea4db0fb19f..784b16348e0580 100644
--- a/src/test/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPathTest.java
+++ b/src/test/java/com/google/devtools/build/lib/bazel/repository/StripPrefixedPathTest.java
@@ -15,13 +15,14 @@
package com.google.devtools.build.lib.bazel.repository;
import static com.google.common.truth.Truth.assertThat;
+import static java.nio.charset.StandardCharsets.UTF_8;
-import com.google.common.base.Optional;
import com.google.devtools.build.lib.clock.BlazeClock;
import com.google.devtools.build.lib.util.OS;
import com.google.devtools.build.lib.vfs.DigestHashFunction;
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.devtools.build.lib.vfs.inmemoryfs.InMemoryFileSystem;
+import java.util.Optional;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
@@ -33,31 +34,32 @@
public class StripPrefixedPathTest {
@Test
public void testStrip() {
- StripPrefixedPath result = StripPrefixedPath.maybeDeprefix("foo/bar", Optional.of("foo"));
+ StripPrefixedPath result =
+ StripPrefixedPath.maybeDeprefix("foo/bar".getBytes(UTF_8), Optional.of("foo"));
assertThat(PathFragment.create("bar")).isEqualTo(result.getPathFragment());
assertThat(result.foundPrefix()).isTrue();
assertThat(result.skip()).isFalse();
- result = StripPrefixedPath.maybeDeprefix("foo", Optional.of("foo"));
+ result = StripPrefixedPath.maybeDeprefix("foo".getBytes(UTF_8), Optional.of("foo"));
assertThat(result.skip()).isTrue();
- result = StripPrefixedPath.maybeDeprefix("bar/baz", Optional.of("foo"));
+ result = StripPrefixedPath.maybeDeprefix("bar/baz".getBytes(UTF_8), Optional.of("foo"));
assertThat(result.foundPrefix()).isFalse();
- result = StripPrefixedPath.maybeDeprefix("foof/bar", Optional.of("foo"));
+ result = StripPrefixedPath.maybeDeprefix("foof/bar".getBytes(UTF_8), Optional.of("foo"));
assertThat(result.foundPrefix()).isFalse();
}
@Test
public void testAbsolute() {
- StripPrefixedPath result = StripPrefixedPath.maybeDeprefix(
- "/foo/bar", Optional.absent());
+ StripPrefixedPath result =
+ StripPrefixedPath.maybeDeprefix("/foo/bar".getBytes(UTF_8), Optional.empty());
assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("foo/bar"));
- result = StripPrefixedPath.maybeDeprefix("///foo/bar/baz", Optional.absent());
+ result = StripPrefixedPath.maybeDeprefix("///foo/bar/baz".getBytes(UTF_8), Optional.empty());
assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("foo/bar/baz"));
- result = StripPrefixedPath.maybeDeprefix("/foo/bar/baz", Optional.of("/foo"));
+ result = StripPrefixedPath.maybeDeprefix("/foo/bar/baz".getBytes(UTF_8), Optional.of("/foo"));
assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("bar/baz"));
}
@@ -66,21 +68,21 @@ public void testWindowsAbsolute() {
if (OS.getCurrent() != OS.WINDOWS) {
return;
}
- StripPrefixedPath result = StripPrefixedPath.maybeDeprefix(
- "c:/foo/bar", Optional.absent());
+ StripPrefixedPath result =
+ StripPrefixedPath.maybeDeprefix("c:/foo/bar".getBytes(UTF_8), Optional.empty());
assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("foo/bar"));
}
@Test
public void testNormalize() {
- StripPrefixedPath result = StripPrefixedPath.maybeDeprefix(
- "../bar", Optional.absent());
+ StripPrefixedPath result =
+ StripPrefixedPath.maybeDeprefix("../bar".getBytes(UTF_8), Optional.empty());
assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("../bar"));
- result = StripPrefixedPath.maybeDeprefix("foo/../baz", Optional.absent());
+ result = StripPrefixedPath.maybeDeprefix("foo/../baz".getBytes(UTF_8), Optional.empty());
assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("baz"));
- result = StripPrefixedPath.maybeDeprefix("foo/../baz", Optional.of("foo"));
+ result = StripPrefixedPath.maybeDeprefix("foo/../baz".getBytes(UTF_8), Optional.of("foo"));
assertThat(result.getPathFragment()).isEqualTo(PathFragment.create("baz"));
}
@@ -91,23 +93,23 @@ public void testDeprefixSymlink() {
PathFragment relativeNoPrefix =
StripPrefixedPath.maybeDeprefixSymlink(
- PathFragment.create("a/b"), Optional.absent(), fileSystem.getPath("/usr"));
+ "a/b".getBytes(UTF_8), Optional.empty(), fileSystem.getPath("/usr"));
// there is no attempt to get absolute path for the relative symlinks target path
assertThat(relativeNoPrefix).isEqualTo(PathFragment.create("a/b"));
PathFragment absoluteNoPrefix =
StripPrefixedPath.maybeDeprefixSymlink(
- PathFragment.create("/a/b"), Optional.absent(), fileSystem.getPath("/usr"));
+ "/a/b".getBytes(UTF_8), Optional.empty(), fileSystem.getPath("/usr"));
assertThat(absoluteNoPrefix).isEqualTo(PathFragment.create("/usr/a/b"));
PathFragment absolutePrefix =
StripPrefixedPath.maybeDeprefixSymlink(
- PathFragment.create("/root/a/b"), Optional.of("root"), fileSystem.getPath("/usr"));
+ "/root/a/b".getBytes(UTF_8), Optional.of("root"), fileSystem.getPath("/usr"));
assertThat(absolutePrefix).isEqualTo(PathFragment.create("/usr/a/b"));
PathFragment relativePrefix =
StripPrefixedPath.maybeDeprefixSymlink(
- PathFragment.create("root/a/b"), Optional.of("root"), fileSystem.getPath("/usr"));
+ "root/a/b".getBytes(UTF_8), Optional.of("root"), fileSystem.getPath("/usr"));
// there is no attempt to get absolute path for the relative symlinks target path
assertThat(relativePrefix).isEqualTo(PathFragment.create("a/b"));
}
diff --git a/src/test/shell/bazel/bazel_workspaces_test.sh b/src/test/shell/bazel/bazel_workspaces_test.sh
index a33b443f17c143..28079bed1dd68f 100755
--- a/src/test/shell/bazel/bazel_workspaces_test.sh
+++ b/src/test/shell/bazel/bazel_workspaces_test.sh
@@ -451,6 +451,102 @@ function test_extract_rename_files() {
ensure_output_contains_exactly_once "external/repo/out_dir/renamed-A.txt" "Second file: A"
}
+# Regression test for https://github.com/bazelbuild/bazel/issues/12986
+# Verifies that tar entries with PAX headers, which are always encoded in UTF-8, are extracted
+# correctly.
+function test_extract_pax_tar_non_ascii_utf8_file_names() {
+ local archive_tar="${TEST_TMPDIR}/pax.tar"
+
+ pushd "${TEST_TMPDIR}"
+ mkdir "Ä_pax_∅"
+ echo "bar" > "Ä_pax_∅/Ä_foo_∅.txt"
+ tar --format=pax -cvf pax.tar "Ä_pax_∅"
+ popd
+
+ set_workspace_command "
+ repository_ctx.extract('${archive_tar}', 'out_dir', 'Ä_pax_∅/')"
+
+ build_and_process_log --exclude_rule "repository @local_config_cc"
+
+ ensure_contains_exactly 'location: .*repos.bzl:3:25' 1
+ ensure_contains_atleast 'context: "repository @repo"' 2
+ ensure_contains_exactly 'extract_event' 1
+
+ ensure_output_contains_exactly_once "external/repo/out_dir/Ä_foo_∅.txt" "bar"
+}
+
+# Verifies that tar entries with USTAR headers, for which an encoding isn't specified, are extracted
+# correctly if that encoding happens to be UTF-8.
+function test_extract_ustar_tar_non_ascii_utf8_file_names() {
+ local archive_tar="${TEST_TMPDIR}/ustar.tar"
+
+ pushd "${TEST_TMPDIR}"
+ mkdir "Ä_ustar_∅"
+ echo "bar" > "Ä_ustar_∅/Ä_foo_∅.txt"
+ tar --format=ustar -cvf ustar.tar "Ä_ustar_∅"
+ popd
+
+ set_workspace_command "
+ repository_ctx.extract('${archive_tar}', 'out_dir', 'Ä_ustar_∅/')"
+
+ build_and_process_log --exclude_rule "repository @local_config_cc"
+
+ ensure_contains_exactly 'location: .*repos.bzl:3:25' 1
+ ensure_contains_atleast 'context: "repository @repo"' 2
+ ensure_contains_exactly 'extract_event' 1
+
+ ensure_output_contains_exactly_once "external/repo/out_dir/Ä_foo_∅.txt" "bar"
+}
+
+# Verifies that tar entries with USTAR headers, for which an encoding isn't specified, are extracted
+# correctly if that encoding is not UTF-8.
+function test_extract_ustar_tar_non_ascii_non_utf8_file_names() {
+ if is_darwin; then
+ echo "Skipping test on macOS due to lack of support for non-UTF-8 filenames"
+ return
+ fi
+
+ local archive_tar="${TEST_TMPDIR}/ustar.tar"
+
+ pushd "${TEST_TMPDIR}"
+ mkdir "Ä_ustar_latin1_∅"
+ echo "bar" > "$(echo -e 'Ä_ustar_latin1_∅/\xC4_foo_latin1_\xD6.txt')"
+ tar --format=ustar -cvf ustar.tar "Ä_ustar_latin1_∅"
+ popd
+
+ set_workspace_command "
+ repository_ctx.extract('${archive_tar}', 'out_dir', 'Ä_ustar_latin1_∅/')"
+
+ build_and_process_log --exclude_rule "repository @local_config_cc"
+
+ ensure_contains_exactly 'location: .*repos.bzl:3:25' 1
+ ensure_contains_atleast 'context: "repository @repo"' 2
+ ensure_contains_exactly 'extract_event' 1
+
+ ensure_output_contains_exactly_once "$(echo -e 'external/repo/out_dir/\xC4_foo_latin1_\xD6.txt')" "bar"
+}
+
+function test_extract_default_zip_non_ascii_utf8_file_names() {
+ local archive_tar="${TEST_TMPDIR}/default.zip"
+
+ pushd "${TEST_TMPDIR}"
+ mkdir "Ä_default_∅"
+ echo "bar" > "Ä_default_∅/Ä_foo_∅.txt"
+ zip default.zip -r "Ä_default_∅"
+ popd
+
+ set_workspace_command "
+ repository_ctx.extract('${archive_tar}', 'out_dir', 'Ä_default_∅/')"
+
+ build_and_process_log --exclude_rule "repository @local_config_cc"
+
+ ensure_contains_exactly 'location: .*repos.bzl:3:25' 1
+ ensure_contains_atleast 'context: "repository @repo"' 2
+ ensure_contains_exactly 'extract_event' 1
+
+ ensure_output_contains_exactly_once "external/repo/out_dir/Ä_foo_∅.txt" "bar"
+}
+
function test_file() {
set_workspace_command 'repository_ctx.file("filefile.sh", "echo filefile", True)'
diff --git a/src/test/shell/integration/minimal_jdk_test.sh b/src/test/shell/integration/minimal_jdk_test.sh
index e66e0c83d96f90..82f40fd9e63a94 100755
--- a/src/test/shell/integration/minimal_jdk_test.sh
+++ b/src/test/shell/integration/minimal_jdk_test.sh
@@ -42,13 +42,13 @@ export BAZEL_SUFFIX="_jdk_minimal"
source "$(rlocation "io_bazel/src/test/shell/integration_test_setup.sh")" \
|| { echo "integration_test_setup.sh not found!" >&2; exit 1; }
-# Bazel's install base is < 366MB with minimal JDK and > 366MB with an all
+# Bazel's install base is < 370MB with minimal JDK and > 370MB with an all
# modules JDK.
-function test_size_less_than_366MB() {
+function test_size_less_than_370MB() {
bazel info
ib=$(bazel info install_base)
size=$(du -s "$ib" | cut -d\ -f1)
- maxsize=$((1024*366))
+ maxsize=$((1024*370))
if [ $size -gt $maxsize ]; then
echo "$ib was too big:" 1>&2
du -a "$ib" 1>&2