From b99c8835979ffe6faaa06840efe8706b5182e20e Mon Sep 17 00:00:00 2001 From: Googler Date: Fri, 16 Feb 2024 07:01:26 -0800 Subject: [PATCH] [7.1.0] Compute output directories in parallel when building the upload manifest. Some users have very large tree artifacts (with hundreds of thousands of files) which are extremely slow to traverse sequentially. In one particular scenario with ~250k files, this change reduces the time spent computing the manifest from ~70s to ~5s. (We could parallelize further across all outputs, but I prefer to wait until there is a concrete need for it.) PiperOrigin-RevId: 607678324 Change-Id: Ia80d0a663909b8ed96c6720d9eb5393fb3027b48 --- .../build/lib/remote/UploadManifest.java | 302 ++++++++++++------ .../build/lib/remote/UploadManifestTest.java | 24 +- 2 files changed, 220 insertions(+), 106 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/remote/UploadManifest.java b/src/main/java/com/google/devtools/build/lib/remote/UploadManifest.java index 3ca5f30e479da1..5210e773d4555c 100644 --- a/src/main/java/com/google/devtools/build/lib/remote/UploadManifest.java +++ b/src/main/java/com/google/devtools/build/lib/remote/UploadManifest.java @@ -13,12 +13,17 @@ // limitations under the License. package com.google.devtools.build.lib.remote; +import static com.google.common.base.Preconditions.checkNotNull; +import static com.google.common.base.Preconditions.checkState; import static com.google.common.base.Throwables.throwIfInstanceOf; import static com.google.common.util.concurrent.MoreExecutors.directExecutor; import static com.google.devtools.build.lib.remote.util.RxFutures.toCompletable; import static com.google.devtools.build.lib.remote.util.RxFutures.toSingle; import static com.google.devtools.build.lib.remote.util.RxUtils.mergeBulkTransfer; import static com.google.devtools.build.lib.remote.util.RxUtils.toTransferResult; +import static java.util.Comparator.comparing; +import static java.util.Comparator.naturalOrder; +import static java.util.Comparator.reverseOrder; import build.bazel.remote.execution.v2.Action; import build.bazel.remote.execution.v2.ActionResult; @@ -26,18 +31,27 @@ import build.bazel.remote.execution.v2.Command; import build.bazel.remote.execution.v2.Digest; import build.bazel.remote.execution.v2.Directory; +import build.bazel.remote.execution.v2.FileNode; import build.bazel.remote.execution.v2.OutputSymlink; import build.bazel.remote.execution.v2.SymlinkAbsolutePathStrategy; +import build.bazel.remote.execution.v2.SymlinkNode; import build.bazel.remote.execution.v2.Tree; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import com.google.common.collect.Multimaps; +import com.google.common.collect.SortedSetMultimap; +import com.google.common.collect.TreeMultimap; import com.google.devtools.build.lib.actions.ActionExecutionMetadata; import com.google.devtools.build.lib.actions.ActionUploadFinishedEvent; import com.google.devtools.build.lib.actions.ActionUploadStartedEvent; import com.google.devtools.build.lib.actions.ExecException; import com.google.devtools.build.lib.actions.UserExecException; +import com.google.devtools.build.lib.concurrent.AbstractQueueVisitor; +import com.google.devtools.build.lib.concurrent.ErrorClassifier; +import com.google.devtools.build.lib.concurrent.NamedForkJoinPool; import com.google.devtools.build.lib.events.ExtendedEventHandler; import com.google.devtools.build.lib.remote.common.RemoteActionExecutionContext; import com.google.devtools.build.lib.remote.common.RemoteCacheClient; @@ -66,25 +80,28 @@ import java.time.Instant; import java.util.ArrayList; import java.util.Collection; -import java.util.Comparator; +import java.util.Collections; import java.util.HashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ForkJoinPool; import java.util.stream.Collectors; import javax.annotation.Nullable; /** UploadManifest adds output metadata to a {@link ActionResult}. */ public class UploadManifest { - private final DigestUtil digestUtil; private final RemotePathResolver remotePathResolver; private final ActionResult.Builder result; private final boolean followSymlinks; private final boolean allowDanglingSymlinks; private final boolean allowAbsoluteSymlinks; - private final Map digestToFile = new HashMap<>(); - private final Map digestToBlobs = new HashMap<>(); + private final ConcurrentHashMap digestToFile = new ConcurrentHashMap<>(); + private final ConcurrentHashMap digestToBlobs = new ConcurrentHashMap<>(); @Nullable private ActionKey actionKey; private Digest stderrDigest; private Digest stdoutDigest; @@ -102,7 +119,7 @@ public static UploadManifest create( int exitCode, Instant startTime, int wallTimeInMs) - throws ExecException, IOException { + throws ExecException, IOException, InterruptedException { ActionResult.Builder result = ActionResult.newBuilder(); result.setExitCode(exitCode); @@ -208,7 +225,7 @@ private void setStdoutStderr(FileOutErr outErr) throws IOException { * after execution, both for cache hits and misses. */ @VisibleForTesting - void addFiles(Collection files) throws ExecException, IOException { + void addFiles(Collection files) throws ExecException, IOException, InterruptedException { // TODO(tjgq): Non-dangling absolute symlinks are uploaded as the file or directory they point // to even when followSymlinks is false. This is inconsistent with the treatment of relative // symlinks, but fixing it would require an incompatible change. @@ -341,107 +358,210 @@ private void addFile(Digest digest, Path file) throws IOException { digestToFile.put(digest, file); } - // Field numbers of the 'root' and 'directory' fields in the Tree message. - private static final int TREE_ROOT_FIELD_NUMBER = - Tree.getDescriptor().findFieldByName("root").getNumber(); - private static final int TREE_CHILDREN_FIELD_NUMBER = - Tree.getDescriptor().findFieldByName("children").getNumber(); + private static final class WrappedException extends RuntimeException { + private final Exception wrapped; - private void addDirectory(Path dir) throws ExecException, IOException { - LinkedHashSet directories = new LinkedHashSet<>(); - var ignored = computeDirectory(dir, directories); - - // Convert individual Directory messages to a Tree message. As we want the - // records to be topologically sorted (parents before children), we iterate - // over the directories in reverse insertion order. - ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); - CodedOutputStream codedOutputStream = CodedOutputStream.newInstance(byteArrayOutputStream); - int fieldNumber = TREE_ROOT_FIELD_NUMBER; - for (ByteString directory : Lists.reverse(new ArrayList(directories))) { - codedOutputStream.writeBytes(fieldNumber, directory); - fieldNumber = TREE_CHILDREN_FIELD_NUMBER; + WrappedException(Exception wrapped) { + super(wrapped); + this.wrapped = wrapped; } - codedOutputStream.flush(); - ByteString data = ByteString.copyFrom(byteArrayOutputStream.toByteArray()); - Digest digest = digestUtil.compute(data.toByteArray()); - - if (result != null) { - result - .addOutputDirectoriesBuilder() - .setPath(remotePathResolver.localPathToOutputPath(dir)) - .setTreeDigest(digest) - .setIsTopologicallySorted(true); + Exception unwrap() { + return wrapped; } - - digestToBlobs.put(digest, data); } + /** A thread pool shared by all {@link DirectoryBuilder} instances. */ + private static final ForkJoinPool VISITOR_POOL = + NamedForkJoinPool.newNamedPool( + "upload-manifest-directory-visitor", Runtime.getRuntime().availableProcessors()); + /** - * Computes the {@link Directory} message describing the transitive contents of a directory. - * - * @param path the directory path. - * @param directories an output parameter accepting the wire-format {@link Directory} messages for - * every visited (sub-)directory of {@code path}, including {@code path} itself, in a - * deterministic topological order (children before parents). - * @return the wire-format {@link Directory} message for {@code path}. + * A {@link DirectoryBuilder} constructs a {@link Tree} message for an output directory, doing as + * much as possible in parallel. */ - private ByteString computeDirectory(Path path, LinkedHashSet directories) - throws ExecException, IOException { - Directory.Builder b = Directory.newBuilder(); - - List sortedDirent = new ArrayList<>(path.readdir(Symlinks.NOFOLLOW)); - sortedDirent.sort(Comparator.comparing(Dirent::getName)); - - for (Dirent dirent : sortedDirent) { - String name = dirent.getName(); - Path child = path.getRelative(name); - if (dirent.getType() == Dirent.Type.FILE) { - Digest digest = digestUtil.compute(child); - b.addFilesBuilder().setName(name).setDigest(digest).setIsExecutable(true); - digestToFile.put(digest, child); - continue; + private class DirectoryBuilder extends AbstractQueueVisitor { + private final Path rootDir; + + // Directories found during the traversal, including the root. + // Sorted in reverse so that children iterate before parents. + private final SortedSet dirs = + Collections.synchronizedSortedSet(new TreeSet(reverseOrder())); + + // Maps each directory found during the traversal to its subdirectories. + private final SortedSetMultimap dirToSubdirs = + Multimaps.synchronizedSortedSetMultimap(TreeMultimap.create()); + + // Maps each directory found during the traversal to its files. + private final SortedSetMultimap dirToFiles = + Multimaps.synchronizedSortedSetMultimap( + TreeMultimap.create(naturalOrder(), comparing(FileNode::getName))); + + // Maps each directory found during the traversal to its symlinks. + private final SortedSetMultimap dirToSymlinks = + Multimaps.synchronizedSortedSetMultimap( + TreeMultimap.create( + naturalOrder(), comparing(SymlinkNode::getName))); + + DirectoryBuilder(Path rootDir) { + super( + VISITOR_POOL, + ExecutorOwnership.SHARED, + ExceptionHandlingMode.FAIL_FAST, + ErrorClassifier.DEFAULT); + this.rootDir = checkNotNull(rootDir); + } + + /** + * Returns a {@link Tree} message in wire format describing the directory contents, obeying the + * requirements of the {@code OutputDirectory.is_topologically_sorted} field. + */ + ByteString build() throws ExecException, IOException, InterruptedException { + // Collect directory entries (subdirectories, files, symlinks) in parallel. + // This is a major speedup for large tree artifacts with hundreds of thousands of files. + execute(() -> visit(rootDir, Dirent.Type.DIRECTORY)); + try { + awaitQuiescence(true); + } catch (WrappedException e) { + Throwables.throwIfInstanceOf(e.unwrap(), ExecException.class); + Throwables.throwIfInstanceOf(e.unwrap(), IOException.class); + throw new AssertionError("unexpected exception", e.unwrap()); } - if (dirent.getType() == Dirent.Type.DIRECTORY) { - ByteString dir = computeDirectory(child, directories); - b.addDirectoriesBuilder().setName(name).setDigest(digestUtil.compute(dir.toByteArray())); - continue; + + // Compute the Directory message for every node, including the root. Since each directory + // references its subdirectories by their digest, the messages must be computed in topological + // order (children before parents). In addition, the contents of each Directory message must + // be sorted, which is already ensured by the use of sorted maps. + + HashMap dirToDigest = new HashMap<>(); + LinkedHashSet dirBlobs = new LinkedHashSet<>(); + + for (Path dir : dirs) { + Directory.Builder builder = Directory.newBuilder(); + builder.addAllFiles(dirToFiles.get(dir)); + builder.addAllSymlinks(dirToSymlinks.get(dir)); + for (Path subdir : dirToSubdirs.get(dir)) { + checkState(subdir.getParentDirectory().equals(dir)); + builder + .addDirectoriesBuilder() + .setName(subdir.getBaseName()) + .setDigest(dirToDigest.get(subdir)); + } + ByteString dirBlob = builder.build().toByteString(); + + dirToDigest.put(dir, digestUtil.compute(dirBlob.toByteArray())); + dirBlobs.add(dirBlob); } - if (dirent.getType() == Dirent.Type.SYMLINK) { - PathFragment target = child.readSymbolicLink(); - FileStatus statFollow = child.statIfFound(Symlinks.FOLLOW); - if (statFollow == null || (!followSymlinks && !target.isAbsolute())) { - // Symlink uploaded as a symlink. - if (statFollow == null) { - checkDanglingSymlinkAllowed(child, target); - } - if (target.isAbsolute()) { - checkAbsoluteSymlinkAllowed(child, target); - } - b.addSymlinksBuilder().setName(name).setTarget(target.toString()); - continue; + + // Convert individual Directory messages to a Tree message. As we want the records to be + // topologically sorted (parents before children), we iterate over the directories in reverse + // insertion order. We construct the message through direct byte manipulation to ensure that + // the strict requirements on the encoding are observed. + + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + CodedOutputStream codedOutputStream = CodedOutputStream.newInstance(byteArrayOutputStream); + int fieldNumber = TREE_ROOT_FIELD_NUMBER; + for (ByteString directory : Lists.reverse(new ArrayList<>(dirBlobs))) { + codedOutputStream.writeBytes(fieldNumber, directory); + fieldNumber = TREE_CHILDREN_FIELD_NUMBER; + } + codedOutputStream.flush(); + + return ByteString.copyFrom(byteArrayOutputStream.toByteArray()); + } + + private void visit(Path path, Dirent.Type type) { + try { + if (type == Dirent.Type.FILE) { + visitAsFile(path); + return; } - if (statFollow.isFile() && !statFollow.isSpecialFile()) { - // Symlink to file uploaded as a file. - Digest digest = digestUtil.compute(child); - b.addFilesBuilder().setName(name).setDigest(digest).setIsExecutable(true); - digestToFile.put(digest, child); - continue; + if (type == Dirent.Type.DIRECTORY) { + visitAsDirectory(path); + for (Dirent dirent : path.readdir(Symlinks.NOFOLLOW)) { + Path childPath = path.getChild(dirent.getName()); + Dirent.Type childType = dirent.getType(); + execute(() -> visit(childPath, childType)); + } + return; } - if (statFollow.isDirectory()) { - // Symlink to directory uploaded as a directory. - ByteString dir = computeDirectory(child, directories); - b.addDirectoriesBuilder().setName(name).setDigest(digestUtil.compute(dir.toByteArray())); - continue; + if (type == Dirent.Type.SYMLINK) { + PathFragment target = path.readSymbolicLink(); + FileStatus statFollow = path.statIfFound(Symlinks.FOLLOW); + if (statFollow == null || (!followSymlinks && !target.isAbsolute())) { + // Symlink uploaded as a symlink. + if (statFollow == null) { + checkDanglingSymlinkAllowed(path, target); + } + if (target.isAbsolute()) { + checkAbsoluteSymlinkAllowed(path, target); + } + visitAsSymlink(path, target); + return; + } + if (statFollow.isFile() && !statFollow.isSpecialFile()) { + // Symlink to file uploaded as a file. + execute(() -> visit(path, Dirent.Type.FILE)); + return; + } + if (statFollow.isDirectory()) { + // Symlink to directory uploaded as a directory. + execute(() -> visit(path, Dirent.Type.DIRECTORY)); + return; + } } + rejectSpecialFile(path); + } catch (ExecException | IOException e) { + // We can't throw checked exceptions here since AQV expects Runnables + throw new WrappedException(e); + } + } + + private void visitAsDirectory(Path path) { + dirs.add(path); + if (!path.equals(rootDir)) { + dirToSubdirs.put(path.getParentDirectory(), path); } - // Special file or dereferenced symlink to special file. - rejectSpecialFile(child); } - ByteString directory = b.build().toByteString(); - directories.add(directory); - return directory; + private void visitAsFile(Path path) throws IOException { + Path parentPath = path.getParentDirectory(); + Digest digest = digestUtil.compute(path); + FileNode node = + FileNode.newBuilder() + .setName(path.getBaseName()) + .setDigest(digest) + .setIsExecutable(true) + .build(); + digestToFile.put(digest, path); + dirToFiles.put(parentPath, node); + } + + private void visitAsSymlink(Path path, PathFragment target) { + Path parentPath = path.getParentDirectory(); + SymlinkNode node = + SymlinkNode.newBuilder().setName(path.getBaseName()).setTarget(target.toString()).build(); + dirToSymlinks.put(parentPath, node); + } + } + + // Field numbers of the 'root' and 'directory' fields in the Tree message. + private static final int TREE_ROOT_FIELD_NUMBER = + Tree.getDescriptor().findFieldByName("root").getNumber(); + private static final int TREE_CHILDREN_FIELD_NUMBER = + Tree.getDescriptor().findFieldByName("children").getNumber(); + + private void addDirectory(Path dir) throws ExecException, IOException, InterruptedException { + ByteString treeBlob = new DirectoryBuilder(dir).build(); + Digest treeDigest = digestUtil.compute(treeBlob.toByteArray()); + + result + .addOutputDirectoriesBuilder() + .setPath(remotePathResolver.localPathToOutputPath(dir)) + .setTreeDigest(treeDigest) + .setIsTopologicallySorted(true); + + digestToBlobs.put(treeDigest, treeBlob); } private void checkDanglingSymlinkAllowed(Path file, PathFragment target) throws IOException { diff --git a/src/test/java/com/google/devtools/build/lib/remote/UploadManifestTest.java b/src/test/java/com/google/devtools/build/lib/remote/UploadManifestTest.java index 8d85f8930e7bd9..8ad57364f343fe 100644 --- a/src/test/java/com/google/devtools/build/lib/remote/UploadManifestTest.java +++ b/src/test/java/com/google/devtools/build/lib/remote/UploadManifestTest.java @@ -1188,7 +1188,7 @@ public void actionResult_noFollowSymlinks_relativeDirectorySymlinkInDirectoryAsS public void actionResult_noFollowSymlinks_specialFileError() throws Exception { ActionResult.Builder result = ActionResult.newBuilder(); Path dir = createDirectoryWithSpecialFile("dir", "special"); - Path special = dir.getRelative("special"); + Path special = dir.getChild("special"); UploadManifest um = new UploadManifest( @@ -1208,7 +1208,7 @@ public void actionResult_noFollowSymlinks_specialFileError() throws Exception { public void actionResult_followSymlinks_specialFileSymlinkError() throws Exception { ActionResult.Builder result = ActionResult.newBuilder(); Path dir = createDirectoryWithSymlinkToSpecialFile("dir", "link", "special"); - Path link = dir.getRelative("link"); + Path link = dir.getChild("link"); UploadManifest um = new UploadManifest( @@ -1344,7 +1344,6 @@ private Path createSymlinkToSpecialFile(String execPath, String target) throws I when(link.toString()).thenReturn(execPath); when(link.statIfFound(Symlinks.NOFOLLOW)).thenReturn(SYMLINK_FILE_STATUS); when(link.statIfFound(Symlinks.FOLLOW)).thenReturn(SPECIAL_FILE_STATUS); - when(link.relativeTo(execRoot)).thenReturn(execRoot.getRelative(execPath).relativeTo(execRoot)); when(link.readSymbolicLink()).thenReturn(PathFragment.create(target)); return link; @@ -1359,27 +1358,22 @@ private Path createDirectoryWithSpecialFile(String dirExecPath, String specialNa when(dir.statIfFound(Symlinks.NOFOLLOW)).thenReturn(DIR_FILE_STATUS); when(dir.readdir(Symlinks.NOFOLLOW)) .thenReturn(ImmutableList.of(new Dirent(specialName, Dirent.Type.UNKNOWN))); - when(dir.getRelative(specialName)).thenReturn(special); + when(dir.getChild(specialName)).thenReturn(special); return dir; } private Path createDirectoryWithSymlinkToSpecialFile( - String dirExecPath, String linkName, String specialName) throws IOException { - Path special = createSpecialFile(dirExecPath + "/" + specialName); - Path link = createSymlinkToSpecialFile(dirExecPath + "/" + linkName, specialName); + String dirExecPath, String linkName, String specialExecPath) throws IOException { + Path unusedSpecial = createSpecialFile(specialExecPath); + Path link = createSymlinkToSpecialFile(dirExecPath + "/" + linkName, specialExecPath); Path dir = mock(Path.class); + when(dir.toString()).thenReturn(dirExecPath); when(dir.statIfFound(Symlinks.NOFOLLOW)).thenReturn(DIR_FILE_STATUS); when(dir.readdir(Symlinks.NOFOLLOW)) - .thenReturn( - ImmutableList.of( - new Dirent(linkName, Dirent.Type.SYMLINK), - new Dirent(specialName, Dirent.Type.UNKNOWN))); - when(dir.relativeTo(execRoot)) - .thenReturn(execRoot.getRelative(dirExecPath).relativeTo(execRoot)); - when(dir.getRelative(linkName)).thenReturn(link); - when(dir.getRelative(specialName)).thenReturn(special); + .thenReturn(ImmutableList.of(new Dirent(linkName, Dirent.Type.SYMLINK))); + when(dir.getChild(linkName)).thenReturn(link); return dir; }