From de66620d71c0ab422dfd24cb0a36c0490ec26084 Mon Sep 17 00:00:00 2001 From: Googler Date: Thu, 10 Oct 2024 07:30:34 -0700 Subject: [PATCH] Deserializer for FileInvalidationData. The core of FileDependencyDeserializer is an asynchronous state machine implemented using Futures.transformAsync to recursively add more stages as needed. Asynchrony is a consequence of the fact that the resolution mechanism is an asynchronous remote cache lookup for FileInvalidationData and deserialization typically requires multiple, sequential resolution steps. It uses the usual concurrent map to deduplicate keyed computations. In this case, the concurrent map is a weak-valued Caffeine instance. The rationale is that the map values (which are effectively sets of files) are referenced by the SkyValues that would be invalidated by changes to those files. So as the SkyValues are invalidated and GCd, the cache is automatically cleaned up. This fits well with the transitive structure of the FileDependencies class. ## Servicification Considerations The cache keys are versioned so it is theoretically possible to share a single cache across multiple Bazel instances. At the moment, the BUILD dependencies are somewhat broad due to including Skyframe serialization, but only the FingerprintValueService is needed. Otherwise coupling to Bazel code is deliberately minimized. PiperOrigin-RevId: 684437123 Change-Id: Iec445c222d6f072bd10044102dbcb01c62ab739d --- .../FingerprintValueService.java | 2 +- .../lib/skyframe/serialization/analysis/BUILD | 20 + .../analysis/FileDependencies.java | 184 ++++++++ .../analysis/FileDependencyDeserializer.java | 415 ++++++++++++++++++ 4 files changed, 620 insertions(+), 1 deletion(-) create mode 100644 src/main/java/com/google/devtools/build/lib/skyframe/serialization/analysis/FileDependencies.java create mode 100644 src/main/java/com/google/devtools/build/lib/skyframe/serialization/analysis/FileDependencyDeserializer.java diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/FingerprintValueService.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/FingerprintValueService.java index 7114b20251e9ff..2057f68dcae8bd 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/FingerprintValueService.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/FingerprintValueService.java @@ -152,7 +152,7 @@ int fingerprintLength() { * *

Those callbacks may be executing on RPC threads that should not be blocked. */ - Executor getExecutor() { + public Executor getExecutor() { return executor; } } diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/analysis/BUILD b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/analysis/BUILD index 2493722b46facb..de25f48dca331d 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/analysis/BUILD +++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/analysis/BUILD @@ -78,6 +78,26 @@ java_library( ], ) +java_library( + name = "file_dependency_deserializer", + srcs = [ + "FileDependencies.java", + "FileDependencyDeserializer.java", + ], + deps = [ + ":file_dependency_key_support", + "//src/main/java/com/google/devtools/build/lib/skyframe/serialization", + "//src/main/java/com/google/devtools/build/lib/vfs:ospathpolicy", + "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", + "//src/main/protobuf:file_invalidation_data_java_proto", + "//third_party:caffeine", + "//third_party:error_prone_annotations", + "//third_party:guava", + "//third_party:jsr305", + "@protobuf//:protobuf_java", + ], +) + java_library( name = "file_dependency_key_support", srcs = ["FileDependencyKeySupport.java"], diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/analysis/FileDependencies.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/analysis/FileDependencies.java new file mode 100644 index 00000000000000..ccd9c3eee666c9 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/analysis/FileDependencies.java @@ -0,0 +1,184 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.skyframe.serialization.analysis; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.google.errorprone.annotations.CanIgnoreReturnValue; +import java.util.ArrayList; + +/** + * Representation of a set of file names that could invalidate a given value. + * + *

Most values can be associated with some set of input files, represented in this nested way to + * facilitate sharing between values. So given a set of changed files, invalidation is performed by + * calling {@link #containsMatch} on an instance and all transitively reachable instances via {@link + * #getDependencyCount} and {@link #getDependency}. If any matches are encountered, the associated + * value is invalidated. + */ +sealed interface FileDependencies extends FileDependencyDeserializer.GetDependenciesResult + permits FileDependencies.SingleResolvedPath, + FileDependencies.SingleResolvedPathAndDependency, + FileDependencies.MultiplePaths { + + boolean containsMatch(ImmutableSet paths); + + int getDependencyCount(); + + FileDependencies getDependency(int index); + + /** + * The real path associated with this node after resolution. + * + *

This is used by {@link FileDependencyDeserializer} to retrieve resolved parent paths but + * isn't directly used by invalidation. + */ + String resolvedPath(); + + /** Returns the resolved paths associated with the current node for testing. */ + // An interface must be used instead of an abstract class to allow records, which are helpful + // here. That means that this method must be public, triggering the warning. + @SuppressWarnings("VisibleForTestingMisused") + @VisibleForTesting + ImmutableList getAllResolvedPathsForTesting(); + + static Builder builder(String firstResolvedPath) { + return new Builder(firstResolvedPath); + } + + static class Builder { + private final ArrayList paths = new ArrayList<>(); + private final ArrayList dependencies = new ArrayList<>(); + + /** + * At least one resolved path is required. + * + *

The last path added is treated as the overall {@link #resolvedPath} of the instance. The + * {@code firstResolvedPath} argument is the {@link #resolvedPath} if it's the only path. + */ + Builder(String firstResolvedPath) { + paths.add(firstResolvedPath); + } + + @CanIgnoreReturnValue + Builder addPath(String path) { + paths.add(path); + return this; + } + + @CanIgnoreReturnValue + Builder addDependency(FileDependencies dependency) { + dependencies.add(dependency); + return this; + } + + FileDependencies build() { + if (paths.size() == 1) { + int dependenciesSize = dependencies.size(); + if (dependenciesSize == 0) { + return new SingleResolvedPath(paths.get(0)); + } + if (dependenciesSize == 1) { + return new SingleResolvedPathAndDependency(paths.get(0), dependencies.get(0)); + } + } + return new MultiplePaths(ImmutableList.copyOf(paths), ImmutableList.copyOf(dependencies)); + } + } + + // The implementations here exist to reduce indirection and memory use. + + record SingleResolvedPath(String resolvedPath) implements FileDependencies { + @Override + public boolean containsMatch(ImmutableSet paths) { + return paths.contains(resolvedPath); + } + + @Override + public int getDependencyCount() { + return 0; + } + + @Override + public FileDependencies getDependency(int index) { + throw new IndexOutOfBoundsException(this + " " + index); + } + + @Override + public ImmutableList getAllResolvedPathsForTesting() { + return ImmutableList.of(resolvedPath); + } + } + + record SingleResolvedPathAndDependency(String resolvedPath, FileDependencies dependency) + implements FileDependencies { + @Override + public boolean containsMatch(ImmutableSet paths) { + return paths.contains(resolvedPath); + } + + @Override + public int getDependencyCount() { + return 1; + } + + @Override + public FileDependencies getDependency(int index) { + if (index != 0) { + throw new IndexOutOfBoundsException(this + " " + index); + } + return dependency; + } + + @Override + public ImmutableList getAllResolvedPathsForTesting() { + return ImmutableList.of(resolvedPath); + } + } + + record MultiplePaths( + ImmutableList resolvedPaths, ImmutableList dependencies) + implements FileDependencies { + @Override + public boolean containsMatch(ImmutableSet paths) { + for (int i = 0; i < resolvedPaths.size(); i++) { + if (paths.contains(resolvedPaths.get(i))) { + return true; + } + } + return false; + } + + @Override + public int getDependencyCount() { + return dependencies.size(); + } + + @Override + public FileDependencies getDependency(int index) { + return dependencies.get(index); + } + + @Override + public String resolvedPath() { + return resolvedPaths.get(resolvedPaths().size() - 1); + } + + @Override + public ImmutableList getAllResolvedPathsForTesting() { + return resolvedPaths; + } + } +} diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/analysis/FileDependencyDeserializer.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/analysis/FileDependencyDeserializer.java new file mode 100644 index 00000000000000..d4c954a526e96f --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/analysis/FileDependencyDeserializer.java @@ -0,0 +1,415 @@ +// Copyright 2024 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.skyframe.serialization.analysis; + +import static com.google.common.base.Preconditions.checkState; +import static com.google.common.util.concurrent.Futures.immediateFailedFuture; +import static com.google.common.util.concurrent.Futures.immediateFuture; +import static com.google.common.util.concurrent.MoreExecutors.directExecutor; +import static com.google.devtools.build.lib.skyframe.serialization.analysis.FileDependencyKeySupport.FILE_KEY_DELIMITER; +import static com.google.devtools.build.lib.skyframe.serialization.analysis.FileDependencyKeySupport.MAX_KEY_LENGTH; +import static com.google.devtools.build.lib.skyframe.serialization.analysis.FileDependencyKeySupport.MTSV_SENTINEL; +import static com.google.devtools.build.lib.skyframe.serialization.analysis.FileDependencyKeySupport.computeCacheKey; +import static com.google.devtools.build.lib.vfs.PathFragment.SEPARATOR_CHAR; +import static com.google.protobuf.ExtensionRegistry.getEmptyRegistry; +import static java.nio.charset.StandardCharsets.UTF_8; + +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.google.common.util.concurrent.AbstractFuture; +import com.google.common.util.concurrent.AsyncFunction; +import com.google.common.util.concurrent.Futures; +import com.google.common.util.concurrent.ListenableFuture; +import com.google.devtools.build.lib.skyframe.serialization.FingerprintValueService; +import com.google.devtools.build.lib.skyframe.serialization.KeyBytesProvider; +import com.google.devtools.build.lib.skyframe.serialization.SerializationException; +import com.google.devtools.build.lib.skyframe.serialization.StringKey; +import com.google.devtools.build.lib.skyframe.serialization.proto.FileInvalidationData; +import com.google.devtools.build.lib.skyframe.serialization.proto.Symlink; +import com.google.devtools.build.lib.vfs.OsPathPolicy; +import com.google.protobuf.InvalidProtocolBufferException; +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; +import javax.annotation.Nullable; + +/** + * Deserializes dependency information persisted by {@link FileDependencySerializer}. + * + *

Fetching a dependency is a mostly linear asynchronous state machine that performs actions then + * waits in an alternating manner. + * + *

    + *
  1. Request the data for a given key. + *
  2. {@link WaitForData}. + *
  3. Request the data for the parent directory (a recursive call). + *
  4. {@link WaitForParent}. + *
  5. Process any symlinks, resolving symlink parents as needed. + *
  6. {@link WaitForSymlinkParent}. + *
  7. Processing symlinks repeats for all the symlinks associated with an entry. + *
+ */ +final class FileDependencyDeserializer { + private static final OsPathPolicy OS = OsPathPolicy.getFilePathOs(); + + private final FingerprintValueService fingerprintValueService; + + /** + * A cache for {@link FileDependencies}, primarily for deduplication. + * + *

The cache keys are as described at {@link FileInvalidationData}. We can potentially strip + * the version information here, but keeping the version enables a single {@link + * FileDependencyDeserializer} instance to be shared across disparate builds. + * + *

While in-flight, the value has type {@link FutureFileDependencies}, which is replaced by + * {@link FileDependencies} once the computation completes. + * + *

References to {@link FileDependencies} form DAGs where certain top-level entries are + * retained by the {@code SkyValue}s that depend on them. When all such associated {@code + * SkyValue}s are invalidated, the dependency information becomes eligible for GC. + */ + private final Cache dependenciesCache = + Caffeine.newBuilder().weakValues().build(); + + FileDependencyDeserializer(FingerprintValueService fingerprintValueService) { + this.fingerprintValueService = fingerprintValueService; + } + + sealed interface GetDependenciesResult permits FileDependencies, FutureFileDependencies {} + + /** + * Reconstitutes the set of file dependencies associated with {@code key}. + * + *

Performs lookups and parent resolution (recursively) and symlink resolution to obtain all + * files associated with {@code key} represented as {@link FileDependencies}. + * + * @param key as described in {@link FileInvalidationData}. + * @return either an immediate {@link FileDependencies} instance or effectively a {@link + * ListenableFuture} instance. + */ + GetDependenciesResult getFileDependencies(String key) { + FutureFileDependencies ownedFuture; + switch (dependenciesCache.get(key, unused -> new FutureFileDependencies())) { + case FileDependencies dependencies: + return dependencies; + case FutureFileDependencies future: + if (!future.tryTakeOwnership()) { + return future; // Owned by a another thread. + } + ownedFuture = future; + break; + } + // `ownedFuture` is owned by this thread, which must complete its value. + try { + ListenableFuture futureBytes; + try { + futureBytes = fingerprintValueService.get(getKeyBytes(key)); + } catch (IOException e) { + ownedFuture.setIoException(e); + return ownedFuture; + } + + ownedFuture.setFutureFiles( + Futures.transformAsync( + futureBytes, new WaitForData(key), fingerprintValueService.getExecutor())); + return ownedFuture; + } finally { + ownedFuture.verifySet(); + } + } + + /** + * The main purpose of this class is to act as a {@link ListenableFuture}. + * + *

Its specific type is explicitly visible to clients to allow them to cleanly distinguish it + * as a permitted subtype of {@link GetDependenciesResult}. + */ + static final class FutureFileDependencies extends AbstractFuture + implements GetDependenciesResult { + /** Used to establish exactly-once ownership of this future with {@link #tryTakeOwnership}. */ + @SuppressWarnings({"UnusedVariable", "FieldCanBeFinal"}) // set with OWNED_HANDLE + private boolean owned = false; + + private boolean isSet = false; + + private boolean tryTakeOwnership() { + return OWNED_HANDLE.compareAndSet(this, false, true); + } + + private void setFutureFiles(ListenableFuture files) { + checkState(setFuture(files), "already set %s", this); + isSet = true; + } + + private void setIoException(IOException e) { + checkState(setException(e)); + isSet = true; + } + + private void verifySet() { + if (!isSet) { + checkState( + setException( + new IllegalStateException( + "future was unexpectedly unset, look for unchecked exceptions in" + + " FileDependencyDeserializer"))); + } + } + + private static final VarHandle OWNED_HANDLE; + + static { + try { + OWNED_HANDLE = + MethodHandles.lookup() + .findVarHandle(FutureFileDependencies.class, "owned", boolean.class); + } catch (ReflectiveOperationException e) { + throw new ExceptionInInitializerError(e); + } + } + } + + private class WaitForData implements AsyncFunction { + private final String key; + + private WaitForData(String key) { + this.key = key; + } + + @Override + public ListenableFuture apply(byte[] bytes) + throws InvalidProtocolBufferException { + var data = FileInvalidationData.parseFrom(bytes, getEmptyRegistry()); + if (data.hasOverflowKey() && !data.getOverflowKey().equals(key)) { + return immediateFailedFuture( + new SerializationException( + String.format( + "Non-matching overflow key. This is possible if there is a key fingerprint" + + " collision. Expected %s got %s", + key, data))); + } + + int pathBegin = key.indexOf(FILE_KEY_DELIMITER) + 1; + int parentDirectoryEnd = key.lastIndexOf(SEPARATOR_CHAR); + + if (parentDirectoryEnd == -1) { + checkState( + !data.hasParentMtsv(), "no parent directory, but had parent MTSV %s, %s", key, data); + return resolveParent(key, data, key.substring(pathBegin), /* parentKey= */ null); + } + + String parentDirectory = key.substring(pathBegin, parentDirectoryEnd); + String parentKey = + computeCacheKey( + parentDirectory, + data.hasParentMtsv() ? data.getParentMtsv() : MTSV_SENTINEL, + FILE_KEY_DELIMITER); + String basename = key.substring(parentDirectoryEnd + 1); + return resolveParent(key, data, basename, parentKey); + } + } + + private ListenableFuture resolveParent( + String key, FileInvalidationData data, String basename, @Nullable String parentKey) { + var waitForParent = new WaitForParent(key, data, basename); + + if (parentKey == null) { + return waitForParent.apply(/* parent= */ null); + } + + switch (getFileDependencies(parentKey)) { + case FileDependencies parent: + return waitForParent.apply(parent); + case FutureFileDependencies future: + return Futures.transformAsync(future, waitForParent, directExecutor()); + } + } + + private class WaitForParent implements AsyncFunction { + private final String key; + private final FileInvalidationData data; + private final String basename; + + private WaitForParent(String key, FileInvalidationData data, String basename) { + this.key = key; + this.data = data; + this.basename = basename; + } + + @Override + public ListenableFuture apply(@Nullable FileDependencies parent) { + FileDependencies.Builder builder; + String parentDirectory; + if (parent == null) { + parentDirectory = null; + builder = FileDependencies.builder(basename); + } else { + parentDirectory = parent.resolvedPath(); + builder = + FileDependencies.builder(getRelative(parentDirectory, basename)).addDependency(parent); + } + return processSymlinks(key, data, /* symlinkIndex= */ 0, parentDirectory, builder); + } + } + + /** + * Processes any symlinks that my be present in {@code data}. + * + * @param key the main key that this symlink belongs to + * @param parentDirectory the real directory containing the symlink + */ + private ListenableFuture processSymlinks( + String key, + FileInvalidationData data, + int symlinkIndex, + @Nullable String parentDirectory, // null if root-level + FileDependencies.Builder builder) { + if (symlinkIndex >= data.getSymlinksCount()) { + FileDependencies dependencies = builder.build(); + // Replaces the cache value with the completed value. The future is likely to become eligible + // for GC shortly after the return below. Clients are expected to retain the meaningful + // top-level values. + dependenciesCache.put(key, dependencies); + return immediateFuture(dependencies); + } + + Symlink link = data.getSymlinks(symlinkIndex); + String linkContents = link.getContents(); + checkState( + OS.getDriveStrLength(linkContents) == 0, + "expected symlink contents to be a relative path: %s", + data); + // Combines the parent directory of the link with its contents and normalizes. + String normalizedLinkTarget = getRelativeAndNormalize(parentDirectory, linkContents); + String normalizedLinkParent = getParentDirectory(normalizedLinkTarget); + + if (!doesSymlinkParentNeedResolution(parentDirectory, normalizedLinkParent)) { + checkState( + !link.hasParentMtsv(), + "no resolution needed for data=%s, symlinkIndex=%s, parentDirectory=%s," + + " normalizedLinkParent=%s but symlink had parent MTSV", + data, + symlinkIndex, + parentDirectory, + normalizedLinkParent); + // Since `normalizedLinkParent` is already a real directory, `normalizedLinkTarget` is the + // resolved symlink path. + if (!normalizedLinkTarget.isEmpty()) { // Avoids adding root as a resolved path. + builder.addPath(normalizedLinkTarget); + } + return processSymlinks(key, data, symlinkIndex + 1, normalizedLinkParent, builder); + } + + String linkBasename = normalizedLinkTarget.substring(normalizedLinkParent.length() + 1); + + String newParentKey = + computeCacheKey( + normalizedLinkParent, + link.hasParentMtsv() ? link.getParentMtsv() : MTSV_SENTINEL, + FILE_KEY_DELIMITER); + + var waitForSymlinkParent = + new WaitForSymlinkParent(key, data, symlinkIndex, linkBasename, builder); + + switch (getFileDependencies(newParentKey)) { + case FileDependencies resolvedParent: + return waitForSymlinkParent.apply(resolvedParent); + case FutureFileDependencies future: + return Futures.transformAsync(future, waitForSymlinkParent, directExecutor()); + } + } + + private class WaitForSymlinkParent implements AsyncFunction { + private final String key; + private final FileInvalidationData data; + private final int symlinkIndex; + private final String linkBasename; + private final FileDependencies.Builder builder; + + private WaitForSymlinkParent( + String key, + FileInvalidationData data, + int symlinkIndex, + String linkBasename, + FileDependencies.Builder builder) { + this.key = key; + this.data = data; + this.symlinkIndex = symlinkIndex; + this.linkBasename = linkBasename; + this.builder = builder; + } + + @Override + public ListenableFuture apply(FileDependencies parent) { + String parentPath = parent.resolvedPath(); + builder.addPath(getRelative(parentPath, linkBasename)).addDependency(parent); + return processSymlinks(key, data, symlinkIndex + 1, parentPath, builder); + } + } + + private static String getRelative(@Nullable String parentDirectory, String basename) { + if (parentDirectory == null) { + return basename; + } + return parentDirectory + SEPARATOR_CHAR + basename; + } + + private static String getRelativeAndNormalize( + @Nullable String parentDirectory, String linkContents) { + int normalizationLevel = OS.needsToNormalize(linkContents); + return OS.normalize(getRelative(parentDirectory, linkContents), normalizationLevel); + } + + @Nullable // null if `path` is at the root level + private static String getParentDirectory(String path) { + int lastSeparator = path.lastIndexOf(SEPARATOR_CHAR); + if (lastSeparator == -1) { // no separator + return null; + } + return path.substring(0, lastSeparator); + } + + /** + * Predicate specifying when a symlink parent directory needs further resolution. + * + *

A relative path specifier in symlink contents can modify the parent directory but it does + * not always do so. For example, the symlink could point to a file in the same directory or the + * symlink could point to a file in an ancestor directory. In both of these cases, the parent + * directory is already fully resolved. + * + * @param previousParent the parent of the actual symlink itself. Null if the parent is actually + * the root directory. + * @param newParent the parent directory after combining the symlink with {@code previousParent}. + * Null if the result is the root directory. + */ + private static boolean doesSymlinkParentNeedResolution( + @Nullable String previousParent, @Nullable String newParent) { + if (newParent == null) { + return false; // Already root level. No parent resolution needed. + } + if (previousParent == null) { + return true; // No previousParent so resolution is needed. + } + // `newParent` is already a resolved path if it is the same as or an ancestor of the already + // resolved `previousParent`. + return !previousParent.startsWith(newParent); + } + + private KeyBytesProvider getKeyBytes(String cacheKey) { + if (cacheKey.length() > MAX_KEY_LENGTH) { + return fingerprintValueService.fingerprint(cacheKey.getBytes(UTF_8)); + } + return new StringKey(cacheKey); + } +}