// Copyright 2014 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.vfs;

import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.stats.CacheStats;
import com.google.common.base.Preconditions;
import com.google.common.primitives.Longs;
import java.io.IOException;

/**
 * Utility class for getting digests of files.
 *
 * <p>This class implements an optional cache of file digests for when computing a digest is costly
 * (i.e. when {@link Path#getFastDigest()} is not available). The cache can be enabled via the
 * {@link #configureCache(long)} function, but note that enabling it might have an impact on
 * correctness because not all changes to files can be detected from their metadata alone.
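 *
 * <p>A minimal usage sketch (hypothetical caller code; the cache size of 50000 entries is an
 * arbitrary example, not a recommendation):
 *
 * <pre>{@code
 * DigestUtils.configureCache(50000); // 0 would disable the cache.
 * byte[] digest =
 *     DigestUtils.getDigestWithManualFallback(path, path.getFileSize(), xattrProvider);
 * CacheStats stats = DigestUtils.getCacheStats(); // Valid only while the cache is enabled.
 * }</pre>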
 */
public class DigestUtils {
  // Typical size for a digest byte array.
  public static final int ESTIMATED_SIZE = 32;

  /**
   * Keys used to cache the values of the digests for files where we don't have fast digests.
   *
   * <p>The cache keys are derived from many properties of the file metadata in an attempt to be
   * able to detect most file changes.
   */
  private static class CacheKey {
    /** Path to the file. */
    private final PathFragment path;

    /** File system identifier of the file (typically the inode number). */
    private final long nodeId;

    /** Last modification time of the file. */
    private final long modifiedTime;

    /** Size of the file. */
    private final long size;

    /**
     * Constructs a new cache key.
     *
     * @param path path to the file
     * @param status file status data from which to obtain the cache key properties
     * @throws IOException if reading the file status data fails
     */
    public CacheKey(Path path, FileStatus status) throws IOException {
      this.path = path.asFragment();
      this.nodeId = status.getNodeId();
      this.modifiedTime = status.getLastModifiedTime();
      this.size = status.getSize();
    }

    @Override
    public boolean equals(Object object) {
      if (object == this) {
        return true;
      } else if (!(object instanceof CacheKey)) {
        return false;
      } else {
        CacheKey key = (CacheKey) object;
        return path.equals(key.path)
            && nodeId == key.nodeId
            && modifiedTime == key.modifiedTime
            && size == key.size;
      }
    }

    @Override
    public int hashCode() {
      int result = 17;
      result = 31 * result + path.hashCode();
      result = 31 * result + Longs.hashCode(nodeId);
      result = 31 * result + Longs.hashCode(modifiedTime);
      result = 31 * result + Longs.hashCode(size);
      return result;
    }
  }

  /**
   * Global cache of files to their digests.
   *
   * <p>This is null when the cache is disabled.
   *
   * <p>Note that we do not use a {@link com.github.benmanes.caffeine.cache.LoadingCache} because
   * our keys hold the paths as {@link PathFragment}s, which carry no reference to a filesystem. As
   * a result, a loading function could not actually compute the digests of the files, so we have
   * to handle loading externally.
   */
  private static Cache<CacheKey, byte[]> globalCache = null;

  /** Private constructor to prevent instantiation of utility class. */
  private DigestUtils() {}

  /**
   * Enables the caching of file digests based on file status data.
   *
   * <p>If the cache was already enabled, this reinitializes the cache, losing all contents. If the
   * given size is zero, the cache is disabled altogether.
   *
   * @param maximumSize maximum size of the cache in number of entries
   */
  public static void configureCache(long maximumSize) {
    if (maximumSize == 0) {
      globalCache = null;
    } else {
      globalCache = Caffeine.newBuilder().maximumSize(maximumSize).recordStats().build();
    }
  }

  /**
   * Obtains cache statistics.
   *
   * <p>The cache must have previously been enabled by a call to {@link #configureCache(long)}.
   *
   * @return an immutable snapshot of the cache statistics
   */
  public static CacheStats getCacheStats() {
    Cache<CacheKey, byte[]> cache = globalCache;
    Preconditions.checkNotNull(cache, "configureCache() must have been called with a size > 0");
    return cache.stats();
  }

  /**
   * Gets the digest of {@code path}, using a constant-time xattr call if the filesystem supports
   * it, and calculating the digest manually otherwise.
   *
   * <p>If {@link Path#getFastDigest} has already been attempted and was not available, call {@link
   * #manuallyComputeDigest} to skip an additional attempt to obtain the fast digest.
   *
   * @param path Path of the file.
   * @param fileSize Size of the file. Used to determine if digest calculation should be done
   *     serially or in parallel. Files larger than a certain threshold will be read serially, in
   *     order to avoid excessive disk seeks.
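   *
   * <p>For illustration, the fallback behavior is equivalent to this sketch (hypothetical caller
   * code):
   *
   * <pre>{@code
   * byte[] fast = xattrProvider.getFastDigest(path);
   * byte[] digest = fast != null ? fast : DigestUtils.manuallyComputeDigest(path, fileSize);
   * }</pre>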
   */
  public static byte[] getDigestWithManualFallback(
      Path path, long fileSize, XattrProvider xattrProvider) throws IOException {
    byte[] digest = xattrProvider.getFastDigest(path);
    return digest != null ? digest : manuallyComputeDigest(path, fileSize);
  }

  /**
   * Gets the digest of {@code path}, using a constant-time xattr call if the filesystem supports
   * it, and calculating the digest manually otherwise.
   *
   * <p>Unlike {@link #getDigestWithManualFallback}, this does not rate-limit manual digesting of
   * files, so only use it if the file size is truly unknown and you don't expect many concurrent
   * manual digests of large files.
   *
   * @param path Path of the file.
   */
  public static byte[] getDigestWithManualFallbackWhenSizeUnknown(
      Path path, XattrProvider xattrProvider) throws IOException {
    return getDigestWithManualFallback(path, -1, xattrProvider);
  }

  /**
   * Calculates the digest manually.
   *
   * @param path Path of the file.
   * @param fileSize Size of the file. Used to determine if digest calculation should be done
   *     serially or in parallel. Files larger than a certain threshold will be read serially, in
   *     order to avoid excessive disk seeks.
   */
  public static byte[] manuallyComputeDigest(Path path, long fileSize) throws IOException {
    byte[] digest;

    // Attempt a cache lookup if the cache is enabled.
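    // Reading the shared reference into a local ensures that a concurrent configureCache() call
    // cannot swap the cache out (or null it) between this lookup and the put below.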
    Cache<CacheKey, byte[]> cache = globalCache;
    CacheKey key = null;
    if (cache != null) {
      key = new CacheKey(path, path.stat());
      digest = cache.getIfPresent(key);
      if (digest != null) {
        return digest;
      }
    }

    digest = path.getDigest();
    Preconditions.checkNotNull(digest, "Missing digest for %s (size %s)", path, fileSize);
    if (cache != null) {
      cache.put(key, digest);
    }
    return digest;
  }

  /**
   * Computes the bitwise XOR of the two arrays, writing the result into the longer array and
   * returning it. May clobber either argument. If the lengths differ, the shorter array is XORed
   * into a prefix of the longer one.
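   *
   * <p>For example (illustrative values): {@code xor(new byte[] {0x0f, 0x0f}, new byte[] {0x01})}
   * returns {@code [0x0e, 0x0f]}, reusing the first array's storage.
   */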
  public static byte[] xor(byte[] lhs, byte[] rhs) {
    int n = rhs.length;
    if (lhs.length >= n) {
      for (int i = 0; i < n; i++) {
        lhs[i] ^= rhs[i];
      }
      return lhs;
    }
    return xor(rhs, lhs);
  }
}