
Snapshot deletion optimisations to speed up snapshot deletion, creation
Signed-off-by: Ashish Singh <[email protected]>
ashking94 committed Sep 2, 2024
1 parent 758c2aa commit 05757e2
Showing 2 changed files with 79 additions and 12 deletions.
server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java
@@ -66,6 +66,7 @@
import org.opensearch.common.Nullable;
import org.opensearch.common.Numbers;
import org.opensearch.common.Priority;
import org.opensearch.common.Randomness;
import org.opensearch.common.SetOnce;
import org.opensearch.common.UUIDs;
import org.opensearch.common.blobstore.BlobContainer;
@@ -1200,6 +1201,10 @@ private void asyncCleanupUnlinkedShardLevelBlobs(
ActionListener<Void> listener
) {
final List<Tuple<BlobPath, String>> filesToDelete = resolveFilesToDelete(oldRepositoryData, snapshotIds, deleteResults);
long startTimeNs = System.nanoTime();
Randomness.shuffle(filesToDelete);
logger.info("[{}] shuffled the filesToDelete with timeElapsedNs={}", metadata.name(), (System.nanoTime() - startTimeNs));

if (filesToDelete.isEmpty()) {
listener.onResponse(null);
return;
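
The Randomness.shuffle call added above randomises the deletion order before the list is split into batches, presumably so that concurrent delete batches are spread across many shard-path prefixes of the object store rather than hammering a single prefix. A minimal sketch of the effect, using plain java.util types in place of OpenSearch's Randomness and Tuple helpers (paths are illustrative):

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;

    public class ShuffledDeletes {
        public static void main(String[] args) {
            // Stand-ins for the (BlobPath, blobName) tuples resolved from repository data.
            List<String> filesToDelete = new ArrayList<>(List.of(
                "indices/idxA/0/index-3", "indices/idxA/1/index-7",
                "indices/idxB/0/index-2", "indices/idxB/1/index-5"));

            // Randomness.shuffle(filesToDelete) in the diff behaves like:
            Collections.shuffle(filesToDelete);

            // Batches built from this list now interleave idxA/... and idxB/... prefixes,
            // spreading delete load across object-store key ranges.
            System.out.println(filesToDelete);
        }
    }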
@@ -1217,8 +1222,8 @@ private void asyncCleanupUnlinkedShardLevelBlobs(
staleFilesToDeleteInBatch.size()
);

// Start as many workers as fit into the snapshot pool at once at the most
final int workers = Math.min(threadPool.info(ThreadPool.Names.SNAPSHOT).getMax(), staleFilesToDeleteInBatch.size());
// Start as many workers as fit into the snapshot_deletion pool at once at the most
final int workers = Math.min(threadPool.info(ThreadPool.Names.SNAPSHOT_DELETION).getMax(), staleFilesToDeleteInBatch.size());
for (int i = 0; i < workers; ++i) {
executeStaleShardDelete(staleFilesToDeleteInBatch, remoteStoreLockManagerFactory, groupedListener);
}
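
Capping the worker count at the pool's maximum size and letting each worker pull the next batch when it finishes keeps the snapshot_deletion pool busy without flooding it with queued tasks. A self-contained sketch of that drain pattern, with a plain ExecutorService standing in for the pool and a CountDownLatch standing in for the GroupedActionListener (pool size and batch names are made up):

    import java.util.List;
    import java.util.concurrent.BlockingQueue;
    import java.util.concurrent.CountDownLatch;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.LinkedBlockingQueue;

    public class BoundedQueueDrain {
        public static void main(String[] args) throws InterruptedException {
            BlockingQueue<String> batches = new LinkedBlockingQueue<>(
                List.of("batch-1", "batch-2", "batch-3", "batch-4", "batch-5"));
            CountDownLatch done = new CountDownLatch(batches.size());
            ExecutorService pool = Executors.newFixedThreadPool(2); // stand-in for snapshot_deletion

            // Start at most min(poolMax, queued batches) workers, as the diff does.
            int workers = Math.min(2, batches.size());
            for (int i = 0; i < workers; i++) {
                drainOne(pool, batches, done);
            }

            done.await(); // every batch processed
            pool.shutdown();
        }

        // Process one batch, then re-submit until the queue is empty, mirroring
        // executeStaleShardDelete's self-requeue after each deletion.
        static void drainOne(ExecutorService pool, BlockingQueue<String> batches, CountDownLatch done) {
            String batch = batches.poll();
            if (batch == null) {
                return; // queue drained; this worker retires
            }
            pool.execute(() -> {
                System.out.println(Thread.currentThread().getName() + " deleting " + batch);
                done.countDown();
                drainOne(pool, batches, done);
            });
        }
    }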
@@ -1321,7 +1326,7 @@ private void executeStaleShardDelete(
if (filesToDelete == null) {
return;
}
threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.wrap(listener, l -> {
threadPool.executor(ThreadPool.Names.SNAPSHOT_DELETION).execute(ActionRunnable.wrap(listener, l -> {
try {
// filtering files for which remote store lock release and cleanup succeeded,
// remaining files for which it failed will be retried in next snapshot delete run.
@@ -1385,7 +1390,7 @@ private void writeUpdatedShardMetaDataAndComputeDeletes(
ActionListener<Collection<ShardSnapshotMetaDeleteResult>> onAllShardsCompleted
) {

final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT_DELETION);
final List<IndexId> indices = oldRepositoryData.indicesToUpdateAfterRemovingSnapshot(snapshotIds);

if (indices.isEmpty()) {
@@ -1573,7 +1578,7 @@ private void cleanupStaleBlobs(
listener.onResponse(deleteResult);
}, listener::onFailure), 2);

final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT_DELETION);
final List<String> staleRootBlobs = staleRootBlobs(newRepoData, rootBlobs.keySet());
if (staleRootBlobs.isEmpty()) {
groupedListener.onResponse(DeleteResult.ZERO);
@@ -1776,7 +1781,7 @@ void cleanupStaleIndices(

// Start as many workers as fit into the snapshot pool at once at the most
final int workers = Math.min(
threadPool.info(ThreadPool.Names.SNAPSHOT).getMax(),
threadPool.info(ThreadPool.Names.SNAPSHOT_DELETION).getMax(),
foundIndices.size() - survivingIndexIds.size()
);
for (int i = 0; i < workers; ++i) {
@@ -1828,7 +1833,7 @@ private void executeOneStaleIndexDelete(
return;
}
final String indexSnId = indexEntry.getKey();
threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.supply(listener, () -> {
threadPool.executor(ThreadPool.Names.SNAPSHOT_DELETION).execute(ActionRunnable.supply(listener, () -> {
try {
logger.debug("[{}] Found stale index [{}]. Cleaning it up", metadata.name(), indexSnId);
List<String> matchingShardPaths = findMatchingShardPaths(indexSnId, snapshotShardPaths);
@@ -2070,8 +2075,7 @@ public void finalizeSnapshot(
stateTransformer,
repositoryUpdatePriority,
ActionListener.wrap(newRepoData -> {
cleanupOldShardGens(existingRepositoryData, updatedRepositoryData);
listener.onResponse(newRepoData);
cleanupOldShardGens(existingRepositoryData, updatedRepositoryData, newRepoData, listener);
}, onUpdateFailure)
);
}, onUpdateFailure), 2 + indices.size());
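
Note the behavioural change here: the listener is no longer completed immediately after kicking off cleanup; it is handed into cleanupOldShardGens, which now responds with the new RepositoryData only once the old shard generations have been deleted (or the attempt has failed). A toy sketch of that inversion, with a bare callback interface in place of OpenSearch's ActionListener (all names illustrative):

    public class ListenerChaining {
        interface Listener<T> { void onResponse(T value); }

        // Before: cleanup ran inline and the caller responded right away.
        static void finalizeBefore(String newRepoData, Listener<String> listener) {
            cleanupBlocking();
            listener.onResponse(newRepoData);
        }

        // After: cleanup owns the listener and responds when its async work completes.
        static void finalizeAfter(String newRepoData, Listener<String> listener) {
            cleanupAsync(() -> listener.onResponse(newRepoData));
        }

        static void cleanupBlocking() { /* delete stale shard generations inline */ }

        static void cleanupAsync(Runnable onDone) {
            new Thread(() -> {
                /* batched deletes on a worker pool */
                onDone.run();
            }).start();
        }

        public static void main(String[] args) {
            finalizeAfter("repo-data-gen-42", v -> System.out.println("finalized with " + v));
        }
    }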
@@ -2227,7 +2231,12 @@ private void logShardPathsOperationWarning(IndexId indexId, SnapshotId snapshotI
}

// Delete all old shard gen blobs that aren't referenced any longer as a result from moving to updated repository data
private void cleanupOldShardGens(RepositoryData existingRepositoryData, RepositoryData updatedRepositoryData) {
private void cleanupOldShardGens(
RepositoryData existingRepositoryData,
RepositoryData updatedRepositoryData,
RepositoryData newRepositoryData,
ActionListener<RepositoryData> listener
) {
final List<String> toDelete = new ArrayList<>();
updatedRepositoryData.shardGenerations()
.obsoleteShardGenerations(existingRepositoryData.shardGenerations())
@@ -2236,10 +2245,61 @@ private void cleanupOldShardGens(RepositoryData existingRepositoryData, Reposito
(shardId, oldGen) -> toDelete.add(shardPath(indexId, shardId).buildAsString() + INDEX_FILE_PREFIX + oldGen)
)
);
if (toDelete.isEmpty()) {
listener.onResponse(newRepositoryData);
return;
}
try {
deleteFromContainer(rootBlobContainer(), toDelete);
AtomicInteger counter = new AtomicInteger();
Collection<List<String>> subList = toDelete.stream()
.collect(Collectors.groupingBy(it -> counter.getAndIncrement() / maxShardBlobDeleteBatch))
.values();
final BlockingQueue<List<String>> staleFilesToDeleteInBatch = new LinkedBlockingQueue<>(subList);
logger.info(
"[{}] cleanupOldShardGens toDeleteSize={} groupSize={}",
metadata.name(),
toDelete.size(),
staleFilesToDeleteInBatch.size()
);
final GroupedActionListener<Void> groupedListener = new GroupedActionListener<>(ActionListener.wrap(r -> {
logger.info("[{}] completed cleanupOldShardGens", metadata.name());
listener.onResponse(newRepositoryData);
}, ex -> {
logger.error(new ParameterizedMessage("[{}] exception in cleanupOldShardGens", metadata.name()), ex);
listener.onResponse(newRepositoryData);
}), staleFilesToDeleteInBatch.size());

// Start as many workers as fit into the snapshot_deletion pool at once at the most
final int workers = Math.min(threadPool.info(ThreadPool.Names.SNAPSHOT_DELETION).getMax(), staleFilesToDeleteInBatch.size());
for (int i = 0; i < workers; ++i) {
executeOldShardGensCleanup(staleFilesToDeleteInBatch, groupedListener);
}
} catch (Exception e) {
logger.warn("Failed to clean up old shard generation blobs", e);
logger.warn(new ParameterizedMessage(" [{}] Failed to clean up old shard generation blobs", metadata.name()), e);
listener.onResponse(newRepositoryData);
}
}

private void executeOldShardGensCleanup(BlockingQueue<List<String>> staleFilesToDeleteInBatch, GroupedActionListener<Void> listener)
throws InterruptedException {
List<String> filesToDelete = staleFilesToDeleteInBatch.poll(0L, TimeUnit.MILLISECONDS);
if (filesToDelete != null) {
threadPool.executor(ThreadPool.Names.SNAPSHOT_DELETION).execute(ActionRunnable.wrap(listener, l -> {
try {
deleteFromContainer(rootBlobContainer(), filesToDelete);
l.onResponse(null);
} catch (Exception e) {
logger.warn(
() -> new ParameterizedMessage(
"[{}] Failed to delete following blobs during cleanupOldFiles : {}",
metadata.name(),
filesToDelete
),
e
);
l.onFailure(e);
}
executeOldShardGensCleanup(staleFilesToDeleteInBatch, listener);
}));
}
}
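
The counter.getAndIncrement() / maxShardBlobDeleteBatch grouping key used above is a compact way to split a list into consecutive fixed-size chunks before queueing them. A standalone sketch of the idiom with a made-up batch size (the resulting HashMap makes no ordering promise across batches, which is fine here because the batches go straight into an unordered work queue):

    import java.util.Collection;
    import java.util.List;
    import java.util.concurrent.atomic.AtomicInteger;
    import java.util.stream.Collectors;
    import java.util.stream.IntStream;

    public class BatchPartition {
        public static void main(String[] args) {
            List<String> toDelete = IntStream.range(0, 10)
                .mapToObj(i -> "index-" + i)
                .collect(Collectors.toList());

            int batchSize = 4; // the diff uses the repository's maxShardBlobDeleteBatch setting
            AtomicInteger counter = new AtomicInteger();

            // Elements 0..3 get key 0, 4..7 key 1, 8..9 key 2; order within each
            // batch is preserved because the stream is sequential.
            Collection<List<String>> batches = toDelete.stream()
                .collect(Collectors.groupingBy(it -> counter.getAndIncrement() / batchSize))
                .values();

            batches.forEach(System.out::println);
        }
    }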

server/src/main/java/org/opensearch/threadpool/ThreadPool.java
@@ -105,6 +105,7 @@ public static class Names {
public static final String REFRESH = "refresh";
public static final String WARMER = "warmer";
public static final String SNAPSHOT = "snapshot";
public static final String SNAPSHOT_DELETION = "snapshot_deletion";
public static final String FORCE_MERGE = "force_merge";
public static final String FETCH_SHARD_STARTED = "fetch_shard_started";
public static final String FETCH_SHARD_STORE = "fetch_shard_store";
@@ -176,6 +177,7 @@ public static ThreadPoolType fromType(String type) {
map.put(Names.REFRESH, ThreadPoolType.SCALING);
map.put(Names.WARMER, ThreadPoolType.SCALING);
map.put(Names.SNAPSHOT, ThreadPoolType.SCALING);
map.put(Names.SNAPSHOT_DELETION, ThreadPoolType.SCALING);
map.put(Names.FORCE_MERGE, ThreadPoolType.FIXED);
map.put(Names.FETCH_SHARD_STARTED, ThreadPoolType.SCALING);
map.put(Names.FETCH_SHARD_STORE, ThreadPoolType.SCALING);
@@ -234,6 +236,7 @@ public ThreadPool(
final int halfProcMaxAt5 = halfAllocatedProcessorsMaxFive(allocatedProcessors);
final int halfProcMaxAt10 = halfAllocatedProcessorsMaxTen(allocatedProcessors);
final int genericThreadPoolMax = boundedBy(4 * allocatedProcessors, 128, 512);
final int snapshotDeletionPoolMax = boundedBy(4 * allocatedProcessors, 64, 256);
builders.put(Names.GENERIC, new ScalingExecutorBuilder(Names.GENERIC, 4, genericThreadPoolMax, TimeValue.timeValueSeconds(30)));
builders.put(Names.WRITE, new FixedExecutorBuilder(settings, Names.WRITE, allocatedProcessors, 10000));
builders.put(Names.GET, new FixedExecutorBuilder(settings, Names.GET, allocatedProcessors, 1000));
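
The new pool's ceiling, boundedBy(4 * allocatedProcessors, 64, 256), clamps four-threads-per-core into the range [64, 256] (ThreadPool.boundedBy(size, min, max) is Math.min(max, Math.max(min, size))), so snapshot deletion gets a much deeper pool than the snapshot pool's half-processors-capped-at-five sizing, while staying narrower than the generic pool's 128-512 band. A worked sketch of the arithmetic:

    public class PoolSizing {
        // Same clamping as ThreadPool.boundedBy(size, min, max).
        static int boundedBy(int size, int min, int max) {
            return Math.min(max, Math.max(min, size));
        }

        public static void main(String[] args) {
            for (int procs : new int[] { 4, 16, 32, 100 }) {
                System.out.printf("procs=%3d -> snapshotDeletionPoolMax=%d%n",
                    procs, boundedBy(4 * procs, 64, 256));
            }
            // procs=  4 -> 64   (4*4=16, raised to the 64 floor)
            // procs= 16 -> 64   (exactly at the floor)
            // procs= 32 -> 128
            // procs=100 -> 256  (4*100=400, cut to the 256 ceiling)
        }
    }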
@@ -251,6 +254,10 @@ public ThreadPool(
builders.put(Names.REFRESH, new ScalingExecutorBuilder(Names.REFRESH, 1, halfProcMaxAt10, TimeValue.timeValueMinutes(5)));
builders.put(Names.WARMER, new ScalingExecutorBuilder(Names.WARMER, 1, halfProcMaxAt5, TimeValue.timeValueMinutes(5)));
builders.put(Names.SNAPSHOT, new ScalingExecutorBuilder(Names.SNAPSHOT, 1, halfProcMaxAt5, TimeValue.timeValueMinutes(5)));
builders.put(
Names.SNAPSHOT_DELETION,
new ScalingExecutorBuilder(Names.SNAPSHOT_DELETION, 1, snapshotDeletionPoolMax, TimeValue.timeValueMinutes(5))
);
builders.put(
Names.FETCH_SHARD_STARTED,
new ScalingExecutorBuilder(Names.FETCH_SHARD_STARTED, 1, 2 * allocatedProcessors, TimeValue.timeValueMinutes(5))
