Skip to content

Commit

Permalink
Log a msg when shard snapshot sees interrupt (#116364)
Browse files Browse the repository at this point in the history
Adds debug-level logging when a shard snapshot sees an interrupt. The
case we're interested in is shard snapshots pausing during shutdown
(which is also the only time we pause snapshots). A snapshot abort will
also be caught and logged if there's an async error during snapshotting,
but this should be uncommon.

Relates ES-8773
  • Loading branch information
DiannaHohensee authored Nov 14, 2024
1 parent fc67f7c commit 7fef1cd
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,11 @@ public ShardSnapshotResult getShardSnapshotResult() {
}

public void ensureNotAborted() {
switch (stage.get()) {
ensureNotAborted(stage.get());
}

public static void ensureNotAborted(Stage shardSnapshotStage) {
switch (shardSnapshotStage) {
case ABORTED -> throw new AbortedSnapshotException();
case PAUSING -> throw new PausedSnapshotException();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,11 @@
public abstract class BlobStoreRepository extends AbstractLifecycleComponent implements Repository {
private static final Logger logger = LogManager.getLogger(BlobStoreRepository.class);

private class ShutdownLogger {
// Creating a separate logger so that the log-level can be manipulated separately from the parent class.
private static final Logger shutdownLogger = LogManager.getLogger(ShutdownLogger.class);
}

protected volatile RepositoryMetadata metadata;

protected final ThreadPool threadPool;
Expand Down Expand Up @@ -3467,10 +3472,37 @@ private void doSnapshotShard(SnapshotShardContext context) {
}

private static void ensureNotAborted(ShardId shardId, SnapshotId snapshotId, IndexShardSnapshotStatus snapshotStatus, String fileName) {
var shardSnapshotStage = snapshotStatus.getStage();
try {
snapshotStatus.ensureNotAborted();
IndexShardSnapshotStatus.ensureNotAborted(shardSnapshotStage);

if (shardSnapshotStage != IndexShardSnapshotStatus.Stage.INIT && shardSnapshotStage != IndexShardSnapshotStatus.Stage.STARTED) {
// A normally running shard snapshot should be in stage INIT or STARTED. And we know it's not in PAUSING or ABORTED because
// the ensureNotAborted() call above did not throw. The remaining options don't make sense, if they ever happen.
logger.error(
() -> Strings.format(
"Shard snapshot found an unexpected state. ShardId [{}], SnapshotID [{}], Stage [{}]",
shardId,
snapshotId,
shardSnapshotStage
)
);
assert false;
}
} catch (Exception e) {
logger.debug("[{}] [{}] {} on the file [{}], exiting", shardId, snapshotId, e.getMessage(), fileName);
// We want to see when a shard snapshot operation checks for and finds an interrupt signal during shutdown. A
// PausedSnapshotException indicates we're in shutdown because that's the only case when shard snapshots are signaled to pause.
// An AbortedSnapshotException may also occur during shutdown if an uncommon error occurs.
ShutdownLogger.shutdownLogger.debug(
() -> Strings.format(
"Shard snapshot operation is aborting. ShardId [%s], SnapshotID [%s], File [%s], Stage [%s]",
shardId,
snapshotId,
fileName,
shardSnapshotStage
),
e
);
assert e instanceof AbortedSnapshotException || e instanceof PausedSnapshotException : e;
throw e;
}
Expand Down

0 comments on commit 7fef1cd

Please sign in to comment.