Skip to content

Commit

Permalink
Skip TRANSLOG stage for searchable snapshots recovery stage
Browse files Browse the repository at this point in the history
This commit introduces a change where searchable snapshots
skip the TRANSLOG stage. Since elastic#65531 was introduced, the
cleanFiles peer recovery stage is blocked until the prewarming
completes (this is done to avoid search latency spikes due to a cold
cache). In that phase, the RecoveryState stage is
TRANSLOG which can be confusing as we don't replay
any ops during searchable snapshots recoveries. In order
to avoid that confusion we transition directly to
FINALIZE stage.
  • Loading branch information
fcofdez committed Jan 19, 2021
1 parent c2c40d7 commit 8ad5be6
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,12 @@ private void doRecovery(final long recoveryId, final StartRecoveryRequest preExi
logger.trace("{} preparing shard for peer recovery", recoveryTarget.shardId());
indexShard.prepareForIndexRecovery();
final long startingSeqNo = indexShard.recoverLocallyUpToGlobalCheckpoint();
assert startingSeqNo == UNASSIGNED_SEQ_NO || recoveryTarget.state().getStage() == RecoveryState.Stage.TRANSLOG :
final RecoveryState state = recoveryTarget.state();
assert startingSeqNo == UNASSIGNED_SEQ_NO ||
(state.getIndexType() == RecoveryState.IndexType.REGULAR && state.getStage() == RecoveryState.Stage.TRANSLOG) ||
// Searchable snapshots skip the TRANSLOG recovery state stage
(state.getIndexType() == RecoveryState.IndexType.SEARCHABLE_SNAPSHOT &&
(state.getStage() == RecoveryState.Stage.FINALIZE || state.getStage() == RecoveryState.Stage.DONE) ) :
"unexpected recovery stage [" + recoveryTarget.state().getStage() + "] starting seqno [ " + startingSeqNo + "]";
startRequest = getStartRecoveryRequest(logger, clusterService.localNode(), recoveryTarget, startingSeqNo);
requestToSend = startRequest;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,11 @@ public static Stage fromId(byte id) {
}
}

public enum IndexType {
REGULAR,
SEARCHABLE_SNAPSHOT;
}

private Stage stage;

private final Index index;
Expand Down Expand Up @@ -177,6 +182,9 @@ public synchronized Stage getStage() {
return this.stage;
}

public IndexType getIndexType() {
return IndexType.REGULAR;
}

protected void validateAndSetStage(Stage expected, Stage next) {
if (stage != expected) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ public void testRelocationWaitsForPreWarm() throws Exception {
assertEquals(secondDataNode, shardRecoveryState.getTargetNode().getName());
});

assertBusy(() -> assertSame(RecoveryState.Stage.TRANSLOG, getActiveRelocations(restoredIndex).get(0).getStage()));
assertBusy(() -> assertSame(RecoveryState.Stage.FINALIZE, getActiveRelocations(restoredIndex).get(0).getStage()));
final Index restoredIdx = clusterAdmin().prepareState().get().getState().metadata().index(restoredIndex).getIndex();
final IndicesService indicesService = internalCluster().getInstance(IndicesService.class, secondDataNode);
assertEquals(1, indicesService.indexService(restoredIdx).getShard(0).outstandingCleanFilesConditions());
Expand Down Expand Up @@ -123,9 +123,7 @@ private static List<RecoveryState> getActiveRelocations(String restoredIndex) {
.shardRecoveryStates()
.get(restoredIndex)
.stream()
// filter for relocations that are not in stage FINALIZE (they could end up in this stage without progress for good if the
// target node does not have enough cache space available to hold the primary completely
.filter(recoveryState -> recoveryState.getSourceNode() != null && recoveryState.getStage() != RecoveryState.Stage.FINALIZE)
.filter(recoveryState -> recoveryState.getSourceNode() != null)
.collect(Collectors.toList());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,32 @@ public synchronized RecoveryState setStage(Stage stage) {
return this;
}

return super.setStage(stage);
switch (stage) {
case TRANSLOG:
super.setStage(Stage.TRANSLOG);
// Move directly to PRE_WARMING stage
validateAndSetStage(Stage.TRANSLOG, Stage.FINALIZE);
getTranslog().stop();
break;
case FINALIZE:
// skip
break;
default:
super.setStage(stage);
}

return this;
}

@Override
public IndexType getIndexType() {
return IndexType.SEARCHABLE_SNAPSHOT;
}

@Override
public synchronized void validateCurrentStage(Stage expected) {
// We skip the TRANSLOG phase for searchable snapshots
super.validateCurrentStage(expected == Stage.TRANSLOG ? Stage.FINALIZE : expected);
}

public synchronized void setPreWarmComplete() {
Expand Down

0 comments on commit 8ad5be6

Please sign in to comment.