-
Notifications
You must be signed in to change notification settings - Fork 15
[TS | LIP-164000] Reset Sweep Progress #5277
Changes from 5 commits
37a263a
0b25f25
cbcf110
ac7e542
ca30262
b55339b
651491f
44effec
348ca54
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,6 +52,7 @@ | |
public class TargetedSweeper implements MultiTableSweepQueueWriter, BackgroundSweeper { | ||
private static final Logger log = LoggerFactory.getLogger(TargetedSweeper.class); | ||
|
||
private final boolean shouldResetAndStopSweep; | ||
private final Supplier<TargetedSweepRuntimeConfig> runtime; | ||
private final List<Follower> followers; | ||
private final MetricsManager metricsManager; | ||
|
@@ -76,6 +77,7 @@ private TargetedSweeper( | |
this.conservativeScheduler = | ||
new BackgroundSweepScheduler(install.conservativeThreads(), SweeperStrategy.CONSERVATIVE); | ||
this.thoroughScheduler = new BackgroundSweepScheduler(install.thoroughThreads(), SweeperStrategy.THOROUGH); | ||
this.shouldResetAndStopSweep = install.resetTargetedSweepQueueProgressAndStopSweep(); | ||
this.followers = followers; | ||
this.metricsConfiguration = install.metricsConfiguration(); | ||
} | ||
|
@@ -175,8 +177,15 @@ public void initializeWithoutRunning( | |
/**
 * Starts the background work of this sweeper; calls {@code assertInitialized()} first.
 *
 * <p>When {@code shouldResetAndStopSweep} is set, this logs a warning and calls
 * {@code queue.resetSweepProgress()} instead of scheduling any sweep threads. Otherwise it schedules
 * the background threads of both the conservative and the thorough scheduler.
 *
 * <p>NOTE(review): this is a rendered diff hunk. Going by the hunk header (8 old lines, 15 new lines),
 * the two unconditional {@code scheduleBackgroundThreads()} calls directly after
 * {@code assertInitialized()} appear to be the removed lines, superseded by the if/else that
 * follows -- confirm against the merged file.
 */
@Override | ||
public void runInBackground() { | ||
assertInitialized(); | ||
conservativeScheduler.scheduleBackgroundThreads(); | ||
thoroughScheduler.scheduleBackgroundThreads(); | ||
// Reset mode: warn the operator, reset queue progress, and leave both schedulers unstarted.
if (shouldResetAndStopSweep) { | ||
log.warn("This AtlasDB node is operating in a mode where it is attempting to reset the progress of " | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this really what we want? We could just allow it to start sweeping. Also the nodes on old versions will still be attempting to sweep anyway There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Discussed offline: this centers around the behaviour of targeted sweep where nodes CAS the bound from (thing I read -> my progress) repeatedly. We need to wait for all nodes to report that they're done with this. |
||
+ "targeted sweep. While in this mode, your data is not getting swept: please restart your node " | ||
+ "once it is confirmed that sweep progress has been reset."); | ||
queue.resetSweepProgress(); | ||
} else { | ||
conservativeScheduler.scheduleBackgroundThreads(); | ||
thoroughScheduler.scheduleBackgroundThreads(); | ||
} | ||
} | ||
|
||
@Override | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ | |
package com.palantir.atlasdb.sweep.queue; | ||
|
||
import static org.assertj.core.api.Assertions.assertThat; | ||
import static org.assertj.core.api.Assertions.assertThatCode; | ||
import static org.assertj.core.api.Assertions.assertThatThrownBy; | ||
import static org.mockito.ArgumentMatchers.any; | ||
import static org.mockito.ArgumentMatchers.anyMap; | ||
|
@@ -37,6 +38,7 @@ | |
|
||
public class ShardProgressTest { | ||
private static final long INITIAL_TIMESTAMP = SweepQueueUtils.INITIAL_TIMESTAMP; | ||
private static final long RESET_TIMESTAMP = SweepQueueUtils.RESET_TIMESTAMP; | ||
|
||
private ShardProgress progress; | ||
private KeyValueService kvs; | ||
|
@@ -186,6 +188,46 @@ public void repeatedlyFailingCasThrows() { | |
.isInstanceOf(CheckAndSetException.class); | ||
} | ||
|
||
/**
 * Checks that resetting the progress of one shard leaves the progress of another shard untouched.
 */
@Test | ||
public void canResetProgressForSpecificShards() { | ||
// Seed both shards with the same last-swept timestamp and confirm it reads back.
progress.updateLastSweptTimestamp(CONSERVATIVE_TEN, 8888L); | ||
progress.updateLastSweptTimestamp(CONSERVATIVE_TWENTY, 8888L); | ||
assertThat(progress.getLastSweptTimestamp(CONSERVATIVE_TEN)).isEqualTo(8888L); | ||
assertThat(progress.getLastSweptTimestamp(CONSERVATIVE_TWENTY)).isEqualTo(8888L); | ||
|
||
// Reset only CONSERVATIVE_TEN: it must now read RESET_TIMESTAMP while CONSERVATIVE_TWENTY keeps 8888L.
progress.resetProgressForShard(CONSERVATIVE_TEN); | ||
assertThat(progress.getLastSweptTimestamp(CONSERVATIVE_TEN)).isEqualTo(RESET_TIMESTAMP); | ||
assertThat(progress.getLastSweptTimestamp(CONSERVATIVE_TWENTY)).isEqualTo(8888L); | ||
} | ||
|
||
/**
 * Checks that {@code resetProgressForShard} completes without throwing when every
 * {@code checkAndSet} call fails but a later read already returns {@code RESET_TIMESTAMP}.
 *
 * <p>NOTE(review): presumably the reset logic re-reads after a failed check-and-set and stops once
 * it observes the reset value; that implementation is not visible in this hunk -- confirm.
 */
@Test | ||
public void stopsTryingToResetIfSomeoneElseDid() { | ||
KeyValueService mockKvs = mock(KeyValueService.class); | ||
// Successive get calls are stubbed to return 8, then 4, then RESET_TIMESTAMP.
when(mockKvs.get(any(), anyMap())) | ||
.thenReturn(ImmutableMap.of(DUMMY, createValue(8L))) | ||
.thenReturn(ImmutableMap.of(DUMMY, createValue(4L))) | ||
.thenReturn(ImmutableMap.of(DUMMY, createValue(RESET_TIMESTAMP))); | ||
// Every checkAndSet attempt on the mock is rejected.
doThrow(new CheckAndSetException("sadness")).when(mockKvs).checkAndSet(any()); | ||
ShardProgress instrumentedProgress = new ShardProgress(mockKvs); | ||
|
||
assertThatCode(() -> instrumentedProgress.resetProgressForShard(CONSERVATIVE_TEN)) | ||
.doesNotThrowAnyException(); | ||
} | ||
|
||
/**
 * Checks that {@code resetProgressForShard} surfaces a {@code CheckAndSetException} when every
 * {@code checkAndSet} call fails and no stubbed read ever returns {@code RESET_TIMESTAMP}.
 *
 * <p>NOTE(review): the expectation is written as {@code assertThatCode(...).isInstanceOf(...)};
 * {@code assertThatThrownBy}, which this test file already statically imports, would state the
 * "must throw" intent more directly -- consider switching.
 */
@Test | ||
public void repeatedlyFailingCasThrowsForReset() { | ||
KeyValueService mockKvs = mock(KeyValueService.class); | ||
// Successive get calls are stubbed to return 8, then 9, then 10 -- never the reset value.
when(mockKvs.get(any(), anyMap())) | ||
.thenReturn(ImmutableMap.of(DUMMY, createValue(8L))) | ||
.thenReturn(ImmutableMap.of(DUMMY, createValue(9L))) | ||
.thenReturn(ImmutableMap.of(DUMMY, createValue(10L))); | ||
// Every checkAndSet attempt on the mock is rejected.
doThrow(new CheckAndSetException("sadness")).when(mockKvs).checkAndSet(any()); | ||
ShardProgress instrumentedProgress = new ShardProgress(mockKvs); | ||
|
||
assertThatCode(() -> instrumentedProgress.resetProgressForShard(CONSERVATIVE_TEN)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably want to verify that it only throws once the value actually repeats There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, I think this is tested by There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OH I meant just verify that the method was called 4 times There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added! |
||
.isInstanceOf(CheckAndSetException.class); | ||
} | ||
|
||
private Value createValue(long num) { | ||
SweepShardProgressTable.Value value = SweepShardProgressTable.Value.of(num); | ||
return Value.create(value.persistValue(), 0L); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
type: improvement | ||
improvement: | ||
description: |- | ||
Targeted sweep progress may be reset with the `resetTargetedSweepQueueProgressAndStopSweep` flag in targeted sweep install configuration. This may be useful in cleaning up cruft in the targeted sweep queue that may have been written by failed transactions. | ||
|
||
As the name suggests, this will prevent sweep from cleaning up old cells, so users should not run with this configuration in the steady state. If running your service in HA, once the last node rolls and reports that it has successfully reset the sweep progress table, we can be certain that progress has been reset to zero. | ||
links: | ||
- https://github.com/palantir/atlasdb/pull/5277 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Eh, not a fan of the code duplication, but it's different enough to make it awkward to reuse. We could just delete the entry to make this much simpler but I assume we want HA -- not that sweep will work if we don't have delete consistency anyway...