Fix retention leases sync on recovery test
This test had a bug. We attempt to allow only the primary to be
allocated, to force all replicas to recover from the primary after we
had set the state of the retention leases on the primary. However, in
building the index settings, we were overwriting the settings that
exclude the replicas from being allocated. This means that some of the
replicas would end up assigned and, rather than receiving retention
leases during recovery, they would be part of the replication group
receiving retention leases as they are manipulated. Since retention
lease renewals are only synced periodically, the replica could be
lagging a little behind in some cases, leading to an assertion tripping
in the test. This commit addresses this by ensuring that the replicas
are indeed not allocated until after the retention leases are done
being manipulated on the primary. We did this by not overwriting the
exclude settings.

Closes #39105
jasontedor committed Feb 19, 2019
1 parent fbabd81 commit 44fc57f
Showing 1 changed file with 10 additions and 14 deletions.
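
For context, here is a minimal sketch of the pitfall the commit message describes and the shape of the fix, assuming a test class extending ESIntegTestCase; the class and method names below are hypothetical and not part of this commit. As described above, prepareCreate("index", 1) is meant to confine the index to a single node via allocation-exclude settings, but chaining setSettings(...) afterwards replaces the request's settings wholesale, dropping the exclusion; passing the Settings.Builder to prepareCreate lets the exclusion be added to the same builder instead.

import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.test.ESIntegTestCase;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;

// Hypothetical sketch class; not part of the commit.
public class AllocationExcludeSketchIT extends ESIntegTestCase {

    // Buggy pattern: prepareCreate("index", 1) prepares allocation-exclude settings
    // so the index fits on one node, but setSettings(...) then replaces the request
    // settings wholesale, so the exclusion is lost and replicas can be allocated.
    public void testExcludeSettingsOverwritten() {
        final Settings settings = Settings.builder()
            .put("index.number_of_shards", 1)
            .put("index.number_of_replicas", 0)
            .build();
        assertAcked(prepareCreate("index", 1).setSettings(settings));
    }

    // Fixed pattern: pass the Settings.Builder to prepareCreate so the
    // allocation-exclude settings are added to the same builder rather than
    // being overwritten.
    public void testExcludeSettingsPreserved() {
        final Settings.Builder settings = Settings.builder()
            .put("index.number_of_shards", 1)
            .put("index.number_of_replicas", 0);
        assertAcked(prepareCreate("index", 1, settings));
    }
}

In the test itself, the replica count is raised only after the retention leases have been manipulated on the primary, so the exclusion must survive index creation for the replicas to stay unassigned until then.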
@@ -39,7 +39,7 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
-import java.util.HashMap;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.CountDownLatch;
@@ -89,7 +89,7 @@ public void testRetentionLeasesSyncedOnAdd() throws Exception {
 .getShardOrNull(new ShardId(resolveIndex("index"), 0));
 // we will add multiple retention leases and expect to see them synced to all replicas
 final int length = randomIntBetween(1, 8);
-final Map<String, RetentionLease> currentRetentionLeases = new HashMap<>();
+final Map<String, RetentionLease> currentRetentionLeases = new LinkedHashMap<>();
 for (int i = 0; i < length; i++) {
 final String id = randomValueOtherThanMany(currentRetentionLeases.keySet()::contains, () -> randomAlphaOfLength(8));
 final long retainingSequenceNumber = randomLongBetween(0, Long.MAX_VALUE);
@@ -136,7 +136,7 @@ public void testRetentionLeaseSyncedOnRemove() throws Exception {
 .getInstance(IndicesService.class, primaryShardNodeName)
 .getShardOrNull(new ShardId(resolveIndex("index"), 0));
 final int length = randomIntBetween(1, 8);
-final Map<String, RetentionLease> currentRetentionLeases = new HashMap<>();
+final Map<String, RetentionLease> currentRetentionLeases = new LinkedHashMap<>();
 for (int i = 0; i < length; i++) {
 final String id = randomValueOtherThanMany(currentRetentionLeases.keySet()::contains, () -> randomAlphaOfLength(8));
 final long retainingSequenceNumber = randomLongBetween(0, Long.MAX_VALUE);
@@ -277,7 +277,7 @@ public void testBackgroundRetentionLeaseSync() throws Exception {
 .getShardOrNull(new ShardId(resolveIndex("index"), 0));
 // we will add multiple retention leases and expect to see them synced to all replicas
 final int length = randomIntBetween(1, 8);
-final Map<String, RetentionLease> currentRetentionLeases = new HashMap<>(length);
+final Map<String, RetentionLease> currentRetentionLeases = new LinkedHashMap<>(length);
 final List<String> ids = new ArrayList<>(length);
 for (int i = 0; i < length; i++) {
 final String id = randomValueOtherThanMany(currentRetentionLeases.keySet()::contains, () -> randomAlphaOfLength(8));
@@ -318,15 +318,15 @@ public void testRetentionLeasesSyncOnRecovery() throws Exception {
 internalCluster().ensureAtLeastNumDataNodes(1 + numberOfReplicas);
 /*
 * We effectively disable the background sync to ensure that the retention leases are not synced in the background so that the only
-* source of retention leases on the replicas would be from the commit point and recovery.
+* source of retention leases on the replicas would be from recovery.
 */
-final Settings settings = Settings.builder()
+final Settings.Builder settings = Settings.builder()
 .put("index.number_of_shards", 1)
 .put("index.number_of_replicas", 0)
-.put(IndexService.RETENTION_LEASE_SYNC_INTERVAL_SETTING.getKey(), TimeValue.timeValueHours(24))
-.build();
+.put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true)
+.put(IndexService.RETENTION_LEASE_SYNC_INTERVAL_SETTING.getKey(), TimeValue.timeValueHours(24));
 // when we increase the number of replicas below we want to exclude the replicas from being allocated so that they do not recover
-assertAcked(prepareCreate("index", 1).setSettings(settings));
+assertAcked(prepareCreate("index", 1, settings));
 ensureYellow("index");
 final AcknowledgedResponse response = client().admin()
 .indices()
@@ -339,7 +339,7 @@ public void testRetentionLeasesSyncOnRecovery() throws Exception {
 .getInstance(IndicesService.class, primaryShardNodeName)
 .getShardOrNull(new ShardId(resolveIndex("index"), 0));
 final int length = randomIntBetween(1, 8);
-final Map<String, RetentionLease> currentRetentionLeases = new HashMap<>();
+final Map<String, RetentionLease> currentRetentionLeases = new LinkedHashMap<>();
 for (int i = 0; i < length; i++) {
 final String id = randomValueOtherThanMany(currentRetentionLeases.keySet()::contains, () -> randomAlphaOfLength(8));
 final long retainingSequenceNumber = randomLongBetween(0, Long.MAX_VALUE);
@@ -348,10 +348,6 @@ public void testRetentionLeasesSyncOnRecovery() throws Exception {
 final ActionListener<ReplicationResponse> listener = ActionListener.wrap(r -> latch.countDown(), e -> fail(e.toString()));
 currentRetentionLeases.put(id, primary.addRetentionLease(id, retainingSequenceNumber, source, listener));
 latch.await();
-/*
-* Now renew the leases; since we do not flush immediately on renewal, this means that the latest retention leases will not be
-* in the latest commit point and therefore not transferred during the file-copy phase of recovery.
-*/
 currentRetentionLeases.put(id, primary.renewRetentionLease(id, retainingSequenceNumber, source));
 }

