Skip to content

Commit

Permalink
ZOOKEEPER-3781: Create snapshots on followers when snapshot.trust.emp…
Browse files Browse the repository at this point in the history
…ty is true (#111)

snapshot.trust.empty is an escape hatch for users upgrading from 3.4.x to later Zookeeper
versions, allowing nodes to start with a non-empty transaction log but no snapshot.

The intent is for this setting to be enabled for a short while during the upgrade,
and then disabled again, as the check it disables is a safety feature.

Prior to this PR, a node would only write a snapshot locally if it became leader,
or if it had fallen so far behind the leader that the leader sent a SNAP message instead
of a DIFF. This made the upgrade process inconvenient, as not all nodes would create
a snapshot when snapshot.trust.empty was true, meaning that the safety check could
not be flipped back on.

This PR makes follower nodes write a local snapshot when they receive NEWLEADER,
if they have no local snapshot and snapshot.trust.empty is true.

Author: Stig Rohde Døssing <[email protected]>

Reviewers: Enrico Olivelli <[email protected]>, Damien Diederen <[email protected]>

Closes apache#1581 from srdo/zookeeper-3781

Co-authored-by: Stig Rohde Døssing <[email protected]>
  • Loading branch information
anuragmadnawat1 and Stig Rohde Døssing authored Nov 2, 2022
1 parent 907875f commit 1650110
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,10 @@ public void takeSnapshot(boolean syncSnap) {
ServerMetrics.getMetrics().SNAPSHOT_TIME.add(elapsed);
}

public boolean shouldForceWriteInitialSnapshotAfterLeaderElection() {
return txnLogFactory.shouldForceWriteInitialSnapshotAfterLeaderElection();
}

@Override
public long getDataDirSize() {
if (zkDb == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,15 @@ public SnapshotInfo getLastSnapshotInfo() {
return this.snapLog.getLastSnapshotInfo();
}

/**
* whether to force the write of an initial snapshot after a leader election,
* to address ZOOKEEPER-3781 after upgrading from Zookeeper 3.4.x.
* @return true if an initial snapshot should be written even if not otherwise required, false otherwise.
*/
public boolean shouldForceWriteInitialSnapshotAfterLeaderElection() {
return trustEmptySnapshot && getLastSnapshotInfo() == null;
}

/**
* this function restores the server
* database after reading from the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,13 @@ protected void syncWithLeader(long newLeaderZxid) throws Exception {
if (qp.getType() == Leader.DIFF) {
LOG.info("Getting a diff from the leader 0x{}", Long.toHexString(qp.getZxid()));
self.setSyncMode(QuorumPeer.SyncMode.DIFF);
snapshotNeeded = false;
if (zk.shouldForceWriteInitialSnapshotAfterLeaderElection()) {
LOG.info("Forcing a snapshot write as part of upgrading from an older Zookeeper. This should only happen while upgrading.");
snapshotNeeded = true;
syncSnapshot = true;
} else {
snapshotNeeded = false;
}
} else if (qp.getType() == Leader.SNAP) {
self.setSyncMode(QuorumPeer.SyncMode.SNAP);
LOG.info("Getting a snapshot from leader 0x{}", Long.toHexString(qp.getZxid()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@
package org.apache.zookeeper.test;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.file.Files;
import java.util.List;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.PortAssignment;
Expand Down Expand Up @@ -143,6 +145,54 @@ public void testRestoreWithTrustedEmptySnapFiles() throws Exception {
runTest(false, true);
}

@Test
public void testRestoreWithTrustedEmptySnapFilesWhenFollowing() throws Exception {
QuorumUtil qu = new QuorumUtil(1);
try {
qu.startAll();
String connString = qu.getConnectionStringForServer(1);
try (ZooKeeper zk = new ZooKeeper(connString, CONNECTION_TIMEOUT, this)) {
for (int i = 0; i < N_TRANSACTIONS; i++) {
zk.create("/node-" + i, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
}
}
int leaderIndex = qu.getLeaderServer();
//Shut down the cluster and delete the snapshots from the followers
for (int i = 1; i <= qu.ALL; i++) {
qu.shutdown(i);
if (i != leaderIndex) {
FileTxnSnapLog txnLogFactory = qu.getPeer(i).peer.getTxnFactory();
List<File> snapshots = txnLogFactory.findNRecentSnapshots(10);
assertTrue(snapshots.size() > 0, "We have a snapshot to corrupt");
for (File file : snapshots) {
Files.delete(file.toPath());
}
assertEquals(txnLogFactory.findNRecentSnapshots(10).size(), 0);
}
}
//Start while trusting empty snapshots, verify that the followers save snapshots
System.setProperty(FileTxnSnapLog.ZOOKEEPER_SNAPSHOT_TRUST_EMPTY, "true");
qu.start(leaderIndex);
for (int i = 1; i <= qu.ALL; i++) {
if (i != leaderIndex) {
qu.restart(i);
FileTxnSnapLog txnLogFactory = qu.getPeer(i).peer.getTxnFactory();
List<File> snapshots = txnLogFactory.findNRecentSnapshots(10);
assertTrue(snapshots.size() > 0, "A snapshot should have been created on follower " + i);
}
}
//Check that the created nodes are still there
try (ZooKeeper zk = new ZooKeeper(connString, CONNECTION_TIMEOUT, this)) {
for (int i = 0; i < N_TRANSACTIONS; i++) {
assertNotNull(zk.exists("/node-" + i, false));
}
}
} finally {
System.clearProperty(FileTxnSnapLog.ZOOKEEPER_SNAPSHOT_TRUST_EMPTY);
qu.tearDown();
}
}

public void process(WatchedEvent event) {
// do nothing
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,22 +251,22 @@ public void shutdownAll() {
public void shutdown(int id) {
QuorumPeer qp = getPeer(id).peer;
try {
LOG.info("Shutting down quorum peer {}", qp.getName());
LOG.info("Shutting down quorum peer {} with id {}", qp.getName(), id);
qp.shutdown();
Election e = qp.getElectionAlg();
if (e != null) {
LOG.info("Shutting down leader election {}", qp.getName());
LOG.info("Shutting down leader election {} with id {}", qp.getName(), id);
e.shutdown();
} else {
LOG.info("No election available to shutdown {}", qp.getName());
LOG.info("No election available to shutdown {} with id {}", qp.getName(), id);
}
LOG.info("Waiting for {} to exit thread", qp.getName());
LOG.info("Waiting for {} with id {} to exit thread", qp.getName(), id);
qp.join(30000);
if (qp.isAlive()) {
fail("QP failed to shutdown in 30 seconds: " + qp.getName());
fail("QP failed to shutdown in 30 seconds: " + qp.getName() + " " + id);
}
} catch (InterruptedException e) {
LOG.debug("QP interrupted: {}", qp.getName(), e);
LOG.debug("QP interrupted: {} {}", qp.getName(), id, e);
}
}

Expand Down

0 comments on commit 1650110

Please sign in to comment.