From 49a38249bfd65dde315eff497c868796720329ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stig=20Rohde=20D=C3=B8ssing?= Date: Sat, 6 Mar 2021 19:38:43 +0000 Subject: [PATCH] ZOOKEEPER-3781: Create snapshots on followers when snapshot.trust.empty is true MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit snapshot.trust.empty is an escape hatch for users upgrading from 3.4.x to later Zookeeper versions, allowing nodes to start with a non-empty transaction log but no snapshot. The intent is for this setting to be enabled for a short while during the upgrade, and then disabled again, as the check it disables is a safety feature. Prior to this PR, a node would only write a snapshot locally if it became leader, or if it had fallen so far behind the leader that the leader sent a SNAP message instead of a DIFF. This made the upgrade process inconvenient, as not all nodes would create a snapshot when snapshot.trust.empty was true, meaning that the safety check could not be flipped back on. This PR makes follower nodes write a local snapshot when they receive NEWLEADER, if they have no local snapshot and snapshot.trust.empty is true. Author: Stig Rohde Døssing Reviewers: Enrico Olivelli , Damien Diederen Closes #1581 from srdo/zookeeper-3781 --- .../zookeeper/server/ZooKeeperServer.java | 4 ++ .../server/persistence/FileTxnSnapLog.java | 9 ++++ .../zookeeper/server/quorum/Learner.java | 8 ++- .../test/EmptiedSnapshotRecoveryTest.java | 50 +++++++++++++++++++ .../org/apache/zookeeper/test/QuorumUtil.java | 12 ++--- 5 files changed, 76 insertions(+), 7 deletions(-) diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java index 8b3e4924b60..f0e3c822653 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/ZooKeeperServer.java @@ -544,6 +544,10 @@ public void takeSnapshot(boolean syncSnap) { ServerMetrics.getMetrics().SNAPSHOT_TIME.add(elapsed); } + public boolean shouldForceWriteInitialSnapshotAfterLeaderElection() { + return txnLogFactory.shouldForceWriteInitialSnapshotAfterLeaderElection(); + } + @Override public long getDataDirSize() { if (zkDb == null) { diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java index f054bc85d16..a4670ec0253 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java @@ -229,6 +229,15 @@ public SnapshotInfo getLastSnapshotInfo() { return this.snapLog.getLastSnapshotInfo(); } + /** + * whether to force the write of an initial snapshot after a leader election, + * to address ZOOKEEPER-3781 after upgrading from Zookeeper 3.4.x. + * @return true if an initial snapshot should be written even if not otherwise required, false otherwise. + */ + public boolean shouldForceWriteInitialSnapshotAfterLeaderElection() { + return trustEmptySnapshot && getLastSnapshotInfo() == null; + } + /** * this function restores the server * database after reading from the diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java index ef36b2f9000..8f3d00c77ea 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Learner.java @@ -561,7 +561,13 @@ protected void syncWithLeader(long newLeaderZxid) throws Exception { if (qp.getType() == Leader.DIFF) { LOG.info("Getting a diff from the leader 0x{}", Long.toHexString(qp.getZxid())); self.setSyncMode(QuorumPeer.SyncMode.DIFF); - snapshotNeeded = false; + if (zk.shouldForceWriteInitialSnapshotAfterLeaderElection()) { + LOG.info("Forcing a snapshot write as part of upgrading from an older Zookeeper. This should only happen while upgrading."); + snapshotNeeded = true; + syncSnapshot = true; + } else { + snapshotNeeded = false; + } } else if (qp.getType() == Leader.SNAP) { self.setSyncMode(QuorumPeer.SyncMode.SNAP); LOG.info("Getting a snapshot from leader 0x{}", Long.toHexString(qp.getZxid())); diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java index 316d7bf2221..b9020cc9adf 100644 --- a/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java +++ b/zookeeper-server/src/test/java/org/apache/zookeeper/test/EmptiedSnapshotRecoveryTest.java @@ -19,11 +19,13 @@ package org.apache.zookeeper.test; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; import java.io.File; import java.io.IOException; import java.io.PrintWriter; +import java.nio.file.Files; import java.util.List; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.PortAssignment; @@ -143,6 +145,54 @@ public void testRestoreWithTrustedEmptySnapFiles() throws Exception { runTest(false, true); } + @Test + public void testRestoreWithTrustedEmptySnapFilesWhenFollowing() throws Exception { + QuorumUtil qu = new QuorumUtil(1); + try { + qu.startAll(); + String connString = qu.getConnectionStringForServer(1); + try (ZooKeeper zk = new ZooKeeper(connString, CONNECTION_TIMEOUT, this)) { + for (int i = 0; i < N_TRANSACTIONS; i++) { + zk.create("/node-" + i, new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + } + } + int leaderIndex = qu.getLeaderServer(); + //Shut down the cluster and delete the snapshots from the followers + for (int i = 1; i <= qu.ALL; i++) { + qu.shutdown(i); + if (i != leaderIndex) { + FileTxnSnapLog txnLogFactory = qu.getPeer(i).peer.getTxnFactory(); + List snapshots = txnLogFactory.findNRecentSnapshots(10); + assertTrue(snapshots.size() > 0, "We have a snapshot to corrupt"); + for (File file : snapshots) { + Files.delete(file.toPath()); + } + assertEquals(txnLogFactory.findNRecentSnapshots(10).size(), 0); + } + } + //Start while trusting empty snapshots, verify that the followers save snapshots + System.setProperty(FileTxnSnapLog.ZOOKEEPER_SNAPSHOT_TRUST_EMPTY, "true"); + qu.start(leaderIndex); + for (int i = 1; i <= qu.ALL; i++) { + if (i != leaderIndex) { + qu.restart(i); + FileTxnSnapLog txnLogFactory = qu.getPeer(i).peer.getTxnFactory(); + List snapshots = txnLogFactory.findNRecentSnapshots(10); + assertTrue(snapshots.size() > 0, "A snapshot should have been created on follower " + i); + } + } + //Check that the created nodes are still there + try (ZooKeeper zk = new ZooKeeper(connString, CONNECTION_TIMEOUT, this)) { + for (int i = 0; i < N_TRANSACTIONS; i++) { + assertNotNull(zk.exists("/node-" + i, false)); + } + } + } finally { + System.clearProperty(FileTxnSnapLog.ZOOKEEPER_SNAPSHOT_TRUST_EMPTY); + qu.tearDown(); + } + } + public void process(WatchedEvent event) { // do nothing } diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/test/QuorumUtil.java b/zookeeper-server/src/test/java/org/apache/zookeeper/test/QuorumUtil.java index 98153b9acac..ce1cd1b3c5f 100644 --- a/zookeeper-server/src/test/java/org/apache/zookeeper/test/QuorumUtil.java +++ b/zookeeper-server/src/test/java/org/apache/zookeeper/test/QuorumUtil.java @@ -251,22 +251,22 @@ public void shutdownAll() { public void shutdown(int id) { QuorumPeer qp = getPeer(id).peer; try { - LOG.info("Shutting down quorum peer {}", qp.getName()); + LOG.info("Shutting down quorum peer {} with id {}", qp.getName(), id); qp.shutdown(); Election e = qp.getElectionAlg(); if (e != null) { - LOG.info("Shutting down leader election {}", qp.getName()); + LOG.info("Shutting down leader election {} with id {}", qp.getName(), id); e.shutdown(); } else { - LOG.info("No election available to shutdown {}", qp.getName()); + LOG.info("No election available to shutdown {} with id {}", qp.getName(), id); } - LOG.info("Waiting for {} to exit thread", qp.getName()); + LOG.info("Waiting for {} with id {} to exit thread", qp.getName(), id); qp.join(30000); if (qp.isAlive()) { - fail("QP failed to shutdown in 30 seconds: " + qp.getName()); + fail("QP failed to shutdown in 30 seconds: " + qp.getName() + " " + id); } } catch (InterruptedException e) { - LOG.debug("QP interrupted: {}", qp.getName(), e); + LOG.debug("QP interrupted: {} {}", qp.getName(), id, e); } }