From c6fc82437f6627d5c361684c6cd796a87d9f89d1 Mon Sep 17 00:00:00 2001 From: Mate Szalay-Beko Date: Tue, 7 Apr 2020 09:07:50 +0200 Subject: [PATCH] ZOOKEEPER-3769: handling malformed Leader Election notification messages Using ZooKeeper with JDK 12.0.2 on CentOS 7, we saw a few times that, when the current leader was killed, some partial Leader Election notification (vote) messages were delivered to the other ZooKeeper servers. These malformed / partial messages caused various exceptions in the WorkerReceiver thread of FastLeaderElection that were not handled before. This killed the WorkerReceiver thread, leaving the affected ZooKeeper server unable to receive any further leader election messages and unable to rejoin any quorum until it was restarted. In the proposed fix I created unit tests to simulate certain error cases with regard to partial leader election messages, and fixed the error handling in FastLeaderElection. Author: Mate Szalay-Beko Reviewers: Enrico Olivelli, Norbert Kalmar Closes #1300 from symat/ZOOKEEPER-3769-master --- .../server/quorum/FastLeaderElection.java | 115 ++++---- .../FLEMalformedNotificationMessageTest.java | 249 ++++++++++++++++++ 2 files changed, 313 insertions(+), 51 deletions(-) create mode 100644 zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/FLEMalformedNotificationMessageTest.java diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/FastLeaderElection.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/FastLeaderElection.java index 3ab10073f4d..967adf05a19 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/FastLeaderElection.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/FastLeaderElection.java @@ -19,6 +19,7 @@ package org.apache.zookeeper.server.quorum; import java.io.IOException; +import java.nio.BufferUnderflowException; import java.nio.ByteBuffer; import 
java.util.HashMap; import java.util.Map; @@ -237,19 +238,21 @@ public void run() { continue; } + final int capacity = response.buffer.capacity(); + // The current protocol and two previous generations all send at least 28 bytes - if (response.buffer.capacity() < 28) { - LOG.error("Got a short response: {}", response.buffer.capacity()); + if (capacity < 28) { + LOG.error("Got a short response from server {}: {}", response.sid, capacity); continue; } // this is the backwardCompatibility mode in place before ZK-107 // It is for a version of the protocol in which we didn't send peer epoch // With peer epoch and version the message became 40 bytes - boolean backCompatibility28 = (response.buffer.capacity() == 28); + boolean backCompatibility28 = (capacity == 28); // this is the backwardCompatibility mode for no version information - boolean backCompatibility40 = (response.buffer.capacity() == 40); + boolean backCompatibility40 = (capacity == 40); response.buffer.clear(); @@ -263,64 +266,74 @@ public void run() { long rpeerepoch; int version = 0x0; - if (!backCompatibility28) { - rpeerepoch = response.buffer.getLong(); - if (!backCompatibility40) { - /* - * Version added in 3.4.6 - */ + QuorumVerifier rqv = null; - version = response.buffer.getInt(); + try { + if (!backCompatibility28) { + rpeerepoch = response.buffer.getLong(); + if (!backCompatibility40) { + /* + * Version added in 3.4.6 + */ + + version = response.buffer.getInt(); + } else { + LOG.info("Backward compatibility mode (36 bits), server id: {}", response.sid); + } } else { - LOG.info("Backward compatibility mode (36 bits), server id: {}", response.sid); + LOG.info("Backward compatibility mode (28 bits), server id: {}", response.sid); + rpeerepoch = ZxidUtils.getEpochFromZxid(rzxid); } - } else { - LOG.info("Backward compatibility mode (28 bits), server id: {}", response.sid); - rpeerepoch = ZxidUtils.getEpochFromZxid(rzxid); - } - QuorumVerifier rqv = null; + // check if we have a version that includes 
config. If so extract config info from message. + if (version > 0x1) { + int configLength = response.buffer.getInt(); + + // we want to avoid errors caused by the allocation of a byte array with negative length + // (causing NegativeArraySizeException) or huge length (causing e.g. OutOfMemoryError) + if (configLength < 0 || configLength > capacity) { + throw new IOException(String.format("Invalid configLength in notification message! sid=%d, capacity=%d, version=%d, configLength=%d", + response.sid, capacity, version, configLength)); + } - // check if we have a version that includes config. If so extract config info from message. - if (version > 0x1) { - int configLength = response.buffer.getInt(); - byte[] b = new byte[configLength]; - - response.buffer.get(b); - - synchronized (self) { - try { - rqv = self.configFromString(new String(b)); - QuorumVerifier curQV = self.getQuorumVerifier(); - if (rqv.getVersion() > curQV.getVersion()) { - LOG.info("{} Received version: {} my version: {}", - self.getId(), - Long.toHexString(rqv.getVersion()), - Long.toHexString(self.getQuorumVerifier().getVersion())); - if (self.getPeerState() == ServerState.LOOKING) { - LOG.debug("Invoking processReconfig(), state: {}", self.getServerState()); - self.processReconfig(rqv, null, null, false); - if (!rqv.equals(curQV)) { - LOG.info("restarting leader election"); - self.shuttingDownLE = true; - self.getElectionAlg().shutdown(); - - break; + byte[] b = new byte[configLength]; + response.buffer.get(b); + + synchronized (self) { + try { + rqv = self.configFromString(new String(b)); + QuorumVerifier curQV = self.getQuorumVerifier(); + if (rqv.getVersion() > curQV.getVersion()) { + LOG.info("{} Received version: {} my version: {}", + self.getId(), + Long.toHexString(rqv.getVersion()), + Long.toHexString(self.getQuorumVerifier().getVersion())); + if (self.getPeerState() == ServerState.LOOKING) { + LOG.debug("Invoking processReconfig(), state: {}", self.getServerState()); + 
self.processReconfig(rqv, null, null, false); + if (!rqv.equals(curQV)) { + LOG.info("restarting leader election"); + self.shuttingDownLE = true; + self.getElectionAlg().shutdown(); + + break; + } + } else { + LOG.debug("Skip processReconfig(), state: {}", self.getServerState()); } - } else { - LOG.debug("Skip processReconfig(), state: {}", self.getServerState()); } + } catch (IOException | ConfigException e) { + LOG.error("Something went wrong while processing config received from {}", response.sid); } - } catch (IOException e) { - LOG.error("Something went wrong while processing config received from {}", response.sid); - } catch (ConfigException e) { - LOG.error("Something went wrong while processing config received from {}", response.sid); } + } else { + LOG.info("Backward compatibility mode (before reconfig), server id: {}", response.sid); } - } else { - LOG.info("Backward compatibility mode (before reconfig), server id: {}", response.sid); + } catch (BufferUnderflowException | IOException e) { + LOG.warn("Skipping the processing of a partial / malformed response message sent by sid={} (message length: {})", + response.sid, capacity, e); + continue; } - /* * If it is from a non-voting server (such as an observer or * a non-voting follower), respond right away. diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/FLEMalformedNotificationMessageTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/FLEMalformedNotificationMessageTest.java new file mode 100644 index 00000000000..8465c9ee55d --- /dev/null +++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/quorum/FLEMalformedNotificationMessageTest.java @@ -0,0 +1,249 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zookeeper.server.quorum; + +import java.io.File; +import java.io.IOException; +import java.net.InetSocketAddress; +import java.nio.ByteBuffer; +import java.util.HashMap; +import org.apache.zookeeper.PortAssignment; +import org.apache.zookeeper.ZKTestCase; +import org.apache.zookeeper.server.quorum.QuorumPeer.QuorumServer; +import org.apache.zookeeper.server.quorum.QuorumPeer.ServerState; +import org.apache.zookeeper.test.ClientBase; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +public class FLEMalformedNotificationMessageTest extends ZKTestCase { + private static final Logger LOG = LoggerFactory.getLogger(FLEMalformedNotificationMessageTest.class); + private static final byte[] CONFIG_BYTES = "my very invalid config string".getBytes(); + private static final int CONFIG_BYTES_LENGTH = CONFIG_BYTES.length; + + int count; + HashMap peers; + File tmpdir[]; + int port[]; + + QuorumCnxManager mockCnxManager; + FLETestUtils.LEThread leaderElectionThread; + QuorumPeer peerRunningLeaderElection; + + + @Before + public void setUp() throws Exception { + count = 3; + + peers = new HashMap<>(count); + tmpdir = new File[count]; + port = new int[count]; + + LOG.info("FLEMalformedNotificationMessageTest: {}, {}", getTestName(), count); + for (int i = 0; i < 
count; i++) { + int clientport = PortAssignment.unique(); + peers.put((long) i, + new QuorumServer(i, + new InetSocketAddress(clientport), + new InetSocketAddress(PortAssignment.unique()))); + tmpdir[i] = ClientBase.createTmpDir(); + port[i] = clientport; + } + + /* + * Start server 0 + */ + peerRunningLeaderElection = new QuorumPeer(peers, tmpdir[0], tmpdir[0], port[0], 3, 0, 1000, 2, 2, 2); + peerRunningLeaderElection.startLeaderElection(); + leaderElectionThread = new FLETestUtils.LEThread(peerRunningLeaderElection, 0); + leaderElectionThread.start(); + } + + + @After + public void tearDown() throws Exception { + peerRunningLeaderElection.shutdown(); + mockCnxManager.halt(); + } + + + @Test + public void testTooShortPartialNotificationMessage() throws Exception { + + /* + * Start mock server 1, send a message too short to be compatible with any protocol version + * This simulates the case when only some parts of the whole message is received. + */ + startMockServer(1); + byte requestBytes[] = new byte[12]; + ByteBuffer requestBuffer = ByteBuffer.wrap(requestBytes); + requestBuffer.clear(); + requestBuffer.putInt(ServerState.LOOKING.ordinal()); // state + requestBuffer.putLong(0); // leader + mockCnxManager.toSend(0L, requestBuffer); + + /* + * Assert that the message receiver thread in leader election is still healthy: + * we are sending valid votes and waiting for the leader election to be finished. 
+ */ + sendValidNotifications(1, 0); + leaderElectionThread.join(5000); + if (leaderElectionThread.isAlive()) { + Assert.fail("Leader election thread didn't join, something went wrong."); + } + } + + + @Test + public void testNotificationMessageWithNegativeConfigLength() throws Exception { + + /* + * Start mock server 1, send a message with negative configLength field + */ + startMockServer(1); + byte requestBytes[] = new byte[48]; + ByteBuffer requestBuffer = ByteBuffer.wrap(requestBytes); + requestBuffer.clear(); + requestBuffer.putInt(ServerState.LOOKING.ordinal()); // state + requestBuffer.putLong(0); // leader + requestBuffer.putLong(0); // zxid + requestBuffer.putLong(0); // electionEpoch + requestBuffer.putLong(0); // epoch + requestBuffer.putInt(FastLeaderElection.Notification.CURRENTVERSION); // version + requestBuffer.putInt(-123); // configData.length + mockCnxManager.toSend(0L, requestBuffer); + + /* + * Assert that the message receiver thread in leader election is still healthy: + * we are sending valid votes and waiting for the leader election to be finished. 
+ */ + sendValidNotifications(1, 0); + leaderElectionThread.join(5000); + if (leaderElectionThread.isAlive()) { + Assert.fail("Leader election thread didn't join, something went wrong."); + } + } + + + @Test + public void testNotificationMessageWithInvalidConfigLength() throws Exception { + + /* + * Start mock server 1, send a message with an invalid configLength field + * (instead of sending CONFIG_BYTES_LENGTH, we send 10000) + */ + startMockServer(1); + byte requestBytes[] = new byte[48 + CONFIG_BYTES_LENGTH]; + ByteBuffer requestBuffer = ByteBuffer.wrap(requestBytes); + requestBuffer.clear(); + requestBuffer.putInt(ServerState.LOOKING.ordinal()); // state + requestBuffer.putLong(0); // leader + requestBuffer.putLong(0); // zxid + requestBuffer.putLong(0); // electionEpoch + requestBuffer.putLong(0); // epoch + requestBuffer.putInt(FastLeaderElection.Notification.CURRENTVERSION); // version + requestBuffer.putInt(10000); // configData.length + requestBuffer.put(CONFIG_BYTES); // configData + mockCnxManager.toSend(0L, requestBuffer); + + /* + * Assert that the message receiver thread in leader election is still healthy: + * we are sending valid votes and waiting for the leader election to be finished. 
+ */ + sendValidNotifications(1, 0); + leaderElectionThread.join(5000); + if (leaderElectionThread.isAlive()) { + Assert.fail("Leader election thread didn't join, something went wrong."); + } + } + + + @Test + public void testNotificationMessageWithInvalidConfig() throws Exception { + + /* + * Start mock server 1, send a message with an invalid config field + * (the receiver should not be able to parse the config part of the message) + */ + startMockServer(1); + ByteBuffer requestBuffer = FastLeaderElection.buildMsg(ServerState.LOOKING.ordinal(), 1, 0, 0, 0, CONFIG_BYTES); + mockCnxManager.toSend(0L, requestBuffer); + + /* + * Assert that the message receiver thread in leader election is still healthy: + * we are sending valid votes and waiting for the leader election to be finished. + */ + sendValidNotifications(1, 0); + leaderElectionThread.join(5000); + if (leaderElectionThread.isAlive()) { + Assert.fail("Leader election thread didn't join, something went wrong."); + } + } + + + @Test + public void testNotificationMessageWithBadProtocol() throws Exception { + + /* + * Start mock server 1, send an invalid 30 bytes long message + * (the receiver should not be able to parse the message and should skip it) + * This simulates the case when only some parts of the whole message is received. + */ + startMockServer(1); + byte requestBytes[] = new byte[30]; + ByteBuffer requestBuffer = ByteBuffer.wrap(requestBytes); + requestBuffer.clear(); + requestBuffer.putInt(ServerState.LOOKING.ordinal()); // state + requestBuffer.putLong(1); // leader + requestBuffer.putLong(0); // zxid + requestBuffer.putLong(0); // electionEpoch + requestBuffer.putShort((short) 0); // this is the first two bytes of a proper + // 8 bytes Long we should send here + mockCnxManager.toSend(0L, requestBuffer); + + /* + * Assert that the message receiver thread in leader election is still healthy: + * we are sending valid votes and waiting for the leader election to be finished. 
+ */ + sendValidNotifications(1, 0); + leaderElectionThread.join(5000); + if (leaderElectionThread.isAlive()) { + Assert.fail("Leader election thread didn't join, something went wrong."); + } + } + + + void startMockServer(int sid) throws IOException { + QuorumPeer peer = new QuorumPeer(peers, tmpdir[sid], tmpdir[sid], port[sid], 3, sid, 1000, 2, 2, 2); + mockCnxManager = peer.createCnxnManager(); + mockCnxManager.listener.start(); + } + + + void sendValidNotifications(int fromSid, int toSid) throws InterruptedException { + mockCnxManager.toSend((long) toSid, FLETestUtils.createMsg(ServerState.LOOKING.ordinal(), fromSid, 0, 0)); + mockCnxManager.recvQueue.take(); + mockCnxManager.toSend((long) toSid, FLETestUtils.createMsg(ServerState.FOLLOWING.ordinal(), toSid, 0, 0)); + } + +}