-
Notifications
You must be signed in to change notification settings - Fork 14.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
KAFKA-9654 ReplicaAlterLogDirsThread can't be created again if the pr… #8223
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -209,6 +209,58 @@ class ReplicaManagerTest { | |
} | ||
} | ||
|
||
@Test | ||
def testFencedErrorCausedByBecomeLeader(): Unit = { | ||
val replicaManager = setupReplicaManagerWithMockedPurgatories(new MockTimer) | ||
try { | ||
val brokerList = Seq[Integer](0, 1).asJava | ||
val topicPartition = new TopicPartition(topic, 0) | ||
replicaManager.createPartition(topicPartition) | ||
.createLogIfNotExists(0, isNew = false, isFutureReplica = false, | ||
new LazyOffsetCheckpoints(replicaManager.highWatermarkCheckpoints)) | ||
|
||
def leaderAndIsrRequest(epoch: Int): LeaderAndIsrRequest = new LeaderAndIsrRequest.Builder(ApiKeys.LEADER_AND_ISR.latestVersion, 0, 0, brokerEpoch, | ||
Seq(new LeaderAndIsrPartitionState() | ||
.setTopicName(topic) | ||
.setPartitionIndex(0) | ||
.setControllerEpoch(0) | ||
.setLeader(0) | ||
.setLeaderEpoch(epoch) | ||
.setIsr(brokerList) | ||
.setZkVersion(0) | ||
.setReplicas(brokerList) | ||
.setIsNew(true)).asJava, | ||
Set(new Node(0, "host1", 0), new Node(1, "host2", 1)).asJava).build() | ||
|
||
replicaManager.becomeLeaderOrFollower(0, leaderAndIsrRequest(0), (_, _) => ()) | ||
val partition = replicaManager.getPartitionOrException(new TopicPartition(topic, 0), expectLeader = true) | ||
.localLogOrException | ||
assertEquals(1, replicaManager.logManager.liveLogDirs.filterNot(_ == partition.dir.getParentFile).size) | ||
|
||
// find the live and different folder | ||
val newReplicaFolder = replicaManager.logManager.liveLogDirs.filterNot(_ == partition.dir.getParentFile).head | ||
assertEquals(0, replicaManager.replicaAlterLogDirsManager.fetcherThreadMap.size) | ||
replicaManager.alterReplicaLogDirs(Map(topicPartition -> newReplicaFolder.getAbsolutePath)) | ||
replicaManager.futureLocalLogOrException(topicPartition) | ||
assertEquals(1, replicaManager.replicaAlterLogDirsManager.fetcherThreadMap.size) | ||
// change the epoch from 0 to 1 in order to trigger a fenced error | ||
replicaManager.becomeLeaderOrFollower(0, leaderAndIsrRequest(1), (_, _) => ()) | ||
TestUtils.waitUntilTrue(() => replicaManager.replicaAlterLogDirsManager.fetcherThreadMap.values.forall(_.partitionCount() == 0), | ||
s"the partition=$topicPartition should be removed from pending state") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @chia7712 In our CI we are consistently getting a failure in this check. Do you have any suggestion on what is happening and how to fix it? Error Message
Stacktrace
Standard Output
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @jsancio Thanks for this report. Let me dig into it :) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It seems there is already a ticket for this (https://issues.apache.org/jira/browse/KAFKA-9750). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks. I updated the Jira ticket with the same information as above. |
||
// the partition is added to failedPartitions if fenced error happens | ||
// if the thread is done before ReplicaManager#becomeLeaderOrFollower updates epoch, the fenced error does | ||
// not happen and failedPartitions is empty. | ||
if (replicaManager.replicaAlterLogDirsManager.failedPartitions.size != 0) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should this be an assertion? Is the test not deterministic for some reason? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If the thread is done before ReplicaManager#becomeLeaderOrFollower updates the epoch, the fenced error does not happen. |
||
replicaManager.replicaAlterLogDirsManager.shutdownIdleFetcherThreads() | ||
assertEquals(0, replicaManager.replicaAlterLogDirsManager.fetcherThreadMap.size) | ||
// send request again | ||
replicaManager.alterReplicaLogDirs(Map(topicPartition -> newReplicaFolder.getAbsolutePath)) | ||
// the future folder exists so it fails to invoke thread | ||
assertEquals(1, replicaManager.replicaAlterLogDirsManager.fetcherThreadMap.size) | ||
} | ||
} finally replicaManager.shutdown(checkpointHW = false) | ||
} | ||
|
||
@Test | ||
def testReceiveOutOfOrderSequenceExceptionWithLogStartOffset(): Unit = { | ||
val timer = new MockTimer | ||
|
@@ -1279,6 +1331,7 @@ class ReplicaManagerTest { | |
isFuture = false)).once | ||
} | ||
EasyMock.expect(mockLogMgr.initializingLog(topicPartitionObj)).anyTimes | ||
EasyMock.expect(mockLogMgr.getLog(topicPartitionObj, isFuture = true)).andReturn(None) | ||
|
||
EasyMock.expect(mockLogMgr.finishedInitializingLog( | ||
EasyMock.eq(topicPartitionObj), EasyMock.anyObject(), EasyMock.anyObject())).anyTimes | ||
|
@@ -1469,7 +1522,7 @@ class ReplicaManagerTest { | |
|
||
private def setupReplicaManagerWithMockedPurgatories(timer: MockTimer, aliveBrokerIds: Seq[Int] = Seq(0, 1)): ReplicaManager = { | ||
val props = TestUtils.createBrokerConfig(0, TestUtils.MockZkConnect) | ||
props.put("log.dir", TestUtils.tempRelativeDir("data").getAbsolutePath) | ||
props.put("log.dirs", TestUtils.tempRelativeDir("data").getAbsolutePath + "," + TestUtils.tempRelativeDir("data2").getAbsolutePath) | ||
val config = KafkaConfig.fromProps(props) | ||
val logProps = new Properties() | ||
val mockLogMgr = TestUtils.createLogManager(config.logDirs.map(new File(_)), LogConfig(logProps)) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Moving the replica folder to another location has a chance of producing this issue, so I separated the original test case into two cases. The first case always moves the folder to the same location. The other case moves the folder to a different location.