Skip to content

Commit

Permalink
[improve][broker] Gracefully shut down load balancer extension
Browse files Browse the repository at this point in the history
  • Loading branch information
heesung-sn committed May 12, 2023
1 parent 96367e1 commit ed773f4
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,17 @@ public void closeMetadataServiceSession() throws Exception {
localMetadataStore.close();
}

/**
 * Closes the leader election service that is currently in use.
 *
 * <p>When the load manager extension is enabled, the service is looked up through
 * {@code ExtensibleLoadManagerImpl.get(loadManager.get())} and closed. Otherwise the service
 * held by this instance is closed, if there is one, and the field is reset to {@code null}.
 *
 * @throws Exception propagated from the {@code close()} call on the leader election service
 */
private void closeLeaderElectionService() throws Exception {
    if (ExtensibleLoadManagerImpl.isLoadManagerExtensionEnabled(config)) {
        ExtensibleLoadManagerImpl.get(loadManager.get())
                .getLeaderElectionService()
                .close();
        return;
    }

    // Extension disabled: only the locally held service (if any) needs closing.
    if (this.leaderElectionService == null) {
        return;
    }
    this.leaderElectionService.close();
    this.leaderElectionService = null;
}

@Override
public void close() throws PulsarServerException {
try {
Expand Down Expand Up @@ -502,10 +513,7 @@ public CompletableFuture<Void> closeAsync() {
this.bkClientFactory = null;
}

if (this.leaderElectionService != null) {
this.leaderElectionService.close();
this.leaderElectionService = null;
}
closeLeaderElectionService();

if (adminClient != null) {
adminClient.close();
Expand Down Expand Up @@ -1316,7 +1324,11 @@ public boolean isRunning() {
* @return a reference of the current <code>LeaderElectionService</code> instance.
*/
public LeaderElectionService getLeaderElectionService() {
// NOTE(review): this listing looks like a diff rendered without +/- markers; the unconditional
// return on the next line is presumably the removed pre-change statement and the if/else after
// it is its replacement -- confirm against the actual file before relying on this block as-is.
return this.leaderElectionService;
// With the load manager extension enabled, the service is obtained via
// ExtensibleLoadManagerImpl.get(loadManager.get()); otherwise the field of this instance is returned.
if (ExtensibleLoadManagerImpl.isLoadManagerExtensionEnabled(config)) {
return ExtensibleLoadManagerImpl.get(loadManager.get()).getLeaderElectionService();
} else {
return this.leaderElectionService;
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -380,12 +380,22 @@ public CompletableFuture<Optional<BrokerLookupData>> assign(Optional<ServiceUnit
}

/**
 * Convenience overload that delegates to the two-argument {@code selectAsync} with an empty
 * exclusion set, i.e. no broker is excluded up front.
 *
 * @param bundle the service unit passed through to the two-argument overload
 * @return the future returned by the two-argument overload
 */
public CompletableFuture<Optional<String>> selectAsync(ServiceUnitId bundle) {
    final Optional<Set<String>> noExcludedBrokers = Optional.empty();
    return selectAsync(bundle, noExcludedBrokers);
}

public CompletableFuture<Optional<String>> selectAsync(ServiceUnitId bundle,
Optional<Set<String>> excludeBrokerSet) {
BrokerRegistry brokerRegistry = getBrokerRegistry();
return brokerRegistry.getAvailableBrokerLookupDataAsync()
.thenCompose(availableBrokers -> {
LoadManagerContext context = this.getContext();

Map<String, BrokerLookupData> availableBrokerCandidates = new HashMap<>(availableBrokers);
if (excludeBrokerSet.isPresent()) {
for (String exclude : excludeBrokerSet.get()) {
availableBrokerCandidates.remove(exclude);
}
}

// Filter out brokers that do not meet the rules.
List<BrokerFilter> filterPipeline = getBrokerFilterPipeline();
Expand Down Expand Up @@ -685,4 +695,10 @@ private void monitor() {
log.error("Failed to get the channel ownership.", e);
}
}

/**
 * Disables this broker by running three steps in this order: cleans the ownerships through the
 * service unit state channel, closes the leader election service, and unregisters the broker
 * from the broker registry.
 *
 * @throws Exception if any of the steps fails. NOTE(review): there is no try/finally here, so a
 *                   failure in an earlier step skips the later ones -- confirm this is intended.
 */
public void disableBroker() throws Exception {
serviceUnitStateChannel.cleanOwnerships();
leaderElectionService.close();
brokerRegistry.unregister();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ public CompletableFuture<Boolean> checkOwnershipAsync(Optional<ServiceUnitId> to

@Override
public void disableBroker() throws Exception {
// NOTE(review): this listing looks like a diff rendered without +/- markers; the direct
// getBrokerRegistry().unregister() call is presumably the removed pre-change line, replaced by
// the delegation to loadManager.disableBroker() below -- confirm against the actual file.
this.loadManager.getBrokerRegistry().unregister();
this.loadManager.disableBroker();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,4 +206,9 @@ public interface ServiceUnitStateChannel extends Closeable {
* Cancels the ownership monitor.
*/
void cancelOwnershipMonitor();

/**
 * Cleans the service unit ownerships from the current broker's channel.
 *
 * <p>Called from the load manager's {@code disableBroker()} before the leader election service
 * is closed and the broker is unregistered. The {@code ServiceUnitStateChannelImpl}
 * implementation sleeps on the calling thread while it waits for the clean-up to show up in its
 * table view, so callers should expect this call to block.
 */
void cleanOwnerships();
}
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,10 @@ public class ServiceUnitStateChannelImpl implements ServiceUnitStateChannel {

public static final CompressionType MSG_COMPRESSION_TYPE = CompressionType.ZSTD;
private static final long MAX_IN_FLIGHT_STATE_WAITING_TIME_IN_MILLIS = 30 * 1000; // 30sec

// Upper bound on how long cleanOwnerships() keeps polling the table view for service units that
// are still owned by this broker.
private static final int OWNERSHIP_CLEAN_UP_MAX_WAIT_TIME_IN_MILLIS = 5000;
// Sleep between two consecutive polls while this broker still owns at least one service unit.
private static final int OWNERSHIP_CLEAN_UP_WAIT_RETRY_DELAY_IN_MILLIS = 100;
// Extra sleep applied once no owned service unit is left, right before cleanOwnerships() returns.
private static final int OWNERSHIP_CLEAN_UP_CONVERGENCE_DELAY_IN_MILLIS = 3000;
public static final long VERSION_ID_INIT = 1; // initial versionId
private static final long OWNERSHIP_MONITOR_DELAY_TIME_IN_SECS = 60;
public static final long MAX_CLEAN_UP_DELAY_TIME_IN_SECS = 3 * 60; // 3 mins
Expand Down Expand Up @@ -694,6 +698,8 @@ private void handleOwnEvent(String serviceUnit, ServiceUnitStateData data) {
if (isTargetBroker(data.dstBroker())) {
log(null, serviceUnit, data, null);
lastOwnEventHandledAt = System.currentTimeMillis();
} else if (data.force() && isTargetBroker(data.sourceBroker())) {
closeServiceUnit(serviceUnit);
}
}

Expand Down Expand Up @@ -1114,13 +1120,13 @@ private ServiceUnitStateData getOverrideInactiveBrokerStateData(ServiceUnitState
Map.copyOf(orphanData.splitServiceUnitToDestBroker()),
true, getNextVersionId(orphanData));
} else {
return new ServiceUnitStateData(Owned, selectedBroker, true, getNextVersionId(orphanData));
return new ServiceUnitStateData(Owned, selectedBroker, orphanData.dstBroker(),
true, getNextVersionId(orphanData));
}
}

private void overrideOwnership(String serviceUnit, ServiceUnitStateData orphanData) {

Optional<String> selectedBroker = selectBroker(serviceUnit);
private void overrideOwnership(String serviceUnit, ServiceUnitStateData orphanData, String inactiveBroker) {
Optional<String> selectedBroker = selectBroker(serviceUnit, inactiveBroker);
if (selectedBroker.isPresent()) {
var override = getOverrideInactiveBrokerStateData(orphanData, selectedBroker.get());
log.info("Overriding ownership serviceUnit:{} from orphanData:{} to overrideData:{}",
Expand All @@ -1140,8 +1146,37 @@ private void overrideOwnership(String serviceUnit, ServiceUnitStateData orphanDa
}
}

/**
 * Cleans the service unit ownerships held by this broker and waits for the result to show up in
 * the table view.
 *
 * <p>Runs {@code doCleanup} for this broker's lookup service address, then polls the table view
 * every {@code OWNERSHIP_CLEAN_UP_WAIT_RETRY_DELAY_IN_MILLIS} until no entry is in the
 * {@code Owned} state with this broker as the destination, or until
 * {@code OWNERSHIP_CLEAN_UP_MAX_WAIT_TIME_IN_MILLIS} has elapsed. Once the table view is clean,
 * it sleeps for {@code OWNERSHIP_CLEAN_UP_CONVERGENCE_DELAY_IN_MILLIS} before returning.
 *
 * <p>If the calling thread is interrupted while sleeping, the wait is abandoned and the thread's
 * interrupt status is restored so that callers further up the stack can observe it.
 */
public void cleanOwnerships() {
    doCleanup(lookupServiceAddress);

    // Measure elapsed time with the monotonic clock so wall-clock adjustments cannot stretch or
    // cut the wait.
    final long startedAtNanos = System.nanoTime();
    final long maxWaitNanos = MILLISECONDS.toNanos(OWNERSHIP_CLEAN_UP_MAX_WAIT_TIME_IN_MILLIS);

    while (System.nanoTime() - startedAtNanos < maxWaitNanos) {
        boolean cleaned = true;
        for (var data : tableview.values()) {
            if (data.state() == Owned && data.dstBroker().equals(lookupServiceAddress)) {
                cleaned = false;
                break;
            }
        }

        if (cleaned) {
            try {
                MILLISECONDS.sleep(OWNERSHIP_CLEAN_UP_CONVERGENCE_DELAY_IN_MILLIS);
            } catch (InterruptedException e) {
                log.warn("Interrupted while gracefully waiting for the cleanup convergence.");
                // Sleeping cleared the flag when it threw; put it back for the caller.
                Thread.currentThread().interrupt();
            }
            return;
        }

        try {
            MILLISECONDS.sleep(OWNERSHIP_CLEAN_UP_WAIT_RETRY_DELAY_IN_MILLIS);
        } catch (InterruptedException e) {
            log.warn("Interrupted while delaying the next service unit clean-up. Cleaning broker:{}",
                    lookupServiceAddress);
            // Stop waiting: with the flag restored every further sleep would throw immediately,
            // and without restoring it the interruption request would be silently lost.
            Thread.currentThread().interrupt();
            return;
        }
    }
}


private void doCleanup(String broker) {
private synchronized void doCleanup(String broker) {
long startTime = System.nanoTime();
log.info("Started ownership cleanup for the inactive broker:{}", broker);
int orphanServiceUnitCleanupCnt = 0;
Expand All @@ -1153,13 +1188,13 @@ private void doCleanup(String broker) {
var state = state(stateData);
if (StringUtils.equals(broker, stateData.dstBroker())) {
if (isActiveState(state)) {
overrideOwnership(serviceUnit, stateData);
overrideOwnership(serviceUnit, stateData, broker);
orphanServiceUnitCleanupCnt++;
}

} else if (StringUtils.equals(broker, stateData.sourceBroker())) {
if (isInFlightState(state)) {
overrideOwnership(serviceUnit, stateData);
overrideOwnership(serviceUnit, stateData, broker);
orphanServiceUnitCleanupCnt++;
}
}
Expand Down Expand Up @@ -1194,18 +1229,20 @@ private void doCleanup(String broker) {

}

// NOTE(review): this listing looks like a diff rendered without +/- markers; the one-argument
// signature and the one-argument selectAsync call are presumably the removed pre-change lines,
// each followed directly by its replacement -- confirm against the actual file.
private Optional<String> selectBroker(String serviceUnit) {
private Optional<String> selectBroker(String serviceUnit, String inactiveBroker) {
try {
return loadManager.selectAsync(getNamespaceBundle(serviceUnit))
// The inactive broker is passed as the exclusion set, which selectAsync removes from the
// candidate brokers, so the broker being cleaned up cannot be selected again.
return loadManager.selectAsync(getNamespaceBundle(serviceUnit), Optional.of(Set.of(inactiveBroker)))
.get(inFlightStateWaitingTimeInMillis, MILLISECONDS);
} catch (Throwable e) {
// NOTE(review): the caught throwable is not passed to the logger, so the failure cause
// (including a timeout of the bounded get above) is not recorded.
log.error("Failed to select a broker for serviceUnit:{}", serviceUnit);
}
// Reached only when the selection threw or timed out.
return Optional.empty();
}

private Optional<ServiceUnitStateData> getRollForwardStateData(String serviceUnit, long nextVersionId) {
Optional<String> selectedBroker = selectBroker(serviceUnit);
private Optional<ServiceUnitStateData> getRollForwardStateData(String serviceUnit,
String inactiveBroker,
long nextVersionId) {
Optional<String> selectedBroker = selectBroker(serviceUnit, inactiveBroker);
if (selectedBroker.isEmpty()) {
return Optional.empty();
}
Expand All @@ -1220,7 +1257,7 @@ private Optional<ServiceUnitStateData> getOverrideInFlightStateData(
var state = orphanData.state();
switch (state) {
case Assigning: {
return getRollForwardStateData(serviceUnit, nextVersionId);
return getRollForwardStateData(serviceUnit, orphanData.dstBroker(), nextVersionId);
}
case Splitting: {
return Optional.of(new ServiceUnitStateData(Splitting,
Expand All @@ -1233,7 +1270,7 @@ private Optional<ServiceUnitStateData> getOverrideInFlightStateData(
// rollback to the src
return Optional.of(new ServiceUnitStateData(Owned, orphanData.sourceBroker(), true, nextVersionId));
} else {
return getRollForwardStateData(serviceUnit, nextVersionId);
return getRollForwardStateData(serviceUnit, orphanData.sourceBroker(), nextVersionId);
}
}
default: {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -880,6 +880,51 @@ SplitDecision.Reason.Unknown, new AtomicLong(6))
assertEquals(actual, expected);
}

@Test
public void testDisableBroker() throws Exception {
// Starts a third broker that uses the extensible load manager, makes it the owner of a bundle,
// disables it, and verifies that the bundle ends up owned by exactly one of the other two brokers.
ServiceConfiguration defaultConf = getDefaultConf();
defaultConf.setAllowAutoTopicCreation(true);
defaultConf.setForceDeleteNamespaceAllowed(true);
defaultConf.setLoadManagerClassName(ExtensibleLoadManagerImpl.class.getName());
defaultConf.setLoadBalancerSheddingEnabled(false);
try (var additionalPulsarTestContext = createAdditionalPulsarTestContext(defaultConf)) {
var pulsar3 = additionalPulsarTestContext.getPulsarService();
// The ExtensibleLoadManagerImpl is read reflectively from the "loadManager" field of the
// third broker's load manager and wrapped in a spy.
ExtensibleLoadManagerImpl ternaryLoadManager = spy((ExtensibleLoadManagerImpl)
FieldUtils.readField(pulsar3.getLoadManager().get(), "loadManager", true));
String topic = "persistent://public/default/test";

String lookupResult1 = pulsar3.getAdminClient().lookups().lookupTopic(topic);
TopicName topicName = TopicName.get("test");
NamespaceBundle bundle = getBundleAsync(pulsar1, topicName).get();
// If the first lookup did not resolve to the third broker, unload the bundle (presumably the
// third argument is the destination broker -- confirm) and look the topic up again.
if (!pulsar3.getBrokerServiceUrl().equals(lookupResult1)) {
admin.namespaces().unloadNamespaceBundle(topicName.getNamespace(), bundle.getBundleRange(),
pulsar3.getLookupServiceAddress());
lookupResult1 = pulsar2.getAdminClient().lookups().lookupTopic(topic);
}
String lookupResult2 = pulsar1.getAdminClient().lookups().lookupTopic(topic);
String lookupResult3 = pulsar2.getAdminClient().lookups().lookupTopic(topic);

// Precondition: lookups through every broker resolve the topic to the third broker.
assertEquals(lookupResult1, pulsar3.getBrokerServiceUrl());
assertEquals(lookupResult1, lookupResult2);
assertEquals(lookupResult1, lookupResult3);


// Precondition: only the third broker's load manager reports ownership of the bundle.
assertFalse(primaryLoadManager.checkOwnershipAsync(Optional.empty(), bundle).get());
assertFalse(secondaryLoadManager.checkOwnershipAsync(Optional.empty(), bundle).get());
assertTrue(ternaryLoadManager.checkOwnershipAsync(Optional.empty(), bundle).get());

// Action under test.
ternaryLoadManager.disableBroker();

// The disabled broker must no longer own the bundle, and exactly one of the two remaining
// brokers must own it.
assertFalse(ternaryLoadManager.checkOwnershipAsync(Optional.empty(), bundle).get());
if (primaryLoadManager.checkOwnershipAsync(Optional.empty(), bundle).get()) {
assertFalse(secondaryLoadManager.checkOwnershipAsync(Optional.empty(), bundle).get());
} else {
assertTrue(secondaryLoadManager.checkOwnershipAsync(Optional.empty(), bundle).get());
}
}
}

private static abstract class MockBrokerFilter implements BrokerFilter {

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,7 @@ public void transferTestWhenDestBrokerFails()

// recovered, check the monitor update state : Assigned -> Owned
doReturn(CompletableFuture.completedFuture(Optional.of(lookupServiceAddress1)))
.when(loadManager).selectAsync(any());
.when(loadManager).selectAsync(any(), any());
FieldUtils.writeDeclaredField(channel2, "producer", producer, true);
FieldUtils.writeDeclaredField(channel1,
"inFlightStateWaitingTimeInMillis", 1 , true);
Expand Down Expand Up @@ -735,7 +735,7 @@ public void handleBrokerDeletionEventTest()
var owner1 = channel1.getOwnerAsync(bundle1);
var owner2 = channel2.getOwnerAsync(bundle2);
doReturn(CompletableFuture.completedFuture(Optional.of(lookupServiceAddress2)))
.when(loadManager).selectAsync(any());
.when(loadManager).selectAsync(any(), any());
assertTrue(owner1.get().isEmpty());
assertTrue(owner2.get().isEmpty());

Expand Down Expand Up @@ -1101,7 +1101,7 @@ public void assignTestWhenDestBrokerProducerFails()
FieldUtils.writeDeclaredField(channel2,
"inFlightStateWaitingTimeInMillis", 3 * 1000, true);
doReturn(CompletableFuture.completedFuture(Optional.of(lookupServiceAddress2)))
.when(loadManager).selectAsync(any());
.when(loadManager).selectAsync(any(), any());
channel1.publishAssignEventAsync(bundle, lookupServiceAddress2);
// channel1 is broken. the assign won't be complete.
waitUntilState(channel1, bundle);
Expand Down Expand Up @@ -1440,7 +1440,7 @@ public void testOverrideInactiveBrokerStateData()

// test stable metadata state
doReturn(CompletableFuture.completedFuture(Optional.of(lookupServiceAddress2)))
.when(loadManager).selectAsync(any());
.when(loadManager).selectAsync(any(), any());
leaderChannel.handleMetadataSessionEvent(SessionReestablished);
followerChannel.handleMetadataSessionEvent(SessionReestablished);
FieldUtils.writeDeclaredField(leaderChannel, "lastMetadataSessionEventTimestamp",
Expand Down Expand Up @@ -1505,7 +1505,7 @@ public void testOverrideOrphanStateData()

// test stable metadata state
doReturn(CompletableFuture.completedFuture(Optional.of(lookupServiceAddress2)))
.when(loadManager).selectAsync(any());
.when(loadManager).selectAsync(any(), any());
FieldUtils.writeDeclaredField(leaderChannel, "inFlightStateWaitingTimeInMillis",
-1, true);
FieldUtils.writeDeclaredField(followerChannel, "inFlightStateWaitingTimeInMillis",
Expand Down

0 comments on commit ed773f4

Please sign in to comment.