diff --git a/packages/grpc-js/src/load-balancer-pick-first.ts b/packages/grpc-js/src/load-balancer-pick-first.ts index e042e1161..145ca3e39 100644 --- a/packages/grpc-js/src/load-balancer-pick-first.ts +++ b/packages/grpc-js/src/load-balancer-pick-first.ts @@ -213,8 +213,6 @@ export class PickFirstLoadBalancer implements LoadBalancer { */ private connectionDelayTimeout: NodeJS.Timeout; - private triedAllSubchannels = false; - /** * The LB policy enters sticky TRANSIENT_FAILURE mode when all * subchannels have failed to connect at least once, and it stays in that @@ -225,12 +223,6 @@ export class PickFirstLoadBalancer implements LoadBalancer { private reportHealthStatus: boolean; - /** - * Indicates whether we called channelControlHelper.requestReresolution since - * the last call to updateAddressList - */ - private requestedResolutionSinceLastUpdate = false; - /** * The most recent error reported by any subchannel as it transitioned to * TRANSIENT_FAILURE. @@ -259,6 +251,10 @@ export class PickFirstLoadBalancer implements LoadBalancer { return this.children.every(child => child.hasReportedTransientFailure); } + private resetChildrenReportedTF() { + this.children.every(child => child.hasReportedTransientFailure = false); + } + private calculateAndReportNewState() { if (this.currentPick) { if (this.reportHealthStatus && !this.currentPick.isHealthy()) { @@ -291,7 +287,6 @@ export class PickFirstLoadBalancer implements LoadBalancer { } private requestReresolution() { - this.requestedResolutionSinceLastUpdate = true; this.channelControlHelper.requestReresolution(); } @@ -299,15 +294,8 @@ export class PickFirstLoadBalancer implements LoadBalancer { if (!this.allChildrenHaveReportedTF()) { return; } - if (!this.requestedResolutionSinceLastUpdate) { - /* Each time we get an update we reset each subchannel's - * hasReportedTransientFailure flag, so the next time we get to this - * point after that, each subchannel has reported TRANSIENT_FAILURE - * at least once since then. That is the trigger for requesting - * reresolution, whether or not the LB policy is already in sticky TF - * mode. */ - this.requestReresolution(); - } + this.requestReresolution(); + this.resetChildrenReportedTF(); if (this.stickyTransientFailureMode) { return; } @@ -320,21 +308,16 @@ export class PickFirstLoadBalancer implements LoadBalancer { private removeCurrentPick() { if (this.currentPick !== null) { - /* Unref can cause a state change, which can cause a change in the value - * of this.currentPick, so we hold a local reference to make sure that - * does not impact this function. */ - const currentPick = this.currentPick; - this.currentPick = null; - currentPick.unref(); - currentPick.removeConnectivityStateListener(this.subchannelStateListener); + this.currentPick.removeConnectivityStateListener(this.subchannelStateListener); this.channelControlHelper.removeChannelzChild( - currentPick.getChannelzRef() + this.currentPick.getChannelzRef() ); - if (this.reportHealthStatus) { - currentPick.removeHealthStateWatcher( - this.pickedSubchannelHealthListener - ); - } + this.currentPick.removeHealthStateWatcher( + this.pickedSubchannelHealthListener + ); + // Unref last, to avoid triggering listeners + this.currentPick.unref(); + this.currentPick = null; } } @@ -374,9 +357,6 @@ export class PickFirstLoadBalancer implements LoadBalancer { private startNextSubchannelConnecting(startIndex: number) { clearTimeout(this.connectionDelayTimeout); - if (this.triedAllSubchannels) { - return; - } for (const [index, child] of this.children.entries()) { if (index >= startIndex) { const subchannelState = child.subchannel.getConnectivityState(); @@ -389,7 +369,6 @@ export class PickFirstLoadBalancer implements LoadBalancer { } } } - this.triedAllSubchannels = true; this.maybeEnterStickyTransientFailureMode(); } @@ -418,20 +397,25 @@ export class PickFirstLoadBalancer implements LoadBalancer { this.connectionDelayTimeout.unref?.(); } + /** + * Declare that the specified subchannel should be used to make requests. + * This functions the same independent of whether subchannel is a member of + * this.children and whether it is equal to this.currentPick. + * Prerequisite: subchannel.getConnectivityState() === READY. + * @param subchannel + */ private pickSubchannel(subchannel: SubchannelInterface) { - if (this.currentPick && subchannel.realSubchannelEquals(this.currentPick)) { - return; - } trace('Pick subchannel with address ' + subchannel.getAddress()); this.stickyTransientFailureMode = false; - this.removeCurrentPick(); - this.currentPick = subchannel; + /* Ref before removeCurrentPick and resetSubchannelList to avoid the + * refcount dropping to 0 during this process. */ subchannel.ref(); - if (this.reportHealthStatus) { - subchannel.addHealthStateWatcher(this.pickedSubchannelHealthListener); - } this.channelControlHelper.addChannelzChild(subchannel.getChannelzRef()); + this.removeCurrentPick(); this.resetSubchannelList(); + subchannel.addConnectivityStateListener(this.subchannelStateListener); + subchannel.addHealthStateWatcher(this.pickedSubchannelHealthListener); + this.currentPick = subchannel; clearTimeout(this.connectionDelayTimeout); this.calculateAndReportNewState(); } @@ -448,20 +432,11 @@ export class PickFirstLoadBalancer implements LoadBalancer { private resetSubchannelList() { for (const child of this.children) { - if ( - !( - this.currentPick && - child.subchannel.realSubchannelEquals(this.currentPick) - ) - ) { - /* The connectivity state listener is the same whether the subchannel - * is in the list of children or it is the currentPick, so if it is in - * both, removing it here would cause problems. In particular, that - * always happens immediately after the subchannel is picked. */ - child.subchannel.removeConnectivityStateListener( - this.subchannelStateListener - ); - } + /* Always remoev the connectivity state listener. If the subchannel is + getting picked, it will be re-added then. */ + child.subchannel.removeConnectivityStateListener( + this.subchannelStateListener + ); /* Refs are counted independently for the children list and the * currentPick, so we call unref whether or not the child is the * currentPick. Channelz child references are also refcounted, so @@ -473,20 +448,16 @@ export class PickFirstLoadBalancer implements LoadBalancer { } this.currentSubchannelIndex = 0; this.children = []; - this.triedAllSubchannels = false; - this.requestedResolutionSinceLastUpdate = false; } private connectToAddressList(addressList: SubchannelAddress[]) { + trace('connectToAddressList([' + addressList.map(address => subchannelAddressToString(address)) + '])'); const newChildrenList = addressList.map(address => ({ subchannel: this.channelControlHelper.createSubchannel(address, {}), hasReportedTransientFailure: false, })); - trace('connectToAddressList([' + addressList.map(address => subchannelAddressToString(address)) + '])'); for (const { subchannel } of newChildrenList) { if (subchannel.getConnectivityState() === ConnectivityState.READY) { - this.channelControlHelper.addChannelzChild(subchannel.getChannelzRef()); - subchannel.addConnectivityStateListener(this.subchannelStateListener); this.pickSubchannel(subchannel); return; }