diff --git a/include/envoy/upstream/upstream.h b/include/envoy/upstream/upstream.h index 4f3651db8966..2728b94d9ae8 100644 --- a/include/envoy/upstream/upstream.h +++ b/include/envoy/upstream/upstream.h @@ -150,6 +150,7 @@ class HostSet { #define ALL_CLUSTER_STATS(COUNTER, GAUGE, TIMER) \ COUNTER(lb_healthy_panic) \ COUNTER(lb_local_cluster_not_ok) \ + COUNTER(lb_recalculate_zone_structures) \ COUNTER(lb_zone_cluster_too_small) \ COUNTER(lb_zone_no_capacity_left) \ COUNTER(lb_zone_number_differs) \ diff --git a/source/common/upstream/load_balancer_impl.cc b/source/common/upstream/load_balancer_impl.cc index 6411e6415fd3..1519a98615c8 100644 --- a/source/common/upstream/load_balancer_impl.cc +++ b/source/common/upstream/load_balancer_impl.cc @@ -12,29 +12,90 @@ static const std::string RuntimeZoneEnabled = "upstream.zone_routing.enabled"; static const std::string RuntimeMinClusterSize = "upstream.zone_routing.min_cluster_size"; static const std::string RuntimePanicThreshold = "upstream.healthy_panic_threshold"; -bool LoadBalancerBase::earlyExitNonZoneRouting() { - uint32_t number_of_zones = host_set_.healthyHostsPerZone().size(); - if (number_of_zones < 2 || !runtime_.snapshot().featureEnabled(RuntimeZoneEnabled, 100)) { - return true; +LoadBalancerBase::LoadBalancerBase(const HostSet& host_set, const HostSet* local_host_set, + ClusterStats& stats, Runtime::Loader& runtime, + Runtime::RandomGenerator& random) + : stats_(stats), runtime_(runtime), random_(random), host_set_(host_set), + local_host_set_(local_host_set) { + if (local_host_set_) { + host_set_.addMemberUpdateCb([this](const std::vector&, const std::vector&) + -> void { regenerateZoneRoutingStructures(); }); + local_host_set_->addMemberUpdateCb( + [this](const std::vector&, const std::vector&) + -> void { regenerateZoneRoutingStructures(); }); } +} - const std::vector& local_zone_healthy_hosts = host_set_.healthyHostsPerZone()[0]; - if (local_zone_healthy_hosts.empty()) { - return true; +void 
LoadBalancerBase::regenerateZoneRoutingStructures() { + stats_.lb_recalculate_zone_structures_.inc(); + + // Do not perform any calculations if we cannot perform zone routing based on non-runtime params. + if (earlyExitNonZoneRouting()) { + zone_routing_state_ = ZoneRoutingState::NoZoneRouting; + return; } - // Do not perform zone routing for small clusters. - uint64_t min_cluster_size = runtime_.snapshot().getInteger(RuntimeMinClusterSize, 6U); + size_t num_zones = host_set_.healthyHostsPerZone().size(); - if (host_set_.healthyHosts().size() < min_cluster_size) { - stats_.lb_zone_cluster_too_small_.inc(); + uint64_t local_percentage[num_zones]; + calculateZonePercentage(local_host_set_->healthyHostsPerZone(), local_percentage); + + uint64_t upstream_percentage[num_zones]; + calculateZonePercentage(host_set_.healthyHostsPerZone(), upstream_percentage); + + // If we have lower percent of hosts in the local cluster in the same zone, + // we can push all of the requests directly to upstream cluster in the same zone. + if (upstream_percentage[0] >= local_percentage[0]) { + zone_routing_state_ = ZoneRoutingState::ZoneDirect; + return; + } + + zone_routing_state_ = ZoneRoutingState::ZoneResidual; + + // If we cannot route all requests to the same zone, calculate what percentage can be routed. + // For example, if local percentage is 20% and upstream is 10% + // we can route only 50% of requests directly. + local_percent_to_route_ = upstream_percentage[0] * 10000 / local_percentage[0]; + + // Local zone does not have additional capacity (we have already routed what we could). + // Now we need to figure out how much traffic we can route cross zone and to which exact zone + // we should route. Percentage of requests routed cross zone to a specific zone needs to be + // proportional to the residual capacity upstream zone has. + // + // residual_capacity contains capacity left in a given zone, we keep accumulating residual + // capacity to make search for sampled value easier. 
+ // For example, if we have the following upstream and local percentage: + // local_percentage: 40000 40000 20000 + // upstream_percentage: 25000 50000 25000 + // Residual capacity would look like: 0 10000 5000. Now we need to sample proportionally to + // bucket sizes (residual capacity). For simplicity of finding where specific + // sampled value is, we accumulate values in residual capacity. This is what it will look like: + // residual_capacity: 0 10000 15000 + // Now to find a zone to route (bucket) we could simply iterate over residual_capacity searching + // where sampled value is placed. + residual_capacity_.resize(num_zones); + + // Local zone (index 0) does not have residual capacity as we have routed all we could. + residual_capacity_[0] = 0; + for (size_t i = 1; i < num_zones; ++i) { + // Only route to the zones that have additional capacity. + if (upstream_percentage[i] > local_percentage[i]) { + residual_capacity_[i] = + residual_capacity_[i - 1] + upstream_percentage[i] - local_percentage[i]; + } else { + // Zone with index "i" does not have residual capacity, but we keep accumulating previous + // values to make search easier on the next step. + residual_capacity_[i] = residual_capacity_[i - 1]; + } + } +} + +bool LoadBalancerBase::earlyExitNonZoneRouting() { + if (host_set_.healthyHostsPerZone().size() < 2) { return true; } - // If local cluster is not set, or we are in panic mode for it. - if (local_host_set_ == nullptr || local_host_set_->hosts().empty() || - isGlobalPanic(*local_host_set_)) { - stats_.lb_local_cluster_not_ok_.inc(); + if (host_set_.healthyHostsPerZone()[0].empty()) { return true; } @@ -44,6 +105,13 @@ bool LoadBalancerBase::earlyExitNonZoneRouting() { return true; } + // Do not perform zone routing for small clusters. 
+ uint64_t min_cluster_size = runtime_.snapshot().getInteger(RuntimeMinClusterSize, 6U); + if (host_set_.healthyHosts().size() < min_cluster_size) { + stats_.lb_zone_cluster_too_small_.inc(); + return true; + } + return false; } @@ -77,31 +145,25 @@ void LoadBalancerBase::calculateZonePercentage( } const std::vector& LoadBalancerBase::tryChooseLocalZoneHosts() { + ASSERT(zone_routing_state_ != ZoneRoutingState::NoZoneRouting); + // At this point it's guaranteed to be at least 2 zones. size_t number_of_zones = host_set_.healthyHostsPerZone().size(); ASSERT(number_of_zones >= 2U); ASSERT(local_host_set_->healthyHostsPerZone().size() == host_set_.healthyHostsPerZone().size()); - uint64_t local_percentage[number_of_zones]; - calculateZonePercentage(local_host_set_->healthyHostsPerZone(), local_percentage); - - uint64_t upstream_percentage[number_of_zones]; - calculateZonePercentage(host_set_.healthyHostsPerZone(), upstream_percentage); - // Try to push all of the requests to the same zone first. - // If we have lower percent of hosts in the local cluster in the same zone, - // we can push all of the requests directly to upstream cluster in the same zone. - if (upstream_percentage[0] >= local_percentage[0]) { + if (zone_routing_state_ == ZoneRoutingState::ZoneDirect) { stats_.lb_zone_routing_all_directly_.inc(); return host_set_.healthyHostsPerZone()[0]; } - // If we cannot route all requests to the same zone, calculate what percentage can be routed. - // For example, if local percentage is 20% and upstream is 10% - // we can route only 50% of requests directly. - uint64_t local_percent_route = upstream_percentage[0] * 10000 / local_percentage[0]; - if (random_.random() % 10000 < local_percent_route) { + ASSERT(zone_routing_state_ == ZoneRoutingState::ZoneResidual); + + // If we cannot route all requests to the same zone, we already calculated how much we can + // push to the local zone, check if we can push to local zone on current iteration. 
+ if (random_.random() % 10000 < local_percent_to_route_) { stats_.lb_zone_routing_sampled_.inc(); return host_set_.healthyHostsPerZone()[0]; } @@ -109,53 +171,21 @@ const std::vector& LoadBalancerBase::tryChooseLocalZoneHosts() { // At this point we must route cross zone as we cannot route to the local zone. stats_.lb_zone_routing_cross_zone_.inc(); - // Local zone does not have additional capacity (we have already routed what we could). - // Now we need to figure out how much traffic we can route cross zone and to which exact zone - // we should route. Percentage of requests routed cross zone to a specific zone needed be - // proportional to the residual capacity upstream zone has. - // - // residual_capacity contains capacity left in a given zone, we keep accumulating residual - // capacity to make search for sampled value easier. - // For example, if we have the following upstream and local percentage: - // local_percentage: 40000 40000 20000 - // upstream_percentage: 25000 50000 25000 - // Residual capacity would look like: 0 10000 5000. Now we need to sample proportionally to - // bucket sizes (residual capacity). For simplicity of finding where specific - // sampled value is, we accumulate values in residual capacity. This is what it will look like: - // residual_capacity: 0 10000 15000 - // Now to find a zone to route (bucket) we could simply iterate over residual_capacity searching - // where sampled value is placed. - uint64_t residual_capacity[number_of_zones]; - - // Local zone (index 0) does not have residual capacity as we have routed all we could. - residual_capacity[0] = 0; - for (size_t i = 1; i < number_of_zones; ++i) { - // Only route to the zones that have additional capacity. 
- if (upstream_percentage[i] > local_percentage[i]) { - residual_capacity[i] = - residual_capacity[i - 1] + upstream_percentage[i] - local_percentage[i]; - } else { - // Zone with index "i" does not have residual capacity, but we keep accumulating previous - // values to make search easier on the next step. - residual_capacity[i] = residual_capacity[i - 1]; - } - } - // This is *extremely* unlikely but possible due to rounding errors when calculating // zone percentages. In this case just select random zone. - if (residual_capacity[number_of_zones - 1] == 0) { + if (residual_capacity_[number_of_zones - 1] == 0) { stats_.lb_zone_no_capacity_left_.inc(); return host_set_.healthyHostsPerZone()[random_.random() % number_of_zones]; } // Random sampling to select specific zone for cross zone traffic based on the additional // capacity in zones. - uint64_t threshold = random_.random() % residual_capacity[number_of_zones - 1]; + uint64_t threshold = random_.random() % residual_capacity_[number_of_zones - 1]; // This potentially can be optimized to be O(log(N)) where N is the number of zones. // Linear scan should be faster for smaller N, in most of the scenarios N will be small. 
 int i = 0; - while (threshold > residual_capacity[i]) { + while (threshold >= residual_capacity_[i]) { i++; } @@ -169,7 +199,16 @@ const std::vector& LoadBalancerBase::hostsToUse() { return host_set_.hosts(); } - if (earlyExitNonZoneRouting()) { + if (zone_routing_state_ == ZoneRoutingState::NoZoneRouting) { + return host_set_.healthyHosts(); + } + + if (!runtime_.snapshot().featureEnabled(RuntimeZoneEnabled, 100)) { + return host_set_.healthyHosts(); + } + + if (local_host_set_->hosts().empty() || isGlobalPanic(*local_host_set_)) { + stats_.lb_local_cluster_not_ok_.inc(); return host_set_.healthyHosts(); } diff --git a/source/common/upstream/load_balancer_impl.h b/source/common/upstream/load_balancer_impl.h index 265056f060e6..d10b5d33f87c 100644 --- a/source/common/upstream/load_balancer_impl.h +++ b/source/common/upstream/load_balancer_impl.h @@ -12,9 +12,7 @@ namespace Upstream { class LoadBalancerBase { protected: LoadBalancerBase(const HostSet& host_set, const HostSet* local_host_set, ClusterStats& stats, - Runtime::Loader& runtime, Runtime::RandomGenerator& random) - : stats_(stats), runtime_(runtime), random_(random), host_set_(host_set), - local_host_set_(local_host_set) {} + Runtime::Loader& runtime, Runtime::RandomGenerator& random); /** * Pick the host list to use (healthy or all depending on how many in the set are not healthy). @@ -26,8 +24,11 @@ class LoadBalancerBase { Runtime::RandomGenerator& random_; private: + enum class ZoneRoutingState { NoZoneRouting, ZoneDirect, ZoneResidual }; + /* - * @return decision on quick exit from zone aware host selection. + * @return decision on quick exit from zone aware routing based on cluster configuration. + * This gets recalculated on update callback. */ bool earlyExitNonZoneRouting(); @@ -51,8 +52,17 @@ class LoadBalancerBase { void calculateZonePercentage(const std::vector>& hosts_per_zone, uint64_t* ret); + /** + * Regenerate zone aware routing structures for fast decisions on upstream zone selection. 
+ */ + void regenerateZoneRoutingStructures(); + const HostSet& host_set_; const HostSet* local_host_set_; + + uint64_t local_percent_to_route_{}; + ZoneRoutingState zone_routing_state_{ZoneRoutingState::NoZoneRouting}; + std::vector residual_capacity_; }; /** diff --git a/source/common/upstream/upstream_impl.h b/source/common/upstream/upstream_impl.h index e447816c9df5..14550368e7c1 100644 --- a/source/common/upstream/upstream_impl.h +++ b/source/common/upstream/upstream_impl.h @@ -109,7 +109,10 @@ typedef std::shared_ptr>> ConstHostListsP */ class HostSetImpl : public virtual HostSet { public: - HostSetImpl() : hosts_(new std::vector()), healthy_hosts_(new std::vector()) {} + HostSetImpl() + : hosts_(new std::vector()), healthy_hosts_(new std::vector()), + hosts_per_zone_(new std::vector>()), + healthy_hosts_per_zone_(new std::vector>()) {} ConstHostVectorPtr rawHosts() const { return hosts_; } ConstHostVectorPtr rawHealthyHosts() const { return healthy_hosts_; } diff --git a/test/common/upstream/load_balancer_impl_test.cc b/test/common/upstream/load_balancer_impl_test.cc index ee3a2b753c3a..00f7ed0b7ed6 100644 --- a/test/common/upstream/load_balancer_impl_test.cc +++ b/test/common/upstream/load_balancer_impl_test.cc @@ -110,16 +110,18 @@ TEST_F(RoundRobinLoadBalancerTest, ZoneAwareSmallCluster) { .WillRepeatedly(Return(6)); EXPECT_EQ(cluster_.healthy_hosts_[0], lb_->chooseHost()); - EXPECT_EQ(1U, stats_.lb_zone_cluster_too_small_.value()); EXPECT_EQ(cluster_.healthy_hosts_[1], lb_->chooseHost()); - EXPECT_EQ(2U, stats_.lb_zone_cluster_too_small_.value()); EXPECT_EQ(cluster_.healthy_hosts_[2], lb_->chooseHost()); - EXPECT_EQ(3U, stats_.lb_zone_cluster_too_small_.value()); + + // Cluster size is computed once at zone aware struct regeneration point. + EXPECT_EQ(1U, stats_.lb_zone_cluster_too_small_.value()); EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.zone_routing.min_cluster_size", 6)) .WillRepeatedly(Return(1)); + // Trigger reload. 
+ local_cluster_hosts_->updateHosts(hosts, hosts, hosts_per_zone, hosts_per_zone, + empty_host_vector_, empty_host_vector_); EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_->chooseHost()); - EXPECT_EQ(3U, stats_.lb_zone_cluster_too_small_.value()); } TEST_F(RoundRobinLoadBalancerTest, NoZoneAwareDifferentZoneSize) { @@ -146,8 +148,7 @@ TEST_F(RoundRobinLoadBalancerTest, NoZoneAwareDifferentZoneSize) { .WillRepeatedly(Return(50)); EXPECT_CALL(runtime_.snapshot_, featureEnabled("upstream.zone_routing.enabled", 100)) .WillRepeatedly(Return(true)); - EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.zone_routing.min_cluster_size", 6)) - .WillOnce(Return(1)); + EXPECT_EQ(cluster_.healthy_hosts_[0], lb_->chooseHost()); EXPECT_EQ(1U, stats_.lb_zone_number_differs_.value()); } @@ -163,20 +164,19 @@ TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingLargeZoneSwitchOnOff) { {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:80")}, {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:82")}})); - cluster_.healthy_hosts_ = *hosts; - cluster_.hosts_ = *hosts; - cluster_.healthy_hosts_per_zone_ = *hosts_per_zone; - local_cluster_hosts_->updateHosts(hosts, hosts, hosts_per_zone, hosts_per_zone, - empty_host_vector_, empty_host_vector_); - EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.healthy_panic_threshold", 50)) .WillRepeatedly(Return(50)); EXPECT_CALL(runtime_.snapshot_, featureEnabled("upstream.zone_routing.enabled", 100)) .WillRepeatedly(Return(true)); EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.zone_routing.min_cluster_size", 6)) - .Times(2) .WillRepeatedly(Return(3)); + cluster_.healthy_hosts_ = *hosts; + cluster_.hosts_ = *hosts; + cluster_.healthy_hosts_per_zone_ = *hosts_per_zone; + local_cluster_hosts_->updateHosts(hosts, hosts, hosts_per_zone, hosts_per_zone, + empty_host_vector_, empty_host_vector_); + // There is only one host in the given zone for zone aware routing. 
 EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_->chooseHost()); EXPECT_EQ(1U, stats_.lb_zone_routing_all_directly_.value()); @@ -214,24 +214,24 @@ TEST_F(RoundRobinLoadBalancerTest, ZoneAwareRoutingSmallZone) { {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:1")}, {newTestHost(Upstream::MockCluster{}, "tcp://127.0.0.1:2")}})); - cluster_.healthy_hosts_ = *upstream_hosts; - cluster_.hosts_ = *upstream_hosts; - cluster_.healthy_hosts_per_zone_ = *upstream_hosts_per_zone; - local_cluster_hosts_->updateHosts(local_hosts, local_hosts, local_hosts_per_zone, - local_hosts_per_zone, empty_host_vector_, empty_host_vector_); - EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.healthy_panic_threshold", 50)) .WillRepeatedly(Return(50)); EXPECT_CALL(runtime_.snapshot_, featureEnabled("upstream.zone_routing.enabled", 100)) .WillRepeatedly(Return(true)); EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.zone_routing.min_cluster_size", 6)) - .Times(2) .WillRepeatedly(Return(5)); + cluster_.healthy_hosts_ = *upstream_hosts; + cluster_.hosts_ = *upstream_hosts; + cluster_.healthy_hosts_per_zone_ = *upstream_hosts_per_zone; + local_cluster_hosts_->updateHosts(local_hosts, local_hosts, local_hosts_per_zone, + local_hosts_per_zone, empty_host_vector_, empty_host_vector_); + // There is only one host in the given zone for zone aware routing. EXPECT_CALL(random_, random()).WillOnce(Return(100)); EXPECT_EQ(cluster_.healthy_hosts_per_zone_[0][0], lb_->chooseHost()); EXPECT_EQ(1U, stats_.lb_zone_routing_sampled_.value()); + // Force request out of small zone. 
 EXPECT_CALL(random_, random()).WillOnce(Return(9999)).WillOnce(Return(2)); EXPECT_EQ(cluster_.healthy_hosts_per_zone_[1][1], lb_->chooseHost()); @@ -257,7 +257,7 @@ TEST_F(RoundRobinLoadBalancerTest, LowPrecisionForDistribution) { EXPECT_CALL(runtime_.snapshot_, featureEnabled("upstream.zone_routing.enabled", 100)) .WillRepeatedly(Return(true)); EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.zone_routing.min_cluster_size", 6)) - .WillOnce(Return(1)); + .WillRepeatedly(Return(1)); // The following host distribution with current precision should lead to the no_capacity_left // situation. @@ -276,8 +276,6 @@ TEST_F(RoundRobinLoadBalancerTest, LowPrecisionForDistribution) { current[i] = host; } local_hosts_per_zone->push_back(current); - local_cluster_hosts_->updateHosts(local_hosts, local_hosts, local_hosts_per_zone, - local_hosts_per_zone, empty_host_vector_, empty_host_vector_); current.resize(44999); for (int i = 0; i < 44999; ++i) { @@ -293,6 +291,10 @@ TEST_F(RoundRobinLoadBalancerTest, LowPrecisionForDistribution) { cluster_.healthy_hosts_per_zone_ = *upstream_hosts_per_zone; + // To trigger update callback. + local_cluster_hosts_->updateHosts(local_hosts, local_hosts, local_hosts_per_zone, + local_hosts_per_zone, empty_host_vector_, empty_host_vector_); + // Force request out of small zone and to randomly select zone. EXPECT_CALL(random_, random()).WillOnce(Return(9999)).WillOnce(Return(2)); lb_->chooseHost();