Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: optimize zone aware load balancing #227

Merged
merged 9 commits into from
Nov 17, 2016
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/envoy/upstream/upstream.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ class HostSet {
#define ALL_CLUSTER_STATS(COUNTER, GAUGE, TIMER) \
COUNTER(lb_healthy_panic) \
COUNTER(lb_local_cluster_not_ok) \
COUNTER(lb_recalculate_zone_structures) \
COUNTER(lb_zone_cluster_too_small) \
COUNTER(lb_zone_no_capacity_left) \
COUNTER(lb_zone_number_differs) \
Expand Down
160 changes: 101 additions & 59 deletions source/common/upstream/load_balancer_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,91 @@ static const std::string RuntimeZoneEnabled = "upstream.zone_routing.enabled";
static const std::string RuntimeMinClusterSize = "upstream.zone_routing.min_cluster_size";
static const std::string RuntimePanicThreshold = "upstream.healthy_panic_threshold";

bool LoadBalancerBase::earlyExitNonZoneRouting() {
uint32_t number_of_zones = host_set_.healthyHostsPerZone().size();
if (number_of_zones < 2 || !runtime_.snapshot().featureEnabled(RuntimeZoneEnabled, 100)) {
LoadBalancerBase::LoadBalancerBase(const HostSet& host_set, const HostSet* local_host_set,
ClusterStats& stats, Runtime::Loader& runtime,
Runtime::RandomGenerator& random)
: stats_(stats), runtime_(runtime), random_(random), host_set_(host_set),
local_host_set_(local_host_set), early_exit_zone_routing_(true) {
if (local_host_set_) {
host_set_.addMemberUpdateCb([this](const std::vector<HostPtr>&, const std::vector<HostPtr>&)
-> void { regenerateZoneRoutingStructures(); });
local_host_set_->addMemberUpdateCb(
[this](const std::vector<HostPtr>&, const std::vector<HostPtr>&)
-> void { regenerateZoneRoutingStructures(); });
}
}

void LoadBalancerBase::regenerateZoneRoutingStructures() {
stats_.lb_recalculate_zone_structures_.inc();

early_exit_zone_routing_ = earlyExitNonZoneRouting();
// Do not perform any calculations if we cannot perform zone routing based on non runtime params.
if (early_exit_zone_routing_) {
return;
}

size_t num_zones = host_set_.healthyHostsPerZone().size();

uint64_t local_percentage[num_zones];
calculateZonePercentage(local_host_set_->healthyHostsPerZone(), local_percentage);

uint64_t upstream_percentage[num_zones];
calculateZonePercentage(host_set_.healthyHostsPerZone(), upstream_percentage);

// If we have a lower percentage of hosts in the local cluster in the same zone,
// we can push all of the requests directly to the upstream cluster in the same zone.
if ((route_directly_ = upstream_percentage[0] >= local_percentage[0])) {
return;
}

// If we cannot route all requests to the same zone, calculate what percentage can be routed.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: newline before this line

// For example, if local percentage is 20% and upstream is 10%
// we can route only 50% of requests directly.
local_percent_to_route_ = upstream_percentage[0] * 10000 / local_percentage[0];

// Local zone does not have additional capacity (we have already routed what we could).
// Now we need to figure out how much traffic we can route cross zone and to which exact zone
// we should route. The percentage of requests routed cross zone to a specific zone needs to be
// proportional to the residual capacity the upstream zone has.
//
// residual_capacity contains capacity left in a given zone, we keep accumulating residual
// capacity to make search for sampled value easier.
// For example, if we have the following upstream and local percentage:
// local_percentage: 40000 40000 20000
// upstream_percentage: 25000 50000 25000
// Residual capacity would look like: 0 10000 5000. Now we need to sample proportionally to
// bucket sizes (residual capacity). For simplicity of finding where specific
// sampled value is, we accumulate values in residual capacity. This is what it will look like:
// residual_capacity: 0 10000 15000
// Now to find a zone to route (bucket) we could simply iterate over residual_capacity searching
// where sampled value is placed.
residual_capacity_.resize(num_zones);

// Local zone (index 0) does not have residual capacity as we have routed all we could.
residual_capacity_[0] = 0;
for (size_t i = 1; i < num_zones; ++i) {
// Only route to the zones that have additional capacity.
if (upstream_percentage[i] > local_percentage[i]) {
residual_capacity_[i] =
residual_capacity_[i - 1] + upstream_percentage[i] - local_percentage[i];
} else {
// Zone with index "i" does not have residual capacity, but we keep accumulating previous
// values to make search easier on the next step.
residual_capacity_[i] = residual_capacity_[i - 1];
}
}
};

bool LoadBalancerBase::earlyExitNonZoneRoutingRuntime() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would get rid of this function. Everything in here can either be done ahead of time (min cluster size, etc.) or can be moved into the tryChooseLocalZoneHosts() function

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i'll move runtime checks into tryChooseLocalZoneHosts(), we need access runtime values on each iteration if we are not in NoZoneRouting

// Global kill switch for zone aware routing.
if (!runtime_.snapshot().featureEnabled(RuntimeZoneEnabled, 100)) {
return true;
}

const std::vector<HostPtr>& local_zone_healthy_hosts = host_set_.healthyHostsPerZone()[0];
if (local_zone_healthy_hosts.empty()) {
ASSERT(local_host_set_ != nullptr);

if (local_host_set_->hosts().empty() || isGlobalPanic(*local_host_set_)) {
stats_.lb_local_cluster_not_ok_.inc();
return true;
}

Expand All @@ -31,10 +108,17 @@ bool LoadBalancerBase::earlyExitNonZoneRouting() {
return true;
}

// If local cluster is not set, or we are in panic mode for it.
if (local_host_set_ == nullptr || local_host_set_->hosts().empty() ||
isGlobalPanic(*local_host_set_)) {
stats_.lb_local_cluster_not_ok_.inc();
return false;
}

bool LoadBalancerBase::earlyExitNonZoneRouting() {
uint32_t number_of_zones = host_set_.healthyHostsPerZone().size();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

local var not needed

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

removed

if (number_of_zones < 2) {
return true;
}

const std::vector<HostPtr>& local_zone_healthy_hosts = host_set_.healthyHostsPerZone()[0];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

local var not needed

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed.

if (local_zone_healthy_hosts.empty()) {
return true;
}

Expand Down Expand Up @@ -83,79 +167,37 @@ const std::vector<HostPtr>& LoadBalancerBase::tryChooseLocalZoneHosts() {
ASSERT(number_of_zones >= 2U);
ASSERT(local_host_set_->healthyHostsPerZone().size() == host_set_.healthyHostsPerZone().size());

uint64_t local_percentage[number_of_zones];
calculateZonePercentage(local_host_set_->healthyHostsPerZone(), local_percentage);

uint64_t upstream_percentage[number_of_zones];
calculateZonePercentage(host_set_.healthyHostsPerZone(), upstream_percentage);

// Try to push all of the requests to the same zone first.
// If we have a lower percentage of hosts in the local cluster in the same zone,
// we can push all of the requests directly to the upstream cluster in the same zone.
if (upstream_percentage[0] >= local_percentage[0]) {
if (route_directly_) {
stats_.lb_zone_routing_all_directly_.inc();
return host_set_.healthyHostsPerZone()[0];
}

// If we cannot route all requests to the same zone, calculate what percentage can be routed.
// For example, if local percentage is 20% and upstream is 10%
// we can route only 50% of requests directly.
uint64_t local_percent_route = upstream_percentage[0] * 10000 / local_percentage[0];
if (random_.random() % 10000 < local_percent_route) {
// If we cannot route all requests to the same zone, we already calculated how much we can
// push to the local zone, check if we can push to local zone on current iteration.
if (random_.random() % 10000 < local_percent_to_route_) {
stats_.lb_zone_routing_sampled_.inc();
return host_set_.healthyHostsPerZone()[0];
}

// At this point we must route cross zone as we cannot route to the local zone.
stats_.lb_zone_routing_cross_zone_.inc();

// Local zone does not have additional capacity (we have already routed what we could).
// Now we need to figure out how much traffic we can route cross zone and to which exact zone
// we should route. The percentage of requests routed cross zone to a specific zone needs to be
// proportional to the residual capacity the upstream zone has.
//
// residual_capacity contains capacity left in a given zone, we keep accumulating residual
// capacity to make search for sampled value easier.
// For example, if we have the following upstream and local percentage:
// local_percentage: 40000 40000 20000
// upstream_percentage: 25000 50000 25000
// Residual capacity would look like: 0 10000 5000. Now we need to sample proportionally to
// bucket sizes (residual capacity). For simplicity of finding where specific
// sampled value is, we accumulate values in residual capacity. This is what it will look like:
// residual_capacity: 0 10000 15000
// Now to find a zone to route (bucket) we could simply iterate over residual_capacity searching
// where sampled value is placed.
uint64_t residual_capacity[number_of_zones];

// Local zone (index 0) does not have residual capacity as we have routed all we could.
residual_capacity[0] = 0;
for (size_t i = 1; i < number_of_zones; ++i) {
// Only route to the zones that have additional capacity.
if (upstream_percentage[i] > local_percentage[i]) {
residual_capacity[i] =
residual_capacity[i - 1] + upstream_percentage[i] - local_percentage[i];
} else {
// Zone with index "i" does not have residual capacity, but we keep accumulating previous
// values to make search easier on the next step.
residual_capacity[i] = residual_capacity[i - 1];
}
}

// This is *extremely* unlikely but possible due to rounding errors when calculating
// zone percentages. In this case just select random zone.
if (residual_capacity[number_of_zones - 1] == 0) {
if (residual_capacity_[number_of_zones - 1] == 0) {
stats_.lb_zone_no_capacity_left_.inc();
return host_set_.healthyHostsPerZone()[random_.random() % number_of_zones];
}

// Random sampling to select specific zone for cross zone traffic based on the additional
// capacity in zones.
uint64_t threshold = random_.random() % residual_capacity[number_of_zones - 1];
uint64_t threshold = random_.random() % residual_capacity_[number_of_zones - 1];

// This potentially can be optimized to be O(log(N)) where N is the number of zones.
// Linear scan should be faster for smaller N, in most of the scenarios N will be small.
int i = 0;
while (threshold > residual_capacity[i]) {
while (threshold > residual_capacity_[i]) {
i++;
}

Expand All @@ -169,7 +211,7 @@ const std::vector<HostPtr>& LoadBalancerBase::hostsToUse() {
return host_set_.hosts();
}

if (earlyExitNonZoneRouting()) {
if (early_exit_zone_routing_ || earlyExitNonZoneRoutingRuntime()) {
return host_set_.healthyHosts();
}

Expand Down
22 changes: 18 additions & 4 deletions source/common/upstream/load_balancer_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@ namespace Upstream {
class LoadBalancerBase {
protected:
LoadBalancerBase(const HostSet& host_set, const HostSet* local_host_set, ClusterStats& stats,
Runtime::Loader& runtime, Runtime::RandomGenerator& random)
: stats_(stats), runtime_(runtime), random_(random), host_set_(host_set),
local_host_set_(local_host_set) {}
Runtime::Loader& runtime, Runtime::RandomGenerator& random);

/**
* Pick the host list to use (healthy or all depending on how many in the set are not healthy).
Expand All @@ -27,10 +25,16 @@ class LoadBalancerBase {

private:
/*
* @return decision on quick exit from zone aware host selection.
* @return decision on quick exit from zone aware routing based on cluster configuration
* or other non runtime params.
*/
bool earlyExitNonZoneRouting();

/*
* @return decision on quick exit from zone aware routing based on runtime params.
*/
bool earlyExitNonZoneRoutingRuntime();

/**
* For the given host_set it @return if we should be in a panic mode or not.
* For example, if majority of hosts are unhealthy we'll be likely in a panic mode.
Expand All @@ -51,8 +55,18 @@ class LoadBalancerBase {
void calculateZonePercentage(const std::vector<std::vector<HostPtr>>& hosts_per_zone,
uint64_t* ret);

/**
* Regenerate zone aware routing structures for fast decisions on upstream zone selection.
*/
void regenerateZoneRoutingStructures();

const HostSet& host_set_;
const HostSet* local_host_set_;

bool early_exit_zone_routing_;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: just initialize {true} here with the other initializers

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, early_exit_zone_routing_ and route_directly_ are mutually exclusive. Combine these into a state_ enum which has something like [NoZoneRouting, ZoneDirect, ZoneResidual] or something. You can have cleaner logic and better asserts this way.

uint64_t local_percent_to_route_{};
bool route_directly_{};
std::vector<uint64_t> residual_capacity_;
};

/**
Expand Down
5 changes: 4 additions & 1 deletion source/common/upstream/upstream_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,10 @@ typedef std::shared_ptr<const std::vector<std::vector<HostPtr>>> ConstHostListsP
*/
class HostSetImpl : public virtual HostSet {
public:
HostSetImpl() : hosts_(new std::vector<HostPtr>()), healthy_hosts_(new std::vector<HostPtr>()) {}
HostSetImpl()
: hosts_(new std::vector<HostPtr>()), healthy_hosts_(new std::vector<HostPtr>()),
hosts_per_zone_(new std::vector<std::vector<HostPtr>>()),
healthy_hosts_per_zone_(new std::vector<std::vector<HostPtr>>()) {}

ConstHostVectorPtr rawHosts() const { return hosts_; }
ConstHostVectorPtr rawHealthyHosts() const { return healthy_hosts_; }
Expand Down
11 changes: 6 additions & 5 deletions test/common/upstream/load_balancer_impl_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,7 @@ TEST_F(RoundRobinLoadBalancerTest, NoZoneAwareDifferentZoneSize) {
.WillRepeatedly(Return(50));
EXPECT_CALL(runtime_.snapshot_, featureEnabled("upstream.zone_routing.enabled", 100))
.WillRepeatedly(Return(true));
EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.zone_routing.min_cluster_size", 6))
.WillOnce(Return(1));

EXPECT_EQ(cluster_.healthy_hosts_[0], lb_->chooseHost());
EXPECT_EQ(1U, stats_.lb_zone_number_differs_.value());
}
Expand Down Expand Up @@ -257,7 +256,7 @@ TEST_F(RoundRobinLoadBalancerTest, LowPrecisionForDistribution) {
EXPECT_CALL(runtime_.snapshot_, featureEnabled("upstream.zone_routing.enabled", 100))
.WillRepeatedly(Return(true));
EXPECT_CALL(runtime_.snapshot_, getInteger("upstream.zone_routing.min_cluster_size", 6))
.WillOnce(Return(1));
.WillRepeatedly(Return(1));

// The following host distribution with current precision should lead to the no_capacity_left
// situation.
Expand All @@ -276,8 +275,6 @@ TEST_F(RoundRobinLoadBalancerTest, LowPrecisionForDistribution) {
current[i] = host;
}
local_hosts_per_zone->push_back(current);
local_cluster_hosts_->updateHosts(local_hosts, local_hosts, local_hosts_per_zone,
local_hosts_per_zone, empty_host_vector_, empty_host_vector_);

current.resize(44999);
for (int i = 0; i < 44999; ++i) {
Expand All @@ -293,6 +290,10 @@ TEST_F(RoundRobinLoadBalancerTest, LowPrecisionForDistribution) {

cluster_.healthy_hosts_per_zone_ = *upstream_hosts_per_zone;

// To trigger update callback.
local_cluster_hosts_->updateHosts(local_hosts, local_hosts, local_hosts_per_zone,
local_hosts_per_zone, empty_host_vector_, empty_host_vector_);

// Force request out of small zone and to randomly select zone.
EXPECT_CALL(random_, random()).WillOnce(Return(9999)).WillOnce(Return(2));
lb_->chooseHost();
Expand Down