Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

upstream: force a full rebuild on host weight changes #14569

Merged
merged 4 commits into from
Jan 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/root/version_history/current.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ Minor Behavior Changes
----------------------
*Changes that may cause incompatibilities for some users, but should not for most*

* upstream: host weight changes now cause a full load balancer rebuild as opposed to happening
atomically inline. This change has been made to support load balancer pre-computation of data
structures based on host weight, but may have performance implications if host weight changes
are very frequent. This change can be disabled by setting the `envoy.reloadable_features.upstream_host_weight_change_causes_rebuild`
feature flag to false. If setting this flag to false is required in a deployment please open an
issue against the project.

Bug Fixes
---------
*Changes expected to improve the state of the world and are unlikely to have negative effects*
Expand Down
1 change: 1 addition & 0 deletions source/common/runtime/runtime_features.cc
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ constexpr const char* runtime_features[] = {
"envoy.reloadable_features.stop_faking_paths",
"envoy.reloadable_features.strict_1xx_and_204_response_headers",
"envoy.reloadable_features.tls_use_io_handle_bio",
"envoy.reloadable_features.upstream_host_weight_change_causes_rebuild",
"envoy.reloadable_features.vhds_heartbeats",
"envoy.reloadable_features.unify_grpc_handling",
"envoy.restart_features.use_apple_api_for_dns_lookups",
Expand Down
11 changes: 5 additions & 6 deletions source/common/upstream/eds.cc
Original file line number Diff line number Diff line change
Expand Up @@ -247,12 +247,11 @@ bool EdsClusterImpl::updateHostsPerLocality(
// is responsible for both determining whether there was a change and to perform the actual update
// to current_hosts_copy, so it must be called even if we know that we need to update (e.g. if the
// overprovisioning factor changes).
// TODO(htuch): We eagerly update all the host sets here on weight changes, which isn't great,
// since this has the knock on effect that we rebuild the load balancers and locality scheduler.
// We could make this happen lazily, as we do for host-level weight updates, where as things age
// out of the locality scheduler, we discover their new weights. We don't currently have a shared
// object for locality weights that we can update here, we should add something like this to
// improve performance and scalability of locality weight updates.
//
// TODO(htuch): We eagerly update all the host sets here on weight changes, which may have
// performance implications, since this has the knock on effect that we rebuild the load balancers
// and locality scheduler. See the comment in BaseDynamicClusterImpl::updateDynamicHostList
// about this. In the future we may need to do better here.
const bool hosts_updated = updateDynamicHostList(new_hosts, *current_hosts_copy, hosts_added,
hosts_removed, updated_hosts, all_hosts_);
if (hosts_updated || host_set.overprovisioningFactor() != overprovisioning_factor ||
Expand Down
12 changes: 11 additions & 1 deletion source/common/upstream/upstream_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1450,6 +1450,17 @@ bool BaseDynamicClusterImpl::updateDynamicHostList(const HostVector& new_hosts,
if (host->weight() > max_host_weight) {
max_host_weight = host->weight();
}
if (existing_host->second->weight() != host->weight()) {
existing_host->second->weight(host->weight());
if (Runtime::runtimeFeatureEnabled(
"envoy.reloadable_features.upstream_host_weight_change_causes_rebuild")) {
// We do full host set rebuilds so that load balancers can do pre-computation of data
// structures based on host weight. This may become a performance problem in certain
// deployments so it is runtime feature guarded and may also need to be configurable
// and/or dynamic in the future.
hosts_changed = true;
}
}

hosts_changed |=
updateHealthFlag(*host, *existing_host->second, Host::HealthFlag::FAILED_EDS_HEALTH);
Expand Down Expand Up @@ -1485,7 +1496,6 @@ bool BaseDynamicClusterImpl::updateDynamicHostList(const HostVector& new_hosts,
hosts_added_to_current_priority.emplace_back(existing_host->second);
}

existing_host->second->weight(host->weight());
final_hosts.push_back(existing_host->second);
updated_hosts[existing_host->second->address()->asString()] = existing_host->second;
} else {
Expand Down
1 change: 1 addition & 0 deletions test/common/upstream/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ envoy_cc_test(
"//test/mocks/ssl:ssl_mocks",
"//test/mocks/upstream:cluster_manager_mocks",
"//test/mocks/upstream:health_checker_mocks",
"//test/test_common:test_runtime_lib",
"//test/test_common:utility_lib",
"@envoy_api//envoy/config/cluster/v3:pkg_cc_proto",
"@envoy_api//envoy/config/core/v3:pkg_cc_proto",
Expand Down
53 changes: 52 additions & 1 deletion test/common/upstream/eds_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "test/mocks/ssl/mocks.h"
#include "test/mocks/upstream/cluster_manager.h"
#include "test/mocks/upstream/health_checker.h"
#include "test/test_common/test_runtime.h"
#include "test/test_common/utility.h"

#include "gmock/gmock.h"
Expand All @@ -35,7 +36,7 @@ namespace Upstream {
namespace {

class EdsTest : public testing::Test {
protected:
public:
EdsTest() : api_(Api::createApiForTest(stats_)) { resetCluster(); }

void resetCluster() {
Expand Down Expand Up @@ -324,6 +325,56 @@ TEST_F(EdsTest, EdsClusterFromFileIsPrimaryCluster) {
EXPECT_TRUE(initialized_);
}

namespace {

void endpointWeightChangeCausesRebuildTest(EdsTest& test, bool expect_rebuild) {
envoy::config::endpoint::v3::ClusterLoadAssignment cluster_load_assignment;
cluster_load_assignment.set_cluster_name("fare");
auto* endpoints = cluster_load_assignment.add_endpoints();
auto* endpoint = endpoints->add_lb_endpoints();
endpoint->mutable_endpoint()->mutable_address()->mutable_socket_address()->set_address("1.2.3.4");
endpoint->mutable_endpoint()->mutable_address()->mutable_socket_address()->set_port_value(80);
endpoint->mutable_load_balancing_weight()->set_value(30);

test.initialize();
test.doOnConfigUpdateVerifyNoThrow(cluster_load_assignment);
EXPECT_TRUE(test.initialized_);
EXPECT_EQ(0UL, test.stats_.counter("cluster.name.update_no_rebuild").value());
EXPECT_EQ(30UL,
test.stats_.gauge("cluster.name.max_host_weight", Stats::Gauge::ImportMode::Accumulate)
.value());
auto& hosts = test.cluster_->prioritySet().hostSetsPerPriority()[0]->hosts();
EXPECT_EQ(hosts.size(), 1);
EXPECT_EQ(hosts[0]->weight(), 30);

endpoint->mutable_load_balancing_weight()->set_value(31);
test.doOnConfigUpdateVerifyNoThrow(cluster_load_assignment);
EXPECT_EQ(expect_rebuild ? 0UL : 1UL,
test.stats_.counter("cluster.name.update_no_rebuild").value());
EXPECT_EQ(31UL,
test.stats_.gauge("cluster.name.max_host_weight", Stats::Gauge::ImportMode::Accumulate)
.value());
auto& new_hosts = test.cluster_->prioritySet().hostSetsPerPriority()[0]->hosts();
EXPECT_EQ(new_hosts.size(), 1);
EXPECT_EQ(new_hosts[0]->weight(), 31);
}

} // namespace

// Verify that host weight changes cause a full rebuild.
TEST_F(EdsTest, EndpointWeightChangeCausesRebuild) {
endpointWeightChangeCausesRebuildTest(*this, true);
}

// Verify that host weight changes do not cause a full rebuild when the feature flag is disabled.
TEST_F(EdsTest, EndpointWeightChangeCausesRebuildDisabled) {
TestScopedRuntime scoped_runtime;
Runtime::LoaderSingleton::getExisting()->mergeValues(
{{"envoy.reloadable_features.upstream_host_weight_change_causes_rebuild", "false"}});

endpointWeightChangeCausesRebuildTest(*this, false);
}

// Validate that onConfigUpdate() updates the endpoint metadata.
TEST_F(EdsTest, EndpointMetadata) {
envoy::config::endpoint::v3::ClusterLoadAssignment cluster_load_assignment;
Expand Down