Skip to content

Commit

Permalink
Apply review comments
Browse files Browse the repository at this point in the history
Signed-off-by: Kateryna Nezdolii <[email protected]>
  • Loading branch information
Kateryna Nezdolii committed Sep 28, 2021
1 parent 3ed1ff4 commit e4a3c84
Show file tree
Hide file tree
Showing 15 changed files with 19 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ With no slow start enabled Envoy would send a proportional amount of traffic to
This could be undesirable for services that require warm up time to serve full production load and could result in request timeouts, loss of data and deteriorated user experience.

Slow start mode is a mechanism that affects load balancing weight of upstream endpoints and can be configured per upstream cluster.
Currently, slow start is supported in Round Robin and Least Request load balancer types.
Currently, slow start is supported in :ref:`Round Robin <envoy_v3_api_field_config.cluster.v3.Cluster.RoundRobinLbConfig.slow_start_config>` and :ref:`Least Request <envoy_v3_api_field_config.cluster.v3.Cluster.LeastRequestLbConfig.slow_start_config>` load balancer types.

Users can specify a :ref:`slow start window parameter<envoy_v3_api_field_config.cluster.v3.Cluster.SlowStartConfig.slow_start_window>` (in seconds), so that if endpoint "cluster membership duration" (amount of time since it has joined the cluster) is within the configured window, it enters slow start mode.
During slow start window, load balancing weight of a particular endpoint will be scaled with time factor, e.g.:
Expand Down
3 changes: 3 additions & 0 deletions docs/root/version_history/current.rst
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ New Features
* router: added an optional :ref:`override_auto_sni_header <envoy_v3_api_field_config.core.v3.UpstreamHttpProtocolOptions.override_auto_sni_header>` to support setting SNI value from an arbitrary header other than host/authority.
* sxg_filter: added filter to transform response to SXG package to :ref:`contrib images <install_contrib>`. This can be enabled by setting :ref:`SXG <envoy_v3_api_msg_extensions.filters.http.sxg.v3alpha.SXG>` configuration.
* thrift_proxy: added support for :ref:`mirroring requests <envoy_v3_api_field_extensions.filters.network.thrift_proxy.v3.RouteAction.request_mirror_policies>`.
* upstream: added support for :ref:`slow start mode <arch_overview_load_balancing_slow_start>`, which allows to progresively increase traffic for new endpoints.
* upstream: extended :ref:`Round Robin load balancer configuration <envoy_v3_api_field_config.cluster.v3.Cluster.round_robin_lb_config>` with :ref:`slow start <envoy_v3_api_field_config.cluster.v3.Cluster.RoundRobinLbConfig.slow_start_config>` support.
* upstream: extended :ref:`Least Request load balancer configuration <envoy_v3_api_field_config.cluster.v3.Cluster.least_request_lb_config>` with :ref:`slow start <envoy_v3_api_field_config.cluster.v3.Cluster.LeastRequestLbConfig.slow_start_config>` support.

Deprecated
----------
Expand Down
14 changes: 8 additions & 6 deletions source/common/upstream/load_balancer_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -769,9 +769,7 @@ EdfLoadBalancerBase::EdfLoadBalancerBase(
slow_start_config.has_value() && slow_start_config.value().has_aggression()
? absl::optional<Runtime::Double>({slow_start_config.value().aggression(), runtime})
: absl::nullopt),
time_source_(time_source),
slow_start_enabled_(slow_start_window_ > std::chrono::milliseconds(0)),
latest_host_added_time_(time_source_.monotonicTime()) {
time_source_(time_source), latest_host_added_time_(time_source_.monotonicTime()) {
// We fully recompute the schedulers for a given host set here on membership change, which is
// consistent with what other LB implementations do (e.g. thread aware).
// The downside of a full recompute is that time complexity is O(n * log n),
Expand All @@ -781,7 +779,7 @@ EdfLoadBalancerBase::EdfLoadBalancerBase(
[this](uint32_t priority, const HostVector&, const HostVector&) { refresh(priority); });
member_update_cb_ = priority_set.addMemberUpdateCb(
[this](const HostVector& hosts_added, const HostVector&) -> void {
if (slow_start_enabled_) {
if (isSlowStartEnabled()) {
recalculateHostsInSlowStart(hosts_added);
}
});
Expand Down Expand Up @@ -813,7 +811,7 @@ void EdfLoadBalancerBase::refresh(uint32_t priority) {
// Nuke existing scheduler if it exists.
auto& scheduler = scheduler_[source] = Scheduler{};
refreshHostSource(source);
if (slow_start_enabled_) {
if (isSlowStartEnabled()) {
recalculateHostsInSlowStart(hosts);
}

Expand Down Expand Up @@ -871,8 +869,12 @@ void EdfLoadBalancerBase::refresh(uint32_t priority) {
}
}

bool EdfLoadBalancerBase::isSlowStartEnabled() {
return slow_start_window_ > std::chrono::milliseconds(0);
}

bool EdfLoadBalancerBase::noHostsAreInSlowStart() {
if (!slow_start_enabled_) {
if (!isSlowStartEnabled()) {
return true;
}
auto current_time = time_source_.monotonicTime();
Expand Down
2 changes: 1 addition & 1 deletion source/common/upstream/load_balancer_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@ class EdfLoadBalancerBase : public ZoneAwareLoadBalancerBase,

virtual void refresh(uint32_t priority);

bool isSlowStartEnabled();
bool noHostsAreInSlowStart();

virtual void recalculateHostsInSlowStart(const HostVector& hosts_added);
Expand Down Expand Up @@ -447,7 +448,6 @@ class EdfLoadBalancerBase : public ZoneAwareLoadBalancerBase,
double aggression_{1.0};
const absl::optional<Runtime::Double> aggression_runtime_;
TimeSource& time_source_;
bool slow_start_enabled_;
MonotonicTime latest_host_added_time_;
};

Expand Down
3 changes: 1 addition & 2 deletions source/common/upstream/logical_dns_cluster.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,7 @@ LogicalDnsCluster::LogicalDnsCluster(
resolve_timer_(
factory_context.dispatcher().createTimer([this]() -> void { startResolve(); })),
local_info_(factory_context.localInfo()),
load_assignment_(convertPriority(cluster.load_assignment())),
time_source_(factory_context.dispatcher().timeSource()) {
load_assignment_(convertPriority(cluster.load_assignment())) {
failure_backoff_strategy_ =
Config::Utility::prepareDnsRefreshStrategy<envoy::config::cluster::v3::Cluster>(
cluster, dns_refresh_rate_ms_.count(), factory_context.api().randomGenerator());
Expand Down
1 change: 0 additions & 1 deletion source/common/upstream/logical_dns_cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ class LogicalDnsCluster : public ClusterImplBase {
Network::ActiveDnsQuery* active_dns_query_{};
const LocalInfo::LocalInfo& local_info_;
const envoy::config::endpoint::v3::ClusterLoadAssignment load_assignment_;
TimeSource& time_source_;
};

class LogicalDnsClusterFactory : public ClusterFactoryImplBase {
Expand Down
3 changes: 1 addition & 2 deletions source/common/upstream/original_dst_cluster.cc
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,7 @@ OriginalDstCluster::OriginalDstCluster(
use_http_header_(info_->lbOriginalDstConfig()
? info_->lbOriginalDstConfig().value().use_http_header()
: false),
host_map_(std::make_shared<HostMap>()),
time_source_(factory_context.dispatcher().timeSource()) {
host_map_(std::make_shared<HostMap>()) {
if (config.has_load_assignment()) {
throw EnvoyException("ORIGINAL_DST clusters must have no load assignment configured");
}
Expand Down
2 changes: 0 additions & 2 deletions source/common/upstream/original_dst_cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,6 @@ class OriginalDstCluster : public ClusterImplBase {
absl::Mutex host_map_lock_;
HostMapConstSharedPtr host_map_ ABSL_GUARDED_BY(host_map_lock_);

TimeSource& time_source_;

friend class OriginalDstClusterFactory;
};

Expand Down
3 changes: 1 addition & 2 deletions source/common/upstream/strict_dns_cluster.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@ StrictDnsClusterImpl::StrictDnsClusterImpl(
dns_resolver_(dns_resolver),
dns_refresh_rate_ms_(
std::chrono::milliseconds(PROTOBUF_GET_MS_OR_DEFAULT(cluster, dns_refresh_rate, 5000))),
respect_dns_ttl_(cluster.respect_dns_ttl()),
time_source_(factory_context.dispatcher().timeSource()) {
respect_dns_ttl_(cluster.respect_dns_ttl()) {
failure_backoff_strategy_ =
Config::Utility::prepareDnsRefreshStrategy<envoy::config::cluster::v3::Cluster>(
cluster, dns_refresh_rate_ms_.count(), factory_context.api().randomGenerator());
Expand Down
1 change: 0 additions & 1 deletion source/common/upstream/strict_dns_cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ class StrictDnsClusterImpl : public BaseDynamicClusterImpl {
const bool respect_dns_ttl_;
Network::DnsLookupFamily dns_lookup_family_;
uint32_t overprovisioning_factor_;
TimeSource& time_source_;
};

/**
Expand Down
3 changes: 1 addition & 2 deletions source/extensions/clusters/dynamic_forward_proxy/cluster.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ Cluster::Cluster(
added_via_api, factory_context.dispatcher().timeSource()),
dns_cache_manager_(cache_manager_factory.get()),
dns_cache_(dns_cache_manager_->getCache(config.dns_cache_config())),
update_callbacks_handle_(dns_cache_->addUpdateCallbacks(*this)), local_info_(local_info),
time_source_(factory_context.dispatcher().timeSource()) {}
update_callbacks_handle_(dns_cache_->addUpdateCallbacks(*this)), local_info_(local_info) {}

void Cluster::startPreInit() {
// If we are attaching to a pre-populated cache we need to initialize our hosts.
Expand Down
2 changes: 0 additions & 2 deletions source/extensions/clusters/dynamic_forward_proxy/cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,6 @@ class Cluster : public Upstream::BaseDynamicClusterImpl,

friend class ClusterFactory;
friend class ClusterTest;

TimeSource& time_source_;
};

class ClusterFactory : public Upstream::ConfigurableClusterFactoryBase<
Expand Down
6 changes: 2 additions & 4 deletions source/extensions/clusters/redis/redis_cluster.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,9 @@ RedisCluster::RedisCluster(
factory_context.clusterManager(), factory_context.api().timeSource())),
registration_handle_(refresh_manager_->registerCluster(
cluster_name_, redirect_refresh_interval_, redirect_refresh_threshold_,
failure_refresh_threshold_, host_degraded_refresh_threshold_,
[&]() {
failure_refresh_threshold_, host_degraded_refresh_threshold_, [&]() {
redis_discovery_session_.resolve_timer_->enableTimer(std::chrono::milliseconds(0));
})),
time_source_(dispatcher_.timeSource()) {
})) {
const auto& locality_lb_endpoints = load_assignment_.endpoints();
for (const auto& locality_lb_endpoint : locality_lb_endpoints) {
for (const auto& lb_endpoint : locality_lb_endpoint.lb_endpoints()) {
Expand Down
2 changes: 0 additions & 2 deletions source/extensions/clusters/redis/redis_cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,8 +282,6 @@ class RedisCluster : public Upstream::BaseDynamicClusterImpl {
const std::string cluster_name_;
const Common::Redis::ClusterRefreshManagerSharedPtr refresh_manager_;
const Common::Redis::ClusterRefreshManager::HandlePtr registration_handle_;

TimeSource& time_source_;
};

class RedisClusterFactory : public Upstream::ConfigurableClusterFactoryBase<
Expand Down
8 changes: 0 additions & 8 deletions test/common/upstream/load_balancer_impl_test.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#include <functional>
#include <memory>
#include <set>
#include <string>
Expand Down Expand Up @@ -40,7 +39,6 @@ class EdfLoadBalancerBasePeer {
return edf_lb.slow_start_window_;
}
static double aggression(EdfLoadBalancerBase& edf_lb) { return edf_lb.aggression_; }
static bool isSlowStartEnabled(EdfLoadBalancerBase& edf_lb) { return edf_lb.slow_start_enabled_; }
static const std::chrono::milliseconds latestHostAddedTime(EdfLoadBalancerBase& edf_lb) {
return std::chrono::time_point_cast<std::chrono::milliseconds>(edf_lb.latest_host_added_time_)
.time_since_epoch();
Expand Down Expand Up @@ -1586,9 +1584,6 @@ TEST_P(RoundRobinLoadBalancerTest, SlowStartWithDefaultParams) {
const auto aggression =
EdfLoadBalancerBasePeer::aggression(static_cast<EdfLoadBalancerBase&>(*lb_));
EXPECT_EQ(1.0, aggression);
const auto slow_start_enabled_ =
EdfLoadBalancerBasePeer::isSlowStartEnabled(static_cast<EdfLoadBalancerBase&>(*lb_));
EXPECT_EQ(false, slow_start_enabled_);
const auto latest_host_added_time =
EdfLoadBalancerBasePeer::latestHostAddedTime(static_cast<EdfLoadBalancerBase&>(*lb_));
EXPECT_EQ(std::chrono::milliseconds(0), latest_host_added_time);
Expand Down Expand Up @@ -2143,9 +2138,6 @@ TEST_P(LeastRequestLoadBalancerTest, SlowStartWithDefaultParams) {
const auto aggression =
EdfLoadBalancerBasePeer::aggression(static_cast<EdfLoadBalancerBase&>(lb_2));
EXPECT_EQ(1.0, aggression);
const auto slow_start_enabled_ =
EdfLoadBalancerBasePeer::isSlowStartEnabled(static_cast<EdfLoadBalancerBase&>(lb_2));
EXPECT_EQ(false, slow_start_enabled_);
const auto latest_host_added_time =
EdfLoadBalancerBasePeer::latestHostAddedTime(static_cast<EdfLoadBalancerBase&>(lb_2));
EXPECT_EQ(std::chrono::milliseconds(0), latest_host_added_time);
Expand Down

0 comments on commit e4a3c84

Please sign in to comment.