Skip to content

Commit

Permalink
Add debug log for slow config updates for GRPC subscriptions (#14343)
Browse files Browse the repository at this point in the history
Risk Level: Low
Testing: 
Docs Changes: N/A
Release Notes: N/A

Signed-off-by: Adam Schaub <[email protected]>
  • Loading branch information
adamsjob authored Jan 22, 2021
1 parent 5bb39c0 commit f8fcf7f
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ Every cluster has a statistics tree rooted at *cluster.<name>.* with the followi
update_attempt, Counter, Total attempted cluster membership updates by service discovery
update_success, Counter, Total successful cluster membership updates by service discovery
update_failure, Counter, Total failed cluster membership updates by service discovery
update_duration, Histogram, Amount of time spent updating configs
update_empty, Counter, Total cluster membership updates ending with empty cluster load assignment and continuing with previous config
update_no_rebuild, Counter, Total successful cluster membership updates that didn't result in any cluster load balancing structure rebuilds
version, Gauge, Hash of the contents from the last successful API fetch
Expand Down
5 changes: 3 additions & 2 deletions include/envoy/config/subscription.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,22 +201,23 @@ using SubscriptionPtr = std::unique_ptr<Subscription>;
/**
* Per subscription stats. @see stats_macros.h
*/
#define ALL_SUBSCRIPTION_STATS(COUNTER, GAUGE, TEXT_READOUT) \
#define ALL_SUBSCRIPTION_STATS(COUNTER, GAUGE, TEXT_READOUT, HISTOGRAM) \
COUNTER(init_fetch_timeout) \
COUNTER(update_attempt) \
COUNTER(update_failure) \
COUNTER(update_rejected) \
COUNTER(update_success) \
GAUGE(update_time, NeverImport) \
GAUGE(version, NeverImport) \
HISTOGRAM(update_duration, Milliseconds) \
TEXT_READOUT(version_text)

/**
* Struct definition for per subscription stats. @see stats_macros.h
*/
struct SubscriptionStats {
ALL_SUBSCRIPTION_STATS(GENERATE_COUNTER_STRUCT, GENERATE_GAUGE_STRUCT,
GENERATE_TEXT_READOUT_STRUCT)
GENERATE_TEXT_READOUT_STRUCT, GENERATE_HISTOGRAM_STRUCT)
};

} // namespace Config
Expand Down
17 changes: 17 additions & 0 deletions source/common/config/grpc_subscription_impl.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#include "common/config/grpc_subscription_impl.h"

#include <chrono>

#include "common/common/assert.h"
#include "common/common/logger.h"
#include "common/common/utility.h"
Expand All @@ -12,6 +14,8 @@
namespace Envoy {
namespace Config {

constexpr std::chrono::milliseconds UpdateDurationLogThreshold = std::chrono::milliseconds(50);

GrpcSubscriptionImpl::GrpcSubscriptionImpl(GrpcMuxSharedPtr grpc_mux,
SubscriptionCallbacks& callbacks,
OpaqueResourceDecoder& resource_decoder,
Expand Down Expand Up @@ -69,14 +73,23 @@ void GrpcSubscriptionImpl::onConfigUpdate(const std::vector<Config::DecodedResou
// supply those versions to onConfigUpdate() along with the xDS response ("system")
// version_info. This way, both types of versions can be tracked and exposed for debugging by
// the configuration update targets.
auto start = dispatcher_.timeSource().monotonicTime();
callbacks_.onConfigUpdate(resources, version_info);
std::chrono::milliseconds update_duration = std::chrono::duration_cast<std::chrono::milliseconds>(
dispatcher_.timeSource().monotonicTime() - start);
stats_.update_success_.inc();
stats_.update_attempt_.inc();
stats_.update_time_.set(DateUtil::nowToMilliseconds(dispatcher_.timeSource()));
stats_.version_.set(HashUtil::xxHash64(version_info));
stats_.version_text_.set(version_info);
stats_.update_duration_.recordValue(update_duration.count());
ENVOY_LOG(debug, "gRPC config for {} accepted with {} resources with version {}", type_url_,
resources.size(), version_info);

if (update_duration > UpdateDurationLogThreshold) {
ENVOY_LOG(debug, "gRPC config update took {} ms! Resources names: {}", update_duration.count(),
absl::StrJoin(resources, ",", ResourceNameFormatter()));
}
}

void GrpcSubscriptionImpl::onConfigUpdate(
Expand All @@ -85,11 +98,15 @@ void GrpcSubscriptionImpl::onConfigUpdate(
const std::string& system_version_info) {
disableInitFetchTimeoutTimer();
stats_.update_attempt_.inc();
auto start = dispatcher_.timeSource().monotonicTime();
callbacks_.onConfigUpdate(added_resources, removed_resources, system_version_info);
std::chrono::milliseconds update_duration = std::chrono::duration_cast<std::chrono::milliseconds>(
dispatcher_.timeSource().monotonicTime() - start);
stats_.update_success_.inc();
stats_.update_time_.set(DateUtil::nowToMilliseconds(dispatcher_.timeSource()));
stats_.version_.set(HashUtil::xxHash64(system_version_info));
stats_.version_text_.set(system_version_info);
stats_.update_duration_.recordValue(update_duration.count());
}

void GrpcSubscriptionImpl::onConfigUpdateFailed(ConfigUpdateFailureReason reason,
Expand Down
7 changes: 7 additions & 0 deletions source/common/config/grpc_subscription_impl.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once

#include <chrono>
#include <memory>

#include "envoy/config/grpc_mux.h"
Expand Down Expand Up @@ -59,6 +60,12 @@ class GrpcSubscriptionImpl : public Subscription,
Event::TimerPtr init_fetch_timeout_timer_;
const bool is_aggregated_;
const bool use_namespace_matching_;

struct ResourceNameFormatter {
void operator()(std::string* out, const Config::DecodedResourceRef& resource) {
out->append(resource.get().name());
}
};
};

using GrpcSubscriptionImplPtr = std::unique_ptr<GrpcSubscriptionImpl>;
Expand Down
5 changes: 3 additions & 2 deletions source/common/config/utility.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "envoy/server/filter_config.h"
#include "envoy/stats/histogram.h"
#include "envoy/stats/scope.h"
#include "envoy/stats/stats_macros.h"
#include "envoy/stats/stats_matcher.h"
#include "envoy/stats/tag_producer.h"
#include "envoy/upstream/cluster_manager.h"
Expand Down Expand Up @@ -238,8 +239,8 @@ class Utility {
* @return SubscriptionStats for scope.
*/
static SubscriptionStats generateStats(Stats::Scope& scope) {
return {
ALL_SUBSCRIPTION_STATS(POOL_COUNTER(scope), POOL_GAUGE(scope), POOL_TEXT_READOUT(scope))};
return {ALL_SUBSCRIPTION_STATS(POOL_COUNTER(scope), POOL_GAUGE(scope), POOL_TEXT_READOUT(scope),
POOL_HISTOGRAM(scope))};
}

/**
Expand Down

0 comments on commit f8fcf7f

Please sign in to comment.