Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Per zone rq (total, timing, per response code, etc) stats for upstream cluster. #36

Merged
merged 10 commits into from
Aug 24, 2016
17 changes: 17 additions & 0 deletions docs/configuration/cluster_manager/cluster_stats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,20 @@ If alternate tree statistics are configured, they will be present in the
*cluster.<name>.<alt name>.* namespace. The statistics produced are the same as documented in
the dynamic HTTP statistics section :ref:`above
<config_cluster_manager_cluster_stats_dynamic_http>`.

.. _config_cluster_manager_cluster_per_az_stats:

Per service zone dynamic HTTP statistics
----------------------------------------

If the service zone is available for the local service (via :option:`--service-zone`)
and the :ref:`upstream cluster <arch_overview_service_discovery_sds>`,
Envoy will track the following statistics in *cluster.<name>.zone.<from_zone>.<to_zone>.* namespace.

.. csv-table::
:header: Name, Type, Description
:widths: 1, 1, 2

upstream_rq_<\*xx>, Counter, "Aggregate HTTP response codes (e.g., 2xx, 3xx, etc.)"
upstream_rq_<\*>, Counter, "Specific HTTP response codes (e.g., 201, 302, etc.)"
upstream_rq_time, Timer, Request time milliseconds
19 changes: 13 additions & 6 deletions source/common/http/async_client_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ const HeaderMapImpl AsyncRequestImpl::REQUEST_TIMEOUT_HEADER{

AsyncClientImpl::AsyncClientImpl(const Upstream::Cluster& cluster,
AsyncClientConnPoolFactory& factory, Stats::Store& stats_store,
Event::Dispatcher& dispatcher)
Event::Dispatcher& dispatcher, const std::string& local_zone_name)
: cluster_(cluster), factory_(factory), stats_store_(stats_store), dispatcher_(dispatcher),
stat_prefix_(fmt::format("cluster.{}.", cluster.name())) {}
local_zone_name_(local_zone_name), stat_prefix_(fmt::format("cluster.{}.", cluster.name())) {}

AsyncClientImpl::~AsyncClientImpl() { ASSERT(active_requests_.empty()); }

Expand Down Expand Up @@ -62,6 +62,10 @@ AsyncRequestImpl::AsyncRequestImpl(MessagePtr&& request, AsyncClientImpl& parent

AsyncRequestImpl::~AsyncRequestImpl() { ASSERT(!stream_encoder_); }

const std::string& AsyncRequestImpl::upstreamZone() {
return upstream_host_ ? upstream_host_->zone() : EMPTY_STRING;
}

void AsyncRequestImpl::cancel() {
ASSERT(stream_encoder_);
stream_encoder_->resetStream();
Expand All @@ -77,7 +81,8 @@ void AsyncRequestImpl::decodeHeaders(HeaderMapPtr&& headers, bool end_stream) {
#endif

CodeUtility::ResponseStatInfo info{parent_.stats_store_, parent_.stat_prefix_,
response_->headers(), true, EMPTY_STRING, EMPTY_STRING};
response_->headers(), true, EMPTY_STRING, EMPTY_STRING,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

az stats should be wired up for async client. You have the host info.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

parent_.local_zone_name_, upstreamZone()};
CodeUtility::chargeResponseStat(info);

if (end_stream) {
Expand Down Expand Up @@ -115,7 +120,7 @@ void AsyncRequestImpl::onComplete() {
CodeUtility::ResponseTimingInfo info{
parent_.stats_store_, parent_.stat_prefix_, stream_encoder_->requestCompleteTime(),
response_->headers().get(Headers::get().EnvoyUpstreamCanary) == "true", true, EMPTY_STRING,
EMPTY_STRING};
EMPTY_STRING, parent_.local_zone_name_, upstreamZone()};
CodeUtility::chargeResponseTiming(info);

callbacks_.onSuccess(std::move(response_));
Expand All @@ -124,15 +129,17 @@ void AsyncRequestImpl::onComplete() {

void AsyncRequestImpl::onResetStream(StreamResetReason) {
CodeUtility::ResponseStatInfo info{parent_.stats_store_, parent_.stat_prefix_,
SERVICE_UNAVAILABLE_HEADER, true, EMPTY_STRING, EMPTY_STRING};
SERVICE_UNAVAILABLE_HEADER, true, EMPTY_STRING, EMPTY_STRING,
parent_.local_zone_name_, upstreamZone()};
CodeUtility::chargeResponseStat(info);
callbacks_.onFailure(AsyncClient::FailureReason::Reset);
cleanup();
}

void AsyncRequestImpl::onRequestTimeout() {
CodeUtility::ResponseStatInfo info{parent_.stats_store_, parent_.stat_prefix_,
REQUEST_TIMEOUT_HEADER, true, EMPTY_STRING, EMPTY_STRING};
REQUEST_TIMEOUT_HEADER, true, EMPTY_STRING, EMPTY_STRING,
parent_.local_zone_name_, upstreamZone()};
CodeUtility::chargeResponseStat(info);
parent_.cluster_.stats().upstream_rq_timeout_.inc();
stream_encoder_->resetStream();
Expand Down
11 changes: 9 additions & 2 deletions source/common/http/async_client_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ class AsyncRequestImpl;
class AsyncClientImpl final : public AsyncClient {
public:
AsyncClientImpl(const Upstream::Cluster& cluster, AsyncClientConnPoolFactory& factory,
Stats::Store& stats_store, Event::Dispatcher& dispatcher);
Stats::Store& stats_store, Event::Dispatcher& dispatcher,
const std::string& local_zone_name);
~AsyncClientImpl();

// Http::AsyncClient
Expand All @@ -45,6 +46,7 @@ class AsyncClientImpl final : public AsyncClient {
AsyncClientConnPoolFactory& factory_;
Stats::Store& stats_store_;
Event::Dispatcher& dispatcher_;
const std::string local_zone_name_;
const std::string stat_prefix_;
std::list<std::unique_ptr<AsyncRequestImpl>> active_requests_;

Expand All @@ -71,6 +73,8 @@ class AsyncRequestImpl final : public AsyncClient::Request,
void cancel() override;

private:
const std::string& upstreamZone();

// Http::StreamDecoder
void decodeHeaders(HeaderMapPtr&& headers, bool end_stream) override;
void decodeData(const Buffer::Instance& data, bool end_stream) override;
Expand All @@ -80,7 +84,9 @@ class AsyncRequestImpl final : public AsyncClient::Request,
void onResetStream(StreamResetReason reason) override;

// Http::PooledStreamEncoderCallbacks
void onUpstreamHostSelected(Upstream::HostDescriptionPtr) override {}
void onUpstreamHostSelected(Upstream::HostDescriptionPtr upstream_host) override {
upstream_host_ = upstream_host;
}

void onComplete();

Expand All @@ -93,6 +99,7 @@ class AsyncRequestImpl final : public AsyncClient::Request,
Event::TimerPtr request_timeout_;
std::unique_ptr<MessageImpl> response_;
PooledStreamEncoderPtr stream_encoder_;
Upstream::HostDescriptionPtr upstream_host_;

static const HeaderMapImpl SERVICE_UNAVAILABLE_HEADER;
static const HeaderMapImpl REQUEST_TIMEOUT_HEADER;
Expand Down
15 changes: 15 additions & 0 deletions source/common/http/codes.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,14 @@ void CodeUtility::chargeResponseStat(const ResponseStatInfo& info) {
info.store_.counter(fmt::format("vhost.{}.vcluster.{}.upstream_rq_{}", info.request_vhost_name_,
info.request_vcluster_name_, response_code)).inc();
}

// Handle per zone stats.
if (!info.from_zone_.empty() && !info.to_zone_.empty()) {
info.store_.counter(fmt::format("{}zone.{}.{}.upstream_rq_{}", info.prefix_, info.from_zone_,
info.to_zone_, group_string)).inc();
info.store_.counter(fmt::format("{}zone.{}.{}.upstream_rq_{}", info.prefix_, info.from_zone_,
info.to_zone_, response_code)).inc();
}
}

void CodeUtility::chargeResponseTiming(const ResponseTimingInfo& info) {
Expand All @@ -71,6 +79,13 @@ void CodeUtility::chargeResponseTiming(const ResponseTimingInfo& info) {
info.request_vcluster_name_ + ".upstream_rq_time",
ms);
}

// Handle per zone stats.
if (!info.from_zone_.empty() && !info.to_zone_.empty()) {
info.store_.deliverTimingToSinks(fmt::format("{}zone.{}.{}.upstream_rq_time", info.prefix_,
info.from_zone_, info.to_zone_),
ms);
}
}
}

Expand Down
4 changes: 4 additions & 0 deletions source/common/http/codes.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ class CodeUtility {
bool internal_request_;
const std::string& request_vhost_name_;
const std::string& request_vcluster_name_;
const std::string& from_zone_;
const std::string& to_zone_;
};

/**
Expand All @@ -42,6 +44,8 @@ class CodeUtility {
bool internal_request_;
const std::string& request_vhost_name_;
const std::string& request_vcluster_name_;
const std::string& from_zone_;
const std::string& to_zone_;
};

/**
Expand Down
2 changes: 1 addition & 1 deletion source/common/http/filter/ratelimit.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ void RateLimitFilter::complete(RateLimit::LimitStatus status) {
config_->stats().counter(cluster_ratelimit_stat_prefix_ + "over_limit").inc();
Http::CodeUtility::ResponseStatInfo info{config_->stats(), cluster_stat_prefix_,
TOO_MANY_REQUESTS_HEADER, true, EMPTY_STRING,
EMPTY_STRING};
EMPTY_STRING, EMPTY_STRING, EMPTY_STRING};
Http::CodeUtility::chargeResponseStat(info);
break;
}
Expand Down
15 changes: 11 additions & 4 deletions source/common/router/router.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "envoy/upstream/upstream.h"

#include "common/common/assert.h"
#include "common/common/empty_string.h"
#include "common/common/enum_to_int.h"
#include "common/common/utility.h"
#include "common/http/codes.h"
Expand Down Expand Up @@ -88,19 +89,24 @@ Filter::~Filter() {
ASSERT(!retry_state_);
}

const std::string& Filter::upstreamZone() {
return upstream_host_ ? upstream_host_->zone() : EMPTY_STRING;
}

void Filter::chargeUpstreamCode(const Http::HeaderMap& response_headers) {
if (!callbacks_->requestInfo().healthCheck()) {
Http::CodeUtility::ResponseStatInfo info{
config_->stats_store_, stat_prefix_, response_headers,
downstream_headers_->get(Http::Headers::get().EnvoyInternalRequest) == "true",
route_->virtualHostName(), request_vcluster_name_};
route_->virtualHostName(), request_vcluster_name_, config_->service_zone_, upstreamZone()};

Http::CodeUtility::chargeResponseStat(info);

for (const std::string& alt_prefix : alt_stat_prefixes_) {
Http::CodeUtility::ResponseStatInfo info{
config_->stats_store_, alt_prefix, response_headers,
downstream_headers_->get(Http::Headers::get().EnvoyInternalRequest) == "true", "", ""};
downstream_headers_->get(Http::Headers::get().EnvoyInternalRequest) == "true", "", "",
config_->service_zone_, upstreamZone()};

Http::CodeUtility::chargeResponseStat(info);
}
Expand Down Expand Up @@ -414,7 +420,7 @@ void Filter::onUpstreamComplete() {
upstream_request_->upstream_encoder_->requestCompleteTime(),
upstream_request_->upstream_canary_,
downstream_headers_->get(Http::Headers::get().EnvoyInternalRequest) == "true",
route_->virtualHostName(), request_vcluster_name_};
route_->virtualHostName(), request_vcluster_name_, config_->service_zone_, upstreamZone()};

Http::CodeUtility::chargeResponseTiming(info);

Expand All @@ -423,7 +429,8 @@ void Filter::onUpstreamComplete() {
config_->stats_store_, alt_prefix,
upstream_request_->upstream_encoder_->requestCompleteTime(),
upstream_request_->upstream_canary_,
downstream_headers_->get(Http::Headers::get().EnvoyInternalRequest) == "true", "", ""};
downstream_headers_->get(Http::Headers::get().EnvoyInternalRequest) == "true", "", "",
config_->service_zone_, upstreamZone()};

Http::CodeUtility::chargeResponseTiming(info);
}
Expand Down
14 changes: 9 additions & 5 deletions source/common/router/router.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,16 +65,17 @@ class FilterUtility {
*/
class FilterConfig {
public:
FilterConfig(const std::string& stat_prefix, Stats::Store& stats, Upstream::ClusterManager& cm,
Runtime::Loader& runtime, Runtime::RandomGenerator& random,
ShadowWriterPtr&& shadow_writer)
: stats_store_(stats), cm_(cm), runtime_(runtime), random_(random),
stats_{ALL_ROUTER_STATS(POOL_COUNTER_PREFIX(stats, stat_prefix))},
FilterConfig(const std::string& stat_prefix, const std::string& service_zone, Stats::Store& stats,
Upstream::ClusterManager& cm, Runtime::Loader& runtime,
Runtime::RandomGenerator& random, ShadowWriterPtr&& shadow_writer)
: stats_store_(stats), service_zone_(service_zone), cm_(cm), runtime_(runtime),
random_(random), stats_{ALL_ROUTER_STATS(POOL_COUNTER_PREFIX(stats, stat_prefix))},
shadow_writer_(std::move(shadow_writer)) {}

ShadowWriter& shadowWriter() { return *shadow_writer_; }

Stats::Store& stats_store_;
const std::string service_zone_;
Upstream::ClusterManager& cm_;
Runtime::Loader& runtime_;
Runtime::RandomGenerator& random_;
Expand Down Expand Up @@ -123,6 +124,7 @@ class Filter : Logger::Loggable<Logger::Id::router>, public Http::StreamDecoderF

// Http::PooledStreamEncoderCallbacks
void onUpstreamHostSelected(Upstream::HostDescriptionPtr host) override {
parent_.upstream_host_ = host;
parent_.callbacks_->requestInfo().onUpstreamHostSelected(host);
}

Expand All @@ -139,6 +141,7 @@ class Filter : Logger::Loggable<Logger::Id::router>, public Http::StreamDecoderF
Http::AccessLog::FailureReason
streamResetReasonToFailureReason(Http::StreamResetReason reset_reason);

const std::string& upstreamZone();
void chargeUpstreamCode(const Http::HeaderMap& response_headers);
void chargeUpstreamCode(Http::Code code);
void cleanup();
Expand Down Expand Up @@ -176,6 +179,7 @@ class Filter : Logger::Loggable<Logger::Id::router>, public Http::StreamDecoderF
Http::HeaderMap* downstream_trailers_{};
bool downstream_end_stream_{};
bool do_shadowing_{};
Upstream::HostDescriptionPtr upstream_host_;
};

class ProdFilter : public Filter {
Expand Down
17 changes: 9 additions & 8 deletions source/common/upstream/cluster_manager_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ ClusterManagerImpl::ClusterManagerImpl(const Json::Object& config, Stats::Store&
}

tls.set(thread_local_slot_,
[this, &stats, &runtime, &random](Event::Dispatcher& dispatcher)
[this, &stats, &runtime, &random, local_zone_name](Event::Dispatcher& dispatcher)
-> ThreadLocal::ThreadLocalObjectPtr {
return ThreadLocal::ThreadLocalObjectPtr{
new ThreadLocalClusterManagerImpl(*this, dispatcher, runtime, random)};
return ThreadLocal::ThreadLocalObjectPtr{new ThreadLocalClusterManagerImpl(
*this, dispatcher, runtime, random, local_zone_name)};
});

// To avoid threading issues, for those clusters that start with hosts already in them (like
Expand Down Expand Up @@ -202,11 +202,11 @@ Http::AsyncClient& ClusterManagerImpl::httpAsyncClientForCluster(const std::stri

ClusterManagerImpl::ThreadLocalClusterManagerImpl::ThreadLocalClusterManagerImpl(
ClusterManagerImpl& parent, Event::Dispatcher& dispatcher, Runtime::Loader& runtime,
Runtime::RandomGenerator& random)
Runtime::RandomGenerator& random, const std::string& local_zone_name)
: parent_(parent), dispatcher_(dispatcher) {
for (auto& cluster : parent.primary_clusters_) {
thread_local_clusters_[cluster.first].reset(
new ClusterEntry(*this, *cluster.second, runtime, random, parent.stats_, dispatcher));
thread_local_clusters_[cluster.first].reset(new ClusterEntry(
*this, *cluster.second, runtime, random, parent.stats_, dispatcher, local_zone_name));
}

for (auto& cluster : thread_local_clusters_) {
Expand Down Expand Up @@ -251,9 +251,10 @@ void ClusterManagerImpl::ThreadLocalClusterManagerImpl::shutdown() {

ClusterManagerImpl::ThreadLocalClusterManagerImpl::ClusterEntry::ClusterEntry(
ThreadLocalClusterManagerImpl& parent, const Cluster& cluster, Runtime::Loader& runtime,
Runtime::RandomGenerator& random, Stats::Store& stats_store, Event::Dispatcher& dispatcher)
Runtime::RandomGenerator& random, Stats::Store& stats_store, Event::Dispatcher& dispatcher,
const std::string& local_zone_name)
: parent_(parent), primary_cluster_(cluster),
http_async_client_(cluster, *this, stats_store, dispatcher) {
http_async_client_(cluster, *this, stats_store, dispatcher, local_zone_name) {

switch (cluster.lbType()) {
case LoadBalancerType::LeastRequest: {
Expand Down
6 changes: 4 additions & 2 deletions source/common/upstream/cluster_manager_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ class ClusterManagerImpl : public ClusterManager {
struct ClusterEntry : public Http::AsyncClientConnPoolFactory {
ClusterEntry(ThreadLocalClusterManagerImpl& parent, const Cluster& cluster,
Runtime::Loader& runtime, Runtime::RandomGenerator& random,
Stats::Store& stats_store, Event::Dispatcher& dispatcher);
Stats::Store& stats_store, Event::Dispatcher& dispatcher,
const std::string& local_zone_name);

// Http::AsyncClientConnPoolFactory
Http::ConnectionPool::Instance* connPool() override;
Expand All @@ -80,7 +81,8 @@ class ClusterManagerImpl : public ClusterManager {
typedef std::unique_ptr<ClusterEntry> ClusterEntryPtr;

ThreadLocalClusterManagerImpl(ClusterManagerImpl& parent, Event::Dispatcher& dispatcher,
Runtime::Loader& runtime, Runtime::RandomGenerator& random);
Runtime::Loader& runtime, Runtime::RandomGenerator& random,
const std::string& local_zone_name);

static void updateClusterMembership(const std::string& name, ConstHostVectorPtr hosts,
ConstHostVectorPtr healthy_hosts,
Expand Down
3 changes: 2 additions & 1 deletion source/server/config/http/router.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ class FilterConfig : public HttpFilterConfigFactory {
}

Router::FilterConfigPtr config(new Router::FilterConfig(
stat_prefix, server.stats(), server.clusterManager(), server.runtime(), server.random(),
stat_prefix, server.options().serviceZone(), server.stats(), server.clusterManager(),
server.runtime(), server.random(),
Router::ShadowWriterPtr{new Router::ShadowWriterImpl(server.clusterManager())}));

return [config](Http::FilterChainFactoryCallbacks& callbacks) -> void {
Expand Down
Loading