Skip to content

Commit

Permalink
added histogram latency information to Hystrix dashboard stream (#3986)
Browse files Browse the repository at this point in the history
Adding the latency information from existing histogram to Hystrix event stream that was added in #3425
Risk Level: Low
Testing:
Docs Changes: admin.rst
Release Notes: No addition to release note of original Hystrix stream PR, which is still pending for 1.8.0
Fixes #3753

Signed-off-by: trabetti <[email protected]>
  • Loading branch information
trabetti authored and htuch committed Aug 29, 2018
1 parent cf87d50 commit 3b47cba
Show file tree
Hide file tree
Showing 8 changed files with 221 additions and 53 deletions.
3 changes: 2 additions & 1 deletion docs/root/operations/admin.rst
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ The fields are:
In Envoy, service unavailable response will cause **outlier detection** - removing a node off the
load balancer pool, but requests are not rejected as a result. Therefore, this counter is always
set to '0'.
* Latency information is currently unavailable.
* Latency information represents data since last flush.
Mean latency is currently not available.


4 changes: 2 additions & 2 deletions source/common/stats/histogram_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ HistogramStatisticsImpl::HistogramStatisticsImpl(const histogram_t* histogram_pt
}

const std::vector<double>& HistogramStatisticsImpl::supportedQuantiles() const {
static const std::vector<double> supported_quantiles = {0, 0.25, 0.5, 0.75, 0.90,
0.95, 0.99, 0.999, 1};
static const std::vector<double> supported_quantiles = {0, 0.25, 0.5, 0.75, 0.90,
0.95, 0.99, 0.995, 0.999, 1};
return supported_quantiles;
}

Expand Down
1 change: 1 addition & 0 deletions source/extensions/stat_sinks/hystrix/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ envoy_cc_library(
"//include/envoy/stats:stats_interface",
"//source/common/buffer:buffer_lib",
"//source/common/common:logger_lib",
"//source/common/config:well_known_names",
"//source/common/http:headers_lib",
],
)
74 changes: 62 additions & 12 deletions source/extensions/stat_sinks/hystrix/hystrix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,19 @@

#include "common/buffer/buffer_impl.h"
#include "common/common/logger.h"
#include "common/config/well_known_names.h"
#include "common/http/headers.h"

#include "absl/strings/str_cat.h"
#include "absl/strings/str_split.h"
#include "fmt/printf.h"

namespace Envoy {
namespace Extensions {
namespace StatSinks {
namespace Hystrix {

const uint64_t HystrixSink::DEFAULT_NUM_BUCKETS;

ClusterStatsCache::ClusterStatsCache(const std::string& cluster_name)
: cluster_name_(cluster_name) {}

Expand All @@ -43,6 +45,19 @@ void ClusterStatsCache::printRollingWindow(absl::string_view name, RollingWindow
out_str << std::endl;
}

void HystrixSink::addHistogramToStream(const QuantileLatencyMap& latency_map, absl::string_view key,
std::stringstream& ss) {
// TODO: Consider if we better use join here
ss << ", \"" << key << "\": {";
bool is_first = true;
for (const std::pair<double, double>& element : latency_map) {
const std::string quantile = fmt::sprintf("%g", element.first * 100);
HystrixSink::addDoubleToStream(quantile, element.second, ss, is_first);
is_first = false;
}
ss << "}";
}

// Add new value to rolling window, in place of oldest one.
void HystrixSink::pushNewValue(RollingWindow& rolling_window, uint64_t value) {
if (rolling_window.empty()) {
Expand Down Expand Up @@ -118,6 +133,11 @@ void HystrixSink::addIntToStream(absl::string_view key, uint64_t value, std::str
addInfoToStream(key, std::to_string(value), info, is_first);
}

void HystrixSink::addDoubleToStream(absl::string_view key, double value, std::stringstream& info,
bool is_first) {
addInfoToStream(key, std::to_string(value), info, is_first);
}

void HystrixSink::addInfoToStream(absl::string_view key, absl::string_view value,
std::stringstream& info, bool is_first) {
if (!is_first) {
Expand All @@ -131,7 +151,7 @@ void HystrixSink::addHystrixCommand(ClusterStatsCache& cluster_stats_cache,
absl::string_view cluster_name,
uint64_t max_concurrent_requests, uint64_t reporting_hosts,
std::chrono::milliseconds rolling_window_ms,
std::stringstream& ss) {
const QuantileLatencyMap& histogram, std::stringstream& ss) {

std::time_t currentTime = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());

Expand Down Expand Up @@ -161,7 +181,7 @@ void HystrixSink::addHystrixCommand(ClusterStatsCache& cluster_stats_cache,
addIntToStream("rollingCountResponsesFromCache", 0, ss);

// Envoy's "circuit breaker" has similar meaning to hystrix's isolation
// so we count upstream_rq_pending_overflow and present it as ss
// so we count upstream_rq_pending_overflow and present it as rollingCountSemaphoreRejected
addIntToStream("rollingCountSemaphoreRejected", rejected, ss);

// Hystrix's short circuit is not similar to Envoy's since it is triggered by 503 responses
Expand All @@ -173,12 +193,8 @@ void HystrixSink::addHystrixCommand(ClusterStatsCache& cluster_stats_cache,
addIntToStream("rollingCountTimeout", timeouts, ss);
addIntToStream("rollingCountBadRequests", 0, ss);
addIntToStream("currentConcurrentExecutionCount", 0, ss);
addIntToStream("latencyExecute_mean", 0, ss);

// TODO trabetti : add histogram information once available by PR #2932
addInfoToStream(
"latencyExecute",
"{\"0\":0,\"25\":0,\"50\":0,\"75\":0,\"90\":0,\"95\":0,\"99\":0,\"99.5\":0,\"100\":0}", ss);
addStringToStream("latencyExecute_mean", "null", ss);
addHistogramToStream(histogram, "latencyExecute", ss);
addIntToStream("propertyValue_circuitBreakerRequestVolumeThreshold", 0, ss);
addIntToStream("propertyValue_circuitBreakerSleepWindowInMilliseconds", 0, ss);
addIntToStream("propertyValue_circuitBreakerErrorThresholdPercentage", 0, ss);
Expand Down Expand Up @@ -230,10 +246,11 @@ void HystrixSink::addClusterStatsToStream(ClusterStatsCache& cluster_stats_cache
uint64_t max_concurrent_requests,
uint64_t reporting_hosts,
std::chrono::milliseconds rolling_window_ms,
const QuantileLatencyMap& histogram,
std::stringstream& ss) {

addHystrixCommand(cluster_stats_cache, cluster_name, max_concurrent_requests, reporting_hosts,
rolling_window_ms, ss);
rolling_window_ms, histogram, ss);
addHystrixThreadPool(cluster_name, max_concurrent_requests, reporting_hosts, rolling_window_ms,
ss);
}
Expand Down Expand Up @@ -299,13 +316,46 @@ Http::Code HystrixSink::handlerHystrixEventStream(absl::string_view,
return Http::Code::OK;
}

void HystrixSink::flush(Stats::Source&) {
void HystrixSink::flush(Stats::Source& source) {
if (callbacks_list_.empty()) {
return;
}
incCounter();
std::stringstream ss;
Upstream::ClusterManager::ClusterInfoMap clusters = server_.clusterManager().clusters();

// Save a map of the relevant histograms per cluster in a convenient format.
std::unordered_map<std::string, QuantileLatencyMap> time_histograms;
for (const Stats::ParentHistogramSharedPtr& histogram : source.cachedHistograms()) {
if (histogram->tagExtractedName() == "cluster.upstream_rq_time") {
// TODO(mrice32): add an Envoy utility function to look up and return a tag for a metric.
auto it = std::find_if(histogram->tags().begin(), histogram->tags().end(),
[](const Stats::Tag& tag) {
return (tag.name_ == Config::TagNames::get().CLUSTER_NAME);
});

// Make sure we found the cluster name tag
ASSERT(it != histogram->tags().end());
auto it_bool_pair = time_histograms.emplace(std::make_pair(it->value_, QuantileLatencyMap()));
// Make sure histogram with this name was not already added
ASSERT(it_bool_pair.second);
QuantileLatencyMap& hist_map = it_bool_pair.first->second;

const std::vector<double>& supported_quantiles =
histogram->intervalStatistics().supportedQuantiles();
for (size_t i = 0; i < supported_quantiles.size(); ++i) {
// binary-search here is likely not worth it, as hystrix_quantiles has <10 elements.
if (std::find(hystrix_quantiles.begin(), hystrix_quantiles.end(), supported_quantiles[i]) !=
hystrix_quantiles.end()) {
const double value = histogram->intervalStatistics().computedQuantiles()[i];
if (!std::isnan(value)) {
hist_map[supported_quantiles[i]] = value;
}
}
}
}
}

for (auto& cluster : clusters) {
Upstream::ClusterInfoConstSharedPtr cluster_info = cluster.second.get().info();

Expand All @@ -323,7 +373,7 @@ void HystrixSink::flush(Stats::Source&) {
*cluster_stats_cache_ptr, cluster_info->name(),
cluster_info->resourceManager(Upstream::ResourcePriority::Default).pendingRequests().max(),
cluster_info->statsScope().gauge("membership_total").value(), server_.statsFlushInterval(),
ss);
time_histograms[cluster_info->name()], ss);
}

Buffer::OwnedImpl data;
Expand Down
40 changes: 28 additions & 12 deletions source/extensions/stat_sinks/hystrix/hystrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ namespace Hystrix {
typedef std::vector<uint64_t> RollingWindow;
typedef std::map<const std::string, RollingWindow> RollingStatsMap;

using QuantileLatencyMap = std::unordered_map<double, double>;
static const std::vector<double> hystrix_quantiles = {0, 0.25, 0.5, 0.75, 0.90,
0.95, 0.99, 0.995, 1};

struct {
const std::string AllowHeadersHystrix{"Accept, Cache-Control, X-Requested-With, Last-Event-ID"};
} AccessControlAllowHeadersValue;
Expand Down Expand Up @@ -74,7 +78,8 @@ class HystrixSink : public Stats::Sink, public Logger::Loggable<Logger::Id::hyst
void addClusterStatsToStream(ClusterStatsCache& cluster_stats_cache,
absl::string_view cluster_name, uint64_t max_concurrent_requests,
uint64_t reporting_hosts,
std::chrono::milliseconds rolling_window_ms, std::stringstream& ss);
std::chrono::milliseconds rolling_window_ms,
const QuantileLatencyMap& histogram, std::stringstream& ss);

/**
* Calculate values needed to create the stream and write into the map.
Expand All @@ -96,33 +101,44 @@ class HystrixSink : public Stats::Sink, public Logger::Loggable<Logger::Id::hyst
*/
uint64_t getRollingValue(RollingWindow rolling_window);

private:
/**
* Format the given key and absl::string_view value to "key"="value", and adding to the
* Format the given key and value to "key"=value, and adding to the stringstream.
*/
static void addInfoToStream(absl::string_view key, absl::string_view value,
std::stringstream& info, bool is_first = false);

/**
* Format the given key and double value to "key"=<string of uint64_t>, and adding to the
* stringstream.
*/
void addStringToStream(absl::string_view key, absl::string_view value, std::stringstream& info,
bool is_first = false);
static void addDoubleToStream(absl::string_view key, double value, std::stringstream& info,
bool is_first);

/**
* Format the given key and uint64_t value to "key"=<string of uint64_t>, and adding to the
* Format the given key and absl::string_view value to "key"="value", and adding to the
* stringstream.
*/
void addIntToStream(absl::string_view key, uint64_t value, std::stringstream& info,
bool is_first = false);
static void addStringToStream(absl::string_view key, absl::string_view value,
std::stringstream& info, bool is_first = false);

/**
* Format the given key and value to "key"=value, and adding to the stringstream.
* Format the given key and uint64_t value to "key"=<string of uint64_t>, and adding to the
* stringstream.
*/
void addInfoToStream(absl::string_view key, absl::string_view value, std::stringstream& info,
bool is_first = false);
static void addIntToStream(absl::string_view key, uint64_t value, std::stringstream& info,
bool is_first = false);

static void addHistogramToStream(const QuantileLatencyMap& latency_map, absl::string_view key,
std::stringstream& ss);

private:
/**
* Generate HystrixCommand event stream.
*/
void addHystrixCommand(ClusterStatsCache& cluster_stats_cache, absl::string_view cluster_name,
uint64_t max_concurrent_requests, uint64_t reporting_hosts,
std::chrono::milliseconds rolling_window_ms, std::stringstream& ss);
std::chrono::milliseconds rolling_window_ms,
const QuantileLatencyMap& histogram, std::stringstream& ss);

/**
* Generate HystrixThreadPool event stream.
Expand Down
62 changes: 36 additions & 26 deletions test/common/stats/thread_local_store_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,24 @@ class StatsThreadLocalStoreTest : public testing::Test {
std::unique_ptr<ThreadLocalStoreImpl> store_;
};

class HistogramWrapper {
public:
HistogramWrapper() : histogram_(hist_alloc()) {}

~HistogramWrapper() { hist_free(histogram_); }

const histogram_t* getHistogram() { return histogram_; }

void setHistogramValues(const std::vector<uint64_t>& values) {
for (uint64_t value : values) {
hist_insert_intscale(histogram_, value, 0, 1);
}
}

private:
histogram_t* histogram_;
};

class HistogramTest : public testing::Test {
public:
typedef std::map<std::string, ParentHistogramSharedPtr> NameHistogramMap;
Expand Down Expand Up @@ -88,15 +106,20 @@ class HistogramTest : public testing::Test {

std::vector<ParentHistogramSharedPtr> histogram_list = store_->histograms();

histogram_t* hist1_cumulative = makeHistogram(h1_cumulative_values_);
histogram_t* hist2_cumulative = makeHistogram(h2_cumulative_values_);
histogram_t* hist1_interval = makeHistogram(h1_interval_values_);
histogram_t* hist2_interval = makeHistogram(h2_interval_values_);
HistogramWrapper hist1_cumulative;
HistogramWrapper hist2_cumulative;
HistogramWrapper hist1_interval;
HistogramWrapper hist2_interval;

HistogramStatisticsImpl h1_cumulative_statistics(hist1_cumulative);
HistogramStatisticsImpl h2_cumulative_statistics(hist2_cumulative);
HistogramStatisticsImpl h1_interval_statistics(hist1_interval);
HistogramStatisticsImpl h2_interval_statistics(hist2_interval);
hist1_cumulative.setHistogramValues(h1_cumulative_values_);
hist2_cumulative.setHistogramValues(h2_cumulative_values_);
hist1_interval.setHistogramValues(h1_interval_values_);
hist2_interval.setHistogramValues(h2_interval_values_);

HistogramStatisticsImpl h1_cumulative_statistics(hist1_cumulative.getHistogram());
HistogramStatisticsImpl h2_cumulative_statistics(hist2_cumulative.getHistogram());
HistogramStatisticsImpl h1_interval_statistics(hist1_interval.getHistogram());
HistogramStatisticsImpl h2_interval_statistics(hist2_interval.getHistogram());

NameHistogramMap name_histogram_map = makeHistogramMap(histogram_list);
const ParentHistogramSharedPtr& h1 = name_histogram_map["h1"];
Expand All @@ -109,11 +132,6 @@ class HistogramTest : public testing::Test {
EXPECT_EQ(h2->intervalStatistics().summary(), h2_interval_statistics.summary());
}

hist_free(hist1_cumulative);
hist_free(hist2_cumulative);
hist_free(hist1_interval);
hist_free(hist2_interval);

h1_interval_values_.clear();
h2_interval_values_.clear();

Expand All @@ -133,14 +151,6 @@ class HistogramTest : public testing::Test {
}
}

histogram_t* makeHistogram(const std::vector<uint64_t>& values) {
histogram_t* histogram = hist_alloc();
for (uint64_t value : values) {
hist_insert_intscale(histogram, value, 0, 1);
}
return histogram;
}

MOCK_METHOD1(alloc, RawStatData*(const std::string& name));
MOCK_METHOD1(free, void(RawStatData& data));

Expand Down Expand Up @@ -610,9 +620,9 @@ TEST_F(HistogramTest, BasicHistogramSummaryValidate) {

const std::string h1_expected_summary =
"P0: 1, P25: 1.025, P50: 1.05, P75: 1.075, P90: 1.09, P95: 1.095, "
"P99: 1.099, P99.9: 1.0999, P100: 1.1";
const std::string h2_expected_summary =
"P0: 0, P25: 25, P50: 50, P75: 75, P90: 90, P95: 95, P99: 99, P99.9: 99.9, P100: 100";
"P99: 1.099, P99.5: 1.0995, P99.9: 1.0999, P100: 1.1";
const std::string h2_expected_summary = "P0: 0, P25: 25, P50: 50, P75: 75, P90: 90, P95: 95, "
"P99: 99, P99.5: 99.5, P99.9: 99.9, P100: 100";

for (size_t i = 0; i < 100; ++i) {
expectCallAndAccumulate(h2, i);
Expand All @@ -639,8 +649,8 @@ TEST_F(HistogramTest, BasicHistogramMergeSummary) {
}
EXPECT_EQ(1, validateMerge());

const std::string expected_summary =
"P0: 0, P25: 25, P50: 50, P75: 75, P90: 90, P95: 95, P99: 99, P99.9: 99.9, P100: 100";
const std::string expected_summary = "P0: 0, P25: 25, P50: 50, P75: 75, P90: 90, P95: 95, P99: "
"99, P99.5: 99.5, P99.9: 99.9, P100: 100";

NameHistogramMap name_histogram_map = makeHistogramMap(store_->histograms());
EXPECT_EQ(expected_summary, name_histogram_map["h1"]->cumulativeStatistics().summary());
Expand Down
Loading

0 comments on commit 3b47cba

Please sign in to comment.