-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
added histogram latency information to Hystrix dashboard stream #3986
Changes from 5 commits
92f2a12
5aec25a
a37a9a5
1e8b632
7b03da0
87b2d37
af829b3
9bb6ba2
bd40d1a
b929103
04fae8a
94929e5
eb95fdf
34afb7b
e780519
e30818c
9bbc77c
cf3adc9
3d6395a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -10,14 +10,16 @@ | |||
#include "common/http/headers.h" | ||||
|
||||
#include "absl/strings/str_cat.h" | ||||
#include "absl/strings/str_split.h" | ||||
|
||||
namespace Envoy { | ||||
namespace Extensions { | ||||
namespace StatSinks { | ||||
namespace Hystrix { | ||||
|
||||
const uint64_t HystrixSink::DEFAULT_NUM_BUCKETS; | ||||
|
||||
static const std::vector<double> hystrix_quantiles = {0, 0.25, 0.5, 0.75, 0.90, | ||||
0.95, 0.99, 0.995, 1}; | ||||
ClusterStatsCache::ClusterStatsCache(const std::string& cluster_name) | ||||
: cluster_name_(cluster_name) {} | ||||
|
||||
|
@@ -41,6 +43,14 @@ void ClusterStatsCache::printRollingWindow(absl::string_view name, RollingWindow | |||
out_str << std::endl; | ||||
} | ||||
|
||||
void ClusterStatsCache::addHistogramToStream(std::stringstream& ss) { | ||||
bool is_first = true; | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This seems a place where a join operation would make sense, but I can see that there is already a stylized approach to building these strings in this code. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you want me to consider changing the code to use join? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Up to you, TODO is fine as well. |
||||
for (std::pair<std::string, double> element : timing_) { | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. const std::pair<std::string, double>& |
||||
HystrixSink::addDoubleToStream(element.first, element.second, ss, is_first); | ||||
is_first = false; | ||||
} | ||||
} | ||||
|
||||
// Add new value to rolling window, in place of oldest one. | ||||
void HystrixSink::pushNewValue(RollingWindow& rolling_window, uint64_t value) { | ||||
if (rolling_window.empty()) { | ||||
|
@@ -65,7 +75,8 @@ uint64_t HystrixSink::getRollingValue(RollingWindow rolling_window) { | |||
} | ||||
|
||||
void HystrixSink::updateRollingWindowMap(const Upstream::ClusterInfo& cluster_info, | ||||
ClusterStatsCache& cluster_stats_cache) { | ||||
ClusterStatsCache& cluster_stats_cache, | ||||
std::unordered_map<std::string, double>& histogram) { | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we have a 'using' shortcut for this map type, declared in the HystrixSink scope, I think. |
||||
const std::string cluster_name = cluster_info.name(); | ||||
Upstream::ClusterStats& cluster_stats = cluster_info.stats(); | ||||
Stats::Scope& cluster_stats_scope = cluster_info.statsScope(); | ||||
|
@@ -100,6 +111,8 @@ void HystrixSink::updateRollingWindowMap(const Upstream::ClusterInfo& cluster_in | |||
uint64_t total = errors + timeouts + success + rejected; | ||||
pushNewValue(cluster_stats_cache.total_, total); | ||||
|
||||
cluster_stats_cache.timing_ = histogram; | ||||
|
||||
ENVOY_LOG(trace, "{}", printRollingWindows()); | ||||
} | ||||
|
||||
|
@@ -116,6 +129,11 @@ void HystrixSink::addIntToStream(absl::string_view key, uint64_t value, std::str | |||
addInfoToStream(key, std::to_string(value), info, is_first); | ||||
} | ||||
|
||||
void HystrixSink::addDoubleToStream(absl::string_view key, double value, std::stringstream& info, | ||||
bool is_first) { | ||||
addInfoToStream(key, std::to_string(value), info, is_first); | ||||
} | ||||
|
||||
void HystrixSink::addInfoToStream(absl::string_view key, absl::string_view value, | ||||
std::stringstream& info, bool is_first) { | ||||
if (!is_first) { | ||||
|
@@ -159,7 +177,7 @@ void HystrixSink::addHystrixCommand(ClusterStatsCache& cluster_stats_cache, | |||
addIntToStream("rollingCountResponsesFromCache", 0, ss); | ||||
|
||||
// Envoy's "circuit breaker" has similar meaning to hystrix's isolation | ||||
// so we count upstream_rq_pending_overflow and present it as ss | ||||
// so we count upstream_rq_pending_overflow and present it as rollingCountSemaphoreRejected | ||||
addIntToStream("rollingCountSemaphoreRejected", rejected, ss); | ||||
|
||||
// Hystrix's short circuit is not similar to Envoy's since it is triggered by 503 responses | ||||
|
@@ -172,11 +190,10 @@ void HystrixSink::addHystrixCommand(ClusterStatsCache& cluster_stats_cache, | |||
addIntToStream("rollingCountBadRequests", 0, ss); | ||||
addIntToStream("currentConcurrentExecutionCount", 0, ss); | ||||
addIntToStream("latencyExecute_mean", 0, ss); | ||||
|
||||
// TODO trabetti : add histogram information once available by PR #2932 | ||||
addInfoToStream( | ||||
"latencyExecute", | ||||
"{\"0\":0,\"25\":0,\"50\":0,\"75\":0,\"90\":0,\"95\":0,\"99\":0,\"99.5\":0,\"100\":0}", ss); | ||||
ss << ", \"latencyExecute\": {"; | ||||
cluster_stats_cache.addHistogramToStream(ss); | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not pass the key into the method as a string_view as you do for the others? This would allow the method to handle the brackets, etc. You can still concatenate them directly to the underlying stringstream directly IIUC. |
||||
ss << "}"; | ||||
// addInfoToStream("latencyExecute", "{" + cluster_stats_cache.printTimingHistogram() + "}", ss); | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. remove this line? |
||||
addIntToStream("propertyValue_circuitBreakerRequestVolumeThreshold", 0, ss); | ||||
addIntToStream("propertyValue_circuitBreakerSleepWindowInMilliseconds", 0, ss); | ||||
addIntToStream("propertyValue_circuitBreakerErrorThresholdPercentage", 0, ss); | ||||
|
@@ -304,6 +321,32 @@ void HystrixSink::flush(Stats::Source&) { | |||
incCounter(); | ||||
std::stringstream ss; | ||||
Upstream::ClusterManager::ClusterInfoMap clusters = server_.clusterManager().clusters(); | ||||
|
||||
// Save a map of the relevant histograms per cluster in a convenient format. | ||||
std::unordered_map<absl::string_view, std::unordered_map<std::string, double>, StringViewHash> | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using a Also, can't we use QuantileLatencyMap here instead of typing out the entire type? |
||||
time_histograms; | ||||
for (const Stats::ParentHistogramSharedPtr histogram : server_.stats().histograms()) { | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: I think we should be using the envoy/include/envoy/stats/stats.h Line 301 in 3460595
|
||||
// histogram->name() on clusters of the format "cluster.cluster_name.histogram_name" | ||||
// i.e. "cluster.service1.upstream_rq_time". | ||||
const std::vector<absl::string_view> split_name = absl::StrSplit(histogram->name(), '.'); | ||||
if (split_name[0] == "cluster" && split_name[2] == "upstream_rq_time") { | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. check split_name.size()>2 first There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you mean there could be a case where the histogram name is made up of more parts? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. More parts would be OK; fewer parts would crash. I would prefer to be locally paranoid and have an assertion rather than a SEGV if that occurs. |
||||
std::unordered_map<std::string, double>& hist_map = time_histograms[split_name[1]]; | ||||
for (size_t i = 0; i < histogram->cumulativeStatistics().supportedQuantiles().size(); ++i) { | ||||
if (std::find(hystrix_quantiles.begin(), hystrix_quantiles.end(), | ||||
histogram->cumulativeStatistics().supportedQuantiles()[i]) != | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you make a temp ref for supportedQuantiles()? Would make this long std::find call easier to read. |
||||
hystrix_quantiles.end()) { | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should there be handling for a case where the find() fails? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Isn't it supposed to return hystrix_quantiles.end() if it fails? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Of course, but is that expected? Should you handle that case? |
||||
if (histogram->cumulativeStatistics().supportedQuantiles()[i] == 0.995) { | ||||
hist_map["99.5"] = histogram->cumulativeStatistics().computedQuantiles()[i]; | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why does this require special-casing? add comment in code. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the Hystrix formatting requirement for these strings? Could we possibly take advantage of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. only the 99.5 is not an int. all others are integers and it didn't work when i tried sending these as double. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What are the requirements? Would "95.0" be okay? Or does it have to be exactly "95"? There are some float formatting options in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sprintf with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also you might want to consider using integers scaled by 10. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. amazing, you have the solution for everything 😀 it worked. I am keeping it as a double to make it more clear, does it improve a lot to scale by 10 and use integer? |
||||
} else { | ||||
hist_map[std::to_string( | ||||
int(100 * histogram->cumulativeStatistics().supportedQuantiles()[i]))] = | ||||
histogram->cumulativeStatistics().computedQuantiles()[i]; | ||||
} | ||||
} | ||||
} | ||||
} | ||||
} | ||||
|
||||
for (auto& cluster : clusters) { | ||||
Upstream::ClusterInfoConstSharedPtr cluster_info = cluster.second.get().info(); | ||||
|
||||
|
@@ -314,7 +357,8 @@ void HystrixSink::flush(Stats::Source&) { | |||
} | ||||
|
||||
// update rolling window with cluster stats | ||||
updateRollingWindowMap(*cluster_info, *cluster_stats_cache_ptr); | ||||
updateRollingWindowMap(*cluster_info, *cluster_stats_cache_ptr, | ||||
time_histograms[cluster_info->name()]); | ||||
|
||||
// append it to stream to be sent | ||||
addClusterStatsToStream( | ||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,8 @@ struct ClusterStatsCache { | |
void printToStream(std::stringstream& out_str); | ||
void printRollingWindow(absl::string_view name, RollingWindow rolling_window, | ||
std::stringstream& out_str); | ||
void addHistogramToStream(std::stringstream& ss); | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. blank line? |
||
std::string cluster_name_; | ||
|
||
// Rolling windows | ||
|
@@ -34,6 +36,8 @@ struct ClusterStatsCache { | |
RollingWindow total_; | ||
RollingWindow timeouts_; | ||
RollingWindow rejected_; | ||
|
||
std::unordered_map<std::string, double> timing_; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need this to be cached if we're just going to regenerate the map each time? Also, why not make this a double to double instead by storing the quantile as a double rather than a string? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I still have an open question whether it is worth doing any kind of calculation over the percentile values, instead of giving the values since the beginning of the cluster's operation. Otherwise, I can pass it directly without using the cache.
Just to overcome the inconsistent representation - "99.5" vs. all other values which are integers (25, 50, 99, etc.), and if I pass them as doubles, the dashboard does not accept them. It was convenient to handle this here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is just mapping "99.5" to 99.5? and "25" to 25.0? Unless there's a big efficiency issue I'd vote for doing this type coercion closer to where it's consumed. Otherwise commenting why you are doing this would be really helpful. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is mapping the string representation of the quantile to its current value from the histogram. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't mean in the PR comments; I mean adding comments in the code. Document both what it's doing and also why you think this is helpful from a code-structure or performance perspective. |
||
}; | ||
|
||
typedef std::unique_ptr<ClusterStatsCache> ClusterStatsCachePtr; | ||
|
@@ -78,7 +82,8 @@ class HystrixSink : public Stats::Sink, public Logger::Loggable<Logger::Id::hyst | |
* Calculate values needed to create the stream and write into the map. | ||
*/ | ||
void updateRollingWindowMap(const Upstream::ClusterInfo& cluster_info, | ||
ClusterStatsCache& cluster_stats_cache); | ||
ClusterStatsCache& cluster_stats_cache, | ||
std::unordered_map<std::string, double>& histogram); | ||
/** | ||
* Clear map. | ||
*/ | ||
|
@@ -94,27 +99,34 @@ class HystrixSink : public Stats::Sink, public Logger::Loggable<Logger::Id::hyst | |
*/ | ||
uint64_t getRollingValue(RollingWindow rolling_window); | ||
|
||
private: | ||
/** | ||
* Format the given key and absl::string_view value to "key"="value", and adding to the | ||
* Format the given key and value to "key"=value, and adding to the stringstream. | ||
*/ | ||
static void addInfoToStream(absl::string_view key, absl::string_view value, | ||
std::stringstream& info, bool is_first = false); | ||
|
||
/** | ||
* Format the given key and double value to "key"=<string of double>, and adding to the | ||||
* stringstream. | ||
*/ | ||
void addStringToStream(absl::string_view key, absl::string_view value, std::stringstream& info, | ||
bool is_first = false); | ||
static void addDoubleToStream(absl::string_view key, double value, std::stringstream& info, | ||
bool is_first); | ||
|
||
/** | ||
* Format the given key and uint64_t value to "key"=<string of uint64_t>, and adding to the | ||
* Format the given key and absl::string_view value to "key"="value", and adding to the | ||
* stringstream. | ||
*/ | ||
void addIntToStream(absl::string_view key, uint64_t value, std::stringstream& info, | ||
bool is_first = false); | ||
static void addStringToStream(absl::string_view key, absl::string_view value, | ||
std::stringstream& info, bool is_first = false); | ||
|
||
/** | ||
* Format the given key and value to "key"=value, and adding to the stringstream. | ||
* Format the given key and uint64_t value to "key"=<string of uint64_t>, and adding to the | ||
* stringstream. | ||
*/ | ||
void addInfoToStream(absl::string_view key, absl::string_view value, std::stringstream& info, | ||
bool is_first = false); | ||
static void addIntToStream(absl::string_view key, uint64_t value, std::stringstream& info, | ||
bool is_first = false); | ||
|
||
private: | ||
/** | ||
* Generate HystrixCommand event stream. | ||
*/ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
s/represent/represents/
s/start/the start/