diff --git a/docs/root/operations/admin.rst b/docs/root/operations/admin.rst index 38cc64f7fe18..694a917f3659 100644 --- a/docs/root/operations/admin.rst +++ b/docs/root/operations/admin.rst @@ -352,6 +352,7 @@ The fields are: In Envoy, service unavailable response will cause **outlier detection** - removing a node off the load balancer pool, but requests are not rejected as a result. Therefore, this counter is always set to '0'. - * Latency information is currently unavailable. + * Latency information represents data since last flush. + Mean latency is currently not available. diff --git a/source/common/stats/histogram_impl.cc b/source/common/stats/histogram_impl.cc index b59fe01a495a..83fe9086a8ca 100644 --- a/source/common/stats/histogram_impl.cc +++ b/source/common/stats/histogram_impl.cc @@ -17,8 +17,8 @@ HistogramStatisticsImpl::HistogramStatisticsImpl(const histogram_t* histogram_pt } const std::vector& HistogramStatisticsImpl::supportedQuantiles() const { - static const std::vector supported_quantiles = {0, 0.25, 0.5, 0.75, 0.90, - 0.95, 0.99, 0.999, 1}; + static const std::vector supported_quantiles = {0, 0.25, 0.5, 0.75, 0.90, + 0.95, 0.99, 0.995, 0.999, 1}; return supported_quantiles; } diff --git a/source/extensions/stat_sinks/hystrix/BUILD b/source/extensions/stat_sinks/hystrix/BUILD index 334121f54e07..418c680513cc 100644 --- a/source/extensions/stat_sinks/hystrix/BUILD +++ b/source/extensions/stat_sinks/hystrix/BUILD @@ -35,6 +35,7 @@ envoy_cc_library( "//include/envoy/stats:stats_interface", "//source/common/buffer:buffer_lib", "//source/common/common:logger_lib", + "//source/common/config:well_known_names", "//source/common/http:headers_lib", ], ) diff --git a/source/extensions/stat_sinks/hystrix/hystrix.cc b/source/extensions/stat_sinks/hystrix/hystrix.cc index 49568b564a53..ed339a4a4089 100644 --- a/source/extensions/stat_sinks/hystrix/hystrix.cc +++ b/source/extensions/stat_sinks/hystrix/hystrix.cc @@ -9,9 +9,12 @@ #include "common/buffer/buffer_impl.h" #include "common/common/logger.h" +#include "common/config/well_known_names.h" #include "common/http/headers.h" #include "absl/strings/str_cat.h" +#include "absl/strings/str_split.h" +#include "fmt/printf.h" namespace Envoy { namespace Extensions { @@ -19,7 +22,6 @@ namespace StatSinks { namespace Hystrix { const uint64_t HystrixSink::DEFAULT_NUM_BUCKETS; - ClusterStatsCache::ClusterStatsCache(const std::string& cluster_name) : cluster_name_(cluster_name) {} @@ -43,6 +45,19 @@ void ClusterStatsCache::printRollingWindow(absl::string_view name, RollingWindow out_str << std::endl; } +void HystrixSink::addHistogramToStream(const QuantileLatencyMap& latency_map, absl::string_view key, + std::stringstream& ss) { + // TODO: Consider if we better use join here + ss << ", \"" << key << "\": {"; + bool is_first = true; + for (const std::pair& element : latency_map) { + const std::string quantile = fmt::sprintf("%g", element.first * 100); + HystrixSink::addDoubleToStream(quantile, element.second, ss, is_first); + is_first = false; + } + ss << "}"; +} + // Add new value to rolling window, in place of oldest one. void HystrixSink::pushNewValue(RollingWindow& rolling_window, uint64_t value) { if (rolling_window.empty()) { @@ -118,6 +133,11 @@ void HystrixSink::addIntToStream(absl::string_view key, uint64_t value, std::str addInfoToStream(key, std::to_string(value), info, is_first); } +void HystrixSink::addDoubleToStream(absl::string_view key, double value, std::stringstream& info, + bool is_first) { + addInfoToStream(key, std::to_string(value), info, is_first); +} + void HystrixSink::addInfoToStream(absl::string_view key, absl::string_view value, std::stringstream& info, bool is_first) { if (!is_first) { @@ -131,7 +151,7 @@ void HystrixSink::addHystrixCommand(ClusterStatsCache& cluster_stats_cache, absl::string_view cluster_name, uint64_t max_concurrent_requests, uint64_t reporting_hosts, std::chrono::milliseconds rolling_window_ms, - std::stringstream& ss) { + const QuantileLatencyMap& histogram, std::stringstream& ss) { std::time_t currentTime = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); @@ -161,7 +181,7 @@ void HystrixSink::addHystrixCommand(ClusterStatsCache& cluster_stats_cache, addIntToStream("rollingCountResponsesFromCache", 0, ss); // Envoy's "circuit breaker" has similar meaning to hystrix's isolation - // so we count upstream_rq_pending_overflow and present it as ss + // so we count upstream_rq_pending_overflow and present it as rollingCountSemaphoreRejected addIntToStream("rollingCountSemaphoreRejected", rejected, ss); // Hystrix's short circuit is not similar to Envoy's since it is triggered by 503 responses @@ -173,12 +193,8 @@ void HystrixSink::addHystrixCommand(ClusterStatsCache& cluster_stats_cache, addIntToStream("rollingCountTimeout", timeouts, ss); addIntToStream("rollingCountBadRequests", 0, ss); addIntToStream("currentConcurrentExecutionCount", 0, ss); - addIntToStream("latencyExecute_mean", 0, ss); - - // TODO trabetti : add histogram information once available by PR #2932 - addInfoToStream( - "latencyExecute", - "{\"0\":0,\"25\":0,\"50\":0,\"75\":0,\"90\":0,\"95\":0,\"99\":0,\"99.5\":0,\"100\":0}", ss); + addStringToStream("latencyExecute_mean", "null", ss); + addHistogramToStream(histogram, "latencyExecute", ss); addIntToStream("propertyValue_circuitBreakerRequestVolumeThreshold", 0, ss); addIntToStream("propertyValue_circuitBreakerSleepWindowInMilliseconds", 0, ss); addIntToStream("propertyValue_circuitBreakerErrorThresholdPercentage", 0, ss); @@ -230,10 +246,11 @@ void HystrixSink::addClusterStatsToStream(ClusterStatsCache& cluster_stats_cache uint64_t max_concurrent_requests, uint64_t reporting_hosts, std::chrono::milliseconds rolling_window_ms, + const QuantileLatencyMap& histogram, std::stringstream& ss) { addHystrixCommand(cluster_stats_cache, cluster_name, max_concurrent_requests, reporting_hosts, - rolling_window_ms, ss); + rolling_window_ms, histogram, ss); addHystrixThreadPool(cluster_name, max_concurrent_requests, reporting_hosts, rolling_window_ms, ss); } @@ -299,13 +316,46 @@ Http::Code HystrixSink::handlerHystrixEventStream(absl::string_view, return Http::Code::OK; } -void HystrixSink::flush(Stats::Source&) { +void HystrixSink::flush(Stats::Source& source) { if (callbacks_list_.empty()) { return; } incCounter(); std::stringstream ss; Upstream::ClusterManager::ClusterInfoMap clusters = server_.clusterManager().clusters(); + + // Save a map of the relevant histograms per cluster in a convenient format. + std::unordered_map time_histograms; + for (const Stats::ParentHistogramSharedPtr& histogram : source.cachedHistograms()) { + if (histogram->tagExtractedName() == "cluster.upstream_rq_time") { + // TODO(mrice32): add an Envoy utility function to look up and return a tag for a metric. + auto it = std::find_if(histogram->tags().begin(), histogram->tags().end(), + [](const Stats::Tag& tag) { + return (tag.name_ == Config::TagNames::get().CLUSTER_NAME); + }); + + // Make sure we found the cluster name tag + ASSERT(it != histogram->tags().end()); + auto it_bool_pair = time_histograms.emplace(std::make_pair(it->value_, QuantileLatencyMap())); + // Make sure histogram with this name was not already added + ASSERT(it_bool_pair.second); + QuantileLatencyMap& hist_map = it_bool_pair.first->second; + + const std::vector& supported_quantiles = + histogram->intervalStatistics().supportedQuantiles(); + for (size_t i = 0; i < supported_quantiles.size(); ++i) { + // binary-search here is likely not worth it, as hystrix_quantiles has <10 elements. + if (std::find(hystrix_quantiles.begin(), hystrix_quantiles.end(), supported_quantiles[i]) != + hystrix_quantiles.end()) { + const double value = histogram->intervalStatistics().computedQuantiles()[i]; + if (!std::isnan(value)) { + hist_map[supported_quantiles[i]] = value; + } + } + } + } + } + for (auto& cluster : clusters) { Upstream::ClusterInfoConstSharedPtr cluster_info = cluster.second.get().info(); @@ -323,7 +373,7 @@ void HystrixSink::flush(Stats::Source&) { *cluster_stats_cache_ptr, cluster_info->name(), cluster_info->resourceManager(Upstream::ResourcePriority::Default).pendingRequests().max(), cluster_info->statsScope().gauge("membership_total").value(), server_.statsFlushInterval(), - ss); + time_histograms[cluster_info->name()], ss); } Buffer::OwnedImpl data; diff --git a/source/extensions/stat_sinks/hystrix/hystrix.h b/source/extensions/stat_sinks/hystrix/hystrix.h index 3745cd1a4c86..a78e57bf1490 100644 --- a/source/extensions/stat_sinks/hystrix/hystrix.h +++ b/source/extensions/stat_sinks/hystrix/hystrix.h @@ -18,6 +18,10 @@ namespace Hystrix { typedef std::vector RollingWindow; typedef std::map RollingStatsMap; +using QuantileLatencyMap = std::unordered_map; +static const std::vector hystrix_quantiles = {0, 0.25, 0.5, 0.75, 0.90, + 0.95, 0.99, 0.995, 1}; + struct { const std::string AllowHeadersHystrix{"Accept, Cache-Control, X-Requested-With, Last-Event-ID"}; } AccessControlAllowHeadersValue; @@ -74,7 +78,8 @@ class HystrixSink : public Stats::Sink, public Logger::Loggable, and adding to the * stringstream. */ - void addStringToStream(absl::string_view key, absl::string_view value, std::stringstream& info, - bool is_first = false); + static void addDoubleToStream(absl::string_view key, double value, std::stringstream& info, + bool is_first); /** - * Format the given key and uint64_t value to "key"=, and adding to the + * Format the given key and absl::string_view value to "key"="value", and adding to the * stringstream. */ - void addIntToStream(absl::string_view key, uint64_t value, std::stringstream& info, - bool is_first = false); + static void addStringToStream(absl::string_view key, absl::string_view value, + std::stringstream& info, bool is_first = false); /** - * Format the given key and value to "key"=value, and adding to the stringstream. + * Format the given key and uint64_t value to "key"=, and adding to the + * stringstream. */ - void addInfoToStream(absl::string_view key, absl::string_view value, std::stringstream& info, - bool is_first = false); + static void addIntToStream(absl::string_view key, uint64_t value, std::stringstream& info, + bool is_first = false); + + static void addHistogramToStream(const QuantileLatencyMap& latency_map, absl::string_view key, + std::stringstream& ss); +private: /** * Generate HystrixCommand event stream. */ void addHystrixCommand(ClusterStatsCache& cluster_stats_cache, absl::string_view cluster_name, uint64_t max_concurrent_requests, uint64_t reporting_hosts, - std::chrono::milliseconds rolling_window_ms, std::stringstream& ss); + std::chrono::milliseconds rolling_window_ms, + const QuantileLatencyMap& histogram, std::stringstream& ss); /** * Generate HystrixThreadPool event stream. diff --git a/test/common/stats/thread_local_store_test.cc b/test/common/stats/thread_local_store_test.cc index ce27b93452fa..e4e63fcb4e96 100644 --- a/test/common/stats/thread_local_store_test.cc +++ b/test/common/stats/thread_local_store_test.cc @@ -47,6 +47,24 @@ class StatsThreadLocalStoreTest : public testing::Test { std::unique_ptr store_; }; +class HistogramWrapper { +public: + HistogramWrapper() : histogram_(hist_alloc()) {} + + ~HistogramWrapper() { hist_free(histogram_); } + + const histogram_t* getHistogram() { return histogram_; } + + void setHistogramValues(const std::vector& values) { + for (uint64_t value : values) { + hist_insert_intscale(histogram_, value, 0, 1); + } + } + +private: + histogram_t* histogram_; +}; + class HistogramTest : public testing::Test { public: typedef std::map NameHistogramMap; @@ -88,15 +106,20 @@ class HistogramTest : public testing::Test { std::vector histogram_list = store_->histograms(); - histogram_t* hist1_cumulative = makeHistogram(h1_cumulative_values_); - histogram_t* hist2_cumulative = makeHistogram(h2_cumulative_values_); - histogram_t* hist1_interval = makeHistogram(h1_interval_values_); - histogram_t* hist2_interval = makeHistogram(h2_interval_values_); + HistogramWrapper hist1_cumulative; + HistogramWrapper hist2_cumulative; + HistogramWrapper hist1_interval; + HistogramWrapper hist2_interval; - HistogramStatisticsImpl h1_cumulative_statistics(hist1_cumulative); - HistogramStatisticsImpl h2_cumulative_statistics(hist2_cumulative); - HistogramStatisticsImpl h1_interval_statistics(hist1_interval); - HistogramStatisticsImpl h2_interval_statistics(hist2_interval); + hist1_cumulative.setHistogramValues(h1_cumulative_values_); + hist2_cumulative.setHistogramValues(h2_cumulative_values_); + hist1_interval.setHistogramValues(h1_interval_values_); + hist2_interval.setHistogramValues(h2_interval_values_); + + HistogramStatisticsImpl h1_cumulative_statistics(hist1_cumulative.getHistogram()); + HistogramStatisticsImpl h2_cumulative_statistics(hist2_cumulative.getHistogram()); + HistogramStatisticsImpl h1_interval_statistics(hist1_interval.getHistogram()); + HistogramStatisticsImpl h2_interval_statistics(hist2_interval.getHistogram()); NameHistogramMap name_histogram_map = makeHistogramMap(histogram_list); const ParentHistogramSharedPtr& h1 = name_histogram_map["h1"]; @@ -109,11 +132,6 @@ class HistogramTest : public testing::Test { EXPECT_EQ(h2->intervalStatistics().summary(), h2_interval_statistics.summary()); } - hist_free(hist1_cumulative); - hist_free(hist2_cumulative); - hist_free(hist1_interval); - hist_free(hist2_interval); - h1_interval_values_.clear(); h2_interval_values_.clear(); @@ -133,14 +151,6 @@ class HistogramTest : public testing::Test { } } - histogram_t* makeHistogram(const std::vector& values) { - histogram_t* histogram = hist_alloc(); - for (uint64_t value : values) { - hist_insert_intscale(histogram, value, 0, 1); - } - return histogram; - } - MOCK_METHOD1(alloc, RawStatData*(const std::string& name)); MOCK_METHOD1(free, void(RawStatData& data)); @@ -610,9 +620,9 @@ TEST_F(HistogramTest, BasicHistogramSummaryValidate) { const std::string h1_expected_summary = "P0: 1, P25: 1.025, P50: 1.05, P75: 1.075, P90: 1.09, P95: 1.095, " - "P99: 1.099, P99.9: 1.0999, P100: 1.1"; - const std::string h2_expected_summary = - "P0: 0, P25: 25, P50: 50, P75: 75, P90: 90, P95: 95, P99: 99, P99.9: 99.9, P100: 100"; + "P99: 1.099, P99.5: 1.0995, P99.9: 1.0999, P100: 1.1"; + const std::string h2_expected_summary = "P0: 0, P25: 25, P50: 50, P75: 75, P90: 90, P95: 95, " + "P99: 99, P99.5: 99.5, P99.9: 99.9, P100: 100"; for (size_t i = 0; i < 100; ++i) { expectCallAndAccumulate(h2, i); @@ -639,8 +649,8 @@ TEST_F(HistogramTest, BasicHistogramMergeSummary) { } EXPECT_EQ(1, validateMerge()); - const std::string expected_summary = - "P0: 0, P25: 25, P50: 50, P75: 75, P90: 90, P95: 95, P99: 99, P99.9: 99.9, P100: 100"; + const std::string expected_summary = "P0: 0, P25: 25, P50: 50, P75: 75, P90: 90, P95: 95, P99: " + "99, P99.5: 99.5, P99.9: 99.9, P100: 100"; NameHistogramMap name_histogram_map = makeHistogramMap(store_->histograms()); EXPECT_EQ(expected_summary, name_histogram_map["h1"]->cumulativeStatistics().summary()); diff --git a/test/extensions/stats_sinks/hystrix/hystrix_test.cc b/test/extensions/stats_sinks/hystrix/hystrix_test.cc index 7e0aff5a4169..564da63373ee 100644 --- a/test/extensions/stats_sinks/hystrix/hystrix_test.cc +++ b/test/extensions/stats_sinks/hystrix/hystrix_test.cc @@ -10,6 +10,8 @@ #include "test/mocks/upstream/mocks.h" #include "absl/strings/str_split.h" +#include "circllhist.h" +#include "fmt/printf.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -19,6 +21,7 @@ using testing::InSequence; using testing::Invoke; using testing::NiceMock; using testing::Return; +using testing::ReturnPointee; using testing::ReturnRef; namespace Envoy { @@ -95,6 +98,24 @@ class ClusterTestInfo { NiceMock retry_4xx_counter_; }; +class HistogramWrapper { +public: + HistogramWrapper() : histogram_(hist_alloc()) {} + + ~HistogramWrapper() { hist_free(histogram_); } + + const histogram_t* getHistogram() { return histogram_; } + + void setHistogramValues(const std::vector& values) { + for (uint64_t value : values) { + hist_insert_intscale(histogram_, value, 0, 1); + } + } + +private: + histogram_t* histogram_; +}; + class HystrixSinkTest : public testing::Test { public: HystrixSinkTest() { sink_.reset(new HystrixSink(server_, window_size_)); } @@ -432,6 +453,61 @@ TEST_F(HystrixSinkTest, AddAndRemoveClusters) { validateResults(cluster_message_map[cluster2_name_], 0, 0, 0, 0, 0, window_size_); } +TEST_F(HystrixSinkTest, HistogramTest) { + InSequence s; + std::vector stored_histograms; + + // Create histogram for the Hystrix sink to read. + auto histogram = std::make_shared>(); + histogram->name_ = "cluster." + cluster1_name_ + ".upstream_rq_time"; + const std::string tag_extracted_name = "cluster.upstream_rq_time"; + ON_CALL(*histogram, tagExtractedName()) + .WillByDefault(testing::ReturnRefOfCopy(tag_extracted_name)); + std::vector tags; + Stats::Tag tag = { + Config::TagNames::get().CLUSTER_NAME, // name_ + cluster1_name_ // value_ + }; + tags.emplace_back(tag); + ON_CALL(*histogram, tags()).WillByDefault(testing::ReturnRef(tags)); + + histogram->used_ = true; + + // Init with data such that the quantile value is equal to the quantile. + std::vector h1_interval_values; + for (size_t i = 0; i < 100; ++i) { + h1_interval_values.push_back(i); + } + + HistogramWrapper hist1_interval; + hist1_interval.setHistogramValues(h1_interval_values); + + Stats::HistogramStatisticsImpl h1_interval_statistics(hist1_interval.getHistogram()); + ON_CALL(*histogram, intervalStatistics()) + .WillByDefault(testing::ReturnRef(h1_interval_statistics)); + stored_histograms.push_back(histogram); + + ON_CALL(source_, cachedHistograms()).WillByDefault(ReturnPointee(&stored_histograms)); + + Buffer::OwnedImpl buffer = createClusterAndCallbacks(); + // Register callback to sink. + sink_->registerConnection(&callbacks_); + sink_->flush(source_); + + std::unordered_map cluster_message_map = + buildClusterMap(buffer.toString()); + + Json::ObjectSharedPtr latency = Json::Factory::loadFromString(cluster_message_map[cluster1_name_]) + ->getObject("latencyExecute"); + + // Data was added such that the value equals the quantile: + // "latencyExecute": {"99.5": 99.500000, "95": 95.000000, "90": 90.000000, "100": 100.000000, "0": + // 0.000000, "25": 25.000000, "99": 99.000000, "50": 50.000000, "75": 75.000000}. + for (const double quantile : hystrix_quantiles) { + EXPECT_EQ(quantile * 100, latency->getDouble(fmt::sprintf("%g", quantile * 100))); + } +} + TEST_F(HystrixSinkTest, HystrixEventStreamHandler) { InSequence s; Buffer::OwnedImpl buffer = createClusterAndCallbacks(); diff --git a/test/server/http/admin_test.cc b/test/server/http/admin_test.cc index 7b44491fce32..003c19c52929 100644 --- a/test/server/http/admin_test.cc +++ b/test/server/http/admin_test.cc @@ -125,6 +125,7 @@ TEST_P(AdminStatsTest, StatsAsJson) { 90.0, 95.0, 99.0, + 99.5, 99.9, 100.0 ], @@ -160,6 +161,10 @@ TEST_P(AdminStatsTest, StatsAsJson) { "interval": null, "cumulative": 109.9 }, + { + "interval": null, + "cumulative": 109.95 + }, { "interval": null, "cumulative": 109.99 @@ -201,6 +206,10 @@ TEST_P(AdminStatsTest, StatsAsJson) { "interval": 109.9, "cumulative": 209.8 }, + { + "interval": 109.95, + "cumulative": 209.9 + }, { "interval": 109.99, "cumulative": 209.98 @@ -262,6 +271,7 @@ TEST_P(AdminStatsTest, UsedOnlyStatsAsJson) { 90.0, 95.0, 99.0, + 99.5, 99.9, 100.0 ], @@ -297,6 +307,10 @@ TEST_P(AdminStatsTest, UsedOnlyStatsAsJson) { "interval": 109.9, "cumulative": 209.8 }, + { + "interval": 109.95, + "cumulative": 209.9 + }, { "interval": 109.99, "cumulative": 209.98