Skip to content

Commit

Permalink
metrics service: flush histogram buckets (envoyproxy#8180)
Browse files Browse the repository at this point in the history
Signed-off-by: Rama Chavali <[email protected]>
  • Loading branch information
ramaraochavali authored and danzh1989 committed Sep 24, 2019
1 parent 0eb88bd commit b3a5649
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 13 deletions.
1 change: 1 addition & 0 deletions docs/root/intro/version_history.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ Version history
* listeners: added :ref:`continue_on_listener_filters_timeout <envoy_api_field_Listener.continue_on_listener_filters_timeout>` to configure whether a listener will still create a connection when listener filters time out.
* listeners: added :ref:`HTTP inspector listener filter <config_listener_filters_http_inspector>`.
* lua: extended `httpCall()` and `respond()` APIs to accept headers with entry values that can be a string or table of strings.
* metrics_service: added support for flushing histogram buckets.
* outlier_detector: added :ref:`support for the grpc-status response header <arch_overview_outlier_detection_grpc>` by mapping it to HTTP status. Guarded by envoy.reloadable_features.outlier_detection_support_for_grpc_status which defaults to true.
* performance: new buffer implementation enabled by default (to disable add "--use-libevent-buffers 1" to the command-line arguments when starting Envoy).
* performance: stats symbol table implementation (disabled by default; to test it, add "--use-fake-symbol-table 0" to the command-line arguments when starting Envoy).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,21 +60,43 @@ void MetricsServiceSink::flushGauge(const Stats::Gauge& gauge) {
gauage_metric->set_value(gauge.value());
}

void MetricsServiceSink::flushHistogram(const Stats::ParentHistogram& histogram) {
io::prometheus::client::MetricFamily* metrics_family = message_.add_envoy_metrics();
metrics_family->set_type(io::prometheus::client::MetricType::SUMMARY);
metrics_family->set_name(histogram.name());
auto* metric = metrics_family->add_metric();
metric->set_timestamp_ms(std::chrono::duration_cast<std::chrono::milliseconds>(
time_source_.systemTime().time_since_epoch())
.count());
auto* summary_metric = metric->mutable_summary();
const Stats::HistogramStatistics& hist_stats = histogram.intervalStatistics();
void MetricsServiceSink::flushHistogram(const Stats::ParentHistogram& envoy_histogram) {
// TODO(ramaraochavali): Currently we are sending both quantile information and bucket
// information. We should make this configurable if it turns out that sending both affects
// performance.

// Add summary information for histograms.
io::prometheus::client::MetricFamily* summary_metrics_family = message_.add_envoy_metrics();
summary_metrics_family->set_type(io::prometheus::client::MetricType::SUMMARY);
summary_metrics_family->set_name(envoy_histogram.name());
auto* summary_metric = summary_metrics_family->add_metric();
summary_metric->set_timestamp_ms(std::chrono::duration_cast<std::chrono::milliseconds>(
time_source_.systemTime().time_since_epoch())
.count());
auto* summary = summary_metric->mutable_summary();
const Stats::HistogramStatistics& hist_stats = envoy_histogram.intervalStatistics();
for (size_t i = 0; i < hist_stats.supportedQuantiles().size(); i++) {
auto* quantile = summary_metric->add_quantile();
auto* quantile = summary->add_quantile();
quantile->set_quantile(hist_stats.supportedQuantiles()[i]);
quantile->set_value(hist_stats.computedQuantiles()[i]);
}

// Add bucket information for histograms.
io::prometheus::client::MetricFamily* histogram_metrics_family = message_.add_envoy_metrics();
histogram_metrics_family->set_type(io::prometheus::client::MetricType::HISTOGRAM);
histogram_metrics_family->set_name(envoy_histogram.name());
auto* histogram_metric = histogram_metrics_family->add_metric();
histogram_metric->set_timestamp_ms(std::chrono::duration_cast<std::chrono::milliseconds>(
time_source_.systemTime().time_since_epoch())
.count());
auto* histogram = histogram_metric->mutable_histogram();
histogram->set_sample_count(hist_stats.sampleCount());
histogram->set_sample_sum(hist_stats.sampleSum());
for (size_t i = 0; i < hist_stats.supportedBuckets().size(); i++) {
auto* bucket = histogram->add_bucket();
bucket->set_upper_bound(hist_stats.supportedBuckets()[i]);
bucket->set_cumulative_count(hist_stats.computedBuckets()[i]);
}
}

void MetricsServiceSink::flush(Stats::MetricSnapshot& snapshot) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class MetricsServiceSink : public Stats::Sink {

void flushCounter(const Stats::Counter& counter);
void flushGauge(const Stats::Gauge& gauge);
void flushHistogram(const Stats::ParentHistogram& histogram);
void flushHistogram(const Stats::ParentHistogram& envoy_histogram);

private:
GrpcMetricsStreamerSharedPtr grpc_metrics_streamer_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,11 @@ class MetricsServiceIntegrationTest : public Grpc::GrpcClientIntegrationParamTes

ABSL_MUST_USE_RESULT
AssertionResult waitForMetricsRequest() {
bool known_summary_exists = false;
bool known_histogram_exists = false;
bool known_counter_exists = false;
bool known_gauge_exists = false;

// Sometimes stats do not come in the first flush cycle, this loop ensures that we wait till
// required stats are flushed.
// TODO(ramaraochavali): Figure out a more robust way to find out all required stats have been
Expand Down Expand Up @@ -98,11 +100,18 @@ class MetricsServiceIntegrationTest : public Grpc::GrpcClientIntegrationParamTes
}
if (metrics_family.name() == "cluster.cluster_0.upstream_rq_time" &&
metrics_family.type() == ::io::prometheus::client::MetricType::SUMMARY) {
known_histogram_exists = true;
known_summary_exists = true;
Stats::HistogramStatisticsImpl empty_statistics;
EXPECT_EQ(metrics_family.metric(0).summary().quantile_size(),
empty_statistics.supportedQuantiles().size());
}
if (metrics_family.name() == "cluster.cluster_0.upstream_rq_time" &&
metrics_family.type() == ::io::prometheus::client::MetricType::HISTOGRAM) {
known_histogram_exists = true;
Stats::HistogramStatisticsImpl empty_statistics;
EXPECT_EQ(metrics_family.metric(0).histogram().bucket_size(),
empty_statistics.supportedBuckets().size());
}
ASSERT(metrics_family.metric(0).has_timestamp_ms());
if (known_counter_exists && known_gauge_exists && known_histogram_exists) {
break;
Expand All @@ -111,6 +120,7 @@ class MetricsServiceIntegrationTest : public Grpc::GrpcClientIntegrationParamTes
}
EXPECT_TRUE(known_counter_exists);
EXPECT_TRUE(known_gauge_exists);
EXPECT_TRUE(known_summary_exists);
EXPECT_TRUE(known_histogram_exists);

return AssertionSuccess();
Expand Down

0 comments on commit b3a5649

Please sign in to comment.