Skip to content

Commit

Permalink
api: compute average for histograms and summaries
Browse files Browse the repository at this point in the history
This patch introduces a tool to compute the average value for histograms
and summaries. No additional deepcopies are performed, same as in collect.

Part of tarantool/tarantool#7725
Part of tarantool/tarantool#7728
  • Loading branch information
DifferentialOrange committed Feb 16, 2023
1 parent 4a1c318 commit d45585e
Show file tree
Hide file tree
Showing 4 changed files with 201 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- tools to compute metrics aggregates:
- per second rate for counters;
- min and max for gauges;
- average for histograms and summaries;

### Changed
- Setup cartridge hotreload inside the role
Expand Down
4 changes: 3 additions & 1 deletion doc/monitoring/api_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,9 @@ Metrics functions
* ``rate`` for counter collectors: per second rate of value change for the last
two observations;
* ``min`` for gauge collectors: minimal value for the history of observations;
* ``max`` for gauge collectors: maximal value for the history of observations.
* ``max`` for gauge collectors: maximal value for the history of observations;
* ``average`` for histogram and summary collectors: observations average value
(over all history of observations).

:param table output_with_aggregates_prev: a previous result of this method call.
Use ``nil`` if this is the first invocation. You may use
Expand Down
59 changes: 59 additions & 0 deletions metrics/aggregates.lua
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
local string_utils = require('metrics.string_utils')
local Counter = require('metrics.collectors.counter')
local Gauge = require('metrics.collectors.gauge')
local Histogram = require('metrics.collectors.histogram')
local Summary = require('metrics.collectors.summary')

-- The average processor reads the sum and count observation groups of both
-- histogram and summary collectors through the Histogram suffix constants,
-- so the two collector kinds must agree on them.
-- Otherwise we need to implement different average processors.
assert(Histogram.SUM_SUFFIX == Summary.SUM_SUFFIX)
assert(Histogram.COUNT_SUFFIX == Summary.COUNT_SUFFIX)

-- Number of microseconds in one second.
local mksec_in_sec = 1e6

-- Name suffixes of the aggregate metrics (e.g. AVERAGE_SUFFIX is joined with
-- the collector name prefix to build the name of the average metric).
local RATE_SUFFIX = 'per_second'
local MIN_SUFFIX = 'min'
local MAX_SUFFIX = 'max'
local AVERAGE_SUFFIX = 'average'

local function compute_rate_value(time_delta, obs_prev, obs)
if obs_prev == nil then
Expand Down Expand Up @@ -133,16 +140,68 @@ local function compute_gauge_max(output_with_aggregates_prev, output, coll_key,
math.max, MAX_SUFFIX, "Maximum of ")
end

--- Build the average observation for a single label set.
-- @param sum_obs observation holding the sum of observed values
--   (`value` field), or nil when there is no sum for this label set.
-- @param count_obs observation holding the number of observed values
--   (`value` and `label_pairs` fields).
-- @return table `{ label_pairs = ..., value = ... }` sharing the
--   `label_pairs` table of `count_obs`, or nil when `sum_obs` is nil.
local function compute_average_value(sum_obs, count_obs)
    -- Every count is expected to have a matching sum; when it does not,
    -- the collected info is malformed and no average is produced.
    if sum_obs == nil then
        return nil
    end

    -- Nothing has been observed yet: report zero instead of dividing by zero.
    local average = 0
    if count_obs.value ~= 0 then
        -- tonumber() forces float division; cdata integers would otherwise
        -- be divided as integers.
        average = tonumber(sum_obs.value) / tonumber(count_obs.value)
    end

    return {
        label_pairs = count_obs.label_pairs,
        value = average,
    }
end

--- Compute the "average" aggregate for a histogram or summary collector.
-- The average of each label set is its sum observation divided by its count
-- observation, so it covers everything the collector has observed so far.
-- The first and the third arguments are unused here; they are presumably
-- kept so that all rule processors share the same signature.
-- @param _ unused.
-- @param output table of collector outputs indexed by registry key; an
--   entry already stored under the average key is reused as is.
-- @param _ unused.
-- @param coll_obs output of the collector to aggregate (`name`,
--   `name_prefix`, `metainfo`, `timestamp` and `observations` are read).
-- @return registry key of the average metric.
-- @return average metric description of gauge kind.
local function compute_collector_average(_, output, _, coll_obs)
    local name = string_utils.build_name(coll_obs.name_prefix, AVERAGE_SUFFIX)
    local kind = Gauge.kind
    local registry_key = string_utils.build_registry_key(name, kind)

    if output[registry_key] ~= nil then
        -- If, for any reason, a registry collision has happened,
        -- we assume that there is already an aggregate metric with
        -- a similar meaning.
        return registry_key, output[registry_key]
    end

    -- A suffix group may be absent (for example, when nothing has been
    -- observed yet). Treat it as empty instead of raising on pairs(nil) or
    -- on indexing nil; a count without a sum is skipped by
    -- compute_average_value.
    local count_observations = coll_obs.observations[Histogram.COUNT_SUFFIX] or {}
    local sum_observations = coll_obs.observations[Histogram.SUM_SUFFIX] or {}

    local values = {}

    for key, count_obs in pairs(count_observations) do
        values[key] = compute_average_value(sum_observations[key], count_obs)
    end

    return registry_key, {
        name = name,
        name_prefix = coll_obs.name_prefix,
        help = "Average value (over all time) of " .. coll_obs.name,
        kind = kind,
        metainfo = coll_obs.metainfo,
        timestamp = coll_obs.timestamp,
        observations = {[''] = values}
    }
end

-- Aggregate rules per collector kind: each collector kind is mapped to the
-- list of rule names (keys of the rule processors table) computed for it.
-- NOTE(review): presumably used when the caller does not pass its own
-- kind rules -- confirm against compute().
local default_kind_rules = {
[Counter.kind] = { 'rate' },
[Gauge.kind] = { 'min', 'max' },
[Histogram.kind] = { 'average' },
[Summary.kind] = { 'average' },
}

-- Dispatch table: aggregate rule name -> function computing that aggregate
-- for a single collector output.
local rule_processors = {
rate = compute_counter_rate,
min = compute_gauge_min,
max = compute_gauge_max,
average = compute_collector_average,
}

local function compute(output_with_aggregates_prev, output, kind_rules)
Expand Down
138 changes: 138 additions & 0 deletions test/aggregates_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,105 @@ local function get_gauge_example(timestamp, value1, value2)
return res
end

--- Build a sample collect output holding one histogram collector with two
-- label sets (HTTP codes 200 and 400).
-- @param timestamp collector timestamp.
-- @param count_1 number of observations for code 200.
-- @param sum_1 sum of observations for code 200.
-- @param count_2 number of observations for code 400.
-- @param sum_2 sum of observations for code 400.
-- @return table indexed by registry key with the histogram description.
local function get_histogram_example(timestamp, count_1, sum_1, count_2, sum_2)
    return {
        http_server_request_latencyhistogram = {
            name = 'http_server_request_latency',
            name_prefix = 'http_server_request_latency',
            kind = 'histogram',
            help = 'HTTP server request latency',
            metainfo = { default = true },
            timestamp = timestamp,
            observations = {
                count = {
                    ["code\t200"] = {
                        label_pairs = { alias = 'router', code = '200' },
                        value = count_1,
                    },
                    ["code\t400"] = {
                        label_pairs = { alias = 'router', code = '400' },
                        value = count_2,
                    }
                },
                sum = {
                    ["code\t200"] = {
                        label_pairs = { alias = 'router', code = '200' },
                        value = sum_1,
                    },
                    ["code\t400"] = {
                        label_pairs = { alias = 'router', code = '400' },
                        value = sum_2,
                    }
                },
                -- Bucket values are not used anywhere, so the finite
                -- buckets hold arbitrary numbers.
                bucket = {
                    ["code\t200\tle\t0.1"] = {
                        label_pairs = { alias = 'router', code = '200', le = '0.1' },
                        value = 2064,
                    },
                    ["code\t200\tle\tinf"] = {
                        label_pairs = { alias = 'router', code = '200', le = 'inf' },
                        value = count_1,
                    },
                    ["code\t400\tle\t0.1"] = {
                        label_pairs = { alias = 'router', code = '400', le = '0.1' },
                        value = 323,
                    },
                    -- Same 'inf' bucket label as for code 200.
                    ["code\t400\tle\tinf"] = {
                        label_pairs = { alias = 'router', code = '400', le = 'inf' },
                        value = count_2,
                    },
                }
            }
        }
    }
end

--- Build a sample collect output holding one summary collector with two
-- label sets (HTTP codes 200 and 400).
-- @param timestamp collector timestamp.
-- @param count_1 number of observations for code 200.
-- @param sum_1 sum of observations for code 200.
-- @param count_2 number of observations for code 400.
-- @param sum_2 sum of observations for code 400.
-- @return table indexed by registry key with the summary description.
local function get_summary_example(timestamp, count_1, sum_1, count_2, sum_2)
    return {
        http_server_request_latencysummary = {
            name = 'http_server_request_latency',
            name_prefix = 'http_server_request_latency',
            kind = 'summary',
            help = 'HTTP server request latency',
            metainfo = { default = true },
            timestamp = timestamp,
            observations = {
                count = {
                    ["code\t200"] = {
                        label_pairs = { alias = 'router', code = '200' },
                        value = count_1,
                    },
                    ["code\t400"] = {
                        label_pairs = { alias = 'router', code = '400' },
                        value = count_2,
                    }
                },
                sum = {
                    ["code\t200"] = {
                        label_pairs = { alias = 'router', code = '200' },
                        value = sum_1,
                    },
                    ["code\t400"] = {
                        label_pairs = { alias = 'router', code = '400' },
                        value = sum_2,
                    }
                },
                -- Quantile values are not used anywhere, so they hold
                -- arbitrary numbers. The label name matches the
                -- observation key ('quantile', not the histogram 'le').
                [''] = {
                    ["code\t200\tquantile\t0.5"] = {
                        label_pairs = { alias = 'router', code = '200', quantile = '0.5' },
                        value = 2064,
                    },
                    ["code\t400\tquantile\t0.5"] = {
                        label_pairs = { alias = 'router', code = '400', quantile = '0.5' },
                        value = 323,
                    },
                }
            }
        }
    }
end

g.test_unknown_rule = function()
local output = get_counter_example(1676364616294847ULL, 14148, 3204)

Expand Down Expand Up @@ -243,3 +342,42 @@ g.test_gauge_min_max_disabled = function()
t.assert_equals(utils.len(output_with_aggregates_2), 1,
"No min or max computed due to options")
end

-- Collector kinds that get the average aggregate, each mapped to the
-- builder of a sample collect output of that kind.
local average_cases = {
    histogram = get_histogram_example,
    summary = get_summary_example,
}

-- Register the same pair of tests for every collector kind above.
for kind_name, build_output in pairs(average_cases) do
    local enabled_test_name = 'test_' .. kind_name .. '_average'
    local disabled_test_name = 'test_' .. kind_name .. '_average_disabled'

    -- With default rules the average metric is added next to the
    -- source collector.
    g[enabled_test_name] = function()
        local output = build_output(1676364616294847ULL, 20000, 10000, 1000, 150)

        local result = metrics.compute_aggregates(nil, output)
        t.assert_equals(utils.len(result), 2,
            "Average computed for a single observation")

        local average_obs = result['http_server_request_latency_averagegauge']
        t.assert_not_equals(average_obs, nil, "Average computed")
        t.assert_equals(average_obs.name, 'http_server_request_latency_average')
        t.assert_equals(average_obs.name_prefix, 'http_server_request_latency')
        t.assert_equals(average_obs.kind, 'gauge')
        t.assert_equals(average_obs.help,
            'Average value (over all time) of http_server_request_latency')
        t.assert_equals(average_obs.metainfo.default, true)
        t.assert_equals(average_obs.timestamp, 1676364616294847ULL)

        local ok_obs = average_obs.observations['']['code\t200']
        t.assert_equals(ok_obs.label_pairs, { alias = 'router', code = '200' })
        t.assert_almost_equals(ok_obs.value, 10000 / 20000)

        local bad_request_obs = average_obs.observations['']['code\t400']
        t.assert_equals(bad_request_obs.label_pairs, { alias = 'router', code = '400' })
        t.assert_almost_equals(bad_request_obs.value, 150 / 1000)
    end

    -- An empty rule list for the kind switches the average off.
    g[disabled_test_name] = function()
        local output = build_output(1676364616294847ULL, 20000, 10000, 1000, 150)

        local opts = {[kind_name] = {}}
        local result = metrics.compute_aggregates(nil, output, opts)
        t.assert_equals(utils.len(result), 1,
            "No average computed due to options")
    end
end

0 comments on commit d45585e

Please sign in to comment.