From 24096b3f60eb4c92709f44890880a7e7c377dfe8 Mon Sep 17 00:00:00 2001 From: Georgy Moiseev Date: Tue, 14 Dec 2021 10:21:04 +0300 Subject: [PATCH] Integrate CRUD statistics with metrics rock If `metrics` [1] is found, you can use metrics collectors to store statistics. `metrics >= 0.9.0` is required to support age buckets in summary and to get crucial bugfixes under high load [2]. The metrics are part of the global registry and can be exported together (e.g. to Prometheus) with default tools without any additional configuration. Disabling stats destroys the collectors. Metrics collectors are used by default if supported. To explicitly set the driver, call `crud.enable_stats{ driver = driver }` ('local' or 'metrics'). If `metrics` is used, `latency` statistics are changed to 0.99 quantile of request execution time (with aging). Add CI matrix to run tests with `metrics` installed. To get full coverage on coveralls, #248 must be resolved. 1. https://github.com/tarantool/metrics 2. https://github.com/tarantool/metrics/issues/235 Closes #224 --- .github/workflows/test_on_push.yaml | 12 +- CHANGELOG.md | 1 + README.md | 48 +++- crud/stats/metrics_registry.lua | 323 ++++++++++++++++++++++++ crud/stats/module.lua | 109 ++++++-- test/integration/stats_test.lua | 373 ++++++++++++++++++++++++++-- test/unit/stats_test.lua | 193 +++++++++----- 7 files changed, 954 insertions(+), 105 deletions(-) create mode 100644 crud/stats/metrics_registry.lua diff --git a/.github/workflows/test_on_push.yaml b/.github/workflows/test_on_push.yaml index bcf6d348c..71fa0da09 100644 --- a/.github/workflows/test_on_push.yaml +++ b/.github/workflows/test_on_push.yaml @@ -13,13 +13,19 @@ jobs: matrix: # We need 1.10.6 here to check that module works with # old Tarantool versions that don't have "tuple-keydef"/"tuple-merger" support. 
- tarantool-version: ["1.10.6", "1.10", "2.2", "2.3", "2.4", "2.5", "2.6", "2.7"] + tarantool-version: ["1.10.6", "1.10", "2.2", "2.3", "2.4", "2.5", "2.6", "2.7", "2.8"] + metrics-version: [""] remove-merger: [false] include: - tarantool-version: "2.7" remove-merger: true + - tarantool-version: "2.8" + metrics-version: "0.1.8" + - tarantool-version: "2.8" + metrics-version: "0.9.0" - tarantool-version: "2.8" coveralls: true + metrics-version: "0.12.0" fail-fast: false runs-on: [ubuntu-latest] steps: @@ -47,6 +53,10 @@ jobs: tarantool --version ./deps.sh + - name: Install metrics + if: matrix.metrics-version != '' + run: tarantoolctl rocks install metrics ${{ matrix.metrics-version }} + - name: Remove external merger if needed if: ${{ matrix.remove-merger }} run: rm .rocks/lib/tarantool/tuple/merger.so diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a3d9325f..403ddf61c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added * Statistics for CRUD operations on router (#224). +* Integrate CRUD statistics with `metrics` (#224). ### Changed diff --git a/README.md b/README.md index f2d33a92c..68295a0cd 100644 --- a/README.md +++ b/README.md @@ -610,6 +610,15 @@ crud.enable_stats() crud.reset_stats() ``` +If [`metrics`](https://github.com/tarantool/metrics) `0.9.0` or greater +is found, metrics collectors will be used by default to store statistics +instead of local collectors. You can also choose the driver manually. +```lua +-- Use metrics collectors. +crud.enable_stats({ driver = 'metrics' }) +``` + +To get statistics in code, call `crud.stats()`. ```lua crud.stats() --- @@ -645,9 +654,41 @@ Possible statistics operation labels are Each operation section contains of different collectors for success calls and error (both error throw and `nil, err`) returns. `count` is total requests count since instance start -or stats restart. 
`latency` is average time of requests execution, +or stats restart. `latency` is 0.99 quantile of request execution +time if `metrics` driver is used, otherwise `latency` is total average. `time` is total time of requests execution. +In `metrics` registry statistics are stored as `tnt_crud_stats` metrics +with `operation`, `status` and `name` labels. Collector +`tnt_crud_space_not_found` stores count of calls to unknown spaces. +``` +metrics.collect() +--- +- - label_pairs: + status: ok + operation: insert + name: customers + value: 221411 + metric_name: tnt_crud_stats_count + - label_pairs: + status: ok + operation: insert + name: customers + value: 10.49834896344692 + metric_name: tnt_crud_stats_sum + - label_pairs: + status: ok + operation: insert + name: customers + quantile: 0.99 + value: 0.00023606420935973 + metric_name: tnt_crud_stats + - label_pairs: [] + value: 3 + metric_name: tnt_crud_space_not_found +... +``` + `select` section additionally contains `details` collectors. ```lua crud.stats('my_space').select.details @@ -661,7 +702,10 @@ crud.stats('my_space').select.details (including those not executed successfully). `tuples_fetched` is a count of tuples fetched from storages during execution, `tuples_lookup` is a count of tuples looked up on storages -while collecting response for call. +while collecting response for call. In `metrics` registry they +are stored as `tnt_crud_map_reduces`, `tnt_crud_tuples_fetched` +and `tnt_crud_tuples_lookup` metrics with +`{ operation = 'select', name = space_name }` labels. 
## Cartridge roles diff --git a/crud/stats/metrics_registry.lua b/crud/stats/metrics_registry.lua new file mode 100644 index 000000000..478590fbf --- /dev/null +++ b/crud/stats/metrics_registry.lua @@ -0,0 +1,323 @@ +local is_package, metrics = pcall(require, 'metrics') + +local dev_checks = require('crud.common.dev_checks') +local op_module = require('crud.stats.operation') +local registry_common = require('crud.stats.registry_common') + +local registry = {} +local internal_registry = {} + +local metric_name = { + -- Summary collector for all operations. + stats = 'tnt_crud_stats', + -- `*_count` and `*_sum` are automatically created + -- by summary collector. + stats_count = 'tnt_crud_stats_count', + stats_sum = 'tnt_crud_stats_sum', + + -- Counter collector for spaces not found. + space_not_found = 'tnt_crud_space_not_found', + + -- Counter collectors for select/pairs details. + details = { + tuples_fetched = 'tnt_crud_tuples_fetched', + tuples_lookup = 'tnt_crud_tuples_lookup', + map_reduces = 'tnt_crud_map_reduces', + } +} + +local LATENCY_QUANTILE = 0.99 + +local DEFAULT_QUANTILES = { + [LATENCY_QUANTILE] = 1e-3, +} + +local DEFAULT_SUMMARY_PARAMS = { + age_buckets_count = 2, + max_age_time = 60, +} + +--- Check if application supports metrics rock for registry +-- +-- `metrics >= 0.9.0` is required to use summary with +-- age buckets. `metrics >= 0.5.0, < 0.9.0` is unsupported +-- due to quantile overflow bug +-- (https://github.com/tarantool/metrics/issues/235). +-- +-- @function is_supported +-- +-- @treturn boolean Returns true if `metrics >= 0.9.0` found, false otherwise. +-- +function registry.is_supported() + if is_package == false then + return false + end + + -- Only metrics >= 0.9.0 supported. 
+ local is_summary, summary = pcall(require, 'metrics.collectors.summary') + if is_summary == false or summary.rotate_age_buckets == nil then + return false + end + + return true +end + +--- Initialize collectors in global metrics registry +-- +-- Registries are not meant to be used explicitly +-- by users, init is not guaranteed to be idempotent. +-- Destroy collectors only through this registry's methods. +-- +-- @function init +-- +-- @treturn boolean Returns true. +-- +function registry.init() + internal_registry[metric_name.stats] = metrics.summary( + metric_name.stats, + 'CRUD router calls statistics', + DEFAULT_QUANTILES, + DEFAULT_SUMMARY_PARAMS) + + internal_registry[metric_name.space_not_found] = metrics.counter( + metric_name.space_not_found, + 'Spaces not found during CRUD calls') + + internal_registry[metric_name.details.tuples_fetched] = metrics.counter( + metric_name.details.tuples_fetched, + 'Tuples fetched from CRUD storages during select/pairs') + + internal_registry[metric_name.details.tuples_lookup] = metrics.counter( + metric_name.details.tuples_lookup, + 'Tuples looked up on CRUD storages while collecting response during select/pairs') + + internal_registry[metric_name.details.map_reduces] = metrics.counter( + metric_name.details.map_reduces, + 'Map reduces planned during CRUD select/pairs') + + return true +end + +--- Unregister collectors in global metrics registry +-- +-- Registries are not meant to be used explicitly +-- by users, destroy is not guaranteed to be idempotent. +-- Destroy collectors only through this registry's methods. +-- +-- @function destroy +-- +-- @treturn boolean Returns true. +-- +function registry.destroy() + for _, c in pairs(internal_registry) do + metrics.registry:unregister(c) + end + + internal_registry = {} + return true +end + +--- Get copy of global metrics registry +-- +-- Registries are not meant to be used explicitly +-- by users, get is not guaranteed to work without init.
+-- +-- @function get +-- +-- @tparam string space_name +-- (Optional) If specified, returns table with statistics +-- of operations on the space, separated by operation type and +-- execution status. If there weren't any requests for the space, +-- returns {}. If not specified, returns table with statistics +-- about all existing spaces and count of calls to spaces +-- that weren't found. +-- +-- @treturn table Returns copy of metrics registry. +function registry.get(space_name) + dev_checks('?string') + + local stats = { + spaces = {}, + space_not_found = 0, + } + + -- Fill operation basic statistics values. + for _, obs in ipairs(internal_registry[metric_name.stats]:collect()) do + local op = obs.label_pairs.operation + local status = obs.label_pairs.status + local name = obs.label_pairs.name + + if space_name ~= nil and name ~= space_name then + goto stats_continue + end + + registry_common.init_collectors_if_required(stats.spaces, name, op) + local space_stats = stats.spaces[name] + + if obs.metric_name == metric_name.stats then + if obs.label_pairs.quantile == LATENCY_QUANTILE then + space_stats[op][status].latency = obs.value + end + elseif obs.metric_name == metric_name.stats_sum then + space_stats[op][status].time = obs.value + elseif obs.metric_name == metric_name.stats_count then + space_stats[op][status].count = obs.value + end + + :: stats_continue :: + end + + -- Fill select/pairs detail statistics values. 
+ for stat_name, metric_name in pairs(metric_name.details) do + for _, obs in ipairs(internal_registry[metric_name]:collect()) do + local name = obs.label_pairs.name + local op = obs.label_pairs.operation + + if space_name ~= nil and name ~= space_name then + goto details_continue + end + + registry_common.init_collectors_if_required(stats.spaces, name, op) + stats.spaces[name][op].details[stat_name] = obs.value + + :: details_continue :: + end + end + + if space_name ~= nil then + return stats.spaces[space_name] or {} + end + + local _, obs = next(internal_registry[metric_name.space_not_found]:collect()) + if obs ~= nil then + stats.space_not_found = obs.value + end + + return stats +end + +--- Check if space statistics are absent from registry +-- +-- @function is_unknown_space +-- +-- @tparam string space_name +-- Name of space. +-- +-- @treturn boolean True if no stats for the space are found, false otherwise. +-- +function registry.is_unknown_space(space_name) + dev_checks('string') + + for _, obs in ipairs(internal_registry[metric_name.stats]:collect()) do + local name = obs.label_pairs.name + + if name == space_name then + return false + end + end + + for _, metric_name in pairs(metric_name.details) do + for _, obs in ipairs(internal_registry[metric_name]:collect()) do + local name = obs.label_pairs.name + + if name == space_name then + return false + end + end + end + + return true +end + +--- Increase requests count and update latency info +-- +-- @function observe +-- +-- @tparam number latency +-- Time of call execution. +-- +-- @tparam string space_name +-- Name of space. +-- +-- @tparam string op +-- Label of registry collectors. +-- Use `require('crud.stats.operation')` to pick one. +-- +-- @tparam string status +-- 'ok' if no errors on execution, 'error' otherwise. +-- +-- @treturn boolean Returns true. 
+-- +function registry.observe(latency, space_name, op, status) + dev_checks('number', 'string', 'string', 'string') + + -- Use `operation` label to be consistent with `tnt_stats_op_*` labels. + -- Use `name` label to be consistent with `tnt_space_*` labels. + -- Use `status` label to be consistent with `tnt_vinyl_*` and HTTP metrics labels. + local label_pairs = { operation = op, name = space_name, status = status } + + internal_registry[metric_name.stats]:observe(latency, label_pairs) + + return true +end + +--- Increase count of "space not found" collector by one +-- +-- @function observe_space_not_found +-- +-- @treturn boolean Returns true. +-- +function registry.observe_space_not_found() + internal_registry[metric_name.space_not_found]:inc(1) + + return true +end + +--- Increase statistics of storage select/pairs calls +-- +-- @function observe_fetch +-- +-- @tparam number tuples_fetched +-- Count of tuples fetched during storage call. +-- +-- @tparam number tuples_lookup +-- Count of tuples looked up on storages while collecting response. +-- +-- @tparam string space_name +-- Name of space. +-- +-- @treturn boolean Returns true. +-- +function registry.observe_fetch(tuples_fetched, tuples_lookup, space_name) + dev_checks('number', 'number', 'string') + + local label_pairs = { name = space_name, operation = op_module.SELECT } + + internal_registry[metric_name.details.tuples_fetched]:inc(tuples_fetched, label_pairs) + internal_registry[metric_name.details.tuples_lookup]:inc(tuples_lookup, label_pairs) + + return true +end + +--- Increase statistics of planned map reduces during select/pairs +-- +-- @function observe_map_reduces +-- +-- @tparam number count +-- Count of map reduces planned. +-- +-- @tparam string space_name +-- Name of space. +-- +-- @treturn boolean Returns true. 
+-- +function registry.observe_map_reduces(count, space_name) + dev_checks('number', 'string') + + local label_pairs = { name = space_name, operation = op_module.SELECT } + internal_registry[metric_name.details.map_reduces]:inc(count, label_pairs) + + return true +end + +return registry diff --git a/crud/stats/module.lua b/crud/stats/module.lua index b07160186..0e30cd176 100644 --- a/crud/stats/module.lua +++ b/crud/stats/module.lua @@ -6,12 +6,35 @@ local vshard = require('vshard') local dev_checks = require('crud.common.dev_checks') local utils = require('crud.common.utils') local op_module = require('crud.stats.operation') -local registry = require('crud.stats.local_registry') local StatsError = errors.new_class('StatsError', {capture_stack = false}) local stats = {} -local is_enabled = false +local internal = { + registry = nil, + driver = nil, +} +stats.internal = internal + +local local_registry = require('crud.stats.local_registry') +local metrics_registry = require('crud.stats.metrics_registry') + +local drivers = { + ['local'] = local_registry, +} +if metrics_registry.is_supported() then + drivers['metrics'] = metrics_registry +end + +--- Check if statistics module is enabled +-- +-- @function is_enabled +-- +-- @treturn boolean True if a registry is set (stats enabled), false otherwise. +-- +function stats.is_enabled() + return internal.registry ~= nil +end --- Initializes statistics registry, enables callbacks and wrappers -- @@ -19,20 +42,52 @@ local is_enabled = false -- -- @function enable -- +-- @tparam table opts +-- +-- @tfield string driver +-- 'local' or 'metrics'. +-- If 'local', stores statistics in local registry (some Lua tables) +-- and computes latency as overall average. 'metrics' requires +-- `metrics >= 0.9.0` installed and stores statistics in +-- global metrics registry (integrated with exporters) +-- and computes latency as 0.99 quantile with aging. +-- If 'metrics' driver is available, it is used by default, +-- otherwise 'local' is used. 
+-- -- @treturn boolean Returns true. -- -function stats.enable() - if is_enabled then - return true - end +function stats.enable(opts) + checks({ driver = '?string' }) StatsError:assert( rawget(_G, 'crud') ~= nil, "Can be enabled only on crud router" ) - registry.init() - is_enabled = true + opts = opts or {} + if opts.driver == nil then + if drivers.metrics ~= nil then + opts.driver = 'metrics' + else + opts.driver = 'local' + end + end + + StatsError:assert( + drivers[opts.driver] ~= nil, + 'Unsupported driver: %s', opts.driver + ) + + if internal.driver == opts.driver then + return true + end + + -- Disable old driver registry, if another one was requested. + stats.disable() + + internal.driver = opts.driver + internal.registry = drivers[opts.driver] + internal.registry.init() return true end @@ -47,12 +102,12 @@ end -- @treturn boolean Returns true. -- function stats.reset() - if not is_enabled then + if not stats.is_enabled() then return true end - registry.destroy() - registry.init() + internal.registry.destroy() + internal.registry.init() return true end @@ -66,12 +121,13 @@ end -- @treturn boolean Returns true. -- function stats.disable() - if not is_enabled then + if not stats.is_enabled() then return true end - registry.destroy() - is_enabled = false + internal.registry.destroy() + internal.registry = nil + internal.driver = nil return true end @@ -95,7 +151,11 @@ end function stats.get(space_name) checks('?string') - return registry.get(space_name) + if not stats.is_enabled() then + return {} + end + + return internal.registry.get(space_name) end local function wrap_tail(space_name, op, opts, start_time, call_status, ...) @@ -127,11 +187,11 @@ local function wrap_tail(space_name, op, opts, start_time, call_status, ...) -- at worst it would be a single excessive check for an instance lifetime. -- If we can't verify space existence because of network errors, -- it is treated as unknown as well. 
- if status == 'error' and registry.is_unknown_space(space_name) then + if status == 'error' and internal.registry.is_unknown_space(space_name) then if type(err) == 'table' and type(err.err) == 'string' then space_not_found_msg = utils.space_doesnt_exist_msg(space_name) if string.find(err.err, space_not_found_msg) ~= nil then - registry.observe_space_not_found() + internal.registry.observe_space_not_found() goto return_values end end @@ -141,7 +201,7 @@ local function wrap_tail(space_name, op, opts, start_time, call_status, ...) -- Check explicitly if space do not exist. space = utils.get_space(space_name, vshard.router.routeall()) if space == nil then - registry.observe_space_not_found() + internal.registry.observe_space_not_found() goto return_values end end @@ -155,11 +215,12 @@ local function wrap_tail(space_name, op, opts, start_time, call_status, ...) space_name = space.name end - registry.observe(latency, space_name, op, status) + internal.registry.observe(latency, space_name, op, status) if context_stats ~= nil then if context_stats.map_reduces ~= nil then - registry.observe_map_reduces(context_stats.map_reduces, space_name) + internal.registry.observe_map_reduces( + context_stats.map_reduces, space_name) end utils.drop_context_section('router_stats') end @@ -204,7 +265,7 @@ function stats.wrap(func, op, opts) dev_checks('function', 'string', { pairs = '?boolean' }) return function(...) - if not is_enabled then + if not stats.is_enabled() then return func(...) end @@ -244,11 +305,11 @@ local storage_stats_schema = { tuples_fetched = 'number', tuples_lookup = 'numbe local function update_fetch_stats(storage_stats, space_name) dev_checks(storage_stats_schema, 'string') - if not is_enabled then + if not stats.is_enabled() then return true end - registry.observe_fetch( + internal.registry.observe_fetch( storage_stats.tuples_fetched, storage_stats.tuples_lookup, space_name @@ -265,7 +326,7 @@ end -- @treturn[2] function Dummy function, if stats disabled. 
-- function stats.get_fetch_callback() - if not is_enabled then + if not stats.is_enabled() then return utils.pass end diff --git a/test/integration/stats_test.lua b/test/integration/stats_test.lua index d4a9ea200..365265872 100644 --- a/test/integration/stats_test.lua +++ b/test/integration/stats_test.lua @@ -4,14 +4,20 @@ local t = require('luatest') local stats_registry_common = require('crud.stats.registry_common') -local g = t.group('stats_integration') +local pgroup = t.group('stats_integration', { + { driver = 'local' }, + { driver = 'metrics' }, +}) +local group_metrics = t.group('stats_metrics_integration', { + { driver = 'metrics' }, +}) local helpers = require('test.helper') local space_id = 542 local space_name = 'customers' local unknown_space_name = 'non_existing_space' -g.before_all(function(g) +local function before_all(g) g.cluster = helpers.Cluster:new({ datadir = fio.tempdir(), server_command = helpers.entrypoint('srv_select'), @@ -26,20 +32,56 @@ g.before_all(function(g) t.assert_equals(helpers.is_space_exist(g.router, space_name), true) t.assert_equals(helpers.is_space_exist(g.router, unknown_space_name), false) -end) -g.after_all(function(g) + if g.params.driver == 'metrics' then + local is_metrics_supported = g.router:eval([[ + return require('crud.stats.metrics_registry').is_supported() + ]]) + t.skip_if(is_metrics_supported == false, 'Metrics registry is unsupported') + end +end + +local function after_all(g) helpers.stop_cluster(g.cluster) -end) +end + + +local function get_stats(g, space_name) + return g.router:eval("return crud.stats(...)", { space_name }) +end + +local function enable_stats(g, params) + params = params or g.params + g.router:eval("crud.enable_stats(...)", { params }) +end -g.before_each(function(g) +local function disable_stats(g) + g.router:eval("crud.disable_stats()") +end + +local function before_each(g) + enable_stats(g) helpers.truncate_space_on_cluster(g.cluster, space_name) -end) +end -function 
g:get_stats(space_name) - return self.router:eval("return crud.stats(...)", { space_name }) +local function get_metrics(g) + return g.router:eval("return require('metrics').collect()") end +pgroup.before_all(before_all) + +pgroup.after_all(after_all) + +pgroup.before_each(before_each) + + +group_metrics.before_all(before_all) + +group_metrics.after_all(after_all) + +group_metrics.before_each(before_each) + + -- If there weren't any operations, space stats is {}. -- To compute stats diff, this helper return real stats -- if they're already present or default stats if @@ -264,12 +306,12 @@ for name, case in pairs(simple_operation_cases) do local test_name = ('test_%s'):format(name) if case.prepare ~= nil then - g.before_test(test_name, case.prepare) + pgroup.before_test(test_name, case.prepare) end - g[test_name] = function(g) + pgroup[test_name] = function(g) -- Collect stats before call. - local stats_before = g:get_stats(space_name) + local stats_before = get_stats(g, space_name) t.assert_type(stats_before, 'table') -- Call operation. @@ -295,7 +337,7 @@ for name, case in pairs(simple_operation_cases) do end -- Collect stats after call. - local stats_after = g:get_stats(space_name) + local stats_after = get_stats(g, space_name) t.assert_type(stats_after, 'table') t.assert_not_equals(stats_after[case.op], nil) @@ -420,9 +462,9 @@ local unknown_space_cases = { for name, case in pairs(unknown_space_cases) do local test_name = ('test_%s_on_unknown_space'):format(name) - g[test_name] = function(g) + pgroup[test_name] = function(g) -- Collect stats before call. - local stats_before = g:get_stats() + local stats_before = get_stats(g) t.assert_type(stats_before, 'table') -- Call operation. @@ -436,7 +478,7 @@ for name, case in pairs(unknown_space_cases) do t.assert_not_equals(err, nil) -- Collect stats after call. 
- local stats_after = g:get_stats() + local stats_after = get_stats(g) t.assert_type(stats_after, 'table') t.assert_equals(stats_after.space_not_found - stats_before.space_not_found, 1, @@ -519,14 +561,14 @@ local select_cases = { for name, case in pairs(select_cases) do local test_name = ('test_%s_details'):format(name) - g.before_test(test_name, prepare_select_data) + pgroup.before_test(test_name, prepare_select_data) - g[test_name] = function(g) + pgroup[test_name] = function(g) local op = 'select' local space_name = space_name -- Collect stats before call. - local stats_before = g:get_stats(space_name) + local stats_before = get_stats(g, space_name) t.assert_type(stats_before, 'table') -- Call operation. @@ -540,7 +582,7 @@ for name, case in pairs(select_cases) do t.assert_equals(err, nil) -- Collect stats after call. - local stats_after = g:get_stats(space_name) + local stats_after = get_stats(g, space_name) t.assert_type(stats_after, 'table') local op_before = get_before_stats(stats_before, op) @@ -561,10 +603,297 @@ for name, case in pairs(select_cases) do end end -g.test_resolve_name_from_id = function(g) +pgroup.test_resolve_name_from_id = function(g) local op = 'len' g.router:call('crud.len', { space_id }) - local stats = g:get_stats(space_name) + local stats = get_stats(g, space_name) t.assert_not_equals(stats[op], nil, "Statistics is filled by name") end + + +-- Generate non-null stats for all cases. +local function generate_stats(g) + for _, case in pairs(simple_operation_cases) do + if case.prepare ~= nil then + case.prepare(g) + end + + local _, err + if case.eval ~= nil then + if case.pcall then + _, err = pcall(g.router.eval, g.router, case.eval, case.args) + else + _, err = g.router:eval(case.eval, case.args) + end + else + _, err = g.router:call(case.func, case.args) + end + + if case.expect_error ~= true then + t.assert_equals(err, nil) + else + t.assert_not_equals(err, nil) + end + end + + -- Generate non-null select details. 
+ prepare_select_data(g) + for _, case in pairs(select_cases) do + local _, err + if case.eval ~= nil then + _, err = g.router:eval(case.eval, { space_name, case.conditions }) + else + _, err = g.router:call(case.func, { space_name, case.conditions }) + end + + t.assert_equals(err, nil) + end + + -- Generate non-null space_not_found stats. + local case = unknown_space_cases.insert + local _, err = g.router:call(case.func, case.args) + t.assert_not_equals(err, nil) +end + +-- https://github.com/tarantool/metrics/blob/fc5a67072340b12f983f09b7d383aca9e2f10cf1/test/utils.lua#L22-L31 +local function find_obs(metric_name, label_pairs, observations) + for _, obs in pairs(observations) do + local same_label_pairs = pcall(t.assert_equals, obs.label_pairs, label_pairs) + if obs.metric_name == metric_name and same_label_pairs then + return obs + end + end + t.assert_items_include( + observations, + { metric_name = metric_name, label_pairs = label_pairs }, + 'Observation found') +end + +-- https://github.com/tarantool/metrics/blob/fc5a67072340b12f983f09b7d383aca9e2f10cf1/test/utils.lua#L55-L63 +local function find_metric(metric_name, metrics_data) + local m = {} + for _, v in ipairs(metrics_data) do + if v.metric_name == metric_name then + table.insert(m, v) + end + end + return #m > 0 and m or nil +end + +local function get_unique_label_values(metrics_data, label_key) + local label_values_map = {} + for _, v in ipairs(metrics_data) do + local label_pairs = v.label_pairs or {} + if label_pairs[label_key] ~= nil then + label_values_map[label_pairs[label_key]] = true + end + end + + local label_values = {} + for k, _ in pairs(label_values_map) do + table.insert(label_values, k) + end + + return label_values +end + +local function validate_stats(metrics) + local stats = find_metric('tnt_crud_stats', metrics) + t.assert_type(stats, 'table', '`tnt_crud_stats` summary metrics found') + + local stats_count = find_metric('tnt_crud_stats_count', metrics) + t.assert_type(stats_count, 
'table', '`tnt_crud_stats` summary metrics found') + + local stats_sum = find_metric('tnt_crud_stats_sum', metrics) + t.assert_type(stats_sum, 'table', '`tnt_crud_stats` summary metrics found') + + + local expected_operations = { 'insert', 'get', 'replace', 'update', + 'upsert', 'delete', 'select', 'truncate', 'len', 'borders' } + + t.assert_items_equals(get_unique_label_values(stats, 'operation'), expected_operations, + 'Metrics are labelled with operation') + + t.assert_items_equals(get_unique_label_values(stats_count, 'operation'), expected_operations, + 'Metrics are labelled with operation') + + t.assert_items_equals(get_unique_label_values(stats_sum, 'operation'), expected_operations, + 'Metrics are labelled with operation') + + + local expected_statuses = { 'ok', 'error' } + + t.assert_items_equals( get_unique_label_values(stats, 'status'), expected_statuses, + 'Metrics are labelled with status') + + t.assert_items_equals(get_unique_label_values(stats_count, 'status'), expected_statuses, + 'Metrics are labelled with status') + + t.assert_items_equals(get_unique_label_values(stats_sum, 'status'), expected_statuses, + 'Metrics are labelled with status') + + + local expected_names = { space_name } + + t.assert_items_equals(get_unique_label_values(stats, 'name'), expected_names, + 'Metrics are labelled with space name (only existing spaces)') + + t.assert_items_equals(get_unique_label_values(stats_count, 'name'), + expected_names, + 'Metrics are labelled with space name (only existing spaces)') + + t.assert_items_equals( + get_unique_label_values(stats_sum, 'name'), + expected_names, + 'Metrics are labelled with space name (only existing spaces)') + + + local tuples_fetched = find_metric('tnt_crud_tuples_fetched', metrics) + t.assert_type(tuples_fetched, 'table', '`tnt_crud_tuples_fetched` metrics found') + + t.assert_items_equals(get_unique_label_values(tuples_fetched, 'operation'), { 'select' }, + 'Metrics are labelled with operation') + + 
t.assert_items_equals(get_unique_label_values(tuples_fetched, 'name'), expected_names, + 'Metrics are labelled with space name (only existing spaces)') + + + local tuples_lookup = find_metric('tnt_crud_tuples_lookup', metrics) + t.assert_type(tuples_lookup, 'table', '`tnt_crud_tuples_lookup` metrics found') + + t.assert_items_equals( get_unique_label_values(tuples_lookup, 'operation'), { 'select' }, + 'Metrics are labelled with operation') + + t.assert_items_equals(get_unique_label_values(tuples_lookup, 'name'), expected_names, + 'Metrics are labelled with space name (only existing spaces)') + + + local map_reduces = find_metric('tnt_crud_map_reduces', metrics) + t.assert_type(map_reduces, 'table', '`tnt_crud_map_reduces` metrics found') + + t.assert_items_equals(get_unique_label_values(map_reduces, 'operation'), { 'select' }, + 'Metrics are labelled with operation') + + t.assert_items_equals(get_unique_label_values(map_reduces, 'name'), expected_names, + 'Metrics are labelled with space name (only existing spaces)') + + + local space_not_found = find_metric('tnt_crud_space_not_found', metrics) + t.assert_type(space_not_found, 'table', '`tnt_crud_space_not_found` metrics found') +end + + +group_metrics.before_test( + 'test_stats_stored_in_global_metrics_registry', + generate_stats) + +group_metrics.test_stats_stored_in_global_metrics_registry = function(g) + local metrics = get_metrics(g) + validate_stats(metrics) +end + + +group_metrics.before_test('test_metrics_updated_per_call', generate_stats) + +group_metrics.test_metrics_updated_per_call = function(g) + local metrics_before = get_metrics(g) + local stats_labels = { operation = 'select', status = 'ok', name = space_name } + local details_labels = { operation = 'select', name = space_name } + + local count_before = find_obs('tnt_crud_stats_count', stats_labels, metrics_before) + local time_before = find_obs('tnt_crud_stats_sum', stats_labels, metrics_before) + local tuples_lookup_before = 
find_obs('tnt_crud_tuples_lookup', details_labels, metrics_before) + local tuples_fetched_before = find_obs('tnt_crud_tuples_fetched', details_labels, metrics_before) + local map_reduces_before = find_obs('tnt_crud_map_reduces', details_labels, metrics_before) + + local case = select_cases['select_by_secondary_index'] + local _, err = g.router:call(case.func, { space_name, case.conditions }) + t.assert_equals(err, nil) + + local metrics_after = get_metrics(g) + local count_after = find_obs('tnt_crud_stats_count', stats_labels, metrics_after) + local time_after = find_obs('tnt_crud_stats_sum', stats_labels, metrics_after) + local tuples_lookup_after = find_obs('tnt_crud_tuples_lookup', details_labels, metrics_after) + local tuples_fetched_after = find_obs('tnt_crud_tuples_fetched', details_labels, metrics_after) + local map_reduces_after = find_obs('tnt_crud_map_reduces', details_labels, metrics_after) + + t.assert_equals(count_after.value - count_before.value, 1, + '`select` metrics count increased') + t.assert_ge(time_after.value - time_before.value, 0, + '`select` total time increased') + t.assert_ge(tuples_lookup_after.value - tuples_lookup_before.value, case.tuples_lookup, + '`select` tuples lookup expected change') + t.assert_ge(tuples_fetched_after.value - tuples_fetched_before.value, case.tuples_fetched, + '`select` tuples feched expected change') + t.assert_ge(map_reduces_after.value - map_reduces_before.value, case.tuples_lookup, + '`select` map reduces expected change') +end + + +group_metrics.before_test( + 'test_space_not_found_metrics_updated_per_call', + generate_stats) + +group_metrics.test_space_not_found_metrics_updated_per_call = function(g) + local metrics_before = get_metrics(g) + + local space_not_found_before = find_obs('tnt_crud_space_not_found', {}, metrics_before) + + local case = unknown_space_cases.insert + local _, err = g.router:call(case.func, case.args) + t.assert_not_equals(err, nil) + + local metrics_after = get_metrics(g) + local 
space_not_found_after = find_obs('tnt_crud_space_not_found', {}, metrics_after) + + t.assert_equals(space_not_found_after.value - space_not_found_before.value, 1, + '`tnt_crud_space_not_found` metrics count increased') +end + + +group_metrics.before_test( + 'test_metrics_collectors_destroyed_if_stats_disabled', + generate_stats) + +group_metrics.test_metrics_collectors_destroyed_if_stats_disabled = function(g) + disable_stats(g) + + local metrics = get_metrics(g) + + local stats = find_metric('tnt_crud_stats', metrics) + t.assert_equals(stats, nil, '`tnt_crud_stats` summary metrics not found') + + local stats_count = find_metric('tnt_crud_stats_count', metrics) + t.assert_equals(stats_count, nil, '`tnt_crud_stats` summary metrics not found') + + local stats_sum = find_metric('tnt_crud_stats_sum', metrics) + t.assert_equals(stats_sum, nil, '`tnt_crud_stats` summary metrics not found') + + local tuples_fetched = find_metric('tnt_crud_tuples_fetched', metrics) + t.assert_equals(tuples_fetched, nil, '`tnt_crud_tuples_fetched` metrics not found') + + local tuples_lookup = find_metric('tnt_crud_tuples_lookup', metrics) + t.assert_equals(tuples_lookup, nil, '`tnt_crud_tuples_lookup` metrics not found') + + local map_reduces = find_metric('tnt_crud_map_reduces', metrics) + t.assert_equals(map_reduces, nil, '`tnt_crud_map_reduces` metrics not found') + + local space_not_found = find_metric('tnt_crud_space_not_found', metrics) + t.assert_equals(space_not_found, nil, '`tnt_crud_space_not_found` metrics not found') +end + + +group_metrics.before_test( + 'test_stats_stored_in_metrics_registry_after_switch_to_metrics_driver', + disable_stats) + +group_metrics.test_stats_stored_in_metrics_registry_after_switch_to_metrics_driver = function(g) + enable_stats(g, { driver = 'local' }) + -- Switch to metrics driver. 
+ enable_stats(g, { driver = 'metrics' }) + + generate_stats(g) + local metrics = get_metrics(g) + validate_stats(metrics) +end diff --git a/test/unit/stats_test.lua b/test/unit/stats_test.lua index 3daa77e65..2c464171f 100644 --- a/test/unit/stats_test.lua +++ b/test/unit/stats_test.lua @@ -6,14 +6,18 @@ local t = require('luatest') local stats_module = require('crud.stats.module') local utils = require('crud.common.utils') -local g = t.group('stats_unit') +local pgroup = t.group('stats_unit', { + { driver = 'local' }, + { driver = 'metrics' }, +}) +local group_driver = t.group('stats_driver_unit') local helpers = require('test.helper') local space_id = 542 local space_name = 'customers' local unknown_space_name = 'non_existing_space' -g.before_all(function(g) +local function before_all(g) -- Enable test cluster for "is space exist?" checks. g.cluster = helpers.Cluster:new({ datadir = fio.tempdir(), @@ -29,47 +33,64 @@ g.before_all(function(g) t.assert_equals(helpers.is_space_exist(g.router, space_name), true) t.assert_equals(helpers.is_space_exist(g.router, unknown_space_name), false) -end) -g.after_all(function(g) - helpers.stop_cluster(g.cluster) -end) + g.is_metrics_supported = g.router:eval([[ + return require('crud.stats.metrics_registry').is_supported() + ]]) --- Reset statistics between tests, reenable if needed. 
-g.before_each(function(g) - g:enable_stats() -end) + if g.params ~= nil and g.params.driver == 'metrics' then + t.skip_if(g.is_metrics_supported == false, 'Metrics registry is unsupported') + end +end -g.after_each(function(g) - g:disable_stats() -end) +local function after_all(g) + helpers.stop_cluster(g.cluster) +end -function g:get_stats(space_name) - return self.router:eval("return stats_module.get(...)", { space_name }) +local function get_stats(g, space_name) + return g.router:eval("return stats_module.get(...)", { space_name }) end -function g:enable_stats() - self.router:eval("stats_module.enable()") +local function enable_stats(g, params) + params = params or g.params + g.router:eval("stats_module.enable(...)", { params }) end -function g:disable_stats() - self.router:eval("stats_module.disable()") +local function disable_stats(g) + g.router:eval("stats_module.disable()") end -function g:reset_stats() - self.router:eval("return stats_module.reset()") +local function reset_stats(g) + g.router:eval("return stats_module.reset()") end -g.test_get_format_after_enable = function(g) - local stats = g:get_stats() +pgroup.before_all(before_all) + +pgroup.after_all(after_all) + +-- Reset statistics between tests, reenable if needed. 
+pgroup.before_each(enable_stats) + +pgroup.after_each(disable_stats) + + +group_driver.before_all(before_all) + +group_driver.after_all(after_all) + +group_driver.after_each(disable_stats) + + +pgroup.test_get_format_after_enable = function(g) + local stats = get_stats(g) t.assert_type(stats, 'table') t.assert_equals(stats.spaces, {}) t.assert_equals(stats.space_not_found, 0) end -g.test_get_by_space_name_format_after_enable = function(g) - local stats = g:get_stats(space_name) +pgroup.test_get_by_space_name_format_after_enable = function(g) + local stats = get_stats(g, space_name) t.assert_type(stats, 'table') t.assert_equals(stats, {}) @@ -127,7 +148,7 @@ for name, case in pairs(observe_cases) do for _, op in pairs(case.operations) do local test_name = ('test_%s_%s'):format(op, name) - g[test_name] = function(g) + pgroup[test_name] = function(g) -- Call wrapped functions on server side. -- Collect execution times from outside. local run_count = 10 @@ -153,10 +174,10 @@ for name, case in pairs(observe_cases) do local total_time = fun.foldl(function(acc, x) return acc + x end, 0, time_diffs) -- Validate stats format after execution. 
- local total_stats = g:get_stats() + local total_stats = get_stats(g) t.assert_type(total_stats, 'table', 'Total stats present after observations') - local space_stats = g:get_stats(space_name) + local space_stats = get_stats(g, space_name) t.assert_type(space_stats, 'table', 'Space stats present after observations') t.assert_equals(total_stats.spaces[space_name], space_stats, @@ -252,7 +273,7 @@ for name_head, disable_case in pairs(disable_stats_cases) do for name_tail, return_case in pairs(preserve_return_cases) do local test_name = ('test_%s%s'):format(name_head, name_tail) - g[test_name] = function(g) + pgroup[test_name] = function(g) local op = stats_module.op.INSERT local eval = ([[ @@ -276,7 +297,7 @@ for name_head, disable_case in pairs(disable_stats_cases) do local test_name = ('test_%spairs_wrapper_preserves_return_values'):format(name_head) - g[test_name] = function(g) + pgroup[test_name] = function(g) local op = stats_module.op.INSERT local input = { a = 'a', b = 'b' } @@ -306,7 +327,7 @@ for name_head, disable_case in pairs(disable_stats_cases) do for name_tail, throw_case in pairs(preserve_throw_cases) do local test_name = ('test_%s%s'):format(name_head, name_tail) - g[test_name] = function(g) + pgroup[test_name] = function(g) local op = stats_module.op.INSERT local eval = ([[ @@ -362,7 +383,7 @@ local error_cases = { for name, case in pairs(error_cases) do local test_name = ('test_%s_increases_space_not_found_count'):format(name) - g[test_name] = function(g) + pgroup[test_name] = function(g) local op = stats_module.op.INSERT local eval = ([[ @@ -391,7 +412,7 @@ for name, case in pairs(error_cases) do t.assert_str_contains(err_msg, case.msg, "Error preserved") - local stats = g:get_stats() + local stats = get_stats(g) t.assert_equals(stats.space_not_found, 1) t.assert_equals(stats.spaces[unknown_space_name], nil, @@ -399,13 +420,13 @@ for name, case in pairs(error_cases) do end end -g.test_stats_is_empty_after_disable = function(g) - 
g:disable_stats() +pgroup.test_stats_is_empty_after_disable = function(g) + disable_stats(g) local op = stats_module.op.INSERT g.router:eval(call_wrapped, { 'return_true', op, {}, space_name }) - local stats = g:get_stats() + local stats = get_stats(g) t.assert_equals(stats, {}) end @@ -413,57 +434,57 @@ local function prepare_non_default_stats(g) local op = stats_module.op.INSERT g.router:eval(call_wrapped, { 'return_true', op, {}, space_name }) - local stats = g:get_stats(space_name) + local stats = get_stats(g, space_name) t.assert_equals(stats[op].ok.count, 1, 'Non-zero stats prepared') return stats end -g.test_enable_is_idempotent = function(g) +pgroup.test_enable_with_same_driver_is_idempotent = function(g) local stats_before = prepare_non_default_stats(g) - g:enable_stats() + enable_stats(g) - local stats_after = g:get_stats(space_name) + local stats_after = get_stats(g, space_name) t.assert_equals(stats_after, stats_before, 'Stats have not been reset') end -g.test_reset = function(g) +pgroup.test_reset = function(g) prepare_non_default_stats(g) - g:reset_stats() + reset_stats(g) - local stats = g:get_stats(space_name) + local stats = get_stats(g, space_name) t.assert_equals(stats, {}, 'Stats have been reset') end -g.test_reset_for_disabled_stats_does_not_init_module = function(g) - g:disable_stats() +pgroup.test_reset_for_disabled_stats_does_not_init_module = function(g) + disable_stats(g) - local stats_before = g:get_stats() + local stats_before = get_stats(g) t.assert_equals(stats_before, {}, "Stats is empty") - g:reset_stats() + reset_stats(g) - local stats_after = g:get_stats() + local stats_after = get_stats(g) t.assert_equals(stats_after, {}, "Stats is still empty") end -g.test_enabling_stats_on_non_router_throws_error = function(g) +pgroup.test_enabling_stats_on_non_router_throws_error = function(g) local storage = g.cluster:server('s1-master').net_box t.assert_error(storage.eval, storage, " require('crud.stats.module').enable() ") end 
-g.test_stats_fetch_callback = function(g) +pgroup.test_stats_fetch_callback = function(g) local storage_cursor_stats = { tuples_fetched = 5, tuples_lookup = 25 } g.router:eval([[ stats_module.get_fetch_callback()(...) ]], { storage_cursor_stats, space_name }) local op = stats_module.op.SELECT - local stats = g:get_stats(space_name) + local stats = get_stats(g, space_name) t.assert_not_equals(stats[op], nil, 'Fetch stats update inits SELECT collectors') @@ -476,8 +497,8 @@ g.test_stats_fetch_callback = function(g) 'tuples_lookup is inremented by expected value') end -g.test_disable_stats_before_fetch_callback_get_do_not_break_call = function(g) - g:disable_stats() +pgroup.test_disable_stats_before_fetch_callback_get_do_not_break_call = function(g) + disable_stats(g) local storage_cursor_stats = { tuples_fetched = 5, tuples_lookup = 25 } g.router:eval([[ stats_module.get_fetch_callback()(...) ]], @@ -486,7 +507,7 @@ g.test_disable_stats_before_fetch_callback_get_do_not_break_call = function(g) t.success('No unexpected errors') end -g.test_disable_stats_after_fetch_callback_get_do_not_break_call = function(g) +pgroup.test_disable_stats_after_fetch_callback_get_do_not_break_call = function(g) local storage_cursor_stats = { tuples_fetched = 5, tuples_lookup = 25 } g.router:eval([[ @@ -498,10 +519,70 @@ g.test_disable_stats_after_fetch_callback_get_do_not_break_call = function(g) t.success('No unexpected errors') end -g.test_resolve_name_from_id = function(g) +pgroup.test_space_is_known_to_registry_after_details_observe = function(g) + local storage_cursor_stats = { tuples_fetched = 5, tuples_lookup = 25 } + + g.router:eval([[ stats_module.get_fetch_callback()(...) ]], + { storage_cursor_stats, space_name }) + + local is_unknown_space = g.router:eval([[ + return stats_module.internal.registry.is_unknown_space(...) 
+ ]], { space_name }) + + t.assert_equals(is_unknown_space, false) +end + +pgroup.test_resolve_name_from_id = function(g) local op = stats_module.op.LEN g.router:eval(call_wrapped, { 'return_true', stats_module.op.LEN, {}, space_id }) - local stats = g:get_stats(space_name) + local stats = get_stats(g, space_name) t.assert_not_equals(stats[op], nil, "Statistics is filled by name") end + +group_driver.test_default_driver = function(g) + local driver = g.router:eval(" return stats_module.internal.driver ") + + if g.is_metrics_supported then + t.assert_equals(driver, 'metrics') + else + t.assert_equals(driver, 'local') + end +end + +group_driver.before_test( + 'test_stats_reenable_with_different_driver_reset_stats', + function(g) + t.skip_if(g.is_metrics_supported == false, 'Metrics registry is unsupported') + end +) + +group_driver.test_stats_reenable_with_different_driver_reset_stats = function(g) + enable_stats(g, { driver = 'metrics' }) + + prepare_non_default_stats(g) + + enable_stats(g, { driver = 'local' }) + local stats = get_stats(g) + t.assert_equals(stats.spaces, {}, 'Stats have been reset') +end + +group_driver.test_unknown_driver_throws_error = function(g) + t.assert_error_msg_contains( + 'Unsupported driver: unknown', + enable_stats, g, { driver = 'unknown' }) +end + +group_driver.before_test( + 'test_stats_enable_with_metrics_throws_error_if_unsupported', + function(g) + t.skip_if(g.is_metrics_supported == true, 'Metrics registry is supported') + end +) + +group_driver.test_stats_enable_with_metrics_throws_error_if_unsupported = function(g) + t.assert_error_msg_contains( + 'Unsupported driver: metrics', + enable_stats, g, { driver = 'metrics' }) +end +