diff --git a/.github/workflows/test_on_push.yaml b/.github/workflows/test_on_push.yaml index bcf6d348c..71fa0da09 100644 --- a/.github/workflows/test_on_push.yaml +++ b/.github/workflows/test_on_push.yaml @@ -13,13 +13,19 @@ jobs: matrix: # We need 1.10.6 here to check that module works with # old Tarantool versions that don't have "tuple-keydef"/"tuple-merger" support. - tarantool-version: ["1.10.6", "1.10", "2.2", "2.3", "2.4", "2.5", "2.6", "2.7"] + tarantool-version: ["1.10.6", "1.10", "2.2", "2.3", "2.4", "2.5", "2.6", "2.7", "2.8"] + metrics-version: [""] remove-merger: [false] include: - tarantool-version: "2.7" remove-merger: true + - tarantool-version: "2.8" + metrics-version: "0.1.8" + - tarantool-version: "2.8" + metrics-version: "0.9.0" - tarantool-version: "2.8" coveralls: true + metrics-version: "0.12.0" fail-fast: false runs-on: [ubuntu-latest] steps: @@ -47,6 +53,10 @@ jobs: tarantool --version ./deps.sh + - name: Install metrics + if: matrix.metrics-version != '' + run: tarantoolctl rocks install metrics ${{ matrix.metrics-version }} + - name: Remove external merger if needed if: ${{ matrix.remove-merger }} run: rm .rocks/lib/tarantool/tuple/merger.so diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a3d9325f..403ddf61c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added * Statistics for CRUD operations on router (#224). +* Integrate CRUD statistics with `metrics` (#224). ### Changed diff --git a/README.md b/README.md index f2d33a92c..68295a0cd 100644 --- a/README.md +++ b/README.md @@ -610,6 +610,15 @@ crud.enable_stats() crud.reset_stats() ``` +If [`metrics`](https://github.com/tarantool/metrics) `0.9.0` or greater +is found, metrics collectors will be used by default to store statistics +instead of local collectors. You can manually choose the driver if needed. +``` +-- Use metrics collectors. 
+crud.enable_stats({ driver = 'metrics' }) +``` + +To get statistics in code, call `crud.stats()`. ```lua crud.stats() --- @@ -645,9 +654,41 @@ Possible statistics operation labels are Each operation section contains of different collectors for success calls and error (both error throw and `nil, err`) returns. `count` is total requests count since instance start -or stats restart. `latency` is average time of requests execution, +or stats restart. `latency` is the 0.99 quantile of request execution +time if the `metrics` driver is used, otherwise `latency` is the total average. `time` is total time of requests execution. +In the `metrics` registry, statistics are stored as `tnt_crud_stats` metrics +with `operation`, `status` and `name` labels. Collector +`tnt_crud_space_not_found` stores the count of calls to unknown spaces. +``` +metrics.collect() +--- +- - label_pairs: + status: ok + operation: insert + name: customers + value: 221411 + metric_name: tnt_crud_stats_count + - label_pairs: + status: ok + operation: insert + name: customers + value: 10.49834896344692 + metric_name: tnt_crud_stats_sum + - label_pairs: + status: ok + operation: insert + name: customers + quantile: 0.99 + value: 0.00023606420935973 + metric_name: tnt_crud_stats + - label_pairs: [] + value: 3 + metric_name: tnt_crud_space_not_found +... +``` + `select` section additionally contains `details` collectors. ```lua crud.stats('my_space').select.details @@ -661,7 +702,10 @@ crud.stats('my_space').select.details (including those not executed successfully). `tuples_fetched` is a count of tuples fetched from storages during execution, `tuples_lookup` is a count of tuples looked up on storages -while collecting response for call. +while collecting response for call. In the `metrics` registry they +are stored as `tnt_crud_map_reduces`, `tnt_crud_tuples_fetched` +and `tnt_crud_tuples_lookup` metrics with +`{ operation = 'select', name = space_name }` labels. 
## Cartridge roles diff --git a/crud/stats/metrics_registry.lua b/crud/stats/metrics_registry.lua new file mode 100644 index 000000000..478590fbf --- /dev/null +++ b/crud/stats/metrics_registry.lua @@ -0,0 +1,323 @@ +local is_package, metrics = pcall(require, 'metrics') + +local dev_checks = require('crud.common.dev_checks') +local op_module = require('crud.stats.operation') +local registry_common = require('crud.stats.registry_common') + +local registry = {} +local internal_registry = {} + +local metric_name = { + -- Summary collector for all operations. + stats = 'tnt_crud_stats', + -- `*_count` and `*_sum` are automatically created + -- by summary collector. + stats_count = 'tnt_crud_stats_count', + stats_sum = 'tnt_crud_stats_sum', + + -- Counter collector for spaces not found. + space_not_found = 'tnt_crud_space_not_found', + + -- Counter collectors for select/pairs details. + details = { + tuples_fetched = 'tnt_crud_tuples_fetched', + tuples_lookup = 'tnt_crud_tuples_lookup', + map_reduces = 'tnt_crud_map_reduces', + } +} + +local LATENCY_QUANTILE = 0.99 + +local DEFAULT_QUANTILES = { + [LATENCY_QUANTILE] = 1e-3, +} + +local DEFAULT_SUMMARY_PARAMS = { + age_buckets_count = 2, + max_age_time = 60, +} + +--- Check if application supports metrics rock for registry +-- +-- `metrics >= 0.9.0` is required to use summary with +-- age buckets. `metrics >= 0.5.0, < 0.9.0` is unsupported +-- due to quantile overflow bug +-- (https://github.com/tarantool/metrics/issues/235). +-- +-- @function is_supported +-- +-- @treturn boolean Returns true if `metrics >= 0.9.0` found, false otherwise. +-- +function registry.is_supported() + if is_package == false then + return false + end + + -- Only metrics >= 0.9.0 supported. 
+ local is_summary, summary = pcall(require, 'metrics.collectors.summary') + if is_summary == false or summary.rotate_age_buckets == nil then + return false + end + + return true +end + +--- Initialize collectors in global metrics registry +-- +-- Registries are not meant to be used explicitly +-- by users, init is not guaranteed to be idempotent. +-- Destroy collectors only through this registry's methods. +-- +-- @function init +-- +-- @treturn boolean Returns true. +-- +function registry.init() + internal_registry[metric_name.stats] = metrics.summary( + metric_name.stats, + 'CRUD router calls statistics', + DEFAULT_QUANTILES, + DEFAULT_SUMMARY_PARAMS) + + internal_registry[metric_name.space_not_found] = metrics.counter( + metric_name.space_not_found, + 'Spaces not found during CRUD calls') + + internal_registry[metric_name.details.tuples_fetched] = metrics.counter( + metric_name.details.tuples_fetched, + 'Tuples fetched from CRUD storages during select/pairs') + + internal_registry[metric_name.details.tuples_lookup] = metrics.counter( + metric_name.details.tuples_lookup, + 'Tuples looked up on CRUD storages while collecting response during select/pairs') + + internal_registry[metric_name.details.map_reduces] = metrics.counter( + metric_name.details.map_reduces, + 'Map reduces planned during CRUD select/pairs') + + return true +end + +--- Unregister collectors in global metrics registry +-- +-- Registries are not meant to be used explicitly +-- by users, destroy is not guaranteed to be idempotent. +-- Destroy collectors only through this registry's methods. +-- +-- @function destroy +-- +-- @treturn boolean Returns true. +-- +function registry.destroy() + for _, c in pairs(internal_registry) do + metrics.registry:unregister(c) + end + + internal_registry = {} + return true +end + +--- Get copy of global metrics registry +-- +-- Registries are not meant to be used explicitly +-- by users, get is not guaranteed to work without init. 
+-- +-- @function get +-- +-- @tparam string space_name +-- (Optional) If specified, returns table with statistics +-- of operations on the space, separated by operation type and +-- execution status. If there weren't any requests for the space, +-- returns {}. If not specified, returns table with statistics +-- about all existing spaces and count of calls to spaces +-- that weren't found. +-- +-- @treturn table Returns copy of metrics registry. +function registry.get(space_name) + dev_checks('?string') + + local stats = { + spaces = {}, + space_not_found = 0, + } + + -- Fill operation basic statistics values. + for _, obs in ipairs(internal_registry[metric_name.stats]:collect()) do + local op = obs.label_pairs.operation + local status = obs.label_pairs.status + local name = obs.label_pairs.name + + if space_name ~= nil and name ~= space_name then + goto stats_continue + end + + registry_common.init_collectors_if_required(stats.spaces, name, op) + local space_stats = stats.spaces[name] + + if obs.metric_name == metric_name.stats then + if obs.label_pairs.quantile == LATENCY_QUANTILE then + space_stats[op][status].latency = obs.value + end + elseif obs.metric_name == metric_name.stats_sum then + space_stats[op][status].time = obs.value + elseif obs.metric_name == metric_name.stats_count then + space_stats[op][status].count = obs.value + end + + :: stats_continue :: + end + + -- Fill select/pairs detail statistics values. 
+ for stat_name, details_metric_name in pairs(metric_name.details) do + for _, obs in ipairs(internal_registry[details_metric_name]:collect()) do + local name = obs.label_pairs.name + local op = obs.label_pairs.operation + + if space_name ~= nil and name ~= space_name then + goto details_continue + end + + registry_common.init_collectors_if_required(stats.spaces, name, op) + stats.spaces[name][op].details[stat_name] = obs.value + + :: details_continue :: + end + end + + if space_name ~= nil then + return stats.spaces[space_name] or {} + end + + local _, obs = next(internal_registry[metric_name.space_not_found]:collect()) + if obs ~= nil then + stats.space_not_found = obs.value + end + + return stats +end + +--- Check if space statistics are absent from registry +-- +-- @function is_unknown_space +-- +-- @tparam string space_name +-- Name of space. +-- +-- @treturn boolean False, if space stats found. True otherwise. +-- +function registry.is_unknown_space(space_name) + dev_checks('string') + + for _, obs in ipairs(internal_registry[metric_name.stats]:collect()) do + local name = obs.label_pairs.name + + if name == space_name then + return false + end + end + + for _, details_metric_name in pairs(metric_name.details) do + for _, obs in ipairs(internal_registry[details_metric_name]:collect()) do + local name = obs.label_pairs.name + + if name == space_name then + return false + end + end + end + + return true +end + +--- Increase requests count and update latency info +-- +-- @function observe +-- +-- @tparam number latency +-- Time of call execution. +-- +-- @tparam string space_name +-- Name of space. +-- +-- @tparam string op +-- Label of registry collectors. +-- Use `require('crud.stats.operation')` to pick one. +-- +-- @tparam string status +-- 'ok' if no errors on execution, 'error' otherwise. +-- +-- @treturn boolean Returns true. 
+-- +function registry.observe(latency, space_name, op, status) + dev_checks('number', 'string', 'string', 'string') + + -- Use `operation` label to be consistent with `tnt_stats_op_*` labels. + -- Use `name` label to be consistent with `tnt_space_*` labels. + -- Use `status` label to be consistent with `tnt_vinyl_*` and HTTP metrics labels. + local label_pairs = { operation = op, name = space_name, status = status } + + internal_registry[metric_name.stats]:observe(latency, label_pairs) + + return true +end + +--- Increase count of "space not found" collector by one +-- +-- @function observe_space_not_found +-- +-- @treturn boolean Returns true. +-- +function registry.observe_space_not_found() + internal_registry[metric_name.space_not_found]:inc(1) + + return true +end + +--- Increase statistics of storage select/pairs calls +-- +-- @function observe_fetch +-- +-- @tparam number tuples_fetched +-- Count of tuples fetched during storage call. +-- +-- @tparam number tuples_lookup +-- Count of tuples looked up on storages while collecting response. +-- +-- @tparam string space_name +-- Name of space. +-- +-- @treturn boolean Returns true. +-- +function registry.observe_fetch(tuples_fetched, tuples_lookup, space_name) + dev_checks('number', 'number', 'string') + + local label_pairs = { name = space_name, operation = op_module.SELECT } + + internal_registry[metric_name.details.tuples_fetched]:inc(tuples_fetched, label_pairs) + internal_registry[metric_name.details.tuples_lookup]:inc(tuples_lookup, label_pairs) + + return true +end + +--- Increase statistics of planned map reduces during select/pairs +-- +-- @function observe_map_reduces +-- +-- @tparam number count +-- Count of map reduces planned. +-- +-- @tparam string space_name +-- Name of space. +-- +-- @treturn boolean Returns true. 
+-- +function registry.observe_map_reduces(count, space_name) + dev_checks('number', 'string') + + local label_pairs = { name = space_name, operation = op_module.SELECT } + internal_registry[metric_name.details.map_reduces]:inc(count, label_pairs) + + return true +end + +return registry diff --git a/crud/stats/module.lua b/crud/stats/module.lua index b07160186..0e30cd176 100644 --- a/crud/stats/module.lua +++ b/crud/stats/module.lua @@ -6,12 +6,35 @@ local vshard = require('vshard') local dev_checks = require('crud.common.dev_checks') local utils = require('crud.common.utils') local op_module = require('crud.stats.operation') -local registry = require('crud.stats.local_registry') local StatsError = errors.new_class('StatsError', {capture_stack = false}) local stats = {} -local is_enabled = false +local internal = { + registry = nil, + driver = nil, +} +stats.internal = internal + +local local_registry = require('crud.stats.local_registry') +local metrics_registry = require('crud.stats.metrics_registry') + +local drivers = { + ['local'] = local_registry, +} +if metrics_registry.is_supported() then + drivers['metrics'] = metrics_registry +end + +--- Check if statistics module is enabled +-- +-- @function is_enabled +-- +-- @treturn boolean Returns true or false. +-- +function stats.is_enabled() + return internal.registry ~= nil +end --- Initializes statistics registry, enables callbacks and wrappers -- @@ -19,20 +42,52 @@ local is_enabled = false -- -- @function enable -- +-- @tparam table opts +-- +-- @tfield string driver +-- 'local' or 'metrics'. +-- If 'local', stores statistics in local registry (some Lua tables) +-- and computes latency as overall average. 'metrics' requires +-- `metrics >= 0.9.0` installed and stores statistics in +-- global metrics registry (integrated with exporters) +-- and computes latency as 0.99 quantile with aging. +-- If 'metrics' driver is available, it is used by default, +-- otherwise 'local' is used. 
+-- -- @treturn boolean Returns true. -- -function stats.enable() - if is_enabled then - return true - end +function stats.enable(opts) + checks({ driver = '?string' }) StatsError:assert( rawget(_G, 'crud') ~= nil, "Can be enabled only on crud router" ) - registry.init() - is_enabled = true + local driver = (opts or {}).driver + if driver == nil then + if drivers.metrics ~= nil then + driver = 'metrics' + else + driver = 'local' + end + end + + StatsError:assert( + drivers[driver] ~= nil, + 'Unsupported driver: %s', driver + ) + + if internal.driver == driver then + return true + end + + -- Disable old driver registry, if another one was requested. + stats.disable() + + internal.driver = driver + internal.registry = drivers[driver] + internal.registry.init() return true end @@ -47,12 +102,12 @@ end -- @treturn boolean Returns true. -- function stats.reset() - if not is_enabled then + if not stats.is_enabled() then return true end - registry.destroy() - registry.init() + internal.registry.destroy() + internal.registry.init() return true end @@ -66,12 +121,13 @@ end -- @treturn boolean Returns true. -- function stats.disable() - if not is_enabled then + if not stats.is_enabled() then return true end - registry.destroy() - is_enabled = false + internal.registry.destroy() + internal.registry = nil + internal.driver = nil return true end @@ -95,7 +151,11 @@ end function stats.get(space_name) checks('?string') - return registry.get(space_name) + if not stats.is_enabled() then + return {} + end + + return internal.registry.get(space_name) end local function wrap_tail(space_name, op, opts, start_time, call_status, ...) @@ -127,11 +187,11 @@ local function wrap_tail(space_name, op, opts, start_time, call_status, ...) -- at worst it would be a single excessive check for an instance lifetime. -- If we can't verify space existence because of network errors, -- it is treated as unknown as well. 
- if status == 'error' and registry.is_unknown_space(space_name) then + if status == 'error' and internal.registry.is_unknown_space(space_name) then if type(err) == 'table' and type(err.err) == 'string' then space_not_found_msg = utils.space_doesnt_exist_msg(space_name) if string.find(err.err, space_not_found_msg) ~= nil then - registry.observe_space_not_found() + internal.registry.observe_space_not_found() goto return_values end end @@ -141,7 +201,7 @@ local function wrap_tail(space_name, op, opts, start_time, call_status, ...) -- Check explicitly if space do not exist. space = utils.get_space(space_name, vshard.router.routeall()) if space == nil then - registry.observe_space_not_found() + internal.registry.observe_space_not_found() goto return_values end end @@ -155,11 +215,12 @@ local function wrap_tail(space_name, op, opts, start_time, call_status, ...) space_name = space.name end - registry.observe(latency, space_name, op, status) + internal.registry.observe(latency, space_name, op, status) if context_stats ~= nil then if context_stats.map_reduces ~= nil then - registry.observe_map_reduces(context_stats.map_reduces, space_name) + internal.registry.observe_map_reduces( + context_stats.map_reduces, space_name) end utils.drop_context_section('router_stats') end @@ -204,7 +265,7 @@ function stats.wrap(func, op, opts) dev_checks('function', 'string', { pairs = '?boolean' }) return function(...) - if not is_enabled then + if not stats.is_enabled() then return func(...) end @@ -244,11 +305,11 @@ local storage_stats_schema = { tuples_fetched = 'number', tuples_lookup = 'numbe local function update_fetch_stats(storage_stats, space_name) dev_checks(storage_stats_schema, 'string') - if not is_enabled then + if not stats.is_enabled() then return true end - registry.observe_fetch( + internal.registry.observe_fetch( storage_stats.tuples_fetched, storage_stats.tuples_lookup, space_name @@ -265,7 +326,7 @@ end -- @treturn[2] function Dummy function, if stats disabled. 
-- function stats.get_fetch_callback() - if not is_enabled then + if not stats.is_enabled() then return utils.pass end diff --git a/test/integration/stats_test.lua b/test/integration/stats_test.lua index d4a9ea200..365265872 100644 --- a/test/integration/stats_test.lua +++ b/test/integration/stats_test.lua @@ -4,14 +4,20 @@ local t = require('luatest') local stats_registry_common = require('crud.stats.registry_common') -local g = t.group('stats_integration') +local pgroup = t.group('stats_integration', { + { driver = 'local' }, + { driver = 'metrics' }, +}) +local group_metrics = t.group('stats_metrics_integration', { + { driver = 'metrics' }, +}) local helpers = require('test.helper') local space_id = 542 local space_name = 'customers' local unknown_space_name = 'non_existing_space' -g.before_all(function(g) +local function before_all(g) g.cluster = helpers.Cluster:new({ datadir = fio.tempdir(), server_command = helpers.entrypoint('srv_select'), @@ -26,20 +32,56 @@ g.before_all(function(g) t.assert_equals(helpers.is_space_exist(g.router, space_name), true) t.assert_equals(helpers.is_space_exist(g.router, unknown_space_name), false) -end) -g.after_all(function(g) + if g.params.driver == 'metrics' then + local is_metrics_supported = g.router:eval([[ + return require('crud.stats.metrics_registry').is_supported() + ]]) + t.skip_if(is_metrics_supported == false, 'Metrics registry is unsupported') + end +end + +local function after_all(g) helpers.stop_cluster(g.cluster) -end) +end + + +local function get_stats(g, space_name) + return g.router:eval("return crud.stats(...)", { space_name }) +end + +local function enable_stats(g, params) + params = params or g.params + g.router:eval("crud.enable_stats(...)", { params }) +end -g.before_each(function(g) +local function disable_stats(g) + g.router:eval("crud.disable_stats()") +end + +local function before_each(g) + enable_stats(g) helpers.truncate_space_on_cluster(g.cluster, space_name) -end) +end -function 
g:get_stats(space_name) - return self.router:eval("return crud.stats(...)", { space_name }) +local function get_metrics(g) + return g.router:eval("return require('metrics').collect()") end +pgroup.before_all(before_all) + +pgroup.after_all(after_all) + +pgroup.before_each(before_each) + + +group_metrics.before_all(before_all) + +group_metrics.after_all(after_all) + +group_metrics.before_each(before_each) + + -- If there weren't any operations, space stats is {}. -- To compute stats diff, this helper return real stats -- if they're already present or default stats if @@ -264,12 +306,12 @@ for name, case in pairs(simple_operation_cases) do local test_name = ('test_%s'):format(name) if case.prepare ~= nil then - g.before_test(test_name, case.prepare) + pgroup.before_test(test_name, case.prepare) end - g[test_name] = function(g) + pgroup[test_name] = function(g) -- Collect stats before call. - local stats_before = g:get_stats(space_name) + local stats_before = get_stats(g, space_name) t.assert_type(stats_before, 'table') -- Call operation. @@ -295,7 +337,7 @@ for name, case in pairs(simple_operation_cases) do end -- Collect stats after call. - local stats_after = g:get_stats(space_name) + local stats_after = get_stats(g, space_name) t.assert_type(stats_after, 'table') t.assert_not_equals(stats_after[case.op], nil) @@ -420,9 +462,9 @@ local unknown_space_cases = { for name, case in pairs(unknown_space_cases) do local test_name = ('test_%s_on_unknown_space'):format(name) - g[test_name] = function(g) + pgroup[test_name] = function(g) -- Collect stats before call. - local stats_before = g:get_stats() + local stats_before = get_stats(g) t.assert_type(stats_before, 'table') -- Call operation. @@ -436,7 +478,7 @@ for name, case in pairs(unknown_space_cases) do t.assert_not_equals(err, nil) -- Collect stats after call. 
- local stats_after = g:get_stats() + local stats_after = get_stats(g) t.assert_type(stats_after, 'table') t.assert_equals(stats_after.space_not_found - stats_before.space_not_found, 1, @@ -519,14 +561,14 @@ local select_cases = { for name, case in pairs(select_cases) do local test_name = ('test_%s_details'):format(name) - g.before_test(test_name, prepare_select_data) + pgroup.before_test(test_name, prepare_select_data) - g[test_name] = function(g) + pgroup[test_name] = function(g) local op = 'select' local space_name = space_name -- Collect stats before call. - local stats_before = g:get_stats(space_name) + local stats_before = get_stats(g, space_name) t.assert_type(stats_before, 'table') -- Call operation. @@ -540,7 +582,7 @@ for name, case in pairs(select_cases) do t.assert_equals(err, nil) -- Collect stats after call. - local stats_after = g:get_stats(space_name) + local stats_after = get_stats(g, space_name) t.assert_type(stats_after, 'table') local op_before = get_before_stats(stats_before, op) @@ -561,10 +603,297 @@ for name, case in pairs(select_cases) do end end -g.test_resolve_name_from_id = function(g) +pgroup.test_resolve_name_from_id = function(g) local op = 'len' g.router:call('crud.len', { space_id }) - local stats = g:get_stats(space_name) + local stats = get_stats(g, space_name) t.assert_not_equals(stats[op], nil, "Statistics is filled by name") end + + +-- Generate non-null stats for all cases. +local function generate_stats(g) + for _, case in pairs(simple_operation_cases) do + if case.prepare ~= nil then + case.prepare(g) + end + + local _, err + if case.eval ~= nil then + if case.pcall then + _, err = pcall(g.router.eval, g.router, case.eval, case.args) + else + _, err = g.router:eval(case.eval, case.args) + end + else + _, err = g.router:call(case.func, case.args) + end + + if case.expect_error ~= true then + t.assert_equals(err, nil) + else + t.assert_not_equals(err, nil) + end + end + + -- Generate non-null select details. 
+ prepare_select_data(g) + for _, case in pairs(select_cases) do + local _, err + if case.eval ~= nil then + _, err = g.router:eval(case.eval, { space_name, case.conditions }) + else + _, err = g.router:call(case.func, { space_name, case.conditions }) + end + + t.assert_equals(err, nil) + end + + -- Generate non-null space_not_found stats. + local case = unknown_space_cases.insert + local _, err = g.router:call(case.func, case.args) + t.assert_not_equals(err, nil) +end + +-- https://github.com/tarantool/metrics/blob/fc5a67072340b12f983f09b7d383aca9e2f10cf1/test/utils.lua#L22-L31 +local function find_obs(metric_name, label_pairs, observations) + for _, obs in pairs(observations) do + local same_label_pairs = pcall(t.assert_equals, obs.label_pairs, label_pairs) + if obs.metric_name == metric_name and same_label_pairs then + return obs + end + end + t.assert_items_include( + observations, + { metric_name = metric_name, label_pairs = label_pairs }, + 'Observation found') +end + +-- https://github.com/tarantool/metrics/blob/fc5a67072340b12f983f09b7d383aca9e2f10cf1/test/utils.lua#L55-L63 +local function find_metric(metric_name, metrics_data) + local m = {} + for _, v in ipairs(metrics_data) do + if v.metric_name == metric_name then + table.insert(m, v) + end + end + return #m > 0 and m or nil +end + +local function get_unique_label_values(metrics_data, label_key) + local label_values_map = {} + for _, v in ipairs(metrics_data) do + local label_pairs = v.label_pairs or {} + if label_pairs[label_key] ~= nil then + label_values_map[label_pairs[label_key]] = true + end + end + + local label_values = {} + for k, _ in pairs(label_values_map) do + table.insert(label_values, k) + end + + return label_values +end + +local function validate_stats(metrics) + local stats = find_metric('tnt_crud_stats', metrics) + t.assert_type(stats, 'table', '`tnt_crud_stats` summary metrics found') + + local stats_count = find_metric('tnt_crud_stats_count', metrics) + t.assert_type(stats_count, 
'table', '`tnt_crud_stats` summary metrics found') + + local stats_sum = find_metric('tnt_crud_stats_sum', metrics) + t.assert_type(stats_sum, 'table', '`tnt_crud_stats` summary metrics found') + + + local expected_operations = { 'insert', 'get', 'replace', 'update', + 'upsert', 'delete', 'select', 'truncate', 'len', 'borders' } + + t.assert_items_equals(get_unique_label_values(stats, 'operation'), expected_operations, + 'Metrics are labelled with operation') + + t.assert_items_equals(get_unique_label_values(stats_count, 'operation'), expected_operations, + 'Metrics are labelled with operation') + + t.assert_items_equals(get_unique_label_values(stats_sum, 'operation'), expected_operations, + 'Metrics are labelled with operation') + + + local expected_statuses = { 'ok', 'error' } + + t.assert_items_equals( get_unique_label_values(stats, 'status'), expected_statuses, + 'Metrics are labelled with status') + + t.assert_items_equals(get_unique_label_values(stats_count, 'status'), expected_statuses, + 'Metrics are labelled with status') + + t.assert_items_equals(get_unique_label_values(stats_sum, 'status'), expected_statuses, + 'Metrics are labelled with status') + + + local expected_names = { space_name } + + t.assert_items_equals(get_unique_label_values(stats, 'name'), expected_names, + 'Metrics are labelled with space name (only existing spaces)') + + t.assert_items_equals(get_unique_label_values(stats_count, 'name'), + expected_names, + 'Metrics are labelled with space name (only existing spaces)') + + t.assert_items_equals( + get_unique_label_values(stats_sum, 'name'), + expected_names, + 'Metrics are labelled with space name (only existing spaces)') + + + local tuples_fetched = find_metric('tnt_crud_tuples_fetched', metrics) + t.assert_type(tuples_fetched, 'table', '`tnt_crud_tuples_fetched` metrics found') + + t.assert_items_equals(get_unique_label_values(tuples_fetched, 'operation'), { 'select' }, + 'Metrics are labelled with operation') + + 
t.assert_items_equals(get_unique_label_values(tuples_fetched, 'name'), expected_names, + 'Metrics are labelled with space name (only existing spaces)') + + + local tuples_lookup = find_metric('tnt_crud_tuples_lookup', metrics) + t.assert_type(tuples_lookup, 'table', '`tnt_crud_tuples_lookup` metrics found') + + t.assert_items_equals( get_unique_label_values(tuples_lookup, 'operation'), { 'select' }, + 'Metrics are labelled with operation') + + t.assert_items_equals(get_unique_label_values(tuples_lookup, 'name'), expected_names, + 'Metrics are labelled with space name (only existing spaces)') + + + local map_reduces = find_metric('tnt_crud_map_reduces', metrics) + t.assert_type(map_reduces, 'table', '`tnt_crud_map_reduces` metrics found') + + t.assert_items_equals(get_unique_label_values(map_reduces, 'operation'), { 'select' }, + 'Metrics are labelled with operation') + + t.assert_items_equals(get_unique_label_values(map_reduces, 'name'), expected_names, + 'Metrics are labelled with space name (only existing spaces)') + + + local space_not_found = find_metric('tnt_crud_space_not_found', metrics) + t.assert_type(space_not_found, 'table', '`tnt_crud_space_not_found` metrics found') +end + + +group_metrics.before_test( + 'test_stats_stored_in_global_metrics_registry', + generate_stats) + +group_metrics.test_stats_stored_in_global_metrics_registry = function(g) + local metrics = get_metrics(g) + validate_stats(metrics) +end + + +group_metrics.before_test('test_metrics_updated_per_call', generate_stats) + +group_metrics.test_metrics_updated_per_call = function(g) + local metrics_before = get_metrics(g) + local stats_labels = { operation = 'select', status = 'ok', name = space_name } + local details_labels = { operation = 'select', name = space_name } + + local count_before = find_obs('tnt_crud_stats_count', stats_labels, metrics_before) + local time_before = find_obs('tnt_crud_stats_sum', stats_labels, metrics_before) + local tuples_lookup_before = 
find_obs('tnt_crud_tuples_lookup', details_labels, metrics_before) + local tuples_fetched_before = find_obs('tnt_crud_tuples_fetched', details_labels, metrics_before) + local map_reduces_before = find_obs('tnt_crud_map_reduces', details_labels, metrics_before) + + local case = select_cases['select_by_secondary_index'] + local _, err = g.router:call(case.func, { space_name, case.conditions }) + t.assert_equals(err, nil) + + local metrics_after = get_metrics(g) + local count_after = find_obs('tnt_crud_stats_count', stats_labels, metrics_after) + local time_after = find_obs('tnt_crud_stats_sum', stats_labels, metrics_after) + local tuples_lookup_after = find_obs('tnt_crud_tuples_lookup', details_labels, metrics_after) + local tuples_fetched_after = find_obs('tnt_crud_tuples_fetched', details_labels, metrics_after) + local map_reduces_after = find_obs('tnt_crud_map_reduces', details_labels, metrics_after) + + t.assert_equals(count_after.value - count_before.value, 1, + '`select` metrics count increased') + t.assert_ge(time_after.value - time_before.value, 0, + '`select` total time increased') + t.assert_ge(tuples_lookup_after.value - tuples_lookup_before.value, case.tuples_lookup, + '`select` tuples lookup expected change') + t.assert_ge(tuples_fetched_after.value - tuples_fetched_before.value, case.tuples_fetched, + '`select` tuples feched expected change') + t.assert_ge(map_reduces_after.value - map_reduces_before.value, case.map_reduces, + '`select` map reduces expected change') +end + + +group_metrics.before_test( + 'test_space_not_found_metrics_updated_per_call', + generate_stats) + +group_metrics.test_space_not_found_metrics_updated_per_call = function(g) + local metrics_before = get_metrics(g) + + local space_not_found_before = find_obs('tnt_crud_space_not_found', {}, metrics_before) + + local case = unknown_space_cases.insert + local _, err = g.router:call(case.func, case.args) + t.assert_not_equals(err, nil) + + local metrics_after = get_metrics(g) + local 
space_not_found_after = find_obs('tnt_crud_space_not_found', {}, metrics_after) + + t.assert_equals(space_not_found_after.value - space_not_found_before.value, 1, + '`tnt_crud_space_not_found` metrics count increased') +end + + +group_metrics.before_test( + 'test_metrics_collectors_destroyed_if_stats_disabled', + generate_stats) + +group_metrics.test_metrics_collectors_destroyed_if_stats_disabled = function(g) + disable_stats(g) + + local metrics = get_metrics(g) + + local stats = find_metric('tnt_crud_stats', metrics) + t.assert_equals(stats, nil, '`tnt_crud_stats` summary metrics not found') + + local stats_count = find_metric('tnt_crud_stats_count', metrics) + t.assert_equals(stats_count, nil, '`tnt_crud_stats` summary metrics not found') + + local stats_sum = find_metric('tnt_crud_stats_sum', metrics) + t.assert_equals(stats_sum, nil, '`tnt_crud_stats` summary metrics not found') + + local tuples_fetched = find_metric('tnt_crud_tuples_fetched', metrics) + t.assert_equals(tuples_fetched, nil, '`tnt_crud_tuples_fetched` metrics not found') + + local tuples_lookup = find_metric('tnt_crud_tuples_lookup', metrics) + t.assert_equals(tuples_lookup, nil, '`tnt_crud_tuples_lookup` metrics not found') + + local map_reduces = find_metric('tnt_crud_map_reduces', metrics) + t.assert_equals(map_reduces, nil, '`tnt_crud_map_reduces` metrics not found') + + local space_not_found = find_metric('tnt_crud_space_not_found', metrics) + t.assert_equals(space_not_found, nil, '`tnt_crud_space_not_found` metrics not found') +end + + +group_metrics.before_test( + 'test_stats_stored_in_metrics_registry_after_switch_to_metrics_driver', + disable_stats) + +group_metrics.test_stats_stored_in_metrics_registry_after_switch_to_metrics_driver = function(g) + enable_stats(g, { driver = 'local' }) + -- Switch to metrics driver. 
+ enable_stats(g, { driver = 'metrics' }) + + generate_stats(g) + local metrics = get_metrics(g) + validate_stats(metrics) +end diff --git a/test/unit/stats_test.lua b/test/unit/stats_test.lua index 3daa77e65..2c464171f 100644 --- a/test/unit/stats_test.lua +++ b/test/unit/stats_test.lua @@ -6,14 +6,18 @@ local t = require('luatest') local stats_module = require('crud.stats.module') local utils = require('crud.common.utils') -local g = t.group('stats_unit') +local pgroup = t.group('stats_unit', { + { driver = 'local' }, + { driver = 'metrics' }, +}) +local group_driver = t.group('stats_driver_unit') local helpers = require('test.helper') local space_id = 542 local space_name = 'customers' local unknown_space_name = 'non_existing_space' -g.before_all(function(g) +local function before_all(g) -- Enable test cluster for "is space exist?" checks. g.cluster = helpers.Cluster:new({ datadir = fio.tempdir(), @@ -29,47 +33,64 @@ g.before_all(function(g) t.assert_equals(helpers.is_space_exist(g.router, space_name), true) t.assert_equals(helpers.is_space_exist(g.router, unknown_space_name), false) -end) -g.after_all(function(g) - helpers.stop_cluster(g.cluster) -end) + g.is_metrics_supported = g.router:eval([[ + return require('crud.stats.metrics_registry').is_supported() + ]]) --- Reset statistics between tests, reenable if needed. 
-g.before_each(function(g) - g:enable_stats() -end) + if g.params ~= nil and g.params.driver == 'metrics' then + t.skip_if(g.is_metrics_supported == false, 'Metrics registry is unsupported') + end +end -g.after_each(function(g) - g:disable_stats() -end) +local function after_all(g) + helpers.stop_cluster(g.cluster) +end -function g:get_stats(space_name) - return self.router:eval("return stats_module.get(...)", { space_name }) +local function get_stats(g, space_name) + return g.router:eval("return stats_module.get(...)", { space_name }) end -function g:enable_stats() - self.router:eval("stats_module.enable()") +local function enable_stats(g, params) + params = params or g.params + g.router:eval("stats_module.enable(...)", { params }) end -function g:disable_stats() - self.router:eval("stats_module.disable()") +local function disable_stats(g) + g.router:eval("stats_module.disable()") end -function g:reset_stats() - self.router:eval("return stats_module.reset()") +local function reset_stats(g) + g.router:eval("return stats_module.reset()") end -g.test_get_format_after_enable = function(g) - local stats = g:get_stats() +pgroup.before_all(before_all) + +pgroup.after_all(after_all) + +-- Reset statistics between tests, reenable if needed. 
+pgroup.before_each(enable_stats) + +pgroup.after_each(disable_stats) + + +group_driver.before_all(before_all) + +group_driver.after_all(after_all) + +group_driver.after_each(disable_stats) + + +pgroup.test_get_format_after_enable = function(g) + local stats = get_stats(g) t.assert_type(stats, 'table') t.assert_equals(stats.spaces, {}) t.assert_equals(stats.space_not_found, 0) end -g.test_get_by_space_name_format_after_enable = function(g) - local stats = g:get_stats(space_name) +pgroup.test_get_by_space_name_format_after_enable = function(g) + local stats = get_stats(g, space_name) t.assert_type(stats, 'table') t.assert_equals(stats, {}) @@ -127,7 +148,7 @@ for name, case in pairs(observe_cases) do for _, op in pairs(case.operations) do local test_name = ('test_%s_%s'):format(op, name) - g[test_name] = function(g) + pgroup[test_name] = function(g) -- Call wrapped functions on server side. -- Collect execution times from outside. local run_count = 10 @@ -153,10 +174,10 @@ for name, case in pairs(observe_cases) do local total_time = fun.foldl(function(acc, x) return acc + x end, 0, time_diffs) -- Validate stats format after execution. 
- local total_stats = g:get_stats() + local total_stats = get_stats(g) t.assert_type(total_stats, 'table', 'Total stats present after observations') - local space_stats = g:get_stats(space_name) + local space_stats = get_stats(g, space_name) t.assert_type(space_stats, 'table', 'Space stats present after observations') t.assert_equals(total_stats.spaces[space_name], space_stats, @@ -252,7 +273,7 @@ for name_head, disable_case in pairs(disable_stats_cases) do for name_tail, return_case in pairs(preserve_return_cases) do local test_name = ('test_%s%s'):format(name_head, name_tail) - g[test_name] = function(g) + pgroup[test_name] = function(g) local op = stats_module.op.INSERT local eval = ([[ @@ -276,7 +297,7 @@ for name_head, disable_case in pairs(disable_stats_cases) do local test_name = ('test_%spairs_wrapper_preserves_return_values'):format(name_head) - g[test_name] = function(g) + pgroup[test_name] = function(g) local op = stats_module.op.INSERT local input = { a = 'a', b = 'b' } @@ -306,7 +327,7 @@ for name_head, disable_case in pairs(disable_stats_cases) do for name_tail, throw_case in pairs(preserve_throw_cases) do local test_name = ('test_%s%s'):format(name_head, name_tail) - g[test_name] = function(g) + pgroup[test_name] = function(g) local op = stats_module.op.INSERT local eval = ([[ @@ -362,7 +383,7 @@ local error_cases = { for name, case in pairs(error_cases) do local test_name = ('test_%s_increases_space_not_found_count'):format(name) - g[test_name] = function(g) + pgroup[test_name] = function(g) local op = stats_module.op.INSERT local eval = ([[ @@ -391,7 +412,7 @@ for name, case in pairs(error_cases) do t.assert_str_contains(err_msg, case.msg, "Error preserved") - local stats = g:get_stats() + local stats = get_stats(g) t.assert_equals(stats.space_not_found, 1) t.assert_equals(stats.spaces[unknown_space_name], nil, @@ -399,13 +420,13 @@ for name, case in pairs(error_cases) do end end -g.test_stats_is_empty_after_disable = function(g) - 
g:disable_stats() +pgroup.test_stats_is_empty_after_disable = function(g) + disable_stats(g) local op = stats_module.op.INSERT g.router:eval(call_wrapped, { 'return_true', op, {}, space_name }) - local stats = g:get_stats() + local stats = get_stats(g) t.assert_equals(stats, {}) end @@ -413,57 +434,57 @@ local function prepare_non_default_stats(g) local op = stats_module.op.INSERT g.router:eval(call_wrapped, { 'return_true', op, {}, space_name }) - local stats = g:get_stats(space_name) + local stats = get_stats(g, space_name) t.assert_equals(stats[op].ok.count, 1, 'Non-zero stats prepared') return stats end -g.test_enable_is_idempotent = function(g) +pgroup.test_enable_with_same_driver_is_idempotent = function(g) local stats_before = prepare_non_default_stats(g) - g:enable_stats() + enable_stats(g) - local stats_after = g:get_stats(space_name) + local stats_after = get_stats(g, space_name) t.assert_equals(stats_after, stats_before, 'Stats have not been reset') end -g.test_reset = function(g) +pgroup.test_reset = function(g) prepare_non_default_stats(g) - g:reset_stats() + reset_stats(g) - local stats = g:get_stats(space_name) + local stats = get_stats(g, space_name) t.assert_equals(stats, {}, 'Stats have been reset') end -g.test_reset_for_disabled_stats_does_not_init_module = function(g) - g:disable_stats() +pgroup.test_reset_for_disabled_stats_does_not_init_module = function(g) + disable_stats(g) - local stats_before = g:get_stats() + local stats_before = get_stats(g) t.assert_equals(stats_before, {}, "Stats is empty") - g:reset_stats() + reset_stats(g) - local stats_after = g:get_stats() + local stats_after = get_stats(g) t.assert_equals(stats_after, {}, "Stats is still empty") end -g.test_enabling_stats_on_non_router_throws_error = function(g) +pgroup.test_enabling_stats_on_non_router_throws_error = function(g) local storage = g.cluster:server('s1-master').net_box t.assert_error(storage.eval, storage, " require('crud.stats.module').enable() ") end 
-g.test_stats_fetch_callback = function(g) +pgroup.test_stats_fetch_callback = function(g) local storage_cursor_stats = { tuples_fetched = 5, tuples_lookup = 25 } g.router:eval([[ stats_module.get_fetch_callback()(...) ]], { storage_cursor_stats, space_name }) local op = stats_module.op.SELECT - local stats = g:get_stats(space_name) + local stats = get_stats(g, space_name) t.assert_not_equals(stats[op], nil, 'Fetch stats update inits SELECT collectors') @@ -476,8 +497,8 @@ g.test_stats_fetch_callback = function(g) 'tuples_lookup is inremented by expected value') end -g.test_disable_stats_before_fetch_callback_get_do_not_break_call = function(g) - g:disable_stats() +pgroup.test_disable_stats_before_fetch_callback_get_do_not_break_call = function(g) + disable_stats(g) local storage_cursor_stats = { tuples_fetched = 5, tuples_lookup = 25 } g.router:eval([[ stats_module.get_fetch_callback()(...) ]], @@ -486,7 +507,7 @@ g.test_disable_stats_before_fetch_callback_get_do_not_break_call = function(g) t.success('No unexpected errors') end -g.test_disable_stats_after_fetch_callback_get_do_not_break_call = function(g) +pgroup.test_disable_stats_after_fetch_callback_get_do_not_break_call = function(g) local storage_cursor_stats = { tuples_fetched = 5, tuples_lookup = 25 } g.router:eval([[ @@ -498,10 +519,70 @@ g.test_disable_stats_after_fetch_callback_get_do_not_break_call = function(g) t.success('No unexpected errors') end -g.test_resolve_name_from_id = function(g) +pgroup.test_space_is_known_to_registry_after_details_observe = function(g) + local storage_cursor_stats = { tuples_fetched = 5, tuples_lookup = 25 } + + g.router:eval([[ stats_module.get_fetch_callback()(...) ]], + { storage_cursor_stats, space_name }) + + local is_unknown_space = g.router:eval([[ + return stats_module.internal.registry.is_unknown_space(...) 
+ ]], { space_name }) + + t.assert_equals(is_unknown_space, false) +end + +pgroup.test_resolve_name_from_id = function(g) local op = stats_module.op.LEN g.router:eval(call_wrapped, { 'return_true', stats_module.op.LEN, {}, space_id }) - local stats = g:get_stats(space_name) + local stats = get_stats(g, space_name) t.assert_not_equals(stats[op], nil, "Statistics is filled by name") end + +group_driver.test_default_driver = function(g) + local driver = g.router:eval(" return stats_module.internal.driver ") + + if g.is_metrics_supported then + t.assert_equals(driver, 'metrics') + else + t.assert_equals(driver, 'local') + end +end + +group_driver.before_test( + 'test_stats_reenable_with_different_driver_reset_stats', + function(g) + t.skip_if(g.is_metrics_supported == false, 'Metrics registry is unsupported') + end +) + +group_driver.test_stats_reenable_with_different_driver_reset_stats = function(g) + enable_stats(g, { driver = 'metrics' }) + + prepare_non_default_stats(g) + + enable_stats(g, { driver = 'local' }) + local stats = get_stats(g) + t.assert_equals(stats.spaces, {}, 'Stats have been reset') +end + +group_driver.test_unknown_driver_throws_error = function(g) + t.assert_error_msg_contains( + 'Unsupported driver: unknown', + enable_stats, g, { driver = 'unknown' }) +end + +group_driver.before_test( + 'test_stats_enable_with_metrics_throws_error_if_unsupported', + function(g) + t.skip_if(g.is_metrics_supported == true, 'Metrics registry is supported') + end +) + +group_driver.test_stats_enable_with_metrics_throws_error_if_unsupported = function(g) + t.assert_error_msg_contains( + 'Unsupported driver: metrics', + enable_stats, g, { driver = 'metrics' }) +end +