From 7fd9652dd82c3ffb985a07a026884fe655bf04b7 Mon Sep 17 00:00:00 2001 From: Georgy Moiseev Date: Fri, 28 Jan 2022 19:37:53 +0300 Subject: [PATCH] stats: integrate with metrics rock If `metrics` [1] found, you can use metrics collectors to store statistics. `metrics >= 0.10.0` is required to use metrics driver. (`metrics >= 0.9.0` is required to use summary quantiles with age buckets. `metrics >= 0.5.0, < 0.9.0` is unsupported due to quantile overflow bug [2]. `metrics == 0.9.0` has bug that do not permits to create summary collector without quantiles [3]. In fact, user may use `metrics >= 0.5.0`, `metrics != 0.9.0` if he wants to use metrics without quantiles, and `metrics >= 0.9.0` if he wants to use metrics with quantiles. But this is confusing, so let's use a single restriction for both cases.) The metrics are part of global registry and can be exported together (e.g. to Prometheus) with default tools without any additional configuration. Disabling stats destroys the collectors. Metrics collectors are used by default if supported. To explicitly set driver, call `crud.cfg{ stats = true, stats_driver = driver }` ('local' or 'metrics'). To enable quantiles, call ``` crud.cfg{ stats = true, stats_driver = 'metrics', stats_quantiles = true, } ``` With quantiles, `latency` statistics are changed to 0.99 quantile of request execution time (with aging). Quantiles computations increases performance overhead up to 10% when used in statistics. Add CI matrix to run tests with `metrics` installed. To get full coverage on coveralls, #248 must be resolved. 1. https://github.com/tarantool/metrics 2. https://github.com/tarantool/metrics/issues/235 3. 
https://github.com/tarantool/metrics/issues/262 Closes #224 --- .github/workflows/test_on_push.yaml | 21 +- CHANGELOG.md | 1 + README.md | 60 +++- crud/cfg.lua | 70 ++++- crud/common/stash.lua | 6 +- crud/stats/init.lua | 119 +++++++- crud/stats/local_registry.lua | 17 +- crud/stats/metrics_registry.lua | 376 +++++++++++++++++++++++ test/integration/cfg_test.lua | 7 +- test/integration/stats_test.lua | 442 +++++++++++++++++++++++++--- test/unit/stats_test.lua | 207 +++++++++---- 11 files changed, 1190 insertions(+), 136 deletions(-) create mode 100644 crud/stats/metrics_registry.lua diff --git a/.github/workflows/test_on_push.yaml b/.github/workflows/test_on_push.yaml index bcf6d348c..bec97dc57 100644 --- a/.github/workflows/test_on_push.yaml +++ b/.github/workflows/test_on_push.yaml @@ -13,13 +13,21 @@ jobs: matrix: # We need 1.10.6 here to check that module works with # old Tarantool versions that don't have "tuple-keydef"/"tuple-merger" support. - tarantool-version: ["1.10.6", "1.10", "2.2", "2.3", "2.4", "2.5", "2.6", "2.7"] + tarantool-version: ["1.10.6", "1.10", "2.2", "2.3", "2.4", "2.5", "2.6", "2.7", "2.8"] + metrics-version: [""] remove-merger: [false] include: + - tarantool-version: "1.10" + metrics-version: "0.12.0" - tarantool-version: "2.7" remove-merger: true + - tarantool-version: "2.8" + metrics-version: "0.1.8" + - tarantool-version: "2.8" + metrics-version: "0.10.0" - tarantool-version: "2.8" coveralls: true + metrics-version: "0.12.0" fail-fast: false runs-on: [ubuntu-latest] steps: @@ -47,6 +55,10 @@ jobs: tarantool --version ./deps.sh + - name: Install metrics + if: matrix.metrics-version != '' + run: tarantoolctl rocks install metrics ${{ matrix.metrics-version }} + - name: Remove external merger if needed if: ${{ matrix.remove-merger }} run: rm .rocks/lib/tarantool/tuple/merger.so @@ -71,6 +83,7 @@ jobs: strategy: matrix: bundle_version: [ "1.10.11-0-gf0b0e7ecf-r422", "2.7.3-0-gdddf926c3-r422" ] + metrics-version: ["", "0.12.0"] fail-fast: 
false runs-on: [ ubuntu-latest ] steps: @@ -86,6 +99,12 @@ jobs: tarantool --version ./deps.sh + - name: Install metrics + if: matrix.metrics-version != '' + run: | + source tarantool-enterprise/env.sh + tarantoolctl rocks install metrics ${{ matrix.metrics-version }} + # This server starts and listen on 8084 port that is used for tests - name: Stop Mono server run: sudo kill -9 $(sudo lsof -t -i tcp:8084) || true diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ebfd2031..135bfffeb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added * Statistics for CRUD operations on router (#224). +* Integrate CRUD statistics with [`metrics`](https://github.com/tarantool/metrics) (#224). ### Changed diff --git a/README.md b/README.md index 9396b207d..5f9891dcc 100644 --- a/README.md +++ b/README.md @@ -694,11 +694,28 @@ crud.cfg{ stats = false } crud.reset_stats() ``` +If [`metrics`](https://github.com/tarantool/metrics) `0.10.0` or greater +found, metrics collectors will be used by default to store statistics +instead of local collectors. Quantiles in metrics summary collections +are disabled by default. You can manually choose driver and enable quantiles. +```lua +-- Use metrics collectors. (Default if metrics found). +crud.cfg{ stats = true, stats_driver = 'metrics' } + +-- Use metrics collectors with 0.99 quantiles. +crud.cfg{ stats = true, stats_driver = 'metrics', stats_quantiles = true } + +-- Use simple local collectors. +crud.cfg{ stats = true, stats_driver = 'local' } +``` + You can use `crud.cfg` to check current stats state. ```lua crud.cfg --- -- stats: true +- stats_quantiles: true + stats: true + stats_driver: local ... ``` Beware that iterating through `crud.cfg` with pairs is not supported yet, @@ -750,9 +767,39 @@ and `borders` (for `min` and `max` calls). 
Each operation section contains of different collectors for success calls and error (both error throw and `nil, err`) returns. `count` is total requests count since instance start -or stats restart. `latency` is average time of requests execution, +or stats restart. `latency` is 0.99 quantile of request execution +time if `metrics` driver used and quantiles enabled, +otherwise `latency` is total average. `time` is the total time of requests execution. +In [`metrics`](https://www.tarantool.io/en/doc/latest/book/monitoring/) +registry statistics are stored as `tnt_crud_stats` metrics +with `operation`, `status` and `name` labels. +``` +metrics:collect() +--- +- - label_pairs: + status: ok + operation: insert + name: customers + value: 221411 + metric_name: tnt_crud_stats_count + - label_pairs: + status: ok + operation: insert + name: customers + value: 10.49834896344692 + metric_name: tnt_crud_stats_sum + - label_pairs: + status: ok + operation: insert + name: customers + quantile: 0.99 + value: 0.00023606420935973 + metric_name: tnt_crud_stats +... +``` + `select` section additionally contains `details` collectors. ```lua crud.stats('my_space').select.details @@ -769,6 +816,10 @@ looked up on storages while collecting responses for calls (including scrolls for multibatch requests). Details data is updated as part of the request process, so you may get new details before `select`/`pairs` call is finished and observed with count, latency and time collectors. +In [`metrics`](https://www.tarantool.io/en/doc/latest/book/monitoring/) +registry they are stored as `tnt_crud_map_reduces`, +`tnt_crud_tuples_fetched` and `tnt_crud_tuples_lookup` metrics +with `{ operation = 'select', name = space_name }` labels. Since `pairs` request behavior differs from any other crud request, its statistics collection also has specific behavior. Statistics (`select` @@ -780,7 +831,10 @@ collector. Statistics are preserved between package reloads. 
Statistics are preserved between [Tarantool Cartridge role reloads](https://www.tarantool.io/en/doc/latest/book/cartridge/cartridge_api/modules/cartridge.roles/#reload) -if you use CRUD Cartridge roles. +if you use CRUD Cartridge roles. Beware that metrics 0.12.0 and below do not +support preserving stats between role reload +(see [tarantool/metrics#334](https://github.com/tarantool/metrics/issues/334)), +thus this feature will be unsupported for `metrics` driver. ## Cartridge roles diff --git a/crud/cfg.lua b/crud/cfg.lua index 47c305a80..a12db3150 100644 --- a/crud/cfg.lua +++ b/crud/cfg.lua @@ -17,11 +17,49 @@ local function set_defaults_if_empty(cfg) cfg.stats = false end + if cfg.stats_driver == nil then + cfg.stats_driver = stats.get_default_driver() + end + + if cfg.stats_quantiles == nil then + cfg.stats_quantiles = false + end + return cfg end local cfg = set_defaults_if_empty(stash.get(stash.name.cfg)) +local function configure_stats(cfg, opts) + if (opts.stats == nil) + and (opts.stats_driver == nil) + and (opts.stats_quantiles == nil) then + return + end + + if opts.stats == nil then + opts.stats = cfg.stats + end + + if opts.stats_driver == nil then + opts.stats_driver = cfg.stats_driver + end + + if opts.stats_quantiles == nil then + opts.stats_quantiles = cfg.stats_quantiles + end + + if opts.stats == true then + stats.enable{ driver = opts.stats_driver, quantiles = opts.stats_quantiles } + else + stats.disable() + end + + rawset(cfg, 'stats', opts.stats) + rawset(cfg, 'stats_driver', opts.stats_driver) + rawset(cfg, 'stats_quantiles', opts.stats_quantiles) +end + --- Configure CRUD module. -- -- @function __call @@ -34,22 +72,32 @@ local cfg = set_defaults_if_empty(stash.get(stash.name.cfg)) -- Enable or disable statistics collect. -- Statistics are observed only on router instances. -- +-- @string[opt] opts.stats_driver +-- `'local'` or `'metrics'`. 
+-- If `'local'`, stores statistics in local registry (some Lua tables) +-- and computes latency as overall average. `'metrics'` requires +-- `metrics >= 0.10.0` installed and stores statistics in +-- global metrics registry (integrated with exporters). +-- `'metrics'` driver supports computing latency as 0.99 quantile with aging. +-- If `'metrics'` driver is available, it is used by default, +-- otherwise `'local'` is used. +-- +-- @bool[opt] opts.stats_quantiles +-- Enable or disable statistics quantiles (only for metrics driver). +-- Quantiles computations increases performance overhead up to 10%. +-- -- @return Configuration table. -- local function __call(self, opts) - checks('table', { stats = '?boolean' }) + checks('table', { + stats = '?boolean', + stats_driver = '?string', + stats_quantiles = '?boolean' + }) - opts = opts or {} + opts = table.deepcopy(opts) or {} - if opts.stats ~= nil then - if opts.stats == true then - stats.enable() - else - stats.disable() - end - - rawset(cfg, 'stats', opts.stats) - end + configure_stats(cfg, opts) return self end diff --git a/crud/common/stash.lua b/crud/common/stash.lua index 3cb65ed1c..0557cb40e 100644 --- a/crud/common/stash.lua +++ b/crud/common/stash.lua @@ -16,10 +16,14 @@ local stash = {} -- @tfield string stats_local_registry -- Stash for local metrics registry. -- +-- @tfield string stats_metrics_registry +-- Stash for metrics rocks statistics registry. +-- stash.name = { cfg = '__crud_cfg', stats_internal = '__crud_stats_internal', - stats_local_registry = '__crud_stats_local_registry' + stats_local_registry = '__crud_stats_local_registry', + stats_metrics_registry = '__crud_stats_metrics_registry' } --- Setup Tarantool Cartridge reload. 
diff --git a/crud/stats/init.lua b/crud/stats/init.lua index 1b707bb27..60760d153 100644 --- a/crud/stats/init.lua +++ b/crud/stats/init.lua @@ -4,6 +4,7 @@ local clock = require('clock') local checks = require('checks') +local errors = require('errors') local fiber = require('fiber') local fun = require('fun') local log = require('log') @@ -13,11 +14,29 @@ local dev_checks = require('crud.common.dev_checks') local stash = require('crud.common.stash') local utils = require('crud.common.utils') local op_module = require('crud.stats.operation') -local registry = require('crud.stats.local_registry') + +local StatsError = errors.new_class('StatsError', {capture_stack = false}) local stats = {} local internal = stash.get(stash.name.stats_internal) +local local_registry = require('crud.stats.local_registry') +local metrics_registry = require('crud.stats.metrics_registry') + +local drivers = { + ['local'] = local_registry, +} +if metrics_registry.is_supported() then + drivers['metrics'] = metrics_registry +end + +function internal:get_registry() + if self.driver == nil then + return nil + end + return drivers[self.driver] +end + --- Check if statistics module was enabled. -- -- @function is_enabled @@ -25,7 +44,21 @@ local internal = stash.get(stash.name.stats_internal) -- @treturn boolean Returns `true` or `false`. -- function stats.is_enabled() - return internal.is_enabled == true + return internal.driver ~= nil +end + +--- Get default statistics driver name. +-- +-- @function get_default_driver +-- +-- @treturn string `metrics` if supported, `local` if unsupported. +-- +function stats.get_default_driver() + if drivers.metrics ~= nil then + return 'metrics' + else + return 'local' + end end --- Initializes statistics registry, enables callbacks and wrappers. @@ -34,15 +67,60 @@ end -- -- @function enable -- +-- @tab[opt] opts +-- +-- @string[opt] opts.driver +-- `'local'` or `'metrics'`. 
+-- If `'local'`, stores statistics in local registry (some Lua tables) +-- and computes latency as overall average. `'metrics'` requires +-- `metrics >= 0.10.0` installed and stores statistics in +-- global metrics registry (integrated with exporters). +-- `'metrics'` driver supports computing latency as 0.99 quantile with aging. +-- If `'metrics'` driver is available, it is used by default, +-- otherwise `'local'` is used. +-- +-- @bool[opt=false] opts.quantiles +-- If `'metrics'` driver is used, you can enable +-- computing requests latency as 0.99 quantile with aging. +-- Performance overhead for enabling is near 10%. +-- -- @treturn boolean Returns `true`. -- -function stats.enable() - if stats.is_enabled() then +function stats.enable(opts) + checks({ driver = '?string', quantiles = '?boolean' }) + + StatsError:assert( + rawget(_G, 'crud') ~= nil, + 'Can be enabled only on crud router' + ) + + opts = table.deepcopy(opts) or {} + if opts.driver == nil then + opts.driver = stats.get_default_driver() + end + + StatsError:assert( + drivers[opts.driver] ~= nil, + 'Unsupported driver: %s', opts.driver + ) + + if opts.quantiles == nil then + opts.quantiles = false + end + + -- Do not reinit if called with same options. + if internal.driver == opts.driver + and internal.quantiles == opts.quantiles then return true end - internal.is_enabled = true - registry.init() + -- Disable old driver registry, if another one was requested. 
+ stats.disable() + + internal.driver = opts.driver + internal.quantiles = opts.quantiles + + internal:get_registry().init({ quantiles = internal.quantiles }) return true end @@ -61,8 +139,8 @@ function stats.reset() return true end - registry.destroy() - registry.init() + internal:get_registry().destroy() + internal:get_registry().init({ quantiles = internal.quantiles }) return true end @@ -80,8 +158,9 @@ function stats.disable() return true end - registry.destroy() - internal.is_enabled = false + internal:get_registry().destroy() + internal.driver = nil + internal.quantiles = nil return true end @@ -108,7 +187,7 @@ function stats.get(space_name) return {} end - return registry.get(space_name) + return internal:get_registry().get(space_name) end local function resolve_space_name(space_id) @@ -146,6 +225,8 @@ jit.off(keep_observer_alive) local function wrap_pairs_gen(build_latency, space_name, op, gen, param, state) local total_latency = build_latency + local registry = internal:get_registry() + -- If pairs() cycle will be interrupted with break, -- we'll never get a proper obervation. -- We create an object with the same lifespan as gen() @@ -156,7 +237,11 @@ local function wrap_pairs_gen(build_latency, space_name, op, gen, param, state) local gc_observer = setmt__gc({}, { __gc = function() if observed == false then - registry.observe(total_latency, space_name, op, 'ok') + -- Do not call observe directly because metrics + -- collectors may yield, for example + -- https://github.com/tarantool/metrics/blob/a23f8d49779205dd45bd211e21a1d34f26010382/metrics/collectors/shared.lua#L85 + -- Calling fiber.yield is prohibited in gc. + fiber.new(registry.observe, total_latency, space_name, op, 'ok') observed = true end end @@ -200,6 +285,8 @@ local function wrap_tail(space_name, op, pairs, start_time, call_status, ...) 
local finish_time = clock.monotonic() local latency = finish_time - start_time + local registry = internal:get_registry() + -- If space id is provided instead of name, try to resolve name. -- If resolve have failed, use id as string to observe space. -- If using space id will be deprecated, remove this code as well, @@ -316,7 +403,7 @@ local function update_fetch_stats(storage_stats, space_name) return true end - registry.observe_fetch( + internal:get_registry().observe_fetch( storage_stats.tuples_fetched, storage_stats.tuples_lookup, space_name @@ -356,7 +443,7 @@ function stats.update_map_reduces(space_name) return true end - registry.observe_map_reduces(1, space_name) + internal:get_registry().observe_map_reduces(1, space_name) return true end @@ -394,7 +481,9 @@ stats.op = op_module --- Stats module internal state (for debug/test). -- --- @tfield[opt] boolean is_enabled Is currently enabled. +-- @tfield[opt] string driver Current statistics registry driver (if nil, stats disabled). +-- +-- @tfield[opt] boolean quantiles Is quantiles computed. stats.internal = internal return stats diff --git a/crud/stats/local_registry.lua b/crud/stats/local_registry.lua index 9626f75ba..0fd6be5ec 100644 --- a/crud/stats/local_registry.lua +++ b/crud/stats/local_registry.lua @@ -2,6 +2,8 @@ -- @module crud.stats.local_registry -- +local errors = require('errors') + local dev_checks = require('crud.common.dev_checks') local stash = require('crud.common.stash') local op_module = require('crud.stats.operation') @@ -9,6 +11,7 @@ local registry_utils = require('crud.stats.registry_utils') local registry = {} local internal = stash.get(stash.name.stats_local_registry) +local StatsLocalError = errors.new_class('StatsLocalError', {capture_stack = false}) --- Initialize local metrics registry. -- @@ -17,9 +20,19 @@ local internal = stash.get(stash.name.stats_local_registry) -- -- @function init -- --- @treturn boolean Returns true. 
+-- @tab opts -- -function registry.init() +-- @bool opts.quantiles +-- Quantiles is not supported for local, only `false` is valid. +-- +-- @treturn boolean Returns `true`. +-- +function registry.init(opts) + dev_checks({ quantiles = 'boolean' }) + + StatsLocalError:assert(opts.quantiles == false, + "Quantiles are not supported for 'local' statistics registry") + internal.registry = {} internal.registry.spaces = {} diff --git a/crud/stats/metrics_registry.lua b/crud/stats/metrics_registry.lua new file mode 100644 index 000000000..0716aa4cd --- /dev/null +++ b/crud/stats/metrics_registry.lua @@ -0,0 +1,376 @@ +---- Internal module used to store statistics in `metrics` registry. +-- @module crud.stats.metrics_registry +-- + +local is_package, metrics = pcall(require, 'metrics') + +local dev_checks = require('crud.common.dev_checks') +local op_module = require('crud.stats.operation') +local stash = require('crud.common.stash') +local registry_utils = require('crud.stats.registry_utils') + +local registry = {} +-- Used to cache collectors. +local internal = stash.get(stash.name.stats_metrics_registry) + +local metric_name = { + -- Summary collector for all operations. + stats = 'tnt_crud_stats', + -- `*_count` and `*_sum` are automatically created + -- by summary collector. + stats_count = 'tnt_crud_stats_count', + stats_sum = 'tnt_crud_stats_sum', + + -- Counter collectors for select/pairs details. + details = { + tuples_fetched = 'tnt_crud_tuples_fetched', + tuples_lookup = 'tnt_crud_tuples_lookup', + map_reduces = 'tnt_crud_map_reduces', + } +} + +local LATENCY_QUANTILE = 0.99 + +-- Increasing tolerance threshold affects performance. +local DEFAULT_QUANTILES = { + [LATENCY_QUANTILE] = 1e-2, +} + +local DEFAULT_AGE_PARAMS = { + age_buckets_count = 2, + max_age_time = 60, +} + +--- Check if application supports metrics rock for registry +-- +-- `metrics >= 0.10.0` is required. +-- `metrics >= 0.9.0` is required to use summary quantiles with +-- age buckets. 
`metrics >= 0.5.0, < 0.9.0` is unsupported +-- due to quantile overflow bug +-- (https://github.com/tarantool/metrics/issues/235). +-- `metrics == 0.9.0` has a bug that does not permit +-- creating a summary collector without quantiles +-- (https://github.com/tarantool/metrics/issues/262). +-- In fact, a user may use `metrics >= 0.5.0`, `metrics != 0.9.0` +-- if they want to use metrics without quantiles, and `metrics >= 0.9.0` +-- if they want to use metrics with quantiles. But this is confusing, +-- so we use a single restriction solving both cases. +-- +-- @function is_supported +-- +-- @treturn boolean Returns `true` if `metrics >= 0.10.0` found, `false` otherwise. +-- +function registry.is_supported() + if is_package == false then + return false + end + + -- Only metrics >= 0.10.0 supported. + if metrics.unregister_callback == nil then + return false + end + + return true +end + +--- Initialize collectors in global metrics registry +-- +-- Registries are not meant to be used explicitly +-- by users, init is not guaranteed to be idempotent. +-- Destroy collectors only through this registry's methods. +-- +-- @function init +-- +-- @tab opts +-- +-- @bool opts.quantiles +-- If `true`, computes latency as 0.99 quantile with aging. +-- +-- @treturn boolean Returns `true`. 
+-- +function registry.init(opts) + dev_checks({ quantiles = 'boolean' }) + + internal.opts = table.deepcopy(opts) + + local quantile_params = nil + local age_params = nil + if opts.quantiles == true then + quantile_params = DEFAULT_QUANTILES + age_params = DEFAULT_AGE_PARAMS + end + + internal.registry = {} + internal.registry[metric_name.stats] = metrics.summary( + metric_name.stats, + 'CRUD router calls statistics', + quantile_params, + age_params) + + internal.registry[metric_name.details.tuples_fetched] = metrics.counter( + metric_name.details.tuples_fetched, + 'Tuples fetched from CRUD storages during select/pairs') + + internal.registry[metric_name.details.tuples_lookup] = metrics.counter( + metric_name.details.tuples_lookup, + 'Tuples looked up on CRUD storages while collecting response during select/pairs') + + internal.registry[metric_name.details.map_reduces] = metrics.counter( + metric_name.details.map_reduces, + 'Map reduces planned during CRUD select/pairs') + + return true +end + +--- Unregister collectors in global metrics registry. +-- +-- Registries are not meant to used explicitly +-- by users, destroy is not guaranteed to be idempotent. +-- Destroy collectors only through this registry methods. +-- +-- @function destroy +-- +-- @treturn boolean Returns `true`. +-- +function registry.destroy() + for _, c in pairs(internal.registry) do + metrics.registry:unregister(c) + end + + internal.registry = nil + internal.opts = nil + + return true +end + +--- Compute `latency` field of an observation. +-- +-- If it is a `{ time = ..., count = ... }` observation, +-- compute latency as overall average and store it +-- inside observation object. +-- +-- @function compute_obs_latency +-- @local +-- +-- @tab obs +-- Objects from `registry_utils` +-- `stats.spaces[name][op][status]`. +-- If something like `details` collector +-- passed, do nothing. 
+-- +local function compute_obs_latency(obs) + if obs.count == nil or obs.time == nil then + return + end + + if obs.count == 0 then + obs.latency = 0 + else + obs.latency = obs.time / obs.count + end +end + +--- Compute `latency` field of each observation. +-- +-- If quantiles disabled, we need to compute +-- latency as overall average from `time` and +-- `count` values. +-- +-- @function compute_latencies +-- @local +-- +-- @tab stats +-- Object from registry_utils stats. +-- +local function compute_latencies(stats) + for _, space_stats in pairs(stats.spaces) do + for _, op_stats in pairs(space_stats) do + for _, obs in pairs(op_stats) do + compute_obs_latency(obs) + end + end + end +end + +--- Get copy of global metrics registry. +-- +-- Registries are not meant to used explicitly +-- by users, get is not guaranteed to work without init. +-- +-- @function get +-- +-- @string[opt] space_name +-- If specified, returns table with statistics +-- of operations on table, separated by operation type and +-- execution status. If there wasn't any requests for table, +-- returns `{}`. If not specified, returns table with statistics +-- about all existing spaces, count of calls to spaces +-- that wasn't found and count of schema reloads. +-- +-- @treturn table Returns copy of metrics registry. +function registry.get(space_name) + dev_checks('?string') + + local stats = { + spaces = {}, + } + + -- Fill operation basic statistics values. + for _, obs in ipairs(internal.registry[metric_name.stats]:collect()) do + local op = obs.label_pairs.operation + local status = obs.label_pairs.status + local name = obs.label_pairs.name + + if space_name ~= nil and name ~= space_name then + goto stats_continue + end + + registry_utils.init_collectors_if_required(stats.spaces, name, op) + local space_stats = stats.spaces[name] + + -- metric_name.stats presents only if quantiles enabled. 
+ if obs.metric_name == metric_name.stats then + if obs.label_pairs.quantile == LATENCY_QUANTILE then + space_stats[op][status].latency = obs.value + end + elseif obs.metric_name == metric_name.stats_sum then + space_stats[op][status].time = obs.value + elseif obs.metric_name == metric_name.stats_count then + space_stats[op][status].count = obs.value + end + + :: stats_continue :: + end + + if not internal.opts.quantiles then + compute_latencies(stats) + end + + -- Fill select/pairs detail statistics values. + for stat_name, metric_name in pairs(metric_name.details) do + for _, obs in ipairs(internal.registry[metric_name]:collect()) do + local name = obs.label_pairs.name + local op = obs.label_pairs.operation + + if space_name ~= nil and name ~= space_name then + goto details_continue + end + + registry_utils.init_collectors_if_required(stats.spaces, name, op) + stats.spaces[name][op].details[stat_name] = obs.value + + :: details_continue :: + end + end + + if space_name ~= nil then + return stats.spaces[space_name] or {} + end + + return stats +end + +--- Increase requests count and update latency info. +-- +-- @function observe +-- +-- @number latency +-- Time of call execution. +-- +-- @string space_name +-- Name of space. +-- +-- @string op +-- Label of registry collectors. +-- Use `require('crud.stats').op` to pick one. +-- +-- @string status +-- `'ok'` if no errors on execution, `'error'` otherwise. +-- +-- @treturn boolean Returns `true`. +-- +function registry.observe(latency, space_name, op, status) + dev_checks('number', 'string', 'string', 'string') + + -- Use `operation` label to be consistent with `tnt_stats_op_*` labels. + -- Use `name` label to be consistent with `tnt_space_*` labels. + -- Use `status` label to be consistent with `tnt_vinyl_*` and HTTP metrics labels. 
+ local label_pairs = { operation = op, name = space_name, status = status } + + internal.registry[metric_name.stats]:observe(latency, label_pairs) + + return true +end + +--- Increase statistics of storage select/pairs calls. +-- +-- @function observe_fetch +-- +-- @string space_name +-- Name of space. +-- +-- @number tuples_fetched +-- Count of tuples fetched during storage call. +-- +-- @number tuples_lookup +-- Count of tuples looked up on storages while collecting response. +-- +-- @treturn boolean Returns `true`. +-- +function registry.observe_fetch(tuples_fetched, tuples_lookup, space_name) + dev_checks('number', 'number', 'string') + + local label_pairs = { name = space_name, operation = op_module.SELECT } + + internal.registry[metric_name.details.tuples_fetched]:inc(tuples_fetched, label_pairs) + internal.registry[metric_name.details.tuples_lookup]:inc(tuples_lookup, label_pairs) + + return true +end + +--- Increase statistics of planned map reduces during select/pairs. +-- +-- @function observe_map_reduces +-- +-- @number count +-- Count of map reduces planned. +-- +-- @string space_name +-- Name of space. +-- +-- @treturn boolean Returns `true`. +-- +function registry.observe_map_reduces(count, space_name) + dev_checks('number', 'string') + + local label_pairs = { name = space_name, operation = op_module.SELECT } + internal.registry[metric_name.details.map_reduces]:inc(count, label_pairs) + + return true +end + +-- Workaround for https://github.com/tarantool/metrics/issues/334 . +-- This workaround does not prevent observations reset between role reloads, +-- but it fixes collector unlink from registry. Without this workaround, +-- we will continue to use cached collectors that are already cleaned up +-- from registry and changes will not appear in metrics export output. +local function workaround_role_reload() + if not registry.is_supported() then + return + end + + -- Check if this registry was enabled before reload. 
+ if internal.registry == nil then + return + end + + -- Check if base collector is in metrics package registry. + -- If it's not, then registry has been cleaned up on role reload. + if metrics.registry:find('summary', metric_name.stats) == nil then + registry.init(internal.opts) + end +end + +workaround_role_reload() + +return registry \ No newline at end of file diff --git a/test/integration/cfg_test.lua b/test/integration/cfg_test.lua index 718a21c1f..81ae0e1a6 100644 --- a/test/integration/cfg_test.lua +++ b/test/integration/cfg_test.lua @@ -2,6 +2,7 @@ local fio = require('fio') local t = require('luatest') +local stats = require('crud.stats') local helpers = require('test.helper') local group = t.group('cfg') @@ -21,7 +22,11 @@ group.after_all(function(g) helpers.stop_cluster(g.cluster) end) group.test_defaults = function(g) local cfg = g.cluster:server('router'):eval("return require('crud').cfg") - t.assert_equals(cfg, { stats = false }) + t.assert_equals(cfg, { + stats = false, + stats_driver = stats.get_default_driver(), + stats_quantiles = false, + }) end group.test_change_value = function(g) diff --git a/test/integration/stats_test.lua b/test/integration/stats_test.lua index 9da069771..cbe4827f6 100644 --- a/test/integration/stats_test.lua +++ b/test/integration/stats_test.lua @@ -4,7 +4,16 @@ local t = require('luatest') local stats_registry_utils = require('crud.stats.registry_utils') -local g = t.group('stats_integration') +local pgroup = t.group('stats_integration', { + { driver = 'local' }, + { driver = 'metrics', quantiles = false }, + { driver = 'metrics', quantiles = true }, +}) +local group_metrics = t.group('stats_metrics_integration', { + { driver = 'metrics', quantiles = false }, + { driver = 'metrics', quantiles = true }, +}) + local helpers = require('test.helper') local space_id = 542 @@ -13,7 +22,7 @@ local non_existing_space_id = 100500 local non_existing_space_name = 'non_existing_space' local new_space_name = 'newspace' 
-g.before_all(function(g) +local function before_all(g) g.cluster = helpers.Cluster:new({ datadir = fio.tempdir(), server_command = helpers.entrypoint('srv_stats'), @@ -23,24 +32,66 @@ g.before_all(function(g) g.cluster:start() g.router = g.cluster:server('router').net_box - helpers.prepare_simple_functions(g.router) - g.router:eval("require('crud').cfg{ stats = true }") -end) + if g.params.driver == 'metrics' then + local is_metrics_supported = g.router:eval([[ + return require('crud.stats.metrics_registry').is_supported() + ]]) + t.skip_if(is_metrics_supported == false, 'Metrics registry is unsupported') + end +end -g.after_all(function(g) +local function after_all(g) helpers.stop_cluster(g.cluster) -end) +end + +local function get_stats(g, space_name) + return g.router:eval("return require('crud').stats(...)", { space_name }) +end + +local function enable_stats(g, params) + params = params or g.params + g.router:eval([[ + local params = ... + require('crud').cfg{ + stats = true, + stats_driver = params.driver, + stats_quantiles = params.quantiles + } + ]], { params }) +end -g.before_each(function(g) +local function disable_stats(g) + g.router:eval("require('crud').cfg{ stats = false }") +end + +local function before_each(g) g.router:eval("crud = require('crud')") + enable_stats(g) helpers.truncate_space_on_cluster(g.cluster, space_name) helpers.drop_space_on_cluster(g.cluster, new_space_name) -end) +end -function g:get_stats(space_name) - return self.router:eval("return require('crud').stats(...)", { space_name }) +local function get_metrics(g) + return g.router:eval("return require('metrics').collect()") end +pgroup.before_all(before_all) + +pgroup.after_all(after_all) + +pgroup.before_each(before_each) + +pgroup.after_each(disable_stats) + + +group_metrics.before_all(before_all) + +group_metrics.after_all(after_all) + +group_metrics.before_each(before_each) + +group_metrics.after_each(disable_stats) + local function create_new_space(g) 
helpers.call_on_storages(g.cluster, function(server) @@ -414,12 +465,12 @@ for name, case in pairs(simple_operation_cases) do local test_name = ('test_%s'):format(name) if case.prepare ~= nil then - g.before_test(test_name, case.prepare) + pgroup.before_test(test_name, case.prepare) end - g[test_name] = function(g) + pgroup[test_name] = function(g) -- Collect stats before call. - local stats_before = g:get_stats(space_name) + local stats_before = get_stats(g, space_name) t.assert_type(stats_before, 'table') -- Call operation. @@ -445,7 +496,7 @@ for name, case in pairs(simple_operation_cases) do end -- Collect stats after call. - local stats_after = g:get_stats(space_name) + local stats_after = get_stats(g, space_name) t.assert_type(stats_after, 'table') t.assert_not_equals(stats_after[case.op], nil) @@ -491,7 +542,7 @@ end -- Call some operation on non-existing -- space and ensure statistics are updated. -g.before_test('test_non_existing_space', function(g) +pgroup.before_test('test_non_existing_space', function(g) t.assert_equals( helpers.is_space_exist(g.router, non_existing_space_name), false, @@ -499,11 +550,11 @@ g.before_test('test_non_existing_space', function(g) ) end) -g.test_non_existing_space = function(g) +pgroup.test_non_existing_space = function(g) local op = 'get' -- Collect stats before call. - local stats_before = g:get_stats(non_existing_space_name) + local stats_before = get_stats(g, non_existing_space_name) t.assert_type(stats_before, 'table') local op_before = set_defaults_if_empty(stats_before, op) @@ -512,7 +563,7 @@ g.test_non_existing_space = function(g) t.assert_not_equals(err, nil) -- Collect stats after call. 
- local stats_after = g:get_stats(non_existing_space_name) + local stats_after = get_stats(g, non_existing_space_name) t.assert_type(stats_after, 'table') local op_after = stats_after[op] t.assert_type(op_after, 'table', 'Section has been created if not existed') @@ -525,14 +576,14 @@ end for name, case in pairs(select_cases) do local test_name = ('test_%s_details'):format(name) - g.before_test(test_name, prepare_select_data) + pgroup.before_test(test_name, prepare_select_data) - g[test_name] = function(g) + pgroup[test_name] = function(g) local op = 'select' local space_name = space_name -- Collect stats before call. - local stats_before = g:get_stats(space_name) + local stats_before = get_stats(g, space_name) t.assert_type(stats_before, 'table') -- Call operation. @@ -546,7 +597,7 @@ for name, case in pairs(select_cases) do t.assert_equals(err, nil) -- Collect stats after call. - local stats_after = g:get_stats(space_name) + local stats_after = get_stats(g, space_name) t.assert_type(stats_after, 'table') local op_before = set_defaults_if_empty(stats_before, op) @@ -569,55 +620,58 @@ for name, case in pairs(select_cases) do end -g.test_resolve_name_from_id = function(g) +pgroup.test_resolve_name_from_id = function(g) local op = 'len' g.router:call('crud.len', { space_id }) - local stats = g:get_stats(space_name) + local stats = get_stats(g, space_name) t.assert_not_equals(stats[op], nil, "Statistics is filled by name") end -g.test_resolve_nonexisting_space_from_id = function(g) +pgroup.test_resolve_nonexisting_space_from_id = function(g) local op = 'len' g.router:call('crud.len', { non_existing_space_id }) - local stats = g:get_stats(tostring(non_existing_space_id)) + local stats = get_stats(g, tostring(non_existing_space_id)) t.assert_not_equals(stats[op], nil, "Statistics is filled by id as string") end -g.before_test( +pgroup.before_test( 'test_role_reload_do_not_reset_observations', generate_stats) -g.test_role_reload_do_not_reset_observations = function(g) - 
local stats_before = g:get_stats() +pgroup.test_role_reload_do_not_reset_observations = function(g) + t.xfail_if(g.params.driver == 'metrics', + 'See https://github.com/tarantool/metrics/issues/334') + + local stats_before = get_stats(g) helpers.reload_roles(g.cluster:server('router')) - local stats_after = g:get_stats() + local stats_after = get_stats(g) t.assert_equals(stats_after, stats_before) end -g.before_test( +pgroup.before_test( 'test_module_reload_do_not_reset_observations', generate_stats) -g.test_module_reload_do_not_reset_observations = function(g) - local stats_before = g:get_stats() +pgroup.test_module_reload_do_not_reset_observations = function(g) + local stats_before = get_stats(g) helpers.reload_package(g.cluster:server('router')) - local stats_after = g:get_stats() + local stats_after = get_stats(g) t.assert_equals(stats_after, stats_before) end -g.test_spaces_created_in_runtime_supported_with_stats = function(g) +pgroup.test_spaces_created_in_runtime_supported_with_stats = function(g) local op = 'insert' - local stats_before = g:get_stats(new_space_name) + local stats_before = get_stats(g, new_space_name) local op_before = set_defaults_if_empty(stats_before, op) create_new_space(g) @@ -625,7 +679,7 @@ g.test_spaces_created_in_runtime_supported_with_stats = function(g) local _, err = g.router:call('crud.insert', { new_space_name, { 1, box.NULL }}) t.assert_equals(err, nil) - local stats_after = g:get_stats(new_space_name) + local stats_after = get_stats(g, new_space_name) local op_after = stats_after[op] t.assert_type(op_after, 'table', "'insert' stats found for new space") t.assert_type(op_after.ok, 'table', "success 'insert' stats found for new space") @@ -634,7 +688,7 @@ g.test_spaces_created_in_runtime_supported_with_stats = function(g) end -g.before_test( +pgroup.before_test( 'test_spaces_dropped_in_runtime_supported_with_stats', function(g) create_new_space(g) @@ -643,9 +697,9 @@ g.before_test( t.assert_equals(err, nil) end) 
-g.test_spaces_dropped_in_runtime_supported_with_stats = function(g) +pgroup.test_spaces_dropped_in_runtime_supported_with_stats = function(g) local op = 'insert' - local stats_before = g:get_stats(new_space_name) + local stats_before = get_stats(g, new_space_name) local op_before = set_defaults_if_empty(stats_before, op) t.assert_type(op_before, 'table', "'insert' stats found for new space") @@ -654,10 +708,314 @@ g.test_spaces_dropped_in_runtime_supported_with_stats = function(g) local _, err = g.router:call('crud.insert', { new_space_name, { 2, box.NULL }}) t.assert_not_equals(err, nil, "Should trigger 'space not found' error") - local stats_after = g:get_stats(new_space_name) + local stats_after = get_stats(g, new_space_name) local op_after = stats_after[op] t.assert_type(op_after, 'table', "'insert' stats found for dropped new space") t.assert_type(op_after.error, 'table', "error 'insert' stats found for dropped new space") t.assert_equals(op_after.error.count - op_before.error.count, 1, "Error requests count incremented since space was known to registry before drop") end + +-- https://github.com/tarantool/metrics/blob/fc5a67072340b12f983f09b7d383aca9e2f10cf1/test/utils.lua#L22-L31 +local function find_obs(metric_name, label_pairs, observations) + for _, obs in pairs(observations) do + local same_label_pairs = pcall(t.assert_equals, obs.label_pairs, label_pairs) + if obs.metric_name == metric_name and same_label_pairs then + return obs + end + end + + return { value = 0 } +end + +-- https://github.com/tarantool/metrics/blob/fc5a67072340b12f983f09b7d383aca9e2f10cf1/test/utils.lua#L55-L63 +local function find_metric(metric_name, metrics_data) + local m = {} + for _, v in ipairs(metrics_data) do + if v.metric_name == metric_name then + table.insert(m, v) + end + end + return #m > 0 and m or nil +end + +local function get_unique_label_values(metrics_data, label_key) + local label_values_map = {} + for _, v in ipairs(metrics_data) do + local label_pairs = 
v.label_pairs or {} + if label_pairs[label_key] ~= nil then + label_values_map[label_pairs[label_key]] = true + end + end + + local label_values = {} + for k, _ in pairs(label_values_map) do + table.insert(label_values, k) + end + + return label_values +end + +local function validate_metrics(g, metrics) + local quantile_stats + if g.params.quantiles == true then + quantile_stats = find_metric('tnt_crud_stats', metrics) + t.assert_type(quantile_stats, 'table', '`tnt_crud_stats` summary metrics found') + end + + local stats_count = find_metric('tnt_crud_stats_count', metrics) + t.assert_type(stats_count, 'table', '`tnt_crud_stats` summary metrics found') + + local stats_sum = find_metric('tnt_crud_stats_sum', metrics) + t.assert_type(stats_sum, 'table', '`tnt_crud_stats` summary metrics found') + + + local expected_operations = { 'insert', 'get', 'replace', 'update', + 'upsert', 'delete', 'select', 'truncate', 'len', 'count', 'borders' } + + if g.params.quantiles == true then + t.assert_items_equals(get_unique_label_values(quantile_stats, 'operation'), expected_operations, + 'Metrics are labelled with operation') + end + + t.assert_items_equals(get_unique_label_values(stats_count, 'operation'), expected_operations, + 'Metrics are labelled with operation') + + t.assert_items_equals(get_unique_label_values(stats_sum, 'operation'), expected_operations, + 'Metrics are labelled with operation') + + + local expected_statuses = { 'ok', 'error' } + + if g.params.quantiles == true then + t.assert_items_equals( + get_unique_label_values(quantile_stats, 'status'), + expected_statuses, + 'Metrics are labelled with status') + end + + t.assert_items_equals(get_unique_label_values(stats_count, 'status'), expected_statuses, + 'Metrics are labelled with status') + + t.assert_items_equals(get_unique_label_values(stats_sum, 'status'), expected_statuses, + 'Metrics are labelled with status') + + + local expected_names = { space_name } + + if g.params.quantiles == true then + 
t.assert_items_equals( + get_unique_label_values(quantile_stats, 'name'), + expected_names, + 'Metrics are labelled with space name') + end + + t.assert_items_equals(get_unique_label_values(stats_count, 'name'), + expected_names, + 'Metrics are labelled with space name') + + t.assert_items_equals( + get_unique_label_values(stats_sum, 'name'), + expected_names, + 'Metrics are labelled with space name') + + if g.params.quantiles == true then + local expected_quantiles = { 0.99 } + t.assert_items_equals(get_unique_label_values(quantile_stats, 'quantile'), expected_quantiles, + 'Quantile metrics presents') + end + + + local tuples_fetched = find_metric('tnt_crud_tuples_fetched', metrics) + t.assert_type(tuples_fetched, 'table', '`tnt_crud_tuples_fetched` metrics found') + + t.assert_items_equals(get_unique_label_values(tuples_fetched, 'operation'), { 'select' }, + 'Metrics are labelled with operation') + + t.assert_items_equals(get_unique_label_values(tuples_fetched, 'name'), expected_names, + 'Metrics are labelled with space name') + + + local tuples_lookup = find_metric('tnt_crud_tuples_lookup', metrics) + t.assert_type(tuples_lookup, 'table', '`tnt_crud_tuples_lookup` metrics found') + + t.assert_items_equals( get_unique_label_values(tuples_lookup, 'operation'), { 'select' }, + 'Metrics are labelled with operation') + + t.assert_items_equals(get_unique_label_values(tuples_lookup, 'name'), expected_names, + 'Metrics are labelled with space name') + + + local map_reduces = find_metric('tnt_crud_map_reduces', metrics) + t.assert_type(map_reduces, 'table', '`tnt_crud_map_reduces` metrics found') + + t.assert_items_equals(get_unique_label_values(map_reduces, 'operation'), { 'select' }, + 'Metrics are labelled with operation') + + t.assert_items_equals(get_unique_label_values(map_reduces, 'name'), expected_names, + 'Metrics are labelled with space name') +end + +local function check_updated_per_call(g) + local metrics_before = get_metrics(g) + local stats_labels = { 
operation = 'select', status = 'ok', name = space_name } + local details_labels = { operation = 'select', name = space_name } + + local count_before = find_obs('tnt_crud_stats_count', stats_labels, metrics_before) + local time_before = find_obs('tnt_crud_stats_sum', stats_labels, metrics_before) + local tuples_lookup_before = find_obs('tnt_crud_tuples_lookup', details_labels, metrics_before) + local tuples_fetched_before = find_obs('tnt_crud_tuples_fetched', details_labels, metrics_before) + local map_reduces_before = find_obs('tnt_crud_map_reduces', details_labels, metrics_before) + + local case = select_cases['select_by_secondary_index'] + local _, err = g.router:call(case.func, { space_name, case.conditions }) + t.assert_equals(err, nil) + + local metrics_after = get_metrics(g) + local count_after = find_obs('tnt_crud_stats_count', stats_labels, metrics_after) + local time_after = find_obs('tnt_crud_stats_sum', stats_labels, metrics_after) + local tuples_lookup_after = find_obs('tnt_crud_tuples_lookup', details_labels, metrics_after) + local tuples_fetched_after = find_obs('tnt_crud_tuples_fetched', details_labels, metrics_after) + local map_reduces_after = find_obs('tnt_crud_map_reduces', details_labels, metrics_after) + + t.assert_equals(count_after.value - count_before.value, 1, + '`select` metrics count increased') + t.assert_ge(time_after.value - time_before.value, 0, + '`select` total time increased') + t.assert_ge(tuples_lookup_after.value - tuples_lookup_before.value, case.tuples_lookup, + '`select` tuples lookup expected change') + t.assert_ge(tuples_fetched_after.value - tuples_fetched_before.value, case.tuples_fetched, + '`select` tuples feched expected change') + t.assert_ge(map_reduces_after.value - map_reduces_before.value, case.tuples_lookup, + '`select` map reduces expected change') +end + + +group_metrics.before_test( + 'test_stats_stored_in_global_metrics_registry', + generate_stats) + +group_metrics.test_stats_stored_in_global_metrics_registry 
= function(g) + local metrics = get_metrics(g) + validate_metrics(g, metrics) +end + + +group_metrics.before_test('test_metrics_updated_per_call', generate_stats) + +group_metrics.test_metrics_updated_per_call = check_updated_per_call + + + +group_metrics.before_test( + 'test_metrics_collectors_destroyed_if_stats_disabled', + generate_stats) + +group_metrics.test_metrics_collectors_destroyed_if_stats_disabled = function(g) + disable_stats(g) + + local metrics = get_metrics(g) + + local stats = find_metric('tnt_crud_stats', metrics) + t.assert_equals(stats, nil, '`tnt_crud_stats` summary metrics not found') + + local stats_count = find_metric('tnt_crud_stats_count', metrics) + t.assert_equals(stats_count, nil, '`tnt_crud_stats` summary metrics not found') + + local stats_sum = find_metric('tnt_crud_stats_sum', metrics) + t.assert_equals(stats_sum, nil, '`tnt_crud_stats` summary metrics not found') + + local tuples_fetched = find_metric('tnt_crud_tuples_fetched', metrics) + t.assert_equals(tuples_fetched, nil, '`tnt_crud_tuples_fetched` metrics not found') + + local tuples_lookup = find_metric('tnt_crud_tuples_lookup', metrics) + t.assert_equals(tuples_lookup, nil, '`tnt_crud_tuples_lookup` metrics not found') + + local map_reduces = find_metric('tnt_crud_map_reduces', metrics) + t.assert_equals(map_reduces, nil, '`tnt_crud_map_reduces` metrics not found') +end + + +group_metrics.before_test( + 'test_stats_stored_in_metrics_registry_after_switch_to_metrics_driver', + disable_stats) + +group_metrics.test_stats_stored_in_metrics_registry_after_switch_to_metrics_driver = function(g) + enable_stats(g, { driver = 'local', quantiles = false }) + -- Switch to metrics driver. 
+ enable_stats(g) + + generate_stats(g) + local metrics = get_metrics(g) + validate_metrics(g, metrics) +end + +group_metrics.before_test( + 'test_role_reload_do_not_reset_metrics_observations', + generate_stats) + +group_metrics.test_role_reload_do_not_reset_metrics_observations = function(g) + t.xfail('See https://github.com/tarantool/metrics/issues/334') + + helpers.reload_roles(g.cluster:server('router')) + g.router:eval("crud = require('crud')") + local metrics = get_metrics(g) + validate_metrics(g, metrics) +end + + +group_metrics.before_test( + 'test_module_reload_do_not_reset_metrics_observations', + generate_stats) + +group_metrics.test_module_reload_do_not_reset_metrics_observations = function(g) + g.router:eval([[ + local function startswith(text, prefix) + return text:find(prefix, 1, true) == 1 + end + + for k, _ in pairs(package.loaded) do + if startswith(k, 'crud') then + package.loaded[k] = nil + end + end + + crud = require('crud') + ]]) + + local metrics = get_metrics(g) + validate_metrics(g, metrics) +end + +group_metrics.before_test( + 'test_stats_changed_in_metrics_registry_after_role_reload', + prepare_select_data) + +group_metrics.test_stats_changed_in_metrics_registry_after_role_reload = function(g) + helpers.reload_roles(g.cluster:server('router')) + g.router:eval("crud = require('crud')") + check_updated_per_call(g) +end + + +group_metrics.before_test( + 'test_stats_changed_in_metrics_registry_after_module_reload', + prepare_select_data) + +group_metrics.test_stats_changed_in_metrics_registry_after_module_reload = function(g) + g.router:eval([[ + local function startswith(text, prefix) + return text:find(prefix, 1, true) == 1 + end + + for k, _ in pairs(package.loaded) do + if startswith(k, 'crud') then + package.loaded[k] = nil + end + end + + crud = require('crud') + ]]) + + check_updated_per_call(g) +end diff --git a/test/unit/stats_test.lua b/test/unit/stats_test.lua index 057038202..bfa0f0cd2 100644 --- a/test/unit/stats_test.lua +++ 
b/test/unit/stats_test.lua @@ -5,12 +5,17 @@ local t = require('luatest') local stats_module = require('crud.stats') -local g = t.group('stats_unit') +local pgroup = t.group('stats_unit', { + { driver = 'local' }, + { driver = 'metrics', quantiles = false }, + { driver = 'metrics', quantiles = true }, +}) +local group_driver = t.group('stats_driver_unit') local helpers = require('test.helper') local space_name = 'customers' -g.before_all(function(g) +local function before_all(g) -- Enable test cluster for "is space exist?" checks. g.cluster = helpers.Cluster:new({ datadir = fio.tempdir(), @@ -23,47 +28,62 @@ g.before_all(function(g) helpers.prepare_simple_functions(g.router) g.router:eval("stats_module = require('crud.stats')") -end) -g.after_all(function(g) - helpers.stop_cluster(g.cluster) -end) + g.is_metrics_supported = g.router:eval([[ + return require('crud.stats.metrics_registry').is_supported() + ]]) --- Reset statistics between tests, reenable if needed. -g.before_each(function(g) - g:enable_stats() -end) + if g.params ~= nil and g.params.driver == 'metrics' then + t.skip_if(g.is_metrics_supported == false, 'Metrics registry is unsupported') + end +end -g.after_each(function(g) - g:disable_stats() -end) +local function after_all(g) + helpers.stop_cluster(g.cluster) +end -function g:get_stats(space_name) - return self.router:eval("return stats_module.get(...)", { space_name }) +local function get_stats(g, space_name) + return g.router:eval("return stats_module.get(...)", { space_name }) end -function g:enable_stats() - self.router:eval("stats_module.enable()") +local function enable_stats(g, params) + params = params or g.params + g.router:eval("stats_module.enable(...)", { params }) end -function g:disable_stats() - self.router:eval("stats_module.disable()") +local function disable_stats(g) + g.router:eval("stats_module.disable()") end -function g:reset_stats() - self.router:eval("return stats_module.reset()") +local function reset_stats(g) + 
g.router:eval("return stats_module.reset()") end +pgroup.before_all(before_all) + +pgroup.after_all(after_all) + +-- Reset statistics between tests, reenable if needed. +pgroup.before_each(enable_stats) + +pgroup.after_each(disable_stats) + -g.test_get_format_after_enable = function(g) - local stats = g:get_stats() +group_driver.before_all(before_all) + +group_driver.after_all(after_all) + +group_driver.after_each(disable_stats) + +pgroup.test_get_format_after_enable = function(g) + local stats = get_stats(g) t.assert_type(stats, 'table') t.assert_equals(stats.spaces, {}) end -g.test_get_by_space_name_format_after_enable = function(g) - local stats = g:get_stats(space_name) +pgroup.test_get_by_space_name_format_after_enable = function(g) + local stats = get_stats(g, space_name) t.assert_type(stats, 'table') t.assert_equals(stats, {}) @@ -105,7 +125,7 @@ for name, case in pairs(observe_cases) do for _, op in pairs(case.operations) do local test_name = ('test_%s_%s'):format(op, name) - g[test_name] = function(g) + pgroup[test_name] = function(g) -- Call wrapped functions on server side. -- Collect execution times from outside. local run_count = 10 @@ -131,10 +151,10 @@ for name, case in pairs(observe_cases) do local total_time = fun.sum(time_diffs) -- Validate stats format after execution. 
- local total_stats = g:get_stats() + local total_stats = get_stats(g) t.assert_type(total_stats, 'table', 'Total stats present after observations') - local space_stats = g:get_stats(space_name) + local space_stats = get_stats(g, space_name) t.assert_type(space_stats, 'table', 'Space stats present after observations') t.assert_equals(total_stats.spaces[space_name], space_stats, @@ -311,6 +331,7 @@ local pairs_cases = { post_eval = [[ collectgarbage('collect') collectgarbage('collect') + require('fiber').yield() ]], build_sleep_multiplier = 2, iterations_expected = 5, @@ -322,11 +343,11 @@ local pairs_cases = { for name, case in pairs(pairs_cases) do local test_name = ('test_pairs_wrapper_observes_all_iterations_on_%s'):format(name) - g.before_test(test_name, function(g) + pgroup.before_test(test_name, function(g) g.router:eval(case.prepare, { helpers.simple_functions_params() }) end) - g[test_name] = function(g) + pgroup[test_name] = function(g) local op = stats_module.op.SELECT local params = helpers.simple_functions_params() @@ -348,10 +369,10 @@ for name, case in pairs(pairs_cases) do local time_diff = after_finish - before_start -- Validate stats format after execution. 
- local total_stats = g:get_stats() + local total_stats = get_stats(g) t.assert_type(total_stats, 'table', 'Total stats present after observations') - local space_stats = g:get_stats(space_name) + local space_stats = get_stats(g, space_name) t.assert_type(space_stats, 'table', 'Space stats present after observations') t.assert_equals(total_stats.spaces[space_name], space_stats, @@ -435,7 +456,7 @@ for name_head, disable_case in pairs(disable_stats_cases) do for name_tail, return_case in pairs(preserve_return_cases) do local test_name = ('test_%s%s'):format(name_head, name_tail) - g[test_name] = function(g) + pgroup[test_name] = function(g) local op = stats_module.op.INSERT local eval = ([[ @@ -459,7 +480,7 @@ for name_head, disable_case in pairs(disable_stats_cases) do local test_name = ('test_%spairs_wrapper_preserves_return_values'):format(name_head) - g[test_name] = function(g) + pgroup[test_name] = function(g) local op = stats_module.op.INSERT local input = { a = 'a', b = 'b' } @@ -489,7 +510,7 @@ for name_head, disable_case in pairs(disable_stats_cases) do for name_tail, throw_case in pairs(preserve_throw_cases) do local test_name = ('test_%s%s'):format(name_head, name_tail) - g[test_name] = function(g) + pgroup[test_name] = function(g) local op = stats_module.op.INSERT local eval = ([[ @@ -513,14 +534,13 @@ for name_head, disable_case in pairs(disable_stats_cases) do end end - -g.test_stats_is_empty_after_disable = function(g) - g:disable_stats() +pgroup.test_stats_is_empty_after_disable = function(g) + disable_stats(g) local op = stats_module.op.INSERT g.router:eval(call_wrapped, { 'return_true', op, {}, space_name }) - local stats = g:get_stats() + local stats = get_stats(g) t.assert_equals(stats, {}) end @@ -529,52 +549,52 @@ local function prepare_non_default_stats(g) local op = stats_module.op.INSERT g.router:eval(call_wrapped, { 'return_true', op, {}, space_name }) - local stats = g:get_stats(space_name) + local stats = get_stats(g, space_name) 
t.assert_equals(stats[op].ok.count, 1, 'Non-zero stats prepared') return stats end -g.test_enable_is_idempotent = function(g) +pgroup.test_enable_is_idempotent = function(g) local stats_before = prepare_non_default_stats(g) - g:enable_stats() + enable_stats(g) - local stats_after = g:get_stats(space_name) + local stats_after = get_stats(g, space_name) t.assert_equals(stats_after, stats_before, 'Stats have not been reset') end -g.test_reset = function(g) +pgroup.test_reset = function(g) prepare_non_default_stats(g) - g:reset_stats() + reset_stats(g) - local stats = g:get_stats(space_name) + local stats = get_stats(g, space_name) t.assert_equals(stats, {}, 'Stats have been reset') end -g.test_reset_for_disabled_stats_does_not_init_module = function(g) - g:disable_stats() +pgroup.test_reset_for_disabled_stats_does_not_init_module = function(g) + disable_stats(g) - local stats_before = g:get_stats() + local stats_before = get_stats(g) t.assert_equals(stats_before, {}, "Stats is empty") - g:reset_stats() + reset_stats(g) - local stats_after = g:get_stats() + local stats_after = get_stats(g) t.assert_equals(stats_after, {}, "Stats is still empty") end -g.test_stats_fetch_callback = function(g) +pgroup.test_stats_fetch_callback = function(g) local storage_cursor_stats = { tuples_fetched = 5, tuples_lookup = 25 } g.router:eval([[ stats_module.get_fetch_callback()(...) 
]], { storage_cursor_stats, space_name }) local op = stats_module.op.SELECT - local stats = g:get_stats(space_name) + local stats = get_stats(g, space_name) t.assert_not_equals(stats[op], nil, 'Fetch stats update inits SELECT collectors') @@ -587,8 +607,8 @@ g.test_stats_fetch_callback = function(g) 'tuples_lookup is inremented by expected value') end -g.test_disable_stats_before_fetch_callback_get_do_not_break_call = function(g) - g:disable_stats() +pgroup.test_disable_stats_before_fetch_callback_get_do_not_break_call = function(g) + disable_stats(g) local storage_cursor_stats = { tuples_fetched = 5, tuples_lookup = 25 } g.router:eval([[ stats_module.get_fetch_callback()(...) ]], @@ -597,7 +617,7 @@ g.test_disable_stats_before_fetch_callback_get_do_not_break_call = function(g) t.success('No unexpected errors') end -g.test_disable_stats_after_fetch_callback_get_do_not_break_call = function(g) +pgroup.test_disable_stats_after_fetch_callback_get_do_not_break_call = function(g) local storage_cursor_stats = { tuples_fetched = 5, tuples_lookup = 25 } g.router:eval([[ @@ -609,21 +629,88 @@ g.test_disable_stats_after_fetch_callback_get_do_not_break_call = function(g) t.success('No unexpected errors') end -g.test_map_reduce_increment = function(g) +pgroup.test_map_reduce_increment = function(g) local op = stats_module.op.SELECT local _, err = g.router:eval([[ stats_module.update_map_reduces(...) ]], { space_name }) t.assert_equals(err, nil) - local stats = g:get_stats() + local stats = get_stats(g) t.assert_equals(stats.spaces[space_name][op].details.map_reduces, 1, "Counter of map reduces incremented") end -g.test_disable_stats_do_not_break_map_reduce_update_call = function(g) - g:disable_stats() +pgroup.test_disable_stats_do_not_break_map_reduce_update_call = function(g) + disable_stats(g) local _, err = g.router:eval([[ stats_module.update_map_reduces(...) 
]], { space_name }) t.assert_equals(err, nil) end + + +group_driver.test_default_driver = function(g) + enable_stats(g) + + local driver = g.router:eval(" return stats_module.internal.driver ") + + if g.is_metrics_supported then + t.assert_equals(driver, 'metrics') + else + t.assert_equals(driver, 'local') + end +end + + +group_driver.test_default_quantiles = function(g) + enable_stats(g) + + local quantiles = g.router:eval(" return stats_module.internal.quantiles ") + t.assert_equals(quantiles, false) +end + + +group_driver.before_test( + 'test_stats_reenable_with_different_driver_reset_stats', + function(g) + t.skip_if(g.is_metrics_supported == false, 'Metrics registry is unsupported') + end +) + +group_driver.test_stats_reenable_with_different_driver_reset_stats = function(g) + enable_stats(g, { driver = 'metrics' }) + + prepare_non_default_stats(g) + + enable_stats(g, { driver = 'local' }) + local stats = get_stats(g) + t.assert_equals(stats.spaces, {}, 'Stats have been reset') +end + + +group_driver.test_unknown_driver_throws_error = function(g) + t.assert_error_msg_contains( + 'Unsupported driver: unknown', + enable_stats, g, { driver = 'unknown' }) +end + + +group_driver.before_test( + 'test_stats_enable_with_metrics_throws_error_if_unsupported', + function(g) + t.skip_if(g.is_metrics_supported == true, 'Metrics registry is supported') + end +) + +group_driver.test_stats_enable_with_metrics_throws_error_if_unsupported = function(g) + t.assert_error_msg_contains( + 'Unsupported driver: metrics', + enable_stats, g, { driver = 'metrics' }) +end + + +group_driver.test_stats_enable_with_local_throws_error_if_quantiles_enabled = function(g) + t.assert_error_msg_contains( + 'Quantiles are not supported', + enable_stats, g, { driver = 'local', quantiles = true }) +end