From 0a6f75a41c387d06e7ea5c14c99b15f0970ed83a Mon Sep 17 00:00:00 2001 From: Georgy Moiseev Date: Wed, 3 Jul 2024 18:27:49 +0300 Subject: [PATCH] metrics: introduce config alerts gauge Since there is no full support of Tarantool 3 config instances in luatest yet (only treegen support in master), I had borrowed some test helpers from tarantool/crud [1]. 1. https://github.com/tarantool/crud/blob/98b120ef7095fa34525ef9d335a1458a2edf0cca/test/tarantool3_helpers Part of tarantool/grafana-dashboard#224 --- .luacheckrc | 1 + CHANGELOG.md | 3 + doc/monitoring/api_reference.rst | 1 + doc/monitoring/metrics_reference.rst | 18 ++ metrics/tarantool.lua | 1 + metrics/tarantool/config.lua | 46 +++++ metrics/utils.lua | 16 ++ test/tarantool/config_metrics_test.lua | 179 +++++++++++++++++++ test/tarantool3_helpers/server.lua | 235 +++++++++++++++++++++++++ test/tarantool3_helpers/treegen.lua | 146 +++++++++++++++ test/utils.lua | 7 + 11 files changed, 653 insertions(+) create mode 100644 metrics/tarantool/config.lua create mode 100644 test/tarantool/config_metrics_test.lua create mode 100644 test/tarantool3_helpers/server.lua create mode 100644 test/tarantool3_helpers/treegen.lua diff --git a/.luacheckrc b/.luacheckrc index 596e54e3..c3434092 100644 --- a/.luacheckrc +++ b/.luacheckrc @@ -2,3 +2,4 @@ include_files = {"**/*.lua", "*.rockspec", "*.luacheckrc"} exclude_files = {"lua_modules/", ".luarocks/", ".rocks/", "tmp/", ".history/"} max_line_length = 120 +max_comment_line_length = 200 diff --git a/CHANGELOG.md b/CHANGELOG.md index e438e53b..acf59f63 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
## [Unreleased] +### Added +- New Tarantool 3 metrics: + - tnt_config_alerts ## [1.1.0] - 2024-05-17 ### Added diff --git a/doc/monitoring/api_reference.rst b/doc/monitoring/api_reference.rst index 42a65c10..5ebefbe4 100644 --- a/doc/monitoring/api_reference.rst +++ b/doc/monitoring/api_reference.rst @@ -566,6 +566,7 @@ Metrics functions * ``cartridge_failover`` * ``clock`` * ``event_loop`` + * ``config`` See :ref:`metrics reference <metrics-reference>` for details. All metric collectors from the collection have ``metainfo.default = true``. diff --git a/doc/monitoring/metrics_reference.rst b/doc/monitoring/metrics_reference.rst index 4909d7ec..3ab4d29c 100644 --- a/doc/monitoring/metrics_reference.rst +++ b/doc/monitoring/metrics_reference.rst @@ -993,3 +993,21 @@ Read view statistics * - ``tnt_memtx_index_read_view`` - Memory (in bytes) held for read views. + + +Tarantool configuration +~~~~~~~~~~~~~~~~~~~~~~~ + +These metrics are available starting from Tarantool 3.0. + +.. container:: table + + .. list-table:: + :widths: 25 75 + :header-rows: 0 + + * - ``tnt_config_alerts`` + - Count of current instance :ref:`configuration apply alerts <config_api_reference_info>`. + ``{level="warn"}`` label covers warnings and + ``{level="error"}`` covers errors. 
+ \ No newline at end of file diff --git a/metrics/tarantool.lua b/metrics/tarantool.lua index 5c64bbad..b3b57612 100644 --- a/metrics/tarantool.lua +++ b/metrics/tarantool.lua @@ -23,6 +23,7 @@ local default_metrics = { cartridge_failover = require('metrics.cartridge.failover'), clock = require('metrics.tarantool.clock'), event_loop = require('metrics.tarantool.event_loop'), + config = require('metrics.tarantool.config'), } local all_metrics_map = {} diff --git a/metrics/tarantool/config.lua b/metrics/tarantool/config.lua new file mode 100644 index 00000000..3656e2e9 --- /dev/null +++ b/metrics/tarantool/config.lua @@ -0,0 +1,46 @@ +local utils = require('metrics.utils') + +local collectors_list = {} + +local function get_config_alerts(config_info) + -- https://github.com/tarantool/tarantool/blob/319357d5973d15d08b8eda6a230eada08b710802/src/box/lua/config/utils/aboard.lua#L17-L18 + local config_alerts = { + warn = 0, + error = 0, + } + + for _, alert in pairs(config_info.alerts) do + config_alerts[alert.type] = config_alerts[alert.type] + 1 + end + + return config_alerts +end + +local function update() + if not utils.is_tarantool3() then + return + end + + -- Can migrate to box.info().config later + -- https://github.com/tarantool/tarantool/commit/a1544d3bbc029c6fb2a148e580afe2b20e269b8d + local config = require('config') + local config_info = config:info() + + local config_alerts = get_config_alerts(config_info) + + for level, count in pairs(config_alerts) do + collectors_list.config_alerts = utils.set_gauge( + 'config_alerts', + 'Tarantool 3 configuration alerts', + count, + {level = level}, + nil, + {default = true} + ) + end +end + +return { + update = update, + list = collectors_list, +} diff --git a/metrics/utils.lua b/metrics/utils.lua index aca221c8..b2499b31 100644 --- a/metrics/utils.lua +++ b/metrics/utils.lua @@ -36,4 +36,20 @@ function utils.delete_collectors(list) table.clear(list) end +local function get_tarantool_version() + local version_parts = 
rawget(_G, '_TARANTOOL'):split('-', 3) + + local major_minor_patch_parts = version_parts[1]:split('.', 2) + local major = tonumber(major_minor_patch_parts[1]) + local minor = tonumber(major_minor_patch_parts[2]) + local patch = tonumber(major_minor_patch_parts[3]) + + return major, minor, patch +end + +function utils.is_tarantool3() + local major = get_tarantool_version() + return major == 3 +end + return utils diff --git a/test/tarantool/config_metrics_test.lua b/test/tarantool/config_metrics_test.lua new file mode 100644 index 00000000..eec535f7 --- /dev/null +++ b/test/tarantool/config_metrics_test.lua @@ -0,0 +1,179 @@ +local t = require('luatest') +local g = t.group() + +local fio = require('fio') +local yaml = require('yaml') + +local utils = require('test.utils') +local treegen = require('test.tarantool3_helpers.treegen') +local server_helper = require('test.tarantool3_helpers.server') + +g.before_all(function(cg) + cg.treegen = {} + treegen.init(cg.treegen) +end) + +g.after_all(function(cg) + treegen.clean(cg.treegen) +end) + + +local default_config = { + credentials = { + users = { + guest = { + roles = {'super'}, + }, + replicator = { + password = 'replicating', + roles = {'replication'}, + }, + }, + }, + iproto = { + advertise = { + peer = { + login = 'replicator', + }, + }, + }, + groups = { + servers = { + replicasets = { + ['server-001'] = { + leader = 'server-001-a', + instances = { + ['server-001-a'] = { + iproto = { + listen = {{uri = 'localhost:3301'}}, + }, + }, + }, + }, + }, + }, + }, + replication = { + failover = 'manual', + }, + metrics = { + include = {'all'}, + }, +} + +local function write_config(cg, config) + return treegen.write_script(cg.server_dir, 'config.yaml', yaml.encode(config)) +end + +local function start_server(cg) + t.skip_if(not utils.is_tarantool_3_config_supported(), + 'Skip since Tarantool 3 config is unsupported') + + cg.server_dir = treegen.prepare_directory(cg.treegen, {}, {}) + local config_file = write_config(cg, 
default_config) + + cg.server = server_helper:new{ + alias = 'server-001-a', + config_file = config_file, + chdir = cg.server_dir, + } + cg.server:start{wait_until_ready = true} +end + +local function stop_server(cg) + if cg.server ~= nil then + cg.server:stop() + cg.server = nil + end + + if cg.server_dir ~= nil then + fio.rmtree(cg.server_dir) + cg.server_dir = nil + end +end + +local function reload_config(cg, config) + write_config(cg, config) + cg.server:exec(function() + pcall(function() + require('config'):reload() + end) + end) +end + +local function assert_config_alerts_metrics(server, expected_values) + local observations = server:exec(function() + local metrics = require('metrics') + metrics.invoke_callbacks() + return metrics.collect() + end) + + local warnings = utils.find_obs( + 'tnt_config_alerts', + {level = 'warn', alias = 'server-001-a'}, + observations + ) + t.assert_equals(warnings.value, expected_values['warn']) + + local errors = utils.find_obs( + 'tnt_config_alerts', + {level = 'error', alias = 'server-001-a'}, + observations + ) + t.assert_equals(errors.value, expected_values['error']) +end + + +g.before_test('test_config_alerts_if_healthy', start_server) +g.after_test('test_config_alerts_if_healthy', stop_server) + +g.test_config_alerts_if_healthy = function(cg) + assert_config_alerts_metrics(cg.server, {warn = 0, error = 0}) +end + + +g.before_test('test_config_alerts_if_minor_trouble', start_server) +g.after_test('test_config_alerts_if_minor_trouble', stop_server) + +g.test_config_alerts_if_minor_trouble = function(cg) + local config = table.deepcopy(default_config) + config['credentials']['users']['user_one'] = {roles = {'role_two'}} + reload_config(cg, config) + + assert_config_alerts_metrics(cg.server, {warn = 1, error = 0}) +end + + +g.before_test('test_config_alerts_if_critical_failure', start_server) +g.after_test('test_config_alerts_if_critical_failure', stop_server) + +g.test_config_alerts_if_critical_failure = function(cg) + 
local config = table.deepcopy(default_config) + config['groups']['servers'] = {} + reload_config(cg, config) + + assert_config_alerts_metrics(cg.server, {warn = 0, error = 1}) +end + + +g.before_test('test_config_alerts_if_unsupported', function(cg) + t.skip_if(utils.is_tarantool_3_config_supported(), + 'Skip since Tarantool 3 config is supported') + utils.create_server(cg) +end) + +g.after_test('test_config_alerts_if_unsupported', function(cg) + utils.drop_server(cg) + cg.server = nil +end) + +g.test_config_alerts_if_unsupported = function(cg) + local observations = cg.server:exec(function() + local metrics = require('metrics') + metrics.invoke_callbacks() + return metrics.collect() + end) + + local alerts = utils.find_metric('tnt_config_alerts', observations) + t.assert_equals(alerts, nil) +end diff --git a/test/tarantool3_helpers/server.lua b/test/tarantool3_helpers/server.lua new file mode 100644 index 00000000..82dd551a --- /dev/null +++ b/test/tarantool3_helpers/server.lua @@ -0,0 +1,235 @@ +-- Borrowed from https://github.com/tarantool/crud/blob/98b120ef7095fa34525ef9d335a1458a2edf0cca/test/tarantool3_helpers/server.lua + +local fun = require('fun') +local yaml = require('yaml') +local urilib = require('uri') +local fio = require('fio') +local luatest = require('luatest') + +local utils = require('test.utils') + +local WAIT_TIMEOUT = 60 +local WAIT_DELAY = 0.1 + +-- Join paths in an intuitive way. +-- +-- If a component is nil, it is skipped. +-- +-- If a component is an absolute path, it skips all the previous +-- components. +-- +-- The wrapper is written for two components for simplicity. +local function pathjoin(a, b) + -- No first path -- skip it. + if a == nil then + return b + end + -- No second path -- skip it. + if b == nil then + return a + end + -- The absolute path is checked explicitly due to gh-8816. 
+ if b:startswith('/') then + return b + end + return fio.pathjoin(a, b) +end + +local function find_instance(groups, instance_name) + for _, group in pairs(groups or {}) do + for _, replicaset in pairs(group.replicasets or {}) do + local instance = (replicaset.instances or {})[instance_name] + + if instance ~= nil then + return group, replicaset, instance + end + end + end + + return nil, nil, nil +end + +-- Determine advertise URI for given instance from a cluster +-- configuration. +local function find_advertise_uri(config, instance_name, dir) + if config == nil or next(config) == nil then + return nil + end + + -- Determine listen and advertise options that are in effect + -- for the given instance. + local advertise = nil + local listen = nil + + local group, replicaset, instance = find_instance(config.groups, instance_name) + + if instance ~= nil then + if instance.iproto ~= nil then + if instance.iproto.advertise ~= nil then + advertise = advertise or instance.iproto.advertise.client + end + listen = listen or instance.iproto.listen + end + if replicaset.iproto ~= nil then + if replicaset.iproto.advertise ~= nil then + advertise = advertise or replicaset.iproto.advertise.client + end + listen = listen or replicaset.iproto.listen + end + if group.iproto ~= nil then + if group.iproto.advertise ~= nil then + advertise = advertise or group.iproto.advertise.client + end + listen = listen or group.iproto.listen + end + end + + if config.iproto ~= nil then + if config.iproto.advertise ~= nil then + advertise = advertise or config.iproto.advertise.client + end + listen = listen or config.iproto.listen + end + + local uris + if advertise ~= nil then + uris = {{uri = advertise}} + else + uris = listen + end + + for _, uri in ipairs(uris or {}) do + uri = table.copy(uri) + uri.uri = uri.uri:gsub('{{ *instance_name *}}', instance_name) + uri.uri = uri.uri:gsub('unix/:%./', ('unix/:%s/'):format(dir)) + local u = urilib.parse(uri) + if u.ipv4 ~= '0.0.0.0' and u.ipv6 ~= 
'::' and u.service ~= '0' then + return uri + end + end + error('No suitable URI to connect is found') +end + +local Server = luatest.Server:inherit({}) + +-- Adds the following options: +-- +-- * config_file (string) +-- +-- An argument of the `--config <...>` CLI option. +-- +-- Used to deduce advertise URI to connect net.box to the +-- instance. +-- +-- The special value '' means running without `--config <...>` +-- CLI option (but still pass `--name `). +-- * remote_config (table) +-- +-- If `config_file` is not passed, this config value is used to +-- deduce the advertise URI to connect net.box to the instance. +Server.constructor_checks = fun.chain(Server.constructor_checks, { + config_file = 'string', + remote_config = '?table', +}):tomap() + +function Server:new(object, extra) + extra = extra or {} + extra._tags = {} + + return getmetatable(self).new(self, object, extra) +end + +function Server:initialize() + if self.config_file ~= nil then + self.command = arg[-1] + + self.args = fun.chain(self.args or {}, { + '--name', self.alias + }):totable() + + if self.config_file ~= '' then + table.insert(self.args, '--config') + table.insert(self.args, self.config_file) + + -- Take into account self.chdir to calculate a config + -- file path. + local config_file_path = pathjoin(self.chdir, self.config_file) + + -- Read the provided config file. + local fh, err = fio.open(config_file_path, {'O_RDONLY'}) + if fh == nil then + error(('Unable to open file %q: %s'):format(config_file_path, + err)) + end + self.config = yaml.decode(fh:read()) + fh:close() + end + + if self.net_box_uri == nil then + local config = self.config or self.remote_config + + -- NB: listen and advertise URIs are relative to + -- process.work_dir, which, in turn, is relative to + -- self.chdir. 
+ local work_dir + if config.process ~= nil and config.process.work_dir ~= nil then + work_dir = config.process.work_dir + end + local dir = pathjoin(self.chdir, work_dir) + self.net_box_uri = find_advertise_uri(config, self.alias, dir) + end + end + + self.env = self.env or {} + + if self.env['LUA_PATH'] == nil then + self.env['LUA_PATH'] = utils.LUA_PATH + end + + getmetatable(getmetatable(self)).initialize(self) +end + +function Server:connect_net_box() + getmetatable(getmetatable(self)).connect_net_box(self) + + if self.config_file == nil then + return + end + + if not self.net_box then + return + end + + -- Replace the ready condition. + local saved_eval = self.net_box.eval + self.net_box.eval = function(self_, expr, args, opts) + if expr == 'return _G.ready' then + expr = "return require('config'):info().status == 'ready' or " .. + "require('config'):info().status == 'check_warnings'" + end + return saved_eval(self_, expr, args, opts) + end +end + +function Server:wait_for_rw() + luatest.helpers.retrying({timeout = WAIT_TIMEOUT, delay = WAIT_DELAY}, function() + local ro, err = self:exec(function() + return box.info.ro + end) + + luatest.assert_equals(err, nil) + luatest.assert_equals(ro, false) + end) +end + +-- Enable the startup waiting if the advertise URI of the instance +-- is determined. +function Server:start(opts) + opts = opts or {} + if self.config_file and opts.wait_until_ready == nil then + opts.wait_until_ready = self.net_box_uri ~= nil + end + getmetatable(getmetatable(self)).start(self, opts) +end + +return Server diff --git a/test/tarantool3_helpers/treegen.lua b/test/tarantool3_helpers/treegen.lua new file mode 100644 index 00000000..3d4615cc --- /dev/null +++ b/test/tarantool3_helpers/treegen.lua @@ -0,0 +1,146 @@ +-- Borrowed from https://github.com/tarantool/tarantool/blob/b5864c40a0bfc8f26cc65189f3a5c76e441a9396/test/treegen.lua + +-- Working tree generator. 
+-- +-- Generates a tree of Lua files using provided templates and +-- filenames. Reworked to be used inside the Cluster. + +local fio = require('fio') +local log = require('log') +local fun = require('fun') + +local treegen = {} + +local function find_template(storage, script) + for _, template_def in ipairs(storage.templates) do + if script:match(template_def.pattern) then + return template_def.template + end + end + error(("treegen: can't find a template for script %q"):format(script)) +end + +-- Write provided script into the given directory. +function treegen.write_script(dir, script, body) + local script_abspath = fio.pathjoin(dir, script) + local flags = {'O_CREAT', 'O_WRONLY', 'O_TRUNC'} + local mode = tonumber('644', 8) + + local scriptdir_abspath = fio.dirname(script_abspath) + log.info(('Creating a directory: %s'):format(scriptdir_abspath)) + fio.mktree(scriptdir_abspath) + + log.info(('Writing a script: %s'):format(script_abspath)) + local fh = fio.open(script_abspath, flags, mode) + fh:write(body) + fh:close() + return script_abspath +end + +-- Generate a script that follows a template and write it at the +-- given path in the given directory. +local function gen_script(storage, dir, script, replacements) + local template = find_template(storage, script) + replacements = fun.chain({script = script}, replacements):tomap() + local body = template:gsub('<(.-)>', replacements) + treegen.write_script(dir, script, body) +end + +function treegen.init(storage) + storage.tempdirs = {} + storage.templates = {} +end + +-- Remove all temporary directories created by the test +-- unless KEEP_DATA environment variable is set to a +-- non-empty value. 
+function treegen.clean(storage) + local dirs = table.copy(storage.tempdirs) or {} + storage.tempdirs = nil + + local keep_data = (os.getenv('KEEP_DATA') or '') ~= '' + + for _, dir in ipairs(dirs) do + if keep_data then + log.info(('Left intact due to KEEP_DATA env var: %s'):format(dir)) + else + log.info(('Recursively removing: %s'):format(dir)) + fio.rmtree(dir) + end + end + + storage.templates = nil +end + +function treegen.add_template(storage, pattern, template) + table.insert(storage.templates, { + pattern = pattern, + template = template, + }) +end + +-- Create a temporary directory with given scripts. +-- +-- The scripts are generated using templates added by +-- treegen.add_template(). +-- +-- Example for {'foo/bar.lua', 'baz.lua'}: +-- +-- / +-- + tmp/ +-- + rfbWOJ/ +-- + foo/ +-- | + bar.lua +-- + baz.lua +-- +-- The return value is '/tmp/rfbWOJ' for this example. +function treegen.prepare_directory(storage, scripts, replacements) + replacements = replacements or {} + + assert(type(scripts) == 'table') + assert(type(replacements) == 'table') + + local dir = fio.tempdir() + + -- fio.tempdir() follows the TMPDIR environment variable. + -- If it ends with a slash, the return value contains a double + -- slash in the middle: for example, if TMPDIR=/tmp/, the + -- result is like `/tmp//rfbWOJ`. + -- + -- It looks harmless at first glance, but this directory + -- path may be used later to form an URI for a Unix domain + -- socket. As a result the URI looks like + -- `unix/:/tmp//rfbWOJ/instance-001.iproto`. + -- + -- It confuses net_box.connect(): it reports EAI_NONAME error + -- from getaddrinfo(). + -- + -- It seems, the reason is a peculiarity of the URI parsing: + -- + -- tarantool> uri.parse('unix/:/foo/bar.iproto') + -- --- + -- - host: unix/ + -- service: /foo/bar.iproto + -- unix: /foo/bar.iproto + -- ... + -- + -- tarantool> uri.parse('unix/:/foo//bar.iproto') + -- --- + -- - host: unix + -- path: /foo//bar.iproto + -- ... 
+ -- + -- Let's normalize the path using fio.abspath(), which + -- eliminates the double slashes. + dir = fio.abspath(dir) + + table.insert(storage.tempdirs, dir) + + for _, script in ipairs(scripts) do + gen_script(storage, dir, script, replacements) + end + + return dir +end + +return treegen diff --git a/test/utils.lua b/test/utils.lua index 61dff405..0f8253e2 100644 --- a/test/utils.lua +++ b/test/utils.lua @@ -3,6 +3,8 @@ local t = require('luatest') local fun = require('fun') local metrics = require('metrics') +local luatest_utils = require('luatest.utils') + local utils = {} function utils.create_server(g) @@ -108,6 +110,11 @@ function utils.clear_spaces() end end +function utils.is_tarantool_3_config_supported() + local tarantool_version = luatest_utils.get_tarantool_version() + return luatest_utils.version_ge(tarantool_version, luatest_utils.version(3, 0, 0)) +end + -- Empty by default. Empty LUA_PATH satisfies built-in package tests. -- For tarantool/metrics, LUA_PATH is set up through test.helper utils.LUA_PATH = nil