Skip to content

Commit

Permalink
metrics: introduce config alerts gauge
Browse files Browse the repository at this point in the history
Since luatest does not yet fully support Tarantool 3 config instances
(only treegen support is available in master), I have borrowed some test
helpers from tarantool/crud [1].

1. https://github.com/tarantool/crud/blob/98b120ef7095fa34525ef9d335a1458a2edf0cca/test/tarantool3_helpers

Part of tarantool/grafana-dashboard#224
  • Loading branch information
DifferentialOrange committed Jul 5, 2024
1 parent 4e15db9 commit 0a6f75a
Show file tree
Hide file tree
Showing 11 changed files with 653 additions and 0 deletions.
1 change: 1 addition & 0 deletions .luacheckrc
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ include_files = {"**/*.lua", "*.rockspec", "*.luacheckrc"}
exclude_files = {"lua_modules/", ".luarocks/", ".rocks/", "tmp/", ".history/"}

max_line_length = 120
max_comment_line_length = 200
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Added
- New Tarantool 3 metrics:
- tnt_config_alerts

## [1.1.0] - 2024-05-17
### Added
Expand Down
1 change: 1 addition & 0 deletions doc/monitoring/api_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,7 @@ Metrics functions
* ``cartridge_failover``
* ``clock``
* ``event_loop``
* ``config``

See :ref:`metrics reference <metrics-reference>` for details.
All metric collectors from the collection have ``metainfo.default = true``.
Expand Down
18 changes: 18 additions & 0 deletions doc/monitoring/metrics_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -993,3 +993,21 @@ Read view statistics

* - ``tnt_memtx_index_read_view``
- Memory (in bytes) held for read views.


Tarantool configuration
~~~~~~~~~~~~~~~~~~~~~~~

These metrics are available starting from Tarantool 3.0.

.. container:: table

.. list-table::
:widths: 25 75
:header-rows: 0

* - ``tnt_config_alerts``
- Number of :ref:`configuration apply alerts <config_api_reference_info>` currently present on the instance.
The ``{level="warn"}`` label covers warnings and
the ``{level="error"}`` label covers errors.

1 change: 1 addition & 0 deletions metrics/tarantool.lua
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ local default_metrics = {
cartridge_failover = require('metrics.cartridge.failover'),
clock = require('metrics.tarantool.clock'),
event_loop = require('metrics.tarantool.event_loop'),
config = require('metrics.tarantool.config'),
}

local all_metrics_map = {}
Expand Down
46 changes: 46 additions & 0 deletions metrics/tarantool/config.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
local utils = require('metrics.utils')

local collectors_list = {}

--- Count configuration alerts grouped by their type.
--
-- @param config_info table whose optional `alerts` field holds alert
--   objects, each carrying a `type` field.
-- @return table with numeric `warn` and `error` counters (both always present).
local function get_config_alerts(config_info)
    -- Alert types handled here:
    -- https://github.com/tarantool/tarantool/blob/319357d5973d15d08b8eda6a230eada08b710802/src/box/lua/config/utils/aboard.lua#L17-L18
    local config_alerts = {
        warn = 0,
        error = 0,
    }

    -- A missing alerts list is treated the same as an empty one.
    for _, alert in pairs(config_info.alerts or {}) do
        local count = config_alerts[alert.type]
        -- Skip alert types this module does not know about instead of
        -- failing on `nil + 1` and breaking the whole metrics update.
        if count ~= nil then
            config_alerts[alert.type] = count + 1
        end
    end

    return config_alerts
end

--- Refresh the `config_alerts` gauge, one observation per alert level.
-- Does nothing unless utils.is_tarantool3() reports a Tarantool 3 runtime.
local function update()
    if utils.is_tarantool3() then
        -- Can migrate to box.info().config later
        -- https://github.com/tarantool/tarantool/commit/a1544d3bbc029c6fb2a148e580afe2b20e269b8d
        local info = require('config'):info()
        local alerts_by_level = get_config_alerts(info)

        for level, count in pairs(alerts_by_level) do
            local gauge = utils.set_gauge(
                'config_alerts',
                'Tarantool 3 configuration alerts',
                count,
                {level = level},
                nil,
                {default = true}
            )
            -- Stored on every iteration, exactly as the value set_gauge returned.
            collectors_list.config_alerts = gauge
        end
    end
end

-- Module interface: `update` refreshes the config alerts gauge,
-- `list` is the table in which `update` stores the created collector.
return {
update = update,
list = collectors_list,
}
16 changes: 16 additions & 0 deletions metrics/utils.lua
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,20 @@ function utils.delete_collectors(list)
table.clear(list)
end

--- Extract numeric version components from the `_TARANTOOL` global.
--
-- The text before the first '-' is split on '.' and each of the first
-- three pieces is converted with tonumber().
--
-- @return major, minor, patch (each is whatever tonumber() yields for its piece)
local function get_tarantool_version()
    local full_version = rawget(_G, '_TARANTOOL')
    local numeric_part = full_version:split('-', 3)[1]
    local components = numeric_part:split('.', 2)

    return tonumber(components[1]), tonumber(components[2]), tonumber(components[3])
end

--- Tell whether the running Tarantool reports major version 3.
-- @return boolean
function utils.is_tarantool3()
    -- Only the first value returned by get_tarantool_version() takes part in the comparison.
    return get_tarantool_version() == 3
end

return utils
179 changes: 179 additions & 0 deletions test/tarantool/config_metrics_test.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
local t = require('luatest')
local g = t.group()

local fio = require('fio')
local yaml = require('yaml')

local utils = require('test.utils')
local treegen = require('test.tarantool3_helpers.treegen')
local server_helper = require('test.tarantool3_helpers.server')

-- Create the treegen state once for the whole group and keep it on the group context.
g.before_all(function(cg)
    local treegen_state = {}
    cg.treegen = treegen_state
    treegen.init(treegen_state)
end)

-- Hand the same treegen state back for cleanup after all tests have run.
g.after_all(function(cg)
    local treegen_state = cg.treegen
    treegen.clean(treegen_state)
end)


-- Baseline cluster configuration shared by the tests below: one group ('servers')
-- with one replicaset ('server-001') holding a single instance ('server-001-a').
-- start_server writes it to config.yaml; individual tests deep-copy and modify it.
local default_config = {
credentials = {
users = {
guest = {
roles = {'super'},
},
replicator = {
password = 'replicating',
roles = {'replication'},
},
},
},
iproto = {
advertise = {
peer = {
-- Matches the 'replicator' user declared in credentials above.
login = 'replicator',
},
},
},
groups = {
servers = {
replicasets = {
['server-001'] = {
leader = 'server-001-a',
instances = {
-- Instance name equals the alias the test server is started with.
['server-001-a'] = {
iproto = {
listen = {{uri = 'localhost:3301'}},
},
},
},
},
},
},
},
replication = {
failover = 'manual',
},
-- NOTE(review): presumably enables all metric collectors on the instance; confirm against the metrics config schema.
metrics = {
include = {'all'},
},
}

--- Encode `config` as YAML and write it as 'config.yaml' into the server directory.
-- @param cg group context; `cg.server_dir` is the target directory
-- @param config table to serialize
-- @return whatever treegen.write_script returns
local function write_config(cg, config)
    local encoded = yaml.encode(config)
    return treegen.write_script(cg.server_dir, 'config.yaml', encoded)
end

--- Start a server from `default_config` and store it on the group context.
-- Skips the test when utils.is_tarantool_3_config_supported() is false.
-- Sets `cg.server_dir` and `cg.server`.
local function start_server(cg)
    local is_supported = utils.is_tarantool_3_config_supported()
    t.skip_if(not is_supported, 'Skip since Tarantool 3 config is unsupported')

    local server_dir = treegen.prepare_directory(cg.treegen, {}, {})
    -- write_config reads cg.server_dir, so it has to be set before the call.
    cg.server_dir = server_dir
    local config_path = write_config(cg, default_config)

    local server = server_helper:new({
        alias = 'server-001-a',
        config_file = config_path,
        chdir = server_dir,
    })
    cg.server = server
    server:start({wait_until_ready = true})
end

--- Stop the server and remove its directory, when present, clearing both fields on `cg`.
local function stop_server(cg)
    local server = cg.server
    if server ~= nil then
        server:stop()
        cg.server = nil
    end

    local server_dir = cg.server_dir
    if server_dir ~= nil then
        fio.rmtree(server_dir)
        cg.server_dir = nil
    end
end

--- Overwrite config.yaml with `config` and ask the running server to reload it.
-- Any error raised while reloading is swallowed on purpose: the tests inspect
-- the resulting alerts instead of the reload outcome.
local function reload_config(cg, config)
    write_config(cg, config)

    cg.server:exec(function()
        local function do_reload()
            local config_module = require('config')
            config_module:reload()
        end

        pcall(do_reload)
    end)
end

--- Collect metrics on `server` and compare `tnt_config_alerts` values per level.
-- @param server server object providing `exec`
-- @param expected_values table with expected `warn` and `error` values
local function assert_config_alerts_metrics(server, expected_values)
    local observations = server:exec(function()
        local metrics = require('metrics')
        metrics.invoke_callbacks()
        return metrics.collect()
    end)

    -- 'warn' is checked first, then 'error'.
    for _, level in ipairs({'warn', 'error'}) do
        local observation = utils.find_obs(
            'tnt_config_alerts',
            {level = level, alias = 'server-001-a'},
            observations
        )
        t.assert_equals(observation.value, expected_values[level])
    end
end


g.before_test('test_config_alerts_if_healthy', start_server)
g.after_test('test_config_alerts_if_healthy', stop_server)

-- With the unmodified default config both alert gauges are expected to be zero.
g.test_config_alerts_if_healthy = function(cg)
    local expected = {warn = 0, error = 0}
    assert_config_alerts_metrics(cg.server, expected)
end


g.before_test('test_config_alerts_if_minor_trouble', start_server)
g.after_test('test_config_alerts_if_minor_trouble', stop_server)

-- Reloading with an extra user that refers to 'role_two' is expected to yield one warning.
-- NOTE(review): presumably because 'role_two' is not declared anywhere in the config; confirm.
g.test_config_alerts_if_minor_trouble = function(cg)
    local changed_config = table.deepcopy(default_config)
    changed_config.credentials.users.user_one = {roles = {'role_two'}}
    reload_config(cg, changed_config)

    local expected = {warn = 1, error = 0}
    assert_config_alerts_metrics(cg.server, expected)
end


g.before_test('test_config_alerts_if_critical_failure', start_server)
g.after_test('test_config_alerts_if_critical_failure', stop_server)

-- Reloading with an emptied 'servers' group is expected to yield one error alert.
g.test_config_alerts_if_critical_failure = function(cg)
    local broken_config = table.deepcopy(default_config)
    broken_config.groups.servers = {}
    reload_config(cg, broken_config)

    local expected = {warn = 0, error = 1}
    assert_config_alerts_metrics(cg.server, expected)
end


-- Runs only where Tarantool 3 config is NOT supported; uses the generic test server helper.
g.before_test('test_config_alerts_if_unsupported', function(cg)
    local is_supported = utils.is_tarantool_3_config_supported()
    t.skip_if(is_supported, 'Skip since Tarantool 3 config is supported')
    utils.create_server(cg)
end)

g.after_test('test_config_alerts_if_unsupported', function(cg)
    utils.drop_server(cg)
    cg.server = nil
end)

-- The tnt_config_alerts metric must be absent from the collected observations.
g.test_config_alerts_if_unsupported = function(cg)
    local observations = cg.server:exec(function()
        local metrics = require('metrics')
        metrics.invoke_callbacks()
        return metrics.collect()
    end)

    local found_metric = utils.find_metric('tnt_config_alerts', observations)
    t.assert_equals(found_metric, nil)
end
Loading

0 comments on commit 0a6f75a

Please sign in to comment.