From 60cea564e51323dca836b034efce258457a6b451 Mon Sep 17 00:00:00 2001 From: Andrea Barbadoro Date: Fri, 10 May 2024 17:21:33 +0200 Subject: [PATCH 1/5] admin/server: fix rearm_log_level_timer _log_level_resets is a map, we want to extract the next expiring level_reset to rearm the _log_level_timer. previously we would extract the min_element based on the key (the logger name) so this sequence of actions set log-level kafka trace 10 set log-level raft trace 10 only kafka would be expired, because once expired the next run would pick again kafka, see that there is no expiration, and not rearm the timer. (cherry picked from commit bf6bc91ae98162e9504cb33fbffde62f5045211c) --- src/v/redpanda/admin/server.cc | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/v/redpanda/admin/server.cc b/src/v/redpanda/admin/server.cc index 322607298286..453bc60e5bb2 100644 --- a/src/v/redpanda/admin/server.cc +++ b/src/v/redpanda/admin/server.cc @@ -688,11 +688,17 @@ void admin_server::log_exception( void admin_server::rearm_log_level_timer() { _log_level_timer.cancel(); - auto next = std::min_element( - _log_level_resets.begin(), _log_level_resets.end()); + if (_log_level_resets.empty()) { + return; + } - if (next != _log_level_resets.end() && next->second.expires.has_value()) { - _log_level_timer.arm(next->second.expires.value()); + auto reset_values = _log_level_resets | std::views::values; + auto& lvl_rst = *std::ranges::min_element( + reset_values, std::less<>{}, [](level_reset const& l) { + return l.expires.value_or(ss::timer<>::clock::time_point::max()); + }); + if (lvl_rst.expires.has_value()) { + _log_level_timer.arm(lvl_rst.expires.value()); } } From b6fb9cdd911565c8973e15cdd78f20806d4fa8c2 Mon Sep 17 00:00:00 2001 From: Andrea Barbadoro Date: Fri, 10 May 2024 17:31:07 +0200 Subject: [PATCH 2/5] admin/server: remove unused level_reset::operator< (cherry picked from commit 0717cd1a0c9f352ad8a66d6cdf10ae58f36147a1) --- src/v/redpanda/admin/server.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/v/redpanda/admin/server.h b/src/v/redpanda/admin/server.h index 78f69d88f39e..7ddb2f82f5c3 100644 --- a/src/v/redpanda/admin/server.h +++ b/src/v/redpanda/admin/server.h @@ -631,16 +631,6 @@ class admin_server { : level(level) , expires(expires) {} - friend bool operator<(const level_reset& l, const level_reset& r) { - if (!l.expires.has_value()) { - return false; - } else if (!r.expires.has_value()) { - return true; - } else { - return l.expires.value() < r.expires.value(); - } - } - ss::log_level level; std::optional expires; }; From 81f4a4e8b246f12b58156e15c35b3679305ad2ee Mon Sep 17 00:00:00 2001 From: Andrea Barbadoro Date: Fri, 10 May 2024 17:32:59 +0200 Subject: [PATCH 3/5] tests/log_level_test: test_log_level_multiple_expiry the test shows that this sequence of actions set log-level admin-api-server trace 10 set log-level raft trace 10 works and both logger are reset after the expiry period (cherry picked from commit 6593dbec194446702799c3223caeb688000019cf) --- tests/rptest/tests/log_level_test.py | 29 ++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/rptest/tests/log_level_test.py b/tests/rptest/tests/log_level_test.py index 4c90b8aa258a..1c88c3b76879 100644 --- a/tests/rptest/tests/log_level_test.py +++ b/tests/rptest/tests/log_level_test.py @@ -7,11 +7,13 @@ # the Business Source License, use of this software will be governed # by the Apache License, Version 2.0 +import time import ducktape.errors import requests.exceptions import urllib.parse import json +from ducktape.mark import parametrize from ducktape.utils.util import wait_until from rptest.services.cluster import cluster from rptest.tests.redpanda_test import RedpandaTest @@ -101,6 +103,33 @@ def test_log_level_control(self): backoff_sec=1, err_msg="Never saw message") + @cluster(num_nodes=1) + @parametrize(loggers=("admin_api_server", "raft")) + @parametrize(loggers=("raft", "admin_api_server")) + def test_log_level_multiple_expiry(self, loggers=tuple[str, str]): + """ + Check that more than one logger can be in a modified level and be expired correctly + see https://redpandadata.atlassian.net/browse/CORE-96 + """ + admin = Admin(self.redpanda) + node = self.redpanda.nodes[0] + + first_logger, second_logger = loggers + # set two loggers to trace, expect that both of them expires in a timely fashion + with self.redpanda.monitor_log(node) as mon: + admin.set_log_level(first_logger, "trace", expires=10) + time.sleep(1) + admin.set_log_level(second_logger, "trace", expires=10) + mon.wait_until(f"Expiring log level for {{{first_logger}}}", + timeout_sec=15, + backoff_sec=1, + err_msg=f"Never saw Expiring for {first_logger}") + mon.wait_until(f"Expiring log level for {{{second_logger}}}", + timeout_sec=15, + backoff_sec=1, + err_msg=f"Never saw Expiring for {second_logger}") + + @cluster(num_nodes=3) def test_max_expiry(self): admin = Admin(self.redpanda) From 14fbcfaaef11bfa1588b3dddaa2c671e6700cf90 Mon Sep 17 00:00:00 2001 From: Andrea Barbadoro Date: Fri, 10 May 2024 17:38:18 +0200 Subject: [PATCH 4/5] admin/server: ensure that a true expire-never logger is never expired previously, this sequence of actions set log-level raft trace 10 set log-level raft error 0 would result in raft logging at error level for only 10 seconds, despite the legit request to expire never. this is because in the code, the second request would not correctly reset the expire field if a previous name-level_reset was already in the map used for this operation (cherry picked from commit d781fb13e6051bf3b01821b9faac4687fa69f1c6) --- src/v/redpanda/admin/server.cc | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/v/redpanda/admin/server.cc b/src/v/redpanda/admin/server.cc index 453bc60e5bb2..b5533f81b68a 100644 --- a/src/v/redpanda/admin/server.cc +++ b/src/v/redpanda/admin/server.cc @@ -1437,17 +1437,20 @@ void admin_server::register_config_routes() { ss::global_logger_registry().set_logger_level(name, new_level); - // expires=0 is same as not specifying it at all - if (expires_v / 1s > 0) { - auto when = ss::timer<>::clock::now() + expires_v; - auto res = _log_level_resets.try_emplace(name, cur_level, when); - if (!res.second) { - res.first->second.expires = when; + auto when = [&]() -> std::optional { + // expires=0 is same as not specifying it at all + if (expires_v / 1s > 0) { + return ss::timer<>::clock::now() + expires_v; + } else { + // new log level never expires, but we still want an entry in + // the resets map as a record of the default + return std::nullopt; } - } else { - // new log level never expires, but we still want an entry in the - // resets map as a record of the default - _log_level_resets.try_emplace(name, cur_level, std::nullopt); + }(); + + auto res = _log_level_resets.try_emplace(name, cur_level, when); + if (!res.second) { + res.first->second.expires = when; } rsp.expiration = expires_v / 1s; From 85c74790535d05b2bd9bca165993ffe840884ce3 Mon Sep 17 00:00:00 2001 From: Andrea Barbadoro Date: Fri, 10 May 2024 17:46:57 +0200 Subject: [PATCH 5/5] tests/log_level_test: test_log_level_persist_a_never_expire_request test that set log-level admin_api_server trace 10 set log-level admin_api_server error 0 never resets the logger to the info level (cherry picked from commit 84748a8dcb4931258c5b9ef0cb3a32b9f2b9202b) --- tests/rptest/tests/log_level_test.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/rptest/tests/log_level_test.py b/tests/rptest/tests/log_level_test.py index 1c88c3b76879..d0ff84908363 100644 --- a/tests/rptest/tests/log_level_test.py +++ b/tests/rptest/tests/log_level_test.py @@ -129,6 +129,33 @@ def test_log_level_multiple_expiry(self, loggers=tuple[str, str]): backoff_sec=1, err_msg=f"Never saw Expiring for {second_logger}") + @cluster(num_nodes=1) + def test_log_level_persist_a_never_expire_request(self): + """ + check that this sequence of actions + set log-level admin_api_server trace 10 + set log-level admin_api_server error 0 + + never resets the logger to the info level + """ + admin = Admin(self.redpanda) + node = self.redpanda.nodes[0] + + with self.redpanda.monitor_log(node) as mon: + admin.set_log_level("admin_api_server", "trace", expires=10) + time.sleep(1) + admin.set_log_level("admin_api_server", "error", expires=0) + + try: + mon.wait_until("Expiring log level for {admin_api_server}", + timeout_sec=15, + backoff_sec=1) + assert False, "Should not have seen message" + except ducktape.errors.TimeoutError: + pass + + level = admin.get_log_level("admin_api_server")[0]["level"] + assert level == "error", f"expected level=error, got {level=}" @cluster(num_nodes=3) def test_max_expiry(self):