Skip to content

Commit

Permalink
r/consensus: made raft election timeout runtime configurable
Browse files Browse the repository at this point in the history
All other Raft timeouts are configurable in runtime. Made election
timeout runtime configurable as well.

Fixes: redpanda-data#13491

Signed-off-by: Michal Maslanka <[email protected]>
  • Loading branch information
mmaslankaprv committed Sep 20, 2023
1 parent 54c0d86 commit fbc1130
Show file tree
Hide file tree
Showing 10 changed files with 49 additions and 17 deletions.
2 changes: 1 addition & 1 deletion src/v/config/configuration.cc
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,7 @@ configuration::configuration()
*this,
"election_timeout_ms",
"Election timeout expressed in milliseconds",
{.visibility = visibility::tunable},
{.needs_restart = needs_restart::no, .visibility = visibility::tunable},
1'500ms)
, kafka_group_recovery_timeout_ms(
*this,
Expand Down
3 changes: 1 addition & 2 deletions src/v/raft/group_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,7 @@ ss::future<ss::lw_shared_ptr<raft::consensus>> group_manager::create_group(
_self,
id,
std::move(raft_cfg),
raft::timeout_jitter(
config::shard_local_cfg().raft_election_timeout_ms()),
raft::timeout_jitter(_configuration.election_timeout_ms),
log,
scheduling_config(_raft_sg, raft_priority()),
_configuration.raft_io_timeout_ms,
Expand Down
1 change: 1 addition & 0 deletions src/v/raft/group_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class group_manager {
config::binding<std::chrono::milliseconds> raft_io_timeout_ms;
config::binding<bool> enable_lw_heartbeat;
config::binding<size_t> recovery_concurrency_per_shard;
config::binding<std::chrono::milliseconds> election_timeout_ms;
};
using config_provider_fn = ss::noncopyable_function<configuration()>;

Expand Down
6 changes: 4 additions & 2 deletions src/v/raft/tests/jitter_tests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0

#include "config/property.h"
#include "raft/timeout_jitter.h"

#include <seastar/core/thread.hh>
Expand All @@ -19,9 +20,10 @@
using namespace std::chrono_literals; // NOLINT

SEASTAR_THREAD_TEST_CASE(base_jitter_gurantees) {
raft::timeout_jitter jit(100ms, 75ms);
raft::timeout_jitter jit(
config::mock_binding<std::chrono::milliseconds>(100ms));
auto const low = jit.base_duration();
auto const high = jit.base_duration() + 75ms;
auto const high = jit.base_duration() + 50ms;
BOOST_CHECK_EQUAL(
std::chrono::duration_cast<std::chrono::milliseconds>(low).count(),
(100ms).count());
Expand Down
2 changes: 2 additions & 0 deletions src/v/raft/tests/raft_fixture.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#include "bytes/iobuf_parser.h"
#include "config/property.h"
#include "config/throughput_control_group.h"
#include "model/fundamental.h"
#include "model/metadata.h"
#include "model/record.h"
Expand Down Expand Up @@ -55,6 +56,7 @@
#include <absl/container/flat_hash_set.h>
#include <fmt/core.h>

#include <chrono>
#include <filesystem>
#include <memory>
#include <optional>
Expand Down
3 changes: 2 additions & 1 deletion src/v/raft/tests/raft_fixture.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,8 @@ class raft_node_instance : public ss::weakly_referencable<raft_node_instance> {
ss::sstring _base_directory;
ss::shared_ptr<in_memory_test_protocol> _protocol;
ss::sharded<storage::api> _storage;
std::chrono::milliseconds _election_timeout = 500ms;
config::binding<std::chrono::milliseconds> _election_timeout
= config::mock_binding(500ms);
ss::sharded<features::feature_table> _features;
ss::sharded<coordinated_recovery_throttle> _recovery_throttle;
recovery_memory_quota _recovery_mem_quota;
Expand Down
6 changes: 4 additions & 2 deletions src/v/raft/tests/raft_group_fixture.h
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,8 @@ struct raft_group {
broker,
_id,
get_raft_cfg(),
raft::timeout_jitter(heartbeat_interval * 10),
raft::timeout_jitter(config::mock_binding<std::chrono::milliseconds>(
heartbeat_interval * 10)),
ssx::sformat("{}/{}", _storage_dir, node_id()),
[this, node_id](raft::leadership_status st) {
election_callback(node_id, st);
Expand All @@ -445,7 +446,8 @@ struct raft_group {
_id,
raft::group_configuration(
std::vector<raft::vnode>{}, model::revision_id(0)),
raft::timeout_jitter(heartbeat_interval * 10),
raft::timeout_jitter(config::mock_binding<std::chrono::milliseconds>(
heartbeat_interval * 10)),
ssx::sformat("{}/{}", _storage_dir, node_id()),
[this, node_id](raft::leadership_status st) {
election_callback(node_id, st);
Expand Down
8 changes: 1 addition & 7 deletions src/v/raft/tests/simple_raft_fixture.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,6 @@ struct simple_raft_fixture {
, _data_dir("test_dir_" + random_generators::gen_alphanum_string(6)) {}

void create_raft(storage::ntp_config::default_overrides overrides = {}) {
ss::smp::invoke_on_all([]() {
// We want immediate elections, to avoid a sleep at the start of
// every instantiation of a test setup.
config::shard_local_cfg().raft_election_timeout_ms.set_value(10ms);
}).get();

// configure and start kvstore
storage::kvstore_config kv_conf(
8192,
Expand Down Expand Up @@ -89,7 +83,7 @@ struct simple_raft_fixture {
.enable_lw_heartbeat = config::mock_binding<bool>(true),
.recovery_concurrency_per_shard
= config::mock_binding<size_t>(64),
};
.election_timeout_ms = config::mock_binding(10ms)};
},
[] {
return raft::recovery_memory_quota::configuration{
Expand Down
33 changes: 31 additions & 2 deletions src/v/raft/timeout_jitter.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,41 @@

#pragma once

#include "config/property.h"
#include "raft/types.h"
#include "random/simple_time_jitter.h"

namespace raft {
class timeout_jitter {
public:
explicit timeout_jitter(config::binding<std::chrono::milliseconds> timeout)
: _base_timeout(timeout)
, _time_jitter(_base_timeout()) {
timeout.watch([this] { update_base_timeout(); });
}
raft::clock_type::time_point operator()() { return _time_jitter(); }

using timeout_jitter
= simple_time_jitter<raft::clock_type, raft::duration_type>;
raft::clock_type::duration base_duration() const {
return _time_jitter.base_duration();
}

raft::clock_type::duration next_duration() {
return _time_jitter.next_duration();
}

raft::clock_type::duration next_jitter_duration() {
return _time_jitter.next_jitter_duration();
}

private:
void update_base_timeout() {
_time_jitter
= simple_time_jitter<raft::clock_type, raft::duration_type>(
_base_timeout());
}

config::binding<std::chrono::milliseconds> _base_timeout;
simple_time_jitter<raft::clock_type, raft::duration_type> _time_jitter;
};

} // namespace raft
2 changes: 2 additions & 0 deletions src/v/redpanda/application.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1116,6 +1116,8 @@ void application::wire_up_redpanda_services(
.recovery_concurrency_per_shard
= config::shard_local_cfg()
.raft_recovery_concurrency_per_shard.bind(),
.election_timeout_ms
= config::shard_local_cfg().raft_election_timeout_ms.bind(),
};
},
[] {
Expand Down

0 comments on commit fbc1130

Please sign in to comment.