From a63531e02c4bafafd829ffdb656e7f7cfea9e04d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Botond=20D=C3=A9nes?= Date: Mon, 11 Nov 2024 05:53:55 -0500 Subject: [PATCH] core: reactor_config: add reserve_io_control_blocks This new option allows reserving the configured amount of IOCBs, so they are available to a side application running in parallel to the seastar application. The use case is ScyllaDB and its tool applications: both ScyllaDB and its tool applications need IOCBs to run, so ScyllaDB has to be able to tell seastar to reserve some IOCBs for the tools as well. --- include/seastar/core/reactor_config.hh | 10 ++++++++++ include/seastar/core/smp.hh | 2 +- src/core/reactor.cc | 13 ++++++++++--- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/include/seastar/core/reactor_config.hh b/include/seastar/core/reactor_config.hh index 77f174179e..048298d654 100644 --- a/include/seastar/core/reactor_config.hh +++ b/include/seastar/core/reactor_config.hh @@ -170,6 +170,16 @@ struct reactor_options : public program_options::option_group { /// /// Default: 10000. program_options::value max_networking_io_control_blocks; + /// \brief Leave this many I/O control blocks (IOCBs) as reserve. + /// + /// This allows leaving a (small) reserve aside so other applications + /// also using IOCBs can run alongside the seastar application. + /// The reserve takes precedence over \ref max_networking_io_control_blocks. + /// + /// Default: 0 + /// + /// \see max_networking_io_control_blocks + program_options::value reserve_io_control_blocks; /// \brief Enable seastar heap profiling. /// /// Allocations will be sampled every N bytes on average. Zero means off. 
diff --git a/include/seastar/core/smp.hh b/include/seastar/core/smp.hh index a3a8045f3f..c3dea0c5fb 100644 --- a/include/seastar/core/smp.hh +++ b/include/seastar/core/smp.hh @@ -478,7 +478,7 @@ private: void pin(unsigned cpu_id); void allocate_reactor(unsigned id, reactor_backend_selector rbs, reactor_config cfg); void create_thread(std::function thread_loop); - unsigned adjust_max_networking_aio_io_control_blocks(unsigned network_iocbs); + unsigned adjust_max_networking_aio_io_control_blocks(unsigned network_iocbs, unsigned reserve_iocbs); static void log_aiocbs(log_level level, unsigned storage, unsigned preempt, unsigned network); public: static unsigned count; diff --git a/src/core/reactor.cc b/src/core/reactor.cc index d4b1802ccd..682c0d3df2 100644 --- a/src/core/reactor.cc +++ b/src/core/reactor.cc @@ -3800,6 +3800,9 @@ reactor_options::reactor_options(program_options::option_group* parent_group) , max_networking_io_control_blocks(*this, "max-networking-io-control-blocks", 10000, "Maximum number of I/O control blocks (IOCBs) to allocate per shard. This translates to the number of sockets supported per shard." " Requires tuning /proc/sys/fs/aio-max-nr. Only valid for the linux-aio reactor backend (see --reactor-backend).") + , reserve_io_control_blocks(*this, "reserve-io-control-blocks", 0, + "Reserve this many IOCBs, so they are available to any side application that runs in parallel to the seastar application." + " Takes precedence over --max-networking-io-control-blocks. Only valid for the linux-aio reactor backend (see --reactor-backend).") #ifdef SEASTAR_HEAPPROF , heapprof(*this, "heapprof", 0, "Enable seastar heap profiling. Sample every ARG bytes. 
0 means off") #else @@ -4167,14 +4170,14 @@ void smp::log_aiocbs(log_level level, unsigned storage, unsigned preempt, unsign seastar_logger.log(level, "total {:{}} {:{}}", percpu_total, percpu_width, allcpus_total, allcpus_width); } -unsigned smp::adjust_max_networking_aio_io_control_blocks(unsigned network_iocbs) +unsigned smp::adjust_max_networking_aio_io_control_blocks(unsigned network_iocbs, unsigned reserve_iocbs) { static unsigned constexpr storage_iocbs = reactor::max_aio; static unsigned constexpr preempt_iocbs = 2; auto aio_max_nr = read_first_line_as("/proc/sys/fs/aio-max-nr"); auto aio_nr = read_first_line_as("/proc/sys/fs/aio-nr"); - auto available_aio = aio_max_nr - aio_nr; + auto available_aio = (aio_max_nr > aio_nr && aio_max_nr - aio_nr > reserve_iocbs) ? aio_max_nr - aio_nr - reserve_iocbs : 0u; /* unsigned subtraction wraps instead of going negative, so compare before subtracting */ auto requested_aio_network = network_iocbs * smp::count; auto requested_aio_other = (storage_iocbs + preempt_iocbs) * smp::count; auto requested_aio = requested_aio_network + requested_aio_other; @@ -4199,6 +4202,9 @@ unsigned smp::adjust_max_networking_aio_io_control_blocks(unsigned network_iocbs "Set /proc/sys/fs/aio-max-nr to at least {} (minimum) or {} (recommended for networking performance)", aio_nr + (requested_aio_other + smp::count), aio_nr + requested_aio); + if (reserve_iocbs) { + err.append(format(", with an added reserve of {} (requested via reserve_io_control_blocks config)", reserve_iocbs)); + } unsigned smp_count_max = available_aio / (storage_iocbs + preempt_iocbs + 1); if (smp_count_max > 0) { err.append(format(", or decrease the logical CPU count of the application to {} (maximum)", smp_count_max)); @@ -4394,7 +4400,8 @@ void smp::configure(const smp_options& smp_opts, const reactor_options& reactor_ auto max_networking_aio_io_control_blocks = reactor_opts.max_networking_io_control_blocks.get_value(); // Prevent errors about insufficient AIO blocks, when they are not needed by the reactor backend. 
if (reactor_opts.reactor_backend.get_selected_candidate().name() == "linux-aio") { - max_networking_aio_io_control_blocks = adjust_max_networking_aio_io_control_blocks(max_networking_aio_io_control_blocks); + max_networking_aio_io_control_blocks = adjust_max_networking_aio_io_control_blocks(max_networking_aio_io_control_blocks, + reactor_opts.reserve_io_control_blocks.get_value()); } reactor_config reactor_cfg = {