Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

spdk: do not use huge pages #898

Merged
merged 2 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build-container.yml
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,7 @@ jobs:
strategy:
fail-fast: false
matrix:
test: ["sanity", "ns_lb_change", "no_subsystems", "state_transitions", "state_transitions_both_gws", "state_transitions_loop", "state_transitions_rand_loop", "late_registration", "late_registration_loop", "4gws", "4gws_loop", "4gws_create_delete", "4gws_create_delete_loop", "namespaces", "namespaces_loop", "mtls", "notify", "ceph_status", "blocklist", "main_exit"]
test: ["sanity", "no_huge", "ns_lb_change", "no_subsystems", "state_transitions", "state_transitions_both_gws", "state_transitions_loop", "state_transitions_rand_loop", "late_registration", "late_registration_loop", "4gws", "4gws_loop", "4gws_create_delete", "4gws_create_delete_loop", "namespaces", "namespaces_loop", "mtls", "notify", "ceph_status", "blocklist", "main_exit"]
runs-on: ubuntu-latest
env:
HUGEPAGES: 1024 # 4 spdk instances
Expand Down
38 changes: 0 additions & 38 deletions control/grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,44 +302,6 @@ def __init__(self, config: GatewayConfig, gateway_state: GatewayStateHandler, rp
config.display_environment_info(self.logger)
self.ceph_utils = ceph_utils
self.ceph_utils.fetch_and_display_ceph_version()
requested_hugepages_val = os.getenv("HUGEPAGES", "")
if not requested_hugepages_val:
self.logger.warning("Can't get requested huge pages count")
else:
requested_hugepages_val = requested_hugepages_val.strip()
try:
requested_hugepages_val = int(requested_hugepages_val)
self.logger.info(f"Requested huge pages count is {requested_hugepages_val}")
except ValueError:
self.logger.warning(f"Requested huge pages count value {requested_hugepages_val} is not numeric")
requested_hugepages_val = None
hugepages_file = os.getenv("HUGEPAGES_DIR", "")
if not hugepages_file:
hugepages_file = "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages"
self.logger.warning("No huge pages file defined, will use /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages")
else:
hugepages_file = hugepages_file.strip()
if os.access(hugepages_file, os.F_OK):
try:
hugepages_val = ""
with open(hugepages_file) as f:
hugepages_val = f.readline()
hugepages_val = hugepages_val.strip()
if hugepages_val:
try:
hugepages_val = int(hugepages_val)
self.logger.info(f"Actual huge pages count is {hugepages_val}")
except ValueError:
self.logger.warning(f"Actual huge pages count value {hugepages_val} is not numeric")
hugepages_val = ""
if requested_hugepages_val and hugepages_val != "" and requested_hugepages_val > hugepages_val:
self.logger.warning(f"The actual huge page count {hugepages_val} is smaller than the requested value of {requested_hugepages_val}")
else:
self.logger.warning(f"Can't read actual huge pages count value from {hugepages_file}")
except Exception as ex:
self.logger.exception(f"Can't read actual huge pages count value from {hugepages_file}")
else:
self.logger.warning(f"Can't find huge pages file {hugepages_file}")
self.config = config
config.dump_config_file(self.logger)
self.rpc_lock = rpc_lock
Expand Down
51 changes: 51 additions & 0 deletions control/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,16 @@ def _start_spdk(self, omap_state):
if spdk_tgt_cmd_extra_args:
cmd += shlex.split(spdk_tgt_cmd_extra_args)

# No huge pages configuration controlled by spdk.mem_size conf option
spdk_memsize = self.config.getint_with_default("spdk", "mem_size", None)
if spdk_memsize:
self.logger.info(f"SPDK will not use huge pages, mem size: {spdk_memsize}")
cmd += ["--no-huge", "-s", str(spdk_memsize)]
else:
self.logger.info(f"SPDK will use huge pages, probing...")
self.probe_huge_pages()


# If not provided in configuration,
# calculate cpu mask available for spdk reactors
if not cpumask_set(cmd):
Expand Down Expand Up @@ -707,6 +717,47 @@ def _ping(self):
self.logger.exception(f"spdk_get_version failed")
return False

def probe_huge_pages(self):
    """Probe the kernel's huge pages configuration and log the findings.

    The requested page count is taken from the ``HUGEPAGES`` environment
    variable. The actual count is read from the file named by the
    ``HUGEPAGES_DIR`` environment variable, falling back to the 2048kB
    ``nr_hugepages`` sysfs file when that variable is unset or empty.

    This is a diagnostic only: every problem is reported through
    ``self.logger`` and the method always returns ``None``.
    """
    default_hugepages_file = "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages"

    # Requested count; None means "unknown" (variable unset or not numeric).
    requested_count = None
    requested_text = os.getenv("HUGEPAGES", "")
    if not requested_text:
        self.logger.warning("Can't get requested huge pages count")
    else:
        requested_text = requested_text.strip()
        try:
            requested_count = int(requested_text)
        except ValueError:
            self.logger.warning(f"Requested huge pages count value {requested_text} is not numeric")
        else:
            self.logger.info(f"Requested huge pages count is {requested_count}")

    # Despite its name, HUGEPAGES_DIR holds the path of the counter file.
    hugepages_file = os.getenv("HUGEPAGES_DIR", "")
    if not hugepages_file:
        hugepages_file = default_hugepages_file
        self.logger.warning(f"No huge pages file defined, will use {default_hugepages_file}")
    else:
        hugepages_file = hugepages_file.strip()

    if not os.access(hugepages_file, os.F_OK):
        self.logger.warning(f"Can't find huge pages file {hugepages_file}")
        return

    try:
        with open(hugepages_file) as f:
            actual_text = f.readline().strip()
    except Exception:
        # Deliberately broad: a failed probe must only be logged, never
        # propagate to the caller.
        self.logger.exception(f"Can't read actual huge pages count value from {hugepages_file}")
        return

    if not actual_text:
        self.logger.warning(f"Can't read actual huge pages count value from {hugepages_file}")
        return

    try:
        actual_count = int(actual_text)
    except ValueError:
        self.logger.warning(f"Actual huge pages count value {actual_text} is not numeric")
        return
    self.logger.info(f"Actual huge pages count is {actual_count}")

    # A requested count of 0 (or an unknown one) is never compared.
    if requested_count and requested_count > actual_count:
        self.logger.warning(f"The actual huge page count {actual_count} is smaller than the requested value of {requested_count}")

def gateway_rpc_caller(self, requests, is_add_req):
"""Passes RPC requests to gateway service."""
for key, val in requests.items():
Expand Down
87 changes: 87 additions & 0 deletions tests/ceph-nvmeof.no-huge.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#
# Copyright (c) 2021 International Business Machines
# All rights reserved.
#
# SPDX-License-Identifier: LGPL-3.0-or-later
#
# Authors: [email protected], [email protected]
#

[gateway]
name =
group =
addr = 0.0.0.0
port = 5500
enable_auth = False
state_update_notify = True
state_update_timeout_in_msec = 2000
state_update_interval_sec = 5
enable_spdk_discovery_controller = False
#omap_file_lock_duration = 20
#omap_file_lock_retries = 30
#omap_file_lock_retry_sleep_interval = 1.0
#omap_file_update_reloads = 10
#enable_prometheus_exporter = True
#prometheus_exporter_ssl = True
#prometheus_port = 10008
#prometheus_bdev_pools = rbd
#prometheus_stats_interval = 10
#verify_nqns = True
#allowed_consecutive_spdk_ping_failures = 1
#spdk_ping_interval_in_seconds = 2.0
#max_hosts_per_namespace = 1
#max_namespaces_with_netmask = 1000
#max_subsystems = 128
#max_namespaces = 256
#max_hosts_per_subsystem = 32

[gateway-logs]
log_level=debug
#log_files_enabled = True
#log_files_rotation_enabled = True
#verbose_log_messages = True
#max_log_file_size_in_mb=10
#max_log_files_count=20
#max_log_directory_backups=10
#
# Notice that if you change the log directory the log files will only be visible inside the container
#
#log_directory = /var/log/ceph/

[discovery]
addr = 0.0.0.0
port = 8009

[ceph]
pool = rbd
config_file = /etc/ceph/ceph.conf

[mtls]
server_key = ./server.key
client_key = ./client.key
server_cert = ./server.crt
client_cert = ./client.crt

[spdk]
bdevs_per_cluster = 32
# When mem_size is set, the gateway starts SPDK with "--no-huge -s <mem_size>",
# i.e. without huge pages; when it is unset, SPDK uses huge pages and the
# gateway probes the kernel's huge pages configuration instead.
# NOTE(review): the unit is presumably megabytes (SPDK's -s option) - confirm.
mem_size=4096
tgt_path = /usr/local/bin/nvmf_tgt
#rpc_socket_dir = /var/tmp/
#rpc_socket_name = spdk.sock
#tgt_cmd_extra_args = --env-context="--no-huge -m1024" --iova-mode=va
timeout = 60.0
#log_level =
#protocol_log_level = WARNING
#log_file_dir =

# Example value: -m 0x3 -L all
# tgt_cmd_extra_args =

# transports = tcp

# Example value: {"max_queue_depth" : 16, "max_io_size" : 4194304, "io_unit_size" : 1048576, "zcopy" : false}
transport_tcp_options = {"in_capsule_data_size" : 8192, "max_io_qpairs_per_ctrlr" : 7}

[monitor]
#timeout = 1.0
#log_file_dir =
1 change: 1 addition & 0 deletions tests/ha/no_huge.sh
13 changes: 13 additions & 0 deletions tests/ha/start_up_no_huge.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/sh
# Run the regular HA start-up script with the no-huge-pages gateway config.

set -ex

# Locate the directory holding start_up.sh: the CI checkout when
# GITHUB_WORKSPACE is defined, otherwise the directory of this script.
if [ -n "$GITHUB_WORKSPACE" ]; then
    test_dir="$GITHUB_WORKSPACE/tests/ha"
else
    # Quote "$0" so a path containing spaces is not word-split.
    test_dir=$(dirname "$0")
fi

# Exported so the child start_up.sh process inherits it; the path is relative
# to the current working directory.
export NVMEOF_CONFIG=./tests/ceph-nvmeof.no-huge.conf
"$test_dir/start_up.sh"
Loading