diff --git a/ceph-nvmeof.conf b/ceph-nvmeof.conf index 05f385ff..95abf43d 100644 --- a/ceph-nvmeof.conf +++ b/ceph-nvmeof.conf @@ -68,6 +68,7 @@ tgt_path = /usr/local/bin/nvmf_tgt timeout = 60.0 #log_level = #protocol_log_level = WARNING +#log_file_dir = # Example value: -m 0x3 -L all # tgt_cmd_extra_args = @@ -79,3 +80,4 @@ transport_tcp_options = {"in_capsule_data_size" : 8192, "max_io_qpairs_per_ctrlr [monitor] #timeout = 1.0 +#log_file_dir = diff --git a/control/server.py b/control/server.py index 4a2f92a0..e0d18478 100644 --- a/control/server.py +++ b/control/server.py @@ -92,6 +92,8 @@ def __init__(self, config: GatewayConfig): self.gw_logger_object = GatewayLogger(self.config) self.logger = self.gw_logger_object.logger self.spdk_process = None + self.spdk_log_file = None + self.spdk_log_file_path = None self.gateway_rpc = None self.server = None self.discovery_pid = None @@ -102,6 +104,8 @@ def __init__(self, config: GatewayConfig): self.rpc_lock = threading.Lock() self.group_id = 0 self.monitor_client = '/usr/bin/ceph-nvmeof-monitor-client' + self.monitor_client_log_file = None + self.monitor_client_log_file_path = None self.omap_state = None self.omap_lock = None @@ -130,6 +134,28 @@ def __exit__(self, exc_type, exc_value, traceback): if self.spdk_process: self._stop_spdk() + if self.spdk_log_file: + try: + close(self.spdk_log_file) + except Exception: + pass + self.spdk_log_file = None + + if self.spdk_log_file_path: + GatewayLogger.compress_file(self.spdk_log_file_path, f"{self.spdk_log_file_path}.gz") + self.spdk_log_file_path = None + + if self.monitor_client_log_file: + try: + close(self.monitor_client_log_file) + except Exception: + pass + self.monitor_client_log_file = None + + if self.monitor_client_log_file_path: + GatewayLogger.compress_file(self.monitor_client_log_file_path, f"{self.monitor_client_log_file_path}.gz") + self.monitor_client_log_file_path = None + if self.server: if logger: logger.info("Stopping the server...") @@ -272,11 +298,26 @@ def _start_monitor_client(self): "--server-cert", self.config.get("mtls", "server_cert"), "--client-key", self.config.get("mtls", "client_key"), "--client-cert", self.config.get("mtls", "client_cert") ] + + self.monitor_client_log_file = None + self.monitor_client_log_file_path = None + log_stderr = None + log_file_dir = self.config.get_with_default("monitor", "log_file_dir", None) + self.monitor_client_log_file_path = self.handle_process_output_file(log_file_dir, "monitor-client") + if self.monitor_client_log_file_path: + try: + self.monitor_client_log_file = open(self.monitor_client_log_file_path, "wt") + log_stderr = subprocess.STDOUT + except Exception: + pass + self.logger.info(f"Starting {' '.join(cmd)}") try: # start monitor client process - self.monitor_client_process = subprocess.Popen(cmd) + self.monitor_client_process = subprocess.Popen(cmd, stdout=self.monitor_client_log_file, stderr=log_stderr) self.logger.info(f"monitor client process id: {self.monitor_client_process.pid}") + if self.monitor_client_log_file and self.monitor_client_log_file_path: + self.logger.info(f"Monitor log file is {self.monitor_client_log_file_path}") # wait for monitor notification of the group id self._wait_for_group_id() except Exception: @@ -365,6 +406,36 @@ def _grpc_server(self, address): return server + def handle_process_output_file(self, log_file_dir, prefix): + if not log_file_dir or not log_file_dir.strip(): + return None + + try: + os.makedirs(log_file_dir, 0o755, True) + except Exception: + return None + + if not log_file_dir.endswith("/"): + log_file_dir += "/" + log_file_path = f"{log_file_dir}{prefix}-{self.name}.log" + + log_files = [f"{log_file_path}{suffix}" for suffix in ['.bak', '.gz.bak']] + for log_file in log_files: + try: + os.remove(log_file) + except Exception: + pass + try: + os.rename(log_file_path, f"{log_file_path}.bak") + except FileNotFoundError: + pass + try: + os.rename(f"{log_file_path}.gz", f"{log_file_path}.gz.bak") + except FileNotFoundError: + pass + + return log_file_path + def _start_spdk(self, omap_state): """Starts SPDK process.""" @@ -403,11 +474,22 @@ def _start_spdk(self, omap_state): self.logger.info(f"SPDK autodetecting cpu_mask: {cpu_mask}") cmd += shlex.split(cpu_mask) + self.spdk_log_file = None + self.spdk_log_file_path = None + log_stderr = None + log_file_dir = self.config.get_with_default("spdk", "log_file_dir", None) + self.spdk_log_file_path = self.handle_process_output_file(log_file_dir, "spdk") + if self.spdk_log_file_path: + try: + self.spdk_log_file = open(self.spdk_log_file_path, "wt") + log_stderr = subprocess.STDOUT + except Exception: + pass self.logger.info(f"Starting {' '.join(cmd)}") try: # start spdk process time.sleep(2) # this is a temporary hack, we have a timing issue here. Once we solve it the sleep will ve removed - self.spdk_process = subprocess.Popen(cmd) + self.spdk_process = subprocess.Popen(cmd, stdout=self.spdk_log_file, stderr=log_stderr) except Exception: self.logger.exception(f"Unable to start SPDK") raise @@ -424,6 +506,8 @@ def _start_spdk(self, omap_state): # connect timeout: spdk client retries 5 times per sec conn_retries = int(timeout * 5) self.logger.info(f"SPDK process id: {self.spdk_process.pid}") + if self.spdk_log_file and self.spdk_log_file_path: + self.logger.info(f"SPDK log file is {self.spdk_log_file_path}") self.logger.info( f"Attempting to initialize SPDK: rpc_socket: {self.spdk_rpc_socket_path}," f" conn_retries: {conn_retries}, timeout: {timeout}, log level: {protocol_log_level}" @@ -497,6 +581,15 @@ def _stop_monitor_client(self): timeout = self.config.getfloat_with_default("monitor", "timeout", 1.0) self._stop_subprocess(self.monitor_client_process, timeout) self.monitor_client_process = None + if self.monitor_client_log_file: + try: + close(self.monitor_client_log_file) + except Exception: + pass + self.monitor_client_log_file = None + if self.monitor_client_log_file_path: + GatewayLogger.compress_file(self.monitor_client_log_file_path, f"{self.monitor_client_log_file_path}.gz") + self.monitor_client_log_file_path = None def _stop_spdk(self): """Stops SPDK process.""" @@ -504,6 +597,15 @@ def _stop_spdk(self): timeout = self.config.getfloat_with_default("spdk", "timeout", 60.0) self._stop_subprocess(self.spdk_process, timeout) self.spdk_process = None + if self.spdk_log_file: + try: + close(self.spdk_log_file) + except Exception: + pass + self.spdk_log_file = None + if self.spdk_log_file_path: + GatewayLogger.compress_file(self.spdk_log_file_path, f"{self.spdk_log_file_path}.gz") + self.spdk_log_file_path = None # Clean spdk rpc socket if self.spdk_rpc_socket_path and os.path.exists(self.spdk_rpc_socket_path): diff --git a/control/utils.py b/control/utils.py index 08a89303..570f5415 100644 --- a/control/utils.py +++ b/control/utils.py @@ -244,7 +244,7 @@ def __init__(self, config=None): max_log_file_size = 0 max_log_files_count = 0 try: - os.makedirs(self.log_directory, 0o777, True) + os.makedirs(self.log_directory, 0o755, True) logdir_ok = True self.handler = logging.handlers.RotatingFileHandler(self.log_directory + "/" + GatewayLogger.NVME_LOG_FILE_NAME, maxBytes = max_log_file_size * 1024 * 1024, diff --git a/tests/ceph-nvmeof.tls.conf b/tests/ceph-nvmeof.tls.conf index 398960d6..571b4161 100644 --- a/tests/ceph-nvmeof.tls.conf +++ b/tests/ceph-nvmeof.tls.conf @@ -28,6 +28,8 @@ enable_spdk_discovery_controller = False #verify_nqns = True #allowed_consecutive_spdk_ping_failures = 1 #spdk_ping_interval_in_seconds = 2.0 +#max_hosts_per_namespace = 1 +#max_namespaces_with_netmask = 1000 [gateway-logs] log_level=debug @@ -63,7 +65,9 @@ tgt_path = /usr/local/bin/nvmf_tgt #rpc_socket_name = spdk.sock #tgt_cmd_extra_args = --env-context="--no-huge -m1024" --iova-mode=va timeout = 60.0 -#log_level = WARNING +#log_level = +#protocol_log_level = WARNING +#log_file_dir = /var/log/ceph # Example value: -m 0x3 -L all # tgt_cmd_extra_args = diff --git a/tests/test_log_files.py b/tests/test_log_files.py index 00633605..44eafccd 100644 --- a/tests/test_log_files.py +++ b/tests/test_log_files.py @@ -10,6 +10,7 @@ import shutil import stat import gzip +import glob config = "ceph-nvmeof.conf" subsystem_prefix = "nqn.2016-06.io.spdk:cnode" @@ -36,6 +37,7 @@ def gateway(config, request): config.config["gateway-logs"]["max_log_file_size_in_mb"] = "10" config.config["gateway-logs"]["log_files_rotation_enabled"] = "True" config.config["gateway"]["name"] = request.node.name.replace("_", "-") + config.config["spdk"]["log_file_dir"] = "/var/log/ceph" if request.node.name == "test_log_files_disabled": config.config["gateway-logs"]["log_files_enabled"] = "False" elif request.node.name == "test_log_files_rotation": @@ -43,6 +45,8 @@ def gateway(config, request): elif request.node.name == "test_log_files_disable_rotation": config.config["gateway-logs"]["max_log_file_size_in_mb"] = "1" config.config["gateway-logs"]["log_files_rotation_enabled"] = "False" + elif request.node.name == "test_no_spdk_log": + config.config["spdk"]["log_file_dir"] = "" with GatewayServer(config) as gateway: @@ -73,6 +77,9 @@ def test_log_files(gateway): assert "nvmeof-log.gz" in files or "nvmeof-log.0" in files with open(f"/var/log/ceph/nvmeof-{gw.name}/nvmeof-log", "r") as f: assert f"Starting gateway {gw.name}" in f.read() + spdk_files = glob.glob("/var/log/ceph/spdk*") + assert len(spdk_files) == 1 + assert f"spdk-{gw.name}" in spdk_files[0] def test_log_files_disabled(gateway): gw = gateway @@ -82,22 +89,22 @@ def test_log_files_disabled(gateway): assert subs_list.status == 0 assert len(subs_list.subsystems) == 1 assert subs_list.subsystems[0].nqn == subsystem_prefix + "1" - files = os.listdir("/var/log/ceph") + files = glob.glob("/var/log/ceph/nvme*") assert files == [] def test_log_files_rotation(gateway): gw = gateway - files = os.listdir("/var/log/ceph") + files = glob.glob("/var/log/ceph/nvme*") assert len(files) == 1 - assert files[0] == f"nvmeof-{gw.name}" + assert f"nvmeof-{gw.name}" in files[0] cli(["subsystem", "add", "--subsystem", subsystem_prefix + "2"]) cli(["subsystem", "add", "--subsystem", subsystem_prefix + "3"]) cli(["subsystem", "add", "--subsystem", subsystem_prefix + "4"]) for i in range(2000): cli(["subsystem", "list"]) - files = os.listdir("/var/log/ceph") + files = glob.glob("/var/log/ceph/nvme*") assert len(files) == 1 - assert files[0] == f"nvmeof-{gw.name}" + assert f"nvmeof-{gw.name}" in files[0] files = os.listdir(f"/var/log/ceph/nvmeof-{gw.name}") assert len(files) > 1 assert "nvmeof-log.1" in files @@ -119,17 +126,17 @@ def test_log_files_rotation(gateway): def test_log_files_disable_rotation(gateway): gw = gateway - files = os.listdir("/var/log/ceph") + files = glob.glob("/var/log/ceph/nvme*") assert len(files) == 1 - assert files[0] == f"nvmeof-{gw.name}" + assert f"nvmeof-{gw.name}" in files[0] cli(["subsystem", "add", "--subsystem", subsystem_prefix + "5"]) cli(["subsystem", "add", "--subsystem", subsystem_prefix + "6"]) cli(["subsystem", "add", "--subsystem", subsystem_prefix + "7"]) for i in range(2000): cli(["subsystem", "list"]) - files = os.listdir("/var/log/ceph") + files = glob.glob("/var/log/ceph/nvme*") assert len(files) == 1 - assert files[0] == f"nvmeof-{gw.name}" + assert f"nvmeof-{gw.name}" in files[0] files = os.listdir(f"/var/log/ceph/nvmeof-{gw.name}") assert len(files) == 1 assert files[0] == "nvmeof-log" @@ -138,3 +145,11 @@ def test_log_files_disable_rotation(gateway): with open(f"/var/log/ceph/nvmeof-{gw.name}/nvmeof-log", mode="r") as f: check_for = f"Starting gateway {gw.name}" assert check_for in f.read() + +def test_no_spdk_log(gateway): + gw = gateway + files = glob.glob("/var/log/ceph/nvme*") + assert len(files) == 1 + assert f"nvmeof-{gw.name}" in files[0] + spdk_files = glob.glob("/var/log/ceph/spdk*") + assert len(spdk_files) == 0