From a83cbf2a0787d8b7fd1ff3f935f480a58285d316 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5vard=20Berland?= Date: Thu, 28 Dec 2023 10:09:55 +0100 Subject: [PATCH] Fix cli error messages with Scheduler Alters the XML-file written in error conditions in runpath to be valid XML, both for Scheduler and for JobQueue. --- pyproject.toml | 2 ++ src/_ert_job_runner/reporting/file.py | 17 +++++++++++----- src/ert/job_queue/job_queue_node.py | 8 ++++++++ src/ert/scheduler/job.py | 20 ++++++++++++++++++- .../cli/test_integration_cli.py | 2 +- .../job_runner/test_file_reporter.py | 2 +- 6 files changed, 43 insertions(+), 8 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5c42d6cdbf4..ad6d4b845b9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ dependencies=[ "typing_extensions", "jinja2", "lark", + "lxml", "matplotlib", "numpy<2", "packaging", @@ -129,6 +130,7 @@ style = [ ] types = [ "mypy", + "types-lxml", "types-requests", "types-PyYAML", "types-python-dateutil", diff --git a/src/_ert_job_runner/reporting/file.py b/src/_ert_job_runner/reporting/file.py index 55cfdfa3cb2..ba57953ea9b 100644 --- a/src/_ert_job_runner/reporting/file.py +++ b/src/_ert_job_runner/reporting/file.py @@ -158,8 +158,6 @@ def _add_log_line(self, job): time_str = time.strftime(TIME_FORMAT, time.localtime()) f.write(f"{time_str} Calling: {job.job_data['executable']} {args}\n") - # This file will be read by the job_queue_node_fscanf_EXIT() function - # in job_queue.c. Be very careful with changes in output format. def _dump_error_file(self, job, error_msg): with append(ERROR_file) as file: file.write("\n") @@ -176,11 +174,20 @@ def _dump_error_file(self, job, error_msg): if stderr: stderr_file = os.path.join(os.getcwd(), job.std_err) else: - stderr = f"\n" + stderr = f"Not written by:{job.name()}\n" else: - stderr = f"\n" + stderr = f"stderr: Could not find file:{job.std_err}\n" else: - stderr = "\n" + stderr = "stderr: Not redirected\n" + + # Escape XML characters + stderr = ( + stderr.replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace('"', """) + .replace("'", "'") + ) file.write(f" \n{stderr}\n") if stderr_file: diff --git a/src/ert/job_queue/job_queue_node.py b/src/ert/job_queue/job_queue_node.py index 69ec7ea62e9..218d00c1b09 100644 --- a/src/ert/job_queue/job_queue_node.py +++ b/src/ert/job_queue/job_queue_node.py @@ -300,6 +300,14 @@ def _handle_end_status( ) def _transition_to_failure(self, message: str) -> None: + # Parse XML entities: + message = ( + message.replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace(""", '"') + .replace("'", "'") + ) logger.error(message) self._transition_status( thread_status=ThreadStatus.DONE, diff --git a/src/ert/scheduler/job.py b/src/ert/scheduler/job.py index d1d63912eb2..90caa69fc29 100644 --- a/src/ert/scheduler/job.py +++ b/src/ert/scheduler/job.py @@ -4,12 +4,15 @@ import logging import uuid from enum import Enum -from typing import TYPE_CHECKING, Optional +from pathlib import Path +from typing import TYPE_CHECKING, List, Optional from cloudevents.conversion import to_json from cloudevents.http import CloudEvent +from lxml import etree from ert.callbacks import forward_model_ok +from ert.constant_filenames import ERROR_file from ert.job_queue.queue import _queue_state_event_type from ert.load_status import LoadStatus from ert.scheduler.driver import Driver @@ -169,6 +172,7 @@ async def _handle_failure(self) -> None: f"failed after reaching max submit ({self._requested_max_submit}):" f"\n\t{self._callback_status_msg}" ) + log_info_from_exit_file(Path(self.real.run_arg.runpath) / ERROR_file) async def _send(self, state: State) -> None: self.state = state @@ -187,3 +191,17 @@ async def _send(self, state: State) -> None: }, ) await self._scheduler._events.put(to_json(event)) + + +def log_info_from_exit_file(exit_file_path: Path) -> None: + if not exit_file_path.exists(): + return + exit_file = etree.parse(exit_file_path) + filecontents: List[str] = [] + for element in ["job", "reason", "stderr_file", "stderr"]: + filecontents.append(str(exit_file.findtext(element))) + logger.error( + "job {} failed with: '{}'\n\tstderr file: '{}',\n\tits contents:{}".format( + *filecontents + ) + ) diff --git a/tests/integration_tests/cli/test_integration_cli.py b/tests/integration_tests/cli/test_integration_cli.py index 958123b35e9..982a672ac40 100644 --- a/tests/integration_tests/cli/test_integration_cli.py +++ b/tests/integration_tests/cli/test_integration_cli.py @@ -432,7 +432,7 @@ def test_that_prior_is_not_overwritten_in_ensemble_experiment( storage.close() -@pytest.mark.scheduler(skip=True) +@pytest.mark.scheduler() @pytest.mark.integration_test @pytest.mark.usefixtures("copy_poly_case", "try_queue_and_scheduler", "monkeypatch") def test_failing_job_cli_error_message(): diff --git a/tests/unit_tests/job_runner/test_file_reporter.py b/tests/unit_tests/job_runner/test_file_reporter.py index 40c21b3e46d..4e7e2a1a639 100644 --- a/tests/unit_tests/job_runner/test_file_reporter.py +++ b/tests/unit_tests/job_runner/test_file_reporter.py @@ -110,7 +110,7 @@ def test_report_with_failed_exit_message_argument(reporter): "massive_failure" in content ), "ERROR file missing reason" assert ( - "" in content + "stderr: Not redirected" in content ), "ERROR had invalid stderr information" with open(STATUS_json, "r", encoding="utf-8") as f: content = "".join(f.readlines())