Skip to content

Commit

Permalink
Fix cli error messages with Scheduler
Browse files Browse the repository at this point in the history
Alters the XML-file written in error conditions in runpath to be valid
XML, both for Scheduler and for JobQueue.
  • Loading branch information
berland committed Jan 2, 2024
1 parent f662de3 commit a83cbf2
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 8 deletions.
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ dependencies=[
"typing_extensions",
"jinja2",
"lark",
"lxml",
"matplotlib",
"numpy<2",
"packaging",
Expand Down Expand Up @@ -129,6 +130,7 @@ style = [
]
types = [
"mypy",
"types-lxml",
"types-requests",
"types-PyYAML",
"types-python-dateutil",
Expand Down
17 changes: 12 additions & 5 deletions src/_ert_job_runner/reporting/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,6 @@ def _add_log_line(self, job):
time_str = time.strftime(TIME_FORMAT, time.localtime())
f.write(f"{time_str} Calling: {job.job_data['executable']} {args}\n")

# This file will be read by the job_queue_node_fscanf_EXIT() function
# in job_queue.c. Be very careful with changes in output format.
def _dump_error_file(self, job, error_msg):
with append(ERROR_file) as file:
file.write("<error>\n")
Expand All @@ -176,11 +174,20 @@ def _dump_error_file(self, job, error_msg):
if stderr:
stderr_file = os.path.join(os.getcwd(), job.std_err)
else:
stderr = f"<Not written by:{job.name()}>\n"
stderr = f"Not written by:{job.name()}\n"
else:
stderr = f"<stderr: Could not find file:{job.std_err}>\n"
stderr = f"stderr: Could not find file:{job.std_err}\n"
else:
stderr = "<stderr: Not redirected>\n"
stderr = "stderr: Not redirected\n"

# Escape XML characters
stderr = (
stderr.replace("&", "&amp;")
.replace("<", "&lt;")
.replace(">", "&gt;")
.replace('"', "&quot;")
.replace("'", "&apos;")
)

file.write(f" <stderr>\n{stderr}</stderr>\n")
if stderr_file:
Expand Down
8 changes: 8 additions & 0 deletions src/ert/job_queue/job_queue_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,14 @@ def _handle_end_status(
)

def _transition_to_failure(self, message: str) -> None:
# Parse XML entities:
message = (
message.replace("&amp;", "&")
.replace("&lt;", "<")
.replace("&gt;", ">")
.replace("&quot;", '"')
.replace("&apos;", "'")
)
logger.error(message)
self._transition_status(
thread_status=ThreadStatus.DONE,
Expand Down
20 changes: 19 additions & 1 deletion src/ert/scheduler/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@
import logging
import uuid
from enum import Enum
from typing import TYPE_CHECKING, Optional
from pathlib import Path
from typing import TYPE_CHECKING, List, Optional

from cloudevents.conversion import to_json
from cloudevents.http import CloudEvent
from lxml import etree

from ert.callbacks import forward_model_ok
from ert.constant_filenames import ERROR_file
from ert.job_queue.queue import _queue_state_event_type
from ert.load_status import LoadStatus
from ert.scheduler.driver import Driver
Expand Down Expand Up @@ -169,6 +172,7 @@ async def _handle_failure(self) -> None:
f"failed after reaching max submit ({self._requested_max_submit}):"
f"\n\t{self._callback_status_msg}"
)
log_info_from_exit_file(Path(self.real.run_arg.runpath) / ERROR_file)

async def _send(self, state: State) -> None:
self.state = state
Expand All @@ -187,3 +191,17 @@ async def _send(self, state: State) -> None:
},
)
await self._scheduler._events.put(to_json(event))


def log_info_from_exit_file(exit_file_path: Path) -> None:
    """Log the failure details recorded in a job's XML ERROR file.

    Does nothing if the file does not exist. If the file exists but is
    not well-formed XML, its raw contents are logged instead, so that
    logging a failure never itself raises a parse error.

    Args:
        exit_file_path: Path to the ERROR file to read.
    """
    if not exit_file_path.exists():
        return
    try:
        exit_file = etree.parse(exit_file_path)
    except etree.XMLSyntaxError:
        # A broken ERROR file must not abort failure handling; report
        # whatever text it holds so the information is not lost.
        raw_contents = exit_file_path.read_text(encoding="utf-8", errors="ignore")
        logger.error(
            "job failed with invalid XML ERROR file, contents '%s'", raw_contents
        )
        return
    # findtext() yields None for a missing element; str() turns that into
    # the text "None" so the format call below always gets four strings.
    filecontents: List[str] = [
        str(exit_file.findtext(element))
        for element in ("job", "reason", "stderr_file", "stderr")
    ]
    logger.error(
        "job {} failed with: '{}'\n\tstderr file: '{}',\n\tits contents:{}".format(
            *filecontents
        )
    )
2 changes: 1 addition & 1 deletion tests/integration_tests/cli/test_integration_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ def test_that_prior_is_not_overwritten_in_ensemble_experiment(
storage.close()


@pytest.mark.scheduler(skip=True)
@pytest.mark.scheduler()
@pytest.mark.integration_test
@pytest.mark.usefixtures("copy_poly_case", "try_queue_and_scheduler", "monkeypatch")
def test_failing_job_cli_error_message():
Expand Down
2 changes: 1 addition & 1 deletion tests/unit_tests/job_runner/test_file_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def test_report_with_failed_exit_message_argument(reporter):
"<reason>massive_failure</reason>" in content
), "ERROR file missing reason"
assert (
"<stderr: Not redirected>" in content
"stderr: Not redirected" in content
), "ERROR had invalid stderr information"
with open(STATUS_json, "r", encoding="utf-8") as f:
content = "".join(f.readlines())
Expand Down

0 comments on commit a83cbf2

Please sign in to comment.