Merge pull request #667 from opensafely-core/level-4-check-fixes
level 4 check fixes
bloodearnest authored Oct 13, 2023
2 parents f99328e + 261eb6f commit 5719264
Showing 5 changed files with 30 additions and 10 deletions.
1 change: 1 addition & 0 deletions jobrunner/config.py
@@ -157,6 +157,7 @@ def database_urls_from_env(env):
".html",
".pdf",
".txt",
".log",
".json",
]

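This config.py change adds ".log" to the list of file extensions allowed onto the level 4 server. As a rough illustration (the constant and helper names here are assumptions, not necessarily the names jobrunner uses), an extension check against such an allowlist could look like:

from pathlib import Path

# Assumed allowlist, mirroring the extensions visible in the hunk above.
LEVEL4_FILE_TYPES = [".html", ".pdf", ".txt", ".log", ".json"]

def is_valid_level4_file_type(path):
    # A file is acceptable if its suffix appears on the allowlist.
    return Path(path).suffix in LEVEL4_FILE_TYPES

assert is_valid_level4_file_type("output/summary.log")
assert not is_valid_level4_file_type("output/output.rds")
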
18 changes: 13 additions & 5 deletions jobrunner/executors/local.py
@@ -415,8 +415,10 @@ def finalize_job(job_definition):
     if job_definition.cancelled:
         write_job_logs(job_definition, job_metadata, copy_log_to_workspace=False)
     else:
-        write_job_logs(job_definition, job_metadata, copy_log_to_workspace=True)
         excluded = persist_outputs(job_definition, results.outputs, job_metadata)
+        write_job_logs(
+            job_definition, job_metadata, copy_log_to_workspace=True, excluded=excluded
+        )
         results.level4_excluded_files.update(**excluded)
 
     RESULTS[job_definition.id] = results
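For readers following the data flow: persist_outputs evidently returns a dict mapping each excluded output path to a human-readable reason (write_log_file iterates excluded.items() and save_results takes its len()), and the reordering above makes that dict available before the job log is written. A sketch of the assumed shape, with reason strings taken from the tests in this commit:

# Assumed shape of `excluded`, inferred from the diff and the tests below.
excluded = {
    "output/output.rds": "File type of .rds is not valid level 4 file",
    "output/output.csv": "File has patient_id column",
}
# finalize_job then merges it into the results:
# results.level4_excluded_files.update(**excluded)
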
@@ -443,11 +445,13 @@ def get_job_metadata(job_definition, outputs, container_metadata):
     return job_metadata
 
 
-def write_job_logs(job_definition, job_metadata, copy_log_to_workspace=True):
+def write_job_logs(
+    job_definition, job_metadata, copy_log_to_workspace=True, excluded=None
+):
     """Copy logs to log dir and workspace."""
     # Dump useful info in log directory
     log_dir = get_log_dir(job_definition)
-    write_log_file(job_definition, job_metadata, log_dir / "logs.txt")
+    write_log_file(job_definition, job_metadata, log_dir / "logs.txt", excluded)
     with open(log_dir / "metadata.json", "w") as f:
         json.dump(job_metadata, f, indent=2)

@@ -643,7 +647,7 @@ def get_unmatched_outputs(job_definition, outputs):
     return [filename for filename in all_outputs if filename not in outputs]
 
 
-def write_log_file(job_definition, job_metadata, filename):
+def write_log_file(job_definition, job_metadata, filename, excluded):
     """
     This dumps the (timestamped) Docker logs for a job to disk, followed by
     some useful metadata about the job and its outputs
@@ -659,6 +663,10 @@ def write_log_file(job_definition, job_metadata, filename):
f.write(f"{key}: {job_metadata[key]}\n")
f.write("\noutputs:\n")
f.write(tabulate(outputs, separator=" - ", indent=2, empty="(no outputs)"))
if excluded:
f.write("\nexcluded files:\n")
for excluded_file, msg in excluded.items():
f.write(f"{excluded_file}: {msg}")


# Keys of fields to log in manifest.json and log file
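A minimal, self-contained sketch of what the new block appends to the job's logs.txt, using the same write loop as the hunk above and an example exclusion reason taken from the tests (the rest of the log content is omitted):

import io

excluded = {
    "output/output.txt": "File size of 1.0Mb is larger that limit of 0.5Mb.",
}

f = io.StringIO()
if excluded:
    f.write("\nexcluded files:\n")
    for excluded_file, msg in excluded.items():
        f.write(f"{excluded_file}: {msg}")

print(f.getvalue())
# excluded files:
# output/output.txt: File size of 1.0Mb is larger that limit of 0.5Mb.
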
@@ -776,7 +784,7 @@ def redact_environment_variables(container_metadata):

 def write_manifest_file(workspace_dir, manifest):
     manifest_file = workspace_dir / METADATA_DIR / MANIFEST_FILE
-    manifest_file.parent.mkdir(exist_ok=True)
+    manifest_file.parent.mkdir(exist_ok=True, parents=True)
     manifest_file_tmp = manifest_file.with_suffix(".tmp")
     manifest_file_tmp.write_text(json.dumps(manifest, indent=2))
     manifest_file_tmp.replace(manifest_file)
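The parents=True addition matters on a fresh workspace: pathlib's Path.mkdir raises FileNotFoundError if intermediate directories are missing unless parents=True is given, while exist_ok=True keeps repeated calls from failing. A standalone illustration (using a literal "metadata" directory in place of the METADATA_DIR constant):

import tempfile
from pathlib import Path

workspace_dir = Path(tempfile.mkdtemp()) / "workspace"  # does not exist yet
manifest_file = workspace_dir / "metadata" / "manifest.json"

# Without parents=True this raises FileNotFoundError because "workspace"
# itself is missing; with it, the intermediate directories are created.
manifest_file.parent.mkdir(exist_ok=True, parents=True)
manifest_file.write_text("{}")
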
5 changes: 1 addition & 4 deletions jobrunner/run.py
@@ -466,10 +466,7 @@ def save_results(job, job_definition, results):
message = "Completed successfully"

if results.level4_excluded_files:
files = "\n".join(
f"{f}: {msg}" for f, msg in results.level4_excluded_files.items()
)
message += f", but {len(results.level4_excluded_files)} file(s) marked as moderately_sensitive were excluded:\n{files}"
message += f", but {len(results.level4_excluded_files)} file(s) marked as moderately_sensitive were excluded. See job log for details."

set_code(job, code, message, error=error, results=results)
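
The net effect of the run.py change is that the status message now carries only a count and defers the per-file details to the job log written above. A quick sketch of the resulting message, reusing the f-string from the diff (the "too big" reason is the example used in tests/test_run.py):

level4_excluded_files = {"output/output.csv": "too big"}

message = "Completed successfully"
if level4_excluded_files:
    message += f", but {len(level4_excluded_files)} file(s) marked as moderately_sensitive were excluded. See job log for details."

print(message)
# Completed successfully, but 1 file(s) marked as moderately_sensitive were excluded. See job log for details.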

14 changes: 14 additions & 0 deletions tests/test_local_executor.py
@@ -513,6 +513,10 @@ def test_finalize_large_level4_outputs(
assert "output/output.txt" in txt
assert "1.0Mb" in txt
assert "0.5Mb" in txt
log_file = level4_dir / "metadata/action.log"
log = log_file.read_text()
assert "excluded files:" in log
assert "output/output.txt: File size of 1.0Mb is larger that limit of 0.5Mb." in log


@pytest.mark.needs_docker
@@ -547,6 +551,11 @@ def test_finalize_invalid_file_type(docker_cleanup, job_definition, tmp_work_dir):
     txt = message_file.read_text()
     assert "output/output.rds" in txt
 
+    log_file = level4_dir / "metadata/action.log"
+    log = log_file.read_text()
+    assert "excluded files:" in log
+    assert "output/output.rds: File type of .rds is not valid level 4 file" in log
+
 
 @pytest.mark.needs_docker
 def test_finalize_patient_id_header(docker_cleanup, job_definition, tmp_work_dir):
@@ -585,6 +594,11 @@ def test_finalize_patient_id_header(docker_cleanup, job_definition, tmp_work_dir
assert "output/output.csv" in txt
assert "patient_id" in txt

log_file = level4_dir / "metadata/action.log"
log = log_file.read_text()
assert "excluded files:" in log
assert "output/output.csv: File has patient_id column" in log


@pytest.mark.needs_docker
def test_finalize_large_level4_outputs_cleanup(
2 changes: 1 addition & 1 deletion tests/test_run.py
@@ -597,7 +597,7 @@ def test_handle_job_finalized_success_with_large_file(db):
     assert job.state == State.SUCCEEDED
     assert "Completed successfully" in job.status_message
     assert "were excluded" in job.status_message
-    assert "output/output.csv: too big" in job.status_message
+    assert "output/output.csv: too big" not in job.status_message
 
 
 @pytest.mark.parametrize(
