From a8932daccad7ac2f9d7b3efb631cf7d6b4d82f9b Mon Sep 17 00:00:00 2001 From: bloodearnest Date: Fri, 10 Nov 2023 12:31:41 +0000 Subject: [PATCH] Perform level 4 files checks in local_run Because local_run doesn't have a level 4 directory configured, the checks were skipped. This change move that conditional to different location, so the checks are performed in local_run, and adds tests Partial fix to #675: --- jobrunner/executors/local.py | 36 +++++++++++++++------------ tests/test_local_executor.py | 48 +++++++++++++++++++++++------------- 2 files changed, 51 insertions(+), 33 deletions(-) diff --git a/jobrunner/executors/local.py b/jobrunner/executors/local.py index 71158ec5..ae71d997 100644 --- a/jobrunner/executors/local.py +++ b/jobrunner/executors/local.py @@ -487,13 +487,18 @@ def persist_outputs(job_definition, outputs, job_metadata): # Copy out medium privacy files medium_privacy_dir = get_medium_privacy_workspace(job_definition.workspace) - if medium_privacy_dir: - for filename, privacy_level in outputs.items(): - if privacy_level == "moderately_sensitive": - ok, job_msg, file_msg = check_l4_file( - job_definition, filename, sizes[filename], workspace_dir - ) + for filename, privacy_level in outputs.items(): + if privacy_level == "moderately_sensitive": + ok, job_msg, file_msg = check_l4_file( + job_definition, filename, sizes[filename], workspace_dir + ) + + if not ok: + excluded_files[filename] = job_msg + + # local run currently does not have a level 4 directory + if medium_privacy_dir: message_file = medium_privacy_dir / (filename + ".txt") if ok: @@ -503,19 +508,18 @@ def persist_outputs(job_definition, outputs, job_metadata): # if it previously had a too big notice, delete it delete_files_from_directory(medium_privacy_dir, [message_file]) else: - excluded_files[filename] = job_msg message_file.parent.mkdir(exist_ok=True, parents=True) message_file.write_text(file_msg) - # this can be removed once osrelease is dead - write_manifest_file( - medium_privacy_dir, - { - # this currently needs to exist, but is not used - "repo": None, - "workspace": job_definition.workspace, - }, - ) + # this can be removed once osrelease is dead + write_manifest_file( + medium_privacy_dir, + { + # this currently needs to exist, but is not used + "repo": None, + "workspace": job_definition.workspace, + }, + ) return excluded_files diff --git a/tests/test_local_executor.py b/tests/test_local_executor.py index f1713c0e..24472a19 100644 --- a/tests/test_local_executor.py +++ b/tests/test_local_executor.py @@ -464,9 +464,17 @@ def test_finalize_failed_oomkilled(docker_cleanup, job_definition, tmp_work_dir) assert workspace_log_file_exists(job_definition) +@pytest.fixture(params=[True, False]) +def local_run(request, monkeypatch): + # local_run does not have a level 4 configured + if request.param: + monkeypatch.setattr(config, "MEDIUM_PRIVACY_WORKSPACES_DIR", None) + return request.param + + @pytest.mark.needs_docker def test_finalize_large_level4_outputs( - docker_cleanup, job_definition, tmp_work_dir, volume_api + docker_cleanup, job_definition, tmp_work_dir, volume_api, local_run ): job_definition.args = [ "truncate", @@ -498,19 +506,21 @@ def test_finalize_large_level4_outputs( "output/output.txt": "File size of 1.0Mb is larger that limit of 0.5Mb.", } - level4_dir = local.get_medium_privacy_workspace(job_definition.workspace) - message_file = level4_dir / "output/output.txt.txt" - txt = message_file.read_text() - assert "output/output.txt" in txt - assert "1.0Mb" in txt - assert "0.5Mb" in txt - log_file = level4_dir / "metadata/action.log" + log_file = local.get_log_dir(job_definition) / "logs.txt" log = log_file.read_text() assert "Invalid moderately_sensitive outputs:" in log assert ( "output/output.txt - File size of 1.0Mb is larger that limit of 0.5Mb." in log ) + if not local_run: + level4_dir = local.get_medium_privacy_workspace(job_definition.workspace) + message_file = level4_dir / "output/output.txt.txt" + txt = message_file.read_text() + assert "output/output.txt" in txt + assert "1.0Mb" in txt + assert "0.5Mb" in txt + @pytest.mark.needs_docker def test_finalize_invalid_file_type(docker_cleanup, job_definition, tmp_work_dir): @@ -544,14 +554,16 @@ def test_finalize_invalid_file_type(docker_cleanup, job_definition, tmp_work_dir txt = message_file.read_text() assert "output/output.rds" in txt - log_file = level4_dir / "metadata/action.log" + log_file = local.get_log_dir(job_definition) / "logs.txt" log = log_file.read_text() assert "Invalid moderately_sensitive outputs:" in log assert "output/output.rds - File type of .rds is not valid level 4 file" in log @pytest.mark.needs_docker -def test_finalize_patient_id_header(docker_cleanup, job_definition, tmp_work_dir): +def test_finalize_patient_id_header( + docker_cleanup, job_definition, tmp_work_dir, local_run +): job_definition.args = [ "sh", "-c", @@ -581,17 +593,19 @@ def test_finalize_patient_id_header(docker_cleanup, job_definition, tmp_work_dir "output/output.csv": "File has patient_id column", } - level4_dir = local.get_medium_privacy_workspace(job_definition.workspace) - message_file = level4_dir / "output/output.csv.txt" - txt = message_file.read_text() - assert "output/output.csv" in txt - assert "patient_id" in txt - - log_file = level4_dir / "metadata/action.log" + log_file = local.get_log_dir(job_definition) / "logs.txt" log = log_file.read_text() assert "Invalid moderately_sensitive outputs:" in log assert "output/output.csv - File has patient_id column" in log + if not local_run: + level4_dir = local.get_medium_privacy_workspace(job_definition.workspace) + + message_file = level4_dir / "output/output.csv.txt" + txt = message_file.read_text() + assert "output/output.csv" in txt + assert "patient_id" in txt + @pytest.mark.needs_docker def test_finalize_large_level4_outputs_cleanup(