Skip to content

Commit

Permalink
Perform level 4 files checks in local_run
Browse files Browse the repository at this point in the history
Because local_run doesn't have a level 4 directory configured, the
checks were skipped.

This change move that conditional to different location, so the checks
are performed in local_run, and adds tests

Partial fix to #675:
  • Loading branch information
bloodearnest committed Nov 10, 2023
1 parent 699ecd2 commit a8932da
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 33 deletions.
36 changes: 20 additions & 16 deletions jobrunner/executors/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,13 +487,18 @@ def persist_outputs(job_definition, outputs, job_metadata):

# Copy out medium privacy files
medium_privacy_dir = get_medium_privacy_workspace(job_definition.workspace)
if medium_privacy_dir:
for filename, privacy_level in outputs.items():
if privacy_level == "moderately_sensitive":
ok, job_msg, file_msg = check_l4_file(
job_definition, filename, sizes[filename], workspace_dir
)

for filename, privacy_level in outputs.items():
if privacy_level == "moderately_sensitive":
ok, job_msg, file_msg = check_l4_file(
job_definition, filename, sizes[filename], workspace_dir
)

if not ok:
excluded_files[filename] = job_msg

# local run currently does not have a level 4 directory
if medium_privacy_dir:
message_file = medium_privacy_dir / (filename + ".txt")

if ok:
Expand All @@ -503,19 +508,18 @@ def persist_outputs(job_definition, outputs, job_metadata):
# if it previously had a too big notice, delete it
delete_files_from_directory(medium_privacy_dir, [message_file])
else:
excluded_files[filename] = job_msg
message_file.parent.mkdir(exist_ok=True, parents=True)
message_file.write_text(file_msg)

# this can be removed once osrelease is dead
write_manifest_file(
medium_privacy_dir,
{
# this currently needs to exist, but is not used
"repo": None,
"workspace": job_definition.workspace,
},
)
# this can be removed once osrelease is dead
write_manifest_file(
medium_privacy_dir,
{
# this currently needs to exist, but is not used
"repo": None,
"workspace": job_definition.workspace,
},
)

return excluded_files

Expand Down
48 changes: 31 additions & 17 deletions tests/test_local_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,9 +464,17 @@ def test_finalize_failed_oomkilled(docker_cleanup, job_definition, tmp_work_dir)
assert workspace_log_file_exists(job_definition)


@pytest.fixture(params=[True, False])
def local_run(request, monkeypatch):
# local_run does not have a level 4 configured
if request.param:
monkeypatch.setattr(config, "MEDIUM_PRIVACY_WORKSPACES_DIR", None)
return request.param


@pytest.mark.needs_docker
def test_finalize_large_level4_outputs(
docker_cleanup, job_definition, tmp_work_dir, volume_api
docker_cleanup, job_definition, tmp_work_dir, volume_api, local_run
):
job_definition.args = [
"truncate",
Expand Down Expand Up @@ -498,19 +506,21 @@ def test_finalize_large_level4_outputs(
"output/output.txt": "File size of 1.0Mb is larger that limit of 0.5Mb.",
}

level4_dir = local.get_medium_privacy_workspace(job_definition.workspace)
message_file = level4_dir / "output/output.txt.txt"
txt = message_file.read_text()
assert "output/output.txt" in txt
assert "1.0Mb" in txt
assert "0.5Mb" in txt
log_file = level4_dir / "metadata/action.log"
log_file = local.get_log_dir(job_definition) / "logs.txt"
log = log_file.read_text()
assert "Invalid moderately_sensitive outputs:" in log
assert (
"output/output.txt - File size of 1.0Mb is larger that limit of 0.5Mb." in log
)

if not local_run:
level4_dir = local.get_medium_privacy_workspace(job_definition.workspace)
message_file = level4_dir / "output/output.txt.txt"
txt = message_file.read_text()
assert "output/output.txt" in txt
assert "1.0Mb" in txt
assert "0.5Mb" in txt


@pytest.mark.needs_docker
def test_finalize_invalid_file_type(docker_cleanup, job_definition, tmp_work_dir):
Expand Down Expand Up @@ -544,14 +554,16 @@ def test_finalize_invalid_file_type(docker_cleanup, job_definition, tmp_work_dir
txt = message_file.read_text()
assert "output/output.rds" in txt

log_file = level4_dir / "metadata/action.log"
log_file = local.get_log_dir(job_definition) / "logs.txt"
log = log_file.read_text()
assert "Invalid moderately_sensitive outputs:" in log
assert "output/output.rds - File type of .rds is not valid level 4 file" in log


@pytest.mark.needs_docker
def test_finalize_patient_id_header(docker_cleanup, job_definition, tmp_work_dir):
def test_finalize_patient_id_header(
docker_cleanup, job_definition, tmp_work_dir, local_run
):
job_definition.args = [
"sh",
"-c",
Expand Down Expand Up @@ -581,17 +593,19 @@ def test_finalize_patient_id_header(docker_cleanup, job_definition, tmp_work_dir
"output/output.csv": "File has patient_id column",
}

level4_dir = local.get_medium_privacy_workspace(job_definition.workspace)
message_file = level4_dir / "output/output.csv.txt"
txt = message_file.read_text()
assert "output/output.csv" in txt
assert "patient_id" in txt

log_file = level4_dir / "metadata/action.log"
log_file = local.get_log_dir(job_definition) / "logs.txt"
log = log_file.read_text()
assert "Invalid moderately_sensitive outputs:" in log
assert "output/output.csv - File has patient_id column" in log

if not local_run:
level4_dir = local.get_medium_privacy_workspace(job_definition.workspace)

message_file = level4_dir / "output/output.csv.txt"
txt = message_file.read_text()
assert "output/output.csv" in txt
assert "patient_id" in txt


@pytest.mark.needs_docker
def test_finalize_large_level4_outputs_cleanup(
Expand Down

0 comments on commit a8932da

Please sign in to comment.