Skip to content

Commit

Permalink
Merge pull request #349 from opensafely-core/archive
Browse files Browse the repository at this point in the history
feat: handle workspace archiving
  • Loading branch information
bloodearnest authored Mar 8, 2022
2 parents 7f8e863 + f2b4cec commit b00f96f
Show file tree
Hide file tree
Showing 6 changed files with 138 additions and 0 deletions.
11 changes: 11 additions & 0 deletions jobrunner/executors/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
copy_local_workspace_to_volume,
ensure_overwritable,
get_container_metadata,
get_high_privacy_archive,
get_high_privacy_workspace,
get_log_dir,
get_medium_privacy_workspace,
Expand Down Expand Up @@ -59,6 +60,16 @@ def prepare(self, job):
if current.state != ExecutorState.UNKNOWN:
return current

# Check the workspace is not archived
workspace_dir = get_high_privacy_workspace(job.workspace)
if not workspace_dir.exists():
archive = get_high_privacy_archive(job.workspace)
if archive.exists():
return JobStatus(
ExecutorState.ERROR,
f"Workspace {job.workspace} has been archived. Contact the OpenSAFELY tech team to resolve",
)

# Check the image exists locally and error if not. Newer versions of
# docker-cli support `--pull=never` as an argument to `docker run` which
# would make this simpler, but it looks like it will be a while before this
Expand Down
5 changes: 5 additions & 0 deletions jobrunner/manage_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,11 @@ def get_high_privacy_workspace(workspace):
return config.HIGH_PRIVACY_WORKSPACES_DIR / workspace


def get_high_privacy_archive(workspace):
    """Return the path of the compressed archive for `workspace`.

    The result is `<HIGH_PRIVACY_STORAGE_BASE>/archives/<workspace>.tar.xz`.
    The path is only computed here; nothing checks that the file exists.
    """
    # Append the extension instead of using `Path.with_suffix`: that method
    # replaces whatever follows the last dot in the name, so a workspace
    # called "study.v2" would wrongly map to "study.tar.xz".
    return config.HIGH_PRIVACY_STORAGE_BASE / "archives" / f"{workspace}.tar.xz"


def get_medium_privacy_workspace(workspace):
if config.MEDIUM_PRIVACY_WORKSPACES_DIR:
return config.MEDIUM_PRIVACY_WORKSPACES_DIR / workspace
Expand Down
37 changes: 37 additions & 0 deletions scripts/archive.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/bin/bash
# Archive a workspace.
#
# Compresses $HIGH_PRIVACY_STORAGE_BASE/workspaces/WORKSPACE into
# $HIGH_PRIVACY_STORAGE_BASE/archives/WORKSPACE.tar.xz, checks the archive's
# file listing against what tar reported while creating it, and then, after
# interactive confirmation, deletes the workspace directory.
#
# Usage: archive.sh WORKSPACE
set -euo pipefail

# Fail with a usage message rather than a bare "unbound variable" error.
workspace=${1:?Usage: $0 WORKSPACE}
workspace_dir=$HIGH_PRIVACY_STORAGE_BASE/workspaces/$workspace
archive=$HIGH_PRIVACY_STORAGE_BASE/archives/$workspace.tar.xz


if ! test -d "$workspace_dir"; then
    if test -f "$archive"; then
        echo "$workspace is already archived at $archive"
    else
        echo "Directory $workspace_dir does not exist"
    fi
    exit 1
fi

# The index records the names of the files in the workspace, so make sure it
# is removed however the script exits.
index=$(mktemp)
trap 'rm -f "$index"' EXIT

# --verbose prints each member as it is added; tee keeps that listing so it
# can be compared with the finished archive below.
tar --directory "$HIGH_PRIVACY_STORAGE_BASE/workspaces" --create --xz --verbose --file "$archive" "$workspace/" | tee "$index"


# Compare the files tar reported adding with the archive's actual contents,
# to check that the archive seems good before anything is deleted.
if ! diff -u "$index" <(tar --list --file "$archive"); then
    echo "$archive does not contain the expected list of files!"
    echo "Exiting *without* deleting $workspace_dir"
    exit 1
fi

# `read` returns non-zero at EOF, which under `set -e` would abort silently;
# treat that case as "no" instead.
REPLY=""
read -p "$archive created. About to remove $workspace_dir directory. Are you sure? " -n 1 -r || true
# -n 1 leaves the cursor on the prompt line; start a fresh line for output.
echo
if test "$REPLY" != "y"; then
    echo "Not removing $workspace_dir"
    exit 1
fi

rm -rf "$workspace_dir"
31 changes: 31 additions & 0 deletions scripts/test_archive.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/bin/bash
# Round-trip test for the archive and unarchive scripts.
#
# Builds a throwaway storage tree containing one workspace, archives it, checks
# that the archive exists and the workspace directory is gone, then unarchives
# it and checks the reverse. The scripts are invoked as ./scripts/..., so run
# this from the directory that contains scripts/.
set -euo pipefail

HIGH_PRIVACY_STORAGE_BASE=$(mktemp -d)
# Quote the path inside the trap so that `rm -rf` never acts on a word-split
# or glob-expanded value.
trap 'rm -rf "$HIGH_PRIVACY_STORAGE_BASE"' EXIT

# Print a failure message to stderr and abort the test.
error () {
    echo "$@" >&2
    exit 1
}

mkdir -p "$HIGH_PRIVACY_STORAGE_BASE/archives"
DIR="$HIGH_PRIVACY_STORAGE_BASE/workspaces/test-workspace"
mkdir -p "$DIR"

echo "foo" > "$DIR/foo.txt"
echo "bar,baz" > "$DIR/bar.csv"

# The scripts under test read the storage location from the environment.
export HIGH_PRIVACY_STORAGE_BASE

EXPECTED_ARCHIVE="$HIGH_PRIVACY_STORAGE_BASE/archives/test-workspace.tar.xz"

# Answer "y" to the confirmation prompt.
echo y | ./scripts/archive.sh test-workspace

if ! test -f "$EXPECTED_ARCHIVE"; then
    error "Could not find $EXPECTED_ARCHIVE"
fi
if test -d "$DIR"; then
    error "$DIR still exists"
fi

echo y | ./scripts/unarchive.sh test-workspace

if test -f "$EXPECTED_ARCHIVE"; then
    error "$EXPECTED_ARCHIVE still exists"
fi
if ! test -d "$DIR"; then
    error "$DIR does not exist"
fi
28 changes: 28 additions & 0 deletions scripts/unarchive.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/bash
# Restore an archived workspace.
#
# Extracts $HIGH_PRIVACY_STORAGE_BASE/archives/WORKSPACE.tar.xz into
# $HIGH_PRIVACY_STORAGE_BASE/workspaces and then, after interactive
# confirmation, deletes the archive file.
#
# Usage: unarchive.sh WORKSPACE
set -euo pipefail

# Fail with a usage message rather than a bare "unbound variable" error.
workspace=${1:?Usage: $0 WORKSPACE}
workspace_dir=$HIGH_PRIVACY_STORAGE_BASE/workspaces/$workspace
archive=$HIGH_PRIVACY_STORAGE_BASE/archives/$workspace.tar.xz


if ! test -f "$archive"; then
    if test -d "$workspace_dir"; then
        echo "$workspace_dir already exists"
    else
        echo "Archive file $archive does not exist"
    fi
    exit 1
fi

tar --directory "$HIGH_PRIVACY_STORAGE_BASE/workspaces" --extract --xz --verbose --file "$archive"

# `read` returns non-zero at EOF, which under `set -e` would abort silently;
# treat that case as "no" instead.
REPLY=""
read -p "$workspace_dir created from $archive. About to remove $archive. Are you sure? " -n 1 -r || true
# -n 1 leaves the cursor on the prompt line; start a fresh line for output.
echo
if test "$REPLY" != "y"; then
    echo "Not removing $archive"
    exit 1
fi

# The archive was verified above to be a regular file, so no -r is needed.
rm -f "$archive"
26 changes: 26 additions & 0 deletions tests/test_local_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from jobrunner.lib import docker
from jobrunner.manage_jobs import (
container_name,
get_high_privacy_archive,
get_high_privacy_workspace,
get_medium_privacy_workspace,
)
Expand Down Expand Up @@ -149,6 +150,31 @@ def test_prepare_no_image(use_api, docker_cleanup, test_repo):
assert job.image in status.message.lower()


@pytest.mark.needs_docker
def test_prepare_no_archived(use_api, docker_cleanup, test_repo):
    """`prepare` reports an ERROR status when the workspace's archive exists."""
    job = JobDefinition(
        id="test_prepare_no_archived",
        study=test_repo.study,
        workspace="test",
        action="action",
        image="invalid-test-image",
        args=["/usr/bin/true"],
        env={},
        inputs=["output/input.csv"],
        output_spec={},
        allow_database_access=False,
    )

    api = local.LocalDockerAPI()
    # Simulate an archived workspace by creating only the archive file.
    # NOTE(review): this presumes the fixtures have not created the workspace
    # directory itself -- confirm.
    archive = get_high_privacy_archive(job.workspace)
    archive.parent.mkdir(parents=True)
    archive.write_text("I exist")
    status = api.prepare(job)

    assert status.state == ExecutorState.ERROR
    # Check the message itself: a bare `assert "has been archived"` only
    # asserts a non-empty string literal and can never fail.
    assert "has been archived" in status.message


@pytest.mark.needs_docker
def test_prepare_job_bad_commit(use_api, docker_cleanup, test_repo):
job = JobDefinition(
Expand Down

0 comments on commit b00f96f

Please sign in to comment.