Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: handle workspace archiving #349

Merged
merged 1 commit into from
Mar 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions jobrunner/executors/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
copy_local_workspace_to_volume,
ensure_overwritable,
get_container_metadata,
get_high_privacy_archive,
get_high_privacy_workspace,
get_log_dir,
get_medium_privacy_workspace,
Expand Down Expand Up @@ -59,6 +60,16 @@ def prepare(self, job):
if current.state != ExecutorState.UNKNOWN:
return current

# Check the workspace is not archived
workspace_dir = get_high_privacy_workspace(job.workspace)
if not workspace_dir.exists():
archive = get_high_privacy_archive(job.workspace)
if archive.exists():
return JobStatus(
ExecutorState.ERROR,
f"Workspace {job.workspace} has been archived. Contact the OpenSAFELY tech team to resolve",
)

# Check the image exists locally and error if not. Newer versions of
# docker-cli support `--pull=never` as an argument to `docker run` which
# would make this simpler, but it looks like it will be a while before this
Expand Down
5 changes: 5 additions & 0 deletions jobrunner/manage_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,11 @@ def get_high_privacy_workspace(workspace):
return config.HIGH_PRIVACY_WORKSPACES_DIR / workspace


def get_high_privacy_archive(workspace):
    """Return the path of the archive tarball for `workspace`.

    The result is `<HIGH_PRIVACY_STORAGE_BASE>/archives/<workspace>.tar.xz`.

    The extension is appended to the workspace name rather than applied with
    `Path.with_suffix()`: `with_suffix()` *replaces* whatever follows the last
    dot, so a workspace called "my.study" would map to "my.tar.xz" instead of
    "my.study.tar.xz".
    """
    return config.HIGH_PRIVACY_STORAGE_BASE / "archives" / f"{workspace}.tar.xz"


def get_medium_privacy_workspace(workspace):
if config.MEDIUM_PRIVACY_WORKSPACES_DIR:
return config.MEDIUM_PRIVACY_WORKSPACES_DIR / workspace
Expand Down
37 changes: 37 additions & 0 deletions scripts/archive.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/bin/bash
# Archive a high privacy workspace into a compressed tarball.
#
# Usage: archive.sh WORKSPACE
#
# Requires HIGH_PRIVACY_STORAGE_BASE in the environment. Creates
#   $HIGH_PRIVACY_STORAGE_BASE/archives/WORKSPACE.tar.xz
# from
#   $HIGH_PRIVACY_STORAGE_BASE/workspaces/WORKSPACE
# compares the archive's listing with the files tar reported adding, and,
# after interactive confirmation, removes the workspace directory.
# Exits non-zero if nothing was archived or the directory was not removed.
set -euo pipefail

# Reject a missing *or empty* argument/variable. An empty workspace name would
# make $workspace_dir the whole workspaces directory, which is removed below.
workspace=${1:?"usage: $0 WORKSPACE"}
: "${HIGH_PRIVACY_STORAGE_BASE:?HIGH_PRIVACY_STORAGE_BASE must be set}"

workspace_dir=$HIGH_PRIVACY_STORAGE_BASE/workspaces/$workspace
archive=$HIGH_PRIVACY_STORAGE_BASE/archives/$workspace.tar.xz


if ! test -d "$workspace_dir"; then
    if test -f "$archive"; then
        echo "$workspace is already archived at $archive"
    else
        echo "Directory $workspace_dir does not exist"
    fi
    exit 1
fi

# tar does not create the destination directory for us.
mkdir -p "$(dirname "$archive")"

# Record the list of files tar adds; remove the temp file however we exit.
index=$(mktemp)
trap 'rm -f "$index"' EXIT

tar --directory "$HIGH_PRIVACY_STORAGE_BASE/workspaces" --create --xz --verbose --file "$archive" "$workspace/" | tee "$index"


# compare the list of files we expect to check that the tar seems good.
if ! diff -u "$index" <(tar --list --file "$archive"); then
    echo "$archive does not contain the expected list of files!"
    echo "Exiting *without* deleting $workspace_dir"
    exit 1
fi

read -p "$archive created. About to remove $workspace_dir directory. Are you sure? " -n 1 -r
# `read -n 1` returns without waiting for Enter, so finish the prompt line
# before printing anything else, whichever answer was given.
echo
if test "$REPLY" != "y"; then
    echo "Not removing $workspace_dir"
    exit 1
fi

rm -rf "$workspace_dir"
31 changes: 31 additions & 0 deletions scripts/test_archive.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/bin/bash
# Round-trip smoke test for archive.sh and unarchive.sh.
#
# Builds a throwaway HIGH_PRIVACY_STORAGE_BASE containing one workspace,
# archives it, checks the tarball exists and the directory is gone, then
# unarchives it and checks the opposite.
set -euo pipefail

# Locate the sibling scripts relative to this file so the test can be run
# from any working directory, not just the repository root.
scripts_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

HIGH_PRIVACY_STORAGE_BASE=$(mktemp -d)
export HIGH_PRIVACY_STORAGE_BASE
# Quote the path inside the trap so an unusual TMPDIR cannot split the
# `rm -rf` arguments.
trap 'rm -rf "$HIGH_PRIVACY_STORAGE_BASE"' EXIT

# Print a failure message and abort the test.
error () {
    echo "$@" >&2
    exit 1
}

mkdir -p "$HIGH_PRIVACY_STORAGE_BASE/archives"
DIR="$HIGH_PRIVACY_STORAGE_BASE/workspaces/test-workspace"
mkdir -p "$DIR"

echo "foo" > "$DIR/foo.txt"
echo "bar,baz" > "$DIR/bar.csv"

EXPECTED_ARCHIVE="$HIGH_PRIVACY_STORAGE_BASE/archives/test-workspace.tar.xz"

# Answer "y" to the confirmation prompt.
echo y | "$scripts_dir/archive.sh" test-workspace

if ! test -f "$EXPECTED_ARCHIVE"; then
    error "Could not find $EXPECTED_ARCHIVE"
fi
if test -d "$DIR"; then
    error "$DIR still exists"
fi

echo y | "$scripts_dir/unarchive.sh" test-workspace

if test -f "$EXPECTED_ARCHIVE"; then
    error "$EXPECTED_ARCHIVE still exists"
fi
if ! test -d "$DIR"; then
    error "$DIR does not exist"
fi
28 changes: 28 additions & 0 deletions scripts/unarchive.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/bash
# Restore a high privacy workspace from its archive tarball.
#
# Usage: unarchive.sh WORKSPACE
#
# Requires HIGH_PRIVACY_STORAGE_BASE in the environment. Extracts
#   $HIGH_PRIVACY_STORAGE_BASE/archives/WORKSPACE.tar.xz
# into
#   $HIGH_PRIVACY_STORAGE_BASE/workspaces/
# and, after interactive confirmation, removes the archive file.
# Exits non-zero if nothing was extracted or the archive was not removed.
set -euo pipefail

# Reject a missing *or empty* argument/variable rather than building paths
# from an empty name.
workspace=${1:?"usage: $0 WORKSPACE"}
: "${HIGH_PRIVACY_STORAGE_BASE:?HIGH_PRIVACY_STORAGE_BASE must be set}"

workspace_dir=$HIGH_PRIVACY_STORAGE_BASE/workspaces/$workspace
archive=$HIGH_PRIVACY_STORAGE_BASE/archives/$workspace.tar.xz


if ! test -f "$archive"; then
    if test -d "$workspace_dir"; then
        echo "$workspace_dir already exists"
    else
        echo "Archive file $archive does not exist"
    fi
    exit 1
fi

# NOTE(review): if $workspace_dir already exists alongside the archive, the
# extraction below writes over its files. Confirm whether that should be
# refused instead.

# tar --directory needs the target directory to exist.
mkdir -p "$HIGH_PRIVACY_STORAGE_BASE/workspaces"

tar --directory "$HIGH_PRIVACY_STORAGE_BASE/workspaces" --extract --xz --verbose --file "$archive"

read -p "$workspace_dir created from $archive. About to remove $archive. Are you sure? " -n 1 -r
# `read -n 1` returns without waiting for Enter, so finish the prompt line
# before printing anything else, whichever answer was given.
echo
if test "$REPLY" != "y"; then
    echo "Not removing $archive"
    exit 1
fi

# The archive is a regular file (checked above); no recursive delete needed.
rm -f -- "$archive"
26 changes: 26 additions & 0 deletions tests/test_local_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from jobrunner.lib import docker
from jobrunner.manage_jobs import (
container_name,
get_high_privacy_archive,
get_high_privacy_workspace,
get_medium_privacy_workspace,
)
Expand Down Expand Up @@ -149,6 +150,31 @@ def test_prepare_no_image(use_api, docker_cleanup, test_repo):
assert job.image in status.message.lower()


@pytest.mark.needs_docker
def test_prepare_no_archived(use_api, docker_cleanup, test_repo):
    """prepare() must return an ERROR status for an archived workspace.

    The workspace directory is never created here, and an archive file is
    placed where `get_high_privacy_archive` points, which is the condition
    `prepare` checks before doing any other work.
    """
    job = JobDefinition(
        # NOTE(review): this id looks copy-pasted from test_prepare_no_image;
        # confirm whether ids are expected to be unique per test.
        id="test_prepare_no_image",
        study=test_repo.study,
        workspace="test",
        action="action",
        image="invalid-test-image",
        args=["/usr/bin/true"],
        env={},
        inputs=["output/input.csv"],
        output_spec={},
        allow_database_access=False,
    )

    api = local.LocalDockerAPI()
    archive = get_high_privacy_archive(job.workspace)
    archive.parent.mkdir(parents=True)
    archive.write_text("I exist")
    status = api.prepare(job)

    assert status.state == ExecutorState.ERROR
    # A bare string literal is always truthy; check the actual message so the
    # test fails if the ERROR came from something other than the archive check.
    assert "has been archived" in status.message


@pytest.mark.needs_docker
def test_prepare_job_bad_commit(use_api, docker_cleanup, test_repo):
job = JobDefinition(
Expand Down