opensafely-core · bloodearnest · Oct 3, 2024 · Sep 26, 2024 · Sep 27, 2024 · Sep 27, 2024
diff --git a/docker/.gitignore b/docker/.gitignore
@@ -0,0 +1,2 @@
+medium/
+high/
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -7,18 +7,12 @@
 # the latest base image, by design.
 #
 # hadolint ignore=DL3007
-FROM ghcr.io/opensafely-core/base-docker:latest as base-python
+FROM ghcr.io/opensafely-core/base-docker:22.04 as base-python
 
 # we are going to use an apt cache on the host, so disable the default debian
 # docker clean up that deletes that cache on every apt install
 RUN rm -f /etc/apt/apt.conf.d/docker-clean
 
-# preemptively create a docker group using specific gid, before docker is
-# installed. This allows us to set the host docker gid to the same thing in
-# systems we control, and then we can just mount /var/run/docker.sock into the
-# container.
-RUN groupadd --gid 10010 docker
-
 # ensure fully working base python3 installation
 # see: https://gist.github.com/tiran/2dec9e03c6f901814f6d1e8dad09528e
 # use space efficient utility from base image
@@ -81,13 +75,6 @@ RUN --mount=type=cache,target=/root/.cache \
 #    version of the code included when the prod image was built.
 FROM base-python as job-runner-base
 
-# Create a non-root opensafely user to run the app as
-# We currently use host bind mounts in production backends, which means if we
-# need to match the uid inside the container to the one on the host.  To do
-# this, we reserve uid/gid 10000 to use as our running user on the host.
-# Note: we are currently not running as this user, but we will in future.
-RUN useradd --create-home --user-group --uid 10000 -G docker opensafely
-
 # copy venv over from builder image. These will have root:root ownership, but
 # are readable by all.
 COPY --from=builder /opt/venv /opt/venv
@@ -105,11 +92,6 @@ CMD ["/opt/venv/bin/python", "-m", "jobrunner.service"]
 # This may not be necessary, but it probably doesn't hurt
 ENV PYTHONPATH=/app
 
-# We are not ready to do this step yet
-# switch to running as the user
-# USER opensafely
-
-
 ##################################################
 #
 # Production image
@@ -145,9 +127,6 @@ LABEL org.opencontainers.image.revision=$GITREF
 #
 FROM job-runner-base as job-runner-dev
 
-# switch back to root to run the install of dev requirements.txt
-#USER root
-
 # TODO: its possible python dev dependencies might need some additional build packages installed?
 
 # install development requirements
@@ -156,21 +135,3 @@ COPY requirements.dev.txt /tmp/requirements.dev.txt
 # hadolint ignore=DL3042
 RUN --mount=type=cache,target=/root/.cache \
     python -m pip install --requirement /tmp/requirements.dev.txt
-
-# modify container docker gid to match host
-ARG DOCKER_HOST_GROUPID
-RUN groupmod -g $DOCKER_HOST_GROUPID docker
-
-# in dev, ensure opensafely uid matches host user id
-ARG DEV_USERID=1000
-ARG DEV_GROUPID=1000
-RUN usermod -u $DEV_USERID opensafely
-# Modify opensafely only if group id does not already exist. We run dev
-# containers with an explicit group id anyway, so file permissions on the host
-# will be correct, and we do not actually rely on named opensafely group access to
-# anything.
-RUN grep -q ":$DEV_GROUPID:" /etc/group || groupmod -g $DEV_GROUPID opensafely
-
-
-# switch back to opensafely
-#USER opensafely
diff --git a/docker/dependencies.txt b/docker/dependencies.txt
@@ -2,4 +2,3 @@
 docker.io
 sqlite3
 git
-openssh-client
diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml
@@ -1,6 +1,8 @@
 # note: this docker-compose file is intended to define the *building* of
 # job-runner images, and running them locally in dev, not for running them in
 # production
+name: job-runner
+
 services:
   prod:
     # image name, both locally and public
@@ -23,25 +25,40 @@ services:
         - GITREF
     # use dockers builitin PID daemon
     init: true
-    # paths relative to docker-compose.yaml
+    # run as the the current user to preserve file permissions
+    user: "${DEV_USERID:-1000}:${DEV_GROUPID:-1000}"
+    # run with additional group docker (the *hosts* gid for docker)
+    group_add: ["${DOCKER_HOST_GROUPID}"]
+    # paths are relative to docker-compose.yaml
     volumes:
-      # use the default dev workdir
-      - ../workdir:/workdir
-      # used to configure ssh access for docker
-      - ./ssh:/home/opensafely/.ssh
       # docker control
       - /var/run/docker.sock:/var/run/docker.sock
+      # use the default dev workdir
+      - ../workdir:/workdir
+      # used to avoid ugly uid/gid lookup failures, but not strictly required
+      - /etc/group:/etc/group:ro
+      - /etc/passwd:/etc/passwd:ro
+      # these paths must be absolute on the host, and must be mount to the
+      # identical absolute paths inside the container. This allows for
+      # job-runner to use the same value for both its own file access, and for
+      # what to pass to the host docker as a bindmount source directory, which
+      # needs to be on the host. 
+      # We can potentially fix this by adding an explicit bind mount source configs, if we want to
+      - ${MEDIUM_PRIVACY_STORAGE_BASE}:${MEDIUM_PRIVACY_STORAGE_BASE}
+      - ${HIGH_PRIVACY_STORAGE_BASE}:${HIGH_PRIVACY_STORAGE_BASE}
     # paths relative to docker-compose.yaml file
     env_file:
+      # load jobrunner local dev config
       - ../.env
-      - docker-compose.env
+      # explicitly load local .env after parent dir .env
+      - .env
     # ensure WORKDIR environment points to fixed location
     environment:
       # default dev config
       WORKDIR: /workdir
-      MEDIUM_PRIVACY_STORAGE_BASE: /workdir/workspaces
-      HIGH_PRIVACY_STORAGE_BASE: /workdir/workspaces
-      DOCKER_HOST: ${DOCKER_HOST:-unix:///var/run/docker.sock}
+      # force using mounted docker socket
+      DOCKER_HOST: unix:///var/run/docker.sock
+      # used to enable in CI
       PRIVATE_REPO_TOKEN: ${PRIVATE_REPO_TOKEN:-}
 
   # main development service
@@ -50,20 +67,9 @@ services:
         service: prod
     image: job-runner-dev
     container_name: job-runner-dev
-    # running as a specific uid/gid allows files written to mounted volumes by
-    # the docker container's default user to match the host user's uid/gid, for
-    # convienience.
-    user: ${DEV_USERID:-1000}:${DEV_GROUPID:-1000}
-    # also run with additional group docker
-    group_add: [docker]
     build:
       # the dev stage in the Dockerfile
       target: job-runner-dev
-      # pass the uid/gid as build arg
-      args:
-        - DEV_USERID=${DEV_USERID:-1000}
-        - DEV_GROUPID=${DEV_GROUPID:-1000}
-        - DOCKER_HOST_GROUPID=${DOCKER_HOST_GROUPID}
     # Some tricks are needed here to be able to test the BindMountVolumeAPI
     # when running inside docker, as we need the volumes to be mountable by the
     # host docker. Our pytest fixtures create the directories in /tmp, so we

diff --git a/docker/justfile b/docker/justfile
@@ -1,35 +1,13 @@
 set dotenv-load := true
 
-export DOCKER_USERID := `id -u`
-export DOCKER_GROUPID := `id -g`
 export PYTEST_HOST_TMP := "/tmp/jobrunner-docker"
-export SSH_KEY := "ssh/id_jobrunner_dev"
 
-# used to identify the dev ssh key we generate
-
-export SSH_COMMENT := "local jobrunner dev key"
-
-# what is the hosts ip from docker containers POV?
-
-export SSH_HOST := `docker network inspect bridge --format='{{(index .IPAM.Config 0).Gateway}}'`
-
-# the dev build needs to know the hosts docker group ID, to match it in the container
-
-export DOCKER_HOST_GROUPID := `getent group docker | awk -F: '{print $3}'`
-
-# dev builds remap's appuser's uid to the running user, for easy file
-# permissions when we mount things in.
-
-export DEV_USERID := `id -u`
-export DEV_GROUPID := `id -g`
-
-build env="dev": tmpdir
+build env="dev": tmpdir docker-compose-env
     #!/usr/bin/env bash
     set -eu
 
-    # ensure env files exist
+    # ensure parent env files exist
     test -f ../.env || cp ../dotenv-sample ../.env
-    touch docker-compose.env
 
     # enable modern docker build features
     export DOCKER_BUILDKIT=1
@@ -50,17 +28,50 @@ tmpdir:
     fi
     mkdir $PYTEST_HOST_TMP
 
+docker-compose-env:
+    #!/bin/bash
+    mkdir -p {medium,high}
+    test -f .env && exit 0
+    cat <<EOF > .env
+    DEV_USERID=$(id -u)
+    DEV_GROUPID=$(id -g)
+    DOCKER_HOST_GROUPID=$(getent group docker | awk -F: '{print $3}')
+    PYTEST_HOST_TMP=$PYTEST_HOST_TMP
+    MEDIUM_PRIVACY_STORAGE_BASE=$(realpath $PWD/medium)
+    HIGH_PRIVACY_STORAGE_BASE=$(realpath $PWD/high)
+    EOF
+
 # run tests in docker container
 test *args: build
     #!/bin/bash
     docker compose run --rm test {{ args }}
 
+functional-test:
+    #!/bin/bash
+    container=job-runner-prod-1
+    trap 'docker compose kill prod' EXIT
+    docker compose up -d prod
+    docker compose exec prod python3 -m jobrunner.cli.add_job https://github.com/opensafely/research-template generate_dataset
+    attempts=""
+    # use docker logs reather than docker compose logs, as those will include previous run's logs 
+    while ! docker logs -n 10 $container |& grep -q "Completed successfully";
+    do
+        if test "$attempts" = ".........."; then
+            docker compose logs prod
+            exit 1;
+        fi
+        attempts="${attempts}."
+        echo "waiting..."
+        sleep 1
+    done
+    docker logs -n 10 $container |& grep "Completed successfully"
+
 # run dev server in docker container
 service: build
     docker compose up dev
 
 # run command in dev container
-run env="dev" *args="bash": build
+run env="dev" *args="": build
     docker compose run --rm {{ env }} {{ args }}
 
 # exec command in existing dev container
@@ -69,34 +80,5 @@ exec *args="bash":
 
 clean:
     #!/bin/bash
-    rm -rf $PYTEST_HOST_TMP
+    rm -rf $PYTEST_HOST_TMP .env
     docker image rm job-runner job-runner-dev || true
-    # clean up local ssh config
-    rm -f ssh/known_hosts $SSH_KEY*
-    sed -i '/$(SSH_COMMENT)/d' ~/.ssh/authorized_keys
-
-# setup dev ssh key and config
-setup-ssh:
-    #!/bin/bash -x
-    test -f "$SSH_KEY" && { echo "ssh key aleady set up"; exit 0; }
-
-    # create known_hosts file so ssh works without prompting
-    ssh-keyscan $SSH_HOST > ssh/known_hosts
-
-    # create the key
-    ssh-keygen -t ed25519 -N '' -C "$SSH_COMMENT" -f "$SSH_KEY"
-
-    # this is a little unpleasent, modifying the users authorizesd keys
-    # we do use the comment in clean command  to clean it up automatically though.
-    grep -q "$(cat $SSH_KEY.pub)" ~/.ssh/authorized_keys || cat $SSH_KEY.pub >> ~/.ssh/authorized_keys
-
-    # quick test to fail early if ssh doesn't work for some reason
-    ssh -i $SSH_KEY -o UserKnownHostsFile=ssh/known_hosts $USER@$SSH_HOST true || { echo "Failed to ssh into $SSH_HOST"; tail /var/log/auth.log; exit 1; }
-
-# enable using ssh to talk to docker on host
-enable-ssh: setup-ssh
-    echo "DOCKER_HOST=ssh://$USER@$SSH_HOST" > docker-compose.env
-
-# disable using ssh to talk to docker on host
-disable-ssh:
-    rm docker-compose.env
diff --git a/docker/ssh/config b/docker/ssh/config
diff --git a/jobrunner/executors/local.py b/jobrunner/executors/local.py
@@ -175,7 +175,9 @@ def execute(self, job_definition):
                 label=LABEL,
                 labels=get_job_labels(job_definition),
                 extra_args=extra_args,
+                volume_type=volume_api.volume_type,
             )
+
         except Exception as exc:
             return JobStatus(
                 ExecutorState.ERROR, f"Failed to start docker container: {exc}"

diff --git a/jobrunner/executors/volumes.py b/jobrunner/executors/volumes.py
@@ -37,6 +37,7 @@ class DockerVolumeAPI:
     # don't run with UIDs for now. We maybe be able to support this in future.
     requires_root = True
     supported_platforms = ("linux", "win32", "darwin")
+    volume_type = "volume"  # https://docs.docker.com/engine/storage/volumes/
 
     def volume_name(job):
         return docker_volume_name(job)
@@ -93,6 +94,7 @@ class BindMountVolumeAPI:
     # Only works running jobs with uid:gid
     requires_root = False
     supported_platforms = ("linux",)
+    volume_type = "bind"  # https://docs.docker.com/engine/storage/bind-mounts/
 
     def volume_name(job):
         """Return the absolute path to the volume directory.

diff --git a/jobrunner/lib/docker.py b/jobrunner/lib/docker.py
@@ -405,6 +405,7 @@ def run(
     label=None,
     labels=None,
     extra_args=None,
+    volume_type="volume",
 ):
     run_args = ["run", "--init", "--detach", "--label", LABEL, "--name", name]
     if extra_args is not None:
@@ -413,7 +414,9 @@ def run(
     if not allow_network_access:
         run_args.extend(["--network", "none"])
     if volume:
-        run_args.extend(["--volume", f"{volume[0]}:{volume[1]}"])
+        run_args.extend(
+            ["--mount", f"type={volume_type},source={volume[0]},target={volume[1]}"]
+        )
     # These lables are in addition to the default LABEL which is always applied
     # Single unary label
     if label is not None:
-Original file line number
+Diff line change
@@ Expand Up / @@ -2,4 +2,3 @@ @@
     docker.io
     sqlite3
     git
-    openssh-client