Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix docker image to actually work #742

Merged
merged 5 commits into from
Oct 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docker/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
medium/
high/
41 changes: 1 addition & 40 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,12 @@
# the latest base image, by design.
#
# hadolint ignore=DL3007
FROM ghcr.io/opensafely-core/base-docker:latest as base-python
FROM ghcr.io/opensafely-core/base-docker:22.04 as base-python

# we are going to use an apt cache on the host, so disable the default debian
# docker clean up that deletes that cache on every apt install
RUN rm -f /etc/apt/apt.conf.d/docker-clean

# preemptively create a docker group using specific gid, before docker is
# installed. This allows us to set the host docker gid to the same thing in
# systems we control, and then we can just mount /var/run/docker.sock into the
# container.
RUN groupadd --gid 10010 docker

# ensure fully working base python3 installation
# see: https://gist.github.com/tiran/2dec9e03c6f901814f6d1e8dad09528e
# use space efficient utility from base image
Expand Down Expand Up @@ -81,13 +75,6 @@ RUN --mount=type=cache,target=/root/.cache \
# version of the code included when the prod image was built.
FROM base-python as job-runner-base

# Create a non-root opensafely user to run the app as
# We currently use host bind mounts in production backends, which means if we
# need to match the uid inside the container to the one on the host. To do
# this, we reserve uid/gid 10000 to use as our running user on the host.
# Note: we are currently not running as this user, but we will in future.
RUN useradd --create-home --user-group --uid 10000 -G docker opensafely

# copy venv over from builder image. These will have root:root ownership, but
# are readable by all.
COPY --from=builder /opt/venv /opt/venv
Expand All @@ -105,11 +92,6 @@ CMD ["/opt/venv/bin/python", "-m", "jobrunner.service"]
# This may not be necessary, but it probably doesn't hurt
ENV PYTHONPATH=/app

# We are not ready to do this step yet
# switch to running as the user
# USER opensafely


##################################################
#
# Production image
Expand Down Expand Up @@ -145,9 +127,6 @@ LABEL org.opencontainers.image.revision=$GITREF
#
FROM job-runner-base as job-runner-dev

# switch back to root to run the install of dev requirements.txt
#USER root

# TODO: its possible python dev dependencies might need some additional build packages installed?

# install development requirements
Expand All @@ -156,21 +135,3 @@ COPY requirements.dev.txt /tmp/requirements.dev.txt
# hadolint ignore=DL3042
RUN --mount=type=cache,target=/root/.cache \
python -m pip install --requirement /tmp/requirements.dev.txt

# modify container docker gid to match host
ARG DOCKER_HOST_GROUPID
RUN groupmod -g $DOCKER_HOST_GROUPID docker

# in dev, ensure opensafely uid matches host user id
ARG DEV_USERID=1000
ARG DEV_GROUPID=1000
RUN usermod -u $DEV_USERID opensafely
# Modify opensafely only if group id does not already exist. We run dev
# containers with an explicit group id anyway, so file permissions on the host
# will be correct, and we do not actually rely on named opensafely group access to
# anything.
RUN grep -q ":$DEV_GROUPID:" /etc/group || groupmod -g $DEV_GROUPID opensafely


# switch back to opensafely
#USER opensafely
1 change: 0 additions & 1 deletion docker/dependencies.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,3 @@
docker.io
sqlite3
git
openssh-client
46 changes: 26 additions & 20 deletions docker/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# note: this docker-compose file is intended to define the *building* of
# job-runner images, and running them locally in dev, not for running them in
# production
name: job-runner

services:
prod:
# image name, both locally and public
Expand All @@ -23,25 +25,40 @@ services:
- GITREF
# use dockers builitin PID daemon
init: true
# paths relative to docker-compose.yaml
# run as the the current user to preserve file permissions
user: "${DEV_USERID:-1000}:${DEV_GROUPID:-1000}"
# run with additional group docker (the *hosts* gid for docker)
group_add: ["${DOCKER_HOST_GROUPID}"]
bloodearnest marked this conversation as resolved.
Show resolved Hide resolved
# paths are relative to docker-compose.yaml
volumes:
# use the default dev workdir
- ../workdir:/workdir
# used to configure ssh access for docker
- ./ssh:/home/opensafely/.ssh
# docker control
- /var/run/docker.sock:/var/run/docker.sock
# use the default dev workdir
- ../workdir:/workdir
# used to avoid ugly uid/gid lookup failures, but not strictly required
- /etc/group:/etc/group:ro
- /etc/passwd:/etc/passwd:ro
# these paths must be absolute on the host, and must be mount to the
# identical absolute paths inside the container. This allows for
# job-runner to use the same value for both its own file access, and for
# what to pass to the host docker as a bindmount source directory, which
# needs to be on the host.
# We can potentially fix this by adding an explicit bind mount source configs, if we want to
bloodearnest marked this conversation as resolved.
Show resolved Hide resolved
- ${MEDIUM_PRIVACY_STORAGE_BASE}:${MEDIUM_PRIVACY_STORAGE_BASE}
- ${HIGH_PRIVACY_STORAGE_BASE}:${HIGH_PRIVACY_STORAGE_BASE}
# paths relative to docker-compose.yaml file
env_file:
# load jobrunner local dev config
- ../.env
- docker-compose.env
# explicitly load local .env after parent dir .env
- .env
# ensure WORKDIR environment points to fixed location
environment:
# default dev config
WORKDIR: /workdir
MEDIUM_PRIVACY_STORAGE_BASE: /workdir/workspaces
HIGH_PRIVACY_STORAGE_BASE: /workdir/workspaces
DOCKER_HOST: ${DOCKER_HOST:-unix:///var/run/docker.sock}
# force using mounted docker socket
DOCKER_HOST: unix:///var/run/docker.sock
# used to enable in CI
PRIVATE_REPO_TOKEN: ${PRIVATE_REPO_TOKEN:-}

# main development service
Expand All @@ -50,20 +67,9 @@ services:
service: prod
image: job-runner-dev
container_name: job-runner-dev
# running as a specific uid/gid allows files written to mounted volumes by
# the docker container's default user to match the host user's uid/gid, for
# convienience.
user: ${DEV_USERID:-1000}:${DEV_GROUPID:-1000}
# also run with additional group docker
group_add: [docker]
build:
# the dev stage in the Dockerfile
target: job-runner-dev
# pass the uid/gid as build arg
args:
- DEV_USERID=${DEV_USERID:-1000}
- DEV_GROUPID=${DEV_GROUPID:-1000}
- DOCKER_HOST_GROUPID=${DOCKER_HOST_GROUPID}
# Some tricks are needed here to be able to test the BindMountVolumeAPI
# when running inside docker, as we need the volumes to be mountable by the
# host docker. Our pytest fixtures create the directories in /tmp, so we
Expand Down
92 changes: 37 additions & 55 deletions docker/justfile
Original file line number Diff line number Diff line change
@@ -1,35 +1,13 @@
set dotenv-load := true

export DOCKER_USERID := `id -u`
export DOCKER_GROUPID := `id -g`
export PYTEST_HOST_TMP := "/tmp/jobrunner-docker"
export SSH_KEY := "ssh/id_jobrunner_dev"

# used to identify the dev ssh key we generate

export SSH_COMMENT := "local jobrunner dev key"

# what is the hosts ip from docker containers POV?

export SSH_HOST := `docker network inspect bridge --format='{{(index .IPAM.Config 0).Gateway}}'`

# the dev build needs to know the hosts docker group ID, to match it in the container

export DOCKER_HOST_GROUPID := `getent group docker | awk -F: '{print $3}'`

# dev builds remap's appuser's uid to the running user, for easy file
# permissions when we mount things in.

export DEV_USERID := `id -u`
export DEV_GROUPID := `id -g`

build env="dev": tmpdir
build env="dev": tmpdir docker-compose-env
#!/usr/bin/env bash
set -eu

# ensure env files exist
# ensure parent env files exist
test -f ../.env || cp ../dotenv-sample ../.env
touch docker-compose.env

# enable modern docker build features
export DOCKER_BUILDKIT=1
Expand All @@ -50,17 +28,50 @@ tmpdir:
fi
mkdir $PYTEST_HOST_TMP

docker-compose-env:
#!/bin/bash
mkdir -p {medium,high}
test -f .env && exit 0
cat <<EOF > .env
DEV_USERID=$(id -u)
DEV_GROUPID=$(id -g)
DOCKER_HOST_GROUPID=$(getent group docker | awk -F: '{print $3}')
PYTEST_HOST_TMP=$PYTEST_HOST_TMP
MEDIUM_PRIVACY_STORAGE_BASE=$(realpath $PWD/medium)
HIGH_PRIVACY_STORAGE_BASE=$(realpath $PWD/high)
EOF

# run tests in docker container
test *args: build
#!/bin/bash
docker compose run --rm test {{ args }}

functional-test:
#!/bin/bash
container=job-runner-prod-1
trap 'docker compose kill prod' EXIT
docker compose up -d prod
docker compose exec prod python3 -m jobrunner.cli.add_job https://github.com/opensafely/research-template generate_dataset
attempts=""
# use docker logs reather than docker compose logs, as those will include previous run's logs
while ! docker logs -n 10 $container |& grep -q "Completed successfully";
do
if test "$attempts" = ".........."; then
docker compose logs prod
exit 1;
fi
attempts="${attempts}."
echo "waiting..."
sleep 1
done
docker logs -n 10 $container |& grep "Completed successfully"

# run dev server in docker container
service: build
docker compose up dev

# run command in dev container
run env="dev" *args="bash": build
run env="dev" *args="": build
docker compose run --rm {{ env }} {{ args }}

# exec command in existing dev container
Expand All @@ -69,34 +80,5 @@ exec *args="bash":

clean:
#!/bin/bash
rm -rf $PYTEST_HOST_TMP
rm -rf $PYTEST_HOST_TMP .env
docker image rm job-runner job-runner-dev || true
# clean up local ssh config
rm -f ssh/known_hosts $SSH_KEY*
sed -i '/$(SSH_COMMENT)/d' ~/.ssh/authorized_keys

# setup dev ssh key and config
setup-ssh:
#!/bin/bash -x
test -f "$SSH_KEY" && { echo "ssh key aleady set up"; exit 0; }

# create known_hosts file so ssh works without prompting
ssh-keyscan $SSH_HOST > ssh/known_hosts

# create the key
ssh-keygen -t ed25519 -N '' -C "$SSH_COMMENT" -f "$SSH_KEY"

# this is a little unpleasent, modifying the users authorizesd keys
# we do use the comment in clean command to clean it up automatically though.
grep -q "$(cat $SSH_KEY.pub)" ~/.ssh/authorized_keys || cat $SSH_KEY.pub >> ~/.ssh/authorized_keys

# quick test to fail early if ssh doesn't work for some reason
ssh -i $SSH_KEY -o UserKnownHostsFile=ssh/known_hosts $USER@$SSH_HOST true || { echo "Failed to ssh into $SSH_HOST"; tail /var/log/auth.log; exit 1; }

# enable using ssh to talk to docker on host
enable-ssh: setup-ssh
echo "DOCKER_HOST=ssh://$USER@$SSH_HOST" > docker-compose.env

# disable using ssh to talk to docker on host
disable-ssh:
rm docker-compose.env
9 changes: 0 additions & 9 deletions docker/ssh/config

This file was deleted.

2 changes: 2 additions & 0 deletions jobrunner/executors/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,9 @@ def execute(self, job_definition):
label=LABEL,
labels=get_job_labels(job_definition),
extra_args=extra_args,
volume_type=volume_api.volume_type,
bloodearnest marked this conversation as resolved.
Show resolved Hide resolved
)

except Exception as exc:
return JobStatus(
ExecutorState.ERROR, f"Failed to start docker container: {exc}"
Expand Down
2 changes: 2 additions & 0 deletions jobrunner/executors/volumes.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class DockerVolumeAPI:
# don't run with UIDs for now. We maybe be able to support this in future.
requires_root = True
supported_platforms = ("linux", "win32", "darwin")
volume_type = "volume" # https://docs.docker.com/engine/storage/volumes/

def volume_name(job):
return docker_volume_name(job)
Expand Down Expand Up @@ -93,6 +94,7 @@ class BindMountVolumeAPI:
# Only works running jobs with uid:gid
requires_root = False
supported_platforms = ("linux",)
volume_type = "bind" # https://docs.docker.com/engine/storage/bind-mounts/

def volume_name(job):
"""Return the absolute path to the volume directory.
Expand Down
5 changes: 4 additions & 1 deletion jobrunner/lib/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,7 @@ def run(
label=None,
labels=None,
extra_args=None,
volume_type="volume",
):
run_args = ["run", "--init", "--detach", "--label", LABEL, "--name", name]
if extra_args is not None:
Expand All @@ -413,7 +414,9 @@ def run(
if not allow_network_access:
run_args.extend(["--network", "none"])
if volume:
run_args.extend(["--volume", f"{volume[0]}:{volume[1]}"])
run_args.extend(
["--mount", f"type={volume_type},source={volume[0]},target={volume[1]}"]
)
# These lables are in addition to the default LABEL which is always applied
# Single unary label
if label is not None:
Expand Down
Loading