Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dockerise the CLI #11

Merged
merged 10 commits into from
Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Paths excluded from the docker build context.

# version control metadata
.git/

# editor backup, lock and autosave files (e.g. Emacs `foo~`, `.#foo`, `#foo#`)
**/*~
**/.#*
**/*#
# presumably the HTML coverage report directory, plus python bytecode caches
**/htmlcov
**/__pycache__
**/*.pyc
# local python version pin, local environment file and virtualenvs
**/.python-version
**/.env
**/.venv
**/venv
# presumably the coverage data file, plus packaging metadata
**/.coverage
**/*.egg-info/
94 changes: 94 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
---
name: CI

# Environment variables referenced by the deploy job's publish/deploy steps.
env:
# local image name; the deploy job tags this as the public image
IMAGE_NAME: metrics
# fully-qualified name the image is tagged and pushed as
PUBLIC_IMAGE_NAME: ghcr.io/ebmdatalab/metrics
# registry passed to `docker login`
REGISTRY: ghcr.io
# socket path given to `ssh-agent -a` in the deploy step
SSH_AUTH_SOCK: /tmp/agent.sock

on:
push:

Expand Down Expand Up @@ -30,12 +36,100 @@ jobs:
run: |
just test

# Lint docker/Dockerfile with hadolint.
lint-dockerfile:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4
# the action is pinned to a full commit SHA; the trailing comment records
# the release that SHA corresponds to
- uses: hadolint/hadolint-action@54c9adbab1582c2ef04b2016b760714a4bfde3cf # v3.1.0
with:
dockerfile: docker/Dockerfile

# Build the prod and dev images, smoke test prod, and hand the built image to
# the deploy job as a workflow artifact.
docker-test-and-build:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4
- uses: "opensafely-core/setup-action@v1"
with:
install-just: true

- name: Build docker image for both prod and dev
run: |
just docker-build prod
just docker-build dev

# smoke test: run the CLI entry point (`python -m metrics`) in the prod image
- name: Run smoke test on prod
run: |
just docker-run prod python -m metrics

# export the local `metrics` image as a gzipped tarball
- name: Save docker image
run: |
docker save metrics | gzip > /tmp/metrics.tar.gz

# uploaded as `metrics-image`; the deploy job downloads it under the same name
- name: Upload docker image
uses: actions/upload-artifact@v3
with:
name: metrics-image
path: /tmp/metrics.tar.gz

# Publish the image built by docker-test-and-build and deploy it. Only runs on
# the main branch, once every job listed in `needs` has succeeded.
deploy:
needs:
- check
- test
- docker-test-and-build
- lint-dockerfile

runs-on: ubuntu-latest

# packages: write allows GITHUB_TOKEN to push the image to the registry
permissions:
contents: read
packages: write

if: github.ref == 'refs/heads/main'

# all deploys share one concurrency group, so they do not run in parallel
concurrency: deploy-production

steps:
- uses: actions/checkout@v4
- uses: "opensafely-core/setup-action@v1"
with:
install-just: true

# fetch the image tarball uploaded by docker-test-and-build
- name: Download docker image
uses: actions/download-artifact@v3
with:
name: metrics-image
path: /tmp/image

- name: Import docker image
run: gunzip -c /tmp/image/metrics.tar.gz | docker load

# NOTE(review): SKIP_BUILD=1 presumably tells the just recipe to use the
# loaded image rather than rebuilding it -- confirm against the justfile
- name: Test image we imported from previous job works
run: |
SKIP_BUILD=1 just docker-run prod python -m metrics

# log in to $REGISTRY with the workflow token, then tag the local image as
# $PUBLIC_IMAGE_NAME:latest and push it
- name: Publish image
run: |
echo ${{ secrets.GITHUB_TOKEN }} | docker login "$REGISTRY" -u ${{ github.actor }} --password-stdin
docker tag "$IMAGE_NAME" "$PUBLIC_IMAGE_NAME":latest
docker push "$PUBLIC_IMAGE_NAME":latest

# Start an ssh-agent on $SSH_AUTH_SOCK, load the deploy key into it, look up
# the pushed image's first repo digest (stored in SHA), then ask dokku to
# deploy that exact digest via `git:from-image`.
# NOTE(review): host key checking is disabled for this connection
# (StrictHostKeyChecking=no, UserKnownHostsFile=/dev/null) -- confirm this
# is intentional.
# NOTE(review): the SSH target reads `[email protected]`, which looks like
# an email-obfuscation placeholder rather than a real user@host -- confirm
# the actual deploy target.
- name: Deploy image
run: |
ssh-agent -a "$SSH_AUTH_SOCK" > /dev/null
ssh-add - <<< "${{ secrets.DOKKU3_DEPLOY_SSH_KEY }}"
SHA=$(docker inspect --format='{{index .RepoDigests 0}}' "$PUBLIC_IMAGE_NAME":latest)
ssh -o "UserKnownHostsFile=/dev/null" -o "StrictHostKeyChecking=no" [email protected] git:from-image metrics "$SHA"

required-checks:
if: always()

needs:
- check
- test
- docker-test-and-build
- lint-dockerfile

runs-on: Ubuntu-latest

Expand Down
53 changes: 52 additions & 1 deletion docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,63 @@ services:
timescaledb:
  image: timescale/timescaledb-ha:pg14-latest
  environment:
    # database name and credentials; each key is set exactly once, and the
    # values match the sample DSN postgres://user:pass@localhost:5433/metrics
    POSTGRES_DB: metrics
    POSTGRES_PASSWORD: pass
    POSTGRES_USER: user
  ports:
    # host port 5433 -> container port 5432; quoted so the mapping is always
    # read as a string
    - "5433:5432"
  volumes:
    - timescaledb:/home/postgres/pgdata/data

# production service, built from the metrics-prod stage of docker/Dockerfile
metrics-prod:
# image name, both locally and public
image: metrics
build:
dockerfile: docker/Dockerfile
# the prod stage in the Dockerfile
target: metrics-prod
# should speed up the build in CI, where we have a cold cache
cache_from:
- ghcr.io/opensafely-core/base-docker
- ghcr.io/ebmdatalab/metrics
args:
# this makes the image work for later cache_from: usage
- BUILDKIT_INLINE_CACHE=1
# env vars should be supplied by just
- BUILD_DATE
- GITREF
# use docker's built-in init process as PID 1
init: true
# placeholder values only
# NOTE(review): presumably real values are injected wherever the image is
# actually deployed -- confirm
environment:
- GITHUB_TOKEN=dummy
- SLACK_SIGNING_SECRET=dummy
- SLACK_TECH_SUPPORT_CHANNEL_ID=dummy
- SLACK_TOKEN=dummy
- TIMESCALEDB_URL=dummy

# main development service
metrics-dev:
# inherit the metrics-prod service definition and override what differs
extends:
service: metrics-prod
image: metrics-dev
container_name: metrics-dev
# running as a specific uid/gid allows files written to mounted volumes by
# the docker container's default user to match the host user's uid/gid, for
# convenience.
user: ${DEV_USERID:-1000}:${DEV_GROUPID:-1000}
build:
# the dev stage in the Dockerfile
target: metrics-dev
# pass the uid/gid as build arg
args:
- DEV_USERID=${DEV_USERID:-1000}
- DEV_GROUPID=${DEV_GROUPID:-1000}
volumes:
# mount our current code
- .:/app
# load settings from a local .env file
# NOTE(review): presumably created from dotenv-sample -- confirm
env_file:
- .env

volumes:
postgres:
grafana:
Expand Down
160 changes: 160 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
# syntax=docker/dockerfile:1.2
#################################################
#
# Create base image with python installed.
#
# DL3007 is ignored because we specifically always want to build on the latest
# base-docker image, by design.
# NOTE(review): the FROM below uses the `22.04` tag rather than `latest` --
# confirm whether this ignore is still needed.
#
# hadolint ignore=DL3007
FROM ghcr.io/opensafely-core/base-docker:22.04 as base-python

# we are going to use an apt cache on the host, so disable the default debian
# docker clean up that deletes that cache on every apt install
RUN rm -f /etc/apt/apt.conf.d/docker-clean

# ensure fully working base python3.11 installation using deadsnakes ppa
# see: https://gist.github.com/tiran/2dec9e03c6f901814f6d1e8dad09528e
# This step registers the deadsnakes apt source and downloads its signing key
# into apt's trusted keys directory, using apt-helper (a space efficient
# utility from the base image) for the download.
RUN --mount=type=cache,target=/var/cache/apt \
echo "deb http://ppa.launchpad.net/deadsnakes/ppa/ubuntu jammy main" > /etc/apt/sources.list.d/deadsnakes-ppa.list &&\
/usr/lib/apt/apt-helper download-file 'https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xf23c5a6cf475977595c89f51ba6932366a755776' /etc/apt/trusted.gpg.d/deadsnakes.asc

# install any additional system dependencies, as listed one per line in
# docker/dependencies.txt
COPY docker/dependencies.txt /tmp/dependencies.txt
RUN --mount=type=cache,target=/var/cache/apt \
/root/docker-apt-install.sh /tmp/dependencies.txt


##################################################
#
# Build image
#
# Ok, now we have a local base image with python and our system dependencies
# on. We'll use this as the base for our builder image, where we'll build and
# install any python packages needed.
#
# We use a separate, disposable build image to avoid carrying the build
# dependencies into the production image.
FROM base-python as builder

# Install any system build dependencies
COPY docker/build-dependencies.txt /tmp/build-dependencies.txt
RUN --mount=type=cache,target=/var/cache/apt \
/root/docker-apt-install.sh /tmp/build-dependencies.txt

# Install everything in venv for isolation from system python libraries
RUN python3.11 -m venv /opt/venv
ENV VIRTUAL_ENV=/opt/venv/ PATH="/opt/venv/bin:$PATH"

# Copy in the production requirements file used by the pip install below
COPY requirements.prod.txt /tmp/requirements.prod.txt

# The cache mount means a) /root/.cache is not in the image, and b) it's preserved
# between docker builds locally, for faster dev rebuild.
# DL3042: using cache mount instead
# DL3013: we always want latest pip/setuptools/wheel, at least for now
# hadolint ignore=DL3042,DL3013
RUN --mount=type=cache,target=/root/.cache \
/opt/venv/bin/python -m pip install -U pip setuptools wheel && \
/opt/venv/bin/python -m pip install --no-deps --require-hashes --requirement /tmp/requirements.prod.txt


##################################################
#
# Base project image
#
# Ok, we've built everything we need, build an image with all dependencies but
# no code.
#
# Not including the code at this stage has two benefits:
#
# 1) this image only rebuilds when the handful of files needed to build metrics-base
#    changes. If we do `COPY . /app` now, this will rebuild when *any* file changes.
#
# 2) Ensures we *have* to mount the volume for dev image, as there's no embedded
#    version of the code. Otherwise, we could end up accidentally using the
#    version of the code included when the prod image was built.
FROM base-python as metrics-base

# Create a non-root metrics user to run the app as
RUN useradd --create-home --user-group metrics

# copy venv over from builder image. These will have root:root ownership, but
# are readable by all.
COPY --from=builder /opt/venv /opt/venv

# Ensure we're using the venv by default
ENV VIRTUAL_ENV=/opt/venv/ PATH="/opt/venv/bin:$PATH"

# the application directory; code is copied (prod) or mounted (dev) here
RUN mkdir /app
WORKDIR /app

# We set command rather than entrypoint, to make it easier to run different
# things from the cli
CMD ["/opt/venv/bin/python", "-m", "metrics"]

# This may not be necessary, but it probably doesn't hurt
ENV PYTHONPATH=/app

# switch to running as the user
USER metrics


##################################################
#
# Production image
#
# Copy code in, add proper metadata
FROM metrics-base as metrics-prod

# Adjust this metadata to fit project. Note that the base-docker image does set
# some basic metadata.
LABEL org.opencontainers.image.title="metrics" \
    org.opencontainers.image.description="Bennett Institute internal metrics transformation tool" \
    org.opencontainers.image.source="https://github.com/ebmdatalab/metrics"

# copy application code
COPY metrics /app/metrics

# finally, tag with build information. These will change regularly, therefore
# we do them as the last action. Both default to "unknown" when the build arg
# is not supplied.
ARG BUILD_DATE=unknown
LABEL org.opencontainers.image.created=$BUILD_DATE
ARG GITREF=unknown
LABEL org.opencontainers.image.revision=$GITREF



##################################################
#
# Dev image
#
# Now we build a dev image from our metrics-base image. This is basically
# installing dev dependencies and matching local UID/GID. It is expected that
# the current code will be mounted in /app when this is run
#
FROM metrics-base as metrics-dev

# switch back to root to run the install of dev requirements.txt
USER root

# install development requirements
COPY requirements.dev.txt /tmp/requirements.dev.txt
# using cache mount instead
# hadolint ignore=DL3042
RUN --mount=type=cache,target=/root/.cache \
    python -m pip install --requirement /tmp/requirements.dev.txt

# in dev, ensure metrics uid matches host user id
ARG DEV_USERID=1000
ARG DEV_GROUPID=1000
# the build args are quoted so each is always passed to the command as a
# single word
RUN usermod -u "$DEV_USERID" metrics
# Modify metrics only if group id does not already exist. We run dev
# containers with an explicit group id anyway, so file permissions on the host
# will be correct, and we do not actually rely on named metrics group access to
# anything.
RUN grep -q ":$DEV_GROUPID:" /etc/group || groupmod -g "$DEV_GROUPID" metrics


# switch back to metrics
USER metrics
4 changes: 4 additions & 0 deletions docker/build-dependencies.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# list ubuntu packages needed to build dependencies, one per line
build-essential
libpq-dev
python3.11-dev
7 changes: 7 additions & 0 deletions docker/dependencies.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# list ubuntu packages needed in production, one per line
git
postgresql-client
python3.11
python3.11-distutils
python3.11-venv
tzdata
17 changes: 17 additions & 0 deletions dotenv-sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# The DSN for accessing the timescaledb database
TIMESCALEDB_URL=postgres://user:pass@localhost:5433/metrics

# API token for pulling data from Github
GITHUB_TOKEN=

# Slack API access credentials.
# The slack app used for this will need the following OAuth scopes:
# * channels:history
# * groups:history
# * im:history
# * mpim:history
SLACK_SIGNING_SECRET=
SLACK_TOKEN=

# Slack channel ID for tech-support-channel
SLACK_TECH_SUPPORT_CHANNEL_ID=C0270Q313H7
Loading