diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..1aa92f57 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,17 @@ +/.cache +/.dep +/.env +/.git +/.github +/.idea +/.local +/.vscode* +/Dockerfile +/data +/deps/minimap2/build +/deps/minimap2/products +/docs +/example_datasets +/pangraph +/pangraph.tar.gz +/vendor diff --git a/.gitignore b/.gitignore index 8f1c12b5..8d586274 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +\.cache .dep .local data @@ -13,6 +14,9 @@ pangraph.tar.gz bin tutorial +deps/minimap2/build +deps/minimap2/products + *.aux *.bbl *.blg diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..396b749f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,63 @@ +# Stage: builder image +# This stage uses a lot of dependencies and produces the binaries. The results will be copied +# to another image and the builder image will be discarded. +FROM debian:11 as builder + +SHELL ["bash", "-c"] + + +RUN set -euxo pipefail \ +&& export DEBIAN_FRONTEND=noninteractive \ +&& apt-get update -qq --yes \ +&& apt-get install -qq --no-install-recommends --yes \ + build-essential \ + ca-certificates \ + curl \ + mafft \ + make \ + mash \ +>/dev/null \ +&& apt-get autoremove --yes >/dev/null \ +&& apt-get clean autoclean >/dev/null \ +&& rm -rf /var/lib/apt/lists/* + +ENV PATH="/build_dir/bin:/build_dir/vendor/julia/bin:$PATH" + +COPY . /build_dir/ + +RUN set -euxo pipefail \ +&& cd /build_dir \ +&& make + + +# Stage: production image +# We start over from a clean Debian image and copy the binaries from the builder stage. 
+FROM debian:11 as prod + +# Copy pangraph from the builder stage +COPY --from=builder /build_dir/pangraph/ /usr/ + +# Copy julia dependencies from the builder stage +COPY --from=builder /root/.julia/artifacts /root/.julia/artifacts +COPY --from=builder /root/.julia/conda/3/bin /root/.julia/conda/3/bin +COPY --from=builder /root/.julia/conda/3/lib /root/.julia/conda/3/lib + +SHELL ["bash", "-c"] + +RUN set -euxo pipefail \ +&& export DEBIAN_FRONTEND=noninteractive \ +&& apt-get update -qq --yes \ +&& apt-get install -qq --no-install-recommends --yes \ + mafft \ + mash \ +>/dev/null \ +&& apt-get autoremove --yes >/dev/null \ +&& apt-get clean autoclean >/dev/null \ +&& rm -rf /var/lib/apt/lists/* + +# Allows non-root users to read dependencies +RUN set -euxo pipefail \ +&& chmod -R +r /root/ \ +&& chmod +x /root/ + +CMD ["/usr/bin/pangraph"] diff --git a/Makefile b/Makefile index 8bc1a83e..214dd5b0 100644 --- a/Makefile +++ b/Makefile @@ -5,44 +5,51 @@ version := 1.7.2 ifeq ($(jc),) -jc := ./vendor/julia-$(version)/bin/julia +jc := ./vendor/julia/bin/julia endif -jflags := -q --project=. -julia := julia $(jflags) +jflags := --project=. 
srcs := $(wildcard src/*.jl src/*/*.jl) +# julia := julia $(jflags) datadir := data/synthetic testdatum := $(datadir)/test.fa -all: pangraph install +all: pangraph install: pangraph/bin/pangraph ln -s $$(pwd)/$< bin/pangraph -environment: - bin/setup-pangraph +environment: $(jc) + $(jc) $(jflags) -e 'import Pkg; Pkg.instantiate();' && \ + $(jc) $(jflags) -e 'import Pkg; Pkg.add(name="Conda"); import Conda; Conda.add("ete3", channel="etetoolkit")' && \ + $(jc) $(jflags) -e 'import Pkg; Pkg.build();' pangraph: pangraph/bin/pangraph $(datadir): mkdir -p $@ -$(testdatum): | $(jc) $(datadir) - $(jc) $(jflags) -e 'import Pkg; Pkg.instantiate(); Pkg.build()' +$(testdatum): | environment $(jc) $(datadir) $(jc) $(jflags) -e 'using PanGraph; PanGraph.Simulation.test()' # TODO: look for ARM vs x86 +# TODO: julia gets installed into a directory containing version number. This makes it impossible to refer to the +# installation outside of this file. $(jc): ifeq ($(shell uname -s),Linux) + mkdir -p vendor && \ cd vendor && \ curl -L https://julialang-s3.julialang.org/bin/linux/x64/$(basename $(version))/julia-$(version)-linux-x86_64.tar.gz -o julia-$(version)-linux-x86_64.tar.gz && \ - tar xzf julia-$(version)-linux-x86_64.tar.gz + tar xzf julia-$(version)-linux-x86_64.tar.gz && \ + mv julia-$(version) julia else ifeq ($(shell uname -s),Darwin) + mkdir -p vendor && \ cd vendor && \ curl -L https://julialang-s3.julialang.org/bin/mac/x64/$(basename $(version))/julia-$(version)-mac64.tar.gz -o julia-$(version)-mac64.tar.gz && \ - tar xzf julia-$(version)-mac64.tar.gz + tar xzf julia-$(version)-mac64.tar.gz && \ + mv julia-$(version) julia else $(error unsupported host system) endif @@ -61,3 +68,25 @@ clean: rm -rf pangraph pangraph.tar.gz include script/rules.mk + + +export CONTAINER_NAME=neherlab/pangraph + +SHELL=bash +.ONESHELL: +docker: + set -euxo pipefail + + # If $$RELEASE_VERSION is set, use it as an additional docker tag + export DOCKER_TAGS="--tag $${CONTAINER_NAME}:latest" + if [ 
! -z "$${RELEASE_VERSION:-}" ]; then + export DOCKER_TAGS="$${DOCKER_TAGS} --tag $${CONTAINER_NAME}:$${RELEASE_VERSION}" + fi + + docker build --target prod $${DOCKER_TAGS} . + +docker-push: + set -euxo pipefail + : "$${RELEASE_VERSION:?The RELEASE_VERSION environment variable is required.}" + docker push $${CONTAINER_NAME}:$${RELEASE_VERSION} + docker push $${CONTAINER_NAME}:latest diff --git a/README.md b/README.md index e1e5b7b4..c24faa7c 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ # pangraph [![Documentation](https://img.shields.io/badge/Documentation-Link-blue.svg)](https://neherlab.github.io/pangraph/) +![Docker Image Version (latest semver)](https://img.shields.io/docker/v/neherlab/pangraph?label=docker) +![Docker Pulls](https://img.shields.io/docker/pulls/neherlab/pangraph) > a bioinformatic toolkit to align large sets of closely related genomes into a graph data structure diff --git a/vendor/minimap2/Makefile b/deps/minimap2/Makefile similarity index 100% rename from vendor/minimap2/Makefile rename to deps/minimap2/Makefile diff --git a/vendor/minimap2/build_tarballs.jl b/deps/minimap2/build_tarballs.jl similarity index 100% rename from vendor/minimap2/build_tarballs.jl rename to deps/minimap2/build_tarballs.jl diff --git a/vendor/minimap2/static/patches/Makefile.diff b/deps/minimap2/static/patches/Makefile.diff similarity index 100% rename from vendor/minimap2/static/patches/Makefile.diff rename to deps/minimap2/static/patches/Makefile.diff diff --git a/docs/dev/buiding-docker.md b/docs/dev/buiding-docker.md new file mode 100644 index 00000000..fbd01876 --- /dev/null +++ b/docs/dev/buiding-docker.md @@ -0,0 +1,33 @@ +## 👷 Building pangraph Docker image locally + +### Install dependencies + + - Install bash and make. 
+ + - Install Docker: https://docs.docker.com/get-docker/ + + - Optionally set up Docker so that it runs without `sudo`: https://docs.docker.com/engine/install/linux-postinstall/ + + +### Build Docker image locally + +Run: + +```bash +make docker +``` + +This will build the Docker image tagged `neherlab/pangraph` (more precisely `neherlab/pangraph:latest`). If an image with that tag already exists, it will be replaced. The build will take some time. + +If the build completes successfully, the image can be used right away. Refer to the user documentation, skipping the "Pull Docker image" step. + + +### Explore contents, layers and optimize image size + +You could use the [dive tool](https://github.com/wagoodman/dive) to see what's inside an image: + +```bash +dive neherlab/pangraph:latest +``` + +Each [layer](https://stackoverflow.com/questions/31222377/what-are-docker-image-layers) reflects `FROM`, `COPY` and `RUN` commands and the files that have been added to the overlay file system of the image. This can be used to find redundant files. You could then further optimize `Dockerfile` and make the image smaller. diff --git a/docs/dev/releasing.md b/docs/dev/releasing.md new file mode 100644 index 00000000..a0ec7c7f --- /dev/null +++ b/docs/dev/releasing.md @@ -0,0 +1,75 @@ +## 🆕 Releasing pangraph + +### Releasing a new version + +Continuous integration (CI) will build a new version of the Docker container (see `Dockerfile`) on every pushed git tag. + +Make sure you are on the correct branch and commit. Most of the time you want to release code from `master`: + +```bash +git checkout master +``` + +In order to create and push a git tag, run: + +``` +git tag $RELEASE_VERSION +git push origin --tags +``` + +where `$RELEASE_VERSION` is a valid [semantic version](https://semver.org/), without a `v` prefix (e.g. `1.2.3` is correct, `v1.2.3` is not). + +The CI workflow will build the container image and will push it to Docker Hub. 
The image will be tagged with: + + - `latest` (and will overwrite the existing `latest` tag there) + - `$RELEASE_VERSION` + +Both tags should point to the same image, i.e. their SHA hashes should be exactly the same. + +This image version can then be referred to as: + + - `neherlab/pangraph:$RELEASE_VERSION` + - `neherlab/pangraph:latest` + - `neherlab/pangraph` (which is the same as `neherlab/pangraph:latest`) + +for example in `docker pull` and `docker run` commands. + + +### Monitoring and debugging CI build + +The status of the builds can be seen on the GitHub Actions page: + +https://github.com/neherlab/pangraph/actions + +### Verifying CI build + +After the CI build finishes successfully, check Docker Hub to ensure that the new tag is present and that the "latest" tag is updated and points to the same hash: + +https://hub.docker.com/r/neherlab/pangraph + +Pull and run the new version to make sure it works as expected: + +```bash +docker pull neherlab/pangraph:$RELEASE_VERSION + +docker run --rm -it \ + --name "pangraph-$(date +%s)" \ + --volume="$(pwd)/path-to-fasta:/workdir" \ + --user="$(id -u):$(id -g)" \ + --workdir=/workdir neherlab/pangraph:$RELEASE_VERSION \ + bash -c "pangraph build --circular --alpha 0 --beta 0 /workdir/test.fa" +``` + +Here we mount the local directory `path-to-fasta` as `/workdir` so that pangraph can read the `/workdir/test.fa` file. 
+ +> 👷 TODO: implement automated tests + + +### Modifying continuous integration workflow + +See `.github/workflows/build.yml` + + +### Modifying Docker image + +See `Dockerfile` diff --git a/docs/src/index.md b/docs/src/index.md index 7fb7b933..5e2205cb 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -33,6 +33,56 @@ The documentation, and source code, uses the following terminology: There are multiple ways to install PanGraph (either the library or just command line interface) +### Using Docker + +A Docker container image for PanGraph is available on Docker Hub: https://hub.docker.com/r/neherlab/pangraph + + - Install Docker + + Install Docker as described on the official website: https://docs.docker.com/get-docker/ + + Optionally set up Docker so that it runs without `sudo` on Linux: https://docs.docker.com/engine/install/linux-postinstall/ + + - Pull a version of the image + + To obtain the latest version, run: + + ```bash + docker pull neherlab/pangraph:latest + ``` + + To obtain a specific version, for example `1.2.3`, run: + + ```bash + docker pull neherlab/pangraph:1.2.3 + ``` + + - Run the PanGraph container + + Issue the `docker run` command: + + ```bash + docker run --rm -it \ + --name "pangraph-$(date +%s)" \ + --volume="$(pwd):/workdir" \ + --user="$(id -u):$(id -g)" \ + --workdir=/workdir neherlab/pangraph:latest \ + bash -c "pangraph build --circular --alpha 0 --beta 0 /workdir/data/synthetic/test.fa" + ``` + + Here we mount the current directory `.` (expressed as an absolute path, using the `pwd` shell command) as `/workdir` into the container so that pangraph can read the local + file `./data/synthetic/test.fa` as `/workdir/data/synthetic/test.fa`: + + ``` + . -> /workdir + ./data/synthetic/test.fa -> /workdir/data/synthetic/test.fa + ``` + + The `--name` flag sets the name of the container and the `date` command there ensures that a unique name is created on every run. This is optional. The `--rm` flag deletes the container (but not the image) after the run. 
+ + Replace `:latest` with a specific version if desired. The `:latest` tag can also be omitted, as it is the default. + + ### From Julia REPL ```julia (@v1.x) pkg> add https://github.com/neherlab/pangraph.git