From 7ee6bdb3c45175245e0dfe4f2e83004e0286f78e Mon Sep 17 00:00:00 2001 From: Ayaz Salikhov Date: Fri, 16 Jul 2021 17:13:33 +0300 Subject: [PATCH 1/4] Support arm builds Co-authored-by: Erik Sundell --- .github/workflows/docker.yml | 17 +++++- Makefile | 90 +++++++++++++++++++++++++++-- README.md | 16 ++++- docs/index.rst | 19 +++++- tagging/github_workflow_commands.py | 55 ++++++++++++++++++ tagging/tag_image.py | 15 +++++ 6 files changed, 200 insertions(+), 12 deletions(-) create mode 100644 tagging/github_workflow_commands.py diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 56781ed01f..3bebbc82fe 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -32,6 +32,15 @@ jobs: !contains(github.event.pull_request.title, 'ci skip') steps: + # Setup docker to build for multiple platforms, see: + # https://github.com/docker/build-push-action/tree/master#usage + # https://github.com/docker/build-push-action/blob/master/docs/advanced/multi-platform.md + - name: Set up QEMU (for docker buildx) + uses: docker/setup-qemu-action@27d0a4f181a40b142cce983c5393082c365d1480 # dependabot updates to latest release + + - name: Set up Docker Buildx (for multi-arch builds) + uses: docker/setup-buildx-action@0d135e0c2fc0dba0729c1a47ecfcf5a3c7f8579e # dependabot updates to latest release + - name: Clone Main Repo uses: actions/checkout@v2 with: @@ -48,11 +57,14 @@ jobs: make -C main dev-env - name: Build Docker Images - run: make -C main build-test-all + run: make -C main build-all-multi env: # Full logs for CI build BUILDKIT_PROGRESS: plain + - name: Test Docker Images + run: make -C main test-all + - name: Clone Wiki uses: actions/checkout@v2 with: @@ -60,6 +72,7 @@ jobs: path: wiki - name: Run Post-Build Hooks + id: hook-all run: make -C main hook-all - name: Push Wiki to GitHub @@ -78,4 +91,4 @@ jobs: - name: Push Images to DockerHub if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/main' - run: make -C main 
push-all + run: make -C main push-all-multi diff --git a/Makefile b/Makefile index af48dce42b..0dd1c3e973 100644 --- a/Makefile +++ b/Makefile @@ -7,6 +7,22 @@ SHELL:=bash OWNER?=jupyter # Need to list the images in build dependency order + +# Images supporting the following architectures: +# - linux/amd64 +# - linux/arm64 +MULTI_IMAGES:= \ + base-notebook \ + minimal-notebook +# Images that can only be built on the amd64 architecture (aka. x86_64) +AMD64_ONLY_IMAGES:= \ + r-notebook \ + scipy-notebook \ + tensorflow-notebook \ + datascience-notebook \ + pyspark-notebook \ + all-spark-notebook +# All of the images ALL_IMAGES:= \ base-notebook \ minimal-notebook \ @@ -26,19 +42,73 @@ export DOCKER_BUILDKIT:=1 help: @echo "jupyter/docker-stacks" @echo "=====================" - @echo "Replace % with a stack directory name (e.g., make build/minimal-notebook)" + @echo "Replace % with a stack directory name (e.g., make build-multi/minimal-notebook)" @echo @grep -E '^[a-zA-Z0-9_%/-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' build/%: DARGS?= -build/%: ## build the latest image for a stack - docker build $(DARGS) --rm --force-rm -t $(OWNER)/$(notdir $@):latest --build-arg OWNER=$(OWNER) ./$(notdir $@) +build/%: ## build the latest image for a stack using the system's architecture + @echo "::group::Build $(OWNER)/$(notdir $@) (system's architecture)" + docker build $(DARGS) --rm --force-rm -t $(OWNER)/$(notdir $@):latest ./$(notdir $@) --build-arg OWNER=$(OWNER) @echo -n "Built image size: " @docker images $(OWNER)/$(notdir $@):latest --format "{{.Size}}" + @echo "::endgroup::Build $(OWNER)/$(notdir $@) (system's architecture)" build-all: $(foreach I, $(ALL_IMAGES), build/$(I)) ## build all stacks -build-test-all: $(foreach I, $(ALL_IMAGES), build/$(I) test/$(I)) ## build and test all stacks + +# Limitations on docker buildx build (using docker/buildx 0.5.1): +# +# 1. 
Can't --load and --push at the same time +# +# 2. Can't --load multiple platforms +# +# What does it mean to --load? +# +# - It means that the built image can be referenced by `docker` CLI, for example +# when using the `docker tag` or `docker push` commands. +# +# Workarounds due to limitations: +# +# 1. We always build a dedicated image using the current system architecture +# named as OWNER/<stack>-notebook so we can always reference that image no +# matter what during tests etc. +# +# 2. We always also build a multi-platform image during build-multi that will be +# inaccessible with `docker tag` and `docker push` etc, but this will help us +# test the build on the different platforms and provide cached layers for +# later. +# +# 3. We let push-multi refer to rebuilding a multi image with `--push`. +# +# We can rely on the cached layer from build-multi now even though we never +# tagged the multi image. +# +# Outcomes of the workaround: +# +# 1. We can keep using the previously defined Makefile commands that don't +# include the `-multi` suffix as before. +# +# 2. Assuming we have set up docker/buildx properly to build for arm64 +# architectures as well, then we can build and publish such images via the +# `-multi` suffix without needing a local registry. +# +# 3. If we get dedicated arm64 runners, we can test everything separately +# without needing to update this Makefile, and if all tests succeed we can +# do a publish job that creates a multi-platform image for us.
+# +build-multi/%: DARGS?= +build-multi/%: ## build the latest image for a stack on both amd64 and arm64 + @echo "::group::Build $(OWNER)/$(notdir $@) (system's architecture)" + docker buildx build $(DARGS) --rm --force-rm -t $(OWNER)/$(notdir $@):latest ./$(notdir $@) --build-arg OWNER=$(OWNER) + @echo -n "Built image size: " + @docker images $(OWNER)/$(notdir $@):latest --format "{{.Size}}" + @echo "::endgroup::Build $(OWNER)/$(notdir $@) (system's architecture)" + + @echo "::group::Build $(OWNER)/$(notdir $@) (amd64,arm64)" + docker buildx build $(DARGS) --rm --force-rm -t build-multi-tmp-cache/$(notdir $@):latest ./$(notdir $@) --build-arg OWNER=$(OWNER) --platform "linux/amd64,linux/arm64" + @echo "::endgroup::Build $(OWNER)/$(notdir $@) (amd64,arm64)" +build-all-multi: $(foreach I, $(MULTI_IMAGES), build-multi/$(I)) $(foreach I, $(AMD64_ONLY_IMAGES), build/$(I)) ## build all stacks @@ -110,9 +180,18 @@ pull-all: $(foreach I, $(ALL_IMAGES), pull/$(I)) ## pull all images push/%: DARGS?= push/%: ## push all tags for a jupyter image + @echo "::group::Push $(OWNER)/$(notdir $@) (system's architecture)" docker push --all-tags $(DARGS) $(OWNER)/$(notdir $@) + @echo "::endgroup::Push $(OWNER)/$(notdir $@) (system's architecture)" push-all: $(foreach I, $(ALL_IMAGES), push/$(I)) ## push all tagged images +push-multi/%: DARGS?= +push-multi/%: ## push all tags for a jupyter image that support multiple architectures + @echo "::group::Push $(OWNER)/$(notdir $@) (amd64,arm64)" + docker buildx build $(DARGS) --rm --force-rm $($(subst -,_,$(notdir $@))_EXTRA_TAG_ARGS) -t $(OWNER)/$(notdir $@):latest ./$(notdir $@) --build-arg OWNER=$(OWNER) --platform "linux/amd64,linux/arm64" + @echo "::endgroup::Push $(OWNER)/$(notdir $@) (amd64,arm64)" +push-all-multi: $(foreach I, $(MULTI_IMAGES), push-multi/$(I)) $(foreach I, $(AMD64_ONLY_IMAGES), push/$(I)) ## push all tagged images + run/%: DARGS?= @@ -126,7 +205,8 @@ run-sudo/%: ## run a bash in interactive mode as root in a stack 
test/%: ## run tests against a stack (only common tests or common tests + specific tests) + @echo "::group::test/$(OWNER)/$(notdir $@)" @if [ ! -d "$(notdir $@)/test" ]; then TEST_IMAGE="$(OWNER)/$(notdir $@)" pytest -m "not info" test; \ else TEST_IMAGE="$(OWNER)/$(notdir $@)" pytest -m "not info" test $(notdir $@)/test; fi - + @echo "::endgroup::test/$(OWNER)/$(notdir $@)" test-all: $(foreach I, $(ALL_IMAGES), test/$(I)) ## test all stacks diff --git a/README.md b/README.md index a023019a33..e332eb889a 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,18 @@ This change is tracked in the issue [#1217](https://github.com/jupyter/docker-st - [Jupyter Website](https://jupyter.org) - [Images on DockerHub](https://hub.docker.com/u/jupyter) -## Architectures +## CPU Architectures -Currently published containers only support x86, some containers may support cross-building with docker buildx. +All published containers support amd64 (x86_64). The base-notebook and +minimal-notebook containers also support arm64. The ambition is to have all +containers support both amd64 and arm64. + +### Caveats for arm64 images + +- The manifests we publish in this project's wiki as well as the image tags for + the multi-platform images that also support arm64 are all based on the amd64 + version even though details about the installed package versions could differ + between architectures. For the status of this, see + [#1401](https://github.com/jupyter/docker-stacks/issues/1401). +- Only the amd64 images are actively tested currently. For the status of + this, see [#1402](https://github.com/jupyter/docker-stacks/issues/1402).
diff --git a/docs/index.rst b/docs/index.rst index 9b42b9ac01..586809170a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -44,10 +44,23 @@ Docker destroys the container after notebook server exit, but any files written docker run --rm -p 10000:8888 -e JUPYTER_ENABLE_LAB=yes -v "${PWD}":/home/jovyan/work jupyter/datascience-notebook:33add21fab64 -Architectures ------------ -Currently published containers only support x86, some containers may support cross-building with docker buildx. +CPU Architectures +----------------- + +All published containers support amd64 (x86_64). The base-notebook and +minimal-notebook containers also support arm64. The ambition is to have all +containers support both amd64 and arm64. + +Caveats for arm64 images +^^^^^^^^^^^^^^^^^^^^^^^^ +- The manifests we publish in this project's wiki as well as the image tags for + the multi-platform images that also support arm64 are all based on the amd64 + version even though details about the installed package versions could differ + between architectures. For the status of this, see + `#1401 <https://github.com/jupyter/docker-stacks/issues/1401>`_. +- Only the amd64 images are actively tested currently. For the status of + this, see `#1402 <https://github.com/jupyter/docker-stacks/issues/1402>`_. Table of Contents ----------------- diff --git a/tagging/github_workflow_commands.py b/tagging/github_workflow_commands.py new file mode 100644 index 0000000000..6613fe69b8 --- /dev/null +++ b/tagging/github_workflow_commands.py @@ -0,0 +1,55 @@ +""" +GitHub Workflow Commands (gwc) for GitHub Actions can help us pass information +from a Workflow's Job's various build steps to others via "output" and improve +the presented logs when viewed via the GitHub web based UI.
+ +Reference: https://docs.github.com/en/actions/reference/workflow-commands-for-github-actions + +Workflow commands relies on emitting messages: + + print("::{command name} parameter1={data},parameter2={data}::{command value}") + +The functions defined in this file will only emit such messages if found to be +in a GitHub CI environment. +""" + +import json +import os + +from contextlib import contextmanager + + +def _gwc(command_name, command_value="", **params): + if not os.environ.get("GITHUB_ACTIONS"): + return + + # Assume non-string values are meant to be dumped as JSON + if not isinstance(command_value, str): + command_value = json.dumps(command_value) + print(f"dumped json: {command_value}") + + if params: + comma_sep_params = ",".join([f"{k}={v}" for k, v in params.items()]) + print(f"::{command_name} {comma_sep_params}::{command_value}") + else: + print(f"::{command_name}::{command_value}") + + +@contextmanager +def _gwc_group(group_name): + """ + Entering the context prints the group command, and exiting the context + prints the endgroup command.<< + """ + try: + yield _gwc("group", group_name) + finally: + _gwc("endgroup", group_name) + + +def _gwc_set_env(env_name, env_value): + if not os.environ.get("GITHUB_ACTIONS") or not os.environ.get("GITHUB_ENV"): + return + + with open(os.environ["GITHUB_ENV"], "a") as f: + f.write(f"{env_name}={env_value}\n") diff --git a/tagging/tag_image.py b/tagging/tag_image.py index 3aef779a2f..176a62bcdb 100755 --- a/tagging/tag_image.py +++ b/tagging/tag_image.py @@ -6,26 +6,41 @@ from plumbum.cmd import docker from .docker_runner import DockerRunner from .get_taggers_and_manifests import get_taggers_and_manifests +from .github_workflow_commands import _gwc_set_env logger = logging.getLogger(__name__) def tag_image(short_image_name: str, owner: str) -> None: + """ + Tags <owner>/<short_image_name>:latest with the tags reported by all taggers + for the given image.
+ + Tags are in a GitHub Actions environment also saved to environment variables + in a format making it easy to append them. + """ logger.info(f"Tagging image: {short_image_name}") taggers, _ = get_taggers_and_manifests(short_image_name) image = f"{owner}/{short_image_name}:latest" with DockerRunner(image) as container: + tags = [] for tagger in taggers: tagger_name = tagger.__name__ tag_value = tagger.tag_value(container) + tags.append(tag_value) logger.info( f"Applying tag tagger_name: {tagger_name} tag_value: {tag_value}" ) docker["tag", image, f"{owner}/{short_image_name}:{tag_value}"]() + if tags: + env_name = f'{short_image_name.replace("-", "_")}_EXTRA_TAG_ARGS' + docker_build_tag_args = "-t " + " -t ".join(tags) + _gwc_set_env(env_name, docker_build_tag_args) + if __name__ == "__main__": logging.basicConfig(level=logging.INFO) From 71a5e3ff1624e417ca26bb4f95d8b639e9a14d3d Mon Sep 17 00:00:00 2001 From: Ayaz Salikhov Date: Fri, 16 Jul 2021 17:56:27 +0300 Subject: [PATCH 2/4] Remove unused function --- tagging/github_workflow_commands.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/tagging/github_workflow_commands.py b/tagging/github_workflow_commands.py index 6613fe69b8..46005472fd 100644 --- a/tagging/github_workflow_commands.py +++ b/tagging/github_workflow_commands.py @@ -16,8 +16,6 @@ import json import os -from contextlib import contextmanager - def _gwc(command_name, command_value="", **params): if not os.environ.get("GITHUB_ACTIONS"): @@ -35,18 +33,6 @@ def _gwc(command_name, command_value="", **params): print(f"::{command_name}::{command_value}") -@contextmanager -def _gwc_group(group_name): - """ - Entering the context prints the group command, and exiting the context - prints the endgroup command.<< - """ - try: - yield _gwc("group", group_name) - finally: - _gwc("endgroup", group_name) - - def _gwc_set_env(env_name, env_value): if not os.environ.get("GITHUB_ACTIONS") or not os.environ.get("GITHUB_ENV"): return From 
8d80097ee50511bc90359cc1351754e3f897714c Mon Sep 17 00:00:00 2001 From: Ayaz Salikhov Date: Fri, 16 Jul 2021 17:59:31 +0300 Subject: [PATCH 3/4] Simplify endgroup --- Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 0dd1c3e973..ffe66cb811 100644 --- a/Makefile +++ b/Makefile @@ -54,7 +54,7 @@ build/%: ## build the latest image for a stack using the system's architecture docker build $(DARGS) --rm --force-rm -t $(OWNER)/$(notdir $@):latest ./$(notdir $@) --build-arg OWNER=$(OWNER) @echo -n "Built image size: " @docker images $(OWNER)/$(notdir $@):latest --format "{{.Size}}" - @echo "::endgroup::Build $(OWNER)/$(notdir $@) (system's architecture)" + @echo "::endgroup::" build-all: $(foreach I, $(ALL_IMAGES), build/$(I)) ## build all stacks # Limitations on docker buildx build (using docker/buildx 0.5.1): @@ -103,11 +103,11 @@ build-multi/%: ## build the latest image for a stack on both amd64 and arm64 docker buildx build $(DARGS) --rm --force-rm -t $(OWNER)/$(notdir $@):latest ./$(notdir $@) --build-arg OWNER=$(OWNER) @echo -n "Built image size: " @docker images $(OWNER)/$(notdir $@):latest --format "{{.Size}}" - @echo "::endgroup::Build $(OWNER)/$(notdir $@) (system's architecture)" + @echo "::endgroup::" @echo "::group::Build $(OWNER)/$(notdir $@) (amd64,arm64)" docker buildx build $(DARGS) --rm --force-rm -t build-multi-tmp-cache/$(notdir $@):latest ./$(notdir $@) --build-arg OWNER=$(OWNER) --platform "linux/amd64,linux/arm64" - @echo "::endgroup::Build $(OWNER)/$(notdir $@) (amd64,arm64)" + @echo "::endgroup::" build-all-multi: $(foreach I, $(MULTI_IMAGES), build-multi/$(I)) $(foreach I, $(AMD64_ONLY_IMAGES), build/$(I)) ## build all stacks @@ -182,14 +182,14 @@ push/%: DARGS?= push/%: ## push all tags for a jupyter image @echo "::group::Push $(OWNER)/$(notdir $@) (system's architecture)" docker push --all-tags $(DARGS) $(OWNER)/$(notdir $@) - @echo "::endgroup::Push $(OWNER)/$(notdir $@) (system's 
architecture)" + @echo "::endgroup::" push-all: $(foreach I, $(ALL_IMAGES), push/$(I)) ## push all tagged images push-multi/%: DARGS?= push-multi/%: ## push all tags for a jupyter image that support multiple architectures @echo "::group::Push $(OWNER)/$(notdir $@) (amd64,arm64)" docker buildx build $(DARGS) --rm --force-rm $($(subst -,_,$(notdir $@))_EXTRA_TAG_ARGS) -t $(OWNER)/$(notdir $@):latest ./$(notdir $@) --build-arg OWNER=$(OWNER) --platform "linux/amd64,linux/arm64" - @echo "::endgroup::Push $(OWNER)/$(notdir $@) (amd64,arm64)" + @echo "::endgroup::" push-all-multi: $(foreach I, $(MULTI_IMAGES), push-multi/$(I)) $(foreach I, $(AMD64_ONLY_IMAGES), push/$(I)) ## push all tagged images @@ -208,5 +208,5 @@ test/%: ## run tests against a stack (only common tests or common tests + specif @echo "::group::test/$(OWNER)/$(notdir $@)" @if [ ! -d "$(notdir $@)/test" ]; then TEST_IMAGE="$(OWNER)/$(notdir $@)" pytest -m "not info" test; \ else TEST_IMAGE="$(OWNER)/$(notdir $@)" pytest -m "not info" test $(notdir $@)/test; fi - @echo "::endgroup::test/$(OWNER)/$(notdir $@)" + @echo "::endgroup::" test-all: $(foreach I, $(ALL_IMAGES), test/$(I)) ## test all stacks From a0ac0e3e721bcf338377c0fa36458b1271ad910b Mon Sep 17 00:00:00 2001 From: Ayaz Salikhov Date: Fri, 16 Jul 2021 18:52:38 +0300 Subject: [PATCH 4/4] Simplify github workflow commands --- tagging/github_set_env.py | 11 ++++++++ tagging/github_workflow_commands.py | 41 ----------------------------- tagging/tag_image.py | 4 +-- 3 files changed, 13 insertions(+), 43 deletions(-) create mode 100644 tagging/github_set_env.py delete mode 100644 tagging/github_workflow_commands.py diff --git a/tagging/github_set_env.py b/tagging/github_set_env.py new file mode 100644 index 0000000000..f2827564f0 --- /dev/null +++ b/tagging/github_set_env.py @@ -0,0 +1,11 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. 
+import os + + +def github_set_env(env_name, env_value): + if not os.environ.get("GITHUB_ACTIONS") or not os.environ.get("GITHUB_ENV"): + return + + with open(os.environ["GITHUB_ENV"], "a") as f: + f.write(f"{env_name}={env_value}\n") diff --git a/tagging/github_workflow_commands.py b/tagging/github_workflow_commands.py deleted file mode 100644 index 46005472fd..0000000000 --- a/tagging/github_workflow_commands.py +++ /dev/null @@ -1,41 +0,0 @@ -""" -GitHub Workflow Commands (gwc) for GitHub Actions can help us pass information -from a Workflow's Job's various build steps to others via "output" and improve -the presented logs when viewed via the GitHub web based UI. - -Reference: https://docs.github.com/en/actions/reference/workflow-commands-for-github-actions - -Workflow commands relies on emitting messages: - - print("::{command name} parameter1={data},parameter2={data}::{command value}") - -The functions defined in this file will only emit such messages if found to be -in a GitHub CI environment. 
-""" - -import json -import os - - -def _gwc(command_name, command_value="", **params): - if not os.environ.get("GITHUB_ACTIONS"): - return - - # Assume non-string values are meant to be dumped as JSON - if not isinstance(command_value, str): - command_value = json.dumps(command_value) - print(f"dumped json: {command_value}") - - if params: - comma_sep_params = ",".join([f"{k}={v}" for k, v in params.items()]) - print(f"::{command_name} {comma_sep_params}::{command_value}") - else: - print(f"::{command_name}::{command_value}") - - -def _gwc_set_env(env_name, env_value): - if not os.environ.get("GITHUB_ACTIONS") or not os.environ.get("GITHUB_ENV"): - return - - with open(os.environ["GITHUB_ENV"], "a") as f: - f.write(f"{env_name}={env_value}\n") diff --git a/tagging/tag_image.py b/tagging/tag_image.py index 176a62bcdb..ae795e7d01 100755 --- a/tagging/tag_image.py +++ b/tagging/tag_image.py @@ -6,7 +6,7 @@ from plumbum.cmd import docker from .docker_runner import DockerRunner from .get_taggers_and_manifests import get_taggers_and_manifests -from .github_workflow_commands import _gwc_set_env +from .github_set_env import github_set_env logger = logging.getLogger(__name__) @@ -39,7 +39,7 @@ def tag_image(short_image_name: str, owner: str) -> None: if tags: env_name = f'{short_image_name.replace("-", "_")}_EXTRA_TAG_ARGS' docker_build_tag_args = "-t " + " -t ".join(tags) - _gwc_set_env(env_name, docker_build_tag_args) + github_set_env(env_name, docker_build_tag_args) if __name__ == "__main__":