From 7587c292b461d4d35b018ce44a8834c61a8c3e73 Mon Sep 17 00:00:00 2001
From: Erik Sundell <erik.i.sundell@gmail.com>
Date: Sun, 11 Jul 2021 02:58:15 +0200
Subject: [PATCH] Makefile / GH Workflow: handle multi arch images

---
 .github/workflows/docker.yml        |  30 ++++---
 Makefile                            | 126 +++++++++++++++++++++++-----
 tagging/github_workflow_commands.py |  55 ++++++++++++
 tagging/tag_image.py                |  15 ++++
 4 files changed, 193 insertions(+), 33 deletions(-)
 create mode 100644 tagging/github_workflow_commands.py

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 6c8f2eae1f..54d91a69a7 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -22,28 +22,19 @@ on:
       - ".pre-commit-config.yaml"
 
 jobs:
-  build-images:
-    name: Build Docker Images
+  build-test-publish-images:
+    name: Build, test, and publish Docker Images
     runs-on: ubuntu-latest
     permissions:
       contents: write
     if: >
       !contains(github.event.head_commit.message, 'ci skip') &&
       !contains(github.event.pull_request.title, 'ci skip')
-    steps:
-      - name: Should we push this image to a public registry?
-        run: |
-          if [ "${{ (github.ref == 'refs/heads/master') || (github.ref == 'refs/heads/main') }}" = "true" ]; then
-              # Empty => Docker Hub
-              echo "REGISTRY=" >> $GITHUB_ENV
-          else
-              echo "REGISTRY=localhost:5000/" >> $GITHUB_ENV
-          fi
 
+    steps:
       # Setup docker to build for multiple platforms, see:
       # https://github.com/docker/build-push-action/tree/master#usage
       # https://github.com/docker/build-push-action/blob/master/docs/advanced/multi-platform.md
-
       - name: Set up QEMU (for docker buildx)
         uses: docker/setup-qemu-action@27d0a4f181a40b142cce983c5393082c365d1480 # dependabot updates to latest release
 
@@ -57,37 +48,50 @@ jobs:
         uses: actions/checkout@v2
         with:
           path: main
+
       - name: Set Up Python
         uses: actions/setup-python@v2
         with:
           python-version: 3.x
+
       - name: Install Dev Dependencies
         run: |
           python -m pip install --upgrade pip
           make -C main dev-env
+
       - name: Build Docker Images
-        run: make -C main build-all
+        run: make -C main build-all-multi
         env:
           # Full logs for CI build
           BUILDKIT_PROGRESS: plain
+
       - name: Test Docker Images
         run: make -C main test-all
+
       - name: Clone Wiki
         uses: actions/checkout@v2
         with:
           repository: ${{github.repository}}.wiki
           path: wiki
+
       - name: Run Post-Build Hooks
+        id: hook-all
         run: make -C main hook-all
+
       - name: Push Wiki to GitHub
         if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/main'
         uses: stefanzweifel/git-auto-commit-action@5dd17c3b53a58c1cb5eaab903826abe94765ccd6 # dependabot updates to latest release
         with:
           commit_message: "[ci skip] Automated publish for ${{github.sha}}"
           repository: wiki/
+
       - name: Login to Docker Hub
         if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/main'
         uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 # dependabot updates to latest release
         with:
           username: ${{secrets.DOCKERHUB_USERNAME}}
           password: ${{secrets.DOCKERHUB_TOKEN}}
+
+      - name: Push Images to DockerHub
+        if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/main'
+        run: make -C main push-all-multi
diff --git a/Makefile b/Makefile
index 4c8e85939f..5d7262e4ae 100644
--- a/Makefile
+++ b/Makefile
@@ -8,18 +8,23 @@ OWNER?=jupyter
 
 # Need to list the images in build dependency order
 
-# These are images we can cross-build
-CROSS_IMAGES:= base-notebook \
+# Images supporting the following architectures:
+# - linux/amd64
+# - linux/arm64
+MULTI_IMAGES:= \
+	base-notebook \
 	minimal-notebook
-# These images that aren't currently supported for cross-building, your help is welcome.
-X86_IMAGES:= r-notebook \
+# Images that can only be built on the amd64 architecture (aka. x86_64)
+AMD64_ONLY_IMAGES:= \
+	r-notebook \
 	scipy-notebook \
 	tensorflow-notebook \
 	datascience-notebook \
 	pyspark-notebook \
 	all-spark-notebook
 # All of the images
-ALL_IMAGES:=base-notebook \
+ALL_IMAGES:= \
+	base-notebook \
 	minimal-notebook \
 	r-notebook \
 	scipy-notebook \
@@ -35,27 +40,79 @@ export DOCKER_BUILDKIT:=1
 help:
 	@echo "jupyter/docker-stacks"
 	@echo "====================="
-	@echo "Replace % with a stack directory name (e.g., make build-cross/minimal-notebook)"
+	@echo "Replace % with a stack directory name (e.g., make build-multi/minimal-notebook)"
 	@echo
 	@grep -E '^[a-zA-Z0-9_%/-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
 
-build-x86/%: DARGS?=
-build-x86/%: ## build the latest image for a stack on x86 only
-	docker buildx build $(DARGS) --rm --force-rm -t $(OWNER)/$(notdir $@):latest ./$(notdir $@) --build-arg OWNER=$(OWNER) --platform "linux/amd64" --push
+
+
+
+
+build/%: DARGS?=
+build/%: ## build the latest image for a stack on amd64 only
+	@echo "::group::Build $(OWNER)/$(notdir $@) (amd64)"
+	docker build $(DARGS) --rm --force-rm -t $(OWNER)/$(notdir $@):latest ./$(notdir $@) --build-arg OWNER=$(OWNER)
 	@echo -n "Built image size: "
 	@docker images $(OWNER)/$(notdir $@):latest --format "{{.Size}}"
-
-build-cross/%: DARGS?=
-build-cross/%: ## build the latest image for a stack on x86 and ARM
-	docker buildx build $(DARGS) --rm --force-rm -t $(OWNER)/$(notdir $@):latest ./$(notdir $@) --build-arg OWNER=$(OWNER) --platform "linux/amd64,linux/arm64" --push
+	@echo "::endgroup::Build $(OWNER)/$(notdir $@) (amd64)"
+
+build-all: $(foreach I,$(ALL_IMAGES), build/$(I)) ## build all stacks
+
+# Limitations on docker buildx build (using docker/buildx 0.5.1):
+#
+# 1. Can't --load and --push at the same time
+#
+# 2. Can't --load multiple platforms
+#
+# What does it mean to --load?
+#
+# - It means that the built image can be referenced by `docker` CLI, for example
+#   when using the `docker tag` or `docker push` commands.
+#
+# Workarounds due to limitations:
+#
+# 1. We always make a dedicated amd64 build named as OWNER/<stack>-notebook so
+#    we always can reference that image no matter what.
+#
+# 2. We always also build a multi-platform image during build-multi that will be
+#    inaccessible with `docker tag` and `docker push` etc, but this will help us
+#    test the build on the different platform and provide cached layers for
+#    later.
+#
+# 3. We let push-multi refer to rebuilding a multi image with `--push`.
+#
+#    We now rely on the cached layer.
+#
+# Outcomes of the workaround:
+#
+# 1. We can keep using the previously defined Makefile commands that don't
+#    include the `-multi` suffix, as before.
+#
+# 2. Assuming we have set up docker/buildx properly to build for the arm64
+#    architecture as well, then we can build and publish such images via the
+#    `-multi` suffix without needing a local registry.
+#
+build-multi/%: DARGS?=
+build-multi/%: ## build the latest image for a stack on amd64 and arm64
+	@echo "::group::Build $(OWNER)/$(notdir $@) (amd64)"
+	docker buildx build $(DARGS) --rm --force-rm -t $(OWNER)/$(notdir $@):latest ./$(notdir $@) --build-arg OWNER=$(OWNER) --platform "linux/amd64" --load
 	@echo -n "Built image size: "
 	@docker images $(OWNER)/$(notdir $@):latest --format "{{.Size}}"
+	@echo "::endgroup::Build $(OWNER)/$(notdir $@) (amd64)"
+
+	@echo "::group::Build $(OWNER)/$(notdir $@) (amd64,arm64)"
+	docker buildx build $(DARGS) --rm --force-rm -t build-multi-tmp-cache/$(notdir $@):latest ./$(notdir $@) --build-arg OWNER=$(OWNER) --platform "linux/amd64,linux/arm64"
+	@echo "::endgroup::Build $(OWNER)/$(notdir $@) (amd64,arm64)"
+
+build-all-multi: $(foreach I,$(MULTI_IMAGES), build-multi/$(I)) $(foreach I,$(AMD64_ONLY_IMAGES), build/$(I)) ## build all stacks
+
 
-build-all: $(foreach I,$(CROSS_IMAGES), build-cross/$(I) ) $(foreach I,$(X86_IMAGES), build-x86/$(I) ) ## build all stacks
 
 check-outdated/%: ## check the outdated conda packages in a stack and produce a report (experimental)
 	@TEST_IMAGE="$(OWNER)/$(notdir $@)" pytest test/test_outdated.py
-check-outdated-all: $(foreach I,$(ALL_IMAGES), check-outdated/$(I) ) ## check all the stacks for outdated conda packages
+check-outdated-all: $(foreach I,$(ALL_IMAGES), check-outdated/$(I)) ## check all the stacks for outdated conda packages
+
+
 
 cont-clean-all: cont-stop-all cont-rm-all ## clean all containers (stop + rm)
 
@@ -67,6 +124,8 @@ cont-rm-all: ## remove all containers
 	@echo "Removing all containers ..."
 	-docker rm --force $(shell docker ps -a -q) 2> /dev/null
 
+
+
 dev/%: ARGS?=
 dev/%: DARGS?=-e JUPYTER_ENABLE_LAB=yes
 dev/%: PORT?=8888
@@ -76,15 +135,22 @@ dev/%: ## run a foreground container for a stack
 dev-env: ## install libraries required to build docs and run tests
 	@pip install -r requirements-dev.txt
 
+
+
 docs: ## build HTML documentation
 	sphinx-build docs/ docs/_build/
 
+
+
+
 hook/%: WIKI_PATH?=../wiki
 hook/%: ## run post-build hooks for an image
-	python3 -m tagging.tag_image --short-image-name "$(notdir $@)" --owner "$(OWNER)" && \
-	python3 -m tagging.create_manifests --short-image-name "$(notdir $@)" --owner "$(OWNER)" --wiki-path "$(WIKI_PATH)"
+	python3 -m tagging.tag_image --short-image-name "$(notdir $@)" --owner "$(OWNER)"
+
+# python3 -m tagging.create_manifests --short-image-name "$(notdir $@)" --owner "$(OWNER)" --wiki-path "$(WIKI_PATH)"
+hook-all: $(foreach I,$(ALL_IMAGES),hook/$(I)) ## run amd64 post-build hooks for all images
+
 
-hook-all: $(foreach I,$(ALL_IMAGES),hook/$(I) ) ## run post-build hooks for all images
 
 img-clean: img-rm-dang img-rm ## clean dangling and jupyter images
 
@@ -100,6 +166,8 @@ img-rm-dang: ## remove dangling images (tagged None)
 	@echo "Removing dangling images ..."
 	-docker rmi --force $(shell docker images -f "dangling=true" -q) 2> /dev/null
 
+
+
 pre-commit-all: ## run pre-commit hook on all files
 	@pre-commit run --all-files || (printf "\n\n\n" && git --no-pager diff --color=always)
 
@@ -107,17 +175,31 @@ pre-commit-install: ## set up the git hook scripts
 	@pre-commit --version
 	@pre-commit install
 
+
+
 pull/%: DARGS?=
 pull/%: ## pull a jupyter image
 	docker pull $(DARGS) $(OWNER)/$(notdir $@)
 
-pull-all: $(foreach I,$(ALL_IMAGES),pull/$(I) ) ## pull all images
+pull-all: $(foreach I,$(ALL_IMAGES),pull/$(I)) ## pull all images
 
 push/%: DARGS?=
 push/%: ## push all tags for a jupyter image
+	@echo "::group::Push $(OWNER)/$(notdir $@) (amd64)"
 	docker push --all-tags $(DARGS) $(OWNER)/$(notdir $@)
+	@echo "::endgroup::Push $(OWNER)/$(notdir $@) (amd64)"
+
+push-all: $(foreach I,$(ALL_IMAGES),push/$(I)) ## push all tagged images
+
+push-multi/%: DARGS?=
+push-multi/%: ## push all tags for a jupyter image that supports multiple architectures
+	@echo "::group::Push $(OWNER)/$(notdir $@) (amd64,arm64)"
+	docker buildx build $(DARGS) --rm --force-rm $($(subst -,_,$(notdir $@))_EXTRA_TAG_ARGS) -t $(OWNER)/$(notdir $@):latest ./$(notdir $@) --build-arg OWNER=$(OWNER) --platform "linux/amd64,linux/arm64" --push
+	@echo "::endgroup::Push $(OWNER)/$(notdir $@) (amd64,arm64)"
+
+push-all-multi: $(foreach I,$(MULTI_IMAGES),push-multi/$(I)) $(foreach I,$(AMD64_ONLY_IMAGES),push/$(I)) ## push all tagged images
+
 
-push-all: $(foreach I,$(ALL_IMAGES),push/$(I) ) ## push all tagged images
 
 run/%: DARGS?=
 run/%: ## run a bash in interactive mode in a stack
@@ -127,8 +209,12 @@ run-sudo/%: DARGS?=
 run-sudo/%: ## run a bash in interactive mode as root in a stack
 	docker run -it --rm -u root $(DARGS) $(OWNER)/$(notdir $@) $(SHELL)
 
+
+
 test/%: ## run tests against a stack (only common tests or common tests + specific tests)
+	@echo "::group::test/$(OWNER)/$(notdir $@)"
 	@if [ ! -d "$(notdir $@)/test" ]; then TEST_IMAGE="$(OWNER)/$(notdir $@)" pytest -m "not info" test; \
 	else TEST_IMAGE="$(OWNER)/$(notdir $@)" pytest -m "not info" test $(notdir $@)/test; fi
+	@echo "::endgroup::test/$(OWNER)/$(notdir $@)"
 
 test-all: $(foreach I,$(ALL_IMAGES),test/$(I)) ## test all stacks
diff --git a/tagging/github_workflow_commands.py b/tagging/github_workflow_commands.py
new file mode 100644
index 0000000000..6613fe69b8
--- /dev/null
+++ b/tagging/github_workflow_commands.py
@@ -0,0 +1,55 @@
+"""
+GitHub Workflow Commands (gwc) for GitHub Actions can help us pass information
+from a Workflow's Job's various build steps to others via "output" and improve
+the presented logs when viewed via the GitHub web based UI.
+
+Reference: https://docs.github.com/en/actions/reference/workflow-commands-for-github-actions
+
+Workflow commands rely on emitting messages:
+
+    print("::{command name} parameter1={data},parameter2={data}::{command value}")
+
+The functions defined in this file will only emit such messages if found to be
+in a GitHub CI environment.
+"""
+
+import json
+import os
+
+from contextlib import contextmanager
+
+
+def _gwc(command_name, command_value="", **params):
+    if not os.environ.get("GITHUB_ACTIONS"):
+        return
+
+    # Assume non-string values are meant to be dumped as JSON
+    if not isinstance(command_value, str):
+        command_value = json.dumps(command_value)
+        print(f"dumped json: {command_value}")
+
+    if params:
+        comma_sep_params = ",".join([f"{k}={v}" for k, v in params.items()])
+        print(f"::{command_name} {comma_sep_params}::{command_value}")
+    else:
+        print(f"::{command_name}::{command_value}")
+
+
+@contextmanager
+def _gwc_group(group_name):
+    """
+    Entering the context prints the group command, and exiting the context
+    prints the endgroup command.
+    """
+    try:
+        yield _gwc("group", group_name)
+    finally:
+        _gwc("endgroup", group_name)
+
+
+def _gwc_set_env(env_name, env_value):
+    if not os.environ.get("GITHUB_ACTIONS") or not os.environ.get("GITHUB_ENV"):
+        return
+
+    with open(os.environ["GITHUB_ENV"], "a") as f:
+        f.write(f"{env_name}={env_value}\n")
diff --git a/tagging/tag_image.py b/tagging/tag_image.py
index 3aef779a2f..176a62bcdb 100755
--- a/tagging/tag_image.py
+++ b/tagging/tag_image.py
@@ -6,26 +6,41 @@
 from plumbum.cmd import docker
 from .docker_runner import DockerRunner
 from .get_taggers_and_manifests import get_taggers_and_manifests
+from .github_workflow_commands import _gwc_set_env
 
 
 logger = logging.getLogger(__name__)
 
 
 def tag_image(short_image_name: str, owner: str) -> None:
+    """
+    Tags <owner>/<short_image_name>:latest with the tags reported by all taggers
+    for the given image.
+
+    In a GitHub Actions environment, the tags are also saved to an environment
+    variable as `docker build` arguments: `-t <owner>/<name>:<tag> -t ...`.
+    """
     logger.info(f"Tagging image: {short_image_name}")
     taggers, _ = get_taggers_and_manifests(short_image_name)
 
     image = f"{owner}/{short_image_name}:latest"
 
     with DockerRunner(image) as container:
+        tags = []
         for tagger in taggers:
             tagger_name = tagger.__name__
             tag_value = tagger.tag_value(container)
+            tags.append(tag_value)
             logger.info(
                 f"Applying tag tagger_name: {tagger_name} tag_value: {tag_value}"
             )
             docker["tag", image, f"{owner}/{short_image_name}:{tag_value}"]()
 
+        if tags:
+            env_name = f'{short_image_name.replace("-", "_")}_EXTRA_TAG_ARGS'
+            image_tags = [f"{owner}/{short_image_name}:{tag}" for tag in tags]
+            _gwc_set_env(env_name, "-t " + " -t ".join(image_tags))
+
 
 if __name__ == "__main__":
     logging.basicConfig(level=logging.INFO)