From 4f5ef1d8ec525d392c7d61a222a7fe80c0c3cd6c Mon Sep 17 00:00:00 2001 From: Barend Garvelink <159024183+barend-xebia@users.noreply.github.com> Date: Thu, 28 Nov 2024 10:51:01 +0100 Subject: [PATCH 1/5] add Spark 3.5 on JDK 17 bycatch: netcat package was renamed bycatch: deprecated ENV syntax in Dockerfile --- .github/workflows/dockerhub_ci.yml | 17 +++++++++++++---- Dockerfile | 4 ++-- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/.github/workflows/dockerhub_ci.yml b/.github/workflows/dockerhub_ci.yml index 2f87c26..af16415 100644 --- a/.github/workflows/dockerhub_ci.yml +++ b/.github/workflows/dockerhub_ci.yml @@ -22,7 +22,13 @@ jobs: strategy: matrix: # Specify which tags to build - DOCKER_TAG: ["3.4.0,3.4,latest", "3.3.2,3.3", "3.1.3,3.1", "3.0.3,3.0", "2.4.8,2.4"] + DOCKER_TAG: + - "3.5.3,3.5,latest" + - "3.4.4,3.4" + - "3.3.4,3.3" + - "3.1.3,3.1" + - "3.0.3,3.0" + - "2.4.8,2.4" timeout-minutes: 30 steps: @@ -37,7 +43,10 @@ jobs: # Manipulate DOCKER_TAG to create build args SPARK_MAJOR_VERSION=${DOCKER_TAG:0:1} SPARK_MAJOR_MINOR_VERSION=${DOCKER_TAG:0:3} - if [[ ${SPARK_MAJOR_MINOR_VERSION} = "3.3" || ${SPARK_MAJOR_MINOR_VERSION} = "3.4" || ${DOCKER_TAG} == "latest" ]]; then + if [[ ${SPARK_MAJOR_MINOR_VERSION} = "3.5" || ${DOCKER_TAG} == "latest" ]]; then + OPENJDK_VERSION=17 + HADOOP_VERSION=3 + elif [[ ${SPARK_MAJOR_MINOR_VERSION} = "3.3" || ${SPARK_MAJOR_MINOR_VERSION} = "3.4" ]]; then OPENJDK_VERSION=11 HADOOP_VERSION=3 elif [ ${SPARK_MAJOR_VERSION} = "3" ]; then @@ -97,8 +106,8 @@ jobs: IMAGE_NAME: ${{ steps.prep.outputs.image_name }} run: | if [[ -f "docker-compose.test.yml" ]]; then - docker-compose --file docker-compose.test.yml build - docker-compose --file docker-compose.test.yml run sut + docker compose --file docker-compose.test.yml build + docker compose --file docker-compose.test.yml run sut fi - name: Login to DockerHub diff --git a/Dockerfile b/Dockerfile index 7cb37b0..437611e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,11 +9,11 @@ LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \ org.label-schema.build-date=$BUILD_DATE \ org.label-schema.version=$SPARK_VERSION -ENV SPARK_HOME /usr/spark +ENV SPARK_HOME=/usr/spark ENV PATH="/usr/spark/bin:/usr/spark/sbin:${PATH}" RUN apt-get update && \ - apt-get install -y wget netcat procps libpostgresql-jdbc-java && \ + apt-get install -y wget netcat-openbsd procps libpostgresql-jdbc-java && \ wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ From fee7160f26b385ff47a9416c0b885c08b818d483 Mon Sep 17 00:00:00 2001 From: Barend Garvelink <159024183+barend-xebia@users.noreply.github.com> Date: Thu, 28 Nov 2024 11:40:23 +0100 Subject: [PATCH 2/5] use latest tags of imported actions --- .github/workflows/dockerhub_ci.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/dockerhub_ci.yml b/.github/workflows/dockerhub_ci.yml index af16415..042af99 100644 --- a/.github/workflows/dockerhub_ci.yml +++ b/.github/workflows/dockerhub_ci.yml @@ -33,7 +33,7 @@ jobs: timeout-minutes: 30 steps: - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Parse args id: args @@ -81,13 +81,13 @@ jobs: echo ::set-output name=created::$(date -u +'%Y-%m-%dT%H:%M:%SZ') - name: Set up QEMU - uses: docker/setup-qemu-action@v2 + uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 + uses: docker/setup-buildx-action@v3 - name: Build image - uses: docker/build-push-action@v4 + uses: docker/build-push-action@v6 with: context: . file: ./Dockerfile @@ -112,7 +112,7 @@ jobs: - name: Login to DockerHub if: github.ref == 'refs/heads/master' - uses: docker/login-action@v1 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB }} @@ -120,7 +120,7 @@ jobs: - name: Push image # Build and push because of multi platform build if: github.ref == 'refs/heads/master' - uses: docker/build-push-action@v4 + uses: docker/build-push-action@v6 with: context: . file: ./Dockerfile @@ -138,7 +138,7 @@ jobs: - name: Report Status if: always() && github.ref == 'refs/heads/master' - uses: ravsamhq/notify-slack-action@master + uses: ravsamhq/notify-slack-action@v2 with: status: ${{ job.status }} notify_when: 'failure' From 0007020a9523f78ca56c1c92b80a6d65302df82f Mon Sep 17 00:00:00 2001 From: Barend Garvelink <159024183+barend-xebia@users.noreply.github.com> Date: Thu, 28 Nov 2024 12:22:17 +0100 Subject: [PATCH 3/5] adopt docker/metadata-action@v5 --- .github/workflows/dockerhub_ci.yml | 136 +++++++++++++++-------------- 1 file changed, 69 insertions(+), 67 deletions(-) diff --git a/.github/workflows/dockerhub_ci.yml b/.github/workflows/dockerhub_ci.yml index 042af99..a1faef8 100644 --- a/.github/workflows/dockerhub_ci.yml +++ b/.github/workflows/dockerhub_ci.yml @@ -21,92 +21,94 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - # Specify which tags to build - DOCKER_TAG: - - "3.5.3,3.5,latest" - - "3.4.4,3.4" - - "3.3.4,3.3" - - "3.1.3,3.1" - - "3.0.3,3.0" - - "2.4.8,2.4" + image: + - + SPARK_VERSION: "3.5.3" + OPENJDK_VERSION: "17" + HADOOP_VERSION: "3" + LATEST: "true" + - + SPARK_VERSION: "3.4.4" + OPENJDK_VERSION: "11" + HADOOP_VERSION: "3" + LATEST: "false" + - + SPARK_VERSION: "3.3.4" + OPENJDK_VERSION: "11" + HADOOP_VERSION: "3" + LATEST: "false" + - + SPARK_VERSION: "3.1.3" + OPENJDK_VERSION: "11" + HADOOP_VERSION: "3.2" + LATEST: "false" + - + SPARK_VERSION: "3.0.3" + OPENJDK_VERSION: "11" + HADOOP_VERSION: "3.2" + LATEST: "false" + - + SPARK_VERSION: "2.4.8" + OPENJDK_VERSION: "8" + HADOOP_VERSION: "2.7" + LATEST: "false" timeout-minutes: 30 steps: - name: Checkout code uses: actions/checkout@v4 - - name: Parse args - id: args - run: | - DOCKER_TAG="${{ matrix.DOCKER_TAG }}" - - # Manipulate DOCKER_TAG to create build args - SPARK_MAJOR_VERSION=${DOCKER_TAG:0:1} - SPARK_MAJOR_MINOR_VERSION=${DOCKER_TAG:0:3} - if [[ ${SPARK_MAJOR_MINOR_VERSION} = "3.5" || ${DOCKER_TAG} == "latest" ]]; then - OPENJDK_VERSION=17 - HADOOP_VERSION=3 - elif [[ ${SPARK_MAJOR_MINOR_VERSION} = "3.3" || ${SPARK_MAJOR_MINOR_VERSION} = "3.4" ]]; then - OPENJDK_VERSION=11 - HADOOP_VERSION=3 - elif [ ${SPARK_MAJOR_VERSION} = "3" ]; then - OPENJDK_VERSION=11 - HADOOP_VERSION=3.2 - else - OPENJDK_VERSION=8 - HADOOP_VERSION=2.7 - fi - - BUILD_ARGS="OPENJDK_VERSION=${OPENJDK_VERSION} - HADOOP_VERSION=${HADOOP_VERSION} - SPARK_VERSION=${DOCKER_TAG%%,*}" - - # No modification needed beyond this point - BUILD_ARGS="${BUILD_ARGS//'%'/'%25'}" - BUILD_ARGS="${BUILD_ARGS//$'\n'/'%0A'}" - BUILD_ARGS="${BUILD_ARGS//$'\r'/'%0D'}" - - echo "::set-output name=build_args::$BUILD_ARGS" - - - name: Prepare - id: prep - run: | - DOCKER_TAG="${{ matrix.DOCKER_TAG }}" - DOCKER_IMAGE_NAME="$DOCKER_REPO:${DOCKER_TAG%%,*}" - echo ::set-output name=image_name::${DOCKER_IMAGE_NAME} - - TAGS="$DOCKER_REPO:${DOCKER_TAG//,/,$DOCKER_REPO:}" - echo ::set-output name=tags::${TAGS} - - echo ::set-output name=created::$(date -u +'%Y-%m-%dT%H:%M:%SZ') - - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx + id: setup-buildx uses: docker/setup-buildx-action@v3 + - name: Generate image metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: "${{ env.DOCKER_REPO }}" + flavor: | + latest=${{ matrix.image.LATEST }} + prefix= + suffix= + tags: | + type=semver,pattern={{version}},value=${{ matrix.image.SPARK_VERSION }} + type=semver,pattern={{major}}.{{minor}},value=${{ matrix.image.SPARK_VERSION }} + labels: | + org.opencontainers.image.source=${{ github.event.repository.clone_url }} + org.opencontainers.image.revision=${{ github.sha }} + org.opencontainers.image.vendor=Xebia Data, https://xebia.com/ + org.opencontainers.image.description=An Apache Spark Docker image. + - name: Build image + id: docker-build uses: docker/build-push-action@v6 with: context: . file: ./Dockerfile push: false load: true - tags: ${{ steps.prep.outputs.tags }} + tags: ${{ steps.meta.outputs.tags }} build-args: | - ${{ steps.args.outputs.build_args }} - labels: | - org.opencontainers.image.source=${{ github.event.repository.clone_url }} - org.opencontainers.image.created=${{ steps.prep.outputs.created }} - org.opencontainers.image.revision=${{ github.sha }} + HADOOP_VERSION=${{ matrix.image.HADOOP_VERSION }} + OPENJDK_VERSION=${{ matrix.image.OPENJDK_VERSION }} + SPARK_VERSION=${{ matrix.image.SPARK_VERSION }} + labels: ${{ steps.meta.outputs.labels }} + annotations: ${{ steps.meta.outputs.annotations }} + # https://docs.docker.com/build/ci/github-actions/share-image-jobs/ + outputs: type=docker,dest=/tmp/gdd-spark-${{ matrix.image.SPARK_VERSION }}.tar - name: Test image env: - IMAGE_NAME: ${{ steps.prep.outputs.image_name }} + IMAGE_NAME: "godatadriven/spark:${{ matrix.image.SPARK_VERSION }}" run: | + docker load --input /tmp/gdd-spark-${{ matrix.image.SPARK_VERSION }}.tar + docker image ls -a if [[ -f "docker-compose.test.yml" ]]; then - docker compose --file docker-compose.test.yml build + docker compose --file docker-compose.test.yml build --builder ${{ steps.setup-buildx.outputs.name }} docker compose --file docker-compose.test.yml run sut fi @@ -127,13 +129,13 @@ jobs: push: true provenance: false sbom: false - tags: ${{ steps.prep.outputs.tags }} + tags: ${{ steps.meta.outputs.tags }} build-args: | - ${{ steps.args.outputs.build_args }} - labels: | - org.opencontainers.image.source=${{ github.event.repository.clone_url }} - org.opencontainers.image.created=${{ steps.prep.outputs.created }} - org.opencontainers.image.revision=${{ github.sha }} + HADOOP_VERSION=${{ matrix.image.HADOOP_VERSION }} + OPENJDK_VERSION=${{ matrix.image.OPENJDK_VERSION }} + SPARK_VERSION=${{ matrix.image.SPARK_VERSION }} + labels: ${{ steps.meta.outputs.labels }} + annotations: ${{ steps.meta.outputs.annotations }} platforms: linux/amd64,linux/arm64 - name: Report Status From ae964897588105ce2c041c11a7d86df960bf4e30 Mon Sep 17 00:00:00 2001 From: Barend Garvelink <159024183+barend-xebia@users.noreply.github.com> Date: Thu, 28 Nov 2024 16:54:27 +0100 Subject: [PATCH 4/5] run tests without docker-compose --- .github/workflows/dockerhub_ci.yml | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/.github/workflows/dockerhub_ci.yml b/.github/workflows/dockerhub_ci.yml index a1faef8..a4edd3f 100644 --- a/.github/workflows/dockerhub_ci.yml +++ b/.github/workflows/dockerhub_ci.yml @@ -98,19 +98,18 @@ jobs: SPARK_VERSION=${{ matrix.image.SPARK_VERSION }} labels: ${{ steps.meta.outputs.labels }} annotations: ${{ steps.meta.outputs.annotations }} - # https://docs.docker.com/build/ci/github-actions/share-image-jobs/ - outputs: type=docker,dest=/tmp/gdd-spark-${{ matrix.image.SPARK_VERSION }}.tar - name: Test image - env: - IMAGE_NAME: "godatadriven/spark:${{ matrix.image.SPARK_VERSION }}" run: | - docker load --input /tmp/gdd-spark-${{ matrix.image.SPARK_VERSION }}.tar - docker image ls -a - if [[ -f "docker-compose.test.yml" ]]; then - docker compose --file docker-compose.test.yml build --builder ${{ steps.setup-buildx.outputs.name }} - docker compose --file docker-compose.test.yml run sut - fi + IMAGE_NAME="godatadriven/spark:${{ matrix.image.SPARK_VERSION }}" + TEST_IMAGE_NAME="spark-image-tests:${{ matrix.image.SPARK_VERSION }}" + docker build \ + --file tests/Dockerfile \ + --build-arg IMAGE_NAME="${IMAGE_NAME}" \ + --tag "${TEST_IMAGE_NAME}" \ + ./tests/ + + docker run --rm "${TEST_IMAGE_NAME}" - name: Login to DockerHub if: github.ref == 'refs/heads/master' From d9b65702ebc42318b2bc8da7b4c37f6e9e9286ca Mon Sep 17 00:00:00 2001 From: Barend Garvelink <159024183+barend-xebia@users.noreply.github.com> Date: Fri, 29 Nov 2024 10:36:42 +0100 Subject: [PATCH 5/5] tune the image annotations, rely more on defaults --- .github/workflows/dockerhub_ci.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/dockerhub_ci.yml b/.github/workflows/dockerhub_ci.yml index a4edd3f..bc58cb4 100644 --- a/.github/workflows/dockerhub_ci.yml +++ b/.github/workflows/dockerhub_ci.yml @@ -78,10 +78,9 @@ jobs: type=semver,pattern={{version}},value=${{ matrix.image.SPARK_VERSION }} type=semver,pattern={{major}}.{{minor}},value=${{ matrix.image.SPARK_VERSION }} labels: | - org.opencontainers.image.source=${{ github.event.repository.clone_url }} - org.opencontainers.image.revision=${{ github.sha }} org.opencontainers.image.vendor=Xebia Data, https://xebia.com/ - org.opencontainers.image.description=An Apache Spark Docker image. + annotations: | + org.opencontainers.image.vendor=Xebia Data, https://xebia.com/ - name: Build image id: docker-build