diff --git a/.github/workflows/dockerhub_ci.yml b/.github/workflows/dockerhub_ci.yml index 2f87c26..bc58cb4 100644 --- a/.github/workflows/dockerhub_ci.yml +++ b/.github/workflows/dockerhub_ci.yml @@ -21,89 +21,98 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - # Specify which tags to build - DOCKER_TAG: ["3.4.0,3.4,latest", "3.3.2,3.3", "3.1.3,3.1", "3.0.3,3.0", "2.4.8,2.4"] + image: + - + SPARK_VERSION: "3.5.3" + OPENJDK_VERSION: "17" + HADOOP_VERSION: "3" + LATEST: "true" + - + SPARK_VERSION: "3.4.4" + OPENJDK_VERSION: "11" + HADOOP_VERSION: "3" + LATEST: "false" + - + SPARK_VERSION: "3.3.4" + OPENJDK_VERSION: "11" + HADOOP_VERSION: "3" + LATEST: "false" + - + SPARK_VERSION: "3.1.3" + OPENJDK_VERSION: "11" + HADOOP_VERSION: "3.2" + LATEST: "false" + - + SPARK_VERSION: "3.0.3" + OPENJDK_VERSION: "11" + HADOOP_VERSION: "3.2" + LATEST: "false" + - + SPARK_VERSION: "2.4.8" + OPENJDK_VERSION: "8" + HADOOP_VERSION: "2.7" + LATEST: "false" timeout-minutes: 30 steps: - name: Checkout code - uses: actions/checkout@v2 - - - name: Parse args - id: args - run: | - DOCKER_TAG="${{ matrix.DOCKER_TAG }}" - - # Manipulate DOCKER_TAG to create build args - SPARK_MAJOR_VERSION=${DOCKER_TAG:0:1} - SPARK_MAJOR_MINOR_VERSION=${DOCKER_TAG:0:3} - if [[ ${SPARK_MAJOR_MINOR_VERSION} = "3.3" || ${SPARK_MAJOR_MINOR_VERSION} = "3.4" || ${DOCKER_TAG} == "latest" ]]; then - OPENJDK_VERSION=11 - HADOOP_VERSION=3 - elif [ ${SPARK_MAJOR_VERSION} = "3" ]; then - OPENJDK_VERSION=11 - HADOOP_VERSION=3.2 - else - OPENJDK_VERSION=8 - HADOOP_VERSION=2.7 - fi - - BUILD_ARGS="OPENJDK_VERSION=${OPENJDK_VERSION} - HADOOP_VERSION=${HADOOP_VERSION} - SPARK_VERSION=${DOCKER_TAG%%,*}" - - # No modification needed beyond this point - BUILD_ARGS="${BUILD_ARGS//'%'/'%25'}" - BUILD_ARGS="${BUILD_ARGS//$'\n'/'%0A'}" - BUILD_ARGS="${BUILD_ARGS//$'\r'/'%0D'}" - - echo "::set-output name=build_args::$BUILD_ARGS" - - - name: Prepare - id: prep - run: | - DOCKER_TAG="${{ matrix.DOCKER_TAG }}" - DOCKER_IMAGE_NAME="$DOCKER_REPO:${DOCKER_TAG%%,*}" - echo ::set-output name=image_name::${DOCKER_IMAGE_NAME} - - TAGS="$DOCKER_REPO:${DOCKER_TAG//,/,$DOCKER_REPO:}" - echo ::set-output name=tags::${TAGS} - - echo ::set-output name=created::$(date -u +'%Y-%m-%dT%H:%M:%SZ') + uses: actions/checkout@v4 - name: Set up QEMU - uses: docker/setup-qemu-action@v2 + uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 + id: setup-buildx + uses: docker/setup-buildx-action@v3 + + - name: Generate image metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: "${{ env.DOCKER_REPO }}" + flavor: | + latest=${{ matrix.image.LATEST }} + prefix= + suffix= + tags: | + type=semver,pattern={{version}},value=${{ matrix.image.SPARK_VERSION }} + type=semver,pattern={{major}}.{{minor}},value=${{ matrix.image.SPARK_VERSION }} + labels: | + org.opencontainers.image.vendor=Xebia Data, https://xebia.com/ + annotations: | + org.opencontainers.image.vendor=Xebia Data, https://xebia.com/ - name: Build image - uses: docker/build-push-action@v4 + id: docker-build + uses: docker/build-push-action@v6 with: context: . file: ./Dockerfile push: false load: true - tags: ${{ steps.prep.outputs.tags }} + tags: ${{ steps.meta.outputs.tags }} build-args: | - ${{ steps.args.outputs.build_args }} - labels: | - org.opencontainers.image.source=${{ github.event.repository.clone_url }} - org.opencontainers.image.created=${{ steps.prep.outputs.created }} - org.opencontainers.image.revision=${{ github.sha }} + HADOOP_VERSION=${{ matrix.image.HADOOP_VERSION }} + OPENJDK_VERSION=${{ matrix.image.OPENJDK_VERSION }} + SPARK_VERSION=${{ matrix.image.SPARK_VERSION }} + labels: ${{ steps.meta.outputs.labels }} + annotations: ${{ steps.meta.outputs.annotations }} - name: Test image - env: - IMAGE_NAME: ${{ steps.prep.outputs.image_name }} run: | - if [[ -f "docker-compose.test.yml" ]]; then - docker-compose --file docker-compose.test.yml build - docker-compose --file docker-compose.test.yml run sut - fi + IMAGE_NAME="godatadriven/spark:${{ matrix.image.SPARK_VERSION }}" + TEST_IMAGE_NAME="spark-image-tests:${{ matrix.image.SPARK_VERSION }}" + docker build \ + --file tests/Dockerfile \ + --build-arg IMAGE_NAME="${IMAGE_NAME}" \ + --tag "${TEST_IMAGE_NAME}" \ + ./tests/ + + docker run --rm "${TEST_IMAGE_NAME}" - name: Login to DockerHub if: github.ref == 'refs/heads/master' - uses: docker/login-action@v1 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB }} @@ -111,25 +120,25 @@ jobs: - name: Push image # Build and push because of multi platform build if: github.ref == 'refs/heads/master' - uses: docker/build-push-action@v4 + uses: docker/build-push-action@v6 with: context: . file: ./Dockerfile push: true provenance: false sbom: false - tags: ${{ steps.prep.outputs.tags }} + tags: ${{ steps.meta.outputs.tags }} build-args: | - ${{ steps.args.outputs.build_args }} - labels: | - org.opencontainers.image.source=${{ github.event.repository.clone_url }} - org.opencontainers.image.created=${{ steps.prep.outputs.created }} - org.opencontainers.image.revision=${{ github.sha }} + HADOOP_VERSION=${{ matrix.image.HADOOP_VERSION }} + OPENJDK_VERSION=${{ matrix.image.OPENJDK_VERSION }} + SPARK_VERSION=${{ matrix.image.SPARK_VERSION }} + labels: ${{ steps.meta.outputs.labels }} + annotations: ${{ steps.meta.outputs.annotations }} platforms: linux/amd64,linux/arm64 - name: Report Status if: always() && github.ref == 'refs/heads/master' - uses: ravsamhq/notify-slack-action@master + uses: ravsamhq/notify-slack-action@v2 with: status: ${{ job.status }} notify_when: 'failure' diff --git a/Dockerfile b/Dockerfile index 7cb37b0..437611e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,11 +9,11 @@ LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \ org.label-schema.build-date=$BUILD_DATE \ org.label-schema.version=$SPARK_VERSION -ENV SPARK_HOME /usr/spark +ENV SPARK_HOME=/usr/spark ENV PATH="/usr/spark/bin:/usr/spark/sbin:${PATH}" RUN apt-get update && \ - apt-get install -y wget netcat procps libpostgresql-jdbc-java && \ + apt-get install -y wget netcat-openbsd procps libpostgresql-jdbc-java && \ wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \ rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \