Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add Spark 3.5 on JDK 17 #16

Merged
merged 5 commits into from
Nov 29, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 79 additions & 69 deletions .github/workflows/dockerhub_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,115 +21,125 @@ jobs:
runs-on: ubuntu-latest
# Build strategy: the job is fanned out over the matrix axes declared below.
strategy:
matrix:
# Specify which tags to build
DOCKER_TAG: ["3.4.0,3.4,latest", "3.3.2,3.3", "3.1.3,3.1", "3.0.3,3.0", "2.4.8,2.4"]
# NOTE(review): DOCKER_TAG and image are both listed under matrix in this text.
# This looks like a rendered diff, so presumably DOCKER_TAG is the superseded
# axis and image is its replacement - confirm against the real workflow file.
#
# One entry per image variant. The steps below read these values as
# matrix.image.SPARK_VERSION, matrix.image.OPENJDK_VERSION,
# matrix.image.HADOOP_VERSION and matrix.image.LATEST.
# Every value is a quoted string, including LATEST; only the 3.5.3 entry sets
# LATEST to "true".
image:
-
SPARK_VERSION: "3.5.3"
OPENJDK_VERSION: "17"
HADOOP_VERSION: "3"
LATEST: "true"
-
SPARK_VERSION: "3.4.4"
OPENJDK_VERSION: "11"
HADOOP_VERSION: "3"
LATEST: "false"
-
SPARK_VERSION: "3.3.4"
OPENJDK_VERSION: "11"
HADOOP_VERSION: "3"
LATEST: "false"
-
SPARK_VERSION: "3.1.3"
OPENJDK_VERSION: "11"
HADOOP_VERSION: "3.2"
LATEST: "false"
-
SPARK_VERSION: "3.0.3"
OPENJDK_VERSION: "11"
HADOOP_VERSION: "3.2"
LATEST: "false"
-
SPARK_VERSION: "2.4.8"
OPENJDK_VERSION: "8"
HADOOP_VERSION: "2.7"
LATEST: "false"

timeout-minutes: 30
steps:
# Checks out the repository; later steps build from ./Dockerfile and ./tests/.
# NOTE(review): a second reference, actions/checkout@v4, appears on its own
# further down (after the "Prepare" step) - presumably the replacement for the
# v2 reference below; confirm.
- name: Checkout code
uses: actions/checkout@v2

# Derives the Docker build arguments from the comma-separated matrix.DOCKER_TAG
# value and publishes them as the step output `build_args`
# (read elsewhere as steps.args.outputs.build_args).
#   - SPARK_VERSION is the text before the first comma of DOCKER_TAG.
#   - OPENJDK_VERSION / HADOOP_VERSION are chosen from the leading characters:
#       "3.3" or "3.4"      -> 11 / 3
#       any other "3..."    -> 11 / 3.2
#       everything else     -> 8  / 2.7
#   - '%', newline and carriage return are percent-encoded before the value is
#     emitted, so the three-line BUILD_ARGS string fits in a single command.
# NOTE(review): the `${DOCKER_TAG} == "latest"` test compares the whole
# comma-separated string, so none of the DOCKER_TAG values listed in the
# matrix can satisfy it.
# NOTE(review): `::set-output` is the legacy workflow command; GitHub has
# deprecated it in favour of appending to $GITHUB_OUTPUT.
- name: Parse args
id: args
run: |
DOCKER_TAG="${{ matrix.DOCKER_TAG }}"

# Manipulate DOCKER_TAG to create build args
SPARK_MAJOR_VERSION=${DOCKER_TAG:0:1}
SPARK_MAJOR_MINOR_VERSION=${DOCKER_TAG:0:3}
if [[ ${SPARK_MAJOR_MINOR_VERSION} = "3.3" || ${SPARK_MAJOR_MINOR_VERSION} = "3.4" || ${DOCKER_TAG} == "latest" ]]; then
OPENJDK_VERSION=11
HADOOP_VERSION=3
elif [ ${SPARK_MAJOR_VERSION} = "3" ]; then
OPENJDK_VERSION=11
HADOOP_VERSION=3.2
else
OPENJDK_VERSION=8
HADOOP_VERSION=2.7
fi

BUILD_ARGS="OPENJDK_VERSION=${OPENJDK_VERSION}
HADOOP_VERSION=${HADOOP_VERSION}
SPARK_VERSION=${DOCKER_TAG%%,*}"

# No modification needed beyond this point
BUILD_ARGS="${BUILD_ARGS//'%'/'%25'}"
BUILD_ARGS="${BUILD_ARGS//$'\n'/'%0A'}"
BUILD_ARGS="${BUILD_ARGS//$'\r'/'%0D'}"

echo "::set-output name=build_args::$BUILD_ARGS"

# Computes three step outputs from matrix.DOCKER_TAG and $DOCKER_REPO:
#   image_name - $DOCKER_REPO:<first tag in the comma-separated list>
#   tags       - every tag in the list prefixed with "$DOCKER_REPO:",
#                still comma-separated
#   created    - the current UTC time formatted as %Y-%m-%dT%H:%M:%SZ
# NOTE(review): uses the legacy `::set-output` workflow command, which GitHub
# has deprecated in favour of appending to $GITHUB_OUTPUT.
- name: Prepare
id: prep
run: |
DOCKER_TAG="${{ matrix.DOCKER_TAG }}"
DOCKER_IMAGE_NAME="$DOCKER_REPO:${DOCKER_TAG%%,*}"
echo ::set-output name=image_name::${DOCKER_IMAGE_NAME}

TAGS="$DOCKER_REPO:${DOCKER_TAG//,/,$DOCKER_REPO:}"
echo ::set-output name=tags::${TAGS}

echo ::set-output name=created::$(date -u +'%Y-%m-%dT%H:%M:%SZ')
uses: actions/checkout@v4

# Runs docker/setup-qemu-action; presumably required for the linux/arm64
# platform requested by the "Push image" step - confirm.
# NOTE(review): two `uses:` references (v2 and v3) are listed for this step;
# presumably v3 supersedes v2 - confirm.
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
uses: docker/setup-qemu-action@v3

# Runs docker/setup-buildx-action ahead of the build-push-action steps and
# exposes the step under the id `setup-buildx`.
# NOTE(review): two `uses:` references (v2 and v3) are listed for this step;
# presumably v3 supersedes v2 - confirm.
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
id: setup-buildx
uses: docker/setup-buildx-action@v3

# Produces the tag, label and annotation lists that the "Build image" and
# "Push image" steps read as steps.meta.outputs.tags / .labels / .annotations.
#   images - repository name, taken from env.DOCKER_REPO
#   flavor - `latest` is driven by matrix.image.LATEST; prefix and suffix are
#            explicitly left empty
#   tags   - two semver patterns fed with matrix.image.SPARK_VERSION: the full
#            version and the major.minor form
#   labels - OCI source/revision/vendor/description labels supplied alongside
#            the action's own output
- name: Generate image metadata
id: meta
uses: docker/metadata-action@v5
with:
images: "${{ env.DOCKER_REPO }}"
flavor: |
latest=${{ matrix.image.LATEST }}
prefix=
suffix=
tags: |
type=semver,pattern={{version}},value=${{ matrix.image.SPARK_VERSION }}
type=semver,pattern={{major}}.{{minor}},value=${{ matrix.image.SPARK_VERSION }}
labels: |
org.opencontainers.image.source=${{ github.event.repository.clone_url }}
org.opencontainers.image.revision=${{ github.sha }}
org.opencontainers.image.vendor=Xebia Data, https://xebia.com/
org.opencontainers.image.description=An Apache Spark Docker image.

# Builds the image from ./Dockerfile without pushing it (push: false) and
# loads the result into the local Docker engine (load: true), so that the
# following "Test image" step can build on top of it.
# NOTE(review): `uses`, `tags` and `labels` each appear twice below, once
# wired to the prep/args step outputs and once to the meta step / matrix.image
# values. Presumably the meta / matrix.image variants supersede the others -
# confirm which set is live.
- name: Build image
uses: docker/build-push-action@v4
id: docker-build
uses: docker/build-push-action@v6
with:
context: .
file: ./Dockerfile
push: false
load: true
tags: ${{ steps.prep.outputs.tags }}
tags: ${{ steps.meta.outputs.tags }}
build-args: |
${{ steps.args.outputs.build_args }}
labels: |
org.opencontainers.image.source=${{ github.event.repository.clone_url }}
org.opencontainers.image.created=${{ steps.prep.outputs.created }}
org.opencontainers.image.revision=${{ github.sha }}
HADOOP_VERSION=${{ matrix.image.HADOOP_VERSION }}
OPENJDK_VERSION=${{ matrix.image.OPENJDK_VERSION }}
SPARK_VERSION=${{ matrix.image.SPARK_VERSION }}
labels: ${{ steps.meta.outputs.labels }}
annotations: ${{ steps.meta.outputs.annotations }}
barend-xebia marked this conversation as resolved.
Show resolved Hide resolved

# Tests the freshly built image. Two variants are interleaved below:
#   - a docker-compose variant that builds and runs the `sut` service, but
#     only when docker-compose.test.yml exists;
#   - a plain docker variant that builds tests/Dockerfile with IMAGE_NAME passed
#     as a build argument, tags it spark-image-tests:<SPARK_VERSION>, and runs
#     that image with --rm.
# NOTE(review): presumably the plain docker variant is the live one - confirm.
# NOTE(review): the plain docker variant hard-codes the repository as
# "godatadriven/spark", whereas the metadata step takes the repository from
# env.DOCKER_REPO. Confirm both resolve to the same name; otherwise the test
# build would not reference the image produced by "Build image".
- name: Test image
env:
IMAGE_NAME: ${{ steps.prep.outputs.image_name }}
run: |
if [[ -f "docker-compose.test.yml" ]]; then
docker-compose --file docker-compose.test.yml build
docker-compose --file docker-compose.test.yml run sut
fi
IMAGE_NAME="godatadriven/spark:${{ matrix.image.SPARK_VERSION }}"
TEST_IMAGE_NAME="spark-image-tests:${{ matrix.image.SPARK_VERSION }}"
docker build \
--file tests/Dockerfile \
--build-arg IMAGE_NAME="${IMAGE_NAME}" \
--tag "${TEST_IMAGE_NAME}" \
./tests/

docker run --rm "${TEST_IMAGE_NAME}"

# Authenticates against the registry using the DOCKERHUB_USERNAME and DOCKERHUB
# secrets. Runs only when the workflow is executing for refs/heads/master.
# NOTE(review): two `uses:` references (v1 and v3) are listed for this step;
# presumably v3 supersedes v1 - confirm.
- name: Login to DockerHub
if: github.ref == 'refs/heads/master'
uses: docker/login-action@v1
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB }}

# Builds the image again for linux/amd64 and linux/arm64 and pushes it
# (push: true). Runs only for refs/heads/master. Provenance and SBOM
# attestations are explicitly switched off.
# NOTE(review): `uses`, `tags` and `labels` each appear twice below, once
# wired to the prep/args step outputs and once to the meta step / matrix.image
# values. Presumably the meta / matrix.image variants supersede the others -
# confirm which set is live.
- name: Push image
# Build and push because of multi platform build
if: github.ref == 'refs/heads/master'
uses: docker/build-push-action@v4
uses: docker/build-push-action@v6
with:
context: .
file: ./Dockerfile
push: true
provenance: false
sbom: false
tags: ${{ steps.prep.outputs.tags }}
tags: ${{ steps.meta.outputs.tags }}
build-args: |
${{ steps.args.outputs.build_args }}
labels: |
org.opencontainers.image.source=${{ github.event.repository.clone_url }}
org.opencontainers.image.created=${{ steps.prep.outputs.created }}
org.opencontainers.image.revision=${{ github.sha }}
HADOOP_VERSION=${{ matrix.image.HADOOP_VERSION }}
OPENJDK_VERSION=${{ matrix.image.OPENJDK_VERSION }}
SPARK_VERSION=${{ matrix.image.SPARK_VERSION }}
labels: ${{ steps.meta.outputs.labels }}
annotations: ${{ steps.meta.outputs.annotations }}
platforms: linux/amd64,linux/arm64

- name: Report Status
if: always() && github.ref == 'refs/heads/master'
uses: ravsamhq/notify-slack-action@master
uses: ravsamhq/notify-slack-action@v2
with:
status: ${{ job.status }}
notify_when: 'failure'
Expand Down
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ LABEL org.label-schema.name="Apache Spark ${SPARK_VERSION}" \
org.label-schema.build-date=$BUILD_DATE \
org.label-schema.version=$SPARK_VERSION

# Spark install location, plus its bin/ and sbin/ directories prepended to PATH.
# NOTE(review): SPARK_HOME is set twice with the same value, once in the
# space-separated form and once in the key=value form - presumably the
# key=value line replaces the other; confirm.
ENV SPARK_HOME /usr/spark
ENV SPARK_HOME=/usr/spark
ENV PATH="/usr/spark/bin:/usr/spark/sbin:${PATH}"

RUN apt-get update && \
apt-get install -y wget netcat procps libpostgresql-jdbc-java && \
apt-get install -y wget netcat-openbsd procps libpostgresql-jdbc-java && \
wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
tar xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
Expand Down