Skip to content

Commit

Permalink
feat(build): add spark 3.4.1 (#59)
Browse files: browse the repository at this point in the history
* add combinations for 3.4.1

* quotes

* spaces

* apply vars

* fix issues with python versions and libreadline

* define path for poetry

* drop python 3.7 runtime

* edits
  • Loading branch information
Fan Ting Wei authored Oct 10, 2023
1 parent 55e86f0 commit 46b6659
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 90 deletions.
160 changes: 81 additions & 79 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,246 +15,246 @@ jobs:
strategy:
matrix:
version:
- spark: "3.1.3"
hadoop: "2.7.4"
scala: "2.12"
java: "8"
python: "3.7"
- spark: "3.1.3"
hadoop: "2.7.4"
scala: "2.12"
java: "8"
python: "3.8"
image: "-buster"
- spark: "3.1.3"
hadoop: "2.7.4"
scala: "2.12"
java: "8"
python: "3.9"
- spark: "3.1.3"
hadoop: "3.2.0"
scala: "2.12"
java: "8"
python: "3.7"
image: "-buster"
- spark: "3.1.3"
hadoop: "3.2.0"
scala: "2.12"
java: "8"
python: "3.8"
image: "-buster"
- spark: "3.1.3"
hadoop: "3.2.0"
scala: "2.12"
java: "8"
python: "3.9"
- spark: "3.1.3"
hadoop: "2.7.4"
scala: "2.12"
java: "11"
python: "3.7"
image: "-buster"
- spark: "3.1.3"
hadoop: "2.7.4"
scala: "2.12"
java: "11"
python: "3.8"
image: "-buster"
- spark: "3.1.3"
hadoop: "2.7.4"
scala: "2.12"
java: "11"
python: "3.9"
- spark: "3.1.3"
hadoop: "3.2.0"
scala: "2.12"
java: "11"
python: "3.7"
image: "-buster"
- spark: "3.1.3"
hadoop: "3.2.0"
scala: "2.12"
java: "11"
python: "3.8"
image: "-buster"
- spark: "3.1.3"
hadoop: "3.2.0"
scala: "2.12"
java: "11"
python: "3.9"
- spark: "3.2.2"
hadoop: "3.3.1"
scala: "2.12"
java: "8"
python: "3.7"
image: "-buster"
- spark: "3.2.2"
hadoop: "3.3.1"
scala: "2.12"
java: "8"
python: "3.8"
image: "-buster"
- spark: "3.2.2"
hadoop: "3.3.1"
scala: "2.12"
java: "8"
python: "3.9"
- spark: "3.2.2"
hadoop: "3.3.1"
scala: "2.13"
java: "8"
python: "3.7"
image: "-buster"
- spark: "3.2.2"
hadoop: "3.3.1"
scala: "2.13"
java: "8"
python: "3.8"
image: "-buster"
- spark: "3.2.2"
hadoop: "3.3.1"
scala: "2.13"
java: "8"
python: "3.9"
- spark: "3.2.2"
hadoop: "3.3.1"
scala: "2.12"
java: "11"
python: "3.7"
image: "-buster"
- spark: "3.2.2"
hadoop: "3.3.1"
scala: "2.12"
java: "11"
python: "3.8"
image: "-buster"
- spark: "3.2.2"
hadoop: "3.3.1"
scala: "2.12"
java: "11"
python: "3.9"
- spark: "3.2.2"
hadoop: "3.3.1"
scala: "2.13"
java: "11"
python: "3.7"
image: "-buster"
- spark: "3.2.2"
hadoop: "3.3.1"
scala: "2.13"
java: "11"
python: "3.8"
image: "-buster"
- spark: "3.2.2"
hadoop: "3.3.1"
scala: "2.13"
java: "11"
python: "3.9"
- spark: "3.3.0"
hadoop: "3.3.2"
scala: "2.12"
java: "8"
python: "3.7"
image: "-buster"
- spark: "3.3.0"
hadoop: "3.3.2"
scala: "2.12"
java: "8"
python: "3.8"
image: "-buster"
- spark: "3.3.0"
hadoop: "3.3.2"
scala: "2.12"
java: "8"
python: "3.9"
- spark: "3.3.0"
hadoop: "3.3.2"
scala: "2.13"
java: "8"
python: "3.7"
image: "-buster"
- spark: "3.3.0"
hadoop: "3.3.2"
scala: "2.13"
java: "8"
python: "3.8"
image: "-buster"
- spark: "3.3.0"
hadoop: "3.3.2"
scala: "2.13"
java: "8"
python: "3.9"
- spark: "3.3.0"
hadoop: "3.3.2"
scala: "2.12"
java: "11"
python: "3.7"
image: "-buster"
- spark: "3.3.0"
hadoop: "3.3.2"
scala: "2.12"
java: "11"
python: "3.8"
image: "-buster"
- spark: "3.3.0"
hadoop: "3.3.2"
scala: "2.12"
java: "11"
python: "3.9"
- spark: "3.3.0"
hadoop: "3.3.2"
scala: "2.13"
java: "11"
python: "3.7"
image: "-buster"
- spark: "3.3.0"
hadoop: "3.3.2"
scala: "2.13"
java: "11"
python: "3.8"
image: "-buster"
- spark: "3.3.0"
hadoop: "3.3.2"
scala: "2.13"
java: "11"
python: "3.9"
- spark: "3.3.1"
hadoop: "3.3.2"
scala: "2.12"
java: "8"
python: "3.7"
image: "-buster"
- spark: "3.3.1"
hadoop: "3.3.2"
scala: "2.12"
java: "8"
python: "3.8"
image: "-buster"
- spark: "3.3.1"
hadoop: "3.3.2"
scala: "2.12"
java: "8"
python: "3.9"
- spark: "3.3.1"
hadoop: "3.3.2"
scala: "2.13"
java: "8"
python: "3.7"
image: "-buster"
- spark: "3.3.1"
hadoop: "3.3.2"
scala: "2.13"
java: "8"
python: "3.8"
image: "-buster"
- spark: "3.3.1"
hadoop: "3.3.2"
scala: "2.13"
java: "8"
python: "3.9"
- spark: "3.3.1"
hadoop: "3.3.2"
scala: "2.12"
java: "11"
python: "3.7"
image: "-buster"
- spark: "3.3.1"
hadoop: "3.3.2"
scala: "2.12"
java: "11"
python: "3.8"
image: "-buster"
- spark: "3.3.1"
hadoop: "3.3.2"
scala: "2.12"
java: "11"
python: "3.9"
image: "-buster"
- spark: "3.3.1"
hadoop: "3.3.2"
scala: "2.13"
java: "11"
python: "3.7"
python: "3.8"
image: "-buster"
- spark: "3.3.1"
hadoop: "3.3.2"
scala: "2.13"
java: "11"
python: "3.9"
image: "-buster"
- spark: "3.4.1"
hadoop: "3.3.4"
scala: "2.12"
java: "8"
python: "3.8"
image: ""
- spark: "3.4.1"
hadoop: "3.3.4"
scala: "2.12"
java: "8"
python: "3.9"
image: ""
- spark: "3.4.1"
hadoop: "3.3.4"
scala: "2.13"
java: "8"
python: "3.8"
image: ""
- spark: "3.4.1"
hadoop: "3.3.4"
scala: "2.13"
java: "8"
python: "3.9"
image: ""
- spark: "3.4.1"
hadoop: "3.3.4"
scala: "2.12"
java: "11"
python: "3.8"
- spark: "3.3.1"
hadoop: "3.3.2"
image: ""
- spark: "3.4.1"
hadoop: "3.3.4"
scala: "2.12"
java: "11"
python: "3.9"
image: ""
- spark: "3.4.1"
hadoop: "3.3.4"
scala: "2.13"
java: "11"
python: "3.8"
image: ""
- spark: "3.4.1"
hadoop: "3.3.4"
scala: "2.13"
java: "11"
python: "3.9"
image: ""
runs-on: ubuntu-20.04
env:
IMAGE_NAME: spark-k8s-addons
Expand All @@ -265,6 +265,7 @@ jobs:
SCALA_VERSION: "${{ matrix.version.scala }}"
JAVA_VERSION: "${{ matrix.version.java }}"
PYTHON_VERSION: "${{ matrix.version.python }}"
IMAGE_VERSION: "${{ matrix.version.image }}"
steps:
- name: Set up Java
uses: actions/setup-java@v2
Expand All @@ -291,7 +292,8 @@ jobs:
--build-arg JAVA_VERSION="${JAVA_VERSION}" \
--build-arg HADOOP_VERSION="${HADOOP_VERSION}" \
--build-arg SCALA_VERSION="${SCALA_VERSION}" \
--build-arg PYTHON_VERSION="${PYTHON_VERSION}"
--build-arg PYTHON_VERSION="${PYTHON_VERSION}" \
--build-arg IMAGE_VERSION="${IMAGE_VERSION}"
- name: Push Docker image
run: bash push-images.sh
env:
Expand Down
12 changes: 9 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ ARG HADOOP_VERSION
ARG SCALA_VERSION
ARG JAVA_VERSION
ARG PYTHON_VERSION
ARG DEBIAN_DIST=buster
ARG IMAGE_VERSION

# For copying over of Python set-up
FROM python:${PYTHON_VERSION}-${DEBIAN_DIST} as python_base
FROM python:${PYTHON_VERSION}${IMAGE_VERSION} as python_base

# While it might make sense to start from `dsaidgovsg/spark-k8s-py` instead,
# it is easier to just COPY over from the above image just the python directory
Expand All @@ -26,6 +26,7 @@ ENV PYTHONPATH="${SPARK_HOME}/python/lib/pyspark.zip:${SPARK_HOME}/python/lib/py

ARG HADOOP_VERSION
ARG PYTHON_VERSION
ARG IMAGE_VERSION

USER root
SHELL ["/bin/bash", "-c"]
Expand All @@ -39,7 +40,12 @@ RUN set -euo pipefail && \
spark-shell --version; \
pyspark --version; \
# Required extra deps
apt-get update && apt-get install --no-install-recommends -y libexpat1 libreadline7 tk; \
if [ "${IMAGE_VERSION}" = "-buster" ]; then \
export LIBREADLINE_VERSION=7 ; \
else \
export LIBREADLINE_VERSION=8 ; \
fi ; \
apt-get update && apt-get install --no-install-recommends -y libexpat1 libreadline"${LIBREADLINE_VERSION}" tk; \
rm -rf /var/lib/apt/lists/*; \
ldconfig; \
# Test every command to return non-error status code for help
Expand Down
Loading

0 comments on commit 46b6659

Please sign in to comment.