Skip to content

[Spark][Version Checksum] Incrementally compute VersionChecksum setTr… #6809

[Spark][Version Checksum] Incrementally compute VersionChecksum setTr…

[Spark][Version Checksum] Incrementally compute VersionChecksum setTr… #6809

Workflow file for this run

name: "Delta Spark Latest"
on: [push, pull_request]
jobs:
test:
name: "DSL: Scala ${{ matrix.scala }}, Shard ${{ matrix.shard }}"
runs-on: ubuntu-20.04
strategy:
matrix:
# These Scala versions must match those in the build.sbt
scala: [2.12.18, 2.13.13]
# Important: This list of shards must be [0..NUM_SHARDS - 1]
shard: [0, 1, 2, 3]
env:
SCALA_VERSION: ${{ matrix.scala }}
# Important: This must be the same as the length of shards in matrix
NUM_SHARDS: 4
steps:
- uses: actions/checkout@v3
- uses: technote-space/get-diff-action@v4
id: git-diff
with:
PATTERNS: |
**
.github/workflows/**
!kernel/**
!connectors/**
- name: install java
uses: actions/setup-java@v3
with:
distribution: "zulu"
java-version: "8"
- name: Cache Scala, SBT
uses: actions/cache@v3
with:
path: |
~/.sbt
~/.ivy2
~/.cache/coursier
# Change the key if dependencies are changed. For each key, GitHub Actions will cache the
# the above directories when we use the key for the first time. After that, each run will
# just use the cache. The cache is immutable so we need to use a new key when trying to
# cache new stuff.
key: delta-sbt-cache-spark3.2-scala${{ matrix.scala }}
- name: Install Job dependencies
run: |
sudo apt-get update
sudo apt-get install -y make build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev libffi-dev liblzma-dev python-openssl git
sudo apt install libedit-dev
curl -LO https://github.com/bufbuild/buf/releases/download/v1.28.1/buf-Linux-x86_64.tar.gz
mkdir -p ~/buf
tar -xvzf buf-Linux-x86_64.tar.gz -C ~/buf --strip-components 1
rm buf-Linux-x86_64.tar.gz
sudo apt install python3-pip --fix-missing
sudo pip3 install pipenv==2021.5.29
curl https://pyenv.run | bash
export PATH="~/.pyenv/bin:$PATH"
eval "$(pyenv init -)"
eval "$(pyenv virtualenv-init -)"
pyenv install 3.8.18
pyenv global system 3.8.18
pipenv --python 3.8 install
# Update the pip version to 24.0. By default `pyenv.run` installs the latest pip version
# available. From version 24.1, `pip` doesn't allow installing python packages
# with version string containing `-`. In Delta-Spark case, the pypi package generated has
# `-SNAPSHOT` in version (e.g. `3.3.0-SNAPSHOT`) as the version is picked up from
# the`version.sbt` file.
pipenv run pip install pip==24.0 setuptools==69.5.1 wheel==0.43.0
pipenv run pip install pyspark==3.5.3
pipenv run pip install flake8==3.5.0 pypandoc==1.3.3
pipenv run pip install black==23.9.1
pipenv run pip install importlib_metadata==3.10.0
pipenv run pip install mypy==0.982
pipenv run pip install mypy-protobuf==3.3.0
pipenv run pip install cryptography==37.0.4
pipenv run pip install twine==4.0.1
pipenv run pip install wheel==0.33.4
pipenv run pip install setuptools==41.1.0
pipenv run pip install pydocstyle==3.0.0
pipenv run pip install pandas==1.1.3
pipenv run pip install pyarrow==8.0.0
pipenv run pip install numpy==1.20.3
if: steps.git-diff.outputs.diff
- name: Scala structured logging style check
run: |
if [ -f ./dev/spark_structured_logging_style.py ]; then
python3 ./dev/spark_structured_logging_style.py
fi
if: steps.git-diff.outputs.diff
- name: Run Scala/Java and Python tests
# when changing TEST_PARALLELISM_COUNT make sure to also change it in spark_master_test.yaml
run: |
TEST_PARALLELISM_COUNT=4 pipenv run python run-tests.py --group spark --shard ${{ matrix.shard }}
if: steps.git-diff.outputs.diff