Commit: Merge branch 'master' into reorder_keys
Showing 542 changed files with 4,786 additions and 5,192 deletions. Only the diff of the GitHub Actions workflow configuration is reproduced below.
@@ -9,148 +9,231 @@ on:
     - master
 
 jobs:
+  # TODO(SPARK-32248): Recover JDK 11 builds
+  # Build: build Spark and run the tests for specified modules.
   build:
+    name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
     runs-on: ubuntu-latest
     strategy:
+      fail-fast: false
       matrix:
-        java: [ '1.8', '11' ]
-        hadoop: [ 'hadoop-2.7', 'hadoop-3.2' ]
-        hive: [ 'hive-1.2', 'hive-2.3' ]
-        exclude:
-        - java: '11'
-          hive: 'hive-1.2'
-        - hadoop: 'hadoop-3.2'
-          hive: 'hive-1.2'
-    name: Build Spark - JDK${{ matrix.java }}/${{ matrix.hadoop }}/${{ matrix.hive }}
-
+        java:
+          - 1.8
+        hadoop:
+          - hadoop3.2
+        hive:
+          - hive2.3
+        # TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
+        # Kinesis tests depends on external Amazon kinesis service.
+        # Note that the modules below are from sparktestsupport/modules.py.
+        modules:
+          - |-
+            core, unsafe, kvstore, avro,
+            network-common, network-shuffle, repl, launcher,
+            examples, sketch, graphx
+          - |-
+            catalyst, hive-thriftserver
+          - |-
+            streaming, sql-kafka-0-10, streaming-kafka-0-10,
+            mllib-local, mllib,
+            yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl
+          - |-
+            pyspark-sql, pyspark-mllib, pyspark-resource
+          - |-
+            pyspark-core, pyspark-streaming, pyspark-ml
+          - |-
+            sparkr
+        # Here, we split Hive and SQL tests into some of slow ones and the rest of them.
+        included-tags: [""]
+        excluded-tags: [""]
+        comment: [""]
+        include:
+          # Hive tests
+          - modules: hive
+            java: 1.8
+            hadoop: hadoop3.2
+            hive: hive2.3
+            included-tags: org.apache.spark.tags.SlowHiveTest
+            comment: "- slow tests"
+          - modules: hive
+            java: 1.8
+            hadoop: hadoop3.2
+            hive: hive2.3
+            excluded-tags: org.apache.spark.tags.SlowHiveTest
+            comment: "- other tests"
+          # SQL tests
+          - modules: sql
+            java: 1.8
+            hadoop: hadoop3.2
+            hive: hive2.3
+            included-tags: org.apache.spark.tags.ExtendedSQLTest
+            comment: "- slow tests"
+          - modules: sql
+            java: 1.8
+            hadoop: hadoop3.2
+            hive: hive2.3
+            excluded-tags: org.apache.spark.tags.ExtendedSQLTest
+            comment: "- other tests"
+    env:
+      MODULES_TO_TEST: ${{ matrix.modules }}
+      EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
+      INCLUDED_TAGS: ${{ matrix.included-tags }}
+      HADOOP_PROFILE: ${{ matrix.hadoop }}
+      HIVE_PROFILE: ${{ matrix.hive }}
+      # GitHub Actions' default miniconda to use in pip packaging test.
+      CONDA_PREFIX: /usr/share/miniconda
+      GITHUB_PREV_SHA: ${{ github.event.before }}
     steps:
-    - uses: actions/checkout@master
-    # We split caches because GitHub Action Cache has a 400MB-size limit.
-    - uses: actions/cache@v1
+    - name: Checkout Spark repository
+      uses: actions/checkout@v2
+      # In order to fetch changed files
+      with:
+        fetch-depth: 0
+    # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
+    - name: Cache Scala, SBT, Maven and Zinc
+      uses: actions/cache@v1
       with:
         path: build
         key: build-${{ hashFiles('**/pom.xml') }}
         restore-keys: |
           build-
-    - uses: actions/cache@v1
+    - name: Cache Maven local repository
+      uses: actions/cache@v2
       with:
-        path: ~/.m2/repository/com
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-com-
-    - uses: actions/cache@v1
-      with:
-        path: ~/.m2/repository/org
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-org-
-    - uses: actions/cache@v1
-      with:
-        path: ~/.m2/repository/net
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-${{ hashFiles('**/pom.xml') }}
+        path: ~/.m2/repository
+        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }}
         restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-net-
-    - uses: actions/cache@v1
+          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-
+    - name: Cache Ivy local repository
+      uses: actions/cache@v2
       with:
-        path: ~/.m2/repository/io
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-${{ hashFiles('**/pom.xml') }}
+        path: ~/.ivy2/cache
+        key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml') }}-${{ hashFiles('**/plugins.sbt') }}
        restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-io-
-    - name: Set up JDK ${{ matrix.java }}
+          ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-
+    - name: Install JDK ${{ matrix.java }}
       uses: actions/setup-java@v1
       with:
         java-version: ${{ matrix.java }}
-    - name: Build with Maven
-      run: |
-        export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
-        export MAVEN_CLI_OPTS="--no-transfer-progress"
-        mkdir -p ~/.m2
-        ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -P${{ matrix.hive }} -Phive-thriftserver -P${{ matrix.hadoop }} -Phadoop-cloud -Djava.version=${{ matrix.java }} install
-        rm -rf ~/.m2/repository/org/apache/spark
+    # PySpark
+    - name: Install PyPy3
+      # Note that order of Python installations here matters because default python3 is
+      # overridden by pypy3.
+      uses: actions/setup-python@v2
+      if: contains(matrix.modules, 'pyspark')
+      with:
+        python-version: pypy3
+        architecture: x64
+    - name: Install Python 3.6
+      uses: actions/setup-python@v2
+      if: contains(matrix.modules, 'pyspark')
+      with:
+        python-version: 3.6
+        architecture: x64
+    - name: Install Python 3.8
+      uses: actions/setup-python@v2
+      # We should install one Python that is higher then 3+ for SQL and Yarn because:
+      # - SQL component also has Python related tests, for example, IntegratedUDFTestUtils.
+      # - Yarn has a Python specific test too, for example, YarnClusterSuite.
+      if: contains(matrix.modules, 'yarn') || contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
+      with:
+        python-version: 3.8
+        architecture: x64
+    - name: Install Python packages (Python 3.6 and PyPy3)
+      if: contains(matrix.modules, 'pyspark')
+      # PyArrow is not supported in PyPy yet, see ARROW-2651.
+      # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason.
+      run: |
+        python3.6 -m pip install numpy pyarrow pandas scipy
+        python3.6 -m pip list
+        pypy3 -m pip install numpy pandas
+        pypy3 -m pip list
+    - name: Install Python packages (Python 3.8)
+      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
+      run: |
+        python3.8 -m pip install numpy pyarrow pandas scipy
+        python3.8 -m pip list
+    # SparkR
+    - name: Install R 3.6
+      uses: r-lib/actions/setup-r@v1
+      if: contains(matrix.modules, 'sparkr')
+      with:
+        r-version: 3.6
+    - name: Install R packages
+      if: contains(matrix.modules, 'sparkr')
+      run: |
+        sudo apt-get install -y libcurl4-openssl-dev
+        sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), repos='https://cloud.r-project.org/')"
+        # Show installed packages in R.
+        sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]'
+    # Run the tests.
+    - name: "Run tests: ${{ matrix.modules }}"
+      run: |
+        # Hive tests become flaky when running in parallel as it's too intensive.
+        if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi
+        mkdir -p ~/.m2
+        ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
+        rm -rf ~/.m2/repository/org/apache/spark
 
-  lint:
-    runs-on: ubuntu-latest
-    name: Linters (Java/Scala/Python), licenses, dependencies
-    steps:
-    - uses: actions/checkout@master
-    - uses: actions/setup-java@v1
-      with:
-        java-version: '11'
-    - uses: actions/setup-python@v1
-      with:
-        python-version: '3.x'
-        architecture: 'x64'
-    - name: Scala
-      run: ./dev/lint-scala
-    - name: Java
-      run: ./dev/lint-java
-    - name: Python
-      run: |
-        pip install flake8 sphinx numpy
-        ./dev/lint-python
-    - name: License
-      run: ./dev/check-license
-    - name: Dependencies
-      run: ./dev/test-dependencies.sh
-
-  lintr:
-    runs-on: ubuntu-latest
-    name: Linter (R)
-    steps:
-    - uses: actions/checkout@master
-    - uses: actions/setup-java@v1
-      with:
-        java-version: '11'
-    - uses: r-lib/actions/setup-r@v1
-      with:
-        r-version: '3.6.2'
-    - name: Install lib
-      run: |
-        sudo apt-get install -y libcurl4-openssl-dev
-    - name: install R packages
-      run: |
-        sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')"
-        sudo Rscript -e "devtools::install_github('jimhester/[email protected]')"
-    - name: package and install SparkR
-      run: ./R/install-dev.sh
-    - name: lint-r
-      run: ./dev/lint-r
-
-  docs:
+  # Static analysis, and documentation build
+  lint:
+    name: Linters, licenses, dependencies and documentation generation
     runs-on: ubuntu-latest
-    name: Generate documents
     steps:
-    - uses: actions/checkout@master
-    - uses: actions/cache@v1
+    - name: Checkout Spark repository
+      uses: actions/checkout@v2
+    - name: Cache Maven local repository
+      uses: actions/cache@v2
       with:
         path: ~/.m2/repository
         key: docs-maven-repo-${{ hashFiles('**/pom.xml') }}
         restore-keys: |
-          docs-maven-repo-
-    - uses: actions/setup-java@v1
+          docs-maven-
+    - name: Install JDK 1.8
+      uses: actions/setup-java@v1
       with:
-        java-version: '1.8'
-    - uses: actions/setup-python@v1
+        java-version: 1.8
+    - name: Install Python 3.6
+      uses: actions/setup-python@v2
       with:
-        python-version: '3.x'
-        architecture: 'x64'
-    - uses: actions/setup-ruby@v1
+        python-version: 3.6
+        architecture: x64
+    - name: Install Python linter dependencies
+      run: |
+        pip3 install flake8 sphinx numpy
+    - name: Install R 3.6
+      uses: r-lib/actions/setup-r@v1
       with:
-        ruby-version: '2.7'
-    - uses: r-lib/actions/setup-r@v1
+        r-version: 3.6
+    - name: Install R linter dependencies and SparkR
+      run: |
+        sudo apt-get install -y libcurl4-openssl-dev
+        sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
+        sudo Rscript -e "devtools::install_github('jimhester/[email protected]')"
+        ./R/install-dev.sh
+    - name: Install Ruby 2.7 for documentation generation
+      uses: actions/setup-ruby@v1
       with:
-        r-version: '3.6.2'
-    - name: Install lib and pandoc
+        ruby-version: 2.7
+    - name: Install dependencies for documentation generation
       run: |
         sudo apt-get install -y libcurl4-openssl-dev pandoc
-    - name: Install packages
-      run: |
         pip install sphinx mkdocs numpy
         gem install jekyll jekyll-redirect-from rouge
-        sudo Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')"
-    - name: Run jekyll build
+        sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
+    - name: Scala linter
+      run: ./dev/lint-scala
+    - name: Java linter
+      run: ./dev/lint-java
+    - name: Python linter
+      run: ./dev/lint-python
+    - name: R linter
+      run: ./dev/lint-r
+    - name: License test
+      run: ./dev/check-license
+    - name: Dependencies test
+      run: ./dev/test-dependencies.sh
+    - name: Run documentation build
       run: |
         cd docs
         jekyll build
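For context on how the new matrix drives testing: each `include` entry feeds the job's `env:` block, and the "Run tests" step consumes those variables. The sketch below is illustrative only, not part of the commit; it expands the slow-Hive-tests entry into roughly the shell commands that step runs.

# Illustrative expansion of one matrix entry:
# {modules: hive, included-tags: org.apache.spark.tags.SlowHiveTest}.
# GitHub Actions injects the matrix fields via the job's `env:` block.
export MODULES_TO_TEST="hive"
export INCLUDED_TAGS="org.apache.spark.tags.SlowHiveTest"
export EXCLUDED_TAGS=""
export HADOOP_PROFILE="hadoop3.2"
export HIVE_PROFILE="hive2.3"
# Hive tests become flaky when running in parallel as it's too intensive.
if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi
mkdir -p ~/.m2
./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" \
  --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
rm -rf ~/.m2/repository/org/apache/spark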
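A note on the cache consolidation: the commit replaces the four per-groupId Maven caches (com, org, net, io) with a single ~/.m2/repository cache and adds an Ivy cache, relying on the larger 2G limit mentioned in the new comment (the old comment cited a 400MB limit). actions/cache restores an exact match on `key` first, then falls back to the newest cache whose key begins with a `restore-keys` prefix. A rough sketch of that lookup, with made-up hash values:

# Illustrative only; the real action computes hashFiles() internally and
# resolves prefixes server-side. Hash suffixes below are hypothetical.
key="1.8-hadoop3.2-maven-3f9c2a"     # <java>-<hadoop>-maven-<hash of pom.xml files>
restore="1.8-hadoop3.2-maven-"       # prefix fallback from restore-keys
stored_keys="1.8-hadoop3.2-maven-9d41b7 1.8-hadoop3.2-ivy-55aa01"

hit=""
for k in $stored_keys; do            # 1) exact match on the primary key
  [[ "$k" == "$key" ]] && { hit="$k"; break; }
done
if [[ -z "$hit" ]]; then             # 2) fall back to a prefix match
  for k in $stored_keys; do
    [[ "$k" == "$restore"* ]] && { hit="$k"; break; }
  done
fi
echo "restored cache: ${hit:-none}"  # here: the stale Maven cache, via the prefix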