forked from apache/gravitino
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[apache#3554] feat(spark-connector): support spark multi Version (apa…
…che#3415) ### What changes were proposed in this pull request? 1. split spark connector to spark common which contains common logic and v3.x which contains adaptor logic 2. add separate GitHub action to do spark IT 3. ./gradlew :spark-connector:spark35-runtime:build to build corresponding spark connector jars ### Why are the changes needed? Fix: apache#3554 ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests with corresponding spark version
- Loading branch information
Showing
82 changed files
with
1,495 additions
and
253 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
name: Spark Integration Test | ||
|
||
# Controls when the workflow will run | ||
on: | ||
# Triggers the workflow on push or pull request events for the "main" branch and any "branch-*" branch | ||
push: | ||
branches: [ "main", "branch-*" ] | ||
pull_request: | ||
branches: [ "main", "branch-*" ] | ||
|
||
# Runs are grouped per workflow and pull request number (falling back to the ref); a newer run cancels the one in progress | ||
concurrency: | ||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} | ||
cancel-in-progress: true | ||
|
||
jobs: | ||
# Path filter job: publishes a `source_changes` output that downstream jobs use to decide whether to run | ||
changes: | ||
runs-on: ubuntu-latest | ||
steps: | ||
- uses: actions/checkout@v3 | ||
- uses: dorny/paths-filter@v2 | ||
id: filter | ||
with: | ||
# Any changed file matching one of these patterns marks the change set as affecting the build | ||
filters: | | ||
source_changes: | ||
- .github/** | ||
- api/** | ||
- bin/** | ||
- catalogs/** | ||
- clients/client-java/** | ||
- clients/client-java-runtime/** | ||
- clients/filesystem-hadoop3/** | ||
- clients/filesystem-hadoop3-runtime/** | ||
- common/** | ||
- conf/** | ||
- core/** | ||
- dev/** | ||
- gradle/** | ||
- meta/** | ||
- server/** | ||
- server-common/** | ||
- spark-connector/** | ||
- docs/open-api/** | ||
- build.gradle.kts | ||
- gradle.properties | ||
- gradlew | ||
- settings.gradle.kts | ||
outputs: | ||
source_changes: ${{ steps.filter.outputs.source_changes }} | ||
|
||
# Integration test for AMD64 architecture | ||
# Skipped unless the `changes` job reported source_changes == 'true'; runs once per java-version / test-mode combination | ||
test-amd64-arch: | ||
needs: changes | ||
if: needs.changes.outputs.source_changes == 'true' | ||
runs-on: ubuntu-latest | ||
timeout-minutes: 90 | ||
strategy: | ||
matrix: | ||
architecture: [linux/amd64] | ||
java-version: [ 8, 11, 17 ] | ||
test-mode: [ embedded, deploy ] | ||
env: | ||
PLATFORM: ${{ matrix.architecture }} | ||
steps: | ||
- uses: actions/checkout@v3 | ||
|
||
- uses: actions/setup-java@v3 | ||
with: | ||
java-version: ${{ matrix.java-version }} | ||
distribution: 'temurin' | ||
|
||
- name: Set up QEMU | ||
uses: docker/setup-qemu-action@v2 | ||
|
||
- name: Check required command | ||
run: | | ||
dev/ci/check_commands.sh | ||
# The distribution is only built for the "deploy" test mode | ||
- name: Package Gravitino | ||
if: ${{ matrix.test-mode == 'deploy' }} | ||
run: | | ||
./gradlew compileDistribution -x test -PjdkVersion=${{ matrix.java-version }} | ||
# Opt-in debugging step, enabled by putting the 'debug action' label on the pull request | ||
- name: Setup debug Github Action | ||
if: ${{ contains(github.event.pull_request.labels.*.name, 'debug action') }} | ||
uses: csexton/debugger-action@master | ||
|
||
- name: Free up disk space | ||
run: | | ||
dev/ci/util_free_space.sh | ||
# Runs the connector integration test package for each Spark version in turn, with identical flags | ||
- name: Spark Integration Test | ||
id: integrationTest | ||
run: | | ||
./gradlew --rerun-tasks -PskipTests -PtestMode=${{ matrix.test-mode }} -PjdkVersion=${{ matrix.java-version }} :spark-connector:spark-3.3:test --tests "com.datastrato.gravitino.spark.connector.integration.test.**" | ||
./gradlew --rerun-tasks -PskipTests -PtestMode=${{ matrix.test-mode }} -PjdkVersion=${{ matrix.java-version }} :spark-connector:spark-3.4:test --tests "com.datastrato.gravitino.spark.connector.integration.test.**" | ||
./gradlew --rerun-tasks -PskipTests -PtestMode=${{ matrix.test-mode }} -PjdkVersion=${{ matrix.java-version }} :spark-connector:spark-3.5:test --tests "com.datastrato.gravitino.spark.connector.integration.test.**" | ||
# Reports are uploaded when the integration test step failed, or when the pull request carries the 'upload log' label | ||
# NOTE(review): the log paths below use `v3.x/spark` directories while the Gradle projects above are named `spark-3.x` - confirm they match the module layout | ||
- name: Upload integrate tests reports | ||
uses: actions/upload-artifact@v3 | ||
if: ${{ (failure() && steps.integrationTest.outcome == 'failure') || contains(github.event.pull_request.labels.*.name, 'upload log') }} | ||
with: | ||
name: spark-connector-integrate-test-reports-${{ matrix.java-version }}-${{ matrix.test-mode }} | ||
path: | | ||
build/reports | ||
spark-connector/v3.3/spark/build/spark-3.3-integration-test.log | ||
spark-connector/v3.4/spark/build/spark-3.4-integration-test.log | ||
spark-connector/v3.5/spark/build/spark-3.5-integration-test.log | ||
distribution/package/logs/gravitino-server.out | ||
distribution/package/logs/gravitino-server.log |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
/* | ||
* Copyright 2024 Datastrato Pvt Ltd. | ||
* This software is licensed under the Apache License version 2. | ||
*/ | ||
plugins { | ||
`maven-publish` | ||
id("java") | ||
id("idea") | ||
alias(libs.plugins.shadow) | ||
} | ||
|
||
repositories { | ||
mavenCentral() | ||
} | ||
|
||
// Scala version: the "scalaVersion" project property when it is a String, otherwise the "defaultScalaVersion" extra property | ||
val scalaVersion: String = project.properties["scalaVersion"] as? String ?: extra["defaultScalaVersion"].toString() | ||
// Spark version for this module comes from the `spark33` entry of the version catalog | ||
val sparkVersion: String = libs.versions.spark33.get() | ||
// sparkVersion without its last dot-separated component (e.g. "3.3.4" -> "3.3"); used in the Iceberg runtime artifact name | ||
val sparkMajorVersion: String = sparkVersion.substringBeforeLast(".") | ||
val icebergVersion: String = libs.versions.iceberg4spark.get() | ||
val kyuubiVersion: String = libs.versions.kyuubi4spark33.get() | ||
val scalaJava8CompatVersion: String = libs.versions.scala.java.compat.get() | ||
val scalaCollectionCompatVersion: String = libs.versions.scala.collection.compat.get() | ||
|
||
// Spark, Iceberg and Kyuubi artifacts are compileOnly for main code and are added again as testImplementation for tests | ||
dependencies { | ||
implementation(project(":catalogs:bundled-catalog", configuration = "shadow")) | ||
implementation(libs.guava) | ||
|
||
compileOnly(project(":clients:client-java-runtime", configuration = "shadow")) | ||
compileOnly("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") | ||
compileOnly("org.apache.kyuubi:kyuubi-spark-connector-hive_$scalaVersion:$kyuubiVersion") | ||
|
||
compileOnly("org.apache.spark:spark-catalyst_$scalaVersion:$sparkVersion") | ||
compileOnly("org.apache.spark:spark-core_$scalaVersion:$sparkVersion") | ||
compileOnly("org.apache.spark:spark-sql_$scalaVersion:$sparkVersion") | ||
compileOnly("org.scala-lang.modules:scala-java8-compat_$scalaVersion:$scalaJava8CompatVersion") | ||
|
||
annotationProcessor(libs.lombok) | ||
compileOnly(libs.lombok) | ||
|
||
testAnnotationProcessor(libs.lombok) | ||
testCompileOnly(libs.lombok) | ||
|
||
// Use the logging implementation that comes with Spark: Spark 3.3 uses an older log4j, and pulling in a newer one fails with | ||
// java.lang.NoSuchMethodError: org.apache.logging.slf4j.Log4jLoggerFactory: method <init>()V not found | ||
testImplementation(project(":api")) { | ||
exclude("org.apache.logging.log4j") | ||
} | ||
testImplementation(project(":clients:client-java")) { | ||
exclude("org.apache.logging.log4j") | ||
} | ||
testImplementation(project(":core")) { | ||
exclude("org.apache.logging.log4j") | ||
} | ||
testImplementation(project(":common")) { | ||
exclude("org.apache.logging.log4j") | ||
} | ||
testImplementation(project(":server")) { | ||
exclude("org.apache.logging.log4j") | ||
} | ||
testImplementation(project(":server-common")) { | ||
exclude("org.apache.logging.log4j") | ||
} | ||
testImplementation(project(":integration-test-common", "testArtifacts")) | ||
|
||
testImplementation(libs.hive2.common) { | ||
exclude("org.apache.curator") | ||
// use hadoop from Spark | ||
exclude("org.apache.hadoop") | ||
exclude("org.apache.logging.log4j") | ||
exclude("org.eclipse.jetty.aggregate", "jetty-all") | ||
exclude("org.eclipse.jetty.orbit", "javax.servlet") | ||
} | ||
testImplementation(libs.hive2.metastore) { | ||
exclude("co.cask.tephra") | ||
exclude("com.github.joshelser") | ||
exclude("com.google.code.findbugs", "jsr305") | ||
exclude("com.tdunning", "json") | ||
exclude("com.zaxxer", "HikariCP") | ||
// The group id is "io.dropwizard.metrics"; it was previously misspelled, so the exclusion never matched | ||
exclude("io.dropwizard.metrics") | ||
exclude("javax.transaction", "transaction-api") | ||
exclude("org.apache.avro") | ||
exclude("org.apache.curator") | ||
exclude("org.apache.hbase") | ||
exclude("org.apache.hadoop") | ||
exclude("org.apache.hive", "hive-common") | ||
exclude("org.apache.hive", "hive-shims") | ||
exclude("org.apache.logging.log4j") | ||
exclude("org.apache.parquet", "parquet-hadoop-bundle") | ||
exclude("org.apache.zookeeper") | ||
exclude("org.eclipse.jetty.aggregate", "jetty-all") | ||
exclude("org.eclipse.jetty.orbit", "javax.servlet") | ||
exclude("org.slf4j") | ||
} | ||
testImplementation(libs.junit.jupiter.api) | ||
testImplementation(libs.junit.jupiter.params) | ||
testImplementation(libs.mysql.driver) | ||
testImplementation(libs.testcontainers) | ||
|
||
testImplementation("org.apache.iceberg:iceberg-core:$icebergVersion") | ||
testImplementation("org.apache.iceberg:iceberg-hive-metastore:$icebergVersion") | ||
testImplementation("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") | ||
testImplementation("org.apache.kyuubi:kyuubi-spark-connector-hive_$scalaVersion:$kyuubiVersion") | ||
// spark-hive brings in spark-sql, spark-catalyst, hive-common and hdfs-client | ||
testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") { | ||
// conflict with Gravitino server jersey | ||
exclude("org.glassfish.jersey.core") | ||
exclude("org.glassfish.jersey.containers") | ||
exclude("org.glassfish.jersey.inject") | ||
} | ||
testImplementation("org.scala-lang.modules:scala-collection-compat_$scalaVersion:$scalaCollectionCompatVersion") | ||
|
||
testRuntimeOnly(libs.junit.jupiter.engine) | ||
} | ||
|
||
// Test selection is driven by project properties: | ||
// -PskipTests limits the run to integration tests | ||
// -PskipITs / -PskipSparkITs leave the integration tests out | ||
tasks.test { | ||
if (project.hasProperty("skipTests")) { | ||
include("**/integration/**") | ||
} | ||
|
||
val runIntegrationTests = !project.hasProperty("skipITs") && !project.hasProperty("skipSparkITs") | ||
if (runIntegrationTests) { | ||
// Integration tests need the module jar to be built first | ||
dependsOn(tasks.jar) | ||
|
||
// The Hive docker image is passed to the tests through the environment, set just before execution | ||
doFirst { | ||
environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.12") | ||
} | ||
|
||
// Shared setup hook read from the "initIntegrationTest" extra property and applied to this Test task | ||
val setUpIntegrationTest = project.extra.get("initIntegrationTest") as (Test) -> Unit | ||
setUpIntegrationTest(this) | ||
} else { | ||
exclude("**/integration/**") | ||
} | ||
} | ||
|
||
// `clean` additionally deletes the "spark-warehouse" path | ||
tasks.clean { | ||
delete("spark-warehouse") | ||
} | ||
|
||
// Packages the compiled test source set into a jar with the "tests" classifier | ||
val testJar by tasks.registering(Jar::class) { | ||
archiveClassifier.set("tests") | ||
from(sourceSets["test"].output) | ||
} | ||
|
||
// Configuration under which the test jar is published | ||
configurations { | ||
create("testArtifacts") | ||
} | ||
|
||
// Attaches the test jar to the "testArtifacts" configuration | ||
artifacts { | ||
add("testArtifacts", testJar) | ||
} |
File renamed without changes.
File renamed without changes.
File renamed without changes.
Oops, something went wrong.