From 4cf01c6f131856794f4364c07847887270a13ab8 Mon Sep 17 00:00:00 2001 From: sychen Date: Wed, 20 Sep 2023 15:20:18 +0800 Subject: [PATCH 1/8] sbt mr --- build/make-distribution.sh | 1 + project/CelebornBuild.scala | 91 ++++++++++++++++++++++++++++++++++++- 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/build/make-distribution.sh b/build/make-distribution.sh index 26980834427..7276009129a 100755 --- a/build/make-distribution.sh +++ b/build/make-distribution.sh @@ -309,6 +309,7 @@ if [ "$SBT_ENABLED" == "true" ]; then sbt_build_client -Pflink-1.14 sbt_build_client -Pflink-1.15 sbt_build_client -Pflink-1.17 + sbt_build_client -Pmr else echo "build client with $@" ENGINE_COUNT=0 diff --git a/project/CelebornBuild.scala b/project/CelebornBuild.scala index 31d3c836e3a..83781c907b6 100644 --- a/project/CelebornBuild.scala +++ b/project/CelebornBuild.scala @@ -71,6 +71,7 @@ object Dependencies { val guava = "com.google.guava" % "guava" % guavaVersion val hadoopClientApi = "org.apache.hadoop" % "hadoop-client-api" % hadoopVersion val hadoopClientRuntime = "org.apache.hadoop" % "hadoop-client-runtime" % hadoopVersion + val hadoopMapreduceClientApp = "org.apache.hadoop" % "hadoop-mapreduce-client-app" % hadoopVersion val ioDropwizardMetricsCore = "io.dropwizard.metrics" % "metrics-core" % metricsVersion val ioDropwizardMetricsGraphite = "io.dropwizard.metrics" % "metrics-graphite" % metricsVersion val ioDropwizardMetricsJvm = "io.dropwizard.metrics" % "metrics-jvm" % metricsVersion @@ -212,7 +213,7 @@ object CelebornBuild extends sbt.internal.BuildDef { CelebornClient.client, CelebornService.service, CelebornWorker.worker, - CelebornMaster.master) ++ maybeSparkClientModules ++ maybeFlinkClientModules + CelebornMaster.master) ++ maybeSparkClientModules ++ maybeFlinkClientModules ++ maybeMRClientModules } // ThisBuild / parallelExecution := false @@ -267,6 +268,15 @@ object Utils { lazy val maybeFlinkClientModules: Seq[Project] = 
flinkClientProjects.map(_.modules).getOrElse(Seq.empty) + val MR_VERSION = profiles.filter(_.startsWith("mr")).headOption + + lazy val mrClientProjects = MR_VERSION match { + case Some("mr") => Some(MRClientProjects) + case _ => None + } + + lazy val maybeMRClientModules: Seq[Project] = mrClientProjects.map(_.modules).getOrElse(Seq.empty) + def defaultScalaVersion(): String = { // 1. Inherit the scala version of the spark project // 2. if the spark profile not specified, using the DEFAULT_SCALA_VERSION @@ -872,3 +882,82 @@ trait FlinkClientProjects { ) } } + +//////////////////////////////////////////////////////// +// MR Client // +//////////////////////////////////////////////////////// +object MRClientProjects { + + def mrClient: Project = { + Project("celeborn-client-mr", file("client-mr/mr")) + .dependsOn(CelebornCommon.common % "test->test;compile->compile") + .dependsOn(CelebornClient.client % "test->test;compile->compile") + .settings( + commonSettings, + libraryDependencies ++= Seq( + Dependencies.hadoopClientApi, + Dependencies.hadoopClientRuntime, + Dependencies.hadoopMapreduceClientApp + ) ++ commonUnitTestDependencies + ) + } + + def mrClientShade: Project = { + Project("celeborn-client-mr-shaded", file("client-mr/mr-shaded")) + .dependsOn(mrClient) + .settings( + commonSettings, + + // align final shaded jar name with maven. + (assembly / assemblyJarName) := { + val extension = artifact.value.extension + s"${moduleName.value}_${scalaBinaryVersion.value}-${version.value}.$extension" + }, + + (assembly / test) := {}, + + (assembly / logLevel) := Level.Info, + + // Exclude `scala-library` from assembly. 
+ (assembly / assemblyPackageScala / assembleArtifact) := false, + + (assembly / assemblyExcludedJars) := { + val cp = (assembly / fullClasspath).value + cp filter { v => + val name = v.data.getName + !(name.startsWith("celeborn-") || + name.startsWith("protobuf-java-") || + name.startsWith("guava-") || + name.startsWith("netty-") || + name.startsWith("commons-lang3-") || + name.startsWith("RoaringBitmap-")) + } + }, + + (assembly / assemblyShadeRules) := Seq( + ShadeRule.rename("com.google.protobuf.**" -> "org.apache.celeborn.shaded.com.google.protobuf.@1").inAll, + ShadeRule.rename("com.google.common.**" -> "org.apache.celeborn.shaded.com.google.common.@1").inAll, + ShadeRule.rename("io.netty.**" -> "org.apache.celeborn.shaded.io.netty.@1").inAll, + ShadeRule.rename("org.apache.commons.**" -> "org.apache.celeborn.shaded.org.apache.commons.@1").inAll, + ShadeRule.rename("org.roaringbitmap.**" -> "org.apache.celeborn.shaded.org.roaringbitmap.@1").inAll + ), + + (assembly / assemblyMergeStrategy) := { + case m if m.toLowerCase(Locale.ROOT).endsWith("manifest.mf") => MergeStrategy.discard + // Drop all proto files that are not needed as artifacts of the build. 
+ case m if m.toLowerCase(Locale.ROOT).endsWith(".proto") => MergeStrategy.discard + case m if m.toLowerCase(Locale.ROOT).startsWith("meta-inf/native-image") => MergeStrategy.discard + // Drop netty jnilib + case m if m.toLowerCase(Locale.ROOT).endsWith(".jnilib") => MergeStrategy.discard + // rename netty native lib + case "META-INF/native/libnetty_transport_native_epoll_x86_64.so" => CustomMergeStrategy.rename(_ => "META-INF/native/liborg_apache_celeborn_shaded_netty_transport_native_epoll_x86_64.so") + case "META-INF/native/libnetty_transport_native_epoll_aarch_64.so" => CustomMergeStrategy.rename(_ => "META-INF/native/liborg_apache_celeborn_shaded_netty_transport_native_epoll_aarch_64.so") + case _ => MergeStrategy.first + } + ) + } + + def modules: Seq[Project] = { + Seq(mrClient, mrClientShade) + } +} From 4d00d8a1d9e5f637e10d289ab371e97c0b67693b Mon Sep 17 00:00:00 2001 From: sychen Date: Wed, 20 Sep 2023 22:09:32 +0800 Subject: [PATCH 2/8] mr lz4 scala --- project/CelebornBuild.scala | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/project/CelebornBuild.scala b/project/CelebornBuild.scala index 83781c907b6..579fa83085a 100644 --- a/project/CelebornBuild.scala +++ b/project/CelebornBuild.scala @@ -890,14 +890,14 @@ object MRClientProjects { def mrClient: Project = { Project("celeborn-client-mr", file("client-mr/mr")) - .dependsOn(CelebornCommon.common % "test->test;compile->compile") - .dependsOn(CelebornClient.client % "test->test;compile->compile") + .dependsOn(CelebornCommon.common, CelebornClient.client) .settings( commonSettings, libraryDependencies ++= Seq( Dependencies.hadoopClientApi, Dependencies.hadoopClientRuntime, - Dependencies.hadoopMapreduceClientApp + Dependencies.hadoopMapreduceClientApp, + "org.lz4" % "lz4-java" % "1.7.1" ) ++ commonUnitTestDependencies ) } @@ -918,8 +918,8 @@ object MRClientProjects { (assembly / logLevel) := Level.Info, - // Exclude `scala-library` from assembly. 
- (assembly / assemblyPackageScala / assembleArtifact) := false, + // Include `scala-library` in the assembly. + (assembly / assemblyPackageScala / assembleArtifact) := true, (assembly / assemblyExcludedJars) := { val cp = (assembly / fullClasspath).value @@ -930,7 +930,9 @@ object MRClientProjects { name.startsWith("guava-") || name.startsWith("netty-") || name.startsWith("commons-lang3-") || - name.startsWith("RoaringBitmap-")) + name.startsWith("RoaringBitmap-") || + name.startsWith("lz4-java-") || + name.startsWith("scala-library-")) } }, From 77a87de6c1564cb6f3a4c3c408441e7ee7ac9019 Mon Sep 17 00:00:00 2001 From: sychen Date: Wed, 20 Sep 2023 22:33:52 +0800 Subject: [PATCH 3/8] mr lz4 scala --- project/CelebornBuild.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/project/CelebornBuild.scala b/project/CelebornBuild.scala index 579fa83085a..18cd28cea1e 100644 --- a/project/CelebornBuild.scala +++ b/project/CelebornBuild.scala @@ -896,8 +896,7 @@ object MRClientProjects { libraryDependencies ++= Seq( Dependencies.hadoopClientApi, Dependencies.hadoopClientRuntime, - Dependencies.hadoopMapreduceClientApp, - "org.lz4" % "lz4-java" % "1.7.1" + Dependencies.hadoopMapreduceClientApp ) ++ commonUnitTestDependencies ) } From 47604e3f6f454bea55ae009a0fb8d6985741f653 Mon Sep 17 00:00:00 2001 From: sychen Date: Thu, 21 Sep 2023 10:51:09 +0800 Subject: [PATCH 4/8] add zstd --- project/CelebornBuild.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/project/CelebornBuild.scala b/project/CelebornBuild.scala index 18cd28cea1e..0dbafd8df4f 100644 --- a/project/CelebornBuild.scala +++ b/project/CelebornBuild.scala @@ -931,6 +931,7 @@ object MRClientProjects { name.startsWith("commons-lang3-") || name.startsWith("RoaringBitmap-") || name.startsWith("lz4-java-") || + name.startsWith("zstd-jni-") || name.startsWith("scala-library-")) } }, From 33e841b5b7f4970e24b3d4641d246dd205d3c395 Mon Sep 17 00:00:00 2001 From: sychen Date: Thu, 21 Sep 2023 10:56:15 
+0800 Subject: [PATCH 5/8] maven add zstd --- client-mr/mr-shaded/pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/client-mr/mr-shaded/pom.xml b/client-mr/mr-shaded/pom.xml index 0ce68acc87e..9ecb39b9f23 100644 --- a/client-mr/mr-shaded/pom.xml +++ b/client-mr/mr-shaded/pom.xml @@ -81,6 +81,7 @@ org.apache.commons:commons-lang3 org.scala-lang:scala-library org.lz4:lz4-java + com.github.luben:zstd-jni org.roaringbitmap:RoaringBitmap From e5305a0abedad73bf6cf3862e1ea95a81b5df9c0 Mon Sep 17 00:00:00 2001 From: sychen Date: Thu, 21 Sep 2023 15:23:16 +0800 Subject: [PATCH 6/8] trigger test From 357466fea8dc518fb086063062a534c20970b132 Mon Sep 17 00:00:00 2001 From: sychen Date: Fri, 29 Sep 2023 00:31:19 +0800 Subject: [PATCH 7/8] exclude license --- project/CelebornBuild.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/project/CelebornBuild.scala b/project/CelebornBuild.scala index 0dbafd8df4f..4e8f1af827b 100644 --- a/project/CelebornBuild.scala +++ b/project/CelebornBuild.scala @@ -946,6 +946,11 @@ object MRClientProjects { (assembly / assemblyMergeStrategy) := { case m if m.toLowerCase(Locale.ROOT).endsWith("manifest.mf") => MergeStrategy.discard + case m if m.startsWith("META-INF/license/") => MergeStrategy.discard + case m if m == "META-INF/LICENSE.txt" => MergeStrategy.discard + case m if m == "META-INF/NOTICE.txt" => MergeStrategy.discard + case m if m == "LICENSE.txt" => MergeStrategy.discard + case m if m == "NOTICE.txt" => MergeStrategy.discard // Drop all proto files that are not needed as artifacts of the build. 
case m if m.toLowerCase(Locale.ROOT).endsWith(".proto") => MergeStrategy.discard case m if m.toLowerCase(Locale.ROOT).startsWith("meta-inf/native-image") => MergeStrategy.discard From cbb8a4290af3af3f0d153b0d31181f80ed423a37 Mon Sep 17 00:00:00 2001 From: sychen Date: Sun, 8 Oct 2023 01:22:33 +0900 Subject: [PATCH 8/8] exclude guava --- project/CelebornBuild.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/project/CelebornBuild.scala b/project/CelebornBuild.scala index 4e8f1af827b..e7600c2edc5 100644 --- a/project/CelebornBuild.scala +++ b/project/CelebornBuild.scala @@ -71,7 +71,8 @@ object Dependencies { val guava = "com.google.guava" % "guava" % guavaVersion val hadoopClientApi = "org.apache.hadoop" % "hadoop-client-api" % hadoopVersion val hadoopClientRuntime = "org.apache.hadoop" % "hadoop-client-runtime" % hadoopVersion - val hadoopMapreduceClientApp = "org.apache.hadoop" % "hadoop-mapreduce-client-app" % hadoopVersion + val hadoopMapreduceClientApp = "org.apache.hadoop" % "hadoop-mapreduce-client-app" % hadoopVersion excludeAll ( + ExclusionRule("com.google.guava", "guava")) val ioDropwizardMetricsCore = "io.dropwizard.metrics" % "metrics-core" % metricsVersion val ioDropwizardMetricsGraphite = "io.dropwizard.metrics" % "metrics-graphite" % metricsVersion val ioDropwizardMetricsJvm = "io.dropwizard.metrics" % "metrics-jvm" % metricsVersion