From e71adead19dd56db7594eafbddd104d77a668983 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Fri, 16 Aug 2024 04:51:50 +0800 Subject: [PATCH 01/12] [#4545] improvement(paimon-catalog): reduce catalog-lakehouse-paimon libs size from 222MB to 156MB --- .../catalog-lakehouse-paimon/build.gradle.kts | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts b/catalogs/catalog-lakehouse-paimon/build.gradle.kts index 757fe706027..214b5454179 100644 --- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts +++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts @@ -30,9 +30,15 @@ val sparkMajorVersion: String = sparkVersion.substringBeforeLast(".") val paimonVersion: String = libs.versions.paimon.get() dependencies { - implementation(project(":api")) - implementation(project(":common")) - implementation(project(":core")) + implementation(project(":api")) { + exclude("*") + } + implementation(project(":catalogs:catalog-common")) { + exclude("*") + } + implementation(project(":core")) { + exclude("*") + } implementation(libs.bundles.paimon) { exclude("com.sun.jersey") exclude("javax.servlet") @@ -123,6 +129,26 @@ tasks { } } +// run ./gradlew :catalogs:catalog-lakehouse-paimon:calculateDependenciesSize at the root of the project +tasks.register("calculateDependenciesSize") { + group = "verification" + description = "Calculates the total size of all dependencies in the runtimeClasspath configuration for :catalogs:catalog-lakehouse-paimon" + + doLast { + val runtimeClasspath = configurations.runtimeClasspath.get() + var totalSize: Long = 0 + + runtimeClasspath.forEach { file -> + if (file.exists()) { + totalSize += file.length() + } else { + println("File not found: ${file.absolutePath}") + } + } + + println("Total size of dependencies: ${totalSize / (1024 * 1024)} MB") + } +} tasks.test { val skipUTs = project.hasProperty("skipTests") if (skipUTs) { From 02316fe71da030e31e87b40ea02a9a352ddc6b5a Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Sun, 18 Aug 2024 20:14:26 +0800 Subject: [PATCH 02/12] #4545 rollback common module name and reduce the size of calculateDependenciesSize --- catalogs/catalog-lakehouse-paimon/build.gradle.kts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts b/catalogs/catalog-lakehouse-paimon/build.gradle.kts index 4d563e28875..b9df4bb6ed7 100644 --- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts +++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts @@ -33,7 +33,7 @@ dependencies { implementation(project(":api")) { exclude("*") } - implementation(project(":catalogs:catalog-common")) { + implementation(project(":common")) { exclude("*") } implementation(project(":core")) { @@ -131,8 +131,8 @@ tasks { } } -// run ./gradlew :catalogs:catalog-lakehouse-paimon:calculateDependenciesSize at the root of the project -tasks.register("calculateDependenciesSize") { +// run ./gradlew :catalogs:catalog-lakehouse-paimon:calcDepsSize at the root of the project +tasks.register("calcDepsSize") { group = "verification" description = "Calculates the total size of all dependencies in the runtimeClasspath configuration for :catalogs:catalog-lakehouse-paimon" From e9e9f7abc7b0aa545b6286af73ecc433d3e9712b Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Tue, 27 Aug 2024 19:45:40 +0800 Subject: [PATCH 03/12] #4591 remove task "Calculates the total size of all dependencies" --- .../catalog-lakehouse-paimon/build.gradle.kts | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts b/catalogs/catalog-lakehouse-paimon/build.gradle.kts index b9df4bb6ed7..2b96bcca628 100644 --- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts +++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts @@ -131,26 +131,6 @@ tasks { } } -// run ./gradlew :catalogs:catalog-lakehouse-paimon:calcDepsSize at the root of the project -tasks.register("calcDepsSize") { - group = "verification" - description = "Calculates the total size of all dependencies in the runtimeClasspath configuration for :catalogs:catalog-lakehouse-paimon" - - doLast { - val runtimeClasspath = configurations.runtimeClasspath.get() - var totalSize: Long = 0 - - runtimeClasspath.forEach { file -> - if (file.exists()) { - totalSize += file.length() - } else { - println("File not found: ${file.absolutePath}") - } - } - - println("Total size of dependencies: ${totalSize / (1024 * 1024)} MB") - } -} tasks.test { val skipUTs = project.hasProperty("skipTests") if (skipUTs) { From 3367c691a3a09efa67b634c591d6b53b8e666c44 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Sat, 31 Aug 2024 11:13:24 +0800 Subject: [PATCH 04/12] #4545 remove unused lib --- .../catalog-lakehouse-paimon/build.gradle.kts | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts b/catalogs/catalog-lakehouse-paimon/build.gradle.kts index ab3369ef7ae..92ad6f6d8ee 100644 --- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts +++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts @@ -42,23 +42,25 @@ dependencies { implementation(libs.bundles.paimon) { exclude("com.sun.jersey") exclude("javax.servlet") + exclude("org.apache.hive") + exclude("org.apache.hbase") + exclude("it.unimi.dsi") + exclude("org.apache.hadoop") + exclude("org.mortbay.jetty") + exclude("org.apache.parquet") + exclude("org.mortbay.jetty:jetty") + exclude("org.mortbay.jetty:jetty-util") + exclude("org.mortbay.jetty:jetty-sslengine") + exclude("org.eclipse.jetty.aggregate:jetty-all") } implementation(libs.bundles.log4j) implementation(libs.commons.lang3) - implementation(libs.caffeine) implementation(libs.guava) implementation(libs.hadoop2.common) { exclude("com.github.spotbugs") exclude("com.sun.jersey") exclude("javax.servlet") - } - implementation(libs.hadoop2.hdfs) { - exclude("com.sun.jersey") - exclude("javax.servlet") - } - implementation(libs.hadoop2.mapreduce.client.core) { - exclude("com.sun.jersey") - exclude("javax.servlet") + exclude("org.mortbay.jetty") } annotationProcessor(libs.lombok) @@ -67,9 +69,13 @@ dependencies { testImplementation(project(":clients:client-java")) testImplementation(project(":integration-test-common", "testArtifacts")) testImplementation(project(":server")) - testImplementation(project(":server-common")) + testImplementation(project(":server-common")) { + exclude("org.mortbay.jetty") + exclude("com.sun.jersey.contribs") + } testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") { exclude("org.apache.hadoop") + exclude("org.rocksdb") } testImplementation("org.apache.spark:spark-sql_$scalaVersion:$sparkVersion") { exclude("org.apache.avro") From c90138c81637eb6fa2343a05c53a468128579127 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Wed, 4 Sep 2024 21:40:53 +0800 Subject: [PATCH 05/12] #4545 remove unused lib --- catalogs/catalog-lakehouse-paimon/build.gradle.kts | 1 - 1 file changed, 1 deletion(-) diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts b/catalogs/catalog-lakehouse-paimon/build.gradle.kts index 92ad6f6d8ee..7ebc10c5282 100644 --- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts +++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts @@ -45,7 +45,6 @@ dependencies { exclude("org.apache.hive") exclude("org.apache.hbase") exclude("it.unimi.dsi") - exclude("org.apache.hadoop") exclude("org.mortbay.jetty") exclude("org.apache.parquet") exclude("org.mortbay.jetty:jetty") From f16d54b739bc62e5ecc5cc386e47700746d1eae0 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Mon, 9 Sep 2024 19:49:27 +0800 Subject: [PATCH 06/12] #4270 exclude org.mortbay.jetty --- catalogs/catalog-lakehouse-paimon/build.gradle.kts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts b/catalogs/catalog-lakehouse-paimon/build.gradle.kts index 5c2a4229c50..3eb2a74aed4 100644 --- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts +++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts @@ -61,7 +61,11 @@ dependencies { exclude("javax.servlet") exclude("org.mortbay.jetty") } - + implementation(libs.hadoop2.hdfs) { + exclude("com.sun.jersey") + exclude("javax.servlet") + exclude("org.mortbay.jetty") + } annotationProcessor(libs.lombok) compileOnly(libs.lombok) From 4d9417f9375cab44c278f60ed40cf3c2373357d2 Mon Sep 17 00:00:00 2001 From: qiang_liu Date: Wed, 18 Sep 2024 21:32:21 +0800 Subject: [PATCH 07/12] #4545 rollback implementation(libs.hadoop2.mapreduce.client.core) , reduce lib size to 98M --- catalogs/catalog-lakehouse-paimon/build.gradle.kts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts b/catalogs/catalog-lakehouse-paimon/build.gradle.kts index 69f78b6adf7..07201602ecd 100644 --- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts +++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts @@ -52,6 +52,12 @@ dependencies { exclude("org.mortbay.jetty:jetty-sslengine") exclude("org.eclipse.jetty.aggregate:jetty-all") } + implementation(libs.hadoop2.mapreduce.client.core) { + exclude("com.sun.jersey") + exclude("javax.servlet") + exclude(group = "org.mortbay.jetty", module = "jetty-util") + exclude(group = "com.sun.jersey.contribs", module = "jersey-guice") + } implementation(libs.bundles.log4j) implementation(libs.commons.lang3) implementation(libs.guava) From 84a4e3942fd93bc1ebe42576494b98ef64c21454 Mon Sep 17 00:00:00 2001 From: fanng Date: Mon, 23 Sep 2024 18:46:07 +0800 Subject: [PATCH 08/12] continue shrink --- .../catalog-lakehouse-paimon/build.gradle.kts | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts b/catalogs/catalog-lakehouse-paimon/build.gradle.kts index 07201602ecd..3f229bfe709 100644 --- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts +++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts @@ -42,21 +42,16 @@ dependencies { implementation(libs.bundles.paimon) { exclude("com.sun.jersey") exclude("javax.servlet") + exclude("org.apache.curator") exclude("org.apache.hive") exclude("org.apache.hbase") - exclude("it.unimi.dsi") + exclude("org.apache.zookeeper") + exclude("org.eclipse.jetty.aggregate:jetty-all") exclude("org.mortbay.jetty") - exclude("org.apache.parquet") exclude("org.mortbay.jetty:jetty") exclude("org.mortbay.jetty:jetty-util") exclude("org.mortbay.jetty:jetty-sslengine") - exclude("org.eclipse.jetty.aggregate:jetty-all") - } - implementation(libs.hadoop2.mapreduce.client.core) { - exclude("com.sun.jersey") - exclude("javax.servlet") - exclude(group = "org.mortbay.jetty", module = "jetty-util") - exclude(group = "com.sun.jersey.contribs", module = "jersey-guice") + exclude("it.unimi.dsi") } implementation(libs.bundles.log4j) implementation(libs.commons.lang3) @@ -68,9 +63,10 @@ dependencies { exclude("org.mortbay.jetty") } implementation(libs.hadoop2.hdfs) { - exclude("com.sun.jersey") - exclude("javax.servlet") - exclude("org.mortbay.jetty") + exclude("*") + } + implementation(libs.hadoop2.mapreduce.client.core) { + exclude("*") } annotationProcessor(libs.lombok) compileOnly(libs.lombok) From 547611848d908a61ef062e3bbc4bd3a98a7e30e5 Mon Sep 17 00:00:00 2001 From: fanng Date: Mon, 23 Sep 2024 18:56:25 +0800 Subject: [PATCH 09/12] continue shrink --- catalogs/catalog-lakehouse-paimon/build.gradle.kts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts b/catalogs/catalog-lakehouse-paimon/build.gradle.kts index 3f229bfe709..ee162b7723a 100644 --- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts +++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts @@ -60,6 +60,8 @@ dependencies { exclude("com.github.spotbugs") exclude("com.sun.jersey") exclude("javax.servlet") + exclude("org.apache.curator") + exclude("org.apache.zookeeper") exclude("org.mortbay.jetty") } implementation(libs.hadoop2.hdfs) { From ad178c58239ba072d3402a949e53a7d45633e3e3 Mon Sep 17 00:00:00 2001 From: fanng Date: Mon, 23 Sep 2024 19:42:30 +0800 Subject: [PATCH 10/12] add hdfs client --- catalogs/catalog-lakehouse-paimon/build.gradle.kts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts b/catalogs/catalog-lakehouse-paimon/build.gradle.kts index ee162b7723a..302b1014963 100644 --- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts +++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts @@ -67,6 +67,12 @@ dependencies { implementation(libs.hadoop2.hdfs) { exclude("*") } + implementation(libs.hadoop2.hdfs.client) { + exclude("com.sun.jersey") + exclude("javax.servlet") + exclude("org.fusesource.leveldbjni") + exclude("org.mortbay.jetty") + } implementation(libs.hadoop2.mapreduce.client.core) { exclude("*") } From e895a3d778c8293f2c82571f9793f4350e8178aa Mon Sep 17 00:00:00 2001 From: fanng Date: Mon, 23 Sep 2024 22:26:46 +0800 Subject: [PATCH 11/12] remove log4j --- catalogs/catalog-lakehouse-paimon/build.gradle.kts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts b/catalogs/catalog-lakehouse-paimon/build.gradle.kts index 302b1014963..dca2de49848 100644 --- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts +++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts @@ -119,7 +119,11 @@ tasks { val copyCatalogLibs by registering(Copy::class) { dependsOn("jar", "runtimeJars") - from("build/libs") + from("build/libs") { + exclude("guava-*.jar") + exclude("log4j-*.jar") + exclude("slf4j-*.jar") + } into("$rootDir/distribution/package/catalogs/lakehouse-paimon/libs") } From d316a76e2f82a585e5dc8056aabe430e17e8e0a5 Mon Sep 17 00:00:00 2001 From: fanng Date: Fri, 27 Sep 2024 13:46:10 +0800 Subject: [PATCH 12/12] add exclude --- .../catalog-lakehouse-paimon/build.gradle.kts | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/catalogs/catalog-lakehouse-paimon/build.gradle.kts b/catalogs/catalog-lakehouse-paimon/build.gradle.kts index dca2de49848..f30fd888005 100644 --- a/catalogs/catalog-lakehouse-paimon/build.gradle.kts +++ b/catalogs/catalog-lakehouse-paimon/build.gradle.kts @@ -52,6 +52,21 @@ dependencies { exclude("org.mortbay.jetty:jetty-util") exclude("org.mortbay.jetty:jetty-sslengine") exclude("it.unimi.dsi") + exclude("com.ververica") + exclude("org.apache.hadoop") + exclude("org.apache.commons") + exclude("org.xerial.snappy") + exclude("com.github.luben") + exclude("com.google.protobuf") + exclude("joda-time") + exclude("org.apache.parquet:parquet-jackson") + exclude("org.apache.parquet:parquet-format-structures") + exclude("org.apache.parquet:parquet-encoding") + exclude("org.apache.parquet:parquet-common") + exclude("org.apache.parquet:parquet-hadoop") + exclude("org.apache.paimon:paimon-codegen-loader") + exclude("org.apache.paimon:paimon-shade-caffeine-2") + exclude("org.apache.paimon:paimon-shade-guava-30") } implementation(libs.bundles.log4j) implementation(libs.commons.lang3)