[529] Build xtable with scala version(s) #544

Merged
2 changes: 1 addition & 1 deletion Dockerfile
@@ -23,7 +23,7 @@ WORKDIR /build
COPY ./ ./
RUN --mount=type=cache,target=/root/.m2 \
MAVEN_OPTS=-Dorg.slf4j.simpleLogger.defaultLogLevel=warn mvn -B package -DskipTests
RUN mv xtable-utilities/target/xtable-utilities-$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)-bundled.jar target/app.jar
RUN mv xtable-utilities/target/xtable-utilities_2.12-$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)-bundled.jar target/app.jar

FROM eclipse-temurin:17-jre-jammy AS final

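The `RUN mv` step above pins the `_2.12` suffix in the Dockerfile. A minimal sketch of a profile-agnostic alternative, assuming the root pom defines `scala.binary.version` for the active profile (this is not part of the change itself):

```shell
# Hypothetical build step: derive the Scala suffix and project version from
# the POM rather than hardcoding _2.12, so the step keeps working if the
# default Scala profile ever changes.
SCALA_BINARY=$(mvn help:evaluate -Dexpression=scala.binary.version -q -DforceStdout)
VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
mv "xtable-utilities/target/xtable-utilities_${SCALA_BINARY}-${VERSION}-bundled.jar" target/app.jar
```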
8 changes: 6 additions & 2 deletions README.md
@@ -38,6 +38,10 @@ future.
by something like `mvn test -Dtest=TestDeltaSync -pl xtable-core`.
4. Similarly, use `mvn clean verify` or `mvn verify` to run integration tests.

**Note:** When using Maven version 3.9 or above, Maven automatically caches the build. To ignore build caching, you can
add the `-Dmaven.build.cache.enabled=false` parameter. For example, `mvn clean package -DskipTests -Dmaven.build.cache.enabled=false`.


# Style guide
1. We use [Maven Spotless plugin](https://github.com/diffplug/spotless/tree/main/plugin-maven) and
[Google java format](https://github.com/google/google-java-format) for code style.
@@ -46,7 +50,7 @@ future.

# Running the bundled jar
1. Get a pre-built bundled jar or create the jar with `mvn install -DskipTests`
2. create a yaml file that follows the format below:
2. Create a yaml file that follows the format below:
```yaml
sourceFormat: HUDI
targetFormats:
@@ -110,7 +114,7 @@ catalogOptions: # all other options are passed through in a map
key1: value1
key2: value2
```
5. run with `java -jar xtable-utilities/target/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml [--hadoopConfig hdfs-site.xml] [--convertersConfig converters.yaml] [--icebergCatalogConfig catalog.yaml]`
5. Run with `java -jar xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml [--hadoopConfig hdfs-site.xml] [--convertersConfig converters.yaml] [--icebergCatalogConfig catalog.yaml]`
The bundled jar includes hadoop dependencies for AWS, Azure, and GCP. Sample hadoop configurations for configuring the converters
can be found in the [xtable-hadoop-defaults.xml](https://github.com/apache/incubator-xtable/blob/main/utilities/src/main/resources/xtable-hadoop-defaults.xml) file.
The custom hadoop configurations can be passed in with the `--hadoopConfig [custom-hadoop-config-file]` option.
2 changes: 1 addition & 1 deletion demo/start_demo.sh
@@ -25,7 +25,7 @@ mvn install -am -pl xtable-core -DskipTests -T 2
mkdir -p demo/jars
cp xtable-hudi-support/xtable-hudi-support-utils/target/xtable-hudi-support-utils-0.2.0-SNAPSHOT.jar demo/jars
cp xtable-api/target/xtable-api-0.2.0-SNAPSHOT.jar demo/jars
cp xtable-core/target/xtable-core-0.2.0-SNAPSHOT.jar demo/jars
cp xtable-core/target/xtable-core_2.12-0.2.0-SNAPSHOT.jar demo/jars

cd demo
docker-compose up
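The updated copy step in the demo script pins the `_2.12` core jar. A small sketch of a suffix-agnostic variant, assuming only one Scala-suffixed core jar exists under `xtable-core/target` (hypothetical, not what the script currently does):

```shell
# Hypothetical variant: copy whichever Scala-suffixed core jar was built.
cp xtable-core/target/xtable-core_*-0.2.0-SNAPSHOT.jar demo/jars
```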
77 changes: 64 additions & 13 deletions pom.xml
@@ -48,12 +48,13 @@

<modules>
<module>xtable-api</module>
<module>xtable-hudi-support</module>
<module>xtable-core</module>
<module>xtable-utilities</module>
<module>xtable-hudi-support</module>
</modules>

<properties>
<project.version>0.2.0-SNAPSHOT</project.version>
<maven.compiler.target>8</maven.compiler.target>
<avro.version>1.11.3</avro.version>
<log4j.version>2.22.0</log4j.version>
@@ -68,8 +69,10 @@
<maven-deploy-plugin.version>3.1.1</maven-deploy-plugin.version>
<maven-release-plugin.version>2.5.3</maven-release-plugin.version>
<parquet.version>1.12.2</parquet.version>
<scala.version>2.12.15</scala.version>
<scala.version.prefix>2.12</scala.version.prefix>
<scala12.version>2.12.20</scala12.version>
<scala13.version>2.13.14</scala13.version>
<scala.version>${scala12.version}</scala.version>
<scala.binary.version>2.12</scala.binary.version>
<spark.version>3.4.2</spark.version>
<spark.version.prefix>3.4</spark.version.prefix>
<iceberg.version>1.4.2</iceberg.version>
@@ -84,7 +87,8 @@
<delombok.output.dir>${project.build.directory}/delombok</delombok.output.dir>
<apache-jar-resource-bundle.version>1.7</apache-jar-resource-bundle.version>
<apache-incubator-disclaimer-resource-bundle.version>1.7</apache-incubator-disclaimer-resource-bundle.version>

<scala-collection-compat.version>2.8.1</scala-collection-compat.version>

<!-- Test properties -->
<skipTests>false</skipTests>
<skipUTs>${skipTests}</skipUTs>
@@ -125,8 +129,8 @@
</dependency>
<dependency>
<groupId>org.scala-lang.modules</groupId>
<artifactId>scala-collection-compat_${scala.version.prefix}</artifactId>
<version>2.8.1</version>
<artifactId>scala-collection-compat_${scala.binary.version}</artifactId>
<version>${scala-collection-compat.version}</version>
</dependency>

<!-- Avro -->
@@ -229,7 +233,7 @@
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark${spark.version.prefix}-bundle_${scala.version.prefix}</artifactId>
<artifactId>hudi-spark${spark.version.prefix}-bundle_${scala.binary.version}</artifactId>
<version>${hudi.version}</version>
<scope>test</scope>
</dependency>
@@ -265,28 +269,28 @@
</dependency>
<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-spark-runtime-${spark.version.prefix}_${scala.version.prefix}</artifactId>
<artifactId>iceberg-spark-runtime-${spark.version.prefix}_${scala.binary.version}</artifactId>
<version>${iceberg.version}</version>
<scope>test</scope>
</dependency>

<!-- Delta -->
<dependency>
<groupId>io.delta</groupId>
<artifactId>delta-core_${scala.version.prefix}</artifactId>
<artifactId>delta-core_${scala.binary.version}</artifactId>
<version>${delta.version}</version>
</dependency>
<dependency>
<groupId>io.delta</groupId>
<artifactId>delta-standalone_${scala.version.prefix}</artifactId>
<artifactId>delta-standalone_${scala.binary.version}</artifactId>
<version>${delta.standalone.version}</version>
<scope>test</scope>
</dependency>

<!-- Spark -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.version.prefix}</artifactId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<exclusions>
<exclusion>
@@ -306,7 +310,7 @@
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.version.prefix}</artifactId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
@@ -464,7 +468,7 @@
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-scala_${scala.version.prefix}</artifactId>
<artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
<version>${jackson.version}</version>
</dependency>

@@ -867,6 +871,53 @@
</repositories>

<profiles>
<!--Scala 2.12 Profile -->
<profile>
<id>scala-2.12</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<properties>
<scala.version>${scala12.version}</scala.version>
<scala.binary.version>2.12</scala.binary.version>
</properties>
<build>
<pluginManagement/>
</build>
</profile>

<!--Scala 2.13 Profile -->
Reviewer comment (Contributor): @pjfanning Do we leave this as commented out or should we clean it up?

<!-- Once hudi supports scala 2.13 then enable following profile -->
<profile>
<id>scala-2.13</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<properties>
<scala.version>${scala13.version}</scala.version>
<scala.binary.version>2.13</scala.binary.version>
</properties>
<build>
<pluginManagement>
<plugins>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<configuration>
<args>
<arg>-unchecked</arg>
<arg>-deprecation</arg>
<arg>-feature</arg>
<arg>-explaintypes</arg>
<arg>-target:jvm-1.8</arg>
</args>
<compilerPlugins/>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
</profile>
<profile>
<id>release</id>
<activation>
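With the pom.xml profiles above in place, the Scala line is chosen at build time via Maven profiles. A hedged usage sketch (not part of the diff; it assumes the profile ids remain `scala-2.12` and `scala-2.13` as defined above, and that the 2.13 profile is only enabled once Hudi supports Scala 2.13):

```shell
# Default build: the scala-2.12 profile is active by default, so all
# Scala-dependent artifacts are built with the _2.12 suffix.
mvn clean package -DskipTests

# Hypothetical invocation once the scala-2.13 profile is enabled:
# select it explicitly to produce _2.13 artifacts instead.
mvn clean package -DskipTests -Pscala-2.13
```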
4 changes: 2 additions & 2 deletions website/docs/biglake-metastore.md
@@ -25,7 +25,7 @@ This document walks through the steps to register an Apache XTable™ (Incubatin
export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service_account_key.json
```
5. Clone the Apache XTable™ (Incubating) [repository](https://github.com/apache/incubator-xtable) and create the
`xtable-utilities-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup)
`xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup)
6. Download the [BigLake Iceberg JAR](gs://spark-lib/biglake/biglake-catalog-iceberg1.2.0-0.1.0-with-dependencies.jar) locally.
Apache XTable™ (Incubating) requires the JAR to be present in the classpath.

@@ -117,7 +117,7 @@ catalogOptions:
From your terminal under the cloned Apache XTable™ (Incubating) directory, run the sync process using the below command.

```shell md title="shell"
java -cp xtable-utilities/target/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar:/path/to/downloaded/biglake-catalog-iceberg1.2.0-0.1.0-with-dependencies.jar org.apache.xtable.utilities.RunSync --datasetConfig my_config.yaml --icebergCatalogConfig catalog.yaml
java -cp xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar:/path/to/downloaded/biglake-catalog-iceberg1.2.0-0.1.0-with-dependencies.jar org.apache.xtable.utilities.RunSync --datasetConfig my_config.yaml --icebergCatalogConfig catalog.yaml
```

:::tip Note:
2 changes: 1 addition & 1 deletion website/docs/fabric.md
@@ -98,7 +98,7 @@ An example hadoop configuration for authenticating to ADLS storage account is as
```

```shell md title="shell"
java -jar xtable-utilities/target/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml --hadoopConfig hadoop.xml
java -jar xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml --hadoopConfig hadoop.xml
```

Running the above command will translate the table `people` in Iceberg or Hudi format to Delta Lake format. To validate
4 changes: 2 additions & 2 deletions website/docs/glue-catalog.md
@@ -19,7 +19,7 @@ This document walks through the steps to register an Apache XTable™ (Incubatin
also set up access credentials by following the steps
[here](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-quickstart.html)
3. Clone the Apache XTable™ (Incubating) [repository](https://github.com/apache/incubator-xtable) and create the
`xtable-utilities-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup)
`xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup)

## Steps
### Running sync
@@ -84,7 +84,7 @@ Replace with appropriate values for `sourceFormat`, `tableBasePath` and `tableNa
From your terminal under the cloned xtable directory, run the sync process using the below command.

```shell md title="shell"
java -jar xtable-utilities/target/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml
java -jar xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml
```

:::tip Note:
4 changes: 2 additions & 2 deletions website/docs/hms.md
@@ -17,7 +17,7 @@ This document walks through the steps to register an Apache XTable™ (Incubatin
or a distributed system like Amazon EMR, Google Cloud's Dataproc, Azure HDInsight etc.
This is a required step to register the table in HMS using a Spark client.
3. Clone the XTable™ (Incubating) [repository](https://github.com/apache/incubator-xtable) and create the
`xtable-utilities-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup)
`xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup)
4. This guide also assumes that you have configured the Hive Metastore locally or on EMR/Dataproc/HDInsight
and is already running.

@@ -88,7 +88,7 @@ datasets:

From your terminal under the cloned Apache XTable™ (Incubating) directory, run the sync process using the below command.
```shell md title="shell"
java -jar xtable-utilities/target/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml
java -jar xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml
```

:::tip Note:
4 changes: 2 additions & 2 deletions website/docs/how-to.md
@@ -24,7 +24,7 @@ history to enable proper point in time queries.
1. A compute instance where you can run Apache Spark. This can be your local machine, docker,
or a distributed service like Amazon EMR, Google Cloud's Dataproc, Azure HDInsight etc
2. Clone the Apache XTable™ (Incubating) [repository](https://github.com/apache/incubator-xtable) and create the
`xtable-utilities-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup)
`xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup)
3. Optional: Setup access to write to and/or read from distributed storage services like:
* Amazon S3 by following the steps
[here](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) to install AWSCLIv2
@@ -351,7 +351,7 @@ Authentication for GCP requires service account credentials to be exported. i.e.
In your terminal under the cloned Apache XTable™ (Incubating) directory, run the below command.

```shell md title="shell"
java -jar xtable-utilities/target/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml
java -jar xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml
```

**Optional:**
4 changes: 2 additions & 2 deletions website/docs/unity-catalog.md
@@ -17,7 +17,7 @@ This document walks through the steps to register an Apache XTable™ (Incubatin
3. Create a Unity Catalog metastore in Databricks as outlined [here](https://docs.gcp.databricks.com/data-governance/unity-catalog/create-metastore.html#create-a-unity-catalog-metastore).
4. Create an external location in Databricks as outlined [here](https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-location.html).
5. Clone the Apache XTable™ (Incubating) [repository](https://github.com/apache/incubator-xtable) and create the
`xtable-utilities-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup)
`xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar` by following the steps on the [Installation page](/docs/setup)

## Pre-requisites (for open-source Unity Catalog)
1. Source table(s) (Hudi/Iceberg) already written to external storage locations like S3/GCS/ADLS or local.
@@ -48,7 +48,7 @@ datasets:
From your terminal under the cloned Apache XTable™ (Incubating) directory, run the sync process using the below command.

```shell md title="shell"
java -jar xtable-utilities/target/xtable-utilities-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml
java -jar xtable-utilities/target/xtable-utilities_2.12-0.2.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml
```

:::tip Note:
18 changes: 10 additions & 8 deletions xtable-core/pom.xml
@@ -25,17 +25,19 @@
<version>0.2.0-SNAPSHOT</version>
</parent>

<artifactId>xtable-core</artifactId>
<artifactId>xtable-core_${scala.binary.version}</artifactId>
<name>XTable Project Core</name>

<dependencies>
<dependency>
<groupId>org.apache.xtable</groupId>
<artifactId>xtable-api</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.xtable</groupId>
<artifactId>xtable-hudi-support-utils</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
@@ -47,7 +49,7 @@
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-scala_${scala.version.prefix}</artifactId>
<artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
@@ -69,7 +71,7 @@
<!-- Hudi dependencies -->
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark${spark.version.prefix}-bundle_${scala.version.prefix}</artifactId>
<artifactId>hudi-spark${spark.version.prefix}-bundle_${scala.binary.version}</artifactId>
<scope>test</scope>
</dependency>
<dependency>
@@ -94,11 +96,11 @@
<!-- Delta dependencies -->
<dependency>
<groupId>io.delta</groupId>
<artifactId>delta-core_${scala.version.prefix}</artifactId>
<artifactId>delta-core_${scala.binary.version}</artifactId>
</dependency>
<dependency>
<groupId>io.delta</groupId>
<artifactId>delta-standalone_${scala.version.prefix}</artifactId>
<artifactId>delta-standalone_${scala.binary.version}</artifactId>
</dependency>

<!-- Hadoop dependencies -->
@@ -120,16 +122,16 @@
<!-- Spark/Iceberg/Hudi dependencies for reading/writing tables -->
<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-spark-runtime-${spark.version.prefix}_${scala.version.prefix}</artifactId>
<artifactId>iceberg-spark-runtime-${spark.version.prefix}_${scala.binary.version}</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.version.prefix}</artifactId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.version.prefix}</artifactId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
</dependency>

<!-- Mockito -->
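Because the core module's artifactId is now parameterized on `${scala.binary.version}`, the installed artifact name depends on the active profile. A quick way to confirm the resolved coordinates, assuming the default scala-2.12 profile (`-pl xtable-core` still works because it references the module directory rather than the artifactId):

```shell
# Print the effective artifactId of the core module under the active profile.
mvn help:evaluate -Dexpression=project.artifactId -q -DforceStdout -pl xtable-core
# Expected with the default profile: xtable-core_2.12
```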