From ed8e5b777c9f7243612663c9932549c2c0eddf3e Mon Sep 17 00:00:00 2001 From: caican Date: Sun, 1 Dec 2024 14:18:52 +0800 Subject: [PATCH 01/25] basic ddl --- .../lakehouse/paimon/PaimonConstants.java | 57 ++++++++++ .../paimon/PaimonPropertiesUtils.java | 95 ++++++++++++++++ .../PaimonCatalogPropertiesMetadata.java | 26 +++-- .../PaimonSchemaPropertiesMetadata.java | 2 +- .../paimon/PaimonTablePropertiesMetadata.java | 16 +-- .../storage/PaimonOSSFileSystemConfig.java | 7 +- .../storage/PaimonS3FileSystemConfig.java | 7 +- spark-connector/spark-common/build.gradle.kts | 3 + .../paimon/GravitinoPaimonCatalog.java | 69 ++++++++++++ .../paimon/PaimonPropertiesConstants.java | 49 ++++++++ .../paimon/PaimonPropertiesConverter.java | 64 +++++++++++ .../connector/version/CatalogNameAdaptor.java | 21 +++- ...SparkPaimonCatalogFilesystemBackendIT.java | 40 +++++++ .../test/paimon/SparkPaimonCatalogIT.java | 54 +++++++++ .../paimon/TestPaimonPropertiesConverter.java | 106 ++++++++++++++++++ spark-connector/v3.3/spark/build.gradle.kts | 3 + .../paimon/GravitinoPaimonCatalogSpark33.java | 21 ++++ ...arkPaimonCatalogFilesystemBackendIT33.java | 35 ++++++ .../version/TestCatalogNameAdaptor.java | 4 + spark-connector/v3.4/spark/build.gradle.kts | 3 + .../paimon/GravitinoPaimonCatalogSpark34.java | 37 ++++++ ...arkPaimonCatalogFilesystemBackendIT34.java | 36 ++++++ .../version/TestCatalogNameAdaptor.java | 4 + spark-connector/v3.5/spark/build.gradle.kts | 3 + .../paimon/GravitinoPaimonCatalogSpark35.java | 21 ++++ ...arkPaimonCatalogFilesystemBackendIT35.java | 36 ++++++ .../version/TestCatalogNameAdaptor.java | 4 + 27 files changed, 790 insertions(+), 33 deletions(-) create mode 100644 catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonConstants.java create mode 100644 catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonPropertiesUtils.java create mode 100644 
spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java create mode 100644 spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConstants.java create mode 100644 spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConverter.java create mode 100644 spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java create mode 100644 spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java create mode 100644 spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/paimon/TestPaimonPropertiesConverter.java create mode 100644 spark-connector/v3.3/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark33.java create mode 100644 spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java create mode 100644 spark-connector/v3.4/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark34.java create mode 100644 spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java create mode 100644 spark-connector/v3.5/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark35.java create mode 100644 spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java diff --git a/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonConstants.java b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonConstants.java new file mode 
100644 index 00000000000..291a7ea9694 --- /dev/null +++ b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonConstants.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.lakehouse.paimon; + +public class PaimonConstants { + + // Paimon catalog properties constants + public static final String CATALOG_BACKEND = "catalog-backend"; + public static final String METASTORE = "metastore"; + public static final String URI = "uri"; + public static final String WAREHOUSE = "warehouse"; + public static final String CATALOG_BACKEND_NAME = "catalog-backend-name"; + + public static final String GRAVITINO_JDBC_USER = "jdbc-user"; + public static final String PAIMON_JDBC_USER = "jdbc.user"; + + public static final String GRAVITINO_JDBC_PASSWORD = "jdbc-password"; + public static final String PAIMON_JDBC_PASSWORD = "jdbc.password"; + + public static final String GRAVITINO_JDBC_DRIVER = "jdbc-driver"; + + // S3 properties needed by Paimon + public static final String S3_ENDPOINT = "s3.endpoint"; + public static final String S3_ACCESS_KEY = "s3.access-key"; + public static final String S3_SECRET_KEY = "s3.secret-key"; + + // OSS 
related properties + public static final String OSS_ENDPOINT = "fs.oss.endpoint"; + public static final String OSS_ACCESS_KEY = "fs.oss.accessKeyId"; + public static final String OSS_SECRET_KEY = "fs.oss.accessKeySecret"; + + // Paimon table properties constants + public static final String COMMENT = "comment"; + public static final String OWNER = "owner"; + public static final String BUCKET_KEY = "bucket-key"; + public static final String MERGE_ENGINE = "merge-engine"; + public static final String SEQUENCE_FIELD = "sequence.field"; + public static final String ROWKIND_FIELD = "rowkind.field"; + public static final String PRIMARY_KEY = "primary-key"; + public static final String PARTITION = "partition"; +} diff --git a/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonPropertiesUtils.java b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonPropertiesUtils.java new file mode 100644 index 00000000000..0dcf24f3a67 --- /dev/null +++ b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonPropertiesUtils.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.catalog.lakehouse.paimon; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import java.util.Optional; +import org.apache.gravitino.storage.OSSProperties; +import org.apache.gravitino.storage.S3Properties; + +public class PaimonPropertiesUtils { + + // Map that maintains the mapping of keys in Gravitino to that in Paimon, for example, users + // will only need to set the configuration 'catalog-backend' in Gravitino and Gravitino will + // change it to `catalogType` automatically and pass it to Paimon. + public static final Map<String, String> GRAVITINO_CONFIG_TO_PAIMON; + + static { + Map<String, String> map = new HashMap<>(); + map.put(PaimonConstants.CATALOG_BACKEND, PaimonConstants.CATALOG_BACKEND); + map.put(PaimonConstants.GRAVITINO_JDBC_DRIVER, PaimonConstants.GRAVITINO_JDBC_DRIVER); + map.put(PaimonConstants.GRAVITINO_JDBC_USER, PaimonConstants.PAIMON_JDBC_USER); + map.put(PaimonConstants.GRAVITINO_JDBC_PASSWORD, PaimonConstants.PAIMON_JDBC_PASSWORD); + map.put(PaimonConstants.URI, PaimonConstants.URI); + map.put(PaimonConstants.WAREHOUSE, PaimonConstants.WAREHOUSE); + map.put(PaimonConstants.CATALOG_BACKEND_NAME, PaimonConstants.CATALOG_BACKEND_NAME); + // S3 + map.put(S3Properties.GRAVITINO_S3_ENDPOINT, PaimonConstants.S3_ENDPOINT); + map.put(S3Properties.GRAVITINO_S3_ACCESS_KEY_ID, PaimonConstants.S3_ACCESS_KEY); + map.put(S3Properties.GRAVITINO_S3_SECRET_ACCESS_KEY, PaimonConstants.S3_SECRET_KEY); + // OSS + map.put(OSSProperties.GRAVITINO_OSS_ENDPOINT, PaimonConstants.OSS_ENDPOINT); + map.put(OSSProperties.GRAVITINO_OSS_ACCESS_KEY_ID, PaimonConstants.OSS_ACCESS_KEY); + map.put(OSSProperties.GRAVITINO_OSS_ACCESS_KEY_SECRET, PaimonConstants.OSS_SECRET_KEY); + GRAVITINO_CONFIG_TO_PAIMON = Collections.unmodifiableMap(map); + } + + /** + * Converts Gravitino properties to Paimon catalog properties, the common transform logic shared + * by Spark connector, Gravitino Paimon catalog. 
+ * + * @param gravitinoProperties a map of Gravitino configuration properties. + * @return a map containing Paimon catalog properties. + */ + public static Map<String, String> toPaimonCatalogProperties( + Map<String, String> gravitinoProperties) { + Map<String, String> paimonProperties = new HashMap<>(); + gravitinoProperties.forEach( + (key, value) -> { + if (GRAVITINO_CONFIG_TO_PAIMON.containsKey(key)) { + paimonProperties.put(GRAVITINO_CONFIG_TO_PAIMON.get(key), value); + } + }); + return paimonProperties; + } + + /** + * Get catalog backend name from Gravitino catalog properties. + * + * @param catalogProperties a map of Gravitino catalog properties. + * @return catalog backend name. + */ + public static String getCatalogBackendName(Map<String, String> catalogProperties) { + String backendName = catalogProperties.get(PaimonConstants.CATALOG_BACKEND_NAME); + if (backendName != null) { + return backendName; + } + + String catalogBackend = catalogProperties.get(PaimonConstants.CATALOG_BACKEND); + return Optional.ofNullable(catalogBackend) + .map(s -> s.toLowerCase(Locale.ROOT)) + .orElseThrow( + () -> + new UnsupportedOperationException( + String.format("Unsupported catalog backend: %s", catalogBackend))); + } +} diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java index e3b59bff36d..901fb47fadb 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java @@ -45,20 +45,22 @@ */ public class PaimonCatalogPropertiesMetadata extends BaseCatalogPropertiesMetadata { - @VisibleForTesting public static final String GRAVITINO_CATALOG_BACKEND = "catalog-backend"; - public static final String 
PAIMON_METASTORE = "metastore"; - public static final String WAREHOUSE = "warehouse"; - public static final String URI = "uri"; - public static final String GRAVITINO_JDBC_USER = "jdbc-user"; - public static final String PAIMON_JDBC_USER = "jdbc.user"; - public static final String GRAVITINO_JDBC_PASSWORD = "jdbc-password"; - public static final String PAIMON_JDBC_PASSWORD = "jdbc.password"; - public static final String GRAVITINO_JDBC_DRIVER = "jdbc-driver"; + @VisibleForTesting + public static final String GRAVITINO_CATALOG_BACKEND = PaimonConstants.CATALOG_BACKEND; + + public static final String PAIMON_METASTORE = PaimonConstants.METASTORE; + public static final String WAREHOUSE = PaimonConstants.WAREHOUSE; + public static final String URI = PaimonConstants.URI; + public static final String GRAVITINO_JDBC_USER = PaimonConstants.GRAVITINO_JDBC_USER; + public static final String PAIMON_JDBC_USER = PaimonConstants.PAIMON_JDBC_USER; + public static final String GRAVITINO_JDBC_PASSWORD = PaimonConstants.GRAVITINO_JDBC_PASSWORD; + public static final String PAIMON_JDBC_PASSWORD = PaimonConstants.PAIMON_JDBC_PASSWORD; + public static final String GRAVITINO_JDBC_DRIVER = PaimonConstants.GRAVITINO_JDBC_DRIVER; // S3 properties needed by Paimon - public static final String S3_ENDPOINT = "s3.endpoint"; - public static final String S3_ACCESS_KEY = "s3.access-key"; - public static final String S3_SECRET_KEY = "s3.secret-key"; + public static final String S3_ENDPOINT = PaimonConstants.S3_ENDPOINT; + public static final String S3_ACCESS_KEY = PaimonConstants.S3_ACCESS_KEY; + public static final String S3_SECRET_KEY = PaimonConstants.S3_SECRET_KEY; public static final Map<String, String> GRAVITINO_CONFIG_TO_PAIMON = ImmutableMap.of( diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonSchemaPropertiesMetadata.java 
b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonSchemaPropertiesMetadata.java index 9a6ddb5a165..3da05099cc4 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonSchemaPropertiesMetadata.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonSchemaPropertiesMetadata.java @@ -34,7 +34,7 @@ */ public class PaimonSchemaPropertiesMetadata extends BasePropertiesMetadata { - public static final String COMMENT = "comment"; + public static final String COMMENT = PaimonConstants.COMMENT; private static final Map> PROPERTIES_METADATA; diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonTablePropertiesMetadata.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonTablePropertiesMetadata.java index 671dd9d6682..ad63df6783f 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonTablePropertiesMetadata.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonTablePropertiesMetadata.java @@ -35,14 +35,14 @@ */ public class PaimonTablePropertiesMetadata extends BasePropertiesMetadata { - public static final String COMMENT = "comment"; - public static final String OWNER = "owner"; - public static final String BUCKET_KEY = "bucket-key"; - public static final String MERGE_ENGINE = "merge-engine"; - public static final String SEQUENCE_FIELD = "sequence.field"; - public static final String ROWKIND_FIELD = "rowkind.field"; - public static final String PRIMARY_KEY = "primary-key"; - public static final String PARTITION = "partition"; + public static final String COMMENT = PaimonConstants.COMMENT; + public static final String OWNER = PaimonConstants.OWNER; + public static final String BUCKET_KEY = 
PaimonConstants.BUCKET_KEY; + public static final String MERGE_ENGINE = PaimonConstants.MERGE_ENGINE; + public static final String SEQUENCE_FIELD = PaimonConstants.SEQUENCE_FIELD; + public static final String ROWKIND_FIELD = PaimonConstants.ROWKIND_FIELD; + public static final String PRIMARY_KEY = PaimonConstants.PRIMARY_KEY; + public static final String PARTITION = PaimonConstants.PARTITION; private static final Map> PROPERTIES_METADATA; diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonOSSFileSystemConfig.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonOSSFileSystemConfig.java index ad7fa26f3bc..7b703b5b74a 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonOSSFileSystemConfig.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonOSSFileSystemConfig.java @@ -22,6 +22,7 @@ import java.util.Map; import org.apache.commons.lang3.StringUtils; import org.apache.gravitino.Config; +import org.apache.gravitino.catalog.lakehouse.paimon.PaimonConstants; import org.apache.gravitino.config.ConfigBuilder; import org.apache.gravitino.config.ConfigConstants; import org.apache.gravitino.config.ConfigEntry; @@ -29,9 +30,9 @@ public class PaimonOSSFileSystemConfig extends Config { // OSS related properties - public static final String OSS_ENDPOINT = "fs.oss.endpoint"; - public static final String OSS_ACCESS_KEY = "fs.oss.accessKeyId"; - public static final String OSS_SECRET_KEY = "fs.oss.accessKeySecret"; + public static final String OSS_ENDPOINT = PaimonConstants.OSS_ENDPOINT; + public static final String OSS_ACCESS_KEY = PaimonConstants.OSS_ACCESS_KEY; + public static final String OSS_SECRET_KEY = PaimonConstants.OSS_SECRET_KEY; public PaimonOSSFileSystemConfig(Map properties) { super(false); diff --git 
a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonS3FileSystemConfig.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonS3FileSystemConfig.java index 4184fcc06f1..6588e4a5268 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonS3FileSystemConfig.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonS3FileSystemConfig.java @@ -22,6 +22,7 @@ import java.util.Map; import org.apache.commons.lang3.StringUtils; import org.apache.gravitino.Config; +import org.apache.gravitino.catalog.lakehouse.paimon.PaimonConstants; import org.apache.gravitino.config.ConfigBuilder; import org.apache.gravitino.config.ConfigConstants; import org.apache.gravitino.config.ConfigEntry; @@ -29,9 +30,9 @@ public class PaimonS3FileSystemConfig extends Config { // S3 related properties - public static final String S3_ENDPOINT = "s3.endpoint"; - public static final String S3_ACCESS_KEY = "s3.access-key"; - public static final String S3_SECRET_KEY = "s3.secret-key"; + public static final String S3_ENDPOINT = PaimonConstants.S3_ENDPOINT; + public static final String S3_ACCESS_KEY = PaimonConstants.S3_ACCESS_KEY; + public static final String S3_SECRET_KEY = PaimonConstants.S3_SECRET_KEY; public PaimonS3FileSystemConfig(Map properties) { super(false); diff --git a/spark-connector/spark-common/build.gradle.kts b/spark-connector/spark-common/build.gradle.kts index 7f3c66aa6e6..dc0af57a00c 100644 --- a/spark-connector/spark-common/build.gradle.kts +++ b/spark-connector/spark-common/build.gradle.kts @@ -31,6 +31,7 @@ val scalaVersion: String = project.properties["scalaVersion"] as? 
String ?: extr val sparkVersion: String = libs.versions.spark33.get() val sparkMajorVersion: String = sparkVersion.substringBeforeLast(".") val icebergVersion: String = libs.versions.iceberg4spark.get() +val paimonVersion: String = libs.versions.paimon.get() // kyuubi hive connector for Spark 3.3 doesn't support scala 2.13 val kyuubiVersion: String = libs.versions.kyuubi4spark34.get() val scalaJava8CompatVersion: String = libs.versions.scala.java.compat.get() @@ -43,6 +44,7 @@ dependencies { compileOnly(project(":clients:client-java-runtime", configuration = "shadow")) compileOnly("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") compileOnly("org.apache.kyuubi:kyuubi-spark-connector-hive_$scalaVersion:$kyuubiVersion") + compileOnly("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") compileOnly("org.apache.spark:spark-catalyst_$scalaVersion:$sparkVersion") compileOnly("org.apache.spark:spark-core_$scalaVersion:$sparkVersion") @@ -114,6 +116,7 @@ dependencies { testImplementation("org.apache.iceberg:iceberg-core:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-hive-metastore:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") + testImplementation("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") testImplementation("org.apache.kyuubi:kyuubi-spark-connector-hive_$scalaVersion:$kyuubiVersion") // include spark-sql,spark-catalyst,hive-common,hdfs-client testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") { diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java new file mode 100644 index 00000000000..3a1b210055a --- /dev/null +++ 
b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.spark.connector.paimon; + +import java.util.Map; +import org.apache.gravitino.catalog.lakehouse.paimon.PaimonPropertiesUtils; +import org.apache.gravitino.spark.connector.PropertiesConverter; +import org.apache.gravitino.spark.connector.SparkTransformConverter; +import org.apache.gravitino.spark.connector.SparkTypeConverter; +import org.apache.gravitino.spark.connector.catalog.BaseCatalog; +import org.apache.paimon.spark.SparkCatalog; +import org.apache.spark.sql.connector.catalog.Identifier; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.catalog.TableCatalog; +import org.apache.spark.sql.util.CaseInsensitiveStringMap; + +public class GravitinoPaimonCatalog extends BaseCatalog { + + @Override + protected TableCatalog createAndInitSparkCatalog( + String name, CaseInsensitiveStringMap options, Map properties) { + String catalogBackendName = PaimonPropertiesUtils.getCatalogBackendName(properties); + TableCatalog paimonCatalog = new SparkCatalog(); + Map 
all = + getPropertiesConverter().toSparkCatalogProperties(options, properties); + paimonCatalog.initialize(catalogBackendName, new CaseInsensitiveStringMap(all)); + return paimonCatalog; + } + + @Override + protected Table createSparkTable( + Identifier identifier, + org.apache.gravitino.rel.Table gravitinoTable, + Table sparkTable, + TableCatalog sparkCatalog, + PropertiesConverter propertiesConverter, + SparkTransformConverter sparkTransformConverter, + SparkTypeConverter sparkTypeConverter) { + throw new UnsupportedOperationException( + "`createSparkTable` operation is unsupported for paimon spark connector now."); + } + + @Override + protected PropertiesConverter getPropertiesConverter() { + return PaimonPropertiesConverter.getInstance(); + } + + @Override + protected SparkTransformConverter getSparkTransformConverter() { + return new SparkTransformConverter(true); + } +} diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConstants.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConstants.java new file mode 100644 index 00000000000..67190753e64 --- /dev/null +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConstants.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.spark.connector.paimon; + +import org.apache.gravitino.catalog.lakehouse.paimon.PaimonConstants; + +public class PaimonPropertiesConstants { + + public static final String GRAVITINO_PAIMON_CATALOG_BACKEND = PaimonConstants.CATALOG_BACKEND; + static final String PAIMON_CATALOG_METASTORE = PaimonConstants.METASTORE; + + public static final String GRAVITINO_PAIMON_CATALOG_WAREHOUSE = PaimonConstants.WAREHOUSE; + static final String PAIMON_CATALOG_WAREHOUSE = PaimonConstants.WAREHOUSE; + + public static final String GRAVITINO_PAIMON_CATALOG_URI = PaimonConstants.URI; + static final String PAIMON_CATALOG_URI = PaimonConstants.URI; + static final String GRAVITINO_PAIMON_CATALOG_JDBC_USER = PaimonConstants.GRAVITINO_JDBC_USER; + static final String PAIMON_CATALOG_JDBC_USER = PaimonConstants.PAIMON_JDBC_USER; + + static final String GRAVITINO_PAIMON_CATALOG_JDBC_PASSWORD = + PaimonConstants.GRAVITINO_JDBC_PASSWORD; + static final String PAIMON_CATALOG_JDBC_PASSWORD = PaimonConstants.PAIMON_JDBC_PASSWORD; + + public static final String PAIMON_CATALOG_BACKEND_HIVE = "hive"; + static final String GRAVITINO_PAIMON_CATALOG_BACKEND_HIVE = "hive"; + + static final String GRAVITINO_PAIMON_CATALOG_BACKEND_JDBC = "jdbc"; + static final String PAIMON_CATALOG_BACKEND_JDBC = "jdbc"; + + public static final String PAIMON_CATALOG_BACKEND_FILESYSTEM = "filesystem"; + static final String GRAVITINO_PAIMON_CATALOG_BACKEND_FILESYSTEM = "filesystem"; +} diff --git 
a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConverter.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConverter.java new file mode 100644 index 00000000000..335afae7c9d --- /dev/null +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConverter.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.spark.connector.paimon; + +import com.google.common.base.Preconditions; +import java.util.Map; +import org.apache.commons.lang3.StringUtils; +import org.apache.gravitino.catalog.lakehouse.paimon.PaimonPropertiesUtils; +import org.apache.gravitino.spark.connector.PropertiesConverter; + +public class PaimonPropertiesConverter implements PropertiesConverter { + + public static class PaimonPropertiesConverterHolder { + private static final PaimonPropertiesConverter INSTANCE = new PaimonPropertiesConverter(); + } + + private PaimonPropertiesConverter() {} + + public static PaimonPropertiesConverter getInstance() { + return PaimonPropertiesConverter.PaimonPropertiesConverterHolder.INSTANCE; + } + + @Override + public Map<String, String> toSparkCatalogProperties(Map<String, String> properties) { + Preconditions.checkArgument(properties != null, "Paimon Catalog properties should not be null"); + Map<String, String> all = PaimonPropertiesUtils.toPaimonCatalogProperties(properties); + String catalogBackend = all.remove(PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND); + Preconditions.checkArgument( + StringUtils.isNotBlank(catalogBackend), + String.format( + "%s should not be empty", PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND)); + all.put(PaimonPropertiesConstants.PAIMON_CATALOG_METASTORE, catalogBackend); + return all; + } + + @Override + public Map<String, String> toGravitinoTableProperties(Map<String, String> properties) { + throw new UnsupportedOperationException( + "`toGravitinoTableProperties` operation is unsupported now."); + } + + @Override + public Map<String, String> toSparkTableProperties(Map<String, String> properties) { + throw new UnsupportedOperationException( + "`toSparkTableProperties` operation is unsupported now."); + } +} diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/version/CatalogNameAdaptor.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/version/CatalogNameAdaptor.java index 8141c799bf8..9392feac2f1 100644 --- 
a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/version/CatalogNameAdaptor.java +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/version/CatalogNameAdaptor.java @@ -27,15 +27,24 @@ public class CatalogNameAdaptor { private static final Map catalogNames = ImmutableMap.of( - "hive-3.3", "org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark33", - "hive-3.4", "org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark34", - "hive-3.5", "org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark35", + "hive-3.3", + "org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark33", + "hive-3.4", + "org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark34", + "hive-3.5", + "org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark35", "lakehouse-iceberg-3.3", - "org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark33", + "org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark33", "lakehouse-iceberg-3.4", - "org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark34", + "org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark34", "lakehouse-iceberg-3.5", - "org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark35"); + "org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark35", + "lakehouse-paimon-3.3", + "org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark33", + "lakehouse-paimon-3.4", + "org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark34", + "lakehouse-paimon-3.5", + "org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark35"); private static String sparkVersion() { return package$.MODULE$.SPARK_VERSION(); diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java 
b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java new file mode 100644 index 00000000000..778f97abf7f --- /dev/null +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.paimon; + +import com.google.common.collect.Maps; +import java.util.Map; +import org.apache.gravitino.spark.connector.paimon.PaimonPropertiesConstants; +import org.junit.jupiter.api.Tag; + +/** This class use Apache Paimon FilesystemCatalog for backend catalog. 
*/ +@Tag("gravitino-docker-test") +public abstract class SparkPaimonCatalogFilesystemBackendIT extends SparkPaimonCatalogIT { + + @Override + protected Map getCatalogConfigs() { + Map catalogProperties = Maps.newHashMap(); + catalogProperties.put( + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND, + PaimonPropertiesConstants.PAIMON_CATALOG_BACKEND_FILESYSTEM); + catalogProperties.put(PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_WAREHOUSE, warehouse); + catalogProperties.put(PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_URI, hiveMetastoreUri); + return catalogProperties; + } +} diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java new file mode 100644 index 00000000000..37248ebacf4 --- /dev/null +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.spark.connector.integration.test.paimon; + +import org.apache.gravitino.spark.connector.integration.test.SparkCommonIT; + +public abstract class SparkPaimonCatalogIT extends SparkCommonIT { + + @Override + protected String getCatalogName() { + return "paimon"; + } + + @Override + protected String getProvider() { + return "lakehouse-paimon"; + } + + @Override + protected boolean supportsSparkSQLClusteredBy() { + return false; + } + + @Override + protected boolean supportsPartition() { + return false; + } + + @Override + protected boolean supportsDelete() { + return false; + } + + @Override + protected boolean supportsSchemaEvolution() { + return false; + } +} diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/paimon/TestPaimonPropertiesConverter.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/paimon/TestPaimonPropertiesConverter.java new file mode 100644 index 00000000000..a3a0e91284a --- /dev/null +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/paimon/TestPaimonPropertiesConverter.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.spark.connector.paimon; + +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class TestPaimonPropertiesConverter { + private final PaimonPropertiesConverter paimonPropertiesConverter = + PaimonPropertiesConverter.getInstance(); + + @Test + void testCatalogPropertiesWithHiveBackend() { + Map properties = + paimonPropertiesConverter.toSparkCatalogProperties( + ImmutableMap.of( + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND, + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND_HIVE, + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_URI, + "hive-uri", + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_WAREHOUSE, + "hive-warehouse", + "key1", + "value1")); + Assertions.assertEquals( + ImmutableMap.of( + PaimonPropertiesConstants.PAIMON_CATALOG_METASTORE, + PaimonPropertiesConstants.PAIMON_CATALOG_BACKEND_HIVE, + PaimonPropertiesConstants.PAIMON_CATALOG_URI, + "hive-uri", + PaimonPropertiesConstants.PAIMON_CATALOG_WAREHOUSE, + "hive-warehouse"), + properties); + } + + @Test + void testCatalogPropertiesWithJdbcBackend() { + Map properties = + paimonPropertiesConverter.toSparkCatalogProperties( + ImmutableMap.of( + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND, + PaimonPropertiesConstants.PAIMON_CATALOG_BACKEND_JDBC, + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_URI, + "jdbc-uri", + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_WAREHOUSE, + "jdbc-warehouse", + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_JDBC_USER, + "user", + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_JDBC_PASSWORD, + "passwd", + "key1", + "value1")); + Assertions.assertEquals( + ImmutableMap.of( + PaimonPropertiesConstants.PAIMON_CATALOG_METASTORE, + PaimonPropertiesConstants.PAIMON_CATALOG_BACKEND_JDBC, + PaimonPropertiesConstants.PAIMON_CATALOG_URI, + "jdbc-uri", + 
PaimonPropertiesConstants.PAIMON_CATALOG_WAREHOUSE, + "jdbc-warehouse", + PaimonPropertiesConstants.PAIMON_CATALOG_JDBC_USER, + "user", + PaimonPropertiesConstants.PAIMON_CATALOG_JDBC_PASSWORD, + "passwd"), + properties); + } + + @Test + void testCatalogPropertiesWithFilesystemBackend() { + Map properties = + paimonPropertiesConverter.toSparkCatalogProperties( + ImmutableMap.of( + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND, + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND_FILESYSTEM, + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_WAREHOUSE, + "filesystem-warehouse", + "key1", + "value1")); + Assertions.assertEquals( + ImmutableMap.of( + PaimonPropertiesConstants.PAIMON_CATALOG_METASTORE, + PaimonPropertiesConstants.PAIMON_CATALOG_BACKEND_FILESYSTEM, + PaimonPropertiesConstants.PAIMON_CATALOG_WAREHOUSE, + "filesystem-warehouse"), + properties); + } +} diff --git a/spark-connector/v3.3/spark/build.gradle.kts b/spark-connector/v3.3/spark/build.gradle.kts index c4c417d62ef..fcc2604f27a 100644 --- a/spark-connector/v3.3/spark/build.gradle.kts +++ b/spark-connector/v3.3/spark/build.gradle.kts @@ -31,6 +31,7 @@ val scalaVersion: String = project.properties["scalaVersion"] as? 
String ?: extr val sparkVersion: String = libs.versions.spark33.get() val sparkMajorVersion: String = sparkVersion.substringBeforeLast(".") val icebergVersion: String = libs.versions.iceberg4spark.get() +val paimonVersion: String = libs.versions.paimon.get() val kyuubiVersion: String = libs.versions.kyuubi4spark33.get() val scalaJava8CompatVersion: String = libs.versions.scala.java.compat.get() val scalaCollectionCompatVersion: String = libs.versions.scala.collection.compat.get() @@ -43,6 +44,7 @@ dependencies { exclude("com.fasterxml.jackson") } compileOnly("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") + compileOnly("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") testImplementation(project(":api")) { exclude("org.apache.logging.log4j") @@ -122,6 +124,7 @@ dependencies { testImplementation("org.apache.iceberg:iceberg-core:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-hive-metastore:$icebergVersion") + testImplementation("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") testImplementation("org.apache.kyuubi:kyuubi-spark-connector-hive_$scalaVersion:$kyuubiVersion") // include spark-sql,spark-catalyst,hive-common,hdfs-client testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") { diff --git a/spark-connector/v3.3/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark33.java b/spark-connector/v3.3/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark33.java new file mode 100644 index 00000000000..2fef911a8bd --- /dev/null +++ b/spark-connector/v3.3/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark33.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor 
license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.paimon; + +public class GravitinoPaimonCatalogSpark33 extends GravitinoPaimonCatalog {} diff --git a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java new file mode 100644 index 00000000000..839b959c777 --- /dev/null +++ b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.paimon; + +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark33; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class SparkPaimonCatalogFilesystemBackendIT33 extends SparkPaimonCatalogFilesystemBackendIT { + @Test + void testCatalogClassName() { + String catalogClass = + getSparkSession() + .sessionState() + .conf() + .getConfString("spark.sql.catalog." + getCatalogName()); + Assertions.assertEquals(GravitinoPaimonCatalogSpark33.class.getName(), catalogClass); + } +} diff --git a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java index 1b0af02f87b..37c95e47890 100644 --- a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java +++ b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java @@ -20,6 +20,7 @@ import org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark33; import org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark33; +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark33; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -31,5 +32,8 @@ void testSpark33() { String icebergCatalogName = 
CatalogNameAdaptor.getCatalogName("lakehouse-iceberg"); Assertions.assertEquals(GravitinoIcebergCatalogSpark33.class.getName(), icebergCatalogName); + + String paimonCatalogName = CatalogNameAdaptor.getCatalogName("lakehouse-paimon"); + Assertions.assertEquals(GravitinoPaimonCatalogSpark33.class.getName(), paimonCatalogName); } } diff --git a/spark-connector/v3.4/spark/build.gradle.kts b/spark-connector/v3.4/spark/build.gradle.kts index f3308fca34b..f046144e533 100644 --- a/spark-connector/v3.4/spark/build.gradle.kts +++ b/spark-connector/v3.4/spark/build.gradle.kts @@ -31,6 +31,7 @@ val scalaVersion: String = project.properties["scalaVersion"] as? String ?: extr val sparkVersion: String = libs.versions.spark34.get() val sparkMajorVersion: String = sparkVersion.substringBeforeLast(".") val icebergVersion: String = libs.versions.iceberg4spark.get() +val paimonVersion: String = libs.versions.paimon.get() val kyuubiVersion: String = libs.versions.kyuubi4spark34.get() val scalaJava8CompatVersion: String = libs.versions.scala.java.compat.get() val scalaCollectionCompatVersion: String = libs.versions.scala.collection.compat.get() @@ -44,6 +45,7 @@ dependencies { } compileOnly(project(":clients:client-java-runtime", configuration = "shadow")) compileOnly("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") + compileOnly("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") testImplementation(project(":api")) { exclude("org.apache.logging.log4j") @@ -122,6 +124,7 @@ dependencies { testImplementation("org.apache.iceberg:iceberg-core:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-hive-metastore:$icebergVersion") + testImplementation("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") 
testImplementation("org.apache.kyuubi:kyuubi-spark-connector-hive_$scalaVersion:$kyuubiVersion") // include spark-sql,spark-catalyst,hive-common,hdfs-client testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") { diff --git a/spark-connector/v3.4/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark34.java b/spark-connector/v3.4/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark34.java new file mode 100644 index 00000000000..eb3e8779369 --- /dev/null +++ b/spark-connector/v3.4/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark34.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.spark.connector.paimon; + +import org.apache.gravitino.spark.connector.SparkTableChangeConverter; +import org.apache.gravitino.spark.connector.SparkTableChangeConverter34; +import org.apache.gravitino.spark.connector.SparkTypeConverter; +import org.apache.gravitino.spark.connector.SparkTypeConverter34; + +public class GravitinoPaimonCatalogSpark34 extends GravitinoPaimonCatalog { + @Override + protected SparkTypeConverter getSparkTypeConverter() { + return new SparkTypeConverter34(); + } + + @Override + protected SparkTableChangeConverter getSparkTableChangeConverter( + SparkTypeConverter sparkTypeConverter) { + return new SparkTableChangeConverter34(sparkTypeConverter); + } +} diff --git a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java new file mode 100644 index 00000000000..d230707325c --- /dev/null +++ b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.paimon; + +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark34; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class SparkPaimonCatalogFilesystemBackendIT34 extends SparkPaimonCatalogFilesystemBackendIT { + + @Test + void testCatalogClassName() { + String catalogClass = + getSparkSession() + .sessionState() + .conf() + .getConfString("spark.sql.catalog." + getCatalogName()); + Assertions.assertEquals(GravitinoPaimonCatalogSpark34.class.getName(), catalogClass); + } +} diff --git a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java index a2e95c8ea30..af9e67fab88 100644 --- a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java +++ b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java @@ -20,6 +20,7 @@ import org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark34; import org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark34; +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark34; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -31,5 +32,8 @@ void testSpark34() { String icebergCatalogName = CatalogNameAdaptor.getCatalogName("lakehouse-iceberg"); Assertions.assertEquals(GravitinoIcebergCatalogSpark34.class.getName(), icebergCatalogName); + + String paimonCatalogName = CatalogNameAdaptor.getCatalogName("lakehouse-paimon"); + Assertions.assertEquals(GravitinoPaimonCatalogSpark34.class.getName(), paimonCatalogName); } } diff --git 
a/spark-connector/v3.5/spark/build.gradle.kts b/spark-connector/v3.5/spark/build.gradle.kts index 7b8cc8447b7..30bafbb1aaf 100644 --- a/spark-connector/v3.5/spark/build.gradle.kts +++ b/spark-connector/v3.5/spark/build.gradle.kts @@ -31,6 +31,7 @@ val scalaVersion: String = project.properties["scalaVersion"] as? String ?: extr val sparkVersion: String = libs.versions.spark35.get() val sparkMajorVersion: String = sparkVersion.substringBeforeLast(".") val icebergVersion: String = libs.versions.iceberg4spark.get() +val paimonVersion: String = libs.versions.paimon.get() val kyuubiVersion: String = libs.versions.kyuubi4spark35.get() val scalaJava8CompatVersion: String = libs.versions.scala.java.compat.get() val scalaCollectionCompatVersion: String = libs.versions.scala.collection.compat.get() @@ -45,6 +46,7 @@ dependencies { } compileOnly(project(":clients:client-java-runtime", configuration = "shadow")) compileOnly("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") + compileOnly("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") testImplementation(project(":api")) { exclude("org.apache.logging.log4j") @@ -124,6 +126,7 @@ dependencies { testImplementation("org.apache.iceberg:iceberg-core:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-hive-metastore:$icebergVersion") + testImplementation("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") testImplementation("org.apache.kyuubi:kyuubi-spark-connector-hive_$scalaVersion:$kyuubiVersion") // include spark-sql,spark-catalyst,hive-common,hdfs-client testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") { diff --git a/spark-connector/v3.5/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark35.java 
b/spark-connector/v3.5/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark35.java new file mode 100644 index 00000000000..2c39af5b2f7 --- /dev/null +++ b/spark-connector/v3.5/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark35.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.paimon; + +public class GravitinoPaimonCatalogSpark35 extends GravitinoPaimonCatalogSpark34 {} diff --git a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java new file mode 100644 index 00000000000..b02f58f70bf --- /dev/null +++ b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.paimon; + +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark35; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class SparkPaimonCatalogFilesystemBackendIT35 extends SparkPaimonCatalogFilesystemBackendIT { + + @Test + void testCatalogClassName() { + String catalogClass = + getSparkSession() + .sessionState() + .conf() + .getConfString("spark.sql.catalog." 
+ getCatalogName()); + Assertions.assertEquals(GravitinoPaimonCatalogSpark35.class.getName(), catalogClass); + } +} diff --git a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java index 5295e82fb24..f02584cd616 100644 --- a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java +++ b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java @@ -20,6 +20,7 @@ import org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark35; import org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark35; +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark35; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -31,5 +32,8 @@ void testSpark35() { String icebergCatalogName = CatalogNameAdaptor.getCatalogName("lakehouse-iceberg"); Assertions.assertEquals(GravitinoIcebergCatalogSpark35.class.getName(), icebergCatalogName); + + String paimonCatalogName = CatalogNameAdaptor.getCatalogName("lakehouse-paimon"); + Assertions.assertEquals(GravitinoPaimonCatalogSpark35.class.getName(), paimonCatalogName); } } From e4a11d12ba3f6dda24ee51fc544202e092d9478f Mon Sep 17 00:00:00 2001 From: caican Date: Sun, 1 Dec 2024 14:18:52 +0800 Subject: [PATCH 02/25] basic ddl basic dml --- .../lakehouse/paimon/PaimonConstants.java | 57 ++++++++++ .../paimon/PaimonPropertiesUtils.java | 95 ++++++++++++++++ .../PaimonCatalogPropertiesMetadata.java | 26 +++-- .../PaimonSchemaPropertiesMetadata.java | 2 +- .../paimon/PaimonTablePropertiesMetadata.java | 16 +-- .../storage/PaimonOSSFileSystemConfig.java | 7 +- .../storage/PaimonS3FileSystemConfig.java | 7 +- spark-connector/spark-common/build.gradle.kts | 3 + 
.../paimon/GravitinoPaimonCatalog.java | 69 ++++++++++++ .../paimon/PaimonPropertiesConstants.java | 49 ++++++++ .../paimon/PaimonPropertiesConverter.java | 64 +++++++++++ .../connector/version/CatalogNameAdaptor.java | 21 +++- ...SparkPaimonCatalogFilesystemBackendIT.java | 39 +++++++ .../test/paimon/SparkPaimonCatalogIT.java | 54 +++++++++ .../paimon/TestPaimonPropertiesConverter.java | 106 ++++++++++++++++++ spark-connector/v3.3/spark/build.gradle.kts | 3 + .../paimon/GravitinoPaimonCatalogSpark33.java | 21 ++++ ...arkPaimonCatalogFilesystemBackendIT33.java | 35 ++++++ .../version/TestCatalogNameAdaptor.java | 4 + spark-connector/v3.4/spark/build.gradle.kts | 3 + .../paimon/GravitinoPaimonCatalogSpark34.java | 37 ++++++ ...arkPaimonCatalogFilesystemBackendIT34.java | 36 ++++++ .../version/TestCatalogNameAdaptor.java | 4 + spark-connector/v3.5/spark/build.gradle.kts | 3 + .../paimon/GravitinoPaimonCatalogSpark35.java | 21 ++++ ...arkPaimonCatalogFilesystemBackendIT35.java | 36 ++++++ .../version/TestCatalogNameAdaptor.java | 4 + 27 files changed, 789 insertions(+), 33 deletions(-) create mode 100644 catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonConstants.java create mode 100644 catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonPropertiesUtils.java create mode 100644 spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java create mode 100644 spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConstants.java create mode 100644 spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConverter.java create mode 100644 spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java create mode 100644 
spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java create mode 100644 spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/paimon/TestPaimonPropertiesConverter.java create mode 100644 spark-connector/v3.3/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark33.java create mode 100644 spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java create mode 100644 spark-connector/v3.4/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark34.java create mode 100644 spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java create mode 100644 spark-connector/v3.5/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark35.java create mode 100644 spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java diff --git a/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonConstants.java b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonConstants.java new file mode 100644 index 00000000000..291a7ea9694 --- /dev/null +++ b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonConstants.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.lakehouse.paimon; + +public class PaimonConstants { + + // Paimon catalog properties constants + public static final String CATALOG_BACKEND = "catalog-backend"; + public static final String METASTORE = "metastore"; + public static final String URI = "uri"; + public static final String WAREHOUSE = "warehouse"; + public static final String CATALOG_BACKEND_NAME = "catalog-backend-name"; + + public static final String GRAVITINO_JDBC_USER = "jdbc-user"; + public static final String PAIMON_JDBC_USER = "jdbc.user"; + + public static final String GRAVITINO_JDBC_PASSWORD = "jdbc-password"; + public static final String PAIMON_JDBC_PASSWORD = "jdbc.password"; + + public static final String GRAVITINO_JDBC_DRIVER = "jdbc-driver"; + + // S3 properties needed by Paimon + public static final String S3_ENDPOINT = "s3.endpoint"; + public static final String S3_ACCESS_KEY = "s3.access-key"; + public static final String S3_SECRET_KEY = "s3.secret-key"; + + // OSS related properties + public static final String OSS_ENDPOINT = "fs.oss.endpoint"; + public static final String OSS_ACCESS_KEY = "fs.oss.accessKeyId"; + public static final String OSS_SECRET_KEY = "fs.oss.accessKeySecret"; + + // Paimon Table properties constants + public static final String COMMENT = "comment"; + public static final String OWNER = "owner"; + public static final String BUCKET_KEY = "bucket-key"; + public static final String MERGE_ENGINE = "merge-engine"; + public static final String SEQUENCE_FIELD = "sequence.field"; + public static final String 
ROWKIND_FIELD = "rowkind.field"; + public static final String PRIMARY_KEY = "primary-key"; + public static final String PARTITION = "partition"; +} diff --git a/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonPropertiesUtils.java b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonPropertiesUtils.java new file mode 100644 index 00000000000..0dcf24f3a67 --- /dev/null +++ b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonPropertiesUtils.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.catalog.lakehouse.paimon; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import java.util.Optional; +import org.apache.gravitino.storage.OSSProperties; +import org.apache.gravitino.storage.S3Properties; + +public class PaimonPropertiesUtils { + + // Map that maintains the mapping of keys in Gravitino to that in Paimon, for example, users + // will only need to set the configuration 'catalog-backend' in Gravitino and Gravitino will + // change it to `catalogType` automatically and pass it to Paimon. + public static final Map<String, String> GRAVITINO_CONFIG_TO_PAIMON; + + static { + Map<String, String> map = new HashMap<>(); + map.put(PaimonConstants.CATALOG_BACKEND, PaimonConstants.CATALOG_BACKEND); + map.put(PaimonConstants.GRAVITINO_JDBC_DRIVER, PaimonConstants.GRAVITINO_JDBC_DRIVER); + map.put(PaimonConstants.GRAVITINO_JDBC_USER, PaimonConstants.PAIMON_JDBC_USER); + map.put(PaimonConstants.GRAVITINO_JDBC_PASSWORD, PaimonConstants.PAIMON_JDBC_PASSWORD); + map.put(PaimonConstants.URI, PaimonConstants.URI); + map.put(PaimonConstants.WAREHOUSE, PaimonConstants.WAREHOUSE); + map.put(PaimonConstants.CATALOG_BACKEND_NAME, PaimonConstants.CATALOG_BACKEND_NAME); + // S3 + map.put(S3Properties.GRAVITINO_S3_ENDPOINT, PaimonConstants.S3_ENDPOINT); + map.put(S3Properties.GRAVITINO_S3_ACCESS_KEY_ID, PaimonConstants.S3_ACCESS_KEY); + map.put(S3Properties.GRAVITINO_S3_SECRET_ACCESS_KEY, PaimonConstants.S3_SECRET_KEY); + // OSS + map.put(OSSProperties.GRAVITINO_OSS_ENDPOINT, PaimonConstants.OSS_ENDPOINT); + map.put(OSSProperties.GRAVITINO_OSS_ACCESS_KEY_ID, PaimonConstants.OSS_ACCESS_KEY); + map.put(OSSProperties.GRAVITINO_OSS_ACCESS_KEY_SECRET, PaimonConstants.OSS_SECRET_KEY); + GRAVITINO_CONFIG_TO_PAIMON = Collections.unmodifiableMap(map); + } + + /** + * Converts Gravitino properties to Paimon catalog properties, the common transform logic shared + * by Spark connector, Gravitino Paimon catalog. 
+ * + * @param gravitinoProperties a map of Gravitino configuration properties. + * @return a map containing Paimon catalog properties. + */ + public static Map<String, String> toPaimonCatalogProperties( + Map<String, String> gravitinoProperties) { + Map<String, String> paimonProperties = new HashMap<>(); + gravitinoProperties.forEach( + (key, value) -> { + if (GRAVITINO_CONFIG_TO_PAIMON.containsKey(key)) { + paimonProperties.put(GRAVITINO_CONFIG_TO_PAIMON.get(key), value); + } + }); + return paimonProperties; + } + + /** + * Get catalog backend name from Gravitino catalog properties. + * + * @param catalogProperties a map of Gravitino catalog properties. + * @return catalog backend name. + */ + public static String getCatalogBackendName(Map<String, String> catalogProperties) { + String backendName = catalogProperties.get(PaimonConstants.CATALOG_BACKEND_NAME); + if (backendName != null) { + return backendName; + } + + String catalogBackend = catalogProperties.get(PaimonConstants.CATALOG_BACKEND); + return Optional.ofNullable(catalogBackend) + .map(s -> s.toLowerCase(Locale.ROOT)) + .orElseThrow( + () -> + new UnsupportedOperationException( + String.format("Unsupported catalog backend: %s", catalogBackend))); + } +} diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java index e3b59bff36d..901fb47fadb 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java @@ -45,20 +45,22 @@ */ public class PaimonCatalogPropertiesMetadata extends BaseCatalogPropertiesMetadata { - @VisibleForTesting public static final String GRAVITINO_CATALOG_BACKEND = "catalog-backend"; - public static final String 
PAIMON_METASTORE = "metastore"; - public static final String WAREHOUSE = "warehouse"; - public static final String URI = "uri"; - public static final String GRAVITINO_JDBC_USER = "jdbc-user"; - public static final String PAIMON_JDBC_USER = "jdbc.user"; - public static final String GRAVITINO_JDBC_PASSWORD = "jdbc-password"; - public static final String PAIMON_JDBC_PASSWORD = "jdbc.password"; - public static final String GRAVITINO_JDBC_DRIVER = "jdbc-driver"; + @VisibleForTesting + public static final String GRAVITINO_CATALOG_BACKEND = PaimonConstants.CATALOG_BACKEND; + + public static final String PAIMON_METASTORE = PaimonConstants.METASTORE; + public static final String WAREHOUSE = PaimonConstants.WAREHOUSE; + public static final String URI = PaimonConstants.URI; + public static final String GRAVITINO_JDBC_USER = PaimonConstants.GRAVITINO_JDBC_USER; + public static final String PAIMON_JDBC_USER = PaimonConstants.PAIMON_JDBC_USER; + public static final String GRAVITINO_JDBC_PASSWORD = PaimonConstants.GRAVITINO_JDBC_PASSWORD; + public static final String PAIMON_JDBC_PASSWORD = PaimonConstants.PAIMON_JDBC_PASSWORD; + public static final String GRAVITINO_JDBC_DRIVER = PaimonConstants.GRAVITINO_JDBC_DRIVER; // S3 properties needed by Paimon - public static final String S3_ENDPOINT = "s3.endpoint"; - public static final String S3_ACCESS_KEY = "s3.access-key"; - public static final String S3_SECRET_KEY = "s3.secret-key"; + public static final String S3_ENDPOINT = PaimonConstants.S3_ENDPOINT; + public static final String S3_ACCESS_KEY = PaimonConstants.S3_ACCESS_KEY; + public static final String S3_SECRET_KEY = PaimonConstants.S3_SECRET_KEY; public static final Map<String, String> GRAVITINO_CONFIG_TO_PAIMON = ImmutableMap.of( diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonSchemaPropertiesMetadata.java 
b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonSchemaPropertiesMetadata.java index 9a6ddb5a165..3da05099cc4 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonSchemaPropertiesMetadata.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonSchemaPropertiesMetadata.java @@ -34,7 +34,7 @@ */ public class PaimonSchemaPropertiesMetadata extends BasePropertiesMetadata { - public static final String COMMENT = "comment"; + public static final String COMMENT = PaimonConstants.COMMENT; private static final Map> PROPERTIES_METADATA; diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonTablePropertiesMetadata.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonTablePropertiesMetadata.java index 671dd9d6682..ad63df6783f 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonTablePropertiesMetadata.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonTablePropertiesMetadata.java @@ -35,14 +35,14 @@ */ public class PaimonTablePropertiesMetadata extends BasePropertiesMetadata { - public static final String COMMENT = "comment"; - public static final String OWNER = "owner"; - public static final String BUCKET_KEY = "bucket-key"; - public static final String MERGE_ENGINE = "merge-engine"; - public static final String SEQUENCE_FIELD = "sequence.field"; - public static final String ROWKIND_FIELD = "rowkind.field"; - public static final String PRIMARY_KEY = "primary-key"; - public static final String PARTITION = "partition"; + public static final String COMMENT = PaimonConstants.COMMENT; + public static final String OWNER = PaimonConstants.OWNER; + public static final String BUCKET_KEY = 
PaimonConstants.BUCKET_KEY; + public static final String MERGE_ENGINE = PaimonConstants.MERGE_ENGINE; + public static final String SEQUENCE_FIELD = PaimonConstants.SEQUENCE_FIELD; + public static final String ROWKIND_FIELD = PaimonConstants.ROWKIND_FIELD; + public static final String PRIMARY_KEY = PaimonConstants.PRIMARY_KEY; + public static final String PARTITION = PaimonConstants.PARTITION; private static final Map> PROPERTIES_METADATA; diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonOSSFileSystemConfig.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonOSSFileSystemConfig.java index ad7fa26f3bc..7b703b5b74a 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonOSSFileSystemConfig.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonOSSFileSystemConfig.java @@ -22,6 +22,7 @@ import java.util.Map; import org.apache.commons.lang3.StringUtils; import org.apache.gravitino.Config; +import org.apache.gravitino.catalog.lakehouse.paimon.PaimonConstants; import org.apache.gravitino.config.ConfigBuilder; import org.apache.gravitino.config.ConfigConstants; import org.apache.gravitino.config.ConfigEntry; @@ -29,9 +30,9 @@ public class PaimonOSSFileSystemConfig extends Config { // OSS related properties - public static final String OSS_ENDPOINT = "fs.oss.endpoint"; - public static final String OSS_ACCESS_KEY = "fs.oss.accessKeyId"; - public static final String OSS_SECRET_KEY = "fs.oss.accessKeySecret"; + public static final String OSS_ENDPOINT = PaimonConstants.OSS_ENDPOINT; + public static final String OSS_ACCESS_KEY = PaimonConstants.OSS_ACCESS_KEY; + public static final String OSS_SECRET_KEY = PaimonConstants.OSS_SECRET_KEY; public PaimonOSSFileSystemConfig(Map properties) { super(false); diff --git 
a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonS3FileSystemConfig.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonS3FileSystemConfig.java index 4184fcc06f1..6588e4a5268 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonS3FileSystemConfig.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonS3FileSystemConfig.java @@ -22,6 +22,7 @@ import java.util.Map; import org.apache.commons.lang3.StringUtils; import org.apache.gravitino.Config; +import org.apache.gravitino.catalog.lakehouse.paimon.PaimonConstants; import org.apache.gravitino.config.ConfigBuilder; import org.apache.gravitino.config.ConfigConstants; import org.apache.gravitino.config.ConfigEntry; @@ -29,9 +30,9 @@ public class PaimonS3FileSystemConfig extends Config { // S3 related properties - public static final String S3_ENDPOINT = "s3.endpoint"; - public static final String S3_ACCESS_KEY = "s3.access-key"; - public static final String S3_SECRET_KEY = "s3.secret-key"; + public static final String S3_ENDPOINT = PaimonConstants.S3_ENDPOINT; + public static final String S3_ACCESS_KEY = PaimonConstants.S3_ACCESS_KEY; + public static final String S3_SECRET_KEY = PaimonConstants.S3_SECRET_KEY; public PaimonS3FileSystemConfig(Map properties) { super(false); diff --git a/spark-connector/spark-common/build.gradle.kts b/spark-connector/spark-common/build.gradle.kts index 7f3c66aa6e6..dc0af57a00c 100644 --- a/spark-connector/spark-common/build.gradle.kts +++ b/spark-connector/spark-common/build.gradle.kts @@ -31,6 +31,7 @@ val scalaVersion: String = project.properties["scalaVersion"] as? 
String ?: extr val sparkVersion: String = libs.versions.spark33.get() val sparkMajorVersion: String = sparkVersion.substringBeforeLast(".") val icebergVersion: String = libs.versions.iceberg4spark.get() +val paimonVersion: String = libs.versions.paimon.get() // kyuubi hive connector for Spark 3.3 doesn't support scala 2.13 val kyuubiVersion: String = libs.versions.kyuubi4spark34.get() val scalaJava8CompatVersion: String = libs.versions.scala.java.compat.get() @@ -43,6 +44,7 @@ dependencies { compileOnly(project(":clients:client-java-runtime", configuration = "shadow")) compileOnly("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") compileOnly("org.apache.kyuubi:kyuubi-spark-connector-hive_$scalaVersion:$kyuubiVersion") + compileOnly("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") compileOnly("org.apache.spark:spark-catalyst_$scalaVersion:$sparkVersion") compileOnly("org.apache.spark:spark-core_$scalaVersion:$sparkVersion") @@ -114,6 +116,7 @@ dependencies { testImplementation("org.apache.iceberg:iceberg-core:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-hive-metastore:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") + testImplementation("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") testImplementation("org.apache.kyuubi:kyuubi-spark-connector-hive_$scalaVersion:$kyuubiVersion") // include spark-sql,spark-catalyst,hive-common,hdfs-client testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") { diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java new file mode 100644 index 00000000000..3a1b210055a --- /dev/null +++ 
b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.spark.connector.paimon; + +import java.util.Map; +import org.apache.gravitino.catalog.lakehouse.paimon.PaimonPropertiesUtils; +import org.apache.gravitino.spark.connector.PropertiesConverter; +import org.apache.gravitino.spark.connector.SparkTransformConverter; +import org.apache.gravitino.spark.connector.SparkTypeConverter; +import org.apache.gravitino.spark.connector.catalog.BaseCatalog; +import org.apache.paimon.spark.SparkCatalog; +import org.apache.spark.sql.connector.catalog.Identifier; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.catalog.TableCatalog; +import org.apache.spark.sql.util.CaseInsensitiveStringMap; + +public class GravitinoPaimonCatalog extends BaseCatalog { + + @Override + protected TableCatalog createAndInitSparkCatalog( + String name, CaseInsensitiveStringMap options, Map properties) { + String catalogBackendName = PaimonPropertiesUtils.getCatalogBackendName(properties); + TableCatalog paimonCatalog = new SparkCatalog(); + Map 
all = + getPropertiesConverter().toSparkCatalogProperties(options, properties); + paimonCatalog.initialize(catalogBackendName, new CaseInsensitiveStringMap(all)); + return paimonCatalog; + } + + @Override + protected Table createSparkTable( + Identifier identifier, + org.apache.gravitino.rel.Table gravitinoTable, + Table sparkTable, + TableCatalog sparkCatalog, + PropertiesConverter propertiesConverter, + SparkTransformConverter sparkTransformConverter, + SparkTypeConverter sparkTypeConverter) { + throw new UnsupportedOperationException( + "`createSparkTable` operation is unsupported for paimon spark connector now."); + } + + @Override + protected PropertiesConverter getPropertiesConverter() { + return PaimonPropertiesConverter.getInstance(); + } + + @Override + protected SparkTransformConverter getSparkTransformConverter() { + return new SparkTransformConverter(true); + } +} diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConstants.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConstants.java new file mode 100644 index 00000000000..67190753e64 --- /dev/null +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConstants.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.spark.connector.paimon; + +import org.apache.gravitino.catalog.lakehouse.paimon.PaimonConstants; + +public class PaimonPropertiesConstants { + + public static final String GRAVITINO_PAIMON_CATALOG_BACKEND = PaimonConstants.CATALOG_BACKEND; + static final String PAIMON_CATALOG_METASTORE = PaimonConstants.METASTORE; + + public static final String GRAVITINO_PAIMON_CATALOG_WAREHOUSE = PaimonConstants.WAREHOUSE; + static final String PAIMON_CATALOG_WAREHOUSE = PaimonConstants.WAREHOUSE; + + public static final String GRAVITINO_PAIMON_CATALOG_URI = PaimonConstants.URI; + static final String PAIMON_CATALOG_URI = PaimonConstants.URI; + static final String GRAVITINO_PAIMON_CATALOG_JDBC_USER = PaimonConstants.GRAVITINO_JDBC_USER; + static final String PAIMON_CATALOG_JDBC_USER = PaimonConstants.PAIMON_JDBC_USER; + + static final String GRAVITINO_PAIMON_CATALOG_JDBC_PASSWORD = + PaimonConstants.GRAVITINO_JDBC_PASSWORD; + static final String PAIMON_CATALOG_JDBC_PASSWORD = PaimonConstants.PAIMON_JDBC_PASSWORD; + + public static final String PAIMON_CATALOG_BACKEND_HIVE = "hive"; + static final String GRAVITINO_PAIMON_CATALOG_BACKEND_HIVE = "hive"; + + static final String GRAVITINO_PAIMON_CATALOG_BACKEND_JDBC = "jdbc"; + static final String PAIMON_CATALOG_BACKEND_JDBC = "jdbc"; + + public static final String PAIMON_CATALOG_BACKEND_FILESYSTEM = "filesystem"; + static final String GRAVITINO_PAIMON_CATALOG_BACKEND_FILESYSTEM = "filesystem"; +} diff --git 
a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConverter.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConverter.java new file mode 100644 index 00000000000..335afae7c9d --- /dev/null +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConverter.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.spark.connector.paimon; + +import com.google.common.base.Preconditions; +import java.util.Map; +import org.apache.commons.lang3.StringUtils; +import org.apache.gravitino.catalog.lakehouse.paimon.PaimonPropertiesUtils; +import org.apache.gravitino.spark.connector.PropertiesConverter; + +public class PaimonPropertiesConverter implements PropertiesConverter { + + public static class PaimonPropertiesConverterHolder { + private static final PaimonPropertiesConverter INSTANCE = new PaimonPropertiesConverter(); + } + + private PaimonPropertiesConverter() {} + + public static PaimonPropertiesConverter getInstance() { + return PaimonPropertiesConverter.PaimonPropertiesConverterHolder.INSTANCE; + } + + @Override + public Map toSparkCatalogProperties(Map properties) { + Preconditions.checkArgument(properties != null, "Paimon Catalog properties should not be null"); + Map all = PaimonPropertiesUtils.toPaimonCatalogProperties(properties); + String catalogBackend = all.remove(PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND); + Preconditions.checkArgument( + StringUtils.isNotBlank(catalogBackend), + String.format( + "%s should not be empty", PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND)); + all.put(PaimonPropertiesConstants.PAIMON_CATALOG_METASTORE, catalogBackend); + return all; + } + + @Override + public Map toGravitinoTableProperties(Map properties) { + throw new UnsupportedOperationException( + "`toGravitinoTableProperties` operation is unsupported now."); + } + + @Override + public Map toSparkTableProperties(Map properties) { + throw new UnsupportedOperationException( + "`toSparkTableProperties` operation is unsupported now."); + } +} diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/version/CatalogNameAdaptor.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/version/CatalogNameAdaptor.java index 8141c799bf8..9392feac2f1 100644 --- 
a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/version/CatalogNameAdaptor.java +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/version/CatalogNameAdaptor.java @@ -27,15 +27,24 @@ public class CatalogNameAdaptor { private static final Map catalogNames = ImmutableMap.of( - "hive-3.3", "org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark33", - "hive-3.4", "org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark34", - "hive-3.5", "org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark35", + "hive-3.3", + "org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark33", + "hive-3.4", + "org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark34", + "hive-3.5", + "org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark35", "lakehouse-iceberg-3.3", - "org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark33", + "org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark33", "lakehouse-iceberg-3.4", - "org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark34", + "org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark34", "lakehouse-iceberg-3.5", - "org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark35"); + "org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark35", + "lakehouse-paimon-3.3", + "org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark33", + "lakehouse-paimon-3.4", + "org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark34", + "lakehouse-paimon-3.5", + "org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark35"); private static String sparkVersion() { return package$.MODULE$.SPARK_VERSION(); diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java 
b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java new file mode 100644 index 00000000000..60d5bddbf8d --- /dev/null +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.paimon; + +import com.google.common.collect.Maps; +import java.util.Map; +import org.apache.gravitino.spark.connector.paimon.PaimonPropertiesConstants; +import org.junit.jupiter.api.Tag; + +/** This class use Apache Paimon FilesystemCatalog for backend catalog. 
*/ +@Tag("gravitino-docker-test") +public abstract class SparkPaimonCatalogFilesystemBackendIT extends SparkPaimonCatalogIT { + + @Override + protected Map getCatalogConfigs() { + Map catalogProperties = Maps.newHashMap(); + catalogProperties.put( + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND, + PaimonPropertiesConstants.PAIMON_CATALOG_BACKEND_FILESYSTEM); + catalogProperties.put(PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_WAREHOUSE, warehouse); + return catalogProperties; + } +} diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java new file mode 100644 index 00000000000..37248ebacf4 --- /dev/null +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.spark.connector.integration.test.paimon; + +import org.apache.gravitino.spark.connector.integration.test.SparkCommonIT; + +public abstract class SparkPaimonCatalogIT extends SparkCommonIT { + + @Override + protected String getCatalogName() { + return "paimon"; + } + + @Override + protected String getProvider() { + return "lakehouse-paimon"; + } + + @Override + protected boolean supportsSparkSQLClusteredBy() { + return false; + } + + @Override + protected boolean supportsPartition() { + return false; + } + + @Override + protected boolean supportsDelete() { + return false; + } + + @Override + protected boolean supportsSchemaEvolution() { + return false; + } +} diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/paimon/TestPaimonPropertiesConverter.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/paimon/TestPaimonPropertiesConverter.java new file mode 100644 index 00000000000..a3a0e91284a --- /dev/null +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/paimon/TestPaimonPropertiesConverter.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.spark.connector.paimon; + +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class TestPaimonPropertiesConverter { + private final PaimonPropertiesConverter paimonPropertiesConverter = + PaimonPropertiesConverter.getInstance(); + + @Test + void testCatalogPropertiesWithHiveBackend() { + Map properties = + paimonPropertiesConverter.toSparkCatalogProperties( + ImmutableMap.of( + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND, + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND_HIVE, + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_URI, + "hive-uri", + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_WAREHOUSE, + "hive-warehouse", + "key1", + "value1")); + Assertions.assertEquals( + ImmutableMap.of( + PaimonPropertiesConstants.PAIMON_CATALOG_METASTORE, + PaimonPropertiesConstants.PAIMON_CATALOG_BACKEND_HIVE, + PaimonPropertiesConstants.PAIMON_CATALOG_URI, + "hive-uri", + PaimonPropertiesConstants.PAIMON_CATALOG_WAREHOUSE, + "hive-warehouse"), + properties); + } + + @Test + void testCatalogPropertiesWithJdbcBackend() { + Map properties = + paimonPropertiesConverter.toSparkCatalogProperties( + ImmutableMap.of( + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND, + PaimonPropertiesConstants.PAIMON_CATALOG_BACKEND_JDBC, + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_URI, + "jdbc-uri", + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_WAREHOUSE, + "jdbc-warehouse", + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_JDBC_USER, + "user", + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_JDBC_PASSWORD, + "passwd", + "key1", + "value1")); + Assertions.assertEquals( + ImmutableMap.of( + PaimonPropertiesConstants.PAIMON_CATALOG_METASTORE, + PaimonPropertiesConstants.PAIMON_CATALOG_BACKEND_JDBC, + PaimonPropertiesConstants.PAIMON_CATALOG_URI, + "jdbc-uri", + 
PaimonPropertiesConstants.PAIMON_CATALOG_WAREHOUSE, + "jdbc-warehouse", + PaimonPropertiesConstants.PAIMON_CATALOG_JDBC_USER, + "user", + PaimonPropertiesConstants.PAIMON_CATALOG_JDBC_PASSWORD, + "passwd"), + properties); + } + + @Test + void testCatalogPropertiesWithFilesystemBackend() { + Map properties = + paimonPropertiesConverter.toSparkCatalogProperties( + ImmutableMap.of( + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND, + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND_FILESYSTEM, + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_WAREHOUSE, + "filesystem-warehouse", + "key1", + "value1")); + Assertions.assertEquals( + ImmutableMap.of( + PaimonPropertiesConstants.PAIMON_CATALOG_METASTORE, + PaimonPropertiesConstants.PAIMON_CATALOG_BACKEND_FILESYSTEM, + PaimonPropertiesConstants.PAIMON_CATALOG_WAREHOUSE, + "filesystem-warehouse"), + properties); + } +} diff --git a/spark-connector/v3.3/spark/build.gradle.kts b/spark-connector/v3.3/spark/build.gradle.kts index c4c417d62ef..fcc2604f27a 100644 --- a/spark-connector/v3.3/spark/build.gradle.kts +++ b/spark-connector/v3.3/spark/build.gradle.kts @@ -31,6 +31,7 @@ val scalaVersion: String = project.properties["scalaVersion"] as? 
String ?: extr val sparkVersion: String = libs.versions.spark33.get() val sparkMajorVersion: String = sparkVersion.substringBeforeLast(".") val icebergVersion: String = libs.versions.iceberg4spark.get() +val paimonVersion: String = libs.versions.paimon.get() val kyuubiVersion: String = libs.versions.kyuubi4spark33.get() val scalaJava8CompatVersion: String = libs.versions.scala.java.compat.get() val scalaCollectionCompatVersion: String = libs.versions.scala.collection.compat.get() @@ -43,6 +44,7 @@ dependencies { exclude("com.fasterxml.jackson") } compileOnly("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") + compileOnly("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") testImplementation(project(":api")) { exclude("org.apache.logging.log4j") @@ -122,6 +124,7 @@ dependencies { testImplementation("org.apache.iceberg:iceberg-core:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-hive-metastore:$icebergVersion") + testImplementation("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") testImplementation("org.apache.kyuubi:kyuubi-spark-connector-hive_$scalaVersion:$kyuubiVersion") // include spark-sql,spark-catalyst,hive-common,hdfs-client testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") { diff --git a/spark-connector/v3.3/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark33.java b/spark-connector/v3.3/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark33.java new file mode 100644 index 00000000000..2fef911a8bd --- /dev/null +++ b/spark-connector/v3.3/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark33.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor 
license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.paimon; + +public class GravitinoPaimonCatalogSpark33 extends GravitinoPaimonCatalog {} diff --git a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java new file mode 100644 index 00000000000..839b959c777 --- /dev/null +++ b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.paimon; + +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark33; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class SparkPaimonCatalogFilesystemBackendIT33 extends SparkPaimonCatalogFilesystemBackendIT { + @Test + void testCatalogClassName() { + String catalogClass = + getSparkSession() + .sessionState() + .conf() + .getConfString("spark.sql.catalog." + getCatalogName()); + Assertions.assertEquals(GravitinoPaimonCatalogSpark33.class.getName(), catalogClass); + } +} diff --git a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java index 1b0af02f87b..37c95e47890 100644 --- a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java +++ b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java @@ -20,6 +20,7 @@ import org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark33; import org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark33; +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark33; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -31,5 +32,8 @@ void testSpark33() { String icebergCatalogName = 
CatalogNameAdaptor.getCatalogName("lakehouse-iceberg"); Assertions.assertEquals(GravitinoIcebergCatalogSpark33.class.getName(), icebergCatalogName); + + String paimonCatalogName = CatalogNameAdaptor.getCatalogName("lakehouse-paimon"); + Assertions.assertEquals(GravitinoPaimonCatalogSpark33.class.getName(), paimonCatalogName); } } diff --git a/spark-connector/v3.4/spark/build.gradle.kts b/spark-connector/v3.4/spark/build.gradle.kts index f3308fca34b..f046144e533 100644 --- a/spark-connector/v3.4/spark/build.gradle.kts +++ b/spark-connector/v3.4/spark/build.gradle.kts @@ -31,6 +31,7 @@ val scalaVersion: String = project.properties["scalaVersion"] as? String ?: extr val sparkVersion: String = libs.versions.spark34.get() val sparkMajorVersion: String = sparkVersion.substringBeforeLast(".") val icebergVersion: String = libs.versions.iceberg4spark.get() +val paimonVersion: String = libs.versions.paimon.get() val kyuubiVersion: String = libs.versions.kyuubi4spark34.get() val scalaJava8CompatVersion: String = libs.versions.scala.java.compat.get() val scalaCollectionCompatVersion: String = libs.versions.scala.collection.compat.get() @@ -44,6 +45,7 @@ dependencies { } compileOnly(project(":clients:client-java-runtime", configuration = "shadow")) compileOnly("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") + compileOnly("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") testImplementation(project(":api")) { exclude("org.apache.logging.log4j") @@ -122,6 +124,7 @@ dependencies { testImplementation("org.apache.iceberg:iceberg-core:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-hive-metastore:$icebergVersion") + testImplementation("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") 
testImplementation("org.apache.kyuubi:kyuubi-spark-connector-hive_$scalaVersion:$kyuubiVersion") // include spark-sql,spark-catalyst,hive-common,hdfs-client testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") { diff --git a/spark-connector/v3.4/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark34.java b/spark-connector/v3.4/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark34.java new file mode 100644 index 00000000000..eb3e8779369 --- /dev/null +++ b/spark-connector/v3.4/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark34.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.spark.connector.paimon; + +import org.apache.gravitino.spark.connector.SparkTableChangeConverter; +import org.apache.gravitino.spark.connector.SparkTableChangeConverter34; +import org.apache.gravitino.spark.connector.SparkTypeConverter; +import org.apache.gravitino.spark.connector.SparkTypeConverter34; + +public class GravitinoPaimonCatalogSpark34 extends GravitinoPaimonCatalog { + @Override + protected SparkTypeConverter getSparkTypeConverter() { + return new SparkTypeConverter34(); + } + + @Override + protected SparkTableChangeConverter getSparkTableChangeConverter( + SparkTypeConverter sparkTypeConverter) { + return new SparkTableChangeConverter34(sparkTypeConverter); + } +} diff --git a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java new file mode 100644 index 00000000000..d230707325c --- /dev/null +++ b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.paimon; + +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark34; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class SparkPaimonCatalogFilesystemBackendIT34 extends SparkPaimonCatalogFilesystemBackendIT { + + @Test + void testCatalogClassName() { + String catalogClass = + getSparkSession() + .sessionState() + .conf() + .getConfString("spark.sql.catalog." + getCatalogName()); + Assertions.assertEquals(GravitinoPaimonCatalogSpark34.class.getName(), catalogClass); + } +} diff --git a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java index a2e95c8ea30..af9e67fab88 100644 --- a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java +++ b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java @@ -20,6 +20,7 @@ import org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark34; import org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark34; +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark34; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -31,5 +32,8 @@ void testSpark34() { String icebergCatalogName = CatalogNameAdaptor.getCatalogName("lakehouse-iceberg"); Assertions.assertEquals(GravitinoIcebergCatalogSpark34.class.getName(), icebergCatalogName); + + String paimonCatalogName = CatalogNameAdaptor.getCatalogName("lakehouse-paimon"); + Assertions.assertEquals(GravitinoPaimonCatalogSpark34.class.getName(), paimonCatalogName); } } diff --git 
a/spark-connector/v3.5/spark/build.gradle.kts b/spark-connector/v3.5/spark/build.gradle.kts index 7b8cc8447b7..30bafbb1aaf 100644 --- a/spark-connector/v3.5/spark/build.gradle.kts +++ b/spark-connector/v3.5/spark/build.gradle.kts @@ -31,6 +31,7 @@ val scalaVersion: String = project.properties["scalaVersion"] as? String ?: extr val sparkVersion: String = libs.versions.spark35.get() val sparkMajorVersion: String = sparkVersion.substringBeforeLast(".") val icebergVersion: String = libs.versions.iceberg4spark.get() +val paimonVersion: String = libs.versions.paimon.get() val kyuubiVersion: String = libs.versions.kyuubi4spark35.get() val scalaJava8CompatVersion: String = libs.versions.scala.java.compat.get() val scalaCollectionCompatVersion: String = libs.versions.scala.collection.compat.get() @@ -45,6 +46,7 @@ dependencies { } compileOnly(project(":clients:client-java-runtime", configuration = "shadow")) compileOnly("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") + compileOnly("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") testImplementation(project(":api")) { exclude("org.apache.logging.log4j") @@ -124,6 +126,7 @@ dependencies { testImplementation("org.apache.iceberg:iceberg-core:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-hive-metastore:$icebergVersion") + testImplementation("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") testImplementation("org.apache.kyuubi:kyuubi-spark-connector-hive_$scalaVersion:$kyuubiVersion") // include spark-sql,spark-catalyst,hive-common,hdfs-client testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") { diff --git a/spark-connector/v3.5/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark35.java 
b/spark-connector/v3.5/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark35.java new file mode 100644 index 00000000000..2c39af5b2f7 --- /dev/null +++ b/spark-connector/v3.5/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark35.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.paimon; + +public class GravitinoPaimonCatalogSpark35 extends GravitinoPaimonCatalogSpark34 {} diff --git a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java new file mode 100644 index 00000000000..b02f58f70bf --- /dev/null +++ b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.paimon; + +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark35; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class SparkPaimonCatalogFilesystemBackendIT35 extends SparkPaimonCatalogFilesystemBackendIT { + + @Test + void testCatalogClassName() { + String catalogClass = + getSparkSession() + .sessionState() + .conf() + .getConfString("spark.sql.catalog." 
+ getCatalogName()); + Assertions.assertEquals(GravitinoPaimonCatalogSpark35.class.getName(), catalogClass); + } +} diff --git a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java index 5295e82fb24..f02584cd616 100644 --- a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java +++ b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java @@ -20,6 +20,7 @@ import org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark35; import org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark35; +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark35; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -31,5 +32,8 @@ void testSpark35() { String icebergCatalogName = CatalogNameAdaptor.getCatalogName("lakehouse-iceberg"); Assertions.assertEquals(GravitinoIcebergCatalogSpark35.class.getName(), icebergCatalogName); + + String paimonCatalogName = CatalogNameAdaptor.getCatalogName("lakehouse-paimon"); + Assertions.assertEquals(GravitinoPaimonCatalogSpark35.class.getName(), paimonCatalogName); } } From a16e949e6206b694cdc43817e76115f8fea90799 Mon Sep 17 00:00:00 2001 From: caican Date: Sun, 1 Dec 2024 15:03:36 +0800 Subject: [PATCH 03/25] basic dml --- .../paimon/GravitinoPaimonCatalog.java | 10 +- .../connector/paimon/SparkPaimonTable.java | 92 +++++++++++++++++++ ...SparkPaimonCatalogFilesystemBackendIT.java | 1 - 3 files changed, 100 insertions(+), 3 deletions(-) create mode 100644 spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/SparkPaimonTable.java diff --git 
a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java index 3a1b210055a..5a7b9a8c7ec 100644 --- a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java @@ -26,6 +26,7 @@ import org.apache.gravitino.spark.connector.SparkTypeConverter; import org.apache.gravitino.spark.connector.catalog.BaseCatalog; import org.apache.paimon.spark.SparkCatalog; +import org.apache.paimon.spark.SparkTable; import org.apache.spark.sql.connector.catalog.Identifier; import org.apache.spark.sql.connector.catalog.Table; import org.apache.spark.sql.connector.catalog.TableCatalog; @@ -53,8 +54,13 @@ protected Table createSparkTable( PropertiesConverter propertiesConverter, SparkTransformConverter sparkTransformConverter, SparkTypeConverter sparkTypeConverter) { - throw new UnsupportedOperationException( - "`createSparkTable` operation is unsupported for paimon spark connector now."); + return new SparkPaimonTable( + identifier, + gravitinoTable, + (SparkTable) sparkTable, + propertiesConverter, + sparkTransformConverter, + sparkTypeConverter); } @Override diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/SparkPaimonTable.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/SparkPaimonTable.java new file mode 100644 index 00000000000..557cf13bec6 --- /dev/null +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/SparkPaimonTable.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.spark.connector.paimon; + +import java.util.Map; +import org.apache.gravitino.rel.Table; +import org.apache.gravitino.spark.connector.PropertiesConverter; +import org.apache.gravitino.spark.connector.SparkTransformConverter; +import org.apache.gravitino.spark.connector.SparkTypeConverter; +import org.apache.gravitino.spark.connector.utils.GravitinoTableInfoHelper; +import org.apache.paimon.spark.SparkTable; +import org.apache.spark.sql.connector.catalog.Identifier; +import org.apache.spark.sql.connector.expressions.Transform; +import org.apache.spark.sql.connector.read.ScanBuilder; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.util.CaseInsensitiveStringMap; + +/** + * For spark-connector in Paimon, it explicitly uses SparkTable to identify whether it is an Apache + * Paimon table, so the SparkPaimonTable must extend SparkTable. 
+ */ +public class SparkPaimonTable extends SparkTable { + + private GravitinoTableInfoHelper gravitinoTableInfoHelper; + private org.apache.spark.sql.connector.catalog.Table sparkTable; + + public SparkPaimonTable( + Identifier identifier, + Table gravitinoTable, + SparkTable sparkTable, + PropertiesConverter propertiesConverter, + SparkTransformConverter sparkTransformConverter, + SparkTypeConverter sparkTypeConverter) { + super(sparkTable.getTable()); + this.gravitinoTableInfoHelper = + new GravitinoTableInfoHelper( + true, + identifier, + gravitinoTable, + propertiesConverter, + sparkTransformConverter, + sparkTypeConverter); + this.sparkTable = sparkTable; + } + + @Override + public String name() { + return gravitinoTableInfoHelper.name(); + } + + @Override + @SuppressWarnings("deprecation") + public StructType schema() { + return gravitinoTableInfoHelper.schema(); + } + + @Override + public Map properties() { + return gravitinoTableInfoHelper.properties(); + } + + @Override + public Transform[] partitioning() { + return gravitinoTableInfoHelper.partitioning(); + } + + /** + * If using SparkPaimonTable not SparkTable, we must extract snapshotId or branchName using the + * Paimon specific logic. It's hard to maintenance. 
+ */ + @Override + public ScanBuilder newScanBuilder(CaseInsensitiveStringMap options) { + return ((SparkTable) sparkTable).newScanBuilder(options); + } +} diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java index 778f97abf7f..60d5bddbf8d 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java @@ -34,7 +34,6 @@ protected Map getCatalogConfigs() { PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND, PaimonPropertiesConstants.PAIMON_CATALOG_BACKEND_FILESYSTEM); catalogProperties.put(PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_WAREHOUSE, warehouse); - catalogProperties.put(PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_URI, hiveMetastoreUri); return catalogProperties; } } From e28d32346d7762fb89132fceec535140abd1f5ce Mon Sep 17 00:00:00 2001 From: caican Date: Sun, 1 Dec 2024 16:34:46 +0800 Subject: [PATCH 04/25] support partition --- .../test/paimon/SparkPaimonCatalogIT.java | 86 ++++++++++++++++++- 1 file changed, 84 insertions(+), 2 deletions(-) diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java index 37248ebacf4..10a3b1532ee 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java +++ 
b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java @@ -19,6 +19,15 @@ package org.apache.gravitino.spark.connector.integration.test.paimon; import org.apache.gravitino.spark.connector.integration.test.SparkCommonIT; +import org.apache.gravitino.spark.connector.integration.test.util.SparkTableInfo; +import org.apache.gravitino.spark.connector.integration.test.util.SparkTableInfoChecker; +import org.apache.hadoop.fs.Path; +import org.apache.spark.sql.types.DataTypes; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; public abstract class SparkPaimonCatalogIT extends SparkCommonIT { @@ -39,7 +48,7 @@ protected boolean supportsSparkSQLClusteredBy() { @Override protected boolean supportsPartition() { - return false; + return true; } @Override @@ -49,6 +58,79 @@ protected boolean supportsDelete() { @Override protected boolean supportsSchemaEvolution() { - return false; + return true; + } + + @Test + void testPaimonPartitions() { + String partitionPathString = "name=a/address=beijing"; + + String tableName = "test_paimon_partition_table"; + dropTableIfExists(tableName); + String createTableSQL = getCreatePaimonSimpleTableString(tableName); + createTableSQL = + createTableSQL + " PARTITIONED BY (name, address);"; + sql(createTableSQL); + SparkTableInfo tableInfo = getTableInfo(tableName); + SparkTableInfoChecker checker = + SparkTableInfoChecker.create() + .withName(tableName) + .withColumns(getPaimonSimpleTableColumn()) + .withIdentifyPartition(Collections.singletonList("name")) + .withIdentifyPartition(Collections.singletonList("address")); + checker.check(tableInfo); + + String insertData = + String.format( + "INSERT into %s values(2,'a','beijing');", + tableName); + sql(insertData); + List queryResult = getTableData(tableName); + Assertions.assertEquals(1, queryResult.size()); + 
Assertions.assertEquals("2,a,beijing", queryResult.get(0)); + Path partitionPath = new Path(getTableLocation(tableInfo), partitionPathString); + checkDirExists(partitionPath); + } + + @Test + void testPaimonPartitionManagement() { + testPaimonCreatePartition(); + testPaimonReplacePartitionMetadata(); + testPaimonLoadPartitionMetadata(); + testPaimonListPartitionIdentifiers(); + testPaimonCreatePartition(); + } + + private void testPaimonDropPartition() { + + } + + private void testPaimonReplacePartitionMetadata() { + + } + + private void testPaimonLoadPartitionMetadata() { + + } + + private void testPaimonListPartitionIdentifiers() { + + } + + private void testPaimonCreatePartition() { + + } + + private String getCreatePaimonSimpleTableString(String tableName) { + return String.format( + "CREATE TABLE %s (id INT COMMENT 'id comment', name STRING COMMENT '', address STRING '') USING paimon", + tableName); + } + + private List getPaimonSimpleTableColumn() { + return Arrays.asList( + SparkTableInfo.SparkColumnInfo.of("id", DataTypes.IntegerType, "id comment"), + SparkTableInfo.SparkColumnInfo.of("name", DataTypes.StringType, ""), + SparkTableInfo.SparkColumnInfo.of("address", DataTypes.StringType, "")); } } From 6de61c2763f6f57ae935323027296b23ae6d731e Mon Sep 17 00:00:00 2001 From: caican Date: Sun, 1 Dec 2024 15:03:36 +0800 Subject: [PATCH 05/25] basic dml basic dml --- .../paimon/GravitinoPaimonCatalog.java | 10 +- .../connector/paimon/SparkPaimonTable.java | 92 +++++++++++++++++++ ...SparkPaimonCatalogFilesystemBackendIT.java | 1 - .../test/paimon/SparkPaimonCatalogIT.java | 53 ++++++++++- 4 files changed, 151 insertions(+), 5 deletions(-) create mode 100644 spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/SparkPaimonTable.java diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java 
b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java index 3a1b210055a..5a7b9a8c7ec 100644 --- a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java @@ -26,6 +26,7 @@ import org.apache.gravitino.spark.connector.SparkTypeConverter; import org.apache.gravitino.spark.connector.catalog.BaseCatalog; import org.apache.paimon.spark.SparkCatalog; +import org.apache.paimon.spark.SparkTable; import org.apache.spark.sql.connector.catalog.Identifier; import org.apache.spark.sql.connector.catalog.Table; import org.apache.spark.sql.connector.catalog.TableCatalog; @@ -53,8 +54,13 @@ protected Table createSparkTable( PropertiesConverter propertiesConverter, SparkTransformConverter sparkTransformConverter, SparkTypeConverter sparkTypeConverter) { - throw new UnsupportedOperationException( - "`createSparkTable` operation is unsupported for paimon spark connector now."); + return new SparkPaimonTable( + identifier, + gravitinoTable, + (SparkTable) sparkTable, + propertiesConverter, + sparkTransformConverter, + sparkTypeConverter); } @Override diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/SparkPaimonTable.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/SparkPaimonTable.java new file mode 100644 index 00000000000..557cf13bec6 --- /dev/null +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/SparkPaimonTable.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.spark.connector.paimon; + +import java.util.Map; +import org.apache.gravitino.rel.Table; +import org.apache.gravitino.spark.connector.PropertiesConverter; +import org.apache.gravitino.spark.connector.SparkTransformConverter; +import org.apache.gravitino.spark.connector.SparkTypeConverter; +import org.apache.gravitino.spark.connector.utils.GravitinoTableInfoHelper; +import org.apache.paimon.spark.SparkTable; +import org.apache.spark.sql.connector.catalog.Identifier; +import org.apache.spark.sql.connector.expressions.Transform; +import org.apache.spark.sql.connector.read.ScanBuilder; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.util.CaseInsensitiveStringMap; + +/** + * For spark-connector in Paimon, it explicitly uses SparkTable to identify whether it is an Apache + * Paimon table, so the SparkPaimonTable must extend SparkTable. 
+ */ +public class SparkPaimonTable extends SparkTable { + + private GravitinoTableInfoHelper gravitinoTableInfoHelper; + private org.apache.spark.sql.connector.catalog.Table sparkTable; + + public SparkPaimonTable( + Identifier identifier, + Table gravitinoTable, + SparkTable sparkTable, + PropertiesConverter propertiesConverter, + SparkTransformConverter sparkTransformConverter, + SparkTypeConverter sparkTypeConverter) { + super(sparkTable.getTable()); + this.gravitinoTableInfoHelper = + new GravitinoTableInfoHelper( + true, + identifier, + gravitinoTable, + propertiesConverter, + sparkTransformConverter, + sparkTypeConverter); + this.sparkTable = sparkTable; + } + + @Override + public String name() { + return gravitinoTableInfoHelper.name(); + } + + @Override + @SuppressWarnings("deprecation") + public StructType schema() { + return gravitinoTableInfoHelper.schema(); + } + + @Override + public Map properties() { + return gravitinoTableInfoHelper.properties(); + } + + @Override + public Transform[] partitioning() { + return gravitinoTableInfoHelper.partitioning(); + } + + /** + * If using SparkPaimonTable not SparkTable, we must extract snapshotId or branchName using the + * Paimon specific logic. It's hard to maintenance. 
+ */ + @Override + public ScanBuilder newScanBuilder(CaseInsensitiveStringMap options) { + return ((SparkTable) sparkTable).newScanBuilder(options); + } +} diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java index 778f97abf7f..60d5bddbf8d 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java @@ -34,7 +34,6 @@ protected Map getCatalogConfigs() { PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND, PaimonPropertiesConstants.PAIMON_CATALOG_BACKEND_FILESYSTEM); catalogProperties.put(PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_WAREHOUSE, warehouse); - catalogProperties.put(PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_URI, hiveMetastoreUri); return catalogProperties; } } diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java index 37248ebacf4..9ae1afaf77e 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java @@ -18,7 +18,16 @@ */ package org.apache.gravitino.spark.connector.integration.test.paimon; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; import 
org.apache.gravitino.spark.connector.integration.test.SparkCommonIT; +import org.apache.gravitino.spark.connector.integration.test.util.SparkTableInfo; +import org.apache.gravitino.spark.connector.integration.test.util.SparkTableInfoChecker; +import org.apache.hadoop.fs.Path; +import org.apache.spark.sql.types.DataTypes; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; public abstract class SparkPaimonCatalogIT extends SparkCommonIT { @@ -39,7 +48,7 @@ protected boolean supportsSparkSQLClusteredBy() { @Override protected boolean supportsPartition() { - return false; + return true; } @Override @@ -49,6 +58,46 @@ protected boolean supportsDelete() { @Override protected boolean supportsSchemaEvolution() { - return false; + return true; + } + + @Test + void testPaimonPartitions() { + String partitionPathString = "name=a/address=beijing"; + + String tableName = "test_paimon_partition_table"; + dropTableIfExists(tableName); + String createTableSQL = getCreatePaimonSimpleTableString(tableName); + createTableSQL = createTableSQL + " PARTITIONED BY (name, address);"; + sql(createTableSQL); + SparkTableInfo tableInfo = getTableInfo(tableName); + SparkTableInfoChecker checker = + SparkTableInfoChecker.create() + .withName(tableName) + .withColumns(getPaimonSimpleTableColumn()) + .withIdentifyPartition(Collections.singletonList("name")) + .withIdentifyPartition(Collections.singletonList("address")); + checker.check(tableInfo); + + String insertData = String.format("INSERT into %s values(2,'a','beijing');", tableName); + sql(insertData); + List queryResult = getTableData(tableName); + Assertions.assertEquals(1, queryResult.size()); + Assertions.assertEquals("2,a,beijing", queryResult.get(0)); + Path partitionPath = new Path(getTableLocation(tableInfo), partitionPathString); + checkDirExists(partitionPath); + } + + private String getCreatePaimonSimpleTableString(String tableName) { + return String.format( + "CREATE TABLE %s (id INT COMMENT 'id 
comment', name STRING COMMENT '', address STRING '') USING paimon", + tableName); + } + + private List getPaimonSimpleTableColumn() { + return Arrays.asList( + SparkTableInfo.SparkColumnInfo.of("id", DataTypes.IntegerType, "id comment"), + SparkTableInfo.SparkColumnInfo.of("name", DataTypes.StringType, ""), + SparkTableInfo.SparkColumnInfo.of("address", DataTypes.StringType, "")); } } From 17690df0988c2296544390312dd21c489f21b56e Mon Sep 17 00:00:00 2001 From: caican Date: Sun, 1 Dec 2024 14:18:52 +0800 Subject: [PATCH 06/25] basic schema ddl --- .../lakehouse/paimon/PaimonConstants.java | 57 ++++++ .../paimon/PaimonPropertiesUtils.java | 95 ++++++++++ .../PaimonCatalogPropertiesMetadata.java | 26 +-- .../PaimonSchemaPropertiesMetadata.java | 2 +- .../paimon/PaimonTablePropertiesMetadata.java | 16 +- .../storage/PaimonOSSFileSystemConfig.java | 7 +- .../storage/PaimonS3FileSystemConfig.java | 7 +- spark-connector/spark-common/build.gradle.kts | 3 + .../paimon/GravitinoPaimonCatalog.java | 69 +++++++ .../paimon/PaimonPropertiesConstants.java | 49 +++++ .../paimon/PaimonPropertiesConverter.java | 64 +++++++ .../connector/version/CatalogNameAdaptor.java | 21 ++- .../integration/test/SparkCommonIT.java | 44 ++--- ...SparkPaimonCatalogFilesystemBackendIT.java | 71 +++++++ .../test/paimon/SparkPaimonCatalogIT.java | 175 ++++++++++++++++++ .../integration/test/util/SparkUtilIT.java | 4 +- .../paimon/TestPaimonPropertiesConverter.java | 106 +++++++++++ spark-connector/v3.3/spark/build.gradle.kts | 3 + .../paimon/GravitinoPaimonCatalogSpark33.java | 21 +++ ...arkPaimonCatalogFilesystemBackendIT33.java | 35 ++++ .../version/TestCatalogNameAdaptor.java | 4 + spark-connector/v3.4/spark/build.gradle.kts | 3 + .../paimon/GravitinoPaimonCatalogSpark34.java | 37 ++++ ...arkPaimonCatalogFilesystemBackendIT34.java | 36 ++++ .../version/TestCatalogNameAdaptor.java | 4 + spark-connector/v3.5/spark/build.gradle.kts | 3 + .../paimon/GravitinoPaimonCatalogSpark35.java | 21 +++ 
...arkPaimonCatalogFilesystemBackendIT35.java | 36 ++++ .../version/TestCatalogNameAdaptor.java | 4 + 29 files changed, 965 insertions(+), 58 deletions(-) create mode 100644 catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonConstants.java create mode 100644 catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonPropertiesUtils.java create mode 100644 spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java create mode 100644 spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConstants.java create mode 100644 spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConverter.java create mode 100644 spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java create mode 100644 spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java create mode 100644 spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/paimon/TestPaimonPropertiesConverter.java create mode 100644 spark-connector/v3.3/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark33.java create mode 100644 spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java create mode 100644 spark-connector/v3.4/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark34.java create mode 100644 spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java create mode 100644 
spark-connector/v3.5/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark35.java create mode 100644 spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java diff --git a/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonConstants.java b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonConstants.java new file mode 100644 index 00000000000..291a7ea9694 --- /dev/null +++ b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonConstants.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.catalog.lakehouse.paimon; + +public class PaimonConstants { + + // Paimon catalog properties constants + public static final String CATALOG_BACKEND = "catalog-backend"; + public static final String METASTORE = "metastore"; + public static final String URI = "uri"; + public static final String WAREHOUSE = "warehouse"; + public static final String CATALOG_BACKEND_NAME = "catalog-backend-name"; + + public static final String GRAVITINO_JDBC_USER = "jdbc-user"; + public static final String PAIMON_JDBC_USER = "jdbc.user"; + + public static final String GRAVITINO_JDBC_PASSWORD = "jdbc-password"; + public static final String PAIMON_JDBC_PASSWORD = "jdbc.password"; + + public static final String GRAVITINO_JDBC_DRIVER = "jdbc-driver"; + + // S3 properties needed by Paimon + public static final String S3_ENDPOINT = "s3.endpoint"; + public static final String S3_ACCESS_KEY = "s3.access-key"; + public static final String S3_SECRET_KEY = "s3.secret-key"; + + // OSS related properties + public static final String OSS_ENDPOINT = "fs.oss.endpoint"; + public static final String OSS_ACCESS_KEY = "fs.oss.accessKeyId"; + public static final String OSS_SECRET_KEY = "fs.oss.accessKeySecret"; + + // Iceberg Table properties constants + public static final String COMMENT = "comment"; + public static final String OWNER = "owner"; + public static final String BUCKET_KEY = "bucket-key"; + public static final String MERGE_ENGINE = "merge-engine"; + public static final String SEQUENCE_FIELD = "sequence.field"; + public static final String ROWKIND_FIELD = "rowkind.field"; + public static final String PRIMARY_KEY = "primary-key"; + public static final String PARTITION = "partition"; +} diff --git a/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonPropertiesUtils.java b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonPropertiesUtils.java new file mode 100644 index 
00000000000..0dcf24f3a67 --- /dev/null +++ b/catalogs/catalog-common/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonPropertiesUtils.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.catalog.lakehouse.paimon; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import java.util.Optional; +import org.apache.gravitino.storage.OSSProperties; +import org.apache.gravitino.storage.S3Properties; + +public class PaimonPropertiesUtils { + + // Map that maintains the mapping of keys in Gravitino to that in Paimon, for example, users + // will only need to set the configuration 'catalog-backend' in Gravitino and Gravitino will + // change it to `catalogType` automatically and pass it to Paimon. 
+ public static final Map GRAVITINO_CONFIG_TO_PAIMON; + + static { + Map map = new HashMap(); + map.put(PaimonConstants.CATALOG_BACKEND, PaimonConstants.CATALOG_BACKEND); + map.put(PaimonConstants.GRAVITINO_JDBC_DRIVER, PaimonConstants.GRAVITINO_JDBC_DRIVER); + map.put(PaimonConstants.GRAVITINO_JDBC_USER, PaimonConstants.PAIMON_JDBC_USER); + map.put(PaimonConstants.GRAVITINO_JDBC_PASSWORD, PaimonConstants.PAIMON_JDBC_PASSWORD); + map.put(PaimonConstants.URI, PaimonConstants.URI); + map.put(PaimonConstants.WAREHOUSE, PaimonConstants.WAREHOUSE); + map.put(PaimonConstants.CATALOG_BACKEND_NAME, PaimonConstants.CATALOG_BACKEND_NAME); + // S3 + map.put(S3Properties.GRAVITINO_S3_ENDPOINT, PaimonConstants.S3_ENDPOINT); + map.put(S3Properties.GRAVITINO_S3_ACCESS_KEY_ID, PaimonConstants.S3_ACCESS_KEY); + map.put(S3Properties.GRAVITINO_S3_SECRET_ACCESS_KEY, PaimonConstants.S3_SECRET_KEY); + // OSS + map.put(OSSProperties.GRAVITINO_OSS_ENDPOINT, PaimonConstants.OSS_ENDPOINT); + map.put(OSSProperties.GRAVITINO_OSS_ACCESS_KEY_ID, PaimonConstants.OSS_ACCESS_KEY); + map.put(OSSProperties.GRAVITINO_OSS_ACCESS_KEY_SECRET, PaimonConstants.OSS_SECRET_KEY); + GRAVITINO_CONFIG_TO_PAIMON = Collections.unmodifiableMap(map); + } + + /** + * Converts Gravitino properties to Paimon catalog properties, the common transform logic shared + * by Spark connector, Gravitino Paimon catalog. + * + * @param gravitinoProperties a map of Gravitino configuration properties. + * @return a map containing Paimon catalog properties. + */ + public static Map toPaimonCatalogProperties( + Map gravitinoProperties) { + Map paimonProperties = new HashMap<>(); + gravitinoProperties.forEach( + (key, value) -> { + if (GRAVITINO_CONFIG_TO_PAIMON.containsKey(key)) { + paimonProperties.put(GRAVITINO_CONFIG_TO_PAIMON.get(key), value); + } + }); + return paimonProperties; + } + + /** + * Get catalog backend name from Gravitino catalog properties. + * + * @param catalogProperties a map of Gravitino catalog properties. 
+ * @return catalog backend name. + */ + public static String getCatalogBackendName(Map catalogProperties) { + String backendName = catalogProperties.get(PaimonConstants.CATALOG_BACKEND_NAME); + if (backendName != null) { + return backendName; + } + + String catalogBackend = catalogProperties.get(PaimonConstants.CATALOG_BACKEND); + return Optional.ofNullable(catalogBackend) + .map(s -> s.toLowerCase(Locale.ROOT)) + .orElseThrow( + () -> + new UnsupportedOperationException( + String.format("Unsupported catalog backend: %s", catalogBackend))); + } +} diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java index e3b59bff36d..4c9dcb07a80 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonCatalogPropertiesMetadata.java @@ -45,20 +45,22 @@ */ public class PaimonCatalogPropertiesMetadata extends BaseCatalogPropertiesMetadata { - @VisibleForTesting public static final String GRAVITINO_CATALOG_BACKEND = "catalog-backend"; - public static final String PAIMON_METASTORE = "metastore"; - public static final String WAREHOUSE = "warehouse"; - public static final String URI = "uri"; - public static final String GRAVITINO_JDBC_USER = "jdbc-user"; - public static final String PAIMON_JDBC_USER = "jdbc.user"; - public static final String GRAVITINO_JDBC_PASSWORD = "jdbc-password"; - public static final String PAIMON_JDBC_PASSWORD = "jdbc.password"; - public static final String GRAVITINO_JDBC_DRIVER = "jdbc-driver"; + @VisibleForTesting + public static final String GRAVITINO_CATALOG_BACKEND = PaimonConstants.CATALOG_BACKEND; + + public static final String PAIMON_METASTORE = 
PaimonConstants.METASTORE; + public static final String WAREHOUSE = PaimonConstants.WAREHOUSE; + public static final String URI = PaimonConstants.URI; + public static final String GRAVITINO_JDBC_USER = PaimonConstants.GRAVITINO_JDBC_USER; + public static final String PAIMON_JDBC_USER = PaimonConstants.PAIMON_JDBC_USER; + public static final String GRAVITINO_JDBC_PASSWORD = PaimonConstants.GRAVITINO_JDBC_PASSWORD; + public static final String PAIMON_JDBC_PASSWORD = PaimonConstants.PAIMON_JDBC_PASSWORD; + public static final String GRAVITINO_JDBC_DRIVER = PaimonConstants.GRAVITINO_JDBC_DRIVER; // S3 properties needed by Paimon - public static final String S3_ENDPOINT = "s3.endpoint"; - public static final String S3_ACCESS_KEY = "s3.access-key"; - public static final String S3_SECRET_KEY = "s3.secret-key"; + public static final String S3_ENDPOINT = PaimonConstants.S3_ENDPOINT; + public static final String S3_ACCESS_KEY = PaimonConstants.S3_ACCESS_KEY; + public static final String S3_SECRET_KEY = PaimonConstants.S3_SECRET_KEY; public static final Map GRAVITINO_CONFIG_TO_PAIMON = ImmutableMap.of( diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonSchemaPropertiesMetadata.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonSchemaPropertiesMetadata.java index 9a6ddb5a165..3da05099cc4 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonSchemaPropertiesMetadata.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonSchemaPropertiesMetadata.java @@ -34,7 +34,7 @@ */ public class PaimonSchemaPropertiesMetadata extends BasePropertiesMetadata { - public static final String COMMENT = "comment"; + public static final String COMMENT = PaimonConstants.COMMENT; private static final Map> PROPERTIES_METADATA; diff --git 
a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonTablePropertiesMetadata.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonTablePropertiesMetadata.java index 671dd9d6682..ad63df6783f 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonTablePropertiesMetadata.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonTablePropertiesMetadata.java @@ -35,14 +35,14 @@ */ public class PaimonTablePropertiesMetadata extends BasePropertiesMetadata { - public static final String COMMENT = "comment"; - public static final String OWNER = "owner"; - public static final String BUCKET_KEY = "bucket-key"; - public static final String MERGE_ENGINE = "merge-engine"; - public static final String SEQUENCE_FIELD = "sequence.field"; - public static final String ROWKIND_FIELD = "rowkind.field"; - public static final String PRIMARY_KEY = "primary-key"; - public static final String PARTITION = "partition"; + public static final String COMMENT = PaimonConstants.COMMENT; + public static final String OWNER = PaimonConstants.OWNER; + public static final String BUCKET_KEY = PaimonConstants.BUCKET_KEY; + public static final String MERGE_ENGINE = PaimonConstants.MERGE_ENGINE; + public static final String SEQUENCE_FIELD = PaimonConstants.SEQUENCE_FIELD; + public static final String ROWKIND_FIELD = PaimonConstants.ROWKIND_FIELD; + public static final String PRIMARY_KEY = PaimonConstants.PRIMARY_KEY; + public static final String PARTITION = PaimonConstants.PARTITION; private static final Map> PROPERTIES_METADATA; diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonOSSFileSystemConfig.java 
b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonOSSFileSystemConfig.java index ad7fa26f3bc..7b703b5b74a 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonOSSFileSystemConfig.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonOSSFileSystemConfig.java @@ -22,6 +22,7 @@ import java.util.Map; import org.apache.commons.lang3.StringUtils; import org.apache.gravitino.Config; +import org.apache.gravitino.catalog.lakehouse.paimon.PaimonConstants; import org.apache.gravitino.config.ConfigBuilder; import org.apache.gravitino.config.ConfigConstants; import org.apache.gravitino.config.ConfigEntry; @@ -29,9 +30,9 @@ public class PaimonOSSFileSystemConfig extends Config { // OSS related properties - public static final String OSS_ENDPOINT = "fs.oss.endpoint"; - public static final String OSS_ACCESS_KEY = "fs.oss.accessKeyId"; - public static final String OSS_SECRET_KEY = "fs.oss.accessKeySecret"; + public static final String OSS_ENDPOINT = PaimonConstants.OSS_ENDPOINT; + public static final String OSS_ACCESS_KEY = PaimonConstants.OSS_ACCESS_KEY; + public static final String OSS_SECRET_KEY = PaimonConstants.OSS_SECRET_KEY; public PaimonOSSFileSystemConfig(Map properties) { super(false); diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonS3FileSystemConfig.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonS3FileSystemConfig.java index 4184fcc06f1..6588e4a5268 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonS3FileSystemConfig.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/storage/PaimonS3FileSystemConfig.java @@ -22,6 +22,7 
@@ import java.util.Map; import org.apache.commons.lang3.StringUtils; import org.apache.gravitino.Config; +import org.apache.gravitino.catalog.lakehouse.paimon.PaimonConstants; import org.apache.gravitino.config.ConfigBuilder; import org.apache.gravitino.config.ConfigConstants; import org.apache.gravitino.config.ConfigEntry; @@ -29,9 +30,9 @@ public class PaimonS3FileSystemConfig extends Config { // S3 related properties - public static final String S3_ENDPOINT = "s3.endpoint"; - public static final String S3_ACCESS_KEY = "s3.access-key"; - public static final String S3_SECRET_KEY = "s3.secret-key"; + public static final String S3_ENDPOINT = PaimonConstants.S3_ENDPOINT; + public static final String S3_ACCESS_KEY = PaimonConstants.S3_ACCESS_KEY; + public static final String S3_SECRET_KEY = PaimonConstants.S3_SECRET_KEY; public PaimonS3FileSystemConfig(Map properties) { super(false); diff --git a/spark-connector/spark-common/build.gradle.kts b/spark-connector/spark-common/build.gradle.kts index 7f3c66aa6e6..dc0af57a00c 100644 --- a/spark-connector/spark-common/build.gradle.kts +++ b/spark-connector/spark-common/build.gradle.kts @@ -31,6 +31,7 @@ val scalaVersion: String = project.properties["scalaVersion"] as? 
String ?: extr val sparkVersion: String = libs.versions.spark33.get() val sparkMajorVersion: String = sparkVersion.substringBeforeLast(".") val icebergVersion: String = libs.versions.iceberg4spark.get() +val paimonVersion: String = libs.versions.paimon.get() // kyuubi hive connector for Spark 3.3 doesn't support scala 2.13 val kyuubiVersion: String = libs.versions.kyuubi4spark34.get() val scalaJava8CompatVersion: String = libs.versions.scala.java.compat.get() @@ -43,6 +44,7 @@ dependencies { compileOnly(project(":clients:client-java-runtime", configuration = "shadow")) compileOnly("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") compileOnly("org.apache.kyuubi:kyuubi-spark-connector-hive_$scalaVersion:$kyuubiVersion") + compileOnly("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") compileOnly("org.apache.spark:spark-catalyst_$scalaVersion:$sparkVersion") compileOnly("org.apache.spark:spark-core_$scalaVersion:$sparkVersion") @@ -114,6 +116,7 @@ dependencies { testImplementation("org.apache.iceberg:iceberg-core:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-hive-metastore:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") + testImplementation("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") testImplementation("org.apache.kyuubi:kyuubi-spark-connector-hive_$scalaVersion:$kyuubiVersion") // include spark-sql,spark-catalyst,hive-common,hdfs-client testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") { diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java new file mode 100644 index 00000000000..3a1b210055a --- /dev/null +++ 
b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.spark.connector.paimon; + +import java.util.Map; +import org.apache.gravitino.catalog.lakehouse.paimon.PaimonPropertiesUtils; +import org.apache.gravitino.spark.connector.PropertiesConverter; +import org.apache.gravitino.spark.connector.SparkTransformConverter; +import org.apache.gravitino.spark.connector.SparkTypeConverter; +import org.apache.gravitino.spark.connector.catalog.BaseCatalog; +import org.apache.paimon.spark.SparkCatalog; +import org.apache.spark.sql.connector.catalog.Identifier; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.catalog.TableCatalog; +import org.apache.spark.sql.util.CaseInsensitiveStringMap; + +public class GravitinoPaimonCatalog extends BaseCatalog { + + @Override + protected TableCatalog createAndInitSparkCatalog( + String name, CaseInsensitiveStringMap options, Map properties) { + String catalogBackendName = PaimonPropertiesUtils.getCatalogBackendName(properties); + TableCatalog paimonCatalog = new SparkCatalog(); + Map 
all = + getPropertiesConverter().toSparkCatalogProperties(options, properties); + paimonCatalog.initialize(catalogBackendName, new CaseInsensitiveStringMap(all)); + return paimonCatalog; + } + + @Override + protected Table createSparkTable( + Identifier identifier, + org.apache.gravitino.rel.Table gravitinoTable, + Table sparkTable, + TableCatalog sparkCatalog, + PropertiesConverter propertiesConverter, + SparkTransformConverter sparkTransformConverter, + SparkTypeConverter sparkTypeConverter) { + throw new UnsupportedOperationException( + "`createSparkTable` operation is unsupported for paimon spark connector now."); + } + + @Override + protected PropertiesConverter getPropertiesConverter() { + return PaimonPropertiesConverter.getInstance(); + } + + @Override + protected SparkTransformConverter getSparkTransformConverter() { + return new SparkTransformConverter(true); + } +} diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConstants.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConstants.java new file mode 100644 index 00000000000..67190753e64 --- /dev/null +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConstants.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.spark.connector.paimon; + +import org.apache.gravitino.catalog.lakehouse.paimon.PaimonConstants; + +public class PaimonPropertiesConstants { + + public static final String GRAVITINO_PAIMON_CATALOG_BACKEND = PaimonConstants.CATALOG_BACKEND; + static final String PAIMON_CATALOG_METASTORE = PaimonConstants.METASTORE; + + public static final String GRAVITINO_PAIMON_CATALOG_WAREHOUSE = PaimonConstants.WAREHOUSE; + static final String PAIMON_CATALOG_WAREHOUSE = PaimonConstants.WAREHOUSE; + + public static final String GRAVITINO_PAIMON_CATALOG_URI = PaimonConstants.URI; + static final String PAIMON_CATALOG_URI = PaimonConstants.URI; + static final String GRAVITINO_PAIMON_CATALOG_JDBC_USER = PaimonConstants.GRAVITINO_JDBC_USER; + static final String PAIMON_CATALOG_JDBC_USER = PaimonConstants.PAIMON_JDBC_USER; + + static final String GRAVITINO_PAIMON_CATALOG_JDBC_PASSWORD = + PaimonConstants.GRAVITINO_JDBC_PASSWORD; + static final String PAIMON_CATALOG_JDBC_PASSWORD = PaimonConstants.PAIMON_JDBC_PASSWORD; + + public static final String PAIMON_CATALOG_BACKEND_HIVE = "hive"; + static final String GRAVITINO_PAIMON_CATALOG_BACKEND_HIVE = "hive"; + + static final String GRAVITINO_PAIMON_CATALOG_BACKEND_JDBC = "jdbc"; + static final String PAIMON_CATALOG_BACKEND_JDBC = "jdbc"; + + public static final String PAIMON_CATALOG_BACKEND_FILESYSTEM = "filesystem"; + static final String GRAVITINO_PAIMON_CATALOG_BACKEND_FILESYSTEM = "filesystem"; +} diff --git 
a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConverter.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConverter.java new file mode 100644 index 00000000000..335afae7c9d --- /dev/null +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConverter.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.spark.connector.paimon; + +import com.google.common.base.Preconditions; +import java.util.Map; +import org.apache.commons.lang3.StringUtils; +import org.apache.gravitino.catalog.lakehouse.paimon.PaimonPropertiesUtils; +import org.apache.gravitino.spark.connector.PropertiesConverter; + +public class PaimonPropertiesConverter implements PropertiesConverter { + + public static class PaimonPropertiesConverterHolder { + private static final PaimonPropertiesConverter INSTANCE = new PaimonPropertiesConverter(); + } + + private PaimonPropertiesConverter() {} + + public static PaimonPropertiesConverter getInstance() { + return PaimonPropertiesConverter.PaimonPropertiesConverterHolder.INSTANCE; + } + + @Override + public Map toSparkCatalogProperties(Map properties) { + Preconditions.checkArgument(properties != null, "Paimon Catalog properties should not be null"); + Map all = PaimonPropertiesUtils.toPaimonCatalogProperties(properties); + String catalogBackend = all.remove(PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND); + Preconditions.checkArgument( + StringUtils.isNotBlank(catalogBackend), + String.format( + "%s should not be empty", PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND)); + all.put(PaimonPropertiesConstants.PAIMON_CATALOG_METASTORE, catalogBackend); + return all; + } + + @Override + public Map toGravitinoTableProperties(Map properties) { + throw new UnsupportedOperationException( + "`toGravitinoTableProperties` operation is unsupported now."); + } + + @Override + public Map toSparkTableProperties(Map properties) { + throw new UnsupportedOperationException( + "`toSparkTableProperties` operation is unsupported now."); + } +} diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/version/CatalogNameAdaptor.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/version/CatalogNameAdaptor.java index 8141c799bf8..9392feac2f1 100644 --- 
a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/version/CatalogNameAdaptor.java +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/version/CatalogNameAdaptor.java @@ -27,15 +27,24 @@ public class CatalogNameAdaptor { private static final Map catalogNames = ImmutableMap.of( - "hive-3.3", "org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark33", - "hive-3.4", "org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark34", - "hive-3.5", "org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark35", + "hive-3.3", + "org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark33", + "hive-3.4", + "org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark34", + "hive-3.5", + "org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark35", "lakehouse-iceberg-3.3", - "org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark33", + "org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark33", "lakehouse-iceberg-3.4", - "org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark34", + "org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark34", "lakehouse-iceberg-3.5", - "org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark35"); + "org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark35", + "lakehouse-paimon-3.3", + "org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark33", + "lakehouse-paimon-3.4", + "org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark34", + "lakehouse-paimon-3.5", + "org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark35"); private static String sparkVersion() { return package$.MODULE$.SPARK_VERSION(); diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java 
b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java index 63e4801ef94..55ba9853e86 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java @@ -168,7 +168,7 @@ void cleanUp() { } @Test - void testListTables() { + protected void testListTables() { String tableName = "t_list"; dropTableIfExists(tableName); Set tableNames = listTableNames(); @@ -187,7 +187,7 @@ void testLoadCatalogs() { } @Test - void testCreateAndLoadSchema() { + protected void testCreateAndLoadSchema() { String testDatabaseName = "t_create1"; dropDatabaseIfExists(testDatabaseName); sql("CREATE DATABASE " + testDatabaseName + " WITH DBPROPERTIES (ID=001);"); @@ -216,7 +216,7 @@ void testCreateAndLoadSchema() { } @Test - void testAlterSchema() { + protected void testAlterSchema() { String testDatabaseName = "t_alter"; dropDatabaseIfExists(testDatabaseName); sql("CREATE DATABASE " + testDatabaseName + " WITH DBPROPERTIES (ID=001);"); @@ -256,7 +256,7 @@ void testDropSchema() { } @Test - void testCreateSimpleTable() { + protected void testCreateSimpleTable() { String tableName = "simple_table"; dropTableIfExists(tableName); createSimpleTable(tableName); @@ -273,7 +273,7 @@ void testCreateSimpleTable() { } @Test - void testCreateTableWithDatabase() { + protected void testCreateTableWithDatabase() { // test db.table as table identifier String databaseName = "db1"; String tableName = "table1"; @@ -304,7 +304,7 @@ void testCreateTableWithDatabase() { } @Test - void testCreateTableWithComment() { + protected void testCreateTableWithComment() { String tableName = "comment_table"; dropTableIfExists(tableName); String createTableSql = getCreateSimpleTableString(tableName); @@ -324,7 +324,7 @@ void testCreateTableWithComment() { } @Test - void 
testDropTable() { + protected void testDropTable() { String tableName = "drop_table"; createSimpleTable(tableName); Assertions.assertEquals(true, tableExists(tableName)); @@ -337,7 +337,7 @@ void testDropTable() { } @Test - void testRenameTable() { + protected void testRenameTable() { String tableName = "rename1"; String newTableName = "rename2"; dropTableIfExists(tableName); @@ -364,7 +364,7 @@ void testRenameTable() { } @Test - void testListTable() { + protected void testListTable() { String table1 = "list1"; String table2 = "list2"; dropTableIfExists(table1); @@ -393,7 +393,7 @@ void testListTable() { } @Test - void testAlterTableSetAndRemoveProperty() { + protected void testAlterTableSetAndRemoveProperty() { String tableName = "test_property"; dropTableIfExists(tableName); @@ -411,7 +411,7 @@ void testAlterTableSetAndRemoveProperty() { } @Test - void testAlterTableUpdateComment() { + protected void testAlterTableUpdateComment() { String tableName = "test_comment"; String comment = "comment1"; dropTableIfExists(tableName); @@ -428,7 +428,7 @@ void testAlterTableUpdateComment() { } @Test - void testAlterTableAddAndDeleteColumn() { + protected void testAlterTableAddAndDeleteColumn() { String tableName = "test_column"; dropTableIfExists(tableName); @@ -447,7 +447,7 @@ void testAlterTableAddAndDeleteColumn() { } @Test - void testAlterTableUpdateColumnType() { + protected void testAlterTableUpdateColumnType() { String tableName = "test_column_type"; dropTableIfExists(tableName); @@ -464,7 +464,7 @@ void testAlterTableUpdateColumnType() { } @Test - void testAlterTableRenameColumn() { + protected void testAlterTableRenameColumn() { String tableName = "test_rename_column"; dropTableIfExists(tableName); List simpleTableColumns = getSimpleTableColumn(); @@ -484,7 +484,7 @@ void testAlterTableRenameColumn() { } @Test - void testUpdateColumnPosition() { + protected void testUpdateColumnPosition() { String tableName = "test_column_position"; dropTableIfExists(tableName); @@ 
-527,7 +527,7 @@ void testUpdateColumnPosition() { } @Test - void testAlterTableUpdateColumnComment() { + protected void testAlterTableUpdateColumnComment() { String tableName = "test_update_column_comment"; dropTableIfExists(tableName); List simpleTableColumns = getSimpleTableColumn(); @@ -550,7 +550,7 @@ void testAlterTableUpdateColumnComment() { } @Test - void testAlterTableReplaceColumns() { + protected void testAlterTableReplaceColumns() { String tableName = "test_replace_columns_table"; dropTableIfExists(tableName); @@ -589,7 +589,7 @@ void testAlterTableReplaceColumns() { } @Test - void testComplexType() { + protected void testComplexType() { String tableName = "complex_type_table"; dropTableIfExists(tableName); @@ -678,7 +678,7 @@ void testCreateSortBucketTable() { // Spark CTAS doesn't copy table properties and partition schema from source table. @Test - void testCreateTableAsSelect() { + protected void testCreateTableAsSelect() { String tableName = "ctas_table"; dropTableIfExists(tableName); createSimpleTable(tableName); @@ -700,7 +700,7 @@ void testCreateTableAsSelect() { } @Test - void testInsertTableAsSelect() { + protected void testInsertTableAsSelect() { String tableName = "insert_select_table"; String newTableName = "new_" + tableName; @@ -793,7 +793,7 @@ protected void deleteDirIfExists(String path) { } @Test - void testTableOptions() { + protected void testTableOptions() { String tableName = "options_table"; dropTableIfExists(tableName); String createTableSql = getCreateSimpleTableString(tableName); @@ -810,7 +810,7 @@ void testTableOptions() { } @Test - void testDropAndWriteTable() { + protected void testDropAndWriteTable() { String tableName = "drop_then_create_write_table"; createSimpleTable(tableName); diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java 
b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java new file mode 100644 index 00000000000..3d4a3257a91 --- /dev/null +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.paimon; + +import com.google.common.collect.Maps; +import java.util.Map; +import org.apache.gravitino.spark.connector.paimon.PaimonPropertiesConstants; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +/** This class use Apache Paimon FilesystemCatalog for backend catalog. 
*/ +@Tag("gravitino-docker-test") +public abstract class SparkPaimonCatalogFilesystemBackendIT extends SparkPaimonCatalogIT { + + @Override + protected Map getCatalogConfigs() { + Map catalogProperties = Maps.newHashMap(); + catalogProperties.put( + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND, + PaimonPropertiesConstants.PAIMON_CATALOG_BACKEND_FILESYSTEM); + catalogProperties.put(PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_WAREHOUSE, warehouse); + return catalogProperties; + } + + @Test + @Override + protected void testCreateAndLoadSchema() { + String testDatabaseName = "t_create1"; + dropDatabaseIfExists(testDatabaseName); + sql("CREATE DATABASE " + testDatabaseName + " WITH DBPROPERTIES (ID=001);"); + Map databaseMeta = getDatabaseMetadata(testDatabaseName); + // The database of the Paimon filesystem backend do not store any properties. + Assertions.assertFalse(databaseMeta.containsKey("ID")); + } + + @Test + @Override + protected void testAlterSchema() { + String testDatabaseName = "t_alter"; + dropDatabaseIfExists(testDatabaseName); + sql("CREATE DATABASE " + testDatabaseName + " WITH DBPROPERTIES (ID=001);"); + Map databaseMeta = getDatabaseMetadata(testDatabaseName); + // The database of the Paimon filesystem backend do not store any properties. + Assertions.assertFalse(databaseMeta.containsKey("ID")); + + // The Paimon filesystem backend do not support alter database operation. 
+ Assertions.assertThrows( + UnsupportedOperationException.class, + () -> + sql( + String.format( + "ALTER DATABASE %s SET DBPROPERTIES ('ID'='002')", testDatabaseName))); + } +} diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java new file mode 100644 index 00000000000..f0f1e0791e6 --- /dev/null +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.spark.connector.integration.test.paimon; + +import org.apache.gravitino.spark.connector.integration.test.SparkCommonIT; +import org.junit.jupiter.api.Test; + +public abstract class SparkPaimonCatalogIT extends SparkCommonIT { + + @Override + protected String getCatalogName() { + return "paimon"; + } + + @Override + protected String getProvider() { + return "lakehouse-paimon"; + } + + @Override + protected boolean supportsSparkSQLClusteredBy() { + return false; + } + + @Override + protected boolean supportsPartition() { + return false; + } + + @Override + protected boolean supportsDelete() { + return false; + } + + @Override + protected boolean supportsSchemaEvolution() { + return false; + } + + @Test + @Override + protected void testListTables() { + // TODO: implement table operations. + } + + @Test + @Override + protected void testCreateSimpleTable() { + // TODO: implement table operations. + } + + @Test + @Override + protected void testCreateTableWithDatabase() { + // TODO: implement table operations. + } + + @Test + @Override + protected void testCreateTableWithComment() { + // TODO: implement table operations. + } + + @Test + @Override + protected void testDropTable() { + // TODO: implement table operations. + } + + @Test + @Override + protected void testRenameTable() { + // TODO: implement table operations. + } + + @Test + @Override + protected void testListTable() { + // TODO: implement table operations. + } + + @Test + @Override + protected void testAlterTableSetAndRemoveProperty() { + // TODO: implement table operations. + } + + @Test + @Override + protected void testAlterTableUpdateComment() { + // TODO: implement table operations. + } + + @Test + @Override + protected void testAlterTableAddAndDeleteColumn() { + // TODO: implement table operations. + } + + @Test + @Override + protected void testAlterTableUpdateColumnType() { + // TODO: implement table operations. 
+ } + + @Test + @Override + protected void testAlterTableRenameColumn() { + // TODO: implement table operations. + } + + @Test + @Override + protected void testUpdateColumnPosition() { + // TODO: implement table operations. + } + + @Test + @Override + protected void testAlterTableUpdateColumnComment() { + // TODO: implement table operations. + } + + @Test + @Override + protected void testAlterTableReplaceColumns() { + // TODO: implement table operations. + } + + @Test + @Override + protected void testComplexType() { + // TODO: implement table operations. + } + + @Test + @Override + protected void testCreateTableAsSelect() { + // TODO: implement table operations. + } + + @Test + @Override + protected void testInsertTableAsSelect() { + // TODO: implement table operations. + } + + @Test + @Override + protected void testTableOptions() { + // TODO: implement table operations. + } + + @Test + @Override + protected void testDropAndWriteTable() { + // TODO: implement table operations. + } +} diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkUtilIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkUtilIT.java index 646f414841b..12077338a2a 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkUtilIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkUtilIT.java @@ -75,9 +75,7 @@ protected void dropDatabaseIfExists(String database) { // Specify Location explicitly because the default location is local HDFS, Spark will expand the // location to HDFS. 
protected void createDatabaseIfNotExists(String database) { - sql( - String.format( - "CREATE DATABASE IF NOT EXISTS %s LOCATION '/user/hive/%s'", database, database)); + sql(String.format("CREATE DATABASE IF NOT EXISTS %s", database)); } protected Map getDatabaseMetadata(String database) { diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/paimon/TestPaimonPropertiesConverter.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/paimon/TestPaimonPropertiesConverter.java new file mode 100644 index 00000000000..a3a0e91284a --- /dev/null +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/paimon/TestPaimonPropertiesConverter.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.spark.connector.paimon; + +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class TestPaimonPropertiesConverter { + private final PaimonPropertiesConverter paimonPropertiesConverter = + PaimonPropertiesConverter.getInstance(); + + @Test + void testCatalogPropertiesWithHiveBackend() { + Map properties = + paimonPropertiesConverter.toSparkCatalogProperties( + ImmutableMap.of( + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND, + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND_HIVE, + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_URI, + "hive-uri", + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_WAREHOUSE, + "hive-warehouse", + "key1", + "value1")); + Assertions.assertEquals( + ImmutableMap.of( + PaimonPropertiesConstants.PAIMON_CATALOG_METASTORE, + PaimonPropertiesConstants.PAIMON_CATALOG_BACKEND_HIVE, + PaimonPropertiesConstants.PAIMON_CATALOG_URI, + "hive-uri", + PaimonPropertiesConstants.PAIMON_CATALOG_WAREHOUSE, + "hive-warehouse"), + properties); + } + + @Test + void testCatalogPropertiesWithJdbcBackend() { + Map properties = + paimonPropertiesConverter.toSparkCatalogProperties( + ImmutableMap.of( + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND, + PaimonPropertiesConstants.PAIMON_CATALOG_BACKEND_JDBC, + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_URI, + "jdbc-uri", + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_WAREHOUSE, + "jdbc-warehouse", + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_JDBC_USER, + "user", + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_JDBC_PASSWORD, + "passwd", + "key1", + "value1")); + Assertions.assertEquals( + ImmutableMap.of( + PaimonPropertiesConstants.PAIMON_CATALOG_METASTORE, + PaimonPropertiesConstants.PAIMON_CATALOG_BACKEND_JDBC, + PaimonPropertiesConstants.PAIMON_CATALOG_URI, + "jdbc-uri", + 
PaimonPropertiesConstants.PAIMON_CATALOG_WAREHOUSE, + "jdbc-warehouse", + PaimonPropertiesConstants.PAIMON_CATALOG_JDBC_USER, + "user", + PaimonPropertiesConstants.PAIMON_CATALOG_JDBC_PASSWORD, + "passwd"), + properties); + } + + @Test + void testCatalogPropertiesWithFilesystemBackend() { + Map properties = + paimonPropertiesConverter.toSparkCatalogProperties( + ImmutableMap.of( + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND, + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND_FILESYSTEM, + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_WAREHOUSE, + "filesystem-warehouse", + "key1", + "value1")); + Assertions.assertEquals( + ImmutableMap.of( + PaimonPropertiesConstants.PAIMON_CATALOG_METASTORE, + PaimonPropertiesConstants.PAIMON_CATALOG_BACKEND_FILESYSTEM, + PaimonPropertiesConstants.PAIMON_CATALOG_WAREHOUSE, + "filesystem-warehouse"), + properties); + } +} diff --git a/spark-connector/v3.3/spark/build.gradle.kts b/spark-connector/v3.3/spark/build.gradle.kts index c4c417d62ef..fcc2604f27a 100644 --- a/spark-connector/v3.3/spark/build.gradle.kts +++ b/spark-connector/v3.3/spark/build.gradle.kts @@ -31,6 +31,7 @@ val scalaVersion: String = project.properties["scalaVersion"] as? 
String ?: extr val sparkVersion: String = libs.versions.spark33.get() val sparkMajorVersion: String = sparkVersion.substringBeforeLast(".") val icebergVersion: String = libs.versions.iceberg4spark.get() +val paimonVersion: String = libs.versions.paimon.get() val kyuubiVersion: String = libs.versions.kyuubi4spark33.get() val scalaJava8CompatVersion: String = libs.versions.scala.java.compat.get() val scalaCollectionCompatVersion: String = libs.versions.scala.collection.compat.get() @@ -43,6 +44,7 @@ dependencies { exclude("com.fasterxml.jackson") } compileOnly("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") + compileOnly("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") testImplementation(project(":api")) { exclude("org.apache.logging.log4j") @@ -122,6 +124,7 @@ dependencies { testImplementation("org.apache.iceberg:iceberg-core:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-hive-metastore:$icebergVersion") + testImplementation("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") testImplementation("org.apache.kyuubi:kyuubi-spark-connector-hive_$scalaVersion:$kyuubiVersion") // include spark-sql,spark-catalyst,hive-common,hdfs-client testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") { diff --git a/spark-connector/v3.3/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark33.java b/spark-connector/v3.3/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark33.java new file mode 100644 index 00000000000..2fef911a8bd --- /dev/null +++ b/spark-connector/v3.3/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark33.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor 
license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.paimon; + +public class GravitinoPaimonCatalogSpark33 extends GravitinoPaimonCatalog {} diff --git a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java new file mode 100644 index 00000000000..839b959c777 --- /dev/null +++ b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.paimon; + +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark33; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class SparkPaimonCatalogFilesystemBackendIT33 extends SparkPaimonCatalogFilesystemBackendIT { + @Test + void testCatalogClassName() { + String catalogClass = + getSparkSession() + .sessionState() + .conf() + .getConfString("spark.sql.catalog." + getCatalogName()); + Assertions.assertEquals(GravitinoPaimonCatalogSpark33.class.getName(), catalogClass); + } +} diff --git a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java index 1b0af02f87b..37c95e47890 100644 --- a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java +++ b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java @@ -20,6 +20,7 @@ import org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark33; import org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark33; +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark33; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -31,5 +32,8 @@ void testSpark33() { String icebergCatalogName = 
CatalogNameAdaptor.getCatalogName("lakehouse-iceberg"); Assertions.assertEquals(GravitinoIcebergCatalogSpark33.class.getName(), icebergCatalogName); + + String paimonCatalogName = CatalogNameAdaptor.getCatalogName("lakehouse-paimon"); + Assertions.assertEquals(GravitinoPaimonCatalogSpark33.class.getName(), paimonCatalogName); } } diff --git a/spark-connector/v3.4/spark/build.gradle.kts b/spark-connector/v3.4/spark/build.gradle.kts index f3308fca34b..f046144e533 100644 --- a/spark-connector/v3.4/spark/build.gradle.kts +++ b/spark-connector/v3.4/spark/build.gradle.kts @@ -31,6 +31,7 @@ val scalaVersion: String = project.properties["scalaVersion"] as? String ?: extr val sparkVersion: String = libs.versions.spark34.get() val sparkMajorVersion: String = sparkVersion.substringBeforeLast(".") val icebergVersion: String = libs.versions.iceberg4spark.get() +val paimonVersion: String = libs.versions.paimon.get() val kyuubiVersion: String = libs.versions.kyuubi4spark34.get() val scalaJava8CompatVersion: String = libs.versions.scala.java.compat.get() val scalaCollectionCompatVersion: String = libs.versions.scala.collection.compat.get() @@ -44,6 +45,7 @@ dependencies { } compileOnly(project(":clients:client-java-runtime", configuration = "shadow")) compileOnly("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") + compileOnly("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") testImplementation(project(":api")) { exclude("org.apache.logging.log4j") @@ -122,6 +124,7 @@ dependencies { testImplementation("org.apache.iceberg:iceberg-core:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-hive-metastore:$icebergVersion") + testImplementation("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") 
testImplementation("org.apache.kyuubi:kyuubi-spark-connector-hive_$scalaVersion:$kyuubiVersion") // include spark-sql,spark-catalyst,hive-common,hdfs-client testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") { diff --git a/spark-connector/v3.4/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark34.java b/spark-connector/v3.4/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark34.java new file mode 100644 index 00000000000..eb3e8779369 --- /dev/null +++ b/spark-connector/v3.4/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark34.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.spark.connector.paimon; + +import org.apache.gravitino.spark.connector.SparkTableChangeConverter; +import org.apache.gravitino.spark.connector.SparkTableChangeConverter34; +import org.apache.gravitino.spark.connector.SparkTypeConverter; +import org.apache.gravitino.spark.connector.SparkTypeConverter34; + +public class GravitinoPaimonCatalogSpark34 extends GravitinoPaimonCatalog { + @Override + protected SparkTypeConverter getSparkTypeConverter() { + return new SparkTypeConverter34(); + } + + @Override + protected SparkTableChangeConverter getSparkTableChangeConverter( + SparkTypeConverter sparkTypeConverter) { + return new SparkTableChangeConverter34(sparkTypeConverter); + } +} diff --git a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java new file mode 100644 index 00000000000..d230707325c --- /dev/null +++ b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.paimon; + +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark34; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class SparkPaimonCatalogFilesystemBackendIT34 extends SparkPaimonCatalogFilesystemBackendIT { + + @Test + void testCatalogClassName() { + String catalogClass = + getSparkSession() + .sessionState() + .conf() + .getConfString("spark.sql.catalog." + getCatalogName()); + Assertions.assertEquals(GravitinoPaimonCatalogSpark34.class.getName(), catalogClass); + } +} diff --git a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java index a2e95c8ea30..af9e67fab88 100644 --- a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java +++ b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java @@ -20,6 +20,7 @@ import org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark34; import org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark34; +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark34; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -31,5 +32,8 @@ void testSpark34() { String icebergCatalogName = CatalogNameAdaptor.getCatalogName("lakehouse-iceberg"); Assertions.assertEquals(GravitinoIcebergCatalogSpark34.class.getName(), icebergCatalogName); + + String paimonCatalogName = CatalogNameAdaptor.getCatalogName("lakehouse-paimon"); + Assertions.assertEquals(GravitinoPaimonCatalogSpark34.class.getName(), paimonCatalogName); } } diff --git 
a/spark-connector/v3.5/spark/build.gradle.kts b/spark-connector/v3.5/spark/build.gradle.kts index 7b8cc8447b7..30bafbb1aaf 100644 --- a/spark-connector/v3.5/spark/build.gradle.kts +++ b/spark-connector/v3.5/spark/build.gradle.kts @@ -31,6 +31,7 @@ val scalaVersion: String = project.properties["scalaVersion"] as? String ?: extr val sparkVersion: String = libs.versions.spark35.get() val sparkMajorVersion: String = sparkVersion.substringBeforeLast(".") val icebergVersion: String = libs.versions.iceberg4spark.get() +val paimonVersion: String = libs.versions.paimon.get() val kyuubiVersion: String = libs.versions.kyuubi4spark35.get() val scalaJava8CompatVersion: String = libs.versions.scala.java.compat.get() val scalaCollectionCompatVersion: String = libs.versions.scala.collection.compat.get() @@ -45,6 +46,7 @@ dependencies { } compileOnly(project(":clients:client-java-runtime", configuration = "shadow")) compileOnly("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") + compileOnly("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") testImplementation(project(":api")) { exclude("org.apache.logging.log4j") @@ -124,6 +126,7 @@ dependencies { testImplementation("org.apache.iceberg:iceberg-core:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-hive-metastore:$icebergVersion") + testImplementation("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") testImplementation("org.apache.kyuubi:kyuubi-spark-connector-hive_$scalaVersion:$kyuubiVersion") // include spark-sql,spark-catalyst,hive-common,hdfs-client testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") { diff --git a/spark-connector/v3.5/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark35.java 
b/spark-connector/v3.5/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark35.java new file mode 100644 index 00000000000..2c39af5b2f7 --- /dev/null +++ b/spark-connector/v3.5/spark/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalogSpark35.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.paimon; + +public class GravitinoPaimonCatalogSpark35 extends GravitinoPaimonCatalogSpark34 {} diff --git a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java new file mode 100644 index 00000000000..b02f58f70bf --- /dev/null +++ b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
 See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.paimon; + +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark35; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class SparkPaimonCatalogFilesystemBackendIT35 extends SparkPaimonCatalogFilesystemBackendIT { + + @Test + void testCatalogClassName() { + String catalogClass = + getSparkSession() + .sessionState() + .conf() + .getConfString("spark.sql.catalog." 
+ getCatalogName()); + Assertions.assertEquals(GravitinoPaimonCatalogSpark35.class.getName(), catalogClass); + } +} diff --git a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java index 5295e82fb24..f02584cd616 100644 --- a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java +++ b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/version/TestCatalogNameAdaptor.java @@ -20,6 +20,7 @@ import org.apache.gravitino.spark.connector.hive.GravitinoHiveCatalogSpark35; import org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark35; +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark35; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -31,5 +32,8 @@ void testSpark35() { String icebergCatalogName = CatalogNameAdaptor.getCatalogName("lakehouse-iceberg"); Assertions.assertEquals(GravitinoIcebergCatalogSpark35.class.getName(), icebergCatalogName); + + String paimonCatalogName = CatalogNameAdaptor.getCatalogName("lakehouse-paimon"); + Assertions.assertEquals(GravitinoPaimonCatalogSpark35.class.getName(), paimonCatalogName); } } From 0915488287394ffa48bb068bbb90abf186ebead0 Mon Sep 17 00:00:00 2001 From: caican Date: Mon, 2 Dec 2024 20:10:18 +0800 Subject: [PATCH 07/25] fix --- .../integration/test/hive/SparkHiveCatalogIT33.java | 5 +---- .../test/iceberg/SparkIcebergCatalogHiveBackendIT33.java | 5 +---- .../test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java | 5 +---- .../integration/test/hive/SparkHiveCatalogIT34.java | 5 +---- .../test/iceberg/SparkIcebergCatalogHiveBackendIT34.java | 5 +---- .../test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java | 5 +---- .../integration/test/hive/SparkHiveCatalogIT35.java | 5 +---- 
.../test/iceberg/SparkIcebergCatalogHiveBackendIT35.java | 5 +---- .../test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java | 5 +---- 9 files changed, 9 insertions(+), 36 deletions(-) diff --git a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT33.java b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT33.java index cc0630a1902..74915c9cb81 100644 --- a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT33.java +++ b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT33.java @@ -26,10 +26,7 @@ public class SparkHiveCatalogIT33 extends SparkHiveCatalogIT { @Test void testCatalogClassName() { String catalogClass = - getSparkSession() - .sessionState() - .conf() - .getConfString("spark.sql.catalog." + getCatalogName()); + getSparkSession().sparkContext().conf().get("spark.sql.catalog." 
+ getCatalogName()); Assertions.assertEquals(GravitinoHiveCatalogSpark33.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT33.java b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT33.java index 737c3c90e70..ebe322c27c2 100644 --- a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT33.java +++ b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT33.java @@ -26,10 +26,7 @@ public class SparkIcebergCatalogHiveBackendIT33 extends SparkIcebergCatalogHiveB @Test void testCatalogClassName() { String catalogClass = - getSparkSession() - .sessionState() - .conf() - .getConfString("spark.sql.catalog." + getCatalogName()); + getSparkSession().sparkContext().conf().get("spark.sql.catalog." 
+ getCatalogName()); Assertions.assertEquals(GravitinoIcebergCatalogSpark33.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java index 839b959c777..2c2f486563f 100644 --- a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java +++ b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java @@ -26,10 +26,7 @@ public class SparkPaimonCatalogFilesystemBackendIT33 extends SparkPaimonCatalogF @Test void testCatalogClassName() { String catalogClass = - getSparkSession() - .sessionState() - .conf() - .getConfString("spark.sql.catalog." + getCatalogName()); + getSparkSession().sparkContext().conf().get("spark.sql.catalog." 
+ getCatalogName()); Assertions.assertEquals(GravitinoPaimonCatalogSpark33.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT34.java b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT34.java index 6e2f43cdea8..3bf4cde32a3 100644 --- a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT34.java +++ b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT34.java @@ -26,10 +26,7 @@ public class SparkHiveCatalogIT34 extends SparkHiveCatalogIT { @Test void testCatalogClassName() { String catalogClass = - getSparkSession() - .sessionState() - .conf() - .getConfString("spark.sql.catalog." + getCatalogName()); + getSparkSession().sparkContext().conf().get("spark.sql.catalog." + getCatalogName()); Assertions.assertEquals(GravitinoHiveCatalogSpark34.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT34.java b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT34.java index 8a2b2177847..c602ff24ae9 100644 --- a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT34.java +++ b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT34.java @@ -27,10 +27,7 @@ public class SparkIcebergCatalogHiveBackendIT34 extends SparkIcebergCatalogHiveB @Test void testCatalogClassName() { String catalogClass = - getSparkSession() - .sessionState() - .conf() - .getConfString("spark.sql.catalog." 
+ getCatalogName()); + getSparkSession().sparkContext().conf().get("spark.sql.catalog." + getCatalogName()); Assertions.assertEquals(GravitinoIcebergCatalogSpark34.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java index d230707325c..9a0499a9c90 100644 --- a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java +++ b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java @@ -27,10 +27,7 @@ public class SparkPaimonCatalogFilesystemBackendIT34 extends SparkPaimonCatalogF @Test void testCatalogClassName() { String catalogClass = - getSparkSession() - .sessionState() - .conf() - .getConfString("spark.sql.catalog." + getCatalogName()); + getSparkSession().sparkContext().conf().get("spark.sql.catalog." 
+ getCatalogName()); Assertions.assertEquals(GravitinoPaimonCatalogSpark34.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT35.java b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT35.java index aa59ac7ef46..c5236f88afc 100644 --- a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT35.java +++ b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT35.java @@ -26,10 +26,7 @@ public class SparkHiveCatalogIT35 extends SparkHiveCatalogIT { @Test void testCatalogClassName() { String catalogClass = - getSparkSession() - .sessionState() - .conf() - .getConfString("spark.sql.catalog." + getCatalogName()); + getSparkSession().sparkContext().conf().get("spark.sql.catalog." + getCatalogName()); Assertions.assertEquals(GravitinoHiveCatalogSpark35.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT35.java b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT35.java index b15f9f7cdcf..80be4283fa3 100644 --- a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT35.java +++ b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT35.java @@ -27,10 +27,7 @@ public class SparkIcebergCatalogHiveBackendIT35 extends SparkIcebergCatalogHiveB @Test void testCatalogClassName() { String catalogClass = - getSparkSession() - .sessionState() - .conf() - .getConfString("spark.sql.catalog." 
+ getCatalogName()); + getSparkSession().sparkContext().conf().get("spark.sql.catalog." + getCatalogName()); Assertions.assertEquals(GravitinoIcebergCatalogSpark35.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java index b02f58f70bf..c3ba3d554e7 100644 --- a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java +++ b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java @@ -27,10 +27,7 @@ public class SparkPaimonCatalogFilesystemBackendIT35 extends SparkPaimonCatalogF @Test void testCatalogClassName() { String catalogClass = - getSparkSession() - .sessionState() - .conf() - .getConfString("spark.sql.catalog." + getCatalogName()); + getSparkSession().sparkContext().conf().get("spark.sql.catalog." 
+ getCatalogName()); Assertions.assertEquals(GravitinoIcebergCatalogSpark35.class.getName(), catalogClass); } } From a83a4066d38c9fb0bad4da54a240489b0cc3f40f Mon Sep 17 00:00:00 2001 From: caican Date: Mon, 2 Dec 2024 20:49:27 +0800 Subject: [PATCH 08/25] fix --- spark-connector/v3.3/spark/build.gradle.kts | 1 + spark-connector/v3.4/spark/build.gradle.kts | 1 + spark-connector/v3.5/spark/build.gradle.kts | 1 + 3 files changed, 3 insertions(+) diff --git a/spark-connector/v3.3/spark/build.gradle.kts b/spark-connector/v3.3/spark/build.gradle.kts index fcc2604f27a..9a9c4dedb4a 100644 --- a/spark-connector/v3.3/spark/build.gradle.kts +++ b/spark-connector/v3.3/spark/build.gradle.kts @@ -155,6 +155,7 @@ tasks.test { dependsOn(":catalogs:catalog-lakehouse-iceberg:jar") dependsOn(":catalogs:catalog-hive:jar") dependsOn(":iceberg:iceberg-rest-server:jar") + dependsOn(":catalogs:catalog-lakehouse-paimon:jar") } } diff --git a/spark-connector/v3.4/spark/build.gradle.kts b/spark-connector/v3.4/spark/build.gradle.kts index f046144e533..d1b24cf7252 100644 --- a/spark-connector/v3.4/spark/build.gradle.kts +++ b/spark-connector/v3.4/spark/build.gradle.kts @@ -155,6 +155,7 @@ tasks.test { dependsOn(":catalogs:catalog-lakehouse-iceberg:jar") dependsOn(":catalogs:catalog-hive:jar") dependsOn(":iceberg:iceberg-rest-server:jar") + dependsOn(":catalogs:catalog-lakehouse-paimon:jar") } } diff --git a/spark-connector/v3.5/spark/build.gradle.kts b/spark-connector/v3.5/spark/build.gradle.kts index 30bafbb1aaf..dd9fd856b07 100644 --- a/spark-connector/v3.5/spark/build.gradle.kts +++ b/spark-connector/v3.5/spark/build.gradle.kts @@ -157,6 +157,7 @@ tasks.test { dependsOn(":catalogs:catalog-lakehouse-iceberg:jar") dependsOn(":catalogs:catalog-hive:jar") dependsOn(":iceberg:iceberg-rest-server:jar") + dependsOn(":catalogs:catalog-lakehouse-paimon:jar") } } From 8846dc9c0d07d275f9eee844cee6efb9e4b0ab2c Mon Sep 17 00:00:00 2001 From: caican Date: Mon, 2 Dec 2024 22:24:00 +0800 Subject: [PATCH 
09/25] support partition management --- gradle/libs.versions.toml | 2 +- .../test/paimon/SparkPaimonCatalogIT.java | 43 ++++++++++--------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 4b7441ea297..a61668718b9 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -52,7 +52,7 @@ caffeine = "2.9.3" rocksdbjni = "7.10.2" iceberg = '1.5.2' # used for Gravitino Iceberg catalog and Iceberg REST service iceberg4spark = "1.4.1" # used for compile spark connector -paimon = '0.8.0' +paimon = '0.9.0' spark33 = "3.3.4" spark34 = "3.4.3" spark35 = "3.5.1" diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java index 10a3b1532ee..47d10e0b68a 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java @@ -94,31 +94,34 @@ void testPaimonPartitions() { @Test void testPaimonPartitionManagement() { - testPaimonCreatePartition(); - testPaimonReplacePartitionMetadata(); - testPaimonLoadPartitionMetadata(); - testPaimonListPartitionIdentifiers(); - testPaimonCreatePartition(); + testPaimonListAndDropPartition(); + // TODO: replace, add and load partition operations are unsupported in Paimon now. 
} - private void testPaimonDropPartition() { - - } - - private void testPaimonReplacePartitionMetadata() { - - } - - private void testPaimonLoadPartitionMetadata() { - - } - - private void testPaimonListPartitionIdentifiers() { + private void testPaimonListAndDropPartition() { + String tableName = "test_paimon_drop_partition"; + dropTableIfExists(tableName); + String createTableSQL = getCreatePaimonSimpleTableString(tableName); + createTableSQL = + createTableSQL + " PARTITIONED BY (name);"; + sql(createTableSQL); - } + String insertData = + String.format( + "INSERT into %s values(1,'a','beijing'), (2,'b','beijing'), (3,'c','beijing');", + tableName); + sql(insertData); + List queryResult = getTableData(tableName); + Assertions.assertEquals(3, queryResult.size()); - private void testPaimonCreatePartition() { + List partitions = getQueryData(String.format("show partitions %s", tableName)); + Assertions.assertEquals(3, partitions.size()); + Assertions.assertEquals("name=a;name=b;name=c", String.join(";", partitions)); + sql(String.format("ALTER TABLE %s DROP PARTITION (`name`='a')", tableName)); + partitions = getQueryData(String.format("show partitions %s", tableName)); + Assertions.assertEquals(2, partitions.size()); + Assertions.assertEquals("name=b;name=c", String.join(";", partitions)); } private String getCreatePaimonSimpleTableString(String tableName) { From c114d1c437ec39bfb6614c9ebda286326faab60c Mon Sep 17 00:00:00 2001 From: caican Date: Mon, 2 Dec 2024 23:09:17 +0800 Subject: [PATCH 10/25] support partition management --- .../test/paimon/SparkPaimonCatalogIT.java | 43 ++++++++----------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java index 47d10e0b68a..a58a9f56503 100644 --- 
a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java @@ -18,6 +18,9 @@ */ package org.apache.gravitino.spark.connector.integration.test.paimon; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; import org.apache.gravitino.spark.connector.integration.test.SparkCommonIT; import org.apache.gravitino.spark.connector.integration.test.util.SparkTableInfo; import org.apache.gravitino.spark.connector.integration.test.util.SparkTableInfoChecker; @@ -25,9 +28,6 @@ import org.apache.spark.sql.types.DataTypes; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; public abstract class SparkPaimonCatalogIT extends SparkCommonIT { @@ -68,22 +68,18 @@ void testPaimonPartitions() { String tableName = "test_paimon_partition_table"; dropTableIfExists(tableName); String createTableSQL = getCreatePaimonSimpleTableString(tableName); - createTableSQL = - createTableSQL + " PARTITIONED BY (name, address);"; + createTableSQL = createTableSQL + " PARTITIONED BY (name, address);"; sql(createTableSQL); SparkTableInfo tableInfo = getTableInfo(tableName); SparkTableInfoChecker checker = - SparkTableInfoChecker.create() - .withName(tableName) - .withColumns(getPaimonSimpleTableColumn()) - .withIdentifyPartition(Collections.singletonList("name")) - .withIdentifyPartition(Collections.singletonList("address")); + SparkTableInfoChecker.create() + .withName(tableName) + .withColumns(getPaimonSimpleTableColumn()) + .withIdentifyPartition(Collections.singletonList("name")) + .withIdentifyPartition(Collections.singletonList("address")); checker.check(tableInfo); - String insertData = - String.format( - "INSERT into %s values(2,'a','beijing');", - tableName); + 
String insertData = String.format("INSERT into %s values(2,'a','beijing');", tableName); sql(insertData); List queryResult = getTableData(tableName); Assertions.assertEquals(1, queryResult.size()); @@ -102,14 +98,13 @@ private void testPaimonListAndDropPartition() { String tableName = "test_paimon_drop_partition"; dropTableIfExists(tableName); String createTableSQL = getCreatePaimonSimpleTableString(tableName); - createTableSQL = - createTableSQL + " PARTITIONED BY (name);"; + createTableSQL = createTableSQL + " PARTITIONED BY (name);"; sql(createTableSQL); String insertData = - String.format( - "INSERT into %s values(1,'a','beijing'), (2,'b','beijing'), (3,'c','beijing');", - tableName); + String.format( + "INSERT into %s values(1,'a','beijing'), (2,'b','beijing'), (3,'c','beijing');", + tableName); sql(insertData); List queryResult = getTableData(tableName); Assertions.assertEquals(3, queryResult.size()); @@ -126,14 +121,14 @@ private void testPaimonListAndDropPartition() { private String getCreatePaimonSimpleTableString(String tableName) { return String.format( - "CREATE TABLE %s (id INT COMMENT 'id comment', name STRING COMMENT '', address STRING '') USING paimon", - tableName); + "CREATE TABLE %s (id INT COMMENT 'id comment', name STRING COMMENT '', address STRING '') USING paimon", + tableName); } private List getPaimonSimpleTableColumn() { return Arrays.asList( - SparkTableInfo.SparkColumnInfo.of("id", DataTypes.IntegerType, "id comment"), - SparkTableInfo.SparkColumnInfo.of("name", DataTypes.StringType, ""), - SparkTableInfo.SparkColumnInfo.of("address", DataTypes.StringType, "")); + SparkTableInfo.SparkColumnInfo.of("id", DataTypes.IntegerType, "id comment"), + SparkTableInfo.SparkColumnInfo.of("name", DataTypes.StringType, ""), + SparkTableInfo.SparkColumnInfo.of("address", DataTypes.StringType, "")); } } From af0b32a94d789b0166b6f8d7df6760720dd48d77 Mon Sep 17 00:00:00 2001 From: caican Date: Tue, 3 Dec 2024 16:53:00 +0800 Subject: [PATCH 11/25] fix --- 
.../spark/connector/catalog/BaseCatalog.java | 4 ++-- .../paimon/GravitinoPaimonCatalog.java | 9 +++++++++ .../paimon/PaimonPropertiesConstants.java | 2 ++ .../paimon/PaimonPropertiesConverter.java | 7 +++++-- .../integration/test/SparkCommonIT.java | 10 ++++------ .../test/hive/SparkHiveCatalogIT.java | 5 +++++ .../test/iceberg/SparkIcebergCatalogIT.java | 5 +++++ .../test/paimon/SparkPaimonCatalogIT.java | 18 +++++++++++++++++- .../integration/test/util/SparkTableInfo.java | 7 +++++++ 9 files changed, 56 insertions(+), 11 deletions(-) diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/catalog/BaseCatalog.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/catalog/BaseCatalog.java index 2201bd222be..5706895caa4 100644 --- a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/catalog/BaseCatalog.java +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/catalog/BaseCatalog.java @@ -76,11 +76,11 @@ public abstract class BaseCatalog implements TableCatalog, SupportsNamespaces { protected TableCatalog sparkCatalog; protected PropertiesConverter propertiesConverter; protected SparkTransformConverter sparkTransformConverter; + // The Gravitino catalog client to do schema operations. + protected Catalog gravitinoCatalogClient; private SparkTypeConverter sparkTypeConverter; private SparkTableChangeConverter sparkTableChangeConverter; - // The Gravitino catalog client to do schema operations. 
- private Catalog gravitinoCatalogClient; private String catalogName; private final GravitinoCatalogManager gravitinoCatalogManager; diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java index 5a7b9a8c7ec..86ca680c45b 100644 --- a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/GravitinoPaimonCatalog.java @@ -20,6 +20,7 @@ package org.apache.gravitino.spark.connector.paimon; import java.util.Map; +import org.apache.gravitino.NameIdentifier; import org.apache.gravitino.catalog.lakehouse.paimon.PaimonPropertiesUtils; import org.apache.gravitino.spark.connector.PropertiesConverter; import org.apache.gravitino.spark.connector.SparkTransformConverter; @@ -72,4 +73,12 @@ protected PropertiesConverter getPropertiesConverter() { protected SparkTransformConverter getSparkTransformConverter() { return new SparkTransformConverter(true); } + + @Override + public boolean dropTable(Identifier ident) { + sparkCatalog.invalidateTable(ident); + return gravitinoCatalogClient + .asTableCatalog() + .purgeTable(NameIdentifier.of(getDatabase(ident), ident.name())); + } } diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConstants.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConstants.java index 67190753e64..915308ae8df 100644 --- a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConstants.java +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConstants.java @@ -46,4 +46,6 @@ public class 
PaimonPropertiesConstants { public static final String PAIMON_CATALOG_BACKEND_FILESYSTEM = "filesystem"; static final String GRAVITINO_PAIMON_CATALOG_BACKEND_FILESYSTEM = "filesystem"; + + public static final String PAIMON_TABLE_LOCATION = "path"; } diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConverter.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConverter.java index 9ea11dd3a0f..f713ca89ddd 100644 --- a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConverter.java +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConverter.java @@ -20,10 +20,10 @@ package org.apache.gravitino.spark.connector.paimon; import com.google.common.base.Preconditions; - import java.util.HashMap; import java.util.Map; import org.apache.commons.lang3.StringUtils; +import org.apache.gravitino.catalog.lakehouse.paimon.PaimonConstants; import org.apache.gravitino.catalog.lakehouse.paimon.PaimonPropertiesUtils; import org.apache.gravitino.spark.connector.PropertiesConverter; @@ -54,7 +54,10 @@ public Map toSparkCatalogProperties(Map properti @Override public Map toGravitinoTableProperties(Map properties) { - return new HashMap<>(properties); + HashMap gravitinoTableProperties = new HashMap<>(properties); + // The owner property of Paimon is a reserved property, so we need to remove it. 
+ gravitinoTableProperties.remove(PaimonConstants.OWNER); + return gravitinoTableProperties; } @Override diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java index 956ef25637d..89c3da94a55 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java @@ -21,11 +21,6 @@ import com.google.common.collect.ImmutableMap; import java.io.File; import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Set; import java.util.stream.Collectors; import org.apache.commons.io.FileUtils; import org.apache.gravitino.spark.connector.ConnectorConstants; @@ -117,6 +112,8 @@ private static String getRowLevelDeleteTableSql( protected abstract boolean supportsSchemaEvolution(); + protected abstract boolean supportsReplaceColumns(); + // Use a custom database not the original default database because SparkCommonIT couldn't // read&write data to tables in default database. 
The main reason is default database location is // determined by `hive.metastore.warehouse.dir` in hive-site.xml which is local HDFS address @@ -550,6 +547,7 @@ protected void testAlterTableUpdateColumnComment() { } @Test + @EnabledIf("supportsReplaceColumns") protected void testAlterTableReplaceColumns() { String tableName = "test_replace_columns_table"; dropTableIfExists(tableName); @@ -563,7 +561,7 @@ protected void testAlterTableReplaceColumns() { sql( String.format( - "ALTER TABLE %S REPLACE COLUMNS (id int COMMENT 'new comment', name2 string, age long);", + "ALTER TABLE %s REPLACE COLUMNS (id int COMMENT 'new comment', name2 string, age long);", tableName)); ArrayList updateColumns = new ArrayList<>(); // change comment for id diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT.java index c543d82819e..b95882a0d01 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT.java @@ -79,6 +79,11 @@ protected boolean supportsSchemaEvolution() { return false; } + @Override + protected boolean supportsReplaceColumns() { + return true; + } + @Test void testCreateHiveFormatPartitionTable() { String tableName = "hive_partition_table"; diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogIT.java index 52f4abf3a98..f5fd337a13d 100644 --- 
a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogIT.java @@ -104,6 +104,11 @@ protected boolean supportsSchemaEvolution() { return true; } + @Override + protected boolean supportsReplaceColumns() { + return true; + } + @Override protected String getTableLocation(SparkTableInfo table) { return String.join(File.separator, table.getTableLocation(), "data"); diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java index 9ae1afaf77e..7dbc2a3d88f 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java @@ -21,9 +21,11 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Map; import org.apache.gravitino.spark.connector.integration.test.SparkCommonIT; import org.apache.gravitino.spark.connector.integration.test.util.SparkTableInfo; import org.apache.gravitino.spark.connector.integration.test.util.SparkTableInfoChecker; +import org.apache.gravitino.spark.connector.paimon.PaimonPropertiesConstants; import org.apache.hadoop.fs.Path; import org.apache.spark.sql.types.DataTypes; import org.junit.jupiter.api.Assertions; @@ -61,6 +63,20 @@ protected boolean supportsSchemaEvolution() { return true; } + @Override + protected boolean supportsReplaceColumns() { + // Paimon doesn't support replace columns, because it doesn't support drop all fields in table. 
+ // And `ALTER TABLE REPLACE COLUMNS` statement will removes all existing columns at first and + // then adds the new set of columns. + return false; + } + + @Override + protected String getTableLocation(SparkTableInfo table) { + Map tableProperties = table.getTableProperties(); + return tableProperties.get(PaimonPropertiesConstants.PAIMON_TABLE_LOCATION); + } + @Test void testPaimonPartitions() { String partitionPathString = "name=a/address=beijing"; @@ -90,7 +106,7 @@ void testPaimonPartitions() { private String getCreatePaimonSimpleTableString(String tableName) { return String.format( - "CREATE TABLE %s (id INT COMMENT 'id comment', name STRING COMMENT '', address STRING '') USING paimon", + "CREATE TABLE %s (id INT COMMENT 'id comment', name STRING COMMENT '', address STRING COMMENT '') USING paimon", tableName); } diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkTableInfo.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkTableInfo.java index 38b21ddf057..077936c29c5 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkTableInfo.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkTableInfo.java @@ -31,6 +31,7 @@ import org.apache.gravitino.spark.connector.ConnectorConstants; import org.apache.gravitino.spark.connector.hive.SparkHiveTable; import org.apache.gravitino.spark.connector.iceberg.SparkIcebergTable; +import org.apache.gravitino.spark.connector.paimon.SparkPaimonTable; import org.apache.spark.sql.connector.catalog.SupportsMetadataColumns; import org.apache.spark.sql.connector.catalog.Table; import org.apache.spark.sql.connector.catalog.TableCatalog; @@ -71,6 +72,10 @@ public String getTableLocation() { return tableProperties.get(TableCatalog.PROP_LOCATION); } + public Map 
getTableProperties() { + return tableProperties; + } + // Include database name and table name public String getTableIdentifier() { if (StringUtils.isNotBlank(database)) { @@ -186,6 +191,8 @@ private static StructType getSchema(Table baseTable) { return ((SparkHiveTable) baseTable).schema(); } else if (baseTable instanceof SparkIcebergTable) { return ((SparkIcebergTable) baseTable).schema(); + } else if (baseTable instanceof SparkPaimonTable) { + return ((SparkPaimonTable) baseTable).schema(); } else { throw new IllegalArgumentException( "Doesn't support Spark table: " + baseTable.getClass().getName()); From 79403e28a8923f935b3edabd0b3c7e34fa77a996 Mon Sep 17 00:00:00 2001 From: caican Date: Tue, 3 Dec 2024 19:48:11 +0800 Subject: [PATCH 12/25] fix --- .../spark/connector/integration/test/SparkCommonIT.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java index 89c3da94a55..9f6a0d53d0b 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java @@ -21,6 +21,11 @@ import com.google.common.collect.ImmutableMap; import java.io.File; import java.io.IOException; +import java.util.Map; +import java.util.List; +import java.util.ArrayList; +import java.util.Set; +import java.util.Arrays; import java.util.stream.Collectors; import org.apache.commons.io.FileUtils; import org.apache.gravitino.spark.connector.ConnectorConstants; From 1aa58cd2a4d6d7829dadc41ac10b2a41a73e200d Mon Sep 17 00:00:00 2001 From: caican Date: Tue, 3 Dec 2024 20:45:17 +0800 Subject: [PATCH 13/25] fix --- .../spark/connector/integration/test/SparkCommonIT.java | 6 +++--- 1 file 
changed, 3 insertions(+), 3 deletions(-) diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java index 9f6a0d53d0b..40fd4f20a5b 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java @@ -21,11 +21,11 @@ import com.google.common.collect.ImmutableMap; import java.io.File; import java.io.IOException; -import java.util.Map; -import java.util.List; import java.util.ArrayList; -import java.util.Set; import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; import org.apache.commons.io.FileUtils; import org.apache.gravitino.spark.connector.ConnectorConstants; From 3512934298d08d5282b323a451ffa12f036199bd Mon Sep 17 00:00:00 2001 From: caican Date: Wed, 4 Dec 2024 12:15:44 +0800 Subject: [PATCH 14/25] support hive backend --- ...SparkPaimonCatalogFilesystemBackendIT.java | 19 ----- .../SparkPaimonCatalogHiveBackendIT.java | 40 ++++++++++ .../test/paimon/SparkPaimonCatalogIT.java | 76 ++++++++++++------- .../SparkPaimonCatalogHiveBackendIT33.java | 32 ++++++++ .../SparkPaimonCatalogHiveBackendIT34.java | 33 ++++++++ .../SparkPaimonCatalogHiveBackendIT35.java | 33 ++++++++ 6 files changed, 185 insertions(+), 48 deletions(-) create mode 100644 spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT.java create mode 100644 spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT33.java create mode 100644 
spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT34.java create mode 100644 spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT35.java diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java index 3d4a3257a91..4f36946abf7 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java @@ -49,23 +49,4 @@ protected void testCreateAndLoadSchema() { // The database of the Paimon filesystem backend do not store any properties. Assertions.assertFalse(databaseMeta.containsKey("ID")); } - - @Test - @Override - protected void testAlterSchema() { - String testDatabaseName = "t_alter"; - dropDatabaseIfExists(testDatabaseName); - sql("CREATE DATABASE " + testDatabaseName + " WITH DBPROPERTIES (ID=001);"); - Map databaseMeta = getDatabaseMetadata(testDatabaseName); - // The database of the Paimon filesystem backend do not store any properties. - Assertions.assertFalse(databaseMeta.containsKey("ID")); - - // The Paimon filesystem backend do not support alter database operation. 
- Assertions.assertThrows( - UnsupportedOperationException.class, - () -> - sql( - String.format( - "ALTER DATABASE %s SET DBPROPERTIES ('ID'='002')", testDatabaseName))); - } } diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT.java new file mode 100644 index 00000000000..f9260969da3 --- /dev/null +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.paimon; + +import com.google.common.collect.Maps; +import java.util.Map; +import org.apache.gravitino.spark.connector.paimon.PaimonPropertiesConstants; +import org.junit.jupiter.api.Tag; + +/** This class use Apache Paimon HiveCatalog for backend catalog. 
*/ +@Tag("gravitino-docker-test") +public abstract class SparkPaimonCatalogHiveBackendIT extends SparkPaimonCatalogIT { + + @Override + protected Map getCatalogConfigs() { + Map catalogProperties = Maps.newHashMap(); + catalogProperties.put( + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND, + PaimonPropertiesConstants.PAIMON_CATALOG_BACKEND_HIVE); + catalogProperties.put(PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_URI, hiveMetastoreUri); + catalogProperties.put(PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_WAREHOUSE, warehouse); + return catalogProperties; + } +} diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java index 282a59794af..82d74c50876 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java @@ -77,6 +77,25 @@ protected String getTableLocation(SparkTableInfo table) { return tableProperties.get(PaimonPropertiesConstants.PAIMON_TABLE_LOCATION); } + @Test + @Override + protected void testAlterSchema() { + String testDatabaseName = "t_alter"; + dropDatabaseIfExists(testDatabaseName); + sql("CREATE DATABASE " + testDatabaseName + " WITH DBPROPERTIES (ID=001);"); + Map databaseMeta = getDatabaseMetadata(testDatabaseName); + // The database of the Paimon filesystem backend do not store any properties. + Assertions.assertTrue(databaseMeta.containsKey("ID")); + + // The Paimon filesystem backend do not support alter database operation. 
+ Assertions.assertThrows( + UnsupportedOperationException.class, + () -> + sql( + String.format( + "ALTER DATABASE %s SET DBPROPERTIES ('ID'='002')", testDatabaseName))); + } + @Test void testPaimonPartitions() { String partitionPathString = "name=a/address=beijing"; @@ -104,38 +123,37 @@ void testPaimonPartitions() { checkDirExists(partitionPath); } + @Test + void testPaimonPartitionManagement() { + // replace, add and load partition operations are unsupported in Paimon now. + // Therefore, Paimon spark runtime only supports list and drop partition operations. + testPaimonListAndDropPartition(); + } - @Test - void testPaimonPartitionManagement() { - // replace, add and load partition operations are unsupported in Paimon now. - // Therefore, Paimon spark runtime only supports list and drop partition operations. - testPaimonListAndDropPartition(); - } + private void testPaimonListAndDropPartition() { + String tableName = "test_paimon_drop_partition"; + dropTableIfExists(tableName); + String createTableSQL = getCreatePaimonSimpleTableString(tableName); + createTableSQL = createTableSQL + " PARTITIONED BY (name);"; + sql(createTableSQL); - private void testPaimonListAndDropPartition() { - String tableName = "test_paimon_drop_partition"; - dropTableIfExists(tableName); - String createTableSQL = getCreatePaimonSimpleTableString(tableName); - createTableSQL = createTableSQL + " PARTITIONED BY (name);"; - sql(createTableSQL); + String insertData = + String.format( + "INSERT into %s values(1,'a','beijing'), (2,'b','beijing'), (3,'c','beijing');", + tableName); + sql(insertData); + List queryResult = getTableData(tableName); + Assertions.assertEquals(3, queryResult.size()); - String insertData = - String.format( - "INSERT into %s values(1,'a','beijing'), (2,'b','beijing'), (3,'c','beijing');", - tableName); - sql(insertData); - List queryResult = getTableData(tableName); - Assertions.assertEquals(3, queryResult.size()); - - List partitions = 
getQueryData(String.format("show partitions %s", tableName)); - Assertions.assertEquals(3, partitions.size()); - Assertions.assertEquals("name=a;name=b;name=c", String.join(";", partitions)); - - sql(String.format("ALTER TABLE %s DROP PARTITION (`name`='a')", tableName)); - partitions = getQueryData(String.format("show partitions %s", tableName)); - Assertions.assertEquals(2, partitions.size()); - Assertions.assertEquals("name=b;name=c", String.join(";", partitions)); - } + List partitions = getQueryData(String.format("show partitions %s", tableName)); + Assertions.assertEquals(3, partitions.size()); + Assertions.assertEquals("name=a;name=b;name=c", String.join(";", partitions)); + + sql(String.format("ALTER TABLE %s DROP PARTITION (`name`='a')", tableName)); + partitions = getQueryData(String.format("show partitions %s", tableName)); + Assertions.assertEquals(2, partitions.size()); + Assertions.assertEquals("name=b;name=c", String.join(";", partitions)); + } private String getCreatePaimonSimpleTableString(String tableName) { return String.format( diff --git a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT33.java b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT33.java new file mode 100644 index 00000000000..aaffcd3367c --- /dev/null +++ b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT33.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.paimon; + +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark33; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class SparkPaimonCatalogHiveBackendIT33 extends SparkPaimonCatalogHiveBackendIT { + @Test + void testCatalogClassName() { + String catalogClass = + getSparkSession().sparkContext().conf().get("spark.sql.catalog." + getCatalogName()); + Assertions.assertEquals(GravitinoPaimonCatalogSpark33.class.getName(), catalogClass); + } +} diff --git a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT34.java b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT34.java new file mode 100644 index 00000000000..05c977a83aa --- /dev/null +++ b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT34.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.paimon; + +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark34; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class SparkPaimonCatalogHiveBackendIT34 extends SparkPaimonCatalogHiveBackendIT { + + @Test + void testCatalogClassName() { + String catalogClass = + getSparkSession().sparkContext().conf().get("spark.sql.catalog." + getCatalogName()); + Assertions.assertEquals(GravitinoPaimonCatalogSpark34.class.getName(), catalogClass); + } +} diff --git a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT35.java b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT35.java new file mode 100644 index 00000000000..81118fc8c33 --- /dev/null +++ b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT35.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.paimon; + +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark35; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class SparkPaimonCatalogHiveBackendIT35 extends SparkPaimonCatalogHiveBackendIT { + + @Test + void testCatalogClassName() { + String catalogClass = + getSparkSession().sparkContext().conf().get("spark.sql.catalog." + getCatalogName()); + Assertions.assertEquals(GravitinoPaimonCatalogSpark35.class.getName(), catalogClass); + } +} From 449937166c26f7bc0b4ccb33ea5ba64eabd191b9 Mon Sep 17 00:00:00 2001 From: caican Date: Wed, 4 Dec 2024 17:31:17 +0800 Subject: [PATCH 15/25] support hive backend --- .../lakehouse/paimon/PaimonSchema.java | 7 +++-- ...SparkPaimonCatalogFilesystemBackendIT.java | 26 +++++++++++++++++++ .../SparkPaimonCatalogHiveBackendIT.java | 24 +++++++++++++++++ .../test/paimon/SparkPaimonCatalogIT.java | 19 -------------- 4 files changed, 55 insertions(+), 21 deletions(-) diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonSchema.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonSchema.java index 40061fcfb0f..28529081079 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonSchema.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonSchema.java @@ -19,9 
+19,10 @@ package org.apache.gravitino.catalog.lakehouse.paimon; import static org.apache.gravitino.meta.AuditInfo.EMPTY; - +import java.util.HashMap; import java.util.Map; import java.util.Optional; +import com.google.common.collect.Maps; import lombok.ToString; import org.apache.gravitino.Schema; import org.apache.gravitino.connector.BaseSchema; @@ -79,15 +80,17 @@ protected PaimonSchema internalBuild() { PaimonSchema paimonSchema = new PaimonSchema(); paimonSchema.name = name; + Map propertiesWithComment = Maps.newHashMap(Optional.ofNullable(properties).orElse(new HashMap<>())); if (comment != null) { paimonSchema.comment = comment; + propertiesWithComment.put(PaimonSchemaPropertiesMetadata.COMMENT, comment); } else if (properties != null) { paimonSchema.comment = properties.get(PaimonSchemaPropertiesMetadata.COMMENT); } else { paimonSchema.comment = null; } - paimonSchema.properties = properties; + paimonSchema.properties = propertiesWithComment; paimonSchema.auditInfo = auditInfo; return paimonSchema; } diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java index 4f36946abf7..08b671e47cb 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java @@ -20,6 +20,8 @@ import com.google.common.collect.Maps; import java.util.Map; + +import org.apache.gravitino.catalog.lakehouse.paimon.PaimonConstants; import org.apache.gravitino.spark.connector.paimon.PaimonPropertiesConstants; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Tag; @@ -48,5 +50,29 
@@ protected void testCreateAndLoadSchema() { Map databaseMeta = getDatabaseMetadata(testDatabaseName); // The database of the Paimon filesystem backend do not store any properties. Assertions.assertFalse(databaseMeta.containsKey("ID")); + Assertions.assertFalse( + databaseMeta.containsKey("comment")); + } + + @Test + @Override + protected void testAlterSchema() { + String testDatabaseName = "t_alter"; + dropDatabaseIfExists(testDatabaseName); + sql("CREATE DATABASE " + testDatabaseName + " COMMENT 'db comment' WITH DBPROPERTIES (ID=001);"); + Map databaseMeta = getDatabaseMetadata(testDatabaseName); + // The database of the Paimon filesystem backend do not store any properties. + Assertions.assertFalse( + databaseMeta.get("Properties").contains("(ID,001)")); + Assertions.assertFalse( + databaseMeta.containsKey("Comment")); + + // The Paimon filesystem backend do not support alter database operation. + Assertions.assertThrows( + UnsupportedOperationException.class, + () -> + sql( + String.format( + "ALTER DATABASE %s SET DBPROPERTIES ('ID'='002')", testDatabaseName))); } } diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT.java index f9260969da3..2ca5dbf70d1 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT.java @@ -20,8 +20,12 @@ import com.google.common.collect.Maps; import java.util.Map; + +import org.apache.gravitino.catalog.lakehouse.paimon.PaimonConstants; import org.apache.gravitino.spark.connector.paimon.PaimonPropertiesConstants; +import org.junit.jupiter.api.Assertions; import 
org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; /** This class use Apache Paimon HiveCatalog for backend catalog. */ @Tag("gravitino-docker-test") @@ -37,4 +41,24 @@ protected Map getCatalogConfigs() { catalogProperties.put(PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_WAREHOUSE, warehouse); return catalogProperties; } + + @Test + @Override + protected void testAlterSchema() { + String testDatabaseName = "t_alter"; + dropDatabaseIfExists(testDatabaseName); + sql("CREATE DATABASE " + testDatabaseName + " COMMENT 'db comment' WITH DBPROPERTIES (ID=001);"); + Map databaseMeta = getDatabaseMetadata(testDatabaseName); + Assertions.assertTrue( + databaseMeta.get("Properties").contains("(ID,001)")); + Assertions.assertEquals("db comment", databaseMeta.get("Comment")); + + // The Paimon filesystem backend do not support alter database operation. + Assertions.assertThrows( + UnsupportedOperationException.class, + () -> + sql( + String.format( + "ALTER DATABASE %s SET DBPROPERTIES ('ID'='002')", testDatabaseName))); + } } diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java index 82d74c50876..335a1f346d9 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java @@ -77,25 +77,6 @@ protected String getTableLocation(SparkTableInfo table) { return tableProperties.get(PaimonPropertiesConstants.PAIMON_TABLE_LOCATION); } - @Test - @Override - protected void testAlterSchema() { - String testDatabaseName = "t_alter"; - dropDatabaseIfExists(testDatabaseName); - sql("CREATE DATABASE " + testDatabaseName + " WITH DBPROPERTIES 
(ID=001);"); - Map databaseMeta = getDatabaseMetadata(testDatabaseName); - // The database of the Paimon filesystem backend do not store any properties. - Assertions.assertTrue(databaseMeta.containsKey("ID")); - - // The Paimon filesystem backend do not support alter database operation. - Assertions.assertThrows( - UnsupportedOperationException.class, - () -> - sql( - String.format( - "ALTER DATABASE %s SET DBPROPERTIES ('ID'='002')", testDatabaseName))); - } - @Test void testPaimonPartitions() { String partitionPathString = "name=a/address=beijing"; From 84f48f0083409473af932ec189391b4bfe1054ff Mon Sep 17 00:00:00 2001 From: caican Date: Wed, 4 Dec 2024 17:36:28 +0800 Subject: [PATCH 16/25] support hive backend --- .../lakehouse/paimon/PaimonSchema.java | 6 +++-- ...SparkPaimonCatalogFilesystemBackendIT.java | 26 +++++++++---------- .../SparkPaimonCatalogHiveBackendIT.java | 20 +++++++------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonSchema.java b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonSchema.java index 28529081079..31591673f54 100644 --- a/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonSchema.java +++ b/catalogs/catalog-lakehouse-paimon/src/main/java/org/apache/gravitino/catalog/lakehouse/paimon/PaimonSchema.java @@ -19,10 +19,11 @@ package org.apache.gravitino.catalog.lakehouse.paimon; import static org.apache.gravitino.meta.AuditInfo.EMPTY; + +import com.google.common.collect.Maps; import java.util.HashMap; import java.util.Map; import java.util.Optional; -import com.google.common.collect.Maps; import lombok.ToString; import org.apache.gravitino.Schema; import org.apache.gravitino.connector.BaseSchema; @@ -80,7 +81,8 @@ protected PaimonSchema internalBuild() { PaimonSchema paimonSchema = new PaimonSchema(); paimonSchema.name = 
name; - Map propertiesWithComment = Maps.newHashMap(Optional.ofNullable(properties).orElse(new HashMap<>())); + Map propertiesWithComment = + Maps.newHashMap(Optional.ofNullable(properties).orElse(new HashMap<>())); if (comment != null) { paimonSchema.comment = comment; propertiesWithComment.put(PaimonSchemaPropertiesMetadata.COMMENT, comment); diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java index 08b671e47cb..3d6f9aef6b9 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT.java @@ -20,8 +20,6 @@ import com.google.common.collect.Maps; import java.util.Map; - -import org.apache.gravitino.catalog.lakehouse.paimon.PaimonConstants; import org.apache.gravitino.spark.connector.paimon.PaimonPropertiesConstants; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Tag; @@ -50,8 +48,7 @@ protected void testCreateAndLoadSchema() { Map databaseMeta = getDatabaseMetadata(testDatabaseName); // The database of the Paimon filesystem backend do not store any properties. 
Assertions.assertFalse(databaseMeta.containsKey("ID")); - Assertions.assertFalse( - databaseMeta.containsKey("comment")); + Assertions.assertFalse(databaseMeta.containsKey("comment")); } @Test @@ -59,20 +56,21 @@ protected void testCreateAndLoadSchema() { protected void testAlterSchema() { String testDatabaseName = "t_alter"; dropDatabaseIfExists(testDatabaseName); - sql("CREATE DATABASE " + testDatabaseName + " COMMENT 'db comment' WITH DBPROPERTIES (ID=001);"); + sql( + "CREATE DATABASE " + + testDatabaseName + + " COMMENT 'db comment' WITH DBPROPERTIES (ID=001);"); Map databaseMeta = getDatabaseMetadata(testDatabaseName); // The database of the Paimon filesystem backend do not store any properties. - Assertions.assertFalse( - databaseMeta.get("Properties").contains("(ID,001)")); - Assertions.assertFalse( - databaseMeta.containsKey("Comment")); + Assertions.assertFalse(databaseMeta.get("Properties").contains("(ID,001)")); + Assertions.assertFalse(databaseMeta.containsKey("Comment")); // The Paimon filesystem backend do not support alter database operation. 
Assertions.assertThrows( - UnsupportedOperationException.class, - () -> - sql( - String.format( - "ALTER DATABASE %s SET DBPROPERTIES ('ID'='002')", testDatabaseName))); + UnsupportedOperationException.class, + () -> + sql( + String.format( + "ALTER DATABASE %s SET DBPROPERTIES ('ID'='002')", testDatabaseName))); } } diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT.java index 2ca5dbf70d1..140c41a7b04 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogHiveBackendIT.java @@ -20,8 +20,6 @@ import com.google.common.collect.Maps; import java.util.Map; - -import org.apache.gravitino.catalog.lakehouse.paimon.PaimonConstants; import org.apache.gravitino.spark.connector.paimon.PaimonPropertiesConstants; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Tag; @@ -47,18 +45,20 @@ protected Map getCatalogConfigs() { protected void testAlterSchema() { String testDatabaseName = "t_alter"; dropDatabaseIfExists(testDatabaseName); - sql("CREATE DATABASE " + testDatabaseName + " COMMENT 'db comment' WITH DBPROPERTIES (ID=001);"); + sql( + "CREATE DATABASE " + + testDatabaseName + + " COMMENT 'db comment' WITH DBPROPERTIES (ID=001);"); Map databaseMeta = getDatabaseMetadata(testDatabaseName); - Assertions.assertTrue( - databaseMeta.get("Properties").contains("(ID,001)")); + Assertions.assertTrue(databaseMeta.get("Properties").contains("(ID,001)")); Assertions.assertEquals("db comment", databaseMeta.get("Comment")); // The Paimon filesystem backend do not support alter database operation. 
Assertions.assertThrows( - UnsupportedOperationException.class, - () -> - sql( - String.format( - "ALTER DATABASE %s SET DBPROPERTIES ('ID'='002')", testDatabaseName))); + UnsupportedOperationException.class, + () -> + sql( + String.format( + "ALTER DATABASE %s SET DBPROPERTIES ('ID'='002')", testDatabaseName))); } } From 0960a2702d9fcb96098bd2b1b42bd50772252b50 Mon Sep 17 00:00:00 2001 From: caican Date: Wed, 4 Dec 2024 19:38:32 +0800 Subject: [PATCH 17/25] support jdbc backend --- .../paimon/PaimonPropertiesConstants.java | 10 ++- .../integration/test/SparkEnvIT.java | 8 +- .../SparkPaimonCatalogJdbcBackendIT.java | 75 +++++++++++++++++++ .../SparkPaimonCatalogJdbcBackendIT33.java | 32 ++++++++ 4 files changed, 120 insertions(+), 5 deletions(-) create mode 100644 spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogJdbcBackendIT.java create mode 100644 spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogJdbcBackendIT33.java diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConstants.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConstants.java index 915308ae8df..bad530f9122 100644 --- a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConstants.java +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/PaimonPropertiesConstants.java @@ -31,18 +31,22 @@ public class PaimonPropertiesConstants { public static final String GRAVITINO_PAIMON_CATALOG_URI = PaimonConstants.URI; static final String PAIMON_CATALOG_URI = PaimonConstants.URI; - static final String GRAVITINO_PAIMON_CATALOG_JDBC_USER = PaimonConstants.GRAVITINO_JDBC_USER; + public static final String GRAVITINO_PAIMON_CATALOG_JDBC_USER = + 
PaimonConstants.GRAVITINO_JDBC_USER; static final String PAIMON_CATALOG_JDBC_USER = PaimonConstants.PAIMON_JDBC_USER; - static final String GRAVITINO_PAIMON_CATALOG_JDBC_PASSWORD = + public static final String GRAVITINO_PAIMON_CATALOG_JDBC_PASSWORD = PaimonConstants.GRAVITINO_JDBC_PASSWORD; static final String PAIMON_CATALOG_JDBC_PASSWORD = PaimonConstants.PAIMON_JDBC_PASSWORD; + public static final String GRAVITINO_PAIMON_CATALOG_JDBC_DRIVER = + PaimonConstants.GRAVITINO_JDBC_DRIVER; + public static final String PAIMON_CATALOG_BACKEND_HIVE = "hive"; static final String GRAVITINO_PAIMON_CATALOG_BACKEND_HIVE = "hive"; + public static final String PAIMON_CATALOG_BACKEND_JDBC = "jdbc"; static final String GRAVITINO_PAIMON_CATALOG_BACKEND_JDBC = "jdbc"; - static final String PAIMON_CATALOG_BACKEND_JDBC = "jdbc"; public static final String PAIMON_CATALOG_BACKEND_FILESYSTEM = "filesystem"; static final String GRAVITINO_PAIMON_CATALOG_BACKEND_FILESYSTEM = "filesystem"; diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkEnvIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkEnvIT.java index b534a9772f7..e5112d21595 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkEnvIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkEnvIT.java @@ -30,6 +30,8 @@ import org.apache.gravitino.client.GravitinoMetalake; import org.apache.gravitino.integration.test.container.ContainerSuite; import org.apache.gravitino.integration.test.container.HiveContainer; +import org.apache.gravitino.integration.test.container.MySQLContainer; +import org.apache.gravitino.integration.test.util.TestDatabaseName; import org.apache.gravitino.server.web.JettyServerConfig; import org.apache.gravitino.spark.connector.GravitinoSparkConfig; import 
org.apache.gravitino.spark.connector.iceberg.IcebergPropertiesConstants; @@ -49,8 +51,10 @@ public abstract class SparkEnvIT extends SparkUtilIT { private static final Logger LOG = LoggerFactory.getLogger(SparkEnvIT.class); - private static final ContainerSuite containerSuite = ContainerSuite.getInstance(); - + protected static final ContainerSuite containerSuite = ContainerSuite.getInstance(); + protected static final TestDatabaseName TEST_DB_NAME = + TestDatabaseName.PG_TEST_PAIMON_CATALOG_MULTIPLE_JDBC_LOAD; + protected static MySQLContainer mySQLContainer; protected static final String icebergRestServiceName = "iceberg-rest"; protected String hiveMetastoreUri = "thrift://127.0.0.1:9083"; protected String warehouse; diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogJdbcBackendIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogJdbcBackendIT.java new file mode 100644 index 00000000000..0ed3bb1efeb --- /dev/null +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogJdbcBackendIT.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.paimon; + +import com.google.common.collect.Maps; +import java.util.Map; +import org.apache.gravitino.spark.connector.paimon.PaimonPropertiesConstants; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; + +/** This class use Apache Paimon JdbcCatalog for backend catalog. */ +@Tag("gravitino-docker-test") +public abstract class SparkPaimonCatalogJdbcBackendIT extends SparkPaimonCatalogIT { + + @Override + protected Map getCatalogConfigs() { + containerSuite.startMySQLContainer(TEST_DB_NAME); + mySQLContainer = containerSuite.getMySQLContainer(); + Map catalogProperties = Maps.newHashMap(); + catalogProperties.put( + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_BACKEND, + PaimonPropertiesConstants.PAIMON_CATALOG_BACKEND_JDBC); + catalogProperties.put( + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_URI, + mySQLContainer.getJdbcUrl(TEST_DB_NAME)); + catalogProperties.put(PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_WAREHOUSE, warehouse); + catalogProperties.put( + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_JDBC_USER, mySQLContainer.getUsername()); + catalogProperties.put( + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_JDBC_PASSWORD, + mySQLContainer.getPassword()); + catalogProperties.put( + PaimonPropertiesConstants.GRAVITINO_PAIMON_CATALOG_JDBC_DRIVER, "com.mysql.cj.jdbc.Driver"); + return catalogProperties; + } + + @Test + @Override + protected void testAlterSchema() { + String testDatabaseName = "t_alter"; + dropDatabaseIfExists(testDatabaseName); + sql( + "CREATE DATABASE " + + testDatabaseName + + " COMMENT 'db comment' WITH DBPROPERTIES (ID=001);"); + Map databaseMeta = getDatabaseMetadata(testDatabaseName); + Assertions.assertTrue(databaseMeta.get("Properties").contains("(ID,001)")); + 
Assertions.assertEquals("db comment", databaseMeta.get("Comment")); + + // The Paimon filesystem backend do not support alter database operation. + Assertions.assertThrows( + UnsupportedOperationException.class, + () -> + sql( + String.format( + "ALTER DATABASE %s SET DBPROPERTIES ('ID'='002')", testDatabaseName))); + } +} diff --git a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogJdbcBackendIT33.java b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogJdbcBackendIT33.java new file mode 100644 index 00000000000..1e1a7278817 --- /dev/null +++ b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogJdbcBackendIT33.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.spark.connector.integration.test.paimon; + +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark33; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class SparkPaimonCatalogJdbcBackendIT33 extends SparkPaimonCatalogJdbcBackendIT { + @Test + void testCatalogClassName() { + String catalogClass = + getSparkSession().sparkContext().conf().get("spark.sql.catalog." + getCatalogName()); + Assertions.assertEquals(GravitinoPaimonCatalogSpark33.class.getName(), catalogClass); + } +} From c665d8b5f6cf4bc30a97c6e1da1b565eac494c53 Mon Sep 17 00:00:00 2001 From: caican Date: Wed, 4 Dec 2024 21:40:43 +0800 Subject: [PATCH 18/25] support metadata columns --- .../test/paimon/SparkPaimonCatalogIT.java | 94 +++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java index 335a1f346d9..9858ad0d222 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java @@ -23,6 +23,7 @@ import java.util.List; import java.util.Map; import org.apache.gravitino.spark.connector.integration.test.SparkCommonIT; +import org.apache.gravitino.spark.connector.integration.test.util.SparkMetadataColumnInfo; import org.apache.gravitino.spark.connector.integration.test.util.SparkTableInfo; import org.apache.gravitino.spark.connector.integration.test.util.SparkTableInfoChecker; import org.apache.gravitino.spark.connector.paimon.PaimonPropertiesConstants; @@ -30,6 +31,9 @@ import org.apache.spark.sql.types.DataTypes; import 
org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; public abstract class SparkPaimonCatalogIT extends SparkCommonIT { @@ -111,6 +115,88 @@ void testPaimonPartitionManagement() { testPaimonListAndDropPartition(); } + @ParameterizedTest + @ValueSource(booleans = {true, false}) + void testPaimonMetadataColumns(boolean isPartitioned) { + testMetadataColumns(); + testFilePathMetadataColumn(isPartitioned); + testRowIndexMetadataColumn(isPartitioned); + } + + private void testMetadataColumns() { + String tableName = "test_metadata_columns"; + dropTableIfExists(tableName); + String createTableSQL = getCreatePaimonSimpleTableString(tableName); + createTableSQL = createTableSQL + " PARTITIONED BY (name);"; + sql(createTableSQL); + + SparkTableInfo tableInfo = getTableInfo(tableName); + + SparkMetadataColumnInfo[] metadataColumns = getPaimonMetadataColumns(); + SparkTableInfoChecker checker = + SparkTableInfoChecker.create() + .withName(tableName) + .withColumns(getPaimonSimpleTableColumn()) + .withMetadataColumns(metadataColumns); + checker.check(tableInfo); + } + + private void testFilePathMetadataColumn(boolean isPartitioned) { + String tableName = "test_file_path_metadata_column"; + dropTableIfExists(tableName); + String createTableSQL = getCreatePaimonSimpleTableString(tableName); + if (isPartitioned) { + createTableSQL = createTableSQL + " PARTITIONED BY (name);"; + } + sql(createTableSQL); + + SparkTableInfo tableInfo = getTableInfo(tableName); + + SparkMetadataColumnInfo[] metadataColumns = getPaimonMetadataColumns(); + SparkTableInfoChecker checker = + SparkTableInfoChecker.create() + .withName(tableName) + .withColumns(getPaimonSimpleTableColumn()) + .withMetadataColumns(metadataColumns); + checker.check(tableInfo); + + String insertData = String.format("INSERT into %s values(2,'a', 
'beijing');", tableName); + sql(insertData); + + String getMetadataSQL = String.format("SELECT __paimon_file_path FROM %s", tableName); + List queryResult = getTableMetadata(getMetadataSQL); + Assertions.assertEquals(1, queryResult.size()); + Assertions.assertTrue(queryResult.get(0).contains(tableName)); + } + + private void testRowIndexMetadataColumn(boolean isPartitioned) { + String tableName = "test_row_index_metadata_column"; + dropTableIfExists(tableName); + String createTableSQL = getCreatePaimonSimpleTableString(tableName); + if (isPartitioned) { + createTableSQL = createTableSQL + " PARTITIONED BY (name);"; + } + sql(createTableSQL); + + SparkTableInfo tableInfo = getTableInfo(tableName); + + SparkMetadataColumnInfo[] metadataColumns = getPaimonMetadataColumns(); + SparkTableInfoChecker checker = + SparkTableInfoChecker.create() + .withName(tableName) + .withColumns(getPaimonSimpleTableColumn()) + .withMetadataColumns(metadataColumns); + checker.check(tableInfo); + + String insertData = String.format("INSERT into %s values(2,'a', 'beijing');", tableName); + sql(insertData); + + String getMetadataSQL = String.format("SELECT __paimon_row_index FROM %s", tableName); + List queryResult = getTableMetadata(getMetadataSQL); + Assertions.assertEquals(1, queryResult.size()); + Assertions.assertEquals(0, Integer.parseInt(queryResult.get(0))); + } + private void testPaimonListAndDropPartition() { String tableName = "test_paimon_drop_partition"; dropTableIfExists(tableName); @@ -148,4 +234,12 @@ private List getPaimonSimpleTableColumn() { SparkTableInfo.SparkColumnInfo.of("name", DataTypes.StringType, ""), SparkTableInfo.SparkColumnInfo.of("address", DataTypes.StringType, "")); } + + private SparkMetadataColumnInfo[] getPaimonMetadataColumns() { + return new SparkMetadataColumnInfo[] { + new SparkMetadataColumnInfo("__paimon_file_path", DataTypes.StringType, true), + new SparkMetadataColumnInfo( + "__paimon_row_index", DataTypes.LongType, true) + }; + } } From 
d0d0e6d80829de82f128a1f17d59b8f9d9fabace Mon Sep 17 00:00:00 2001 From: caican Date: Wed, 4 Dec 2024 21:43:08 +0800 Subject: [PATCH 19/25] support metadata columns --- .../test/paimon/SparkPaimonCatalogIT.java | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java index 9858ad0d222..d5854c57650 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java @@ -32,7 +32,6 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.MethodSource; import org.junit.jupiter.params.provider.ValueSource; public abstract class SparkPaimonCatalogIT extends SparkCommonIT { @@ -134,10 +133,10 @@ private void testMetadataColumns() { SparkMetadataColumnInfo[] metadataColumns = getPaimonMetadataColumns(); SparkTableInfoChecker checker = - SparkTableInfoChecker.create() - .withName(tableName) - .withColumns(getPaimonSimpleTableColumn()) - .withMetadataColumns(metadataColumns); + SparkTableInfoChecker.create() + .withName(tableName) + .withColumns(getPaimonSimpleTableColumn()) + .withMetadataColumns(metadataColumns); checker.check(tableInfo); } @@ -154,10 +153,10 @@ private void testFilePathMetadataColumn(boolean isPartitioned) { SparkMetadataColumnInfo[] metadataColumns = getPaimonMetadataColumns(); SparkTableInfoChecker checker = - SparkTableInfoChecker.create() - .withName(tableName) - .withColumns(getPaimonSimpleTableColumn()) - .withMetadataColumns(metadataColumns); 
+ SparkTableInfoChecker.create() + .withName(tableName) + .withColumns(getPaimonSimpleTableColumn()) + .withMetadataColumns(metadataColumns); checker.check(tableInfo); String insertData = String.format("INSERT into %s values(2,'a', 'beijing');", tableName); @@ -182,10 +181,10 @@ private void testRowIndexMetadataColumn(boolean isPartitioned) { SparkMetadataColumnInfo[] metadataColumns = getPaimonMetadataColumns(); SparkTableInfoChecker checker = - SparkTableInfoChecker.create() - .withName(tableName) - .withColumns(getPaimonSimpleTableColumn()) - .withMetadataColumns(metadataColumns); + SparkTableInfoChecker.create() + .withName(tableName) + .withColumns(getPaimonSimpleTableColumn()) + .withMetadataColumns(metadataColumns); checker.check(tableInfo); String insertData = String.format("INSERT into %s values(2,'a', 'beijing');", tableName); @@ -237,9 +236,8 @@ private List getPaimonSimpleTableColumn() { private SparkMetadataColumnInfo[] getPaimonMetadataColumns() { return new SparkMetadataColumnInfo[] { - new SparkMetadataColumnInfo("__paimon_file_path", DataTypes.StringType, true), - new SparkMetadataColumnInfo( - "__paimon_row_index", DataTypes.LongType, true) + new SparkMetadataColumnInfo("__paimon_file_path", DataTypes.StringType, true), + new SparkMetadataColumnInfo("__paimon_row_index", DataTypes.LongType, true) }; } } From 659338f8cec46c675e8bc940a632ad803326e21a Mon Sep 17 00:00:00 2001 From: caican Date: Mon, 2 Dec 2024 20:10:18 +0800 Subject: [PATCH 20/25] fix --- spark-connector/v3.3/spark/build.gradle.kts | 12 ++++++++++-- .../integration/test/hive/SparkHiveCatalogIT33.java | 5 +---- .../iceberg/SparkIcebergCatalogHiveBackendIT33.java | 5 +---- .../SparkPaimonCatalogFilesystemBackendIT33.java | 5 +---- spark-connector/v3.4/spark/build.gradle.kts | 12 ++++++++++-- .../integration/test/hive/SparkHiveCatalogIT34.java | 5 +---- .../iceberg/SparkIcebergCatalogHiveBackendIT34.java | 5 +---- .../SparkPaimonCatalogFilesystemBackendIT34.java | 5 +---- 
spark-connector/v3.5/spark/build.gradle.kts | 12 ++++++++++-- .../integration/test/hive/SparkHiveCatalogIT35.java | 5 +---- .../iceberg/SparkIcebergCatalogHiveBackendIT35.java | 5 +---- .../SparkPaimonCatalogFilesystemBackendIT35.java | 5 +---- 12 files changed, 39 insertions(+), 42 deletions(-) diff --git a/spark-connector/v3.3/spark/build.gradle.kts b/spark-connector/v3.3/spark/build.gradle.kts index fcc2604f27a..66c65f863b9 100644 --- a/spark-connector/v3.3/spark/build.gradle.kts +++ b/spark-connector/v3.3/spark/build.gradle.kts @@ -44,7 +44,9 @@ dependencies { exclude("com.fasterxml.jackson") } compileOnly("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") - compileOnly("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") + compileOnly("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") { + exclude("org.apache.spark") + } testImplementation(project(":api")) { exclude("org.apache.logging.log4j") @@ -124,7 +126,9 @@ dependencies { testImplementation("org.apache.iceberg:iceberg-core:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-hive-metastore:$icebergVersion") - testImplementation("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") + testImplementation("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") { + exclude("org.apache.spark") + } testImplementation("org.apache.kyuubi:kyuubi-spark-connector-hive_$scalaVersion:$kyuubiVersion") // include spark-sql,spark-catalyst,hive-common,hdfs-client testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") { @@ -137,6 +141,9 @@ dependencies { exclude("com.fasterxml.jackson.core") } testImplementation("org.scala-lang.modules:scala-collection-compat_$scalaVersion:$scalaCollectionCompatVersion") + 
testImplementation("org.apache.spark:spark-catalyst_$scalaVersion:$sparkVersion") + testImplementation("org.apache.spark:spark-core_$scalaVersion:$sparkVersion") + testImplementation("org.apache.spark:spark-sql_$scalaVersion:$sparkVersion") testRuntimeOnly(libs.junit.jupiter.engine) } @@ -155,6 +162,7 @@ tasks.test { dependsOn(":catalogs:catalog-lakehouse-iceberg:jar") dependsOn(":catalogs:catalog-hive:jar") dependsOn(":iceberg:iceberg-rest-server:jar") + dependsOn(":catalogs:catalog-lakehouse-paimon:jar") } } diff --git a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT33.java b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT33.java index cc0630a1902..74915c9cb81 100644 --- a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT33.java +++ b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT33.java @@ -26,10 +26,7 @@ public class SparkHiveCatalogIT33 extends SparkHiveCatalogIT { @Test void testCatalogClassName() { String catalogClass = - getSparkSession() - .sessionState() - .conf() - .getConfString("spark.sql.catalog." + getCatalogName()); + getSparkSession().sparkContext().conf().get("spark.sql.catalog." 
+ getCatalogName()); Assertions.assertEquals(GravitinoHiveCatalogSpark33.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT33.java b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT33.java index 737c3c90e70..ebe322c27c2 100644 --- a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT33.java +++ b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT33.java @@ -26,10 +26,7 @@ public class SparkIcebergCatalogHiveBackendIT33 extends SparkIcebergCatalogHiveB @Test void testCatalogClassName() { String catalogClass = - getSparkSession() - .sessionState() - .conf() - .getConfString("spark.sql.catalog." + getCatalogName()); + getSparkSession().sparkContext().conf().get("spark.sql.catalog." 
+ getCatalogName()); Assertions.assertEquals(GravitinoIcebergCatalogSpark33.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java index 839b959c777..2c2f486563f 100644 --- a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java +++ b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java @@ -26,10 +26,7 @@ public class SparkPaimonCatalogFilesystemBackendIT33 extends SparkPaimonCatalogF @Test void testCatalogClassName() { String catalogClass = - getSparkSession() - .sessionState() - .conf() - .getConfString("spark.sql.catalog." + getCatalogName()); + getSparkSession().sparkContext().conf().get("spark.sql.catalog." 
+ getCatalogName()); Assertions.assertEquals(GravitinoPaimonCatalogSpark33.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.4/spark/build.gradle.kts b/spark-connector/v3.4/spark/build.gradle.kts index f046144e533..aa4134a3c71 100644 --- a/spark-connector/v3.4/spark/build.gradle.kts +++ b/spark-connector/v3.4/spark/build.gradle.kts @@ -45,7 +45,9 @@ dependencies { } compileOnly(project(":clients:client-java-runtime", configuration = "shadow")) compileOnly("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") - compileOnly("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") + compileOnly("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") { + exclude("org.apache.spark") + } testImplementation(project(":api")) { exclude("org.apache.logging.log4j") @@ -124,7 +126,9 @@ dependencies { testImplementation("org.apache.iceberg:iceberg-core:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-hive-metastore:$icebergVersion") - testImplementation("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") + testImplementation("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") { + exclude("org.apache.spark") + } testImplementation("org.apache.kyuubi:kyuubi-spark-connector-hive_$scalaVersion:$kyuubiVersion") // include spark-sql,spark-catalyst,hive-common,hdfs-client testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") { @@ -137,6 +141,9 @@ dependencies { exclude("com.fasterxml.jackson.core") } testImplementation("org.scala-lang.modules:scala-collection-compat_$scalaVersion:$scalaCollectionCompatVersion") + testImplementation("org.apache.spark:spark-catalyst_$scalaVersion:$sparkVersion") + testImplementation("org.apache.spark:spark-core_$scalaVersion:$sparkVersion") + 
testImplementation("org.apache.spark:spark-sql_$scalaVersion:$sparkVersion") testRuntimeOnly(libs.junit.jupiter.engine) } @@ -155,6 +162,7 @@ tasks.test { dependsOn(":catalogs:catalog-lakehouse-iceberg:jar") dependsOn(":catalogs:catalog-hive:jar") dependsOn(":iceberg:iceberg-rest-server:jar") + dependsOn(":catalogs:catalog-lakehouse-paimon:jar") } } diff --git a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT34.java b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT34.java index 6e2f43cdea8..3bf4cde32a3 100644 --- a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT34.java +++ b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT34.java @@ -26,10 +26,7 @@ public class SparkHiveCatalogIT34 extends SparkHiveCatalogIT { @Test void testCatalogClassName() { String catalogClass = - getSparkSession() - .sessionState() - .conf() - .getConfString("spark.sql.catalog." + getCatalogName()); + getSparkSession().sparkContext().conf().get("spark.sql.catalog." 
+ getCatalogName()); Assertions.assertEquals(GravitinoHiveCatalogSpark34.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT34.java b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT34.java index 8a2b2177847..c602ff24ae9 100644 --- a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT34.java +++ b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT34.java @@ -27,10 +27,7 @@ public class SparkIcebergCatalogHiveBackendIT34 extends SparkIcebergCatalogHiveB @Test void testCatalogClassName() { String catalogClass = - getSparkSession() - .sessionState() - .conf() - .getConfString("spark.sql.catalog." + getCatalogName()); + getSparkSession().sparkContext().conf().get("spark.sql.catalog." 
+ getCatalogName()); Assertions.assertEquals(GravitinoIcebergCatalogSpark34.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java index d230707325c..9a0499a9c90 100644 --- a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java +++ b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java @@ -27,10 +27,7 @@ public class SparkPaimonCatalogFilesystemBackendIT34 extends SparkPaimonCatalogF @Test void testCatalogClassName() { String catalogClass = - getSparkSession() - .sessionState() - .conf() - .getConfString("spark.sql.catalog." + getCatalogName()); + getSparkSession().sparkContext().conf().get("spark.sql.catalog." 
+ getCatalogName()); Assertions.assertEquals(GravitinoPaimonCatalogSpark34.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.5/spark/build.gradle.kts b/spark-connector/v3.5/spark/build.gradle.kts index 30bafbb1aaf..15aa018081d 100644 --- a/spark-connector/v3.5/spark/build.gradle.kts +++ b/spark-connector/v3.5/spark/build.gradle.kts @@ -46,7 +46,9 @@ dependencies { } compileOnly(project(":clients:client-java-runtime", configuration = "shadow")) compileOnly("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") - compileOnly("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") + compileOnly("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") { + exclude("org.apache.spark") + } testImplementation(project(":api")) { exclude("org.apache.logging.log4j") @@ -126,7 +128,9 @@ dependencies { testImplementation("org.apache.iceberg:iceberg-core:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-hive-metastore:$icebergVersion") - testImplementation("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") + testImplementation("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") { + exclude("org.apache.spark") + } testImplementation("org.apache.kyuubi:kyuubi-spark-connector-hive_$scalaVersion:$kyuubiVersion") // include spark-sql,spark-catalyst,hive-common,hdfs-client testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") { @@ -139,6 +143,9 @@ dependencies { exclude("com.fasterxml.jackson.core") } testImplementation("org.scala-lang.modules:scala-collection-compat_$scalaVersion:$scalaCollectionCompatVersion") + testImplementation("org.apache.spark:spark-catalyst_$scalaVersion:$sparkVersion") + testImplementation("org.apache.spark:spark-core_$scalaVersion:$sparkVersion") + 
testImplementation("org.apache.spark:spark-sql_$scalaVersion:$sparkVersion") testRuntimeOnly(libs.junit.jupiter.engine) } @@ -157,6 +164,7 @@ tasks.test { dependsOn(":catalogs:catalog-lakehouse-iceberg:jar") dependsOn(":catalogs:catalog-hive:jar") dependsOn(":iceberg:iceberg-rest-server:jar") + dependsOn(":catalogs:catalog-lakehouse-paimon:jar") } } diff --git a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT35.java b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT35.java index aa59ac7ef46..c5236f88afc 100644 --- a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT35.java +++ b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT35.java @@ -26,10 +26,7 @@ public class SparkHiveCatalogIT35 extends SparkHiveCatalogIT { @Test void testCatalogClassName() { String catalogClass = - getSparkSession() - .sessionState() - .conf() - .getConfString("spark.sql.catalog." + getCatalogName()); + getSparkSession().sparkContext().conf().get("spark.sql.catalog." 
+ getCatalogName()); Assertions.assertEquals(GravitinoHiveCatalogSpark35.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT35.java b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT35.java index b15f9f7cdcf..80be4283fa3 100644 --- a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT35.java +++ b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT35.java @@ -27,10 +27,7 @@ public class SparkIcebergCatalogHiveBackendIT35 extends SparkIcebergCatalogHiveB @Test void testCatalogClassName() { String catalogClass = - getSparkSession() - .sessionState() - .conf() - .getConfString("spark.sql.catalog." + getCatalogName()); + getSparkSession().sparkContext().conf().get("spark.sql.catalog." 
+ getCatalogName()); Assertions.assertEquals(GravitinoIcebergCatalogSpark35.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java index b02f58f70bf..c3ba3d554e7 100644 --- a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java +++ b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java @@ -27,10 +27,7 @@ public class SparkPaimonCatalogFilesystemBackendIT35 extends SparkPaimonCatalogF @Test void testCatalogClassName() { String catalogClass = - getSparkSession() - .sessionState() - .conf() - .getConfString("spark.sql.catalog." + getCatalogName()); + getSparkSession().sparkContext().conf().get("spark.sql.catalog." 
+ getCatalogName()); Assertions.assertEquals(GravitinoIcebergCatalogSpark35.class.getName(), catalogClass); } } From 069b748f03e2c2fd1bf116e85a149a53aa6e39d7 Mon Sep 17 00:00:00 2001 From: caican Date: Fri, 6 Dec 2024 11:47:17 +0800 Subject: [PATCH 21/25] fix --- .../test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java index c3ba3d554e7..f38b3dec99b 100644 --- a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java +++ b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java @@ -18,7 +18,7 @@ */ package org.apache.gravitino.spark.connector.integration.test.paimon; -import org.apache.gravitino.spark.connector.iceberg.GravitinoIcebergCatalogSpark35; +import org.apache.gravitino.spark.connector.paimon.GravitinoPaimonCatalogSpark35; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -28,6 +28,6 @@ public class SparkPaimonCatalogFilesystemBackendIT35 extends SparkPaimonCatalogF void testCatalogClassName() { String catalogClass = getSparkSession().sparkContext().conf().get("spark.sql.catalog." 
+ getCatalogName()); - Assertions.assertEquals(GravitinoIcebergCatalogSpark35.class.getName(), catalogClass); + Assertions.assertEquals(GravitinoPaimonCatalogSpark35.class.getName(), catalogClass); } } From f0bfd602098054b1df2880630bf7a34d9e190deb Mon Sep 17 00:00:00 2001 From: caican Date: Mon, 9 Dec 2024 23:01:53 +0800 Subject: [PATCH 22/25] fix --- spark-connector/spark-common/build.gradle.kts | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/spark-connector/spark-common/build.gradle.kts b/spark-connector/spark-common/build.gradle.kts index dc0af57a00c..06e0077d21e 100644 --- a/spark-connector/spark-common/build.gradle.kts +++ b/spark-connector/spark-common/build.gradle.kts @@ -44,7 +44,9 @@ dependencies { compileOnly(project(":clients:client-java-runtime", configuration = "shadow")) compileOnly("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") compileOnly("org.apache.kyuubi:kyuubi-spark-connector-hive_$scalaVersion:$kyuubiVersion") - compileOnly("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") + compileOnly("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") { + exclude("org.apache.spark") + } compileOnly("org.apache.spark:spark-catalyst_$scalaVersion:$sparkVersion") compileOnly("org.apache.spark:spark-core_$scalaVersion:$sparkVersion") @@ -116,7 +118,9 @@ dependencies { testImplementation("org.apache.iceberg:iceberg-core:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-hive-metastore:$icebergVersion") testImplementation("org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_$scalaVersion:$icebergVersion") - testImplementation("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") + testImplementation("org.apache.paimon:paimon-spark-$sparkMajorVersion:$paimonVersion") { + exclude("org.apache.spark") + } testImplementation("org.apache.kyuubi:kyuubi-spark-connector-hive_$scalaVersion:$kyuubiVersion") // include 
spark-sql,spark-catalyst,hive-common,hdfs-client testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") { @@ -126,6 +130,9 @@ dependencies { exclude("org.glassfish.jersey.inject") } testImplementation("org.scala-lang.modules:scala-collection-compat_$scalaVersion:$scalaCollectionCompatVersion") + testImplementation("org.apache.spark:spark-catalyst_$scalaVersion:$sparkVersion") + testImplementation("org.apache.spark:spark-core_$scalaVersion:$sparkVersion") + testImplementation("org.apache.spark:spark-sql_$scalaVersion:$sparkVersion") testRuntimeOnly(libs.junit.jupiter.engine) } From 22de28ee3ab7b3be1b46a4ff7da74077aaac7653 Mon Sep 17 00:00:00 2001 From: caican Date: Mon, 9 Dec 2024 23:15:17 +0800 Subject: [PATCH 23/25] fix --- .../spark/connector/integration/test/SparkCommonIT.java | 1 + 1 file changed, 1 insertion(+) diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java index 40fd4f20a5b..7c490892bce 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java @@ -242,6 +242,7 @@ protected void testAlterSchema() { @Test void testDropSchema() { String testDatabaseName = "t_drop"; + dropDatabaseIfExists(testDatabaseName); Set databases = getDatabases(); Assertions.assertFalse(databases.contains(testDatabaseName)); From f8220e5a81127781152a79503f0092eb8a0920e5 Mon Sep 17 00:00:00 2001 From: caican Date: Mon, 9 Dec 2024 23:25:08 +0800 Subject: [PATCH 24/25] fix --- docs/lakehouse-paimon-catalog.md | 35 ++++++++-------- .../connector/paimon/SparkPaimonTable.java | 4 -- .../integration/test/SparkCommonIT.java | 40 +++++++++---------- 
.../test/paimon/SparkPaimonCatalogIT.java | 2 +- .../integration/test/util/SparkUtilIT.java | 9 ++++- .../test/hive/SparkHiveCatalogIT33.java | 5 ++- .../SparkIcebergCatalogHiveBackendIT33.java | 5 ++- ...arkPaimonCatalogFilesystemBackendIT33.java | 5 ++- .../test/hive/SparkHiveCatalogIT34.java | 5 ++- .../SparkIcebergCatalogHiveBackendIT34.java | 5 ++- ...arkPaimonCatalogFilesystemBackendIT34.java | 5 ++- .../test/hive/SparkHiveCatalogIT35.java | 5 ++- .../SparkIcebergCatalogHiveBackendIT35.java | 5 ++- ...arkPaimonCatalogFilesystemBackendIT35.java | 5 ++- 14 files changed, 82 insertions(+), 53 deletions(-) diff --git a/docs/lakehouse-paimon-catalog.md b/docs/lakehouse-paimon-catalog.md index d53ad482766..b67fe37db39 100644 --- a/docs/lakehouse-paimon-catalog.md +++ b/docs/lakehouse-paimon-catalog.md @@ -29,23 +29,24 @@ Builds with Apache Paimon `0.8.0`. ### Catalog properties -| Property name | Description | Default value | Required | Since Version | -|----------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------| -| `catalog-backend` | Catalog backend of Gravitino Paimon catalog. Supports `filesystem`, `jdbc` and `hive`. | (none) | Yes | 0.6.0-incubating | -| `uri` | The URI configuration of the Paimon catalog. `thrift://127.0.0.1:9083` or `jdbc:postgresql://127.0.0.1:5432/db_name` or `jdbc:mysql://127.0.0.1:3306/metastore_db`. It is optional for `FilesystemCatalog`. | (none) | required if the value of `catalog-backend` is not `filesystem`. | 0.6.0-incubating | -| `warehouse` | Warehouse directory of catalog. 
`file:///user/hive/warehouse-paimon/` for local fs, `hdfs://namespace/hdfs/path` for HDFS , `s3://{bucket-name}/path/` for S3 or `oss://{bucket-name}/path` for Aliyun OSS | (none) | Yes | 0.6.0-incubating | -| `authentication.type` | The type of authentication for Paimon catalog backend, currently Gravitino only supports `Kerberos` and `simple`. | `simple` | No | 0.6.0-incubating | -| `hive.metastore.sasl.enabled` | Whether to enable SASL authentication protocol when connect to Kerberos Hive metastore. This is a raw Hive configuration | `false` | No, This value should be true in most case(Some will use SSL protocol, but it rather rare) if the value of `gravitino.iceberg-rest.authentication.type` is Kerberos. | 0.6.0-incubating | -| `authentication.kerberos.principal` | The principal of the Kerberos authentication. | (none) | required if the value of `authentication.type` is Kerberos. | 0.6.0-incubating | -| `authentication.kerberos.keytab-uri` | The URI of The keytab for the Kerberos authentication. | (none) | required if the value of `authentication.type` is Kerberos. | 0.6.0-incubating | -| `authentication.kerberos.check-interval-sec` | The check interval of Kerberos credential for Paimon catalog. | 60 | No | 0.6.0-incubating | -| `authentication.kerberos.keytab-fetch-timeout-sec` | The fetch timeout of retrieving Kerberos keytab from `authentication.kerberos.keytab-uri`. | 60 | No | 0.6.0-incubating | -| `oss-endpoint` | The endpoint of the Aliyun OSS. | (none) | required if the value of `warehouse` is a OSS path | 0.7.0-incubating | -| `oss-access-key-id` | The access key of the Aliyun OSS. | (none) | required if the value of `warehouse` is a OSS path | 0.7.0-incubating | -| `oss-accesss-key-secret` | The secret key the Aliyun OSS. | (none) | required if the value of `warehouse` is a OSS path | 0.7.0-incubating | -| `s3-endpoint` | The endpoint of the AWS S3. 
| (none) | required if the value of `warehouse` is a S3 path | 0.7.0-incubating | -| `s3-access-key-id` | The access key of the AWS S3. | (none) | required if the value of `warehouse` is a S3 path | 0.7.0-incubating | -| `s3-secret-access-key` | The secret key of the AWS S3. | (none) | required if the value of `warehouse` is a S3 path | 0.7.0-incubating | +| Property name | Description | Default value | Required | Since Version | +|----------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------| +| `catalog-backend` | Catalog backend of Gravitino Paimon catalog. Supports `filesystem`, `jdbc` and `hive`. | (none) | Yes | 0.6.0-incubating | +| `uri` | The URI configuration of the Paimon catalog. `thrift://127.0.0.1:9083` or `jdbc:postgresql://127.0.0.1:5432/db_name` or `jdbc:mysql://127.0.0.1:3306/metastore_db`. It is optional for `FilesystemCatalog`. | (none) | required if the value of `catalog-backend` is not `filesystem`. | 0.6.0-incubating | +| `warehouse` | Warehouse directory of catalog. `file:///user/hive/warehouse-paimon/` for local fs, `hdfs://namespace/hdfs/path` for HDFS , `s3://{bucket-name}/path/` for S3 or `oss://{bucket-name}/path` for Aliyun OSS | (none) | Yes | 0.6.0-incubating | +| `catalog-backend-name` | The catalog name passed to underlying Paimon catalog backend. | The property value of `catalog-backend`, like `jdbc` for JDBC catalog backend. 
| No | 0.8.0-incubating | +| `authentication.type` | The type of authentication for Paimon catalog backend, currently Gravitino only supports `Kerberos` and `simple`. | `simple` | No | 0.6.0-incubating | +| `hive.metastore.sasl.enabled` | Whether to enable SASL authentication protocol when connect to Kerberos Hive metastore. This is a raw Hive configuration | `false` | No, This value should be true in most case(Some will use SSL protocol, but it rather rare) if the value of `gravitino.iceberg-rest.authentication.type` is Kerberos. | 0.6.0-incubating | +| `authentication.kerberos.principal` | The principal of the Kerberos authentication. | (none) | required if the value of `authentication.type` is Kerberos. | 0.6.0-incubating | +| `authentication.kerberos.keytab-uri` | The URI of The keytab for the Kerberos authentication. | (none) | required if the value of `authentication.type` is Kerberos. | 0.6.0-incubating | +| `authentication.kerberos.check-interval-sec` | The check interval of Kerberos credential for Paimon catalog. | 60 | No | 0.6.0-incubating | +| `authentication.kerberos.keytab-fetch-timeout-sec` | The fetch timeout of retrieving Kerberos keytab from `authentication.kerberos.keytab-uri`. | 60 | No | 0.6.0-incubating | +| `oss-endpoint` | The endpoint of the Aliyun OSS. | (none) | required if the value of `warehouse` is a OSS path | 0.7.0-incubating | +| `oss-access-key-id` | The access key of the Aliyun OSS. | (none) | required if the value of `warehouse` is a OSS path | 0.7.0-incubating | +| `oss-accesss-key-secret` | The secret key the Aliyun OSS. | (none) | required if the value of `warehouse` is a OSS path | 0.7.0-incubating | +| `s3-endpoint` | The endpoint of the AWS S3. | (none) | required if the value of `warehouse` is a S3 path | 0.7.0-incubating | +| `s3-access-key-id` | The access key of the AWS S3. | (none) | required if the value of `warehouse` is a S3 path | 0.7.0-incubating | +| `s3-secret-access-key` | The secret key of the AWS S3. 
| (none) | required if the value of `warehouse` is a S3 path | 0.7.0-incubating | :::note If you want to use the `oss` or `s3` warehouse, you need to place related jars in the `catalogs/lakehouse-paimon/lib` directory, more information can be found in the [Paimon S3](https://paimon.apache.org/docs/master/filesystems/s3/). diff --git a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/SparkPaimonTable.java b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/SparkPaimonTable.java index 557cf13bec6..f1db29b71bc 100644 --- a/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/SparkPaimonTable.java +++ b/spark-connector/spark-common/src/main/java/org/apache/gravitino/spark/connector/paimon/SparkPaimonTable.java @@ -32,10 +32,6 @@ import org.apache.spark.sql.types.StructType; import org.apache.spark.sql.util.CaseInsensitiveStringMap; -/** - * For spark-connector in Paimon, it explicitly uses SparkTable to identify whether it is an Apache - * Paimon table, so the SparkPaimonTable must extend SparkTable. 
- */ public class SparkPaimonTable extends SparkTable { private GravitinoTableInfoHelper gravitinoTableInfoHelper; diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java index 7c490892bce..c7517a3bf82 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/SparkCommonIT.java @@ -148,7 +148,7 @@ void initDefaultDatabase() throws IOException { throw e; } sql("USE " + getCatalogName()); - createDatabaseIfNotExists(getDefaultDatabase()); + createDatabaseIfNotExists(getDefaultDatabase(), getProvider()); } @BeforeEach @@ -170,7 +170,7 @@ void cleanUp() { } @Test - protected void testListTables() { + void testListTables() { String tableName = "t_list"; dropTableIfExists(tableName); Set tableNames = listTableNames(); @@ -280,7 +280,7 @@ void testCreateTableWithDatabase() { // test db.table as table identifier String databaseName = "db1"; String tableName = "table1"; - createDatabaseIfNotExists(databaseName); + createDatabaseIfNotExists(databaseName, getProvider()); String tableIdentifier = String.join(".", databaseName, tableName); dropTableIfExists(tableIdentifier); @@ -294,7 +294,7 @@ void testCreateTableWithDatabase() { // use db then create table with table name databaseName = "db2"; tableName = "table2"; - createDatabaseIfNotExists(databaseName); + createDatabaseIfNotExists(databaseName, getProvider()); sql("USE " + databaseName); dropTableIfExists(tableName); @@ -327,7 +327,7 @@ void testCreateTableWithComment() { } @Test - protected void testDropTable() { + void testDropTable() { String tableName = "drop_table"; createSimpleTable(tableName); Assertions.assertEquals(true, tableExists(tableName)); @@ -340,7 
+340,7 @@ protected void testDropTable() { } @Test - protected void testRenameTable() { + void testRenameTable() { String tableName = "rename1"; String newTableName = "rename2"; dropTableIfExists(tableName); @@ -367,7 +367,7 @@ protected void testRenameTable() { } @Test - protected void testListTable() { + void testListTable() { String table1 = "list1"; String table2 = "list2"; dropTableIfExists(table1); @@ -382,7 +382,7 @@ protected void testListTable() { String database = "db_list"; String table3 = "list3"; String table4 = "list4"; - createDatabaseIfNotExists(database); + createDatabaseIfNotExists(database, getProvider()); dropTableIfExists(String.join(".", database, table3)); dropTableIfExists(String.join(".", database, table4)); createSimpleTable(String.join(".", database, table3)); @@ -396,7 +396,7 @@ protected void testListTable() { } @Test - protected void testAlterTableSetAndRemoveProperty() { + void testAlterTableSetAndRemoveProperty() { String tableName = "test_property"; dropTableIfExists(tableName); @@ -414,7 +414,7 @@ protected void testAlterTableSetAndRemoveProperty() { } @Test - protected void testAlterTableUpdateComment() { + void testAlterTableUpdateComment() { String tableName = "test_comment"; String comment = "comment1"; dropTableIfExists(tableName); @@ -431,7 +431,7 @@ protected void testAlterTableUpdateComment() { } @Test - protected void testAlterTableAddAndDeleteColumn() { + void testAlterTableAddAndDeleteColumn() { String tableName = "test_column"; dropTableIfExists(tableName); @@ -450,7 +450,7 @@ protected void testAlterTableAddAndDeleteColumn() { } @Test - protected void testAlterTableUpdateColumnType() { + void testAlterTableUpdateColumnType() { String tableName = "test_column_type"; dropTableIfExists(tableName); @@ -467,7 +467,7 @@ protected void testAlterTableUpdateColumnType() { } @Test - protected void testAlterTableRenameColumn() { + void testAlterTableRenameColumn() { String tableName = "test_rename_column"; 
dropTableIfExists(tableName); List simpleTableColumns = getSimpleTableColumn(); @@ -487,7 +487,7 @@ protected void testAlterTableRenameColumn() { } @Test - protected void testUpdateColumnPosition() { + void testUpdateColumnPosition() { String tableName = "test_column_position"; dropTableIfExists(tableName); @@ -530,7 +530,7 @@ protected void testUpdateColumnPosition() { } @Test - protected void testAlterTableUpdateColumnComment() { + void testAlterTableUpdateColumnComment() { String tableName = "test_update_column_comment"; dropTableIfExists(tableName); List simpleTableColumns = getSimpleTableColumn(); @@ -593,7 +593,7 @@ protected void testAlterTableReplaceColumns() { } @Test - protected void testComplexType() { + void testComplexType() { String tableName = "complex_type_table"; dropTableIfExists(tableName); @@ -682,7 +682,7 @@ void testCreateSortBucketTable() { // Spark CTAS doesn't copy table properties and partition schema from source table. @Test - protected void testCreateTableAsSelect() { + void testCreateTableAsSelect() { String tableName = "ctas_table"; dropTableIfExists(tableName); createSimpleTable(tableName); @@ -704,7 +704,7 @@ protected void testCreateTableAsSelect() { } @Test - protected void testInsertTableAsSelect() { + void testInsertTableAsSelect() { String tableName = "insert_select_table"; String newTableName = "new_" + tableName; @@ -797,7 +797,7 @@ protected void deleteDirIfExists(String path) { } @Test - protected void testTableOptions() { + void testTableOptions() { String tableName = "options_table"; dropTableIfExists(tableName); String createTableSql = getCreateSimpleTableString(tableName); @@ -814,7 +814,7 @@ protected void testTableOptions() { } @Test - protected void testDropAndWriteTable() { + void testDropAndWriteTable() { String tableName = "drop_then_create_write_table"; createSimpleTable(tableName); diff --git 
a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java index 7dbc2a3d88f..c77a4642eec 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogIT.java @@ -66,7 +66,7 @@ protected boolean supportsSchemaEvolution() { @Override protected boolean supportsReplaceColumns() { // Paimon doesn't support replace columns, because it doesn't support drop all fields in table. - // And `ALTER TABLE REPLACE COLUMNS` statement will removes all existing columns at first and + // And `ALTER TABLE REPLACE COLUMNS` statement will remove all existing columns at first and // then adds the new set of columns. return false; } diff --git a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkUtilIT.java b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkUtilIT.java index 12077338a2a..ed7d2085ffd 100644 --- a/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkUtilIT.java +++ b/spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/util/SparkUtilIT.java @@ -74,8 +74,13 @@ protected void dropDatabaseIfExists(String database) { // Specify Location explicitly because the default location is local HDFS, Spark will expand the // location to HDFS. - protected void createDatabaseIfNotExists(String database) { - sql(String.format("CREATE DATABASE IF NOT EXISTS %s", database)); + // However, Paimon does not support create a database with a specified location. 
+ protected void createDatabaseIfNotExists(String database, String provider) { + String locationClause = + "lakehouse-paimon".equalsIgnoreCase(provider) + ? "" + : String.format("LOCATION '/user/hive/%s'", database); + sql(String.format("CREATE DATABASE IF NOT EXISTS %s %s", database, locationClause)); } protected Map getDatabaseMetadata(String database) { diff --git a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT33.java b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT33.java index 74915c9cb81..cc0630a1902 100644 --- a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT33.java +++ b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT33.java @@ -26,7 +26,10 @@ public class SparkHiveCatalogIT33 extends SparkHiveCatalogIT { @Test void testCatalogClassName() { String catalogClass = - getSparkSession().sparkContext().conf().get("spark.sql.catalog." + getCatalogName()); + getSparkSession() + .sessionState() + .conf() + .getConfString("spark.sql.catalog." 
+ getCatalogName()); Assertions.assertEquals(GravitinoHiveCatalogSpark33.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT33.java b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT33.java index ebe322c27c2..737c3c90e70 100644 --- a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT33.java +++ b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT33.java @@ -26,7 +26,10 @@ public class SparkIcebergCatalogHiveBackendIT33 extends SparkIcebergCatalogHiveB @Test void testCatalogClassName() { String catalogClass = - getSparkSession().sparkContext().conf().get("spark.sql.catalog." + getCatalogName()); + getSparkSession() + .sessionState() + .conf() + .getConfString("spark.sql.catalog." 
+ getCatalogName()); Assertions.assertEquals(GravitinoIcebergCatalogSpark33.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java index 2c2f486563f..839b959c777 100644 --- a/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java +++ b/spark-connector/v3.3/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT33.java @@ -26,7 +26,10 @@ public class SparkPaimonCatalogFilesystemBackendIT33 extends SparkPaimonCatalogF @Test void testCatalogClassName() { String catalogClass = - getSparkSession().sparkContext().conf().get("spark.sql.catalog." + getCatalogName()); + getSparkSession() + .sessionState() + .conf() + .getConfString("spark.sql.catalog." + getCatalogName()); Assertions.assertEquals(GravitinoPaimonCatalogSpark33.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT34.java b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT34.java index 3bf4cde32a3..6e2f43cdea8 100644 --- a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT34.java +++ b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT34.java @@ -26,7 +26,10 @@ public class SparkHiveCatalogIT34 extends SparkHiveCatalogIT { @Test void testCatalogClassName() { String catalogClass = - getSparkSession().sparkContext().conf().get("spark.sql.catalog." 
+ getCatalogName()); + getSparkSession() + .sessionState() + .conf() + .getConfString("spark.sql.catalog." + getCatalogName()); Assertions.assertEquals(GravitinoHiveCatalogSpark34.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT34.java b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT34.java index c602ff24ae9..8a2b2177847 100644 --- a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT34.java +++ b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT34.java @@ -27,7 +27,10 @@ public class SparkIcebergCatalogHiveBackendIT34 extends SparkIcebergCatalogHiveB @Test void testCatalogClassName() { String catalogClass = - getSparkSession().sparkContext().conf().get("spark.sql.catalog." + getCatalogName()); + getSparkSession() + .sessionState() + .conf() + .getConfString("spark.sql.catalog." 
+ getCatalogName()); Assertions.assertEquals(GravitinoIcebergCatalogSpark34.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java index 9a0499a9c90..d230707325c 100644 --- a/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java +++ b/spark-connector/v3.4/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT34.java @@ -27,7 +27,10 @@ public class SparkPaimonCatalogFilesystemBackendIT34 extends SparkPaimonCatalogF @Test void testCatalogClassName() { String catalogClass = - getSparkSession().sparkContext().conf().get("spark.sql.catalog." + getCatalogName()); + getSparkSession() + .sessionState() + .conf() + .getConfString("spark.sql.catalog." + getCatalogName()); Assertions.assertEquals(GravitinoPaimonCatalogSpark34.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT35.java b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT35.java index c5236f88afc..aa59ac7ef46 100644 --- a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT35.java +++ b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/hive/SparkHiveCatalogIT35.java @@ -26,7 +26,10 @@ public class SparkHiveCatalogIT35 extends SparkHiveCatalogIT { @Test void testCatalogClassName() { String catalogClass = - getSparkSession().sparkContext().conf().get("spark.sql.catalog." 
+ getCatalogName()); + getSparkSession() + .sessionState() + .conf() + .getConfString("spark.sql.catalog." + getCatalogName()); Assertions.assertEquals(GravitinoHiveCatalogSpark35.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT35.java b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT35.java index 80be4283fa3..b15f9f7cdcf 100644 --- a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT35.java +++ b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/iceberg/SparkIcebergCatalogHiveBackendIT35.java @@ -27,7 +27,10 @@ public class SparkIcebergCatalogHiveBackendIT35 extends SparkIcebergCatalogHiveB @Test void testCatalogClassName() { String catalogClass = - getSparkSession().sparkContext().conf().get("spark.sql.catalog." + getCatalogName()); + getSparkSession() + .sessionState() + .conf() + .getConfString("spark.sql.catalog." 
+ getCatalogName()); Assertions.assertEquals(GravitinoIcebergCatalogSpark35.class.getName(), catalogClass); } } diff --git a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java index f38b3dec99b..44281c76ef0 100644 --- a/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java +++ b/spark-connector/v3.5/spark/src/test/java/org/apache/gravitino/spark/connector/integration/test/paimon/SparkPaimonCatalogFilesystemBackendIT35.java @@ -27,7 +27,10 @@ public class SparkPaimonCatalogFilesystemBackendIT35 extends SparkPaimonCatalogF @Test void testCatalogClassName() { String catalogClass = - getSparkSession().sparkContext().conf().get("spark.sql.catalog." + getCatalogName()); + getSparkSession() + .sessionState() + .conf() + .getConfString("spark.sql.catalog." + getCatalogName()); Assertions.assertEquals(GravitinoPaimonCatalogSpark35.class.getName(), catalogClass); } } From 5217b818d1457414c20b96e1496e48e3aaa8f9f2 Mon Sep 17 00:00:00 2001 From: caican Date: Sun, 15 Dec 2024 20:02:09 +0800 Subject: [PATCH 25/25] fix --- gradle/libs.versions.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 9817ca83fc8..88636c5a52e 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -53,7 +53,7 @@ caffeine = "2.9.3" rocksdbjni = "7.10.2" iceberg = '1.5.2' # used for Gravitino Iceberg catalog and Iceberg REST service iceberg4spark = "1.4.1" # used for compile spark connector -paimon = '0.9.0' +paimon = '0.8.0' spark33 = "3.3.4" spark34 = "3.4.3" spark35 = "3.5.1"