From 057b4c772c0de07b84d65fd76530f228538fbc27 Mon Sep 17 00:00:00 2001 From: cai can <94670132+caican00@users.noreply.github.com> Date: Tue, 19 Mar 2024 21:09:06 +0800 Subject: [PATCH 01/11] [#2566] Improvement(spark-connector): Refactoring integration tests for spark-connector (#2578) ### What changes were proposed in this pull request? 1. Make SparkIT to SparkCommonIT which contains the common tests shared by all catalogs. 2. Add new SparkHiveCatalogIT to test Hive specific tests, and both SparkXXCatalogIT extends SparkCommonIT. ### Why are the changes needed? Separate integration testing for different data sources. Fix: https://github.com/datastrato/gravitino/issues/2566 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Integration tests after separation. --- .../{SparkIT.java => SparkCommonIT.java} | 21 +++++++--------- .../integration/test/spark/SparkEnvIT.java | 11 +++++---- .../test/spark/hive/SparkHiveCatalogIT.java | 24 +++++++++++++++++++ 3 files changed, 39 insertions(+), 17 deletions(-) rename integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/{SparkIT.java => SparkCommonIT.java} (97%) create mode 100644 integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java similarity index 97% rename from integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkIT.java rename to integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java index 2cfd9a511d9..6b735affd69 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkCommonIT.java @@ -22,15 +22,10 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestInstance; -import org.junit.jupiter.api.TestInstance.Lifecycle; import org.junit.platform.commons.util.StringUtils; -@Tag("gravitino-docker-it") -@TestInstance(Lifecycle.PER_CLASS) -public class SparkIT extends SparkEnvIT { +public abstract class SparkCommonIT extends SparkEnvIT { private static String getSelectAllSql(String tableName) { return String.format("SELECT * FROM %s", tableName); } @@ -56,32 +51,32 @@ private static String getInsertWithoutPartitionSql(String tableName, String valu DataTypes.createStructField("col2", DataTypes.StringType, true))), "struct(1, 'a')"); - // Use a custom database not the original default database because SparkIT couldn't read&write - // data to tables in default database. The main reason is default database location is + // Use a custom database not the original default database because SparkCommonIT couldn't + // read&write data to tables in default database. The main reason is default database location is // determined by `hive.metastore.warehouse.dir` in hive-site.xml which is local HDFS address // not real HDFS address. The location of tables created under default database is like - // hdfs://localhost:9000/xxx which couldn't read write data from SparkIT. Will use default + // hdfs://localhost:9000/xxx which couldn't read write data from SparkCommonIT. 
Will use default // database after spark connector support Alter database xx set location command. @BeforeAll void initDefaultDatabase() { - sql("USE " + hiveCatalogName); + sql("USE " + getCatalogName()); createDatabaseIfNotExists(getDefaultDatabase()); } @BeforeEach void init() { - sql("USE " + hiveCatalogName); + sql("USE " + getCatalogName()); sql("USE " + getDefaultDatabase()); } - private String getDefaultDatabase() { + protected String getDefaultDatabase() { return "default_db"; } @Test void testLoadCatalogs() { Set catalogs = getCatalogs(); - Assertions.assertTrue(catalogs.contains(hiveCatalogName)); + Assertions.assertTrue(catalogs.contains(getCatalogName())); } @Test diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java index 0f7b0d385d5..b0b7fd895e6 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/SparkEnvIT.java @@ -24,17 +24,20 @@ import org.slf4j.LoggerFactory; /** Setup Hive, Gravitino, Spark, Metalake environment to execute SparkSQL. */ -public class SparkEnvIT extends SparkUtilIT { +public abstract class SparkEnvIT extends SparkUtilIT { private static final Logger LOG = LoggerFactory.getLogger(SparkEnvIT.class); private static final ContainerSuite containerSuite = ContainerSuite.getInstance(); - protected final String hiveCatalogName = "hive"; private final String metalakeName = "test"; private SparkSession sparkSession; private String hiveMetastoreUri; private String gravitinoUri; + protected abstract String getCatalogName(); + + protected abstract String getProvider(); + @Override protected SparkSession getSparkSession() { Assertions.assertNotNull(sparkSession); @@ -67,9 +70,9 @@ private void initMetalakeAndCatalogs() { properties.put(GravitinoSparkConfig.GRAVITINO_HIVE_METASTORE_URI, hiveMetastoreUri); metalake.createCatalog( - NameIdentifier.of(metalakeName, hiveCatalogName), + NameIdentifier.of(metalakeName, getCatalogName()), Catalog.Type.RELATIONAL, - "hive", + getProvider(), "", properties); } diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java new file mode 100644 index 00000000000..bce6cb212bf --- /dev/null +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/spark/hive/SparkHiveCatalogIT.java @@ -0,0 +1,24 @@ +/* + * Copyright 2024 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. 
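+ *
+ * <p>With SparkCommonIT holding the shared cases and SparkEnvIT the environment setup, a
+ * catalog-specific suite only has to supply a catalog name and a provider. A minimal
+ * sketch of a hypothetical Iceberg counterpart (the class name, catalog name, and
+ * provider string below are illustrative assumptions, not part of this patch):
+ *
+ * <pre>{@code
+ * @Tag("gravitino-docker-it")
+ * @TestInstance(TestInstance.Lifecycle.PER_CLASS)
+ * public class SparkIcebergCatalogIT extends SparkCommonIT {
+ *   @Override
+ *   protected String getCatalogName() {
+ *     return "iceberg";
+ *   }
+ *
+ *   @Override
+ *   protected String getProvider() {
+ *     return "lakehouse-iceberg";
+ *   }
+ * }
+ * }</pre>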
+ */ +package com.datastrato.gravitino.integration.test.spark.hive; + +import com.datastrato.gravitino.integration.test.spark.SparkCommonIT; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.TestInstance; + +@Tag("gravitino-docker-it") +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +public class SparkHiveCatalogIT extends SparkCommonIT { + + @Override + protected String getCatalogName() { + return "hive"; + } + + @Override + protected String getProvider() { + return "hive"; + } +} From b53ff6bf2598e26dfd365c859e99da489b866e1d Mon Sep 17 00:00:00 2001 From: Qi Yu Date: Tue, 19 Mar 2024 22:06:00 +0800 Subject: [PATCH 02/11] [#2569] improvement(trino): Optimize char/varchar mapping for Iceberg catalog between Gravitino and Trino (#2579) ### What changes were proposed in this pull request? Changing the char/varchar conversion for Iceberg catalog between Gravitino and Trino. ### Why are the changes needed? We need to make type conversion more exactly. Fix: #2569 ### Does this PR introduce _any_ user-facing change? N/A. ### How was this patch tested? UT and ITs --- .../lakehouse-iceberg/00006_datatype.sql | 34 +++++------ .../lakehouse-iceberg/00006_datatype.txt | 8 +-- .../lakehouse-iceberg/00007_varchar.sql | 18 ++++++ .../lakehouse-iceberg/00007_varchar.txt | 21 +++++++ .../testsets/tpch/catalog_iceberg_prepare.sql | 58 +++++++++---------- .../iceberg/IcebergDataTypeTransformer.java | 21 ++++++- .../TestIcebergDataTypeTransformer.java | 41 +++++++++++++ 7 files changed, 146 insertions(+), 55 deletions(-) create mode 100644 integration-test/src/test/resources/trino-ci-testset/testsets/lakehouse-iceberg/00007_varchar.sql create mode 100644 integration-test/src/test/resources/trino-ci-testset/testsets/lakehouse-iceberg/00007_varchar.txt create mode 100644 trino-connector/src/test/java/com/datastrato/gravitino/trino/connector/catalog/iceberg/TestIcebergDataTypeTransformer.java diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/lakehouse-iceberg/00006_datatype.sql b/integration-test/src/test/resources/trino-ci-testset/testsets/lakehouse-iceberg/00006_datatype.sql index 8e6e4f95a77..c3d7890550a 100644 --- a/integration-test/src/test/resources/trino-ci-testset/testsets/lakehouse-iceberg/00006_datatype.sql +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/lakehouse-iceberg/00006_datatype.sql @@ -4,8 +4,7 @@ USE "test.gt_iceberg".gt_db2; -- Unsupported Type: TINYINT, SMALLINT CREATE TABLE tb01 ( - f1 VARCHAR(200), - f2 CHAR(20), + f1 VARCHAR, f3 VARBINARY, f4 DECIMAL(10, 3), f5 REAL, @@ -21,17 +20,16 @@ CREATE TABLE tb01 ( SHOW CREATE TABLE tb01; -INSERT INTO tb01 (f1, f2, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15) -VALUES ('Sample text 1', 'Text1', x'65', 123.456, 7.89, 12.34, true, 1000, 1000, 100000, DATE '2024-01-01', TIME '08:00:00', TIMESTAMP '2024-01-01 08:00:00'); +INSERT INTO tb01 (f1, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15) +VALUES ('Sample text 1', x'65', 123.456, 7.89, 12.34, true, 1000, 1000, 100000, DATE '2024-01-01', TIME '08:00:00', TIMESTAMP '2024-01-01 08:00:00'); -INSERT INTO tb01 (f1, f2, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15) -VALUES (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); +INSERT INTO tb01 (f1, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15) +VALUES (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); select * from tb01 order by f1; CREATE TABLE tb02 ( - f1 VARCHAR(200) not null , - f2 CHAR(20) not null , + f1 VARCHAR not null , f3 
VARBINARY not null , f4 DECIMAL(10, 3) not null , f5 REAL not null , @@ -47,20 +45,20 @@ CREATE TABLE tb02 ( show create table tb02; -INSERT INTO tb02 (f1, f2, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15) -VALUES ('Sample text 1', 'Text1', x'65', 123.456, 7.89, 12.34, true, 1000, 1000, 100000, DATE '2024-01-01', TIME '08:00:00', TIMESTAMP '2024-01-01 08:00:00'); +INSERT INTO tb02 (f1, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15) +VALUES ('Sample text 1', x'65', 123.456, 7.89, 12.34, true, 1000, 1000, 100000, DATE '2024-01-01', TIME '08:00:00', TIMESTAMP '2024-01-01 08:00:00'); -INSERT INTO tb02 (f1, f2, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15) -VALUES (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); +INSERT INTO tb02 (f1, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15) +VALUES (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); -INSERT INTO tb02 (f1, f2, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15) -VALUES ('Sample text 1', NULL, x'65', 123.456, 7.89, 12.34, true, 1000, 1000, 100000, DATE '2024-01-01', TIME '08:00:00', TIMESTAMP '2024-01-01 08:00:00'); +INSERT INTO tb02 (f1, f3, f4, f5, f6, f10, f11, f12, f13, f14, f15) +VALUES ('Sample text 1', x'65', 123.456, 7.89, 12.34, 1000, 1000, 100000, DATE '2024-01-01', TIME '08:00:00', TIMESTAMP '2024-01-01 08:00:00'); -INSERT INTO tb02 (f1, f2, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15) -VALUES ('Sample text 1', 'same3', x'65', 123.456, 7.89, 12.34, true, 1000, 1000, NULl, DATE '2024-01-01', TIME '08:00:00', TIMESTAMP '2024-01-01 08:00:00'); +INSERT INTO tb02 (f1, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15) +VALUES ('Sample text 1', x'65', 123.456, 7.89, 12.34, true, 1000, 1000, NULL, DATE '2024-01-01', TIME '08:00:00', TIMESTAMP '2024-01-01 08:00:00'); -INSERT INTO tb02 (f1, f2, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15) -VALUES ('Sample text 1', 'same9', x'65', 123.456, 7.89, 12.34, true, 1000, 1000, 1992382342, DATE '2024-01-01', NULL, TIMESTAMP '2024-01-01 08:00:00'); +INSERT INTO tb02 (f1, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15) +VALUES ('Sample text 1', x'65', 123.456, 7.89, 12.34, true, 1000, 1000, 1992382342, DATE '2024-01-01', NULL, TIMESTAMP '2024-01-01 08:00:00'); drop table tb01; diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/lakehouse-iceberg/00006_datatype.txt b/integration-test/src/test/resources/trino-ci-testset/testsets/lakehouse-iceberg/00006_datatype.txt index aa6b6901c5d..8ff0979aa96 100644 --- a/integration-test/src/test/resources/trino-ci-testset/testsets/lakehouse-iceberg/00006_datatype.txt +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/lakehouse-iceberg/00006_datatype.txt @@ -6,7 +6,6 @@ CREATE TABLE "CREATE TABLE ""test.gt_iceberg"".gt_db2.tb01 ( f1 varchar, - f2 varchar, f3 varbinary, f4 decimal(10, 3), f5 real, @@ -26,14 +25,13 @@ INSERT: 1 row INSERT: 1 row -"Sample text 1","Text1","65","123.456","7.89","12.34","true","1000","1000","100000","2024-01-01","08:00:00.000","2024-01-01 08:00:00.000" -"","","","","","","","","","","","","" +"Sample text 1","65","123.456","7.89","12.34","true","1000","1000","100000","2024-01-01","08:00:00.000","2024-01-01 08:00:00.000" +"","","","","","","","","","","","" CREATE TABLE "CREATE TABLE ""test.gt_iceberg"".gt_db2.tb02 ( f1 varchar NOT NULL, - f2 varchar NOT NULL, f3 varbinary NOT NULL, f4 decimal(10, 3) NOT NULL, f5 real NOT NULL, @@ -52,7 +50,7 @@ INSERT: 1 row NULL value not allowed for NOT NULL column: f1 - NULL value not 
allowed for NOT NULL column: f2 + NULL value not allowed for NOT NULL column: f7 NULL value not allowed for NOT NULL column: f12 diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/lakehouse-iceberg/00007_varchar.sql b/integration-test/src/test/resources/trino-ci-testset/testsets/lakehouse-iceberg/00007_varchar.sql new file mode 100644 index 00000000000..91a3ea10906 --- /dev/null +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/lakehouse-iceberg/00007_varchar.sql @@ -0,0 +1,18 @@ +CREATE SCHEMA "test.gt_iceberg".varchar_db1; + +USE "test.gt_iceberg".varchar_db1; + +CREATE TABLE tb01 (id int, name char(20)); + +CREATE TABLE tb02 (id int, name char); + +CREATE TABLE tb03 (id int, name varchar(233)); + +CREATE TABLE tb04 (id int, name varchar); + +SHOW CREATE TABLE "test.gt_iceberg".varchar_db1.tb04; + +drop table "test.gt_iceberg".varchar_db1.tb04; + +drop schema "test.gt_iceberg".varchar_db1; + diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/lakehouse-iceberg/00007_varchar.txt b/integration-test/src/test/resources/trino-ci-testset/testsets/lakehouse-iceberg/00007_varchar.txt new file mode 100644 index 00000000000..260e856e077 --- /dev/null +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/lakehouse-iceberg/00007_varchar.txt @@ -0,0 +1,21 @@ +CREATE SCHEMA + +USE + + Iceberg does not support the datatype CHAR + + Iceberg does not support the datatype CHAR + + Iceberg does not support the datatype VARCHAR with length + +CREATE TABLE + +"CREATE TABLE ""test.gt_iceberg"".varchar_db1.tb04 ( + id integer, + name varchar +) +COMMENT ''" + +DROP TABLE + +DROP SCHEMA diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/tpch/catalog_iceberg_prepare.sql b/integration-test/src/test/resources/trino-ci-testset/testsets/tpch/catalog_iceberg_prepare.sql index 1f5029a8297..dfd439612d3 100644 --- a/integration-test/src/test/resources/trino-ci-testset/testsets/tpch/catalog_iceberg_prepare.sql +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/tpch/catalog_iceberg_prepare.sql @@ -12,13 +12,13 @@ use "test.gt_iceberg2".gt_tpch2; CREATE TABLE customer ( custkey bigint, - name varchar(25), - address varchar(40), + name varchar, + address varchar, nationkey bigint, - phone varchar(15), + phone varchar, acctbal decimal(12, 2), - mktsegment varchar(10), - comment varchar(117) + mktsegment varchar, + comment varchar ); CREATE TABLE lineitem ( @@ -30,45 +30,45 @@ CREATE TABLE lineitem ( extendedprice decimal(12, 2), discount decimal(12, 2), tax decimal(12, 2), - returnflag varchar(1), - linestatus varchar(1), + returnflag varchar, + linestatus varchar, shipdate date, commitdate date, receiptdate date, - shipinstruct varchar(25), - shipmode varchar(10), - comment varchar(44) + shipinstruct varchar, + shipmode varchar, + comment varchar ); CREATE TABLE nation ( nationkey bigint, - name varchar(25), + name varchar, regionkey bigint, - comment varchar(152) + comment varchar ); CREATE TABLE orders ( orderkey bigint, custkey bigint, - orderstatus varchar(1), + orderstatus varchar, totalprice decimal(12, 2), orderdate date, - orderpriority varchar(15), - clerk varchar(15), + orderpriority varchar, + clerk varchar, shippriority integer, - comment varchar(79) + comment varchar ); CREATE TABLE part ( partkey bigint, - name varchar(55), - mfgr varchar(25), - brand varchar(10), - type varchar(25), + name varchar, + mfgr varchar, + brand varchar, + type varchar, size integer, - container varchar(10), + 
container varchar, retailprice decimal(12, 2), - comment varchar(23) + comment varchar ); CREATE TABLE partsupp ( @@ -76,23 +76,23 @@ CREATE TABLE partsupp ( suppkey bigint, availqty integer, supplycost decimal(12, 2), - comment varchar(199) + comment varchar ); CREATE TABLE region ( regionkey bigint, - name varchar(25), - comment varchar(152) + name varchar, + comment varchar ); CREATE TABLE supplier ( suppkey bigint, - name varchar(25), - address varchar(40), + name varchar, + address varchar, nationkey bigint, - phone varchar(15), + phone varchar, acctbal decimal(12, 2), - comment varchar(101) + comment varchar ); insert into customer select * from tpch.tiny.customer; diff --git a/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/iceberg/IcebergDataTypeTransformer.java b/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/iceberg/IcebergDataTypeTransformer.java index f6f815cda98..21180cd1cf8 100644 --- a/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/iceberg/IcebergDataTypeTransformer.java +++ b/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/iceberg/IcebergDataTypeTransformer.java @@ -8,19 +8,34 @@ import com.datastrato.gravitino.rel.types.Type; import com.datastrato.gravitino.rel.types.Type.Name; import com.datastrato.gravitino.rel.types.Types; +import com.datastrato.gravitino.trino.connector.GravitinoErrorCode; import com.datastrato.gravitino.trino.connector.util.GeneralDataTypeTransformer; +import io.trino.spi.TrinoException; import io.trino.spi.type.VarbinaryType; +import io.trino.spi.type.VarcharType; /** Type transformer between Iceberg and Trino */ public class IcebergDataTypeTransformer extends GeneralDataTypeTransformer { @Override public Type getGravitinoType(io.trino.spi.type.Type type) { - Type gravitinoType = super.getGravitinoType(type); - if (gravitinoType.name() == Name.VARCHAR || gravitinoType.name() == Name.FIXEDCHAR) { + Class typeClass = type.getClass(); + if (typeClass == io.trino.spi.type.CharType.class) { + throw new TrinoException( + GravitinoErrorCode.GRAVITINO_ILLEGAL_ARGUMENT, + "Iceberg does not support the datatype CHAR"); + } else if (typeClass == io.trino.spi.type.VarcharType.class) { + VarcharType varCharType = (VarcharType) type; + if (varCharType.getLength().isPresent()) { + throw new TrinoException( + GravitinoErrorCode.GRAVITINO_ILLEGAL_ARGUMENT, + "Iceberg does not support the datatype VARCHAR with length"); + } + return Types.StringType.get(); } - return gravitinoType; + + return super.getGravitinoType(type); } @Override diff --git a/trino-connector/src/test/java/com/datastrato/gravitino/trino/connector/catalog/iceberg/TestIcebergDataTypeTransformer.java b/trino-connector/src/test/java/com/datastrato/gravitino/trino/connector/catalog/iceberg/TestIcebergDataTypeTransformer.java new file mode 100644 index 00000000000..b69c5d0a31d --- /dev/null +++ b/trino-connector/src/test/java/com/datastrato/gravitino/trino/connector/catalog/iceberg/TestIcebergDataTypeTransformer.java @@ -0,0 +1,41 @@ +/* + * Copyright 2024 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. 
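+ *
+ * <p>A hedged summary of the mapping exercised below (it mirrors the transformer change
+ * in this patch): unbounded VARCHAR maps to a Gravitino string, while CHAR and bounded
+ * VARCHAR are rejected with a TrinoException.
+ *
+ * <pre>{@code
+ * GeneralDataTypeTransformer t = new IcebergDataTypeTransformer();
+ * t.getGravitinoType(VarcharType.VARCHAR);              // Types.StringType.get()
+ * t.getGravitinoType(VarcharType.createVarcharType(8)); // throws TrinoException
+ * t.getGravitinoType(CharType.createCharType(8));       // throws TrinoException
+ * }</pre>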
+ */ + +package com.datastrato.gravitino.trino.connector.catalog.iceberg; + +import com.datastrato.gravitino.rel.types.Types; +import com.datastrato.gravitino.trino.connector.util.GeneralDataTypeTransformer; +import io.trino.spi.TrinoException; +import io.trino.spi.type.VarcharType; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class TestIcebergDataTypeTransformer { + + @Test + public void testTrinoTypeToGravitinoType() { + GeneralDataTypeTransformer generalDataTypeTransformer = new IcebergDataTypeTransformer(); + io.trino.spi.type.Type charTypeWithLengthOne = io.trino.spi.type.CharType.createCharType(1); + + Exception e = + Assert.expectThrows( + TrinoException.class, + () -> generalDataTypeTransformer.getGravitinoType(charTypeWithLengthOne)); + Assert.assertTrue(e.getMessage().contains("Iceberg does not support the datatype CHAR")); + + io.trino.spi.type.Type varcharType = io.trino.spi.type.VarcharType.createVarcharType(1); + e = + Assert.expectThrows( + TrinoException.class, () -> generalDataTypeTransformer.getGravitinoType(varcharType)); + Assert.assertTrue( + e.getMessage().contains("Iceberg does not support the datatype VARCHAR with length")); + + io.trino.spi.type.Type varcharTypeWithoutLength = VarcharType.VARCHAR; + + Assert.assertEquals( + generalDataTypeTransformer.getGravitinoType(varcharTypeWithoutLength), + Types.StringType.get()); + } +} From fe3cf6a153c5b741261d92336a33ffe232f02879 Mon Sep 17 00:00:00 2001 From: teo Date: Mon, 11 Mar 2024 04:30:38 +0800 Subject: [PATCH 03/11] support hive dropPartition --- .../gravitino/rel/SupportsPartitions.java | 36 ++++- .../catalog/hive/HiveTableOperations.java | 84 +++++++++++- .../catalog/hive/TestHiveTableOperations.java | 123 ++++++++++++++++++ .../hive/integration/test/CatalogHiveIT.java | 63 +++++++++ .../gravitino/client/RelationalTable.java | 43 +++++- .../gravitino/client/TestRelationalTable.java | 56 ++++++++ .../dto/requests/DropPartitionsRequest.java | 47 +++++++ .../server/web/rest/PartitionOperations.java | 80 ++++++++++++ .../web/rest/TestPartitionOperations.java | 86 +++++++++++- 9 files changed, 607 insertions(+), 11 deletions(-) create mode 100644 common/src/main/java/com/datastrato/gravitino/dto/requests/DropPartitionsRequest.java diff --git a/api/src/main/java/com/datastrato/gravitino/rel/SupportsPartitions.java b/api/src/main/java/com/datastrato/gravitino/rel/SupportsPartitions.java index 277b1df1086..449d863f415 100644 --- a/api/src/main/java/com/datastrato/gravitino/rel/SupportsPartitions.java +++ b/api/src/main/java/com/datastrato/gravitino/rel/SupportsPartitions.java @@ -8,6 +8,7 @@ import com.datastrato.gravitino.exceptions.NoSuchPartitionException; import com.datastrato.gravitino.exceptions.PartitionAlreadyExistsException; import com.datastrato.gravitino.rel.partitions.Partition; +import java.util.List; /** Interface for tables that support partitions. */ @Evolving @@ -79,22 +80,49 @@ default boolean partitionExists(String partitionName) { /** * Drop a partition with specified name. * - * @param partitionName The identifier of the partition. - * @return true if a partition was deleted, false if the partition did not exist. + * @param partitionName the name of the partition + * @param ifExists If true, will not throw NoSuchPartitionException if the partition not exists + * @return true if a partition was deleted. + */ + boolean dropPartition(String partitionName, boolean ifExists) throws NoSuchPartitionException; + + /** + * Drop partitions with specified names. 
+ * + * @param partitionNames the names of the partition + * @param ifExists If true, will not throw NoSuchPartitionException if the partition not exists + * @return true if all partitions was deleted. */ - boolean dropPartition(String partitionName); + boolean dropPartitions(List partitionNames, boolean ifExists) + throws NoSuchPartitionException, UnsupportedOperationException; /** * If the table supports purging, drop a partition with specified name and completely remove * partition data by skipping a trash. * * @param partitionName The name of the partition. + * @param ifExists If true, will not throw NoSuchPartitionException if the partition not exists * @return true if a partition was deleted, false if the partition did not exist. * @throws NoSuchPartitionException If the partition does not exist. * @throws UnsupportedOperationException If partition purging is not supported. */ - default boolean purgePartition(String partitionName) + default boolean purgePartition(String partitionName, boolean ifExists) throws NoSuchPartitionException, UnsupportedOperationException { throw new UnsupportedOperationException("Partition purging is not supported"); } + + /** + * If the table supports purging, drop partitions with specified names and completely remove + * partition data by skipping a trash. + * + * @param partitionNames The name of the partition. + * @param ifExists If true, will not throw NoSuchPartitionException if the partition not exists + * @return true if a partition was deleted, false if the partition did not exist. + * @throws NoSuchPartitionException If the partition does not exist. + * @throws UnsupportedOperationException If partition purging is not supported. + */ + default boolean purgePartitions(List partitionNames, boolean ifExists) + throws NoSuchPartitionException, UnsupportedOperationException { + throw new UnsupportedOperationException("Partitions purging is not supported"); + } } diff --git a/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveTableOperations.java b/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveTableOperations.java index 8c76f722f21..0c979b7927c 100644 --- a/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveTableOperations.java +++ b/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveTableOperations.java @@ -18,15 +18,19 @@ import com.google.common.base.Preconditions; import java.io.IOException; import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.IntStream; import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.api.UnknownTableException; import org.apache.parquet.Strings; import org.apache.thrift.TException; @@ -215,8 +219,84 @@ private org.apache.hadoop.hive.metastore.api.Partition toHivePartition( } @Override - public boolean dropPartition(String partitionName) { - throw new UnsupportedOperationException(); + public boolean dropPartition(String partitionName, boolean ifExists) + throws NoSuchPartitionException { + try { + // check the 
partition exists + if (!ifExists) { + table + .clientPool() + .run(c -> c.getPartition(table.schemaName(), table.name(), partitionName)); + } + + // get all partitions that will be deleted + Table hiveTable = table.clientPool().run(c -> c.getTable(table.schemaName(), table.name())); + List partitions = + table + .clientPool() + .run( + c -> + c.listPartitions( + table.schemaName(), + table.name(), + getFilterPartitionList(hiveTable, partitionName), + (short) -1)); + + // delete partitions iteratively + for (org.apache.hadoop.hive.metastore.api.Partition partition : partitions) { + table + .clientPool() + .run( + c -> + c.dropPartition( + partition.getDbName(), + partition.getTableName(), + partition.getValues(), + false)); + } + } catch (UnknownTableException e) { + if (!ifExists) { + throw new NoSuchTableException( + e, "Hive table %s does not exist in Hive Metastore", table.name()); + } + + } catch (NoSuchObjectException e) { + if (!ifExists) { + throw new NoSuchPartitionException( + e, "Hive partition %s does not exist in Hive Metastore", partitionName); + } + + } catch (TException | InterruptedException e) { + throw new RuntimeException( + "Failed to get partition " + + partitionName + + " of table " + + table.name() + + "from Hive Metastore", + e); + } + return true; + } + + @Override + public boolean dropPartitions(List partitionNames, boolean ifExists) + throws NoSuchPartitionException, UnsupportedOperationException { + if (partitionNames.size() > 1) { + throw new UnsupportedOperationException("Only one partition is supported"); + } + return dropPartition(partitionNames.get(0), ifExists); + } + + private List getFilterPartitionList(Table dropTable, String partitionSpec) { + Map partMap = new HashMap<>(); + String[] parts = partitionSpec.split("/"); + for (String part : parts) { + String[] keyValue = part.split("="); + if (keyValue.length == 2) { + partMap.put(keyValue[0], keyValue[1]); + } + } + return MetaStoreUtils.getPvals(dropTable.getPartitionKeys(), partMap); } @Override diff --git a/catalogs/catalog-hive/src/test/java/com/datastrato/gravitino/catalog/hive/TestHiveTableOperations.java b/catalogs/catalog-hive/src/test/java/com/datastrato/gravitino/catalog/hive/TestHiveTableOperations.java index 928544ae13d..0c308f9293f 100644 --- a/catalogs/catalog-hive/src/test/java/com/datastrato/gravitino/catalog/hive/TestHiveTableOperations.java +++ b/catalogs/catalog-hive/src/test/java/com/datastrato/gravitino/catalog/hive/TestHiveTableOperations.java @@ -26,6 +26,8 @@ import com.google.common.collect.Maps; import java.time.LocalDate; import java.util.Arrays; +import java.util.Collections; +import java.util.List; import java.util.Map; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; @@ -156,4 +158,125 @@ public void testAddPartition() { .contains("Hive partition field name must be in table partitioning field names"), exception.getMessage()); } + + @Test + public void testDropPartition() { + // add partition: city=2/dt=2020-01-01 + String partitionName1 = "city=2/dt=2020-01-01"; + String[] fieldCity1 = new String[] {columns[1].name()}; + Literal valueCity1 = Literals.byteLiteral((byte) 2); + String[] fieldDt1 = new String[] {columns[2].name()}; + Literal valueDt1 = Literals.dateLiteral(LocalDate.parse("2020-01-01")); + Partition partition1 = + Partitions.identity( + new String[][] {fieldCity1, fieldDt1}, new Literal[] {valueCity1, valueDt1}); + hiveTable.supportPartitions().addPartition(partition1); + + // add partition: city=3/dt=2020-01-01 + String 
partitionName2 = "city=3/dt=2020-01-01"; + String[] fieldCity2 = new String[] {columns[1].name()}; + Literal valueCity2 = Literals.byteLiteral((byte) 3); + String[] fieldDt2 = new String[] {columns[2].name()}; + Literal valueDt2 = Literals.dateLiteral(LocalDate.parse("2020-01-01")); + Partition partition2 = + Partitions.identity( + new String[][] {fieldCity2, fieldDt2}, new Literal[] {valueCity2, valueDt2}); + hiveTable.supportPartitions().addPartition(partition2); + + // add partition: city=3/dt=2020-01-02 + String partitionName3 = "city=3/dt=2020-01-02"; + String[] fieldCity3 = new String[] {columns[1].name()}; + Literal valueCity3 = Literals.byteLiteral((byte) 3); + String[] fieldDt3 = new String[] {columns[2].name()}; + Literal valueDt3 = Literals.dateLiteral(LocalDate.parse("2020-01-02")); + Partition partition3 = + Partitions.identity( + new String[][] {fieldCity3, fieldDt3}, new Literal[] {valueCity3, valueDt3}); + hiveTable.supportPartitions().addPartition(partition3); + + // test drop one partition: city=2/dt=2020-01-01 + hiveTable.supportPartitions().dropPartition(partitionName1, true); + NoSuchPartitionException exception1 = + Assertions.assertThrows( + NoSuchPartitionException.class, + () -> { + hiveTable.supportPartitions().getPartition(partitionName1); + }); + Assertions.assertEquals( + String.format("Hive partition %s does not exist in Hive Metastore", partitionName1), + exception1.getMessage()); + + // test drop cascade partitions: city=3 + hiveTable.supportPartitions().dropPartition(partitionName2, true); + NoSuchPartitionException exception2 = + Assertions.assertThrows( + NoSuchPartitionException.class, + () -> { + hiveTable.supportPartitions().getPartition(partitionName2); + }); + Assertions.assertEquals( + String.format("Hive partition %s does not exist in Hive Metastore", partitionName2), + exception2.getMessage()); + hiveTable.supportPartitions().dropPartition(partitionName3, true); + NoSuchPartitionException exception3 = + Assertions.assertThrows( + NoSuchPartitionException.class, + () -> { + hiveTable.supportPartitions().getPartition(partitionName3); + }); + Assertions.assertEquals( + String.format("Hive partition %s does not exist in Hive Metastore", partitionName3), + exception3.getMessage()); + + // test exception + NoSuchPartitionException exception4 = + Assertions.assertThrows( + NoSuchPartitionException.class, + () -> { + hiveTable.supportPartitions().dropPartition("does_not_exist_partition", false); + }); + Assertions.assertEquals( + "Hive partition does_not_exist_partition does not exist in Hive Metastore", + exception4.getMessage()); + } + + @Test + public void testDropPartitions() { + // add partition: city=2/dt=2020-01-01 + String partitionName1 = "city=2/dt=2020-01-01"; + String[] fieldCity1 = new String[] {columns[1].name()}; + Literal valueCity1 = Literals.byteLiteral((byte) 2); + String[] fieldDt1 = new String[] {columns[2].name()}; + Literal valueDt1 = Literals.dateLiteral(LocalDate.parse("2020-01-01")); + Partition partition1 = + Partitions.identity( + new String[][] {fieldCity1, fieldDt1}, new Literal[] {valueCity1, valueDt1}); + hiveTable.supportPartitions().addPartition(partition1); + + // test drop one partition: city=2/dt=2020-01-01 + List partitionNames = Collections.singletonList(partitionName1); + hiveTable.supportPartitions().dropPartitions(partitionNames, true); + NoSuchPartitionException exception1 = + Assertions.assertThrows( + NoSuchPartitionException.class, + () -> { + hiveTable.supportPartitions().getPartition(partitionName1); + }); + 
Assertions.assertEquals( + String.format("Hive partition %s does not exist in Hive Metastore", partitionName1), + exception1.getMessage()); + + // test exception + NoSuchPartitionException exception4 = + Assertions.assertThrows( + NoSuchPartitionException.class, + () -> { + hiveTable + .supportPartitions() + .dropPartitions(Collections.singletonList("does_not_exist_partition"), false); + }); + Assertions.assertEquals( + "Hive partition does_not_exist_partition does not exist in Hive Metastore", + exception4.getMessage()); + } } diff --git a/catalogs/catalog-hive/src/test/java/com/datastrato/gravitino/catalog/hive/integration/test/CatalogHiveIT.java b/catalogs/catalog-hive/src/test/java/com/datastrato/gravitino/catalog/hive/integration/test/CatalogHiveIT.java index a8e5f38b2ae..fe21619475f 100644 --- a/catalogs/catalog-hive/src/test/java/com/datastrato/gravitino/catalog/hive/integration/test/CatalogHiveIT.java +++ b/catalogs/catalog-hive/src/test/java/com/datastrato/gravitino/catalog/hive/integration/test/CatalogHiveIT.java @@ -44,6 +44,7 @@ import com.datastrato.gravitino.dto.rel.partitioning.Partitioning; import com.datastrato.gravitino.exceptions.NoSuchCatalogException; import com.datastrato.gravitino.exceptions.NoSuchMetalakeException; +import com.datastrato.gravitino.exceptions.NoSuchPartitionException; import com.datastrato.gravitino.exceptions.NoSuchSchemaException; import com.datastrato.gravitino.exceptions.NoSuchTableException; import com.datastrato.gravitino.integration.test.container.ContainerSuite; @@ -814,6 +815,68 @@ public void testAddPartition() throws TException, InterruptedException { Assertions.assertEquals(2, count); } + @Test + public void testDropPartition() throws TException, InterruptedException { + Table createdTable = preparePartitionedTable(); + + // add partition "hive_col_name2=2023-01-02/hive_col_name3=gravitino_it_test2" + String[] field1 = new String[] {"hive_col_name2"}; + String[] field2 = new String[] {"hive_col_name3"}; + Literal literal1 = Literals.dateLiteral(LocalDate.parse("2023-01-02")); + Literal literal2 = Literals.stringLiteral("gravitino_it_test2"); + Partition identity = + Partitions.identity(new String[][] {field1, field2}, new Literal[] {literal1, literal2}); + IdentityPartition partitionAdded = + (IdentityPartition) createdTable.supportPartitions().addPartition(identity); + + // test drop partition "hive_col_name2=2023-01-02/hive_col_name3=gravitino_it_test2" + boolean dropRes = createdTable.supportPartitions().dropPartition(partitionAdded.name(), true); + Assertions.assertTrue(dropRes); + Assertions.assertThrows( + NoSuchObjectException.class, + () -> + hiveClientPool.run( + client -> + client.getPartition(schemaName, createdTable.name(), partitionAdded.name()))); + + // test no-exist partition with ifExist=false + Assertions.assertThrows( + NoSuchPartitionException.class, + () -> createdTable.supportPartitions().dropPartition(partitionAdded.name(), false)); + } + + @Test + public void testDropPartitions() throws TException, InterruptedException { + Table createdTable = preparePartitionedTable(); + + // add partition "hive_col_name2=2023-01-02/hive_col_name3=gravitino_it_test2" + String[] field1 = new String[] {"hive_col_name2"}; + String[] field2 = new String[] {"hive_col_name3"}; + Literal literal1 = Literals.dateLiteral(LocalDate.parse("2023-01-02")); + Literal literal2 = Literals.stringLiteral("gravitino_it_test2"); + Partition identity = + Partitions.identity(new String[][] {field1, field2}, new Literal[] {literal1, literal2}); + 
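+    // register the partition through the Gravitino API before exercising dropPartitions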
IdentityPartition partitionAdded = + (IdentityPartition) createdTable.supportPartitions().addPartition(identity); + + List partitionNames = Collections.singletonList(partitionAdded.name()); + + // test drop partition "hive_col_name2=2023-01-02/hive_col_name3=gravitino_it_test2" + boolean dropRes = createdTable.supportPartitions().dropPartitions(partitionNames, true); + Assertions.assertTrue(dropRes); + Assertions.assertThrows( + NoSuchObjectException.class, + () -> + hiveClientPool.run( + client -> + client.getPartition(schemaName, createdTable.name(), partitionAdded.name()))); + + // test no-exist partition with ifExist=false + Assertions.assertThrows( + NoSuchPartitionException.class, + () -> createdTable.supportPartitions().dropPartitions(partitionNames, false)); + } + private Table preparePartitionedTable() throws TException, InterruptedException { Column[] columns = createColumns(); diff --git a/clients/client-java/src/main/java/com/datastrato/gravitino/client/RelationalTable.java b/clients/client-java/src/main/java/com/datastrato/gravitino/client/RelationalTable.java index 848aeefb008..e634b8e720e 100644 --- a/clients/client-java/src/main/java/com/datastrato/gravitino/client/RelationalTable.java +++ b/clients/client-java/src/main/java/com/datastrato/gravitino/client/RelationalTable.java @@ -11,6 +11,8 @@ import com.datastrato.gravitino.dto.rel.TableDTO; import com.datastrato.gravitino.dto.rel.partitions.PartitionDTO; import com.datastrato.gravitino.dto.requests.AddPartitionsRequest; +import com.datastrato.gravitino.dto.requests.DropPartitionsRequest; +import com.datastrato.gravitino.dto.responses.DropResponse; import com.datastrato.gravitino.dto.responses.PartitionListResponse; import com.datastrato.gravitino.dto.responses.PartitionNameListResponse; import com.datastrato.gravitino.dto.responses.PartitionResponse; @@ -28,12 +30,16 @@ import com.google.common.annotations.VisibleForTesting; import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; import javax.annotation.Nullable; import lombok.SneakyThrows; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** Represents a relational table. */ public class RelationalTable implements Table, SupportsPartitions { + private static final Logger LOG = LoggerFactory.getLogger(RelationalTable.class); /** * Creates a new RelationalTable. @@ -219,12 +225,43 @@ public Partition addPartition(Partition partition) throws PartitionAlreadyExists /** * Drops the partition with the given name. * - * @param partitionName The identifier of the partition. + * @param partitionName The name of the partition. + * @return true if the partition is dropped, false otherwise. + */ + @Override + public boolean dropPartition(String partitionName, boolean ifExists) + throws NoSuchPartitionException { + DropResponse resp = + restClient.delete( + formatPartitionRequestPath(getPartitionRequestPath(), partitionName), + DropResponse.class, + Collections.emptyMap(), + ErrorHandlers.partitionErrorHandler()); + resp.validate(); + return resp.dropped(); + } + + /** + * Drops the partition with the given name. + * + * @param partitionNames The name list of the partition. * @return true if the partition is dropped, false otherwise. 
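+   * @throws NoSuchPartitionException If {@code ifExists} is false and one of the named
+   *     partitions does not exist.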
*/ @Override - public boolean dropPartition(String partitionName) { - throw new UnsupportedOperationException(); + public boolean dropPartitions(List partitionNames, boolean ifExists) + throws NoSuchPartitionException { + DropPartitionsRequest req = new DropPartitionsRequest(partitionNames.toArray(new String[0])); + req.validate(); + + DropResponse resp = + restClient.post( + getPartitionRequestPath() + "/delete", + req, + DropResponse.class, + Collections.emptyMap(), + ErrorHandlers.partitionErrorHandler()); + resp.validate(); + return resp.dropped(); } /** diff --git a/clients/client-java/src/test/java/com/datastrato/gravitino/client/TestRelationalTable.java b/clients/client-java/src/test/java/com/datastrato/gravitino/client/TestRelationalTable.java index 6db97370103..4676407ecb4 100644 --- a/clients/client-java/src/test/java/com/datastrato/gravitino/client/TestRelationalTable.java +++ b/clients/client-java/src/test/java/com/datastrato/gravitino/client/TestRelationalTable.java @@ -25,8 +25,10 @@ import com.datastrato.gravitino.dto.rel.partitions.PartitionDTO; import com.datastrato.gravitino.dto.rel.partitions.RangePartitionDTO; import com.datastrato.gravitino.dto.requests.AddPartitionsRequest; +import com.datastrato.gravitino.dto.requests.DropPartitionsRequest; import com.datastrato.gravitino.dto.requests.SchemaCreateRequest; import com.datastrato.gravitino.dto.requests.TableCreateRequest; +import com.datastrato.gravitino.dto.responses.DropResponse; import com.datastrato.gravitino.dto.responses.ErrorResponse; import com.datastrato.gravitino.dto.responses.PartitionListResponse; import com.datastrato.gravitino.dto.responses.PartitionNameListResponse; @@ -46,6 +48,7 @@ import com.datastrato.gravitino.rel.types.Types; import com.fasterxml.jackson.core.JsonProcessingException; import com.google.common.collect.Maps; +import java.util.Arrays; import java.util.Collections; import org.apache.hc.core5.http.Method; import org.junit.jupiter.api.Assertions; @@ -250,4 +253,57 @@ public void testAddPartition() throws JsonProcessingException { PartitionAlreadyExistsException.class, () -> partitions.addPartition(partition)); Assertions.assertEquals("partition already exists", exception.getMessage()); } + + @Test + public void testDropPartition() throws JsonProcessingException { + String partitionName = "p1"; + + RelationalTable table = (RelationalTable) partitionedTable; + String partitionPath = + withSlash( + RelationalTable.formatPartitionRequestPath( + table.getPartitionRequestPath(), partitionName)); + DropResponse resp = new DropResponse(true); + buildMockResource(Method.DELETE, partitionPath, null, resp, SC_OK); + Assertions.assertTrue(table.supportPartitions().dropPartition(partitionName, true)); + + // test throws exception + ErrorResponse errorResp = + ErrorResponse.notFound( + NoSuchPartitionException.class.getSimpleName(), "partition not found"); + buildMockResource(Method.DELETE, partitionPath, null, errorResp, SC_NOT_FOUND); + + NoSuchPartitionException exception = + Assertions.assertThrows( + NoSuchPartitionException.class, + () -> table.supportPartitions().dropPartition(partitionName, false)); + Assertions.assertEquals("partition not found", exception.getMessage()); + } + + @Test + public void testDropPartitions() throws JsonProcessingException { + String[] partitionNames = {"p1"}; + DropPartitionsRequest req = new DropPartitionsRequest(partitionNames); + + RelationalTable table = (RelationalTable) partitionedTable; + String partitionPath = withSlash(table.getPartitionRequestPath()) + 
"/delete"; + DropResponse resp = new DropResponse(true); + buildMockResource(Method.POST, partitionPath, req, resp, SC_OK); + Assertions.assertTrue( + table.supportPartitions().dropPartitions(Arrays.asList(partitionNames), true)); + + // test throws exception + String[] partitionNames1 = {"p2"}; + DropPartitionsRequest req1 = new DropPartitionsRequest(partitionNames1); + ErrorResponse errorResp = + ErrorResponse.notFound( + NoSuchPartitionException.class.getSimpleName(), "partition not found"); + buildMockResource(Method.POST, partitionPath, req1, errorResp, SC_NOT_FOUND); + + NoSuchPartitionException exception = + Assertions.assertThrows( + NoSuchPartitionException.class, + () -> table.supportPartitions().dropPartitions(Arrays.asList(partitionNames1), false)); + Assertions.assertEquals("partition not found", exception.getMessage()); + } } diff --git a/common/src/main/java/com/datastrato/gravitino/dto/requests/DropPartitionsRequest.java b/common/src/main/java/com/datastrato/gravitino/dto/requests/DropPartitionsRequest.java new file mode 100644 index 00000000000..ecf583ca192 --- /dev/null +++ b/common/src/main/java/com/datastrato/gravitino/dto/requests/DropPartitionsRequest.java @@ -0,0 +1,47 @@ +/* + * Copyright 2024 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ +package com.datastrato.gravitino.dto.requests; + +import com.datastrato.gravitino.rest.RESTRequest; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import lombok.extern.jackson.Jacksonized; + +/** Request to add partitions to a table. */ +@Getter +@EqualsAndHashCode +@ToString +@Builder +@Jacksonized +public class DropPartitionsRequest implements RESTRequest { + + @JsonProperty("partitionNames") + private final String[] partitionNames; + + /** Default constructor for Jackson. */ + public DropPartitionsRequest() { + this(null); + } + + /** + * Constructor for the request. + * + * @param partitionNames The partitionNames to add. 
+ */ + public DropPartitionsRequest(String[] partitionNames) { + this.partitionNames = partitionNames; + } + + @Override + public void validate() throws IllegalArgumentException { + Preconditions.checkArgument(partitionNames != null, "partitions must not be null"); + Preconditions.checkArgument( + partitionNames.length == 1, "Haven't yet implemented multiple partitions"); + } +} diff --git a/server/src/main/java/com/datastrato/gravitino/server/web/rest/PartitionOperations.java b/server/src/main/java/com/datastrato/gravitino/server/web/rest/PartitionOperations.java index 118d1e63a8a..9fbbdb238f4 100644 --- a/server/src/main/java/com/datastrato/gravitino/server/web/rest/PartitionOperations.java +++ b/server/src/main/java/com/datastrato/gravitino/server/web/rest/PartitionOperations.java @@ -13,6 +13,8 @@ import com.datastrato.gravitino.catalog.CatalogOperationDispatcher; import com.datastrato.gravitino.dto.rel.partitions.PartitionDTO; import com.datastrato.gravitino.dto.requests.AddPartitionsRequest; +import com.datastrato.gravitino.dto.requests.DropPartitionsRequest; +import com.datastrato.gravitino.dto.responses.DropResponse; import com.datastrato.gravitino.dto.responses.PartitionListResponse; import com.datastrato.gravitino.dto.responses.PartitionNameListResponse; import com.datastrato.gravitino.dto.responses.PartitionResponse; @@ -24,8 +26,10 @@ import com.datastrato.gravitino.rel.partitions.Partition; import com.datastrato.gravitino.server.web.Utils; import com.google.common.base.Preconditions; +import java.util.Arrays; import javax.inject.Inject; import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.DELETE; import javax.ws.rs.DefaultValue; import javax.ws.rs.GET; import javax.ws.rs.POST; @@ -35,9 +39,12 @@ import javax.ws.rs.QueryParam; import javax.ws.rs.core.Context; import javax.ws.rs.core.Response; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; @Path("/metalakes/{metalake}/catalogs/{catalog}/schemas/{schema}/tables/{table}/partitions") public class PartitionOperations { + private static final Logger LOG = LoggerFactory.getLogger(PartitionOperations.class); private final CatalogOperationDispatcher dispatcher; @Context private HttpServletRequest httpRequest; @@ -146,4 +153,77 @@ public Response addPartitions( return ExceptionHandlers.handlePartitionException(OperationType.CREATE, "", table, e); } } + + @DELETE + @Path("{partition}") + @Produces("application/vnd.gravitino.v1+json") + @Timed(name = "drop-partition." + MetricNames.HTTP_PROCESS_DURATION, absolute = true) + @ResponseMetered(name = "drop-partition", absolute = true) + public Response dropPartition( + @PathParam("metalake") String metalake, + @PathParam("catalog") String catalog, + @PathParam("schema") String schema, + @PathParam("table") String table, + @PathParam("partition") String partition, + @QueryParam("purge") @DefaultValue("false") boolean purge, + @QueryParam("ifExists") @DefaultValue("false") boolean ifExists) { + try { + return Utils.doAs( + httpRequest, + () -> { + NameIdentifier tableIdent = NameIdentifier.of(metalake, catalog, schema, table); + Table loadTable = dispatcher.loadTable(tableIdent); + boolean dropped = + purge + ? 
loadTable.supportPartitions().purgePartition(partition, ifExists) + : loadTable.supportPartitions().dropPartition(partition, ifExists); + if (!dropped) { + LOG.warn( + "Failed to drop partition {} under table {} under schema {}", + partition, + table, + schema); + } + return Utils.ok(new DropResponse(dropped)); + }); + } catch (Exception e) { + return ExceptionHandlers.handlePartitionException(OperationType.DROP, "", table, e); + } + } + + @POST + @Produces("application/vnd.gravitino.v1+json") + @Timed(name = "drop-partitions." + MetricNames.HTTP_PROCESS_DURATION, absolute = true) + @Path("/delete") + @ResponseMetered(name = "drop-partitions", absolute = true) + public Response dropPartitions( + @PathParam("metalake") String metalake, + @PathParam("catalog") String catalog, + @PathParam("schema") String schema, + @PathParam("table") String table, + @QueryParam("purge") @DefaultValue("false") boolean purge, + @QueryParam("ifExists") @DefaultValue("false") boolean ifExists, + DropPartitionsRequest request) { + Preconditions.checkArgument( + request.getPartitionNames().length == 1, "Only one partition is supported"); + try { + return Utils.doAs( + httpRequest, + () -> { + NameIdentifier tableIdent = NameIdentifier.of(metalake, catalog, schema, table); + Table loadTable = dispatcher.loadTable(tableIdent); + boolean dropped = + purge + ? loadTable + .supportPartitions() + .purgePartitions(Arrays.asList(request.getPartitionNames()), ifExists) + : loadTable + .supportPartitions() + .dropPartitions(Arrays.asList(request.getPartitionNames()), ifExists); + return Utils.ok(new DropResponse(dropped)); + }); + } catch (Exception e) { + return ExceptionHandlers.handlePartitionException(OperationType.DROP, "", table, e); + } + } } diff --git a/server/src/test/java/com/datastrato/gravitino/server/web/rest/TestPartitionOperations.java b/server/src/test/java/com/datastrato/gravitino/server/web/rest/TestPartitionOperations.java index 1a5803bb7c1..38c6b2db175 100644 --- a/server/src/test/java/com/datastrato/gravitino/server/web/rest/TestPartitionOperations.java +++ b/server/src/test/java/com/datastrato/gravitino/server/web/rest/TestPartitionOperations.java @@ -19,6 +19,8 @@ import com.datastrato.gravitino.catalog.CatalogOperationDispatcher; import com.datastrato.gravitino.dto.rel.partitions.PartitionDTO; import com.datastrato.gravitino.dto.requests.AddPartitionsRequest; +import com.datastrato.gravitino.dto.requests.DropPartitionsRequest; +import com.datastrato.gravitino.dto.responses.DropResponse; import com.datastrato.gravitino.dto.responses.ErrorConstants; import com.datastrato.gravitino.dto.responses.ErrorResponse; import com.datastrato.gravitino.dto.responses.PartitionListResponse; @@ -42,6 +44,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; import java.io.IOException; +import java.util.List; import java.util.Map; import javax.servlet.http.HttpServletRequest; import javax.ws.rs.client.Entity; @@ -183,8 +186,30 @@ public Partition addPartition(Partition partition) } @Override - public boolean dropPartition(String partitionName) { - return false; + public boolean dropPartition(String partitionName, boolean ifExists) { + if (partitions.containsKey(partitionName)) { + return true; + } else { + if (ifExists) { + return true; + } else { + throw new NoSuchPartitionException(partitionName); + } + } + } + + @Override + public boolean dropPartitions(List partitionNames, boolean ifExists) + throws NoSuchPartitionException, UnsupportedOperationException { + if 
(partitions.containsKey(partitionNames.get(0))) { + return true; + } else { + if (ifExists) { + return true; + } else { + throw new NoSuchPartitionException(partitionNames.get(0)); + } + } } }); when(dispatcher.loadTable(any())).thenReturn(mockedTable); @@ -347,4 +372,61 @@ public void testAddPartition() { PartitionAlreadyExistsException.class.getSimpleName(), errorResp2.getType()); Assertions.assertTrue(errorResp2.getMessage().contains(partition1.name())); } + + @Test + public void testDropPartition() { + mockPartitionedTable(); + + // drop exist partition with ifExists=ture + Response resp = + target(partitionPath(metalake, catalog, schema, table) + "p1") + .queryParam("purge", "false") + .queryParam("ifExists", "true") + .request(MediaType.APPLICATION_JSON_TYPE) + .accept("application/vnd.gravitino.v1+json") + .delete(); + + Assertions.assertEquals(Response.Status.OK.getStatusCode(), resp.getStatus()); + Assertions.assertEquals(MediaType.APPLICATION_JSON_TYPE, resp.getMediaType()); + + DropResponse dropResponse = resp.readEntity(DropResponse.class); + Assertions.assertEquals(0, dropResponse.getCode()); + Assertions.assertTrue(dropResponse.dropped()); + + // Test throws exception, drop no-exist partition with ifExists=false + Response resp1 = + target(partitionPath(metalake, catalog, schema, table) + "p5") + .queryParam("purge", "false") + .queryParam("ifExists", "false") + .request(MediaType.APPLICATION_JSON_TYPE) + .accept("application/vnd.gravitino.v1+json") + .delete(); + + Assertions.assertEquals(Response.Status.NOT_FOUND.getStatusCode(), resp1.getStatus()); + + ErrorResponse errorResp = resp1.readEntity(ErrorResponse.class); + Assertions.assertEquals(ErrorConstants.NOT_FOUND_CODE, errorResp.getCode()); + Assertions.assertEquals(NoSuchPartitionException.class.getSimpleName(), errorResp.getType()); + } + + @Test + public void testDropPartitions() { + mockPartitionedTable(); + + // drop partition, only one partition is supported + String[] partitionNames = {"p1"}; + DropPartitionsRequest req = new DropPartitionsRequest(partitionNames); + Response resp = + target(partitionPath(metalake, catalog, schema, table) + "delete") + .request(MediaType.APPLICATION_JSON_TYPE) + .accept("application/vnd.gravitino.v1+json") + .post(Entity.entity(req, MediaType.APPLICATION_JSON_TYPE)); + + Assertions.assertEquals(Response.Status.OK.getStatusCode(), resp.getStatus()); + Assertions.assertEquals(MediaType.APPLICATION_JSON_TYPE, resp.getMediaType()); + + DropResponse dropResponse = resp.readEntity(DropResponse.class); + Assertions.assertEquals(0, dropResponse.getCode()); + Assertions.assertTrue(dropResponse.dropped()); + } } From 0f11754ab5e4d2b56bd1bb7ac5bd5226e5cbb3b4 Mon Sep 17 00:00:00 2001 From: teo Date: Mon, 11 Mar 2024 17:00:30 +0800 Subject: [PATCH 04/11] throw exception when user deliver incorrect partition name --- .../gravitino/catalog/hive/HiveTableOperations.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveTableOperations.java b/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveTableOperations.java index 0c979b7927c..c5dcce7cead 100644 --- a/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveTableOperations.java +++ b/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveTableOperations.java @@ -287,7 +287,7 @@ public boolean dropPartitions(List partitionNames, boolean ifExists) return 
dropPartition(partitionNames.get(0), ifExists); } - private List getFilterPartitionList(Table dropTable, String partitionSpec) { + private List getFilterPartitionList(Table dropTable, String partitionSpec) throws NoSuchPartitionException{ Map partMap = new HashMap<>(); String[] parts = partitionSpec.split("/"); for (String part : parts) { @@ -295,6 +295,9 @@ private List getFilterPartitionList(Table dropTable, String partitionSpe if (keyValue.length == 2) { partMap.put(keyValue[0], keyValue[1]); } + else { + throw new NoSuchPartitionException("Hive partition %s does not exist in Hive Metastore", partitionSpec); + } } return MetaStoreUtils.getPvals(dropTable.getPartitionKeys(), partMap); } From 90698b7a4072a1f3e572d84da5e9ba1ffe68d8c9 Mon Sep 17 00:00:00 2001 From: teo Date: Mon, 11 Mar 2024 18:00:20 +0800 Subject: [PATCH 05/11] improve error format partition check --- .../catalog/hive/HiveTableOperations.java | 34 +++++++++++-------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveTableOperations.java b/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveTableOperations.java index c5dcce7cead..1750487925e 100644 --- a/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveTableOperations.java +++ b/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveTableOperations.java @@ -34,8 +34,11 @@ import org.apache.hadoop.hive.metastore.api.UnknownTableException; import org.apache.parquet.Strings; import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class HiveTableOperations implements TableOperations, SupportsPartitions { + public static final Logger LOG = LoggerFactory.getLogger(HiveTableOperations.class); private static final String PARTITION_NAME_DELIMITER = "/"; private static final String PARTITION_VALUE_DELIMITER = "="; @@ -223,10 +226,17 @@ public boolean dropPartition(String partitionName, boolean ifExists) throws NoSuchPartitionException { try { // check the partition exists - if (!ifExists) { + try { table .clientPool() .run(c -> c.getPartition(table.schemaName(), table.name(), partitionName)); + } catch (NoSuchObjectException e) { + if (ifExists) { + return true; + } else { + throw new NoSuchPartitionException( + e, "Hive partition %s does not exist in Hive Metastore", partitionName); + } } // get all partitions that will be deleted @@ -255,16 +265,8 @@ public boolean dropPartition(String partitionName, boolean ifExists) false)); } } catch (UnknownTableException e) { - if (!ifExists) { - throw new NoSuchTableException( - e, "Hive table %s does not exist in Hive Metastore", table.name()); - } - - } catch (NoSuchObjectException e) { - if (!ifExists) { - throw new NoSuchPartitionException( - e, "Hive partition %s does not exist in Hive Metastore", partitionName); - } + throw new NoSuchTableException( + e, "Hive table %s does not exist in Hive Metastore", table.name()); } catch (TException | InterruptedException e) { throw new RuntimeException( @@ -287,16 +289,18 @@ public boolean dropPartitions(List partitionNames, boolean ifExists) return dropPartition(partitionNames.get(0), ifExists); } - private List getFilterPartitionList(Table dropTable, String partitionSpec) throws NoSuchPartitionException{ + private List getFilterPartitionList(Table dropTable, String partitionSpec) + throws NoSuchPartitionException { Map partMap = new HashMap<>(); String[] parts = 
partitionSpec.split("/"); for (String part : parts) { String[] keyValue = part.split("="); if (keyValue.length == 2) { partMap.put(keyValue[0], keyValue[1]); - } - else { - throw new NoSuchPartitionException("Hive partition %s does not exist in Hive Metastore", partitionSpec); + } else { + LOG.error("Error partition format: " + partitionSpec); + throw new NoSuchPartitionException( + "Hive partition %s does not exist in Hive Metastore", partitionSpec); } } return MetaStoreUtils.getPvals(dropTable.getPartitionKeys(), partMap); From 0a691476ed8ee7e4e9e69c03efa6b5cad8c508ae Mon Sep 17 00:00:00 2001 From: teo Date: Sun, 4 Feb 2024 15:37:41 +0800 Subject: [PATCH 06/11] create onemeta package --- bili-onemeta/build.gradle.kts | 58 +++++++++ .../gravitino/bili/onemeta/Utils.java | 123 ++++++++++++++++++ .../bili/onemeta/rest/MetalakeOperations.java | 64 +++++++++ .../gravitino/server/GravitinoServer.java | 1 + settings.gradle.kts | 2 +- 5 files changed, 247 insertions(+), 1 deletion(-) create mode 100644 bili-onemeta/build.gradle.kts create mode 100644 bili-onemeta/src/main/java/com/datastrato/gravitino/bili/onemeta/Utils.java create mode 100644 bili-onemeta/src/main/java/com/datastrato/gravitino/bili/onemeta/rest/MetalakeOperations.java diff --git a/bili-onemeta/build.gradle.kts b/bili-onemeta/build.gradle.kts new file mode 100644 index 00000000000..ece82fdbd4a --- /dev/null +++ b/bili-onemeta/build.gradle.kts @@ -0,0 +1,58 @@ +/* + * Copyright 2023 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ +plugins { + `maven-publish` + id("java") + id("idea") +} + +dependencies { + implementation(project(":api")) + implementation(project(":common")) + implementation(project(":server-common")) + implementation(project(":core")) + implementation(libs.jackson.databind) + implementation(libs.jackson.annotations) + implementation(libs.jackson.datatype.jdk8) + implementation(libs.jackson.datatype.jsr310) + implementation(libs.guava) + implementation(libs.bundles.log4j) + implementation(libs.bundles.jetty) + implementation(libs.bundles.jersey) + implementation(libs.metrics.jersey2) + + compileOnly(libs.lombok) + annotationProcessor(libs.lombok) + testCompileOnly(libs.lombok) + testAnnotationProcessor(libs.lombok) + + testImplementation(libs.junit.jupiter.api) + testImplementation(libs.junit.jupiter.params) + testRuntimeOnly(libs.junit.jupiter.engine) + testImplementation(libs.jersey.test.framework.core) { + exclude(group = "org.junit.jupiter") + } + testImplementation(libs.jersey.test.framework.provider.jetty) { + exclude(group = "org.junit.jupiter") + } + testImplementation(libs.mockito.core) + testImplementation(libs.commons.io) +} + +tasks.build { + dependsOn("javadoc") +} + +tasks.javadoc { + dependsOn(":api:javadoc", ":common:javadoc") + source = + sourceSets["main"].allJava + + project(":api").sourceSets["main"].allJava + + project(":common").sourceSets["main"].allJava + + classpath = configurations["compileClasspath"] + + project(":api").configurations["runtimeClasspath"] + + project(":common").configurations["runtimeClasspath"] +} diff --git a/bili-onemeta/src/main/java/com/datastrato/gravitino/bili/onemeta/Utils.java b/bili-onemeta/src/main/java/com/datastrato/gravitino/bili/onemeta/Utils.java new file mode 100644 index 00000000000..b1a8ff9c34c --- /dev/null +++ b/bili-onemeta/src/main/java/com/datastrato/gravitino/bili/onemeta/Utils.java @@ -0,0 +1,123 @@ +/* + * Copyright 2023 Datastrato Pvt Ltd. 
+ * This software is licensed under the Apache License version 2. + */ +package com.datastrato.gravitino.bili.onemeta; + +import com.datastrato.gravitino.UserPrincipal; +import com.datastrato.gravitino.auth.AuthConstants; +import com.datastrato.gravitino.dto.responses.ErrorResponse; +import com.datastrato.gravitino.utils.PrincipalUtils; +import java.security.PrivilegedExceptionAction; +import java.util.Optional; +import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; + +public class Utils { + + private static final String REMOTE_USER = "gravitino"; + + private Utils() {} + + public static String remoteUser(HttpServletRequest httpRequest) { + return Optional.ofNullable(httpRequest.getRemoteUser()).orElse(REMOTE_USER); + } + + public static Response ok(T t) { + return Response.status(Response.Status.OK).entity(t).type(MediaType.APPLICATION_JSON).build(); + } + + public static Response ok() { + return Response.status(Response.Status.NO_CONTENT).type(MediaType.APPLICATION_JSON).build(); + } + + public static Response illegalArguments(String message) { + return illegalArguments(message, null); + } + + public static Response illegalArguments(String message, Throwable throwable) { + return Response.status(Response.Status.BAD_REQUEST) + .entity(ErrorResponse.illegalArguments(message, throwable)) + .type(MediaType.APPLICATION_JSON) + .build(); + } + + public static Response internalError(String message) { + return internalError(message, null); + } + + public static Response internalError(String message, Throwable throwable) { + return Response.status(Response.Status.INTERNAL_SERVER_ERROR) + .entity(ErrorResponse.internalError(message, throwable)) + .type(MediaType.APPLICATION_JSON) + .build(); + } + + public static Response notFound(String type, String message) { + return notFound(type, message, null); + } + + public static Response notFound(String message, Throwable throwable) { + return notFound(throwable.getClass().getSimpleName(), message, throwable); + } + + public static Response notFound(String type, String message, Throwable throwable) { + return Response.status(Response.Status.NOT_FOUND) + .entity(ErrorResponse.notFound(type, message, throwable)) + .type(MediaType.APPLICATION_JSON) + .build(); + } + + public static Response alreadyExists(String type, String message) { + return alreadyExists(type, message, null); + } + + public static Response alreadyExists(String message, Throwable throwable) { + return alreadyExists(throwable.getClass().getSimpleName(), message, throwable); + } + + public static Response alreadyExists(String type, String message, Throwable throwable) { + return Response.status(Response.Status.CONFLICT) + .entity(ErrorResponse.alreadyExists(type, message, throwable)) + .type(MediaType.APPLICATION_JSON) + .build(); + } + + public static Response nonEmpty(String type, String message) { + return nonEmpty(type, message, null); + } + + public static Response nonEmpty(String message, Throwable throwable) { + return nonEmpty(throwable.getClass().getSimpleName(), message, throwable); + } + + public static Response nonEmpty(String type, String message, Throwable throwable) { + return Response.status(Response.Status.CONFLICT) + .entity(ErrorResponse.nonEmpty(type, message, throwable)) + .type(MediaType.APPLICATION_JSON) + .build(); + } + + public static Response unsupportedOperation(String message) { + return unsupportedOperation(message, null); + } + + public static Response unsupportedOperation(String message, Throwable 
throwable) { + return Response.status(Response.Status.METHOD_NOT_ALLOWED) + .entity(ErrorResponse.unsupportedOperation(message, throwable)) + .type(MediaType.APPLICATION_JSON) + .build(); + } + + public static Response doAs( + HttpServletRequest httpRequest, PrivilegedExceptionAction action) throws Exception { + UserPrincipal principal = + (UserPrincipal) + httpRequest.getAttribute(AuthConstants.AUTHENTICATED_PRINCIPAL_ATTRIBUTE_NAME); + if (principal == null) { + principal = new UserPrincipal(AuthConstants.ANONYMOUS_USER); + } + return PrincipalUtils.doAs(principal, action); + } +} diff --git a/bili-onemeta/src/main/java/com/datastrato/gravitino/bili/onemeta/rest/MetalakeOperations.java b/bili-onemeta/src/main/java/com/datastrato/gravitino/bili/onemeta/rest/MetalakeOperations.java new file mode 100644 index 00000000000..a1085b571f2 --- /dev/null +++ b/bili-onemeta/src/main/java/com/datastrato/gravitino/bili/onemeta/rest/MetalakeOperations.java @@ -0,0 +1,64 @@ +/* + * Copyright 2023 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ +package com.datastrato.gravitino.bili.onemeta.rest; + +import com.codahale.metrics.annotation.ResponseMetered; +import com.codahale.metrics.annotation.Timed; +import com.datastrato.gravitino.bili.onemeta.Utils; +import com.datastrato.gravitino.dto.MetalakeDTO; +import com.datastrato.gravitino.dto.responses.MetalakeListResponse; +import com.datastrato.gravitino.dto.util.DTOConverters; +import com.datastrato.gravitino.meta.BaseMetalake; +import com.datastrato.gravitino.meta.MetalakeManager; +import com.datastrato.gravitino.metrics.MetricNames; +import java.util.Arrays; +import javax.inject.Inject; +import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.Consumes; +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; +import javax.ws.rs.core.Context; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Path("/onemeta/metalakes") +@Consumes(MediaType.APPLICATION_JSON) +@Produces(MediaType.APPLICATION_JSON) +public class MetalakeOperations { + + private static final Logger LOG = LoggerFactory.getLogger(MetalakeOperations.class); + + private final MetalakeManager manager; + + @Context private HttpServletRequest httpRequest; + + @Inject + public MetalakeOperations(MetalakeManager manager) { + this.manager = manager; + } + + @GET + @Produces("application/vnd.gravitino.v1+json") + @Timed(name = "list-metalake." 
+ MetricNames.HTTP_PROCESS_DURATION, absolute = true) + @ResponseMetered(name = "list-metalake", absolute = true) + public Response listMetalakes() { + try { + return Utils.doAs( + httpRequest, + () -> { + BaseMetalake[] metalakes = manager.listMetalakes(); + MetalakeDTO[] metalakeDTOS = + Arrays.stream(metalakes).map(DTOConverters::toDTO).toArray(MetalakeDTO[]::new); + return Utils.ok(new MetalakeListResponse(metalakeDTOS)); + }); + + } catch (Exception e) { + return null; + } + } +} diff --git a/server/src/main/java/com/datastrato/gravitino/server/GravitinoServer.java b/server/src/main/java/com/datastrato/gravitino/server/GravitinoServer.java index 1651f493f6a..7e8af06fa92 100644 --- a/server/src/main/java/com/datastrato/gravitino/server/GravitinoServer.java +++ b/server/src/main/java/com/datastrato/gravitino/server/GravitinoServer.java @@ -67,6 +67,7 @@ public void initialize() { private void initializeRestApi() { packages("com.datastrato.gravitino.server.web.rest"); + packages("com.datastrato.gravitino.bili.onemeta.rest"); register( new AbstractBinder() { @Override diff --git a/settings.gradle.kts b/settings.gradle.kts index d41fa3dcefe..6a4ce99371d 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -8,7 +8,7 @@ plugins { rootProject.name = "gravitino" -include("api", "common", "core", "meta", "server", "integration-test", "server-common") +include("api", "common", "core", "meta", "server", "integration-test", "server-common", "bili-onemeta") include("catalogs:bundled-catalog") include("catalogs:catalog-hive") include("catalogs:catalog-lakehouse-iceberg") From f83d199091dd99dd443a085b0b92d516a0e01e33 Mon Sep 17 00:00:00 2001 From: teo Date: Wed, 20 Mar 2024 01:52:12 +0800 Subject: [PATCH 07/11] merge --- .../gravitino/rel/TableCatalog.java | 15 +++ bili-onemeta/build.gradle.kts | 1 + .../gravitino/bili/onemeta/Utils.java | 123 ------------------ .../bili/onemeta/rest/MetalakeOperations.java | 64 --------- .../build.gradle.kts | 113 ++++++++++++++++ .../catalog/bili/lakehouse/iceberg/Main.java | 7 + .../com.datastrato.gravitino.CatalogProvider | 5 + .../resources/bili-lakehouse-iceberg.conf | 8 ++ .../src/main/resources/core-site.xml.template | 7 + .../src/main/resources/hdfs-site.xml.template | 6 + .../catalog/hive/HiveCatalogOperations.java | 34 +++++ .../catalog/hive/OneMetaFileSystemHelper.java | 91 +++++++++++++ .../gravitino/utils/OneMetaConstants.java | 41 ++++++ settings.gradle.kts | 2 + 14 files changed, 330 insertions(+), 187 deletions(-) delete mode 100644 bili-onemeta/src/main/java/com/datastrato/gravitino/bili/onemeta/Utils.java delete mode 100644 bili-onemeta/src/main/java/com/datastrato/gravitino/bili/onemeta/rest/MetalakeOperations.java create mode 100644 catalogs/catalog-bili-lakehouse-iceberg/build.gradle.kts create mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/Main.java create mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/resources/META-INF.services/com.datastrato.gravitino.CatalogProvider create mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/resources/bili-lakehouse-iceberg.conf create mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/resources/core-site.xml.template create mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/resources/hdfs-site.xml.template create mode 100644 catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/OneMetaFileSystemHelper.java create mode 100644 
core/src/main/java/com/datastrato/gravitino/utils/OneMetaConstants.java diff --git a/api/src/main/java/com/datastrato/gravitino/rel/TableCatalog.java b/api/src/main/java/com/datastrato/gravitino/rel/TableCatalog.java index a0ab16e0099..1c73781eb10 100644 --- a/api/src/main/java/com/datastrato/gravitino/rel/TableCatalog.java +++ b/api/src/main/java/com/datastrato/gravitino/rel/TableCatalog.java @@ -288,4 +288,19 @@ Table alterTable(NameIdentifier ident, TableChange... changes) default boolean purgeTable(NameIdentifier ident) throws UnsupportedOperationException { throw new UnsupportedOperationException("purgeTable not supported."); } + + /** + * Drop a table from the catalog and completely remove its data. Removes both the metadata and the + * directory associated with the table completely and skipping trash. + * + *
If the catalog supports to purge a table, this method should be overridden. The default + * implementation throws an {@link UnsupportedOperationException}. + * + * @param ident A table identifier. + * @return True if the table was purged, false if the table did not exist. + * @throws UnsupportedOperationException If the catalog does not support to purge a table. + */ + default boolean purgeTableOneMeta(NameIdentifier ident) throws UnsupportedOperationException { + throw new UnsupportedOperationException("purgeTable not supported."); + } } diff --git a/bili-onemeta/build.gradle.kts b/bili-onemeta/build.gradle.kts index ece82fdbd4a..f41231e12af 100644 --- a/bili-onemeta/build.gradle.kts +++ b/bili-onemeta/build.gradle.kts @@ -13,6 +13,7 @@ dependencies { implementation(project(":common")) implementation(project(":server-common")) implementation(project(":core")) + implementation(project(":server")) implementation(libs.jackson.databind) implementation(libs.jackson.annotations) implementation(libs.jackson.datatype.jdk8) diff --git a/bili-onemeta/src/main/java/com/datastrato/gravitino/bili/onemeta/Utils.java b/bili-onemeta/src/main/java/com/datastrato/gravitino/bili/onemeta/Utils.java deleted file mode 100644 index b1a8ff9c34c..00000000000 --- a/bili-onemeta/src/main/java/com/datastrato/gravitino/bili/onemeta/Utils.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright 2023 Datastrato Pvt Ltd. - * This software is licensed under the Apache License version 2. - */ -package com.datastrato.gravitino.bili.onemeta; - -import com.datastrato.gravitino.UserPrincipal; -import com.datastrato.gravitino.auth.AuthConstants; -import com.datastrato.gravitino.dto.responses.ErrorResponse; -import com.datastrato.gravitino.utils.PrincipalUtils; -import java.security.PrivilegedExceptionAction; -import java.util.Optional; -import javax.servlet.http.HttpServletRequest; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.Response; - -public class Utils { - - private static final String REMOTE_USER = "gravitino"; - - private Utils() {} - - public static String remoteUser(HttpServletRequest httpRequest) { - return Optional.ofNullable(httpRequest.getRemoteUser()).orElse(REMOTE_USER); - } - - public static Response ok(T t) { - return Response.status(Response.Status.OK).entity(t).type(MediaType.APPLICATION_JSON).build(); - } - - public static Response ok() { - return Response.status(Response.Status.NO_CONTENT).type(MediaType.APPLICATION_JSON).build(); - } - - public static Response illegalArguments(String message) { - return illegalArguments(message, null); - } - - public static Response illegalArguments(String message, Throwable throwable) { - return Response.status(Response.Status.BAD_REQUEST) - .entity(ErrorResponse.illegalArguments(message, throwable)) - .type(MediaType.APPLICATION_JSON) - .build(); - } - - public static Response internalError(String message) { - return internalError(message, null); - } - - public static Response internalError(String message, Throwable throwable) { - return Response.status(Response.Status.INTERNAL_SERVER_ERROR) - .entity(ErrorResponse.internalError(message, throwable)) - .type(MediaType.APPLICATION_JSON) - .build(); - } - - public static Response notFound(String type, String message) { - return notFound(type, message, null); - } - - public static Response notFound(String message, Throwable throwable) { - return notFound(throwable.getClass().getSimpleName(), message, throwable); - } - - public static Response notFound(String type, String message, Throwable throwable) { - 
return Response.status(Response.Status.NOT_FOUND) - .entity(ErrorResponse.notFound(type, message, throwable)) - .type(MediaType.APPLICATION_JSON) - .build(); - } - - public static Response alreadyExists(String type, String message) { - return alreadyExists(type, message, null); - } - - public static Response alreadyExists(String message, Throwable throwable) { - return alreadyExists(throwable.getClass().getSimpleName(), message, throwable); - } - - public static Response alreadyExists(String type, String message, Throwable throwable) { - return Response.status(Response.Status.CONFLICT) - .entity(ErrorResponse.alreadyExists(type, message, throwable)) - .type(MediaType.APPLICATION_JSON) - .build(); - } - - public static Response nonEmpty(String type, String message) { - return nonEmpty(type, message, null); - } - - public static Response nonEmpty(String message, Throwable throwable) { - return nonEmpty(throwable.getClass().getSimpleName(), message, throwable); - } - - public static Response nonEmpty(String type, String message, Throwable throwable) { - return Response.status(Response.Status.CONFLICT) - .entity(ErrorResponse.nonEmpty(type, message, throwable)) - .type(MediaType.APPLICATION_JSON) - .build(); - } - - public static Response unsupportedOperation(String message) { - return unsupportedOperation(message, null); - } - - public static Response unsupportedOperation(String message, Throwable throwable) { - return Response.status(Response.Status.METHOD_NOT_ALLOWED) - .entity(ErrorResponse.unsupportedOperation(message, throwable)) - .type(MediaType.APPLICATION_JSON) - .build(); - } - - public static Response doAs( - HttpServletRequest httpRequest, PrivilegedExceptionAction action) throws Exception { - UserPrincipal principal = - (UserPrincipal) - httpRequest.getAttribute(AuthConstants.AUTHENTICATED_PRINCIPAL_ATTRIBUTE_NAME); - if (principal == null) { - principal = new UserPrincipal(AuthConstants.ANONYMOUS_USER); - } - return PrincipalUtils.doAs(principal, action); - } -} diff --git a/bili-onemeta/src/main/java/com/datastrato/gravitino/bili/onemeta/rest/MetalakeOperations.java b/bili-onemeta/src/main/java/com/datastrato/gravitino/bili/onemeta/rest/MetalakeOperations.java deleted file mode 100644 index a1085b571f2..00000000000 --- a/bili-onemeta/src/main/java/com/datastrato/gravitino/bili/onemeta/rest/MetalakeOperations.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright 2023 Datastrato Pvt Ltd. - * This software is licensed under the Apache License version 2. 
- */ -package com.datastrato.gravitino.bili.onemeta.rest; - -import com.codahale.metrics.annotation.ResponseMetered; -import com.codahale.metrics.annotation.Timed; -import com.datastrato.gravitino.bili.onemeta.Utils; -import com.datastrato.gravitino.dto.MetalakeDTO; -import com.datastrato.gravitino.dto.responses.MetalakeListResponse; -import com.datastrato.gravitino.dto.util.DTOConverters; -import com.datastrato.gravitino.meta.BaseMetalake; -import com.datastrato.gravitino.meta.MetalakeManager; -import com.datastrato.gravitino.metrics.MetricNames; -import java.util.Arrays; -import javax.inject.Inject; -import javax.servlet.http.HttpServletRequest; -import javax.ws.rs.Consumes; -import javax.ws.rs.GET; -import javax.ws.rs.Path; -import javax.ws.rs.Produces; -import javax.ws.rs.core.Context; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.Response; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@Path("/onemeta/metalakes") -@Consumes(MediaType.APPLICATION_JSON) -@Produces(MediaType.APPLICATION_JSON) -public class MetalakeOperations { - - private static final Logger LOG = LoggerFactory.getLogger(MetalakeOperations.class); - - private final MetalakeManager manager; - - @Context private HttpServletRequest httpRequest; - - @Inject - public MetalakeOperations(MetalakeManager manager) { - this.manager = manager; - } - - @GET - @Produces("application/vnd.gravitino.v1+json") - @Timed(name = "list-metalake." + MetricNames.HTTP_PROCESS_DURATION, absolute = true) - @ResponseMetered(name = "list-metalake", absolute = true) - public Response listMetalakes() { - try { - return Utils.doAs( - httpRequest, - () -> { - BaseMetalake[] metalakes = manager.listMetalakes(); - MetalakeDTO[] metalakeDTOS = - Arrays.stream(metalakes).map(DTOConverters::toDTO).toArray(MetalakeDTO[]::new); - return Utils.ok(new MetalakeListResponse(metalakeDTOS)); - }); - - } catch (Exception e) { - return null; - } - } -} diff --git a/catalogs/catalog-bili-lakehouse-iceberg/build.gradle.kts b/catalogs/catalog-bili-lakehouse-iceberg/build.gradle.kts new file mode 100644 index 00000000000..59bc214d66a --- /dev/null +++ b/catalogs/catalog-bili-lakehouse-iceberg/build.gradle.kts @@ -0,0 +1,113 @@ +/* + * Copyright 2023 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. 
+ */ +description = "catalog-bili-lakehouse-iceberg" + +plugins { + `maven-publish` + id("java") + id("idea") +} + +dependencies { + implementation(project(":api")) + implementation(project(":common")) + implementation(project(":core")) + implementation(project(":server-common")) + implementation(libs.bundles.jetty) + implementation(libs.bundles.jersey) + implementation(libs.bundles.log4j) + implementation(libs.commons.collections4) + implementation(libs.commons.io) + implementation(libs.commons.lang3) + implementation(libs.guava) + implementation(libs.jackson.annotations) + implementation(libs.jackson.databind) + implementation(libs.jackson.datatype.jdk8) + implementation(libs.jackson.datatype.jsr310) + implementation(libs.sqlite.jdbc) + implementation("org.apache.iceberg:iceberg-sdk:0.13.2-bili-0.4-SNAPSHOT") + + implementation(libs.hive2.metastore) { + exclude("co.cask.tephra") + exclude("com.github.spotbugs") + exclude("com.google.code.findbugs", "jsr305") + exclude("com.tdunning", "json") + exclude("javax.transaction", "transaction-api") + exclude("org.apache.avro", "avro") + exclude("org.apache.hbase") + exclude("org.apache.hadoop", "hadoop-yarn-api") + exclude("org.apache.hadoop", "hadoop-yarn-server-applicationhistoryservice") + exclude("org.apache.hadoop", "hadoop-yarn-server-common") + exclude("org.apache.hadoop", "hadoop-yarn-server-resourcemanager") + exclude("org.apache.hadoop", "hadoop-yarn-server-web-proxy") + exclude("org.apache.logging.log4j") + exclude("org.apache.parquet", "parquet-hadoop-bundle") + exclude("org.apache.zookeeper") + exclude("org.eclipse.jetty.aggregate", "jetty-all") + exclude("org.eclipse.jetty.orbit", "javax.servlet") + exclude("org.pentaho") // missing dependency + exclude("org.slf4j", "slf4j-log4j12") + exclude("com.zaxxer", "HikariCP") + } + + annotationProcessor(libs.lombok) + compileOnly(libs.lombok) + + implementation(libs.hadoop2.common) { + exclude("com.github.spotbugs") + } + implementation(libs.hadoop2.hdfs) + implementation(libs.hadoop2.mapreduce.client.core) + implementation(libs.metrics.jersey2) + + testImplementation(libs.jersey.test.framework.core) { + exclude(group = "org.junit.jupiter") + } + testImplementation(libs.jersey.test.framework.provider.jetty) { + exclude(group = "org.junit.jupiter") + } + testImplementation(libs.junit.jupiter.api) + testImplementation(libs.junit.jupiter.params) + testImplementation(libs.mockito.core) + + testRuntimeOnly(libs.junit.jupiter.engine) +} + +tasks { + val copyDepends by registering(Copy::class) { + from(configurations.runtimeClasspath) + into("build/libs_all") + } + val copyCatalogLibs by registering(Copy::class) { + dependsOn(copyDepends, "build") + from("build/libs_all", "build/libs") + into("$rootDir/distribution/package/catalogs/bili-lakehouse-iceberg/libs") + } + + val copyCatalogConfig by registering(Copy::class) { + from("src/main/resources") + into("$rootDir/distribution/package/catalogs/bili-lakehouse-iceberg/conf") + + include("bili-lakehouse-iceberg.conf") + include("core-site.xml.template") + include("hdfs-site.xml.template") + + rename { original -> + if (original.endsWith(".template")) { + original.replace(".template", "") + } else { + original + } + } + + exclude { details -> + details.file.isDirectory() + } + } + + register("copyLibAndConfig", Copy::class) { + dependsOn(copyCatalogLibs, copyCatalogConfig) + } +} diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/Main.java 
b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/Main.java
new file mode 100644
index 00000000000..5e2aa97673c
--- /dev/null
+++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/Main.java
@@ -0,0 +1,7 @@
+package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg;
+
+public class Main {
+  public static void main(String[] args) {
+    System.out.println("Hello world!");
+  }
+}
diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/resources/META-INF.services/com.datastrato.gravitino.CatalogProvider b/catalogs/catalog-bili-lakehouse-iceberg/src/main/resources/META-INF.services/com.datastrato.gravitino.CatalogProvider
new file mode 100644
index 00000000000..07fc2eede9f
--- /dev/null
+++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/resources/META-INF.services/com.datastrato.gravitino.CatalogProvider
@@ -0,0 +1,5 @@
+#
+# Copyright 2023 Datastrato Pvt Ltd.
+# This software is licensed under the Apache License version 2.
+#
+com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.IcebergCatalog
\ No newline at end of file
diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/resources/bili-lakehouse-iceberg.conf b/catalogs/catalog-bili-lakehouse-iceberg/src/main/resources/bili-lakehouse-iceberg.conf
new file mode 100644
index 00000000000..82b425ee341
--- /dev/null
+++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/resources/bili-lakehouse-iceberg.conf
@@ -0,0 +1,8 @@
+#
+# Copyright 2023 Datastrato Pvt Ltd.
+# This software is licensed under the Apache License version 2.
+#
+
+## This file holds common configurations for the Lakehouse-iceberg catalog. The format of the key is
+## 'gravitino.bypass.{iceberg-inner-config-key}' and `iceberg-inner-config-key` is the
+## real key that is passed to the Lakehouse-iceberg catalog.
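For illustration only, a minimal sketch of such a bypass entry (not part of the patch; 'clients' is used here as an assumed Iceberg property name):

## Example (assumed key): after the 'gravitino.bypass.' prefix is stripped, the entry
## below reaches the underlying Iceberg catalog as the plain property 'clients'.
# gravitino.bypass.clients = 4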
diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/resources/core-site.xml.template b/catalogs/catalog-bili-lakehouse-iceberg/src/main/resources/core-site.xml.template
new file mode 100644
index 00000000000..263112f905f
--- /dev/null
+++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/resources/core-site.xml.template
@@ -0,0 +1,7 @@
+
+
+
+
diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/resources/hdfs-site.xml.template b/catalogs/catalog-bili-lakehouse-iceberg/src/main/resources/hdfs-site.xml.template
new file mode 100644
index 00000000000..4b7c0376bae
--- /dev/null
+++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/resources/hdfs-site.xml.template
@@ -0,0 +1,6 @@
+
+
+
\ No newline at end of file
diff --git a/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveCatalogOperations.java b/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveCatalogOperations.java
index 02cbdfbde6c..e3e1a383e55 100644
--- a/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveCatalogOperations.java
+++ b/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/HiveCatalogOperations.java
@@ -64,6 +64,8 @@
 import org.apache.commons.lang3.ArrayUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
@@ -1028,6 +1030,38 @@ public boolean purgeTable(NameIdentifier tableIdent) throws UnsupportedOperation
     }
   }

+  /**
+   * Purges a table on behalf of the Iceberg SDK.
+   *
+   * @param tableIdent The identifier of the table to purge.
+   * @return true if the table is successfully purged; false if the table does not exist.
+   */
+  @Override
+  public boolean purgeTableOneMeta(NameIdentifier tableIdent) {
+    if (isExternalTable(tableIdent)) {
+      // load the table first: its location can no longer be looked up once the metadata is dropped
+      org.apache.hadoop.hive.metastore.api.Table table = loadHiveTable(tableIdent);
+      String location = table.getSd().getLocation();
+
+      // drop meta info, then the hdfs files
+      boolean resultMeta = dropHiveTable(tableIdent, true, true);
+      boolean resultHdfs = false;
+      try {
+        LOG.info("Begin dropping external table {} at location {}", table.getTableName(), location);
+        FileSystem fs = OneMetaFileSystemHelper.newBuilder().build().getFileSystem();
+        resultHdfs = fs.delete(new Path(location), true);
+      } catch (IOException e) {
+        LOG.error(
+            "Failed to drop external table {} at location {}", table.getTableName(), location);
+        throw new RuntimeException("HDFS delete error for location: " + location, e);
+      }
+      LOG.info("Successfully dropped external table {} at location {}", table.getTableName(), location);
+      return resultMeta && resultHdfs;
+    } else {
+      return dropHiveTable(tableIdent, true, true);
+    }
+  }
+
   /**
    * Checks if the given namespace is a valid namespace for the Hive schema.
    *
diff --git a/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/OneMetaFileSystemHelper.java b/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/OneMetaFileSystemHelper.java
new file mode 100644
index 00000000000..77026687577
--- /dev/null
+++ b/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/OneMetaFileSystemHelper.java
@@ -0,0 +1,91 @@
+/**
+ * Copyright (C) 2016-2023 Expedia, Inc.
+ *
+ *
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + *
http://www.apache.org/licenses/LICENSE-2.0 + * + *
Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.datastrato.gravitino.catalog.hive;
+
+import static com.datastrato.gravitino.utils.OneMetaConstants.CORE_SITE_PATH;
+import static com.datastrato.gravitino.utils.OneMetaConstants.HDFS_SITE_PATH;
+import static com.datastrato.gravitino.utils.OneMetaConstants.KEYTAB_PATH;
+import static com.datastrato.gravitino.utils.OneMetaConstants.MOUNT_TABLE_PATH;
+import static com.datastrato.gravitino.utils.OneMetaConstants.PRINCIPAL;
+
+import java.io.IOException;
+import java.io.Serializable;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class OneMetaFileSystemHelper implements Serializable {
+  public static final Logger LOG = LoggerFactory.getLogger(OneMetaFileSystemHelper.class);
+  private static final long serialVersionUID = -7579536335018210975L;
+
+  private FileSystem fs;
+
+  public static HdfsFileServiceBuilder newBuilder() {
+    return new HdfsFileServiceBuilder();
+  }
+
+  public FileSystem getFileSystem() {
+    return fs;
+  }
+
+  public void closeFS(FileSystem fs) {
+    if (fs != null) {
+      IOUtils.closeQuietly(fs);
+    }
+  }
+
+  public void close() {
+    closeFS(fs);
+  }
+
+  public static class HdfsFileServiceBuilder {
+
+    public static final Logger LOGGER = LoggerFactory.getLogger(HdfsFileServiceBuilder.class);
+
+    public OneMetaFileSystemHelper build() {
+      try {
+        UserGroupInformation.loginUserFromKeytab(PRINCIPAL, KEYTAB_PATH);
+        UserGroupInformation ugi = UserGroupInformation.getLoginUser();
+        LOG.info(String.format("Successfully logged in, ugi: %s", ugi.toString()));
+      } catch (IOException e) {
+        LOG.error("Kerberos authentication error", e);
+        throw new RuntimeException("Kerberos authentication error!", e);
+      }
+
+      Configuration conf = new Configuration();
+      conf.addResource(new Path(CORE_SITE_PATH));
+      conf.addResource(new Path(HDFS_SITE_PATH));
+      conf.addResource(new Path(MOUNT_TABLE_PATH));
+      LOG.info("FileSystemHelper: Configuration initialized successfully");
+
+      // multiple instances
+      FileSystem fs = null;
+      try {
+        fs = FileSystem.get(conf);
+      } catch (IOException e) {
+        LOG.error("Get FileSystem error!", e);
+        throw new RuntimeException("Get FileSystem error!", e);
+      }
+      return new OneMetaFileSystemHelper(fs);
+    }
+  }
+
+  private OneMetaFileSystemHelper(FileSystem fs) {
+    this.fs = fs;
+  }
+}
diff --git a/core/src/main/java/com/datastrato/gravitino/utils/OneMetaConstants.java b/core/src/main/java/com/datastrato/gravitino/utils/OneMetaConstants.java
new file mode 100644
index 00000000000..6e3eca609c1
--- /dev/null
+++ b/core/src/main/java/com/datastrato/gravitino/utils/OneMetaConstants.java
@@ -0,0 +1,41 @@
+/**
+ * Copyright (C) 2016-2023 Expedia, Inc.
+ *
+ *
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of the License at + * + *
http://www.apache.org/licenses/LICENSE-2.0 + * + *
Unless required by applicable law or agreed to in writing, software distributed under the + * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.datastrato.gravitino.utils; + +public class OneMetaConstants { + + public static final String ROLE_NAME = "onemeta.role.name"; + + public static final String CORE_SITE_PATH = "/data/app/onemeta/conf/core-site.xml"; + public static final String HDFS_SITE_PATH = "/data/app/onemeta/conf/hdfs-site.xml"; + public static final String HIVE_SITE_PATH = "/data/app/onemeta/conf/hive-site.xml"; + public static final String MOUNT_TABLE_PATH = "/data/app/onemeta/conf/mount-table.xml"; + + public static final String KEYTAB_PATH = "/etc/security/keytabs/hive.keytab"; + public static final String PRINCIPAL = "hive@BILIBILI.CO"; + + // metrics + public static final String METRIC_ICEBERG_ACCESS_TIME = "onemeta_iceberg_access_time_ms"; + + public static final String METRIC_DISPATCHER_ACCESS_TIME = "onemeta_dispatcher_access_time_ms"; + + public static final String METRIC_HMS_ACCESS_TIME = "onemeta_hms_access_time_ms"; + + public static final String METRIC_SERVICE_ACCESS_TIME = "onemeta_service_process_time_ms"; + + public static final String METRIC_ERROR_COUNTER = "sqlscan_error_counter"; + + public static final String METRIC_COUNTER = "sqlscan_counter"; + public static final String METRIC_OPEN_CONNECTIONS = "open_connections"; +} diff --git a/settings.gradle.kts b/settings.gradle.kts index 6a4ce99371d..7f96f602eca 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -26,3 +26,5 @@ include("spark-connector") include("web") include("docs") include("integration-test-common") +include("catalogs:catalog-bili-lakehouse-iceberg") + From c5975ed4d75d3a01b26219688aa2de487578afc7 Mon Sep 17 00:00:00 2001 From: teo Date: Fri, 15 Mar 2024 08:46:31 +0800 Subject: [PATCH 08/11] add bili iceberg catalog --- catalogs/bundled-catalog/build.gradle.kts | 1 + .../lakehouse/iceberg/IcebergCatalog.java | 46 +++ .../iceberg/IcebergCatalogBackend.java | 11 + .../iceberg/IcebergCatalogOperations.java | 339 ++++++++++++++++++ .../IcebergCatalogPropertiesMetadata.java | 88 +++++ .../bili/lakehouse/iceberg/IcebergColumn.java | 35 ++ .../bili/lakehouse/iceberg/IcebergSchema.java | 34 ++ .../IcebergSchemaPropertiesMetadata.java | 31 ++ .../bili/lakehouse/iceberg/IcebergTable.java | 136 +++++++ .../IcebergTablePropertiesMetadata.java | 58 +++ .../catalog/bili/lakehouse/iceberg/Main.java | 7 - .../iceberg/converter/ConvertUtil.java | 87 +++++ .../converter/FromIcebergPartitionSpec.java | 97 +++++ .../iceberg/converter/FromIcebergType.java | 104 ++++++ .../converter/ToIcebergPartitionSpec.java | 74 ++++ .../iceberg/converter/ToIcebergType.java | 132 +++++++ .../converter/ToIcebergTypeVisitor.java | 70 ++++ .../lakehouse/iceberg/IcebergCatalog.java | 2 +- docs/open-api/catalogs.yaml | 2 + 19 files changed, 1346 insertions(+), 8 deletions(-) create mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalog.java create mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogBackend.java create mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogOperations.java create 
mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogPropertiesMetadata.java create mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergColumn.java create mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergSchema.java create mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergSchemaPropertiesMetadata.java create mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergTable.java create mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergTablePropertiesMetadata.java delete mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/Main.java create mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ConvertUtil.java create mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/FromIcebergPartitionSpec.java create mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/FromIcebergType.java create mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ToIcebergPartitionSpec.java create mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ToIcebergType.java create mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ToIcebergTypeVisitor.java diff --git a/catalogs/bundled-catalog/build.gradle.kts b/catalogs/bundled-catalog/build.gradle.kts index 23ef280c6c2..4adfd619e60 100644 --- a/catalogs/bundled-catalog/build.gradle.kts +++ b/catalogs/bundled-catalog/build.gradle.kts @@ -16,6 +16,7 @@ dependencies { implementation(project(":catalogs:catalog-jdbc-mysql")) implementation(project(":catalogs:catalog-jdbc-postgresql")) implementation(project(":catalogs:catalog-lakehouse-iceberg")) + implementation(project(":catalogs:catalog-bili-lakehouse-iceberg")) implementation(project(":core")) implementation(libs.slf4j.api) } diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalog.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalog.java new file mode 100644 index 00000000000..fd87bf9d747 --- /dev/null +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalog.java @@ -0,0 +1,46 @@ +/* + * Copyright 2023 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. 
+ */ +package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg; + +import com.datastrato.gravitino.catalog.BaseCatalog; +import com.datastrato.gravitino.catalog.CatalogOperations; +import com.datastrato.gravitino.rel.SupportsSchemas; +import com.datastrato.gravitino.rel.TableCatalog; +import java.util.Map; + +/** Implementation of an Iceberg catalog in Gravitino. */ +public class IcebergCatalog extends BaseCatalog { + + /** @return The short name of the catalog. */ + @Override + public String shortName() { + return "lakehouse-iceberg"; + } + + /** + * Creates a new instance of {@link IcebergCatalogOperations} with the provided configuration. + * + * @param config The configuration map for the Iceberg catalog operations. + * @return A new instance of {@link IcebergCatalogOperations}. + */ + @Override + protected CatalogOperations newOps(Map config) { + IcebergCatalogOperations ops = new IcebergCatalogOperations(entity()); + ops.initialize(config); + return ops; + } + + /** @return The Iceberg catalog operations as {@link IcebergCatalogOperations}. */ + @Override + public SupportsSchemas asSchemas() { + return (IcebergCatalogOperations) ops(); + } + + /** @return The Iceberg catalog operations as {@link IcebergCatalogOperations}. */ + @Override + public TableCatalog asTableCatalog() { + return (IcebergCatalogOperations) ops(); + } +} diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogBackend.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogBackend.java new file mode 100644 index 00000000000..aa98c6b9a21 --- /dev/null +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogBackend.java @@ -0,0 +1,11 @@ +/* + * Copyright 2023 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ +package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg; + +public enum IcebergCatalogBackend { + HIVE, + JDBC, + MEMORY +} diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogOperations.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogOperations.java new file mode 100644 index 00000000000..337d1d6a5e2 --- /dev/null +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogOperations.java @@ -0,0 +1,339 @@ +/* + * Copyright 2023 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. 
+ */ +package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg; + +import static com.datastrato.gravitino.catalog.BaseCatalog.CATALOG_BYPASS_PREFIX; +import static com.datastrato.gravitino.utils.OneMetaConstants.CORE_SITE_PATH; +import static com.datastrato.gravitino.utils.OneMetaConstants.HDFS_SITE_PATH; +import static com.datastrato.gravitino.utils.OneMetaConstants.HIVE_SITE_PATH; +import static com.datastrato.gravitino.utils.OneMetaConstants.MOUNT_TABLE_PATH; + +import com.datastrato.gravitino.NameIdentifier; +import com.datastrato.gravitino.Namespace; +import com.datastrato.gravitino.catalog.CatalogOperations; +import com.datastrato.gravitino.catalog.PropertiesMetadata; +import com.datastrato.gravitino.exceptions.NoSuchCatalogException; +import com.datastrato.gravitino.exceptions.NoSuchSchemaException; +import com.datastrato.gravitino.exceptions.NoSuchTableException; +import com.datastrato.gravitino.exceptions.NonEmptySchemaException; +import com.datastrato.gravitino.exceptions.SchemaAlreadyExistsException; +import com.datastrato.gravitino.exceptions.TableAlreadyExistsException; +import com.datastrato.gravitino.meta.CatalogEntity; +import com.datastrato.gravitino.rel.Column; +import com.datastrato.gravitino.rel.SchemaChange; +import com.datastrato.gravitino.rel.SupportsSchemas; +import com.datastrato.gravitino.rel.Table; +import com.datastrato.gravitino.rel.TableCatalog; +import com.datastrato.gravitino.rel.TableChange; +import com.datastrato.gravitino.rel.expressions.distributions.Distribution; +import com.datastrato.gravitino.rel.expressions.sorts.SortOrder; +import com.datastrato.gravitino.rel.expressions.transforms.Transform; +import com.datastrato.gravitino.rel.indexes.Index; +import com.datastrato.gravitino.utils.MapUtils; +import com.google.common.collect.Maps; +import java.util.Map; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.hive.HiveCatalog; +import org.apache.iceberg.sdk.HiveCatalogUtils; +import org.apache.iceberg.sdk.auth.AuthUtils; +import org.apache.iceberg.sdk.auth.HdfsAuthentication; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** Operations for interacting with the Iceberg catalog in Gravitino. */ +public class IcebergCatalogOperations implements CatalogOperations, SupportsSchemas, TableCatalog { + + private static final String ICEBERG_TABLE_DOES_NOT_EXIST_MSG = "Iceberg table does not exist: %s"; + + public static final Logger LOG = LoggerFactory.getLogger(IcebergCatalogOperations.class); + + private IcebergCatalogPropertiesMetadata icebergCatalogPropertiesMetadata; + + private IcebergTablePropertiesMetadata icebergTablePropertiesMetadata; + + private IcebergSchemaPropertiesMetadata icebergSchemaPropertiesMetadata; + + private final CatalogEntity entity; + + private Configuration icebergSdkConf = null; + + /** + * Constructs a new instance of IcebergCatalogOperations. + * + * @param entity The catalog entity associated with this operations instance. + */ + public IcebergCatalogOperations(CatalogEntity entity) { + this.entity = entity; + } + + /** + * Initializes the Iceberg catalog operations with the provided configuration. + * + * @param conf The configuration map for the Iceberg catalog operations. + * @throws RuntimeException if initialization fails. 
+   */
+  @Override
+  public void initialize(Map<String, String> conf) throws RuntimeException {
+    // Key format like gravitino.bypass.a.b
+    Map<String, String> prefixMap = MapUtils.getPrefixMap(conf, CATALOG_BYPASS_PREFIX);
+
+    this.icebergCatalogPropertiesMetadata = new IcebergCatalogPropertiesMetadata();
+    // Holds the keys that appear in GRAVITINO_CONFIG_TO_ICEBERG
+    Map<String, String> gravitinoConfig =
+        this.icebergCatalogPropertiesMetadata.transformProperties(conf);
+
+    Map<String, String> resultConf = Maps.newHashMap(prefixMap);
+    resultConf.putAll(gravitinoConfig);
+
+    this.icebergTablePropertiesMetadata = new IcebergTablePropertiesMetadata();
+    this.icebergSchemaPropertiesMetadata = new IcebergSchemaPropertiesMetadata();
+    icebergSdkConf = createDefaultConfiguration();
+  }
+
+  /** Closes the Iceberg catalog and releases the associated client pool. */
+  @Override
+  public void close() {}
+
+  /**
+   * Lists the schemas under the given namespace.
+   *
+   * @param namespace The namespace to list the schemas for.
+   * @return An array of {@link NameIdentifier} representing the schemas.
+   * @throws NoSuchCatalogException If the provided namespace is invalid or does not exist.
+   */
+  @Override
+  public NameIdentifier[] listSchemas(Namespace namespace) throws NoSuchCatalogException {
+    throw new UnsupportedOperationException("listSchemas not supported.");
+  }
+
+  /**
+   * Creates a new schema with the provided identifier, comment, and metadata.
+   *
+   * @param ident The identifier of the schema to create.
+   * @param comment The comment for the schema.
+   * @param properties The properties for the schema.
+   * @return The created {@link IcebergSchema}.
+   * @throws NoSuchCatalogException If the provided namespace is invalid or does not exist.
+   * @throws SchemaAlreadyExistsException If a schema with the same name already exists.
+   */
+  @Override
+  public IcebergSchema createSchema(
+      NameIdentifier ident, String comment, Map<String, String> properties)
+      throws NoSuchCatalogException, SchemaAlreadyExistsException {
+    throw new UnsupportedOperationException("createSchema not supported.");
+  }
+
+  /**
+   * Loads the schema with the provided identifier.
+   *
+   * @param ident The identifier of the schema to load.
+   * @return The loaded {@link IcebergSchema}.
+   * @throws NoSuchSchemaException If the schema with the provided identifier does not exist.
+   */
+  @Override
+  public IcebergSchema loadSchema(NameIdentifier ident) throws NoSuchSchemaException {
+    throw new UnsupportedOperationException("loadSchema not supported.");
+  }
+
+  /**
+   * Alters the schema with the provided identifier according to the specified changes.
+   *
+   * @param ident The identifier of the schema to alter.
+   * @param changes The changes to apply to the schema.
+   * @return The altered {@link IcebergSchema}.
+   * @throws NoSuchSchemaException If the schema with the provided identifier does not exist.
+   */
+  @Override
+  public IcebergSchema alterSchema(NameIdentifier ident, SchemaChange... changes)
+      throws NoSuchSchemaException {
+    throw new UnsupportedOperationException("alterSchema not supported.");
+  }
+
+  /**
+   * Drops the schema with the provided identifier.
+   *
+   * @param ident The identifier of the schema to drop.
+   * @param cascade If set to true, drops all the tables in the schema as well.
+   * @return true if the schema was dropped successfully, false otherwise.
+   * @throws NonEmptySchemaException If the schema is not empty and 'cascade' is set to false.
+   */
+  @Override
+  public boolean dropSchema(NameIdentifier ident, boolean cascade) throws NonEmptySchemaException {
+    throw new UnsupportedOperationException("dropSchema not supported.");
+  }
+
+  /**
+   * Lists all the tables under the specified namespace.
+   *
+   * @param namespace The namespace to list tables for.
+   * @return An array of {@link NameIdentifier} representing the tables in the namespace.
+   * @throws NoSuchSchemaException If the schema with the provided namespace does not exist.
+   */
+  @Override
+  public NameIdentifier[] listTables(Namespace namespace) throws NoSuchSchemaException {
+    throw new UnsupportedOperationException("listTables not supported.");
+  }
+
+  /**
+   * Loads a table from Iceberg.
+   *
+   * @param tableIdent The identifier of the table to load.
+   * @return The loaded IcebergTable instance representing the table.
+   * @throws NoSuchTableException If the specified table does not exist in Iceberg.
+   */
+  @Override
+  public Table loadTable(NameIdentifier tableIdent) throws NoSuchTableException {
+    throw new UnsupportedOperationException("loadTable not supported.");
+  }
+
+  /**
+   * Apply the {@link TableChange change} to an existing Iceberg table.
+   *
+   * @param tableIdent The identifier of the table to alter.
+   * @param changes The changes to apply to the table.
+   * @return This method always throws UnsupportedOperationException.
+   * @throws NoSuchTableException This exception will not be thrown in this method.
+   * @throws IllegalArgumentException This exception will not be thrown in this method.
+   */
+  @Override
+  public Table alterTable(NameIdentifier tableIdent, TableChange... changes)
+      throws NoSuchTableException, IllegalArgumentException {
+    throw new UnsupportedOperationException("alterTable not supported.");
+  }
+
+  private Table internalUpdateTable(NameIdentifier tableIdent, TableChange... changes)
+      throws NoSuchTableException, IllegalArgumentException {
+    throw new UnsupportedOperationException("alterTable not supported.");
+  }
+
+  /**
+   * Performs a table rename operation in Iceberg.
+   *
+   * @param tableIdent tableIdent of this table.
+   * @param renameTable Table Change to modify the table name.
+   * @return Returns the renamed table.
+   * @throws NoSuchTableException If the table to rename does not exist.
+   * @throws IllegalArgumentException If the rename change is invalid.
+   */
+  private Table renameTable(NameIdentifier tableIdent, TableChange.RenameTable renameTable)
+      throws NoSuchTableException, IllegalArgumentException {
+    throw new UnsupportedOperationException("renameTable not supported.");
+  }
+
+  /**
+   * Drops a table from Iceberg.
+   *
+   * @param tableIdent The identifier of the table to drop.
+   * @return true if the table is successfully dropped; false if the table does not exist.
+   */
+  @Override
+  public boolean dropTable(NameIdentifier tableIdent) {
+    throw new UnsupportedOperationException("dropTable not supported.");
+  }
+
+  /**
+   * Creates a new table in Iceberg.
+   *
+   * @param tableIdent The identifier of the table to create.
+   * @param columns The array of columns for the new table.
+   * @param comment The comment for the new table.
+   * @param properties The properties for the new table.
+   * @param partitioning The partitioning for the new table.
+   * @param distribution The distribution for the new table.
+   * @param sortOrders The sort orders for the new table.
+   * @param indexes The indexes for the new table.
+   * @return The newly created IcebergTable instance.
+   * @throws NoSuchSchemaException If the schema for the table does not exist.
+   * @throws TableAlreadyExistsException If the table with the same name already exists.
+   */
+  @Override
+  public Table createTable(
+      NameIdentifier tableIdent,
+      Column[] columns,
+      String comment,
+      Map<String, String> properties,
+      Transform[] partitioning,
+      Distribution distribution,
+      SortOrder[] sortOrders,
+      Index[] indexes)
+      throws NoSuchSchemaException, TableAlreadyExistsException {
+    throw new UnsupportedOperationException("createTable not supported.");
+  }
+
+  /**
+   * Purges a table from the Iceberg catalog.
+   *
+   * @param tableIdent The identifier of the table to purge.
+   * @return true if the table is successfully purged; false if the table does not exist.
+   * @throws UnsupportedOperationException If the table type is EXTERNAL_TABLE, it cannot be purged.
+   */
+  @Override
+  public boolean purgeTable(NameIdentifier tableIdent) throws UnsupportedOperationException {
+    throw new UnsupportedOperationException("purgeTable not supported.");
+  }
+
+  /**
+   * Purges a table directly through the Iceberg SDK, dropping both metadata and data.
+   *
+   * @param tableIdent The identifier of the table to purge.
+   * @return true if the table is successfully purged; false if the table does not exist or the
+   *     purge fails.
+   */
+  @Override
+  public boolean purgeTableOneMeta(NameIdentifier tableIdent) {
+    // use iceberg sdk
+    try {
+      HdfsAuthentication hdfsAuthentication = AuthUtils.createHdfsAuthentication(icebergSdkConf);
+      hdfsAuthentication.doAs(
+          () -> {
+            // The Iceberg identifier is database plus table name; the database comes from the
+            // last level of the Gravitino namespace.
+            TableIdentifier identifier =
+                TableIdentifier.of(
+                    tableIdent.namespace().level(tableIdent.namespace().length() - 1),
+                    tableIdent.name());
+            HiveCatalog hiveCatalog = HiveCatalogUtils.createHiveCatalog(icebergSdkConf);
+            hiveCatalog.dropTable(identifier, true);
+            return null;
+          });
+      hdfsAuthentication.close();
+    } catch (org.apache.iceberg.exceptions.NoSuchTableException e) {
+      LOG.warn("Iceberg table {} does not exist", tableIdent.name());
+      return false;
+    } catch (Throwable e) {
+      LOG.error("Failed to purge Iceberg table {}", tableIdent.name(), e);
+      return false;
+    }
+    return true;
+  }
+
+  private Configuration createDefaultConfiguration() {
+    Configuration defaultConf = new Configuration();
+    defaultConf.addResource(new Path(CORE_SITE_PATH));
+    defaultConf.addResource(new Path(HDFS_SITE_PATH));
+    defaultConf.addResource(new Path(HIVE_SITE_PATH));
+    defaultConf.addResource(new Path(MOUNT_TABLE_PATH));
+    return defaultConf;
+  }
+
+  // TODO. We should figure out a better way to get the current user from servlet container.
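+  // One possible direction for the TODO above (a sketch only; CurrentUserHolder is a
+  // hypothetical class, not part of this patch): have the server's authentication filter
+  // stash the request principal in a ThreadLocal and fall back to the JVM user:
+  //
+  //   public final class CurrentUserHolder {
+  //     private static final ThreadLocal<String> USER = new ThreadLocal<>();
+  //     public static void set(String user) { USER.set(user); }
+  //     public static String get() { return USER.get(); }
+  //     public static void clear() { USER.remove(); }
+  //   }
+  //
+  //   // in currentUser(): String u = CurrentUserHolder.get();
+  //   //                   return u != null ? u : System.getProperty("user.name");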
+  private static String currentUser() {
+    return System.getProperty("user.name");
+  }
+
+  @Override
+  public PropertiesMetadata tablePropertiesMetadata() throws UnsupportedOperationException {
+    return icebergTablePropertiesMetadata;
+  }
+
+  @Override
+  public PropertiesMetadata catalogPropertiesMetadata() throws UnsupportedOperationException {
+    return icebergCatalogPropertiesMetadata;
+  }
+
+  @Override
+  public PropertiesMetadata schemaPropertiesMetadata() throws UnsupportedOperationException {
+    return icebergSchemaPropertiesMetadata;
+  }
+
+  @Override
+  public PropertiesMetadata filesetPropertiesMetadata() throws UnsupportedOperationException {
+    throw new UnsupportedOperationException(
+        "Iceberg catalog doesn't support fileset related operations");
+  }
+}
diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogPropertiesMetadata.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogPropertiesMetadata.java
new file mode 100644
index 00000000000..ce953e57eb1
--- /dev/null
+++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogPropertiesMetadata.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2023 Datastrato Pvt Ltd.
+ * This software is licensed under the Apache License version 2.
+ */
+package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg;
+
+import static com.datastrato.gravitino.catalog.PropertyEntry.enumImmutablePropertyEntry;
+import static com.datastrato.gravitino.catalog.PropertyEntry.stringRequiredPropertyEntry;
+
+import com.datastrato.gravitino.catalog.BaseCatalogPropertiesMetadata;
+import com.datastrato.gravitino.catalog.PropertyEntry;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class IcebergCatalogPropertiesMetadata extends BaseCatalogPropertiesMetadata {
+  public static final String CATALOG_BACKEND_NAME = "catalog-backend";
+
+  public static final String GRAVITINO_JDBC_USER = "jdbc-user";
+  public static final String ICEBERG_JDBC_USER = "jdbc.user";
+
+  public static final String GRAVITINO_JDBC_PASSWORD = "jdbc-password";
+  public static final String ICEBERG_JDBC_PASSWORD = "jdbc.password";
+  public static final String ICEBERG_JDBC_INITIALIZE = "jdbc-initialize";
+
+  public static final String GRAVITINO_JDBC_DRIVER = "jdbc-driver";
+  public static final String WAREHOUSE = "warehouse";
+  public static final String URI = "uri";
+
+  private static final Map<String, PropertyEntry<?>> PROPERTIES_METADATA;
+
+  // Map that maintains the mapping of keys in Gravitino to that in Iceberg, for example, users
+  // will only need to set the configuration 'catalog-backend' in Gravitino and Gravitino will
+  // change it to `catalogType` automatically and pass it to Iceberg.
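+  //
+  // For example (illustrative values): given the Gravitino catalog properties
+  //   {"jdbc-user": "gravitino", "uri": "thrift://host:9083", "gravitino.bypass.x": "y"}
+  // transformProperties(...) below returns
+  //   {"jdbc.user": "gravitino", "uri": "thrift://host:9083"}
+  // Unmapped keys are dropped here; "gravitino.bypass."-prefixed keys are handled
+  // separately through the prefix map in IcebergCatalogOperations#initialize.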
+  public static final Map<String, String> GRAVITINO_CONFIG_TO_ICEBERG =
+      ImmutableMap.of(
+          CATALOG_BACKEND_NAME,
+          CATALOG_BACKEND_NAME,
+          GRAVITINO_JDBC_DRIVER,
+          GRAVITINO_JDBC_DRIVER,
+          GRAVITINO_JDBC_USER,
+          ICEBERG_JDBC_USER,
+          GRAVITINO_JDBC_PASSWORD,
+          ICEBERG_JDBC_PASSWORD,
+          URI,
+          URI,
+          WAREHOUSE,
+          WAREHOUSE);
+
+  static {
+    List<PropertyEntry<?>> propertyEntries =
+        ImmutableList.of(
+            enumImmutablePropertyEntry(
+                CATALOG_BACKEND_NAME,
+                "Iceberg catalog type choose properties",
+                true,
+                IcebergCatalogBackend.class,
+                null,
+                false,
+                false),
+            stringRequiredPropertyEntry(URI, "Iceberg catalog uri config", false, false),
+            stringRequiredPropertyEntry(
+                WAREHOUSE, "Iceberg catalog warehouse config", false, false));
+    HashMap<String, PropertyEntry<?>> result = Maps.newHashMap(BASIC_CATALOG_PROPERTY_ENTRIES);
+    result.putAll(Maps.uniqueIndex(propertyEntries, PropertyEntry::getName));
+    PROPERTIES_METADATA = ImmutableMap.copyOf(result);
+  }
+
+  @Override
+  protected Map<String, PropertyEntry<?>> specificPropertyEntries() {
+    return PROPERTIES_METADATA;
+  }
+
+  public Map<String, String> transformProperties(Map<String, String> properties) {
+    Map<String, String> gravitinoConfig = Maps.newHashMap();
+    properties.forEach(
+        (key, value) -> {
+          if (GRAVITINO_CONFIG_TO_ICEBERG.containsKey(key)) {
+            gravitinoConfig.put(GRAVITINO_CONFIG_TO_ICEBERG.get(key), value);
+          }
+        });
+    return gravitinoConfig;
+  }
+}
diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergColumn.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergColumn.java
new file mode 100644
index 00000000000..c678090b2c8
--- /dev/null
+++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergColumn.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2023 Datastrato Pvt Ltd.
+ * This software is licensed under the Apache License version 2.
+ */
+package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg;
+
+import com.datastrato.gravitino.catalog.rel.BaseColumn;
+import lombok.EqualsAndHashCode;
+
+/** Represents a column in an Iceberg table. */
+@EqualsAndHashCode(callSuper = true)
+public class IcebergColumn extends BaseColumn {
+
+  private IcebergColumn() {}
+
+  /** A builder class for constructing IcebergColumn instances. */
+  public static class Builder extends BaseColumnBuilder<Builder, IcebergColumn> {
+
+    /**
+     * Internal method to build an IcebergColumn instance using the provided values.
+     *
+     * @return A new IcebergColumn instance with the configured values.
+     */
+    @Override
+    protected IcebergColumn internalBuild() {
+      IcebergColumn icebergColumn = new IcebergColumn();
+      icebergColumn.name = name;
+      icebergColumn.comment = comment;
+      icebergColumn.dataType = dataType;
+      icebergColumn.nullable = nullable;
+      icebergColumn.defaultValue = defaultValue == null ? DEFAULT_VALUE_NOT_SET : defaultValue;
+      return icebergColumn;
+    }
+  }
+}
diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergSchema.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergSchema.java
new file mode 100644
index 00000000000..464970cad3a
--- /dev/null
+++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergSchema.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2023 Datastrato Pvt Ltd.
+ * This software is licensed under the Apache License version 2.
+ */ +package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg; + +import com.datastrato.gravitino.catalog.rel.BaseSchema; +import lombok.ToString; + +/** Represents an Iceberg Schema (Database) entity in the Iceberg schema. */ +@ToString +public class IcebergSchema extends BaseSchema { + + private IcebergSchema() {} + + /** A builder class for constructing IcebergSchema instances. */ + public static class Builder extends BaseSchemaBuilder { + + @Override + protected IcebergSchema internalBuild() { + IcebergSchema icebergSchema = new IcebergSchema(); + icebergSchema.name = name; + icebergSchema.comment = + null == comment + ? (null == properties + ? null + : properties.get(IcebergSchemaPropertiesMetadata.COMMENT)) + : comment; + icebergSchema.properties = properties; + icebergSchema.auditInfo = auditInfo; + return icebergSchema; + } + } +} diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergSchemaPropertiesMetadata.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergSchemaPropertiesMetadata.java new file mode 100644 index 00000000000..2dad4aaf9ce --- /dev/null +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergSchemaPropertiesMetadata.java @@ -0,0 +1,31 @@ +/* + * Copyright 2023 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ +package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg; + +import static com.datastrato.gravitino.catalog.PropertyEntry.stringReservedPropertyEntry; + +import com.datastrato.gravitino.catalog.BasePropertiesMetadata; +import com.datastrato.gravitino.catalog.PropertyEntry; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Maps; +import java.util.List; +import java.util.Map; + +public class IcebergSchemaPropertiesMetadata extends BasePropertiesMetadata { + + public static final String COMMENT = "comment"; + private static final Map> PROPERTIES_METADATA; + + static { + List> propertyEntries = + ImmutableList.of(stringReservedPropertyEntry(COMMENT, "Schema comment", true)); + PROPERTIES_METADATA = Maps.uniqueIndex(propertyEntries, PropertyEntry::getName); + } + + @Override + protected Map> specificPropertyEntries() { + return PROPERTIES_METADATA; + } +} diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergTable.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergTable.java new file mode 100644 index 00000000000..192579baf2f --- /dev/null +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergTable.java @@ -0,0 +1,136 @@ +/* + * Copyright 2023 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. 
+ */ +package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg; + +import static com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.IcebergTablePropertiesMetadata.DISTRIBUTION_MODE; + +import com.datastrato.gravitino.catalog.TableOperations; +import com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.converter.ConvertUtil; +import com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.converter.FromIcebergPartitionSpec; +import com.datastrato.gravitino.catalog.rel.BaseTable; +import com.datastrato.gravitino.meta.AuditInfo; +import com.datastrato.gravitino.rel.expressions.distributions.Distribution; +import com.datastrato.gravitino.rel.expressions.distributions.Distributions; +import com.datastrato.gravitino.rel.expressions.transforms.Transform; +import com.google.common.collect.Maps; +import java.util.Map; +import lombok.Getter; +import lombok.ToString; +import org.apache.iceberg.DistributionMode; +import org.apache.iceberg.Schema; +import org.apache.iceberg.TableMetadata; + +/** Represents an Iceberg Table entity in the Iceberg table. */ +@ToString +@Getter +public class IcebergTable extends BaseTable { + + /** + * A reserved property to specify the location of the table. The files of the table should be + * under this location. + */ + public static final String PROP_LOCATION = "location"; + + /** A reserved property to specify the provider of the table. */ + public static final String PROP_PROVIDER = "provider"; + + /** The default provider of the table. */ + public static final String DEFAULT_ICEBERG_PROVIDER = "iceberg"; + + public static final String ICEBERG_COMMENT_FIELD_NAME = "comment"; + + private String location; + + private IcebergTable() {} + + /** + * Creates a new IcebergTable instance from a Table and a Builder. + * + * @param table The inner Table representing the IcebergTable. + * @param tableName The name of Table. + * @return A new IcebergTable instance. + */ + public static IcebergTable fromIcebergTable(TableMetadata table, String tableName) { + Map properties = table.properties(); + Schema schema = table.schema(); + Transform[] partitionSpec = FromIcebergPartitionSpec.fromPartitionSpec(table.spec(), schema); + Distribution distribution = Distributions.NONE; + String distributionName = properties.get(DISTRIBUTION_MODE); + if (null != distributionName) { + switch (DistributionMode.fromName(distributionName)) { + case HASH: + distribution = Distributions.HASH; + break; + case RANGE: + distribution = Distributions.RANGE; + break; + default: + // do nothing + break; + } + } + IcebergColumn[] icebergColumns = + schema.columns().stream().map(ConvertUtil::fromNestedField).toArray(IcebergColumn[]::new); + return new Builder() + .withComment(table.property(IcebergTablePropertiesMetadata.COMMENT, null)) + .withLocation(table.location()) + .withProperties(properties) + .withColumns(icebergColumns) + .withName(tableName) + .withAuditInfo(AuditInfo.EMPTY) + .withPartitioning(partitionSpec) + .withDistribution(distribution) + .build(); + } + + @Override + protected TableOperations newOps() { + // todo: implement this method when we have the Iceberg table operations. + throw new UnsupportedOperationException("IcebergTable does not support TableOperations."); + } + + /** A builder class for constructing IcebergTable instances. 
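+   *
+   * <p>Typical usage (illustrative values):
+   *
+   * <pre>{@code
+   * IcebergTable table =
+   *     new IcebergTable.Builder()
+   *         .withName("events")
+   *         .withComment("demo table")
+   *         .withColumns(columns)
+   *         .withProperties(ImmutableMap.of("format-version", "2"))
+   *         .withLocation("hdfs://nameservice/warehouse/db.db/events")
+   *         .build();
+   * }</pre>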
*/ + public static class Builder extends BaseTableBuilder { + + private String location; + + public Builder withLocation(String location) { + this.location = location; + return this; + } + + /** + * Internal method to build an IcebergTable instance using the provided values. + * + * @return A new IcebergTable instance with the configured values. + */ + @Override + protected IcebergTable internalBuild() { + IcebergTable icebergTable = new IcebergTable(); + icebergTable.name = name; + icebergTable.comment = comment; + icebergTable.properties = + properties != null ? Maps.newHashMap(properties) : Maps.newHashMap(); + icebergTable.auditInfo = auditInfo; + icebergTable.columns = columns; + if (null != location) { + icebergTable.location = location; + } else { + icebergTable.location = icebergTable.properties.get(PROP_LOCATION); + } + icebergTable.partitioning = partitioning; + icebergTable.distribution = distribution; + icebergTable.sortOrders = sortOrders; + if (null != comment) { + icebergTable.properties.putIfAbsent(ICEBERG_COMMENT_FIELD_NAME, comment); + } + String provider = icebergTable.properties.get(PROP_PROVIDER); + if (provider != null && !DEFAULT_ICEBERG_PROVIDER.equalsIgnoreCase(provider)) { + throw new IllegalArgumentException("Unsupported format in USING: " + provider); + } + return icebergTable; + } + } +} diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergTablePropertiesMetadata.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergTablePropertiesMetadata.java new file mode 100644 index 00000000000..7bcb156f456 --- /dev/null +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergTablePropertiesMetadata.java @@ -0,0 +1,58 @@ +/* + * Copyright 2023 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. 
+ */
+package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg;
+
+import static com.datastrato.gravitino.catalog.PropertyEntry.stringImmutablePropertyEntry;
+import static com.datastrato.gravitino.catalog.PropertyEntry.stringReservedPropertyEntry;
+
+import com.datastrato.gravitino.catalog.BasePropertiesMetadata;
+import com.datastrato.gravitino.catalog.PropertyEntry;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Maps;
+import java.util.List;
+import java.util.Map;
+import org.apache.iceberg.TableProperties;
+
+public class IcebergTablePropertiesMetadata extends BasePropertiesMetadata {
+  public static final String COMMENT = "comment";
+  public static final String CREATOR = "creator";
+  public static final String LOCATION = "location";
+  public static final String CURRENT_SNAPSHOT_ID = "current-snapshot-id";
+  public static final String CHERRY_PICK_SNAPSHOT_ID = "cherry-pick-snapshot-id";
+  public static final String SORT_ORDER = "sort-order";
+  public static final String IDENTIFIER_FIELDS = "identifier-fields";
+
+  public static final String DISTRIBUTION_MODE = TableProperties.WRITE_DISTRIBUTION_MODE;
+
+  private static final Map<String, PropertyEntry<?>> PROPERTIES_METADATA;
+
+  static {
+    List<PropertyEntry<?>> propertyEntries =
+        ImmutableList.of(
+            stringReservedPropertyEntry(COMMENT, "The table comment", true),
+            stringReservedPropertyEntry(CREATOR, "The table creator", false),
+            stringImmutablePropertyEntry(
+                LOCATION, "Iceberg location for table storage", false, null, false, false),
+            stringReservedPropertyEntry(
+                CURRENT_SNAPSHOT_ID,
+                "The snapshot represents the current state of the table",
+                false),
+            stringReservedPropertyEntry(
+                CHERRY_PICK_SNAPSHOT_ID,
+                "Selecting a specific snapshot in a merge operation",
+                false),
+            stringReservedPropertyEntry(SORT_ORDER, "The sort order of the table", false),
+            stringReservedPropertyEntry(
+                IDENTIFIER_FIELDS, "The identifier field(s) for defining the table", false),
+            stringReservedPropertyEntry(DISTRIBUTION_MODE, "Write distribution mode", false));
+    PROPERTIES_METADATA = Maps.uniqueIndex(propertyEntries, PropertyEntry::getName);
+  }
+
+  @Override
+  protected Map<String, PropertyEntry<?>> specificPropertyEntries() {
+    return PROPERTIES_METADATA;
+  }
+}
diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/Main.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/Main.java
deleted file mode 100644
index 5e2aa97673c..00000000000
--- a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/Main.java
+++ /dev/null
@@ -1,7 +0,0 @@
-package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg;
-
-public class Main {
-  public static void main(String[] args) {
-    System.out.println("Hello world!");
-  }
-}
diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ConvertUtil.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ConvertUtil.java
new file mode 100644
index 00000000000..2f808249992
--- /dev/null
+++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ConvertUtil.java
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2023 Datastrato Pvt Ltd.
+ * This software is licensed under the Apache License version 2.
+ */ +package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.converter; + +import com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.IcebergColumn; +import com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.IcebergTable; +import java.util.Arrays; +import org.apache.iceberg.Schema; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.TypeUtil; +import org.apache.iceberg.types.Types; + +public class ConvertUtil { + + /** + * Convert the Iceberg Table to the corresponding schema information in the Iceberg. + * + * @param gravitinoTable Gravitino table of Iceberg. + * @return Iceberg schema. + */ + public static Schema toIcebergSchema(IcebergTable gravitinoTable) { + com.datastrato.gravitino.rel.types.Types.StructType gravitinoStructType = + toGravitinoStructType(gravitinoTable); + Type converted = + ToIcebergTypeVisitor.visit(gravitinoStructType, new ToIcebergType(gravitinoStructType)); + return new Schema(converted.asNestedType().asStructType().fields()); + } + + /** + * Convert the Gravitino type to the Iceberg type. + * + * @param nullable Whether the field is nullable. + * @param gravitinoType Gravitino type. + * @return Iceberg type. + */ + public static Type toIcebergType( + boolean nullable, com.datastrato.gravitino.rel.types.Type gravitinoType) { + return ToIcebergTypeVisitor.visit(gravitinoType, new ToIcebergType(nullable)); + } + + /** + * Convert the nested type of Iceberg to the type of gravitino. + * + * @param type Iceberg type of field. + * @return Gravitino type. + */ + public static com.datastrato.gravitino.rel.types.Type formIcebergType(Type type) { + return TypeUtil.visit(type, new FromIcebergType()); + } + + /** + * Convert the nested field of Iceberg to the Iceberg column. + * + * @param nestedField Iceberg nested field. + * @return Gravitino iceberg column + */ + public static IcebergColumn fromNestedField(Types.NestedField nestedField) { + return new IcebergColumn.Builder() + .withName(nestedField.name()) + .withNullable(nestedField.isOptional()) + .withComment(nestedField.doc()) + .withType(ConvertUtil.formIcebergType(nestedField.type())) + .build(); + } + + /** + * Convert the Gravitino iceberg table to the Gravitino StructType + * + * @param icebergTable Gravitino iceberg table + * @return Gravitino StructType + */ + private static com.datastrato.gravitino.rel.types.Types.StructType toGravitinoStructType( + IcebergTable icebergTable) { + com.datastrato.gravitino.rel.types.Types.StructType.Field[] fields = + Arrays.stream(icebergTable.columns()) + .map( + column -> + com.datastrato.gravitino.rel.types.Types.StructType.Field.of( + column.name(), column.dataType(), column.nullable(), column.comment())) + .toArray(com.datastrato.gravitino.rel.types.Types.StructType.Field[]::new); + return com.datastrato.gravitino.rel.types.Types.StructType.of(fields); + } + + private ConvertUtil() {} +} diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/FromIcebergPartitionSpec.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/FromIcebergPartitionSpec.java new file mode 100644 index 00000000000..277b295ce3e --- /dev/null +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/FromIcebergPartitionSpec.java @@ -0,0 +1,97 @@ +/* + * Copyright 2023 Datastrato Pvt Ltd. 
+ * This software is licensed under the Apache License version 2.
+ */
+package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.converter;
+
+import com.datastrato.gravitino.rel.expressions.transforms.Transform;
+import com.datastrato.gravitino.rel.expressions.transforms.Transforms;
+import com.google.common.collect.Lists;
+import java.util.List;
+import java.util.Map;
+import org.apache.iceberg.PartitionField;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.transforms.PartitionSpecVisitor;
+import org.apache.iceberg.types.TypeUtil;
+
+/**
+ * Convert Iceberg transforms to Gravitino transforms.
+ *
+ * <p>Referred from
+ * core/src/main/java/org/apache/iceberg/spark/Spark3Util/SpecTransformToSparkTransform.java
+ */
+public class FromIcebergPartitionSpec implements PartitionSpecVisitor<Transform> {
+
+  // Maps Iceberg source field ids to field names; built from the table schema so the
+  // visitor can resolve names for partition source columns.
+  private final Map<Integer, String> idToName;
+
+  public FromIcebergPartitionSpec(Map<Integer, String> idToName) {
+    this.idToName = idToName;
+  }
+
+  @Override
+  public Transform identity(String sourceName, int sourceId) {
+    return Transforms.identity(idToName.get(sourceId));
+  }
+
+  @Override
+  public Transform bucket(String sourceName, int sourceId, int numBuckets) {
+    return Transforms.bucket(numBuckets, new String[] {idToName.get(sourceId)});
+  }
+
+  @Override
+  public Transform truncate(String sourceName, int sourceId, int width) {
+    return Transforms.truncate(width, idToName.get(sourceId));
+  }
+
+  @Override
+  public Transform year(String sourceName, int sourceId) {
+    return Transforms.year(idToName.get(sourceId));
+  }
+
+  @Override
+  public Transform month(String sourceName, int sourceId) {
+    return Transforms.month(idToName.get(sourceId));
+  }
+
+  @Override
+  public Transform day(String sourceName, int sourceId) {
+    return Transforms.day(idToName.get(sourceId));
+  }
+
+  @Override
+  public Transform hour(String sourceName, int sourceId) {
+    return Transforms.hour(idToName.get(sourceId));
+  }
+
+  @Override
+  public Transform alwaysNull(int fieldId, String sourceName, int sourceId) {
+    // do nothing for alwaysNull, it doesn't need to be converted to a transform
+    return null;
+  }
+
+  @Override
+  public Transform unknown(int fieldId, String sourceName, int sourceId, String transform) {
+    throw new UnsupportedOperationException("Unsupported Transform conversion type.");
+  }
+
+  /**
+   * Converts an Iceberg partition spec into Gravitino transforms.
+   *
+   * @param partitionSpec The Iceberg partition spec to convert.
+   * @param schema The Iceberg schema the partition spec belongs to.
+   * @return array of transforms for partition fields.
+   */
+  public static Transform[] fromPartitionSpec(PartitionSpec partitionSpec, Schema schema) {
+    FromIcebergPartitionSpec visitor =
+        new FromIcebergPartitionSpec(TypeUtil.indexNameById(schema.asStruct()));
+    List<Transform> transforms = Lists.newArrayList();
+    List<PartitionField> fields = partitionSpec.fields();
+
+    for (PartitionField field : fields) {
+      Transform transform = PartitionSpecVisitor.visit(schema, field, visitor);
+      if (transform != null) {
+        transforms.add(transform);
+      }
+    }
+    return transforms.toArray(new Transform[0]);
+  }
+}
diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/FromIcebergType.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/FromIcebergType.java
new file mode 100644
index 00000000000..2be4073adba
--- /dev/null
+++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/FromIcebergType.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2023 Datastrato Pvt Ltd.
+ * This software is licensed under the Apache License version 2.
+ */
+package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.converter;
+
+import com.datastrato.gravitino.rel.types.Type;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.types.TypeUtil;
+import org.apache.iceberg.types.Types;
+
+/**
+ * Implement a type converter to convert Iceberg types to Gravitino types.
+ *
+ * <p>Referred from core/src/main/java/org/apache/iceberg/spark/TypeToSparkType.java
+ */
+public class FromIcebergType extends TypeUtil.SchemaVisitor<Type> {
+
+  public FromIcebergType() {}
+
+  @Override
+  public Type schema(Schema schema, Type structType) {
+    throw new UnsupportedOperationException("Data conversion of schema type is not supported");
+  }
+
+  @Override
+  public Type struct(Types.StructType struct, List<Type> fieldResults) {
+    List<com.datastrato.gravitino.rel.types.Types.StructType.Field> fieldsList =
+        new ArrayList<>();
+    List<Types.NestedField> originalFields = struct.fields();
+
+    for (int i = 0; i < originalFields.size(); i++) {
+      Types.NestedField nestedField = originalFields.get(i);
+      fieldsList.add(
+          com.datastrato.gravitino.rel.types.Types.StructType.Field.of(
+              nestedField.name(),
+              fieldResults.get(i),
+              nestedField.isOptional(),
+              nestedField.doc()));
+    }
+    return com.datastrato.gravitino.rel.types.Types.StructType.of(
+        fieldsList.toArray(new com.datastrato.gravitino.rel.types.Types.StructType.Field[0]));
+  }
+
+  @Override
+  public Type field(Types.NestedField field, Type fieldResult) {
+    return fieldResult;
+  }
+
+  @Override
+  public Type list(Types.ListType list, Type elementResult) {
+    return com.datastrato.gravitino.rel.types.Types.ListType.of(
+        elementResult, list.isElementOptional());
+  }
+
+  @Override
+  public Type map(Types.MapType map, Type keyResult, Type valueResult) {
+    return com.datastrato.gravitino.rel.types.Types.MapType.of(
+        keyResult, valueResult, map.isValueOptional());
+  }
+
+  @Override
+  public Type primitive(org.apache.iceberg.types.Type.PrimitiveType primitive) {
+    switch (primitive.typeId()) {
+      case BOOLEAN:
+        return com.datastrato.gravitino.rel.types.Types.BooleanType.get();
+      case INTEGER:
+        return com.datastrato.gravitino.rel.types.Types.IntegerType.get();
+      case LONG:
+        return com.datastrato.gravitino.rel.types.Types.LongType.get();
+      case FLOAT:
+        return com.datastrato.gravitino.rel.types.Types.FloatType.get();
+      case DOUBLE:
+        return com.datastrato.gravitino.rel.types.Types.DoubleType.get();
+      case DATE:
+        return com.datastrato.gravitino.rel.types.Types.DateType.get();
+      case TIME:
+        return com.datastrato.gravitino.rel.types.Types.TimeType.get();
+      case TIMESTAMP:
+        Types.TimestampType ts = (Types.TimestampType) primitive;
+        // shouldAdjustToUTC() marks Iceberg's "timestamptz", i.e. a timestamp WITH time zone.
+        if (ts.shouldAdjustToUTC()) {
+          return com.datastrato.gravitino.rel.types.Types.TimestampType.withTimeZone();
+        } else {
+          return com.datastrato.gravitino.rel.types.Types.TimestampType.withoutTimeZone();
+        }
+      case STRING:
+        return com.datastrato.gravitino.rel.types.Types.StringType.get();
+      case UUID:
+        return com.datastrato.gravitino.rel.types.Types.UUIDType.get();
+      case FIXED:
+        Types.FixedType fixedType = (Types.FixedType) primitive;
+        return com.datastrato.gravitino.rel.types.Types.FixedType.of(fixedType.length());
+      case BINARY:
+        return com.datastrato.gravitino.rel.types.Types.BinaryType.get();
+      case DECIMAL:
+        Types.DecimalType decimal = (Types.DecimalType) primitive;
+        return com.datastrato.gravitino.rel.types.Types.DecimalType.of(
+            decimal.precision(), decimal.scale());
+      default:
+        return com.datastrato.gravitino.rel.types.Types.UnparsedType.of(primitive.typeId().name());
+    }
+  }
+}
diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ToIcebergPartitionSpec.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ToIcebergPartitionSpec.java
new file mode 100644
index 00000000000..2a86763a5c2
--- /dev/null
+++
b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ToIcebergPartitionSpec.java @@ -0,0 +1,74 @@ +/* + * Copyright 2023 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ +package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.converter; + +import com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.IcebergTable; +import com.datastrato.gravitino.rel.expressions.transforms.Transform; +import com.datastrato.gravitino.rel.expressions.transforms.Transforms; +import com.google.common.base.Preconditions; +import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.Schema; + +/** Convert Gravitino Transforms to Iceberg PartitionSpec. */ +public class ToIcebergPartitionSpec { + + private static final String DOT = "."; + + /** + * Convert iceberg table to iceberg partition spec through gravitino. + * + * @param icebergTable the iceberg table. + * @return a PartitionSpec + */ + public static PartitionSpec toPartitionSpec(IcebergTable icebergTable) { + Schema schema = ConvertUtil.toIcebergSchema(icebergTable); + return ToIcebergPartitionSpec.toPartitionSpec(schema, icebergTable.partitioning()); + } + + /** + * Converts gravitino transforms into a {@link PartitionSpec}. + * + * @param schema the table schema + * @param partitioning Gravitino Transforms + * @return a PartitionSpec + */ + public static PartitionSpec toPartitionSpec(Schema schema, Transform[] partitioning) { + if (partitioning == null || partitioning.length == 0) { + return PartitionSpec.unpartitioned(); + } + + PartitionSpec.Builder builder = PartitionSpec.builderFor(schema); + for (Transform transform : partitioning) { + if (transform instanceof Transforms.IdentityTransform) { + String[] fieldName = ((Transforms.IdentityTransform) transform).fieldName(); + String colName = String.join(DOT, fieldName); + builder.identity(colName); + } else if (transform instanceof Transforms.BucketTransform) { + String[][] fieldNames = ((Transforms.BucketTransform) transform).fieldNames(); + Preconditions.checkArgument( + fieldNames.length == 1, "Iceberg partition does not support multi fields", transform); + builder.bucket( + String.join(DOT, fieldNames[0]), ((Transforms.BucketTransform) transform).numBuckets()); + } else if (transform instanceof Transforms.TruncateTransform) { + Transforms.TruncateTransform truncateTransform = (Transforms.TruncateTransform) transform; + builder.truncate( + String.join(DOT, truncateTransform.fieldName()), truncateTransform.width()); + } else if (transform instanceof Transforms.YearTransform) { + builder.year(String.join(DOT, ((Transforms.YearTransform) transform).fieldName())); + } else if (transform instanceof Transforms.MonthTransform) { + builder.month(String.join(DOT, ((Transforms.MonthTransform) transform).fieldName())); + } else if (transform instanceof Transforms.DayTransform) { + builder.day(String.join(DOT, ((Transforms.DayTransform) transform).fieldName())); + } else if (transform instanceof Transforms.HourTransform) { + builder.hour(String.join(DOT, ((Transforms.HourTransform) transform).fieldName())); + } else { + throw new UnsupportedOperationException("Transform is not supported: " + transform.name()); + } + } + return builder.build(); + } + + private ToIcebergPartitionSpec() {} +} diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ToIcebergType.java 
b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ToIcebergType.java new file mode 100644 index 00000000000..2e60defd567 --- /dev/null +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ToIcebergType.java @@ -0,0 +1,132 @@ +/* + * Copyright 2023 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ +package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.converter; + +import com.google.common.collect.Lists; +import java.util.List; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; + +/** + * Convert Gravitino types to iceberg types. + * + *

Referred from core/src/main/java/org/apache/iceberg/spark/SparkTypeToType.java + */ +public class ToIcebergType extends ToIcebergTypeVisitor { + private final com.datastrato.gravitino.rel.types.Types.StructType root; + private int nextId = 0; + private boolean nullable; + + public ToIcebergType(boolean nullable) { + this.root = null; + this.nullable = nullable; + } + + public ToIcebergType(com.datastrato.gravitino.rel.types.Types.StructType root) { + this.root = root; + // the root struct's fields use the first ids + this.nextId = root.fields().length; + } + + private int getNextId() { + return nextId++; + } + + @Override + public Type struct(com.datastrato.gravitino.rel.types.Types.StructType struct, List types) { + com.datastrato.gravitino.rel.types.Types.StructType.Field[] fields = struct.fields(); + List newFields = Lists.newArrayListWithExpectedSize(fields.length); + boolean isRoot = root == struct; + for (int i = 0; i < fields.length; i += 1) { + com.datastrato.gravitino.rel.types.Types.StructType.Field field = fields[i]; + Type type = types.get(i); + + int id; + if (isRoot) { + // for new conversions, use ordinals for ids in the root struct + id = i; + } else { + id = getNextId(); + } + + String doc = field.comment(); + + if (field.nullable()) { + newFields.add(Types.NestedField.optional(id, field.name(), type, doc)); + } else { + newFields.add(Types.NestedField.required(id, field.name(), type, doc)); + } + } + return Types.StructType.of(newFields); + } + + @Override + public Type field( + com.datastrato.gravitino.rel.types.Types.StructType.Field field, Type typeResult) { + return typeResult; + } + + @Override + public Type array(com.datastrato.gravitino.rel.types.Types.ListType array, Type elementType) { + if (nullable) { + return Types.ListType.ofOptional(getNextId(), elementType); + } else { + return Types.ListType.ofRequired(getNextId(), elementType); + } + } + + @Override + public Type map( + com.datastrato.gravitino.rel.types.Types.MapType map, Type keyType, Type valueType) { + if (nullable) { + return Types.MapType.ofOptional(getNextId(), getNextId(), keyType, valueType); + } else { + return Types.MapType.ofRequired(getNextId(), getNextId(), keyType, valueType); + } + } + + @Override + public Type atomic(com.datastrato.gravitino.rel.types.Type.PrimitiveType primitive) { + if (primitive instanceof com.datastrato.gravitino.rel.types.Types.BooleanType) { + return Types.BooleanType.get(); + } else if (primitive instanceof com.datastrato.gravitino.rel.types.Types.ByteType + || primitive instanceof com.datastrato.gravitino.rel.types.Types.ShortType) { + throw new IllegalArgumentException( + "Iceberg do not support Byte and Short Type, use Integer instead"); + } else if (primitive instanceof com.datastrato.gravitino.rel.types.Types.IntegerType) { + return Types.IntegerType.get(); + } else if (primitive instanceof com.datastrato.gravitino.rel.types.Types.LongType) { + return Types.LongType.get(); + } else if (primitive instanceof com.datastrato.gravitino.rel.types.Types.FloatType) { + return Types.FloatType.get(); + } else if (primitive instanceof com.datastrato.gravitino.rel.types.Types.DoubleType) { + return Types.DoubleType.get(); + } else if (primitive instanceof com.datastrato.gravitino.rel.types.Types.StringType) { + return Types.StringType.get(); + } else if (primitive instanceof com.datastrato.gravitino.rel.types.Types.DateType) { + return Types.DateType.get(); + } else if (primitive instanceof com.datastrato.gravitino.rel.types.Types.TimeType) { + return 
Types.TimeType.get(); + } else if (primitive instanceof com.datastrato.gravitino.rel.types.Types.TimestampType) { + if (((com.datastrato.gravitino.rel.types.Types.TimestampType) primitive).hasTimeZone()) { + return Types.TimestampType.withZone(); + } else { + return Types.TimestampType.withoutZone(); + } + } else if (primitive instanceof com.datastrato.gravitino.rel.types.Types.DecimalType) { + return Types.DecimalType.of( + ((com.datastrato.gravitino.rel.types.Types.DecimalType) primitive).precision(), + ((com.datastrato.gravitino.rel.types.Types.DecimalType) primitive).scale()); + } else if (primitive instanceof com.datastrato.gravitino.rel.types.Types.FixedType) { + return Types.FixedType.ofLength( + ((com.datastrato.gravitino.rel.types.Types.FixedType) primitive).length()); + } else if (primitive instanceof com.datastrato.gravitino.rel.types.Types.BinaryType) { + return Types.BinaryType.get(); + } else if (primitive instanceof com.datastrato.gravitino.rel.types.Types.UUIDType) { + return Types.UUIDType.get(); + } + throw new UnsupportedOperationException("Not a supported type: " + primitive.toString()); + } +} diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ToIcebergTypeVisitor.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ToIcebergTypeVisitor.java new file mode 100644 index 00000000000..39f5570e61d --- /dev/null +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ToIcebergTypeVisitor.java @@ -0,0 +1,70 @@ +/* + * Copyright 2023 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ +package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.converter; + +import com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.IcebergTable; +import com.datastrato.gravitino.rel.types.Type; +import com.datastrato.gravitino.rel.types.Types; +import com.google.common.collect.Lists; +import java.util.List; + +/** + * Type converter belonging to gravitino. + * + *

Referred from core/src/main/java/org/apache/iceberg/spark/SparkTypeVisitor.java + */ +public class ToIcebergTypeVisitor { + + /** + * Traverse the Gravitino data type and convert the fields into Iceberg fields. + * + * @param type Gravitino a data type in a gravitino. + * @param visitor Visitor of Iceberg type + * @param Iceberg type + * @return Iceberg type + */ + public static T visit(Type type, ToIcebergTypeVisitor visitor) { + if (type instanceof Types.MapType) { + Types.MapType map = (Types.MapType) type; + return visitor.map(map, visit(map.keyType(), visitor), visit(map.valueType(), visitor)); + } else if (type instanceof Types.ListType) { + Types.ListType list = (Types.ListType) type; + return visitor.array(list, visit(list.elementType(), visitor)); + } else if (type instanceof Types.StructType) { + Types.StructType.Field[] fields = ((Types.StructType) type).fields(); + List fieldResults = Lists.newArrayListWithExpectedSize(fields.length); + for (Types.StructType.Field field : fields) { + fieldResults.add(visitor.field(field, visit(field.type(), visitor))); + } + return visitor.struct((Types.StructType) type, fieldResults); + } else { + return visitor.atomic((Type.PrimitiveType) type); + } + } + + public T struct(IcebergTable struct, List fieldResults) { + throw new UnsupportedOperationException(); + } + + public T struct(Types.StructType struct, List fieldResults) { + throw new UnsupportedOperationException(); + } + + public T field(Types.StructType.Field field, T typeResult) { + throw new UnsupportedOperationException(); + } + + public T array(Types.ListType array, T elementResult) { + throw new UnsupportedOperationException(); + } + + public T map(Types.MapType map, T keyResult, T valueResult) { + throw new UnsupportedOperationException(); + } + + public T atomic(Type.PrimitiveType primitive) { + throw new UnsupportedOperationException(); + } +} diff --git a/catalogs/catalog-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/lakehouse/iceberg/IcebergCatalog.java b/catalogs/catalog-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/lakehouse/iceberg/IcebergCatalog.java index 8ffc98491f6..6419456de34 100644 --- a/catalogs/catalog-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/lakehouse/iceberg/IcebergCatalog.java +++ b/catalogs/catalog-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/lakehouse/iceberg/IcebergCatalog.java @@ -16,7 +16,7 @@ public class IcebergCatalog extends BaseCatalog { /** @return The short name of the catalog. 
*/ @Override public String shortName() { - return "lakehouse-iceberg"; + return "com-lakehouse-iceberg"; } /** diff --git a/docs/open-api/catalogs.yaml b/docs/open-api/catalogs.yaml index de4bc2ae8ca..ffb0a0e2e10 100644 --- a/docs/open-api/catalogs.yaml +++ b/docs/open-api/catalogs.yaml @@ -161,6 +161,7 @@ components: enum: - hive - lakehouse-iceberg + - com-lakehouse-iceberg - jdbc-mysql - jdbc-postgresql comment: @@ -199,6 +200,7 @@ components: enum: - hive - lakehouse-iceberg + - com-lakehouse-iceberg - jdbc-mysql - jdbc-postgresql comment: From b821320830ed939c42636a5eec46c2dce4f38a8a Mon Sep 17 00:00:00 2001 From: teo Date: Tue, 19 Mar 2024 21:37:13 +0800 Subject: [PATCH 09/11] update iceberg catalog --- bin/gravitino-debug.sh | 178 ++++++++++++++++++ .../lakehouse/iceberg/IcebergCatalog.java | 2 +- .../lakehouse/iceberg/IcebergCatalog.java | 2 +- docs/open-api/catalogs.yaml | 4 +- 4 files changed, 182 insertions(+), 4 deletions(-) create mode 100755 bin/gravitino-debug.sh diff --git a/bin/gravitino-debug.sh b/bin/gravitino-debug.sh new file mode 100755 index 00000000000..22d4414db4d --- /dev/null +++ b/bin/gravitino-debug.sh @@ -0,0 +1,178 @@ +#!/bin/bash +# +# Copyright 2023 Datastrato Pvt Ltd. +# This software is licensed under the Apache License version 2. +# +#set -ex +USAGE="-e Usage: bin/gravitino.sh [--config ]\n\t + {start|stop|restart|status}" + +if [[ "$1" == "--config" ]]; then + shift + conf_dir="$1" + if [[ ! -d "${conf_dir}" ]]; then + echo "ERROR : ${conf_dir} is not a directory" + echo ${USAGE} + exit 1 + else + export GRAVITINO_CONF_DIR="${conf_dir}" + fi + shift +fi + +bin="$(dirname "${BASH_SOURCE-$0}")" +bin="$(cd "${bin}">/dev/null; pwd)" + +. "${bin}/common.sh" + +check_java_version + +function check_process_status() { + local pid=$(found_gravitino_server_pid) + + if [[ -z "${pid}" ]]; then + echo "Gravitino Server is not running" + else + echo "Gravitino Server is running[PID:$pid]" + fi +} + +function found_gravitino_server_pid() { + process_name='GravitinoServer'; + RUNNING_PIDS=$(ps x | grep ${process_name} | grep -v grep | awk '{print $1}'); + + if [[ -z "${RUNNING_PIDS}" ]]; then + return + fi + + if ! kill -0 ${RUNNING_PIDS} > /dev/null 2>&1; then + echo "Gravitino Server running but process is dead" + fi + + echo "${RUNNING_PIDS}" +} + +function wait_for_gravitino_server_to_die() { + timeout=10 + timeoutTime=$(date "+%s") + let "timeoutTime+=$timeout" + currentTime=$(date "+%s") + forceKill=1 + + while [[ $currentTime -lt $timeoutTime ]]; do + local pid=$(found_gravitino_server_pid) + if [[ -z "${pid}" ]]; then + forceKill=0 + break + fi + + $(kill ${pid} > /dev/null 2> /dev/null) + if kill -0 ${pid} > /dev/null 2>&1; then + sleep 3 + else + forceKill=0 + break + fi + currentTime=$(date "+%s") + done + + if [[ forceKill -ne 0 ]]; then + $(kill -9 ${pid} > /dev/null 2> /dev/null) + fi +} + +function start() { + local pid=$(found_gravitino_server_pid) + + if [[ ! -z "${pid}" ]]; then + if kill -0 ${pid} >/dev/null 2>&1; then + echo "Gravitino Server is already running" + return 0; + fi + fi + + if [[ ! -d "${GRAVITINO_LOG_DIR}" ]]; then + echo "Log dir doesn't exist, create ${GRAVITINO_LOG_DIR}" + mkdir -p "${GRAVITINO_LOG_DIR}" + fi + + nohup ${JAVA_RUNNER} ${JAVA_OPTS} ${GRAVITINO_DEBUG_OPTS} -cp ${GRAVITINO_CLASSPATH} ${GRAVITINO_SERVER_NAME} >> "${GRAVITINO_OUTFILE}" 2>&1 & + + pid=$! + if [[ -z "${pid}" ]]; then + echo "Gravitino Server start error!" + return 1; + else + echo "Gravitino Server start success!" 
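+    # The JDWP agent configured via JAVA_OPTS below listens on port 7052
+    # (server=y,suspend=n), so once the server is up a debugger can attach, for
+    # example with `jdb -attach localhost:7052` or an IDE remote-debug run
+    # configuration pointed at that port.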
+ fi + + sleep 2 + check_process_status +} + +function stop() { + local pid + + pid=$(found_gravitino_server_pid) + + if [[ -z "${pid}" ]]; then + echo "Gravitino Server is not running" + else + wait_for_gravitino_server_to_die + echo "Gravitino Server stop" + fi +} + +HOSTNAME=$(hostname) +GRAVITINO_OUTFILE="${GRAVITINO_LOG_DIR}/gravitino-server.out" +GRAVITINO_SERVER_NAME=com.datastrato.gravitino.server.GravitinoServer + +JAVA_OPTS+=" -Dfile.encoding=UTF-8" +JAVA_OPTS+=" -Dlog4j2.configurationFile=file://${GRAVITINO_CONF_DIR}/log4j2.properties" +JAVA_OPTS+=" -Dgravitino.log.path=${GRAVITINO_LOG_DIR} ${GRAVITINO_MEM}" +JAVA_OPTS+=" -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=7052" +if [ "$JVM_VERSION" -eq 17 ]; then + JAVA_OPTS+=" -XX:+IgnoreUnrecognizedVMOptions" + JAVA_OPTS+=" --add-opens java.base/java.io=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.lang.invoke=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.lang.reflect=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.lang=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.math=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.net=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.nio=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.text=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.time=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.util.concurrent.atomic=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.util.concurrent=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.util.regex=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/java.util=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/jdk.internal.ref=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/jdk.internal.reflect=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.sql/java.sql=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/sun.util.calendar=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/sun.nio.ch=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/sun.nio.cs=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/sun.security.action=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.base/sun.util.calendar=ALL-UNNAMED" + JAVA_OPTS+=" --add-opens java.security.jgss/sun.security.krb5=ALL-UNNAMED" +fi + +addJarInDir "${GRAVITINO_HOME}/libs" + +case "${1}" in + start) + start + ;; + stop) + stop + ;; + restart) + stop + start + ;; + status) + check_process_status + ;; + *) + echo ${USAGE} +esac diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalog.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalog.java index fd87bf9d747..b07d355ecca 100644 --- a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalog.java +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalog.java @@ -16,7 +16,7 @@ public class IcebergCatalog extends BaseCatalog { /** @return The short name of the catalog. 
*/ @Override public String shortName() { - return "lakehouse-iceberg"; + return "bili-lakehouse-iceberg"; } /** diff --git a/catalogs/catalog-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/lakehouse/iceberg/IcebergCatalog.java b/catalogs/catalog-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/lakehouse/iceberg/IcebergCatalog.java index 6419456de34..8ffc98491f6 100644 --- a/catalogs/catalog-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/lakehouse/iceberg/IcebergCatalog.java +++ b/catalogs/catalog-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/lakehouse/iceberg/IcebergCatalog.java @@ -16,7 +16,7 @@ public class IcebergCatalog extends BaseCatalog { /** @return The short name of the catalog. */ @Override public String shortName() { - return "com-lakehouse-iceberg"; + return "lakehouse-iceberg"; } /** diff --git a/docs/open-api/catalogs.yaml b/docs/open-api/catalogs.yaml index ffb0a0e2e10..87abbf79a1a 100644 --- a/docs/open-api/catalogs.yaml +++ b/docs/open-api/catalogs.yaml @@ -161,7 +161,7 @@ components: enum: - hive - lakehouse-iceberg - - com-lakehouse-iceberg + - bili-lakehouse-iceberg - jdbc-mysql - jdbc-postgresql comment: @@ -200,7 +200,7 @@ components: enum: - hive - lakehouse-iceberg - - com-lakehouse-iceberg + - bili-lakehouse-iceberg - jdbc-mysql - jdbc-postgresql comment: From 2745b2605a2ac15b53df6cd446e1dc3b4eff593a Mon Sep 17 00:00:00 2001 From: teo Date: Wed, 20 Mar 2024 19:46:10 +0800 Subject: [PATCH 10/11] update to new comm --- build.gradle.kts | 1 + catalogs/bundled-catalog/build.gradle.kts | 1 + .../build.gradle.kts | 79 ++++++++++++-- .../lakehouse/iceberg/IcebergCatalog.java | 7 +- .../iceberg/IcebergCatalogOperations.java | 31 +++--- .../IcebergCatalogPropertiesMetadata.java | 8 +- .../bili/lakehouse/iceberg/IcebergColumn.java | 14 ++- .../bili/lakehouse/iceberg/IcebergConfig.java | 103 ++++++++++++++++++ .../bili/lakehouse/iceberg/IcebergSchema.java | 13 ++- .../IcebergSchemaPropertiesMetadata.java | 6 +- .../bili/lakehouse/iceberg/IcebergTable.java | 52 ++++++++- .../IcebergTablePropertiesMetadata.java | 8 +- .../iceberg/converter/ConvertUtil.java | 2 +- .../iceberg/converter/ToIcebergType.java | 2 + .../iceberg/utils/IcebergCatalogUtil.java | 47 ++++++++ integration-test/build.gradle.kts | 1 + settings.gradle.kts | 1 - 17 files changed, 327 insertions(+), 49 deletions(-) create mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergConfig.java create mode 100644 catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/utils/IcebergCatalogUtil.java diff --git a/build.gradle.kts b/build.gradle.kts index c57c34299b9..9f057740fcc 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -672,6 +672,7 @@ tasks { dependsOn( ":catalogs:catalog-hive:copyLibAndConfig", ":catalogs:catalog-lakehouse-iceberg:copyLibAndConfig", + ":catalogs:catalog-bili-lakehouse-iceberg:copyLibAndConfig", ":catalogs:catalog-jdbc-doris:copyLibAndConfig", ":catalogs:catalog-jdbc-mysql:copyLibAndConfig", ":catalogs:catalog-jdbc-postgresql:copyLibAndConfig", diff --git a/catalogs/bundled-catalog/build.gradle.kts b/catalogs/bundled-catalog/build.gradle.kts index 4adfd619e60..aa93321172d 100644 --- a/catalogs/bundled-catalog/build.gradle.kts +++ b/catalogs/bundled-catalog/build.gradle.kts @@ -81,6 +81,7 @@ tasks.jar { tasks.compileJava { dependsOn(":catalogs:catalog-jdbc-postgresql:runtimeJars") 
dependsOn(":catalogs:catalog-lakehouse-iceberg:runtimeJars") + dependsOn(":catalogs:catalog-bili-lakehouse-iceberg:runtimeJars") dependsOn(":catalogs:catalog-jdbc-mysql:runtimeJars") dependsOn(":catalogs:catalog-hive:runtimeJars") dependsOn(":catalogs:catalog-hadoop:runtimeJars") diff --git a/catalogs/catalog-bili-lakehouse-iceberg/build.gradle.kts b/catalogs/catalog-bili-lakehouse-iceberg/build.gradle.kts index 59bc214d66a..b611978e663 100644 --- a/catalogs/catalog-bili-lakehouse-iceberg/build.gradle.kts +++ b/catalogs/catalog-bili-lakehouse-iceberg/build.gradle.kts @@ -2,7 +2,7 @@ * Copyright 2023 Datastrato Pvt Ltd. * This software is licensed under the Apache License version 2. */ -description = "catalog-bili-lakehouse-iceberg" +description = "bili-catalog-lakehouse-iceberg" plugins { `maven-publish` @@ -10,6 +10,10 @@ plugins { id("idea") } +val scalaVersion: String = project.properties["scalaVersion"] as? String ?: extra["defaultScalaVersion"].toString() +val sparkVersion: String = libs.versions.spark.get() +val scalaCollectionCompatVersion: String = libs.versions.scala.collection.compat.get() + dependencies { implementation(project(":api")) implementation(project(":common")) @@ -22,11 +26,6 @@ dependencies { implementation(libs.commons.io) implementation(libs.commons.lang3) implementation(libs.guava) - implementation(libs.jackson.annotations) - implementation(libs.jackson.databind) - implementation(libs.jackson.datatype.jdk8) - implementation(libs.jackson.datatype.jsr310) - implementation(libs.sqlite.jdbc) implementation("org.apache.iceberg:iceberg-sdk:0.13.2-bili-0.4-SNAPSHOT") implementation(libs.hive2.metastore) { @@ -50,11 +49,24 @@ dependencies { exclude("org.pentaho") // missing dependency exclude("org.slf4j", "slf4j-log4j12") exclude("com.zaxxer", "HikariCP") + exclude("com.sun.jersey", "jersey-server") } + implementation(libs.jackson.annotations) + implementation(libs.jackson.databind) + implementation(libs.jackson.datatype.jdk8) + implementation(libs.jackson.datatype.jsr310) + implementation(libs.sqlite.jdbc) annotationProcessor(libs.lombok) + compileOnly(libs.lombok) + testImplementation(project(":catalogs:catalog-jdbc-common", "testArtifacts")) + testImplementation(project(":clients:client-java")) + testImplementation(project(":integration-test-common", "testArtifacts")) + testImplementation(project(":server")) + testImplementation(project(":server-common")) + implementation(libs.hadoop2.common) { exclude("com.github.spotbugs") } @@ -62,6 +74,17 @@ dependencies { implementation(libs.hadoop2.mapreduce.client.core) implementation(libs.metrics.jersey2) + testImplementation("org.scala-lang.modules:scala-collection-compat_$scalaVersion:$scalaCollectionCompatVersion") + testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") + testImplementation("org.apache.spark:spark-sql_$scalaVersion:$sparkVersion") { + exclude("org.apache.avro") + exclude("org.apache.hadoop") + exclude("org.apache.zookeeper") + exclude("io.dropwizard.metrics") + exclude("org.rocksdb") + } + + testImplementation(libs.bundles.log4j) testImplementation(libs.jersey.test.framework.core) { exclude(group = "org.junit.jupiter") } @@ -71,18 +94,27 @@ dependencies { testImplementation(libs.junit.jupiter.api) testImplementation(libs.junit.jupiter.params) testImplementation(libs.mockito.core) + // For test TestMultipleJDBCLoad, it was depended on testcontainers.mysql and testcontainers.postgresql) + testImplementation(libs.mysql.driver) + testImplementation(libs.postgresql.driver) + + 
testImplementation(libs.slf4j.api) + testImplementation(libs.testcontainers) + testImplementation(libs.testcontainers.mysql) + testImplementation(libs.testcontainers.postgresql) testRuntimeOnly(libs.junit.jupiter.engine) } tasks { - val copyDepends by registering(Copy::class) { + val runtimeJars by registering(Copy::class) { from(configurations.runtimeClasspath) - into("build/libs_all") + into("build/libs") } + val copyCatalogLibs by registering(Copy::class) { - dependsOn(copyDepends, "build") - from("build/libs_all", "build/libs") + dependsOn("jar", "runtimeJars") + from("build/libs") into("$rootDir/distribution/package/catalogs/bili-lakehouse-iceberg/libs") } @@ -111,3 +143,30 @@ tasks { dependsOn(copyCatalogLibs, copyCatalogConfig) } } + +tasks.test { + val skipUTs = project.hasProperty("skipTests") + if (skipUTs) { + // Only run integration tests + include("**/integration/**") + } + + val skipITs = project.hasProperty("skipITs") + if (skipITs) { + // Exclude integration tests + exclude("**/integration/**") + } else { + dependsOn(tasks.jar) + + doFirst { + environment("GRAVITINO_CI_HIVE_DOCKER_IMAGE", "datastrato/gravitino-ci-hive:0.1.9") + } + + val init = project.extra.get("initIntegrationTest") as (Test) -> Unit + init(this) + } +} + +tasks.clean { + delete("spark-warehouse") +} diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalog.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalog.java index b07d355ecca..f3f78b2923a 100644 --- a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalog.java +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalog.java @@ -4,8 +4,8 @@ */ package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg; -import com.datastrato.gravitino.catalog.BaseCatalog; -import com.datastrato.gravitino.catalog.CatalogOperations; +import com.datastrato.gravitino.connector.BaseCatalog; +import com.datastrato.gravitino.connector.CatalogOperations; import com.datastrato.gravitino.rel.SupportsSchemas; import com.datastrato.gravitino.rel.TableCatalog; import java.util.Map; @@ -27,8 +27,7 @@ public String shortName() { */ @Override protected CatalogOperations newOps(Map config) { - IcebergCatalogOperations ops = new IcebergCatalogOperations(entity()); - ops.initialize(config); + IcebergCatalogOperations ops = new IcebergCatalogOperations(); return ops; } diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogOperations.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogOperations.java index 337d1d6a5e2..d6b54c0872f 100644 --- a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogOperations.java +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogOperations.java @@ -4,7 +4,7 @@ */ package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg; -import static com.datastrato.gravitino.catalog.BaseCatalog.CATALOG_BYPASS_PREFIX; +import static com.datastrato.gravitino.connector.BaseCatalog.CATALOG_BYPASS_PREFIX; import static 
com.datastrato.gravitino.utils.OneMetaConstants.CORE_SITE_PATH; import static com.datastrato.gravitino.utils.OneMetaConstants.HDFS_SITE_PATH; import static com.datastrato.gravitino.utils.OneMetaConstants.HIVE_SITE_PATH; @@ -12,15 +12,15 @@ import com.datastrato.gravitino.NameIdentifier; import com.datastrato.gravitino.Namespace; -import com.datastrato.gravitino.catalog.CatalogOperations; -import com.datastrato.gravitino.catalog.PropertiesMetadata; +import com.datastrato.gravitino.connector.CatalogInfo; +import com.datastrato.gravitino.connector.CatalogOperations; +import com.datastrato.gravitino.connector.PropertiesMetadata; import com.datastrato.gravitino.exceptions.NoSuchCatalogException; import com.datastrato.gravitino.exceptions.NoSuchSchemaException; import com.datastrato.gravitino.exceptions.NoSuchTableException; import com.datastrato.gravitino.exceptions.NonEmptySchemaException; import com.datastrato.gravitino.exceptions.SchemaAlreadyExistsException; import com.datastrato.gravitino.exceptions.TableAlreadyExistsException; -import com.datastrato.gravitino.meta.CatalogEntity; import com.datastrato.gravitino.rel.Column; import com.datastrato.gravitino.rel.SchemaChange; import com.datastrato.gravitino.rel.SupportsSchemas; @@ -57,19 +57,10 @@ public class IcebergCatalogOperations implements CatalogOperations, SupportsSche private IcebergSchemaPropertiesMetadata icebergSchemaPropertiesMetadata; - private final CatalogEntity entity; + private CatalogInfo info; private Configuration icebergSdkConf = null; - /** - * Constructs a new instance of IcebergCatalogOperations. - * - * @param entity The catalog entity associated with this operations instance. - */ - public IcebergCatalogOperations(CatalogEntity entity) { - this.entity = entity; - } - /** * Initializes the Iceberg catalog operations with the provided configuration. * @@ -77,7 +68,8 @@ public IcebergCatalogOperations(CatalogEntity entity) { * @throws RuntimeException if initialization fails. */ @Override - public void initialize(Map conf) throws RuntimeException { + public void initialize(Map conf, CatalogInfo info) throws RuntimeException { + this.info = info; // Key format like gravitino.bypass.a.b Map prefixMap = MapUtils.getPrefixMap(conf, CATALOG_BYPASS_PREFIX); @@ -89,9 +81,10 @@ public void initialize(Map conf) throws RuntimeException { Map resultConf = Maps.newHashMap(prefixMap); resultConf.putAll(gravitinoConfig); + IcebergConfig icebergConfig = new IcebergConfig(resultConf); + this.icebergTablePropertiesMetadata = new IcebergTablePropertiesMetadata(); this.icebergSchemaPropertiesMetadata = new IcebergSchemaPropertiesMetadata(); - icebergSdkConf = createDefaultConfiguration(); } /** Closes the Iceberg catalog and releases the associated client pool. 
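* <p>Any state created in {@code initialize(Map, CatalogInfo)}, such as the cached Hadoop
* {@code Configuration}, should be released here so a dropped catalog does not leak resources.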
*/ @@ -336,4 +329,10 @@ public PropertiesMetadata filesetPropertiesMetadata() throws UnsupportedOperatio throw new UnsupportedOperationException( "Iceberg catalog doesn't support fileset related operations"); } + + @Override + public PropertiesMetadata topicPropertiesMetadata() throws UnsupportedOperationException { + throw new UnsupportedOperationException( + "Iceberg catalog doesn't support topic related operations"); + } } diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogPropertiesMetadata.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogPropertiesMetadata.java index ce953e57eb1..eda3c9aa0cb 100644 --- a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogPropertiesMetadata.java +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogPropertiesMetadata.java @@ -4,11 +4,11 @@ */ package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg; -import static com.datastrato.gravitino.catalog.PropertyEntry.enumImmutablePropertyEntry; -import static com.datastrato.gravitino.catalog.PropertyEntry.stringRequiredPropertyEntry; +import static com.datastrato.gravitino.connector.PropertyEntry.enumImmutablePropertyEntry; +import static com.datastrato.gravitino.connector.PropertyEntry.stringRequiredPropertyEntry; -import com.datastrato.gravitino.catalog.BaseCatalogPropertiesMetadata; -import com.datastrato.gravitino.catalog.PropertyEntry; +import com.datastrato.gravitino.connector.BaseCatalogPropertiesMetadata; +import com.datastrato.gravitino.connector.PropertyEntry; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergColumn.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergColumn.java index c678090b2c8..8299c25bfc3 100644 --- a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergColumn.java +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergColumn.java @@ -4,7 +4,7 @@ */ package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg; -import com.datastrato.gravitino.catalog.rel.BaseColumn; +import com.datastrato.gravitino.connector.BaseColumn; import lombok.EqualsAndHashCode; /** Represents a column in the Iceberg column. */ @@ -15,7 +15,8 @@ private IcebergColumn() {} /** A builder class for constructing IcebergColumn instances. */ public static class Builder extends BaseColumnBuilder { - + /** Creates a new instance of {@link Builder}. */ + private Builder() {} /** * Internal method to build a IcebergColumn instance using the provided values. * @@ -32,4 +33,13 @@ protected IcebergColumn internalBuild() { return icebergColumn; } } + + /** + * Creates a new instance of {@link Builder}. + * + * @return The new instance. 
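+ * <p>A minimal usage sketch; {@code withName}, {@code withType} and {@code withNullable}
+ * are setters inherited from {@code BaseColumnBuilder}:
+ *
+ * <pre>{@code
+ * // Types is com.datastrato.gravitino.rel.types.Types
+ * IcebergColumn column =
+ *     IcebergColumn.builder()
+ *         .withName("id")
+ *         .withType(Types.IntegerType.get())
+ *         .withNullable(false)
+ *         .build();
+ * }</pre>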
+ */ + public static Builder builder() { + return new Builder(); + } } diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergConfig.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergConfig.java new file mode 100644 index 00000000000..eb9f3980588 --- /dev/null +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergConfig.java @@ -0,0 +1,103 @@ +/* + * Copyright 2023 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ + +package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg; + +import static com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.IcebergCatalogPropertiesMetadata.CATALOG_BACKEND_NAME; +import static com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.IcebergCatalogPropertiesMetadata.GRAVITINO_JDBC_DRIVER; +import static com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.IcebergCatalogPropertiesMetadata.ICEBERG_JDBC_INITIALIZE; +import static com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.IcebergCatalogPropertiesMetadata.ICEBERG_JDBC_PASSWORD; +import static com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.IcebergCatalogPropertiesMetadata.ICEBERG_JDBC_USER; +import static com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.IcebergCatalogPropertiesMetadata.URI; +import static com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.IcebergCatalogPropertiesMetadata.WAREHOUSE; + +import com.datastrato.gravitino.Config; +import com.datastrato.gravitino.config.ConfigBuilder; +import com.datastrato.gravitino.config.ConfigConstants; +import com.datastrato.gravitino.config.ConfigEntry; +import com.datastrato.gravitino.server.web.JettyServerConfig; +import com.datastrato.gravitino.server.web.OverwriteDefaultConfig; +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.apache.commons.lang3.StringUtils; + +public class IcebergConfig extends Config implements OverwriteDefaultConfig { + + public static final ConfigEntry CATALOG_BACKEND = + new ConfigBuilder(CATALOG_BACKEND_NAME) + .doc("Catalog backend of Gravitino Iceberg catalog") + .version(ConfigConstants.VERSION_0_2_0) + .stringConf() + .createWithDefault("memory"); + + public static final ConfigEntry CATALOG_WAREHOUSE = + new ConfigBuilder(WAREHOUSE) + .doc("Warehouse directory of catalog") + .version(ConfigConstants.VERSION_0_2_0) + .stringConf() + .checkValue(StringUtils::isNotBlank, ConfigConstants.NOT_BLANK_ERROR_MSG) + .create(); + + public static final ConfigEntry CATALOG_URI = + new ConfigBuilder(URI) + .doc("The uri config of the Iceberg catalog") + .version(ConfigConstants.VERSION_0_2_0) + .stringConf() + .checkValue(StringUtils::isNotBlank, ConfigConstants.NOT_BLANK_ERROR_MSG) + .create(); + + public static final ConfigEntry JDBC_USER = + new ConfigBuilder(ICEBERG_JDBC_USER) + .doc("The username of the Jdbc connection") + .version(ConfigConstants.VERSION_0_2_0) + .stringConf() + .checkValue(StringUtils::isNotBlank, ConfigConstants.NOT_BLANK_ERROR_MSG) + .create(); + + public static final ConfigEntry JDBC_PASSWORD = + new ConfigBuilder(ICEBERG_JDBC_PASSWORD) + .doc("The password of the Jdbc connection") + .version(ConfigConstants.VERSION_0_2_0) + .stringConf() + .checkValue(StringUtils::isNotBlank, ConfigConstants.NOT_BLANK_ERROR_MSG) + .create(); + + public static final ConfigEntry JDBC_DRIVER = + new 
ConfigBuilder(GRAVITINO_JDBC_DRIVER) + .doc("The driver of the Jdbc connection") + .version(ConfigConstants.VERSION_0_3_0) + .stringConf() + .checkValue(StringUtils::isNotBlank, ConfigConstants.NOT_BLANK_ERROR_MSG) + .create(); + + public static final ConfigEntry JDBC_INIT_TABLES = + new ConfigBuilder(ICEBERG_JDBC_INITIALIZE) + .doc("Whether to initialize meta tables when creating the JDBC catalog") + .version(ConfigConstants.VERSION_0_2_0) + .booleanConf() + .createWithDefault(true); + + public String getJdbcDriver() { + return get(JDBC_DRIVER); + } + + public IcebergConfig(Map properties) { + super(false); + loadFromMap(properties, k -> true); + } + + public IcebergConfig() { + super(false); + } + + @Override + public Map getOverwriteDefaultConfig() { + return ImmutableMap.of( + JettyServerConfig.WEBSERVER_HTTP_PORT.getKey(), + String.valueOf(JettyServerConfig.DEFAULT_ICEBERG_REST_SERVICE_HTTP_PORT), + JettyServerConfig.WEBSERVER_HTTPS_PORT.getKey(), + String.valueOf(JettyServerConfig.DEFAULT_ICEBERG_REST_SERVICE_HTTPS_PORT)); + } +} diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergSchema.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergSchema.java index 464970cad3a..e38f0b35534 100644 --- a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergSchema.java +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergSchema.java @@ -4,7 +4,7 @@ */ package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg; -import com.datastrato.gravitino.catalog.rel.BaseSchema; +import com.datastrato.gravitino.connector.BaseSchema; import lombok.ToString; /** Represents an Iceberg Schema (Database) entity in the Iceberg schema. */ @@ -15,6 +15,8 @@ private IcebergSchema() {} /** A builder class for constructing IcebergSchema instances. */ public static class Builder extends BaseSchemaBuilder { + /** Creates a new instance of {@link Builder}. */ + private Builder() {} @Override protected IcebergSchema internalBuild() { @@ -31,4 +33,13 @@ protected IcebergSchema internalBuild() { return icebergSchema; } } + + /** + * Creates a new instance of {@link Builder}. + * + * @return The new instance.
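+ * <p>A minimal usage sketch; the setters are inherited from {@code BaseSchemaBuilder}:
+ *
+ * <pre>{@code
+ * IcebergSchema schema =
+ *     IcebergSchema.builder()
+ *         .withName("sales_db")
+ *         .withComment("demo schema")
+ *         .build();
+ * }</pre>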
+ */ + public static Builder builder() { + return new Builder(); + } } diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergSchemaPropertiesMetadata.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergSchemaPropertiesMetadata.java index 2dad4aaf9ce..828c2c2a2e3 100644 --- a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergSchemaPropertiesMetadata.java +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergSchemaPropertiesMetadata.java @@ -4,10 +4,10 @@ */ package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg; -import static com.datastrato.gravitino.catalog.PropertyEntry.stringReservedPropertyEntry; +import static com.datastrato.gravitino.connector.PropertyEntry.stringReservedPropertyEntry; -import com.datastrato.gravitino.catalog.BasePropertiesMetadata; -import com.datastrato.gravitino.catalog.PropertyEntry; +import com.datastrato.gravitino.connector.BasePropertiesMetadata; +import com.datastrato.gravitino.connector.PropertyEntry; import com.google.common.collect.ImmutableList; import com.google.common.collect.Maps; import java.util.List; diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergTable.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergTable.java index 192579baf2f..a02be50226e 100644 --- a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergTable.java +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergTable.java @@ -6,18 +6,21 @@ import static com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.IcebergTablePropertiesMetadata.DISTRIBUTION_MODE; -import com.datastrato.gravitino.catalog.TableOperations; import com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.converter.ConvertUtil; import com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.converter.FromIcebergPartitionSpec; -import com.datastrato.gravitino.catalog.rel.BaseTable; +import com.datastrato.gravitino.connector.BaseTable; +import com.datastrato.gravitino.connector.TableOperations; import com.datastrato.gravitino.meta.AuditInfo; import com.datastrato.gravitino.rel.expressions.distributions.Distribution; import com.datastrato.gravitino.rel.expressions.distributions.Distributions; import com.datastrato.gravitino.rel.expressions.transforms.Transform; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; import com.google.common.collect.Maps; import java.util.Map; import lombok.Getter; import lombok.ToString; +import org.apache.commons.lang3.ArrayUtils; import org.apache.iceberg.DistributionMode; import org.apache.iceberg.Schema; import org.apache.iceberg.TableMetadata; @@ -45,6 +48,39 @@ public class IcebergTable extends BaseTable { private IcebergTable() {} + /** + * Transforms the gravitino distribution to the distribution mode name of the Iceberg table. + * + * @param distribution The distribution of the table. + * @return The distribution mode name of the iceberg table. 
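+ * <p>For example, a HASH distribution with no expressions on a partitioned table maps to
+ * {@code "hash"}; RANGE requires a sort order or partitioning and maps to {@code "range"};
+ * NONE maps to {@code "none"}.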
+ */ + @VisibleForTesting + String transformDistribution(Distribution distribution) { + switch (distribution.strategy()) { + case HASH: + Preconditions.checkArgument( + ArrayUtils.isEmpty(distribution.expressions()), + "Iceberg's Distribution Mode.HASH does not support set expressions."); + Preconditions.checkArgument( + ArrayUtils.isNotEmpty(partitioning), + "Iceberg's Distribution Mode.HASH is distributed based on partition, but the partition is empty."); + return DistributionMode.HASH.modeName(); + case RANGE: + Preconditions.checkArgument( + ArrayUtils.isEmpty(distribution.expressions()), + "Iceberg's Distribution Mode.RANGE does not support set expressions."); + Preconditions.checkArgument( + ArrayUtils.isNotEmpty(partitioning) || ArrayUtils.isNotEmpty(sortOrders), + "Iceberg's Distribution Mode.RANGE is distributed based on sortOrder or partition, but both are empty."); + return DistributionMode.RANGE.modeName(); + case NONE: + return DistributionMode.NONE.modeName(); + default: + throw new IllegalArgumentException( + "Iceberg unsupported distribution strategy: " + distribution.strategy()); + } + } + /** * Creates a new IcebergTable instance from a Table and a Builder. * @@ -73,7 +109,7 @@ public static IcebergTable fromIcebergTable(TableMetadata table, String tableNam } IcebergColumn[] icebergColumns = schema.columns().stream().map(ConvertUtil::fromNestedField).toArray(IcebergColumn[]::new); - return new Builder() + return IcebergTable.builder() .withComment(table.property(IcebergTablePropertiesMetadata.COMMENT, null)) .withLocation(table.location()) .withProperties(properties) @@ -93,6 +129,8 @@ protected TableOperations newOps() { /** A builder class for constructing IcebergTable instances. */ public static class Builder extends BaseTableBuilder { + /** Creates a new instance of {@link Builder}. */ + private Builder() {} private String location; @@ -133,4 +171,12 @@ protected IcebergTable internalBuild() { return icebergTable; } } + /** + * Creates a new instance of {@link Builder}. + * + * @return The new instance.
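+ * <p>A minimal usage sketch, mirroring how {@code fromIcebergTable} assembles an instance:
+ *
+ * <pre>{@code
+ * IcebergTable table =
+ *     IcebergTable.builder()
+ *         .withName("events")
+ *         .withComment("demo table")
+ *         .withProperties(Maps.newHashMap())
+ *         .build();
+ * }</pre>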
+ */ + public static Builder builder() { + return new Builder(); + } } diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergTablePropertiesMetadata.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergTablePropertiesMetadata.java index 7bcb156f456..14cd886e448 100644 --- a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergTablePropertiesMetadata.java +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergTablePropertiesMetadata.java @@ -4,11 +4,11 @@ */ package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg; -import static com.datastrato.gravitino.catalog.PropertyEntry.stringImmutablePropertyEntry; -import static com.datastrato.gravitino.catalog.PropertyEntry.stringReservedPropertyEntry; +import static com.datastrato.gravitino.connector.PropertyEntry.stringImmutablePropertyEntry; +import static com.datastrato.gravitino.connector.PropertyEntry.stringReservedPropertyEntry; -import com.datastrato.gravitino.catalog.BasePropertiesMetadata; -import com.datastrato.gravitino.catalog.PropertyEntry; +import com.datastrato.gravitino.connector.BasePropertiesMetadata; +import com.datastrato.gravitino.connector.PropertyEntry; import com.google.common.collect.ImmutableList; import com.google.common.collect.Maps; import java.util.List; diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ConvertUtil.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ConvertUtil.java index 2f808249992..006eb78122b 100644 --- a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ConvertUtil.java +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ConvertUtil.java @@ -57,7 +57,7 @@ public static com.datastrato.gravitino.rel.types.Type formIcebergType(Type type) * @return Gravitino iceberg column */ public static IcebergColumn fromNestedField(Types.NestedField nestedField) { - return new IcebergColumn.Builder() + return IcebergColumn.builder() .withName(nestedField.name()) .withNullable(nestedField.isOptional()) .withComment(nestedField.doc()) diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ToIcebergType.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ToIcebergType.java index 2e60defd567..cc761aed0ba 100644 --- a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ToIcebergType.java +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/converter/ToIcebergType.java @@ -34,10 +34,12 @@ private int getNextId() { return nextId++; } + @SuppressWarnings("ReferenceEquality") @Override public Type struct(com.datastrato.gravitino.rel.types.Types.StructType struct, List types) { com.datastrato.gravitino.rel.types.Types.StructType.Field[] fields = struct.fields(); List newFields = Lists.newArrayListWithExpectedSize(fields.length); + // Comparing the root node by 
reference equality. boolean isRoot = root == struct; for (int i = 0; i < fields.length; i += 1) { com.datastrato.gravitino.rel.types.Types.StructType.Field field = fields[i]; diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/utils/IcebergCatalogUtil.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/utils/IcebergCatalogUtil.java new file mode 100644 index 00000000000..b29329e5890 --- /dev/null +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/utils/IcebergCatalogUtil.java @@ -0,0 +1,47 @@ +/* + * Copyright 2023 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ +package com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.utils; + +import com.datastrato.gravitino.catalog.bili.lakehouse.iceberg.IcebergCatalogBackend; +import java.util.Collections; +import java.util.Map; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.hive.HiveCatalog; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class IcebergCatalogUtil { + + private static final Logger LOG = LoggerFactory.getLogger(IcebergCatalogUtil.class); + + private static HiveCatalog loadHiveCatalog(Map properties) { + HiveCatalog hiveCatalog = new HiveCatalog(); + HdfsConfiguration hdfsConfiguration = new HdfsConfiguration(); + properties.forEach(hdfsConfiguration::set); + hiveCatalog.setConf(hdfsConfiguration); + hiveCatalog.initialize("hive", properties); + return hiveCatalog; + } + + public static Catalog loadCatalogBackend(String catalogType) { + return loadCatalogBackend(catalogType, Collections.emptyMap()); + } + + public static Catalog loadCatalogBackend(String catalogType, Map properties) { + LOG.info("Load catalog backend of {}", catalogType); + switch (IcebergCatalogBackend.valueOf(catalogType.toUpperCase())) { + case HIVE: + return loadHiveCatalog(properties); + default: + throw new RuntimeException( + catalogType + " catalog backend is not supported yet, supported backends: [hive]"); + } + } + + private IcebergCatalogUtil() {} +} diff --git a/integration-test/build.gradle.kts b/integration-test/build.gradle.kts index 0aacb8399ab..c9636ae75a6 100644 --- a/integration-test/build.gradle.kts +++ b/integration-test/build.gradle.kts @@ -140,6 +140,7 @@ tasks.test { } else { dependsOn(":trino-connector:jar") dependsOn(":catalogs:catalog-lakehouse-iceberg:jar", ":catalogs:catalog-lakehouse-iceberg:runtimeJars") + dependsOn(":catalogs:catalog-bili-lakehouse-iceberg:jar", ":catalogs:catalog-bili-lakehouse-iceberg:runtimeJars") dependsOn(":catalogs:catalog-jdbc-mysql:jar", ":catalogs:catalog-jdbc-mysql:runtimeJars") dependsOn(":catalogs:catalog-jdbc-postgresql:jar", ":catalogs:catalog-jdbc-postgresql:runtimeJars") dependsOn(":catalogs:catalog-hadoop:jar", ":catalogs:catalog-hadoop:runtimeJars") diff --git a/settings.gradle.kts b/settings.gradle.kts index 7f96f602eca..a4f5768b2dd 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -27,4 +27,3 @@ include("web") include("docs") include("integration-test-common") include("catalogs:catalog-bili-lakehouse-iceberg") - From e534b0257009a1e6db66fb350da74fca77c68aba Mon Sep 17 00:00:00 2001 From: teo Date: Wed, 20 Mar 2024 20:10:20 +0800 Subject: [PATCH 11/11] for minghuang --- .../build.gradle.kts | 8 ++++---
.../iceberg/IcebergCatalogOperations.java | 24 +------------------ 2 files changed, 6 insertions(+), 26 deletions(-) diff --git a/catalogs/catalog-bili-lakehouse-iceberg/build.gradle.kts b/catalogs/catalog-bili-lakehouse-iceberg/build.gradle.kts index b611978e663..989d9718896 100644 --- a/catalogs/catalog-bili-lakehouse-iceberg/build.gradle.kts +++ b/catalogs/catalog-bili-lakehouse-iceberg/build.gradle.kts @@ -2,7 +2,7 @@ * Copyright 2023 Datastrato Pvt Ltd. * This software is licensed under the Apache License version 2. */ -description = "bili-catalog-lakehouse-iceberg" +description = "catalog-bili-lakehouse-iceberg" plugins { `maven-publish` @@ -12,6 +12,7 @@ plugins { val scalaVersion: String = project.properties["scalaVersion"] as? String ?: extra["defaultScalaVersion"].toString() val sparkVersion: String = libs.versions.spark.get() +val icebergVersion: String = libs.versions.iceberg.get() val scalaCollectionCompatVersion: String = libs.versions.scala.collection.compat.get() dependencies { @@ -19,6 +20,7 @@ dependencies { implementation(project(":common")) implementation(project(":core")) implementation(project(":server-common")) + implementation(libs.bundles.iceberg) implementation(libs.bundles.jetty) implementation(libs.bundles.jersey) implementation(libs.bundles.log4j) @@ -26,8 +28,6 @@ dependencies { implementation(libs.commons.io) implementation(libs.commons.lang3) implementation(libs.guava) - implementation("org.apache.iceberg:iceberg-sdk:0.13.2-bili-0.4-SNAPSHOT") - implementation(libs.hive2.metastore) { exclude("co.cask.tephra") exclude("com.github.spotbugs") @@ -51,6 +51,7 @@ dependencies { exclude("com.zaxxer", "HikariCP") exclude("com.sun.jersey", "jersey-server") } + implementation(libs.iceberg.hive.metastore) implementation(libs.jackson.annotations) implementation(libs.jackson.databind) implementation(libs.jackson.datatype.jdk8) @@ -75,6 +76,7 @@ dependencies { implementation(libs.metrics.jersey2) testImplementation("org.scala-lang.modules:scala-collection-compat_$scalaVersion:$scalaCollectionCompatVersion") + testImplementation("org.apache.iceberg:iceberg-spark-runtime-3.4_$scalaVersion:$icebergVersion") testImplementation("org.apache.spark:spark-hive_$scalaVersion:$sparkVersion") testImplementation("org.apache.spark:spark-sql_$scalaVersion:$sparkVersion") { exclude("org.apache.avro") diff --git a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogOperations.java b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogOperations.java index d6b54c0872f..44f16310642 100644 --- a/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogOperations.java +++ b/catalogs/catalog-bili-lakehouse-iceberg/src/main/java/com/datastrato/gravitino/catalog/bili/lakehouse/iceberg/IcebergCatalogOperations.java @@ -36,11 +36,6 @@ import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.hive.HiveCatalog; -import org.apache.iceberg.sdk.HiveCatalogUtils; -import org.apache.iceberg.sdk.auth.AuthUtils; -import org.apache.iceberg.sdk.auth.HdfsAuthentication; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -275,24 +270,7 @@ public boolean purgeTable(NameIdentifier tableIdent) throws UnsupportedOperation */ @Override public boolean 
purgeTableOneMeta(NameIdentifier tableIdent) { - // use iceberg sdk - try { - HdfsAuthentication hdfsAuthentication = AuthUtils.createHdfsAuthentication(icebergSdkConf); - hdfsAuthentication.doAs( - () -> { - TableIdentifier identifier = TableIdentifier.of(tableIdent.name(), tableIdent.name()); - HiveCatalog hiveCatalog = HiveCatalogUtils.createHiveCatalog(icebergSdkConf); - hiveCatalog.dropTable(identifier, true); - return null; - }); - hdfsAuthentication.close(); - } catch (org.apache.iceberg.exceptions.NoSuchTableException e) { - LOG.warn("Iceberg table {} does not exist", tableIdent.name()); - return false; - } catch (Throwable e) { - LOG.info("Purge Iceberg table Error : {}", tableIdent.name()); - } - return true; + throw new UnsupportedOperationException("purgeTableOneMeta not supported."); } private Configuration createDefaultConfiguration() {