From 4b10eed5564e734c2738df144a9604bbdf8e4ba8 Mon Sep 17 00:00:00 2001 From: yuqi <yuqi@datastrato.com> Date: Mon, 18 Mar 2024 20:47:10 +0800 Subject: [PATCH 1/6] Optimize char/varchar mapping for Hive catalog between Gravitino and Trino --- .../test/trino/TrinoConnectorIT.java | 46 ++++---- .../testsets/hive/00000_create_table.sql | 2 +- .../testsets/hive/00001_select_table.sql | 4 +- .../testsets/hive/00002_alter_table.sql | 4 +- .../testsets/hive/00002_alter_table.txt | 6 +- .../testsets/hive/00007_varchar.sql | 43 ++++++++ .../testsets/hive/00007_varchar.txt | 102 ++++++++++++++++++ .../catalog/hive/HiveDataTypeTransformer.java | 44 ++++++-- .../hive/TestHiveDataTypeConverter.java | 68 ++++++++++++ 9 files changed, 278 insertions(+), 41 deletions(-) create mode 100644 integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.sql create mode 100644 integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.txt create mode 100644 trino-connector/src/test/java/com/datastrato/gravitino/trino/connector/catalog/hive/TestHiveDataTypeConverter.java diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/trino/TrinoConnectorIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/trino/TrinoConnectorIT.java index e0d38b325ed..079e5b1ea2c 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/trino/TrinoConnectorIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/trino/TrinoConnectorIT.java @@ -175,9 +175,9 @@ public void testCreateTable() throws TException, InterruptedException { String sql3 = String.format( "CREATE TABLE \"%s.%s\".%s.%s (\n" - + " col1 varchar,\n" - + " col2 varchar,\n" - + " col3 varchar\n" + + " col1 varchar(20),\n" + + " col2 varchar(20),\n" + + " col3 varchar(20)\n" + ")\n" + "WITH (\n" + " format = 'TEXTFILE'\n" @@ -230,15 +230,15 @@ public void testScenarioTable1() throws TException, InterruptedException { String sql3 = String.format( "CREATE TABLE \"%s.%s\".%s.%s (\n" - + " user_name varchar,\n" - + " gender varchar,\n" - + " age varchar,\n" - + " phone varchar,\n" - + " email varchar,\n" - + " address varchar,\n" - + " birthday varchar,\n" - + " create_time varchar,\n" - + " update_time varchar\n" + + " user_name varchar(20),\n" + + " gender varchar(20),\n" + + " age varchar(20),\n" + + " phone varchar(20),\n" + + " email varchar(20),\n" + + " address varchar(20),\n" + + " birthday varchar(20),\n" + + " create_time varchar(20),\n" + + " update_time varchar(20)\n" + ")\n" + "WITH (\n" + " format = 'TEXTFILE'\n" @@ -292,12 +292,12 @@ public void testScenarioTable2() throws TException, InterruptedException { String sql4 = String.format( "CREATE TABLE \"%s.%s\".%s.%s (\n" - + " user_name varchar,\n" - + " consumer varchar,\n" - + " recharge varchar,\n" - + " event_time varchar,\n" - + " create_time varchar,\n" - + " update_time varchar\n" + + " user_name varchar(20),\n" + + " consumer varchar(20),\n" + + " recharge varchar(20),\n" + + " event_time varchar(20),\n" + + " create_time varchar(20),\n" + + " update_time varchar(20)\n" + ")\n" + "WITH (\n" + " format = 'TEXTFILE'\n" @@ -406,7 +406,7 @@ void testHiveTableCreatedByTrino() { String createTableSql = String.format( - "CREATE TABLE \"%s.%s\".%s.%s (id int, name varchar)" + "CREATE TABLE \"%s.%s\".%s.%s (id int, name varchar(20))" + " with ( serde_name = '123455', location = 'hdfs://localhost:9000/user/hive/warehouse/hive_schema.db/hive_table'" + ", partitioned_by = ARRAY['name'], bucketed_by = ARRAY['id'], bucket_count = 50, sorted_by = ARRAY['name']" + ")", @@ -781,7 +781,7 @@ void testHiveTableCreatedByGravitino() throws InterruptedException { tableName = GravitinoITUtils.genRandomName("table_format1").toLowerCase(); sql = String.format( - "CREATE TABLE \"%s.%s\".%s.%s (id int, name varchar) with (format = 'ORC')", + "CREATE TABLE \"%s.%s\".%s.%s (id int, name varchar(20)) with (format = 'ORC')", metalakeName, catalogName, schemaName, tableName); containerSuite.getTrinoContainer().executeUpdateSQL(sql); @@ -801,7 +801,7 @@ void testHiveTableCreatedByGravitino() throws InterruptedException { tableName = GravitinoITUtils.genRandomName("table_format2").toLowerCase(); sql = String.format( - "CREATE TABLE \"%s.%s\".%s.%s (id int, name varchar) with (format = 'ORC', input_format = 'org.apache.hadoop.mapred.TextInputFormat')", + "CREATE TABLE \"%s.%s\".%s.%s (id int, name varchar(20)) with (format = 'ORC', input_format = 'org.apache.hadoop.mapred.TextInputFormat')", metalakeName, catalogName, schemaName, tableName); containerSuite.getTrinoContainer().executeUpdateSQL(sql); sql = @@ -820,7 +820,7 @@ void testHiveTableCreatedByGravitino() throws InterruptedException { tableName = GravitinoITUtils.genRandomName("table_format3").toLowerCase(); sql = String.format( - "CREATE TABLE \"%s.%s\".%s.%s (id int, name varchar) with (format = 'ORC', output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat')", + "CREATE TABLE \"%s.%s\".%s.%s (id int, name varchar(20)) with (format = 'ORC', output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat')", metalakeName, catalogName, schemaName, tableName); containerSuite.getTrinoContainer().executeUpdateSQL(sql); sql = @@ -1016,7 +1016,7 @@ void testIcebergTableAndSchemaCreatedByTrino() { String createTableSql = String.format( - "CREATE TABLE \"%s.%s\".%s.%s (id int, name varchar)", + "CREATE TABLE \"%s.%s\".%s.%s (id int, name varchar(10))", metalakeName, catalogName, schemaName, tableName); containerSuite.getTrinoContainer().executeUpdateSQL(createTableSql); diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00000_create_table.sql b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00000_create_table.sql index 5df0dda6cd9..e2677d22c96 100644 --- a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00000_create_table.sql +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00000_create_table.sql @@ -1,7 +1,7 @@ CREATE SCHEMA "test.gt_hive".gt_db1; CREATE TABLE "test.gt_hive".gt_db1.tb01 ( - name varchar, + name varchar(20), salary int ) WITH ( diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00001_select_table.sql b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00001_select_table.sql index 5bd054d44d9..11d5650bce3 100644 --- a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00001_select_table.sql +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00001_select_table.sql @@ -1,7 +1,7 @@ CREATE SCHEMA "test.gt_hive".gt_db1; CREATE TABLE "test.gt_hive".gt_db1.tb01 ( - name varchar, + name varchar(20), salary int ) WITH ( @@ -15,7 +15,7 @@ insert into "test.gt_hive".gt_db1.tb01(name, salary) values ('bob', 14), ('tom', select * from "test.gt_hive".gt_db1.tb01 order by name; CREATE TABLE "test.gt_hive".gt_db1.tb02 ( - name varchar, + name varchar(20), salary int ) WITH ( diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00002_alter_table.sql b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00002_alter_table.sql index dfca4f16e19..fc77b289123 100644 --- a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00002_alter_table.sql +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00002_alter_table.sql @@ -1,7 +1,7 @@ CREATE SCHEMA "test.gt_hive".gt_db1; CREATE TABLE "test.gt_hive".gt_db1.tb01 ( - name varchar, + name varchar(20), salary int, city int ) @@ -30,7 +30,7 @@ show create table "test.gt_hive".gt_db1.tb01; comment on column "test.gt_hive".gt_db1.tb01.s is 'test column comments'; show create table "test.gt_hive".gt_db1.tb01; -alter table "test.gt_hive".gt_db1.tb01 add column city varchar comment 'aaa'; +alter table "test.gt_hive".gt_db1.tb01 add column city varchar(100) comment 'aaa'; show create table "test.gt_hive".gt_db1.tb01; drop table "test.gt_hive".gt_db1.tb01; diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00002_alter_table.txt b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00002_alter_table.txt index 97f7b0134be..73055cbd59e 100644 --- a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00002_alter_table.txt +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00002_alter_table.txt @@ -13,7 +13,7 @@ RENAME TABLE DROP COLUMN "CREATE TABLE ""test.gt_hive"".gt_db1.tb01 ( - name varchar(65535), + name varchar(20), salary integer ) COMMENT '' @@ -31,7 +31,7 @@ WITH ( RENAME COLUMN "CREATE TABLE ""test.gt_hive"".gt_db1.tb01 ( - s varchar(65535), + s varchar(20), salary integer ) COMMENT '' @@ -105,7 +105,7 @@ ADD COLUMN "CREATE TABLE ""test.gt_hive"".gt_db1.tb01 ( s varchar(256) COMMENT 'test column comments', salary integer, - city varchar(65535) COMMENT 'aaa' + city varchar(100) COMMENT 'aaa' ) COMMENT 'test table comments' WITH ( diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.sql b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.sql new file mode 100644 index 00000000000..4f70aa0ab1e --- /dev/null +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.sql @@ -0,0 +1,43 @@ +CREATE SCHEMA "test.gt_hive".varchar_db1; + +USE "test.gt_hive".varchar_db1; + +CREATE TABLE tb01 (id int, name char(20)); + +SHOW CREATE TABLE "test.gt_hive".varchar_db1.tb01; + +CREATE TABLE tb02 (id int, name char(255)); + +SHOW CREATE TABLE "test.gt_hive".varchar_db1.tb02; + +CREATE TABLE tb03 (id int, name char(256)); + +CREATE TABLE tb04 (id int, name varchar(250)); + +SHOW CREATE TABLE "test.gt_hive".varchar_db1.tb04; + +CREATE TABLE tb05 (id int, name varchar(65535)); + +SHOW CREATE TABLE "test.gt_hive".varchar_db1.tb05; + +CREATE TABLE tb06 (id int, name char); + +SHOW CREATE TABLE "test.gt_hive".varchar_db1.tb06; + +CREATE TABLE tb07 (id int, name varchar); + +CREATE TABLE tb08 (id int, name varchar(65536)); + + +drop table "test.gt_hive".varchar_db1.tb01; + +drop table "test.gt_hive".varchar_db1.tb02; + +drop table "test.gt_hive".varchar_db1.tb04; + +drop table "test.gt_hive".varchar_db1.tb05; + +drop table "test.gt_hive".varchar_db1.tb06; + +drop schema "test.gt_hive".varchar_db1; + diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.txt b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.txt new file mode 100644 index 00000000000..44a98f74c08 --- /dev/null +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.txt @@ -0,0 +1,102 @@ +CREATE SCHEMA + +USE + +CREATE TABLE + +"CREATE TABLE ""test.gt_hive"".varchar_db1.tb01 ( + id integer, + name char(20) +) +COMMENT '' +WITH ( + input_format = 'org.apache.hadoop.mapred.TextInputFormat', + location = 'hdfs://%s:9000/user/hive/warehouse/varchar_db1.db/tb01', + output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', + serde_name = 'tb01', + table_type = 'MANAGED_TABLE' +)" + +CREATE TABLE + +"CREATE TABLE ""test.gt_hive"".varchar_db1.tb02 ( + id integer, + name char(255) +) +COMMENT '' +WITH ( + input_format = 'org.apache.hadoop.mapred.TextInputFormat', + location = 'hdfs://%s:9000/user/hive/warehouse/varchar_db1.db/tb02', + output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', + serde_name = 'tb02', + table_type = 'MANAGED_TABLE' +)" + +<QUERY_FAILED> Hive does not support the datatype CHAR with the length greater than 255 + +CREATE TABLE + +"CREATE TABLE ""test.gt_hive"".varchar_db1.tb04 ( + id integer, + name varchar(250) +) +COMMENT '' +WITH ( + input_format = 'org.apache.hadoop.mapred.TextInputFormat', + location = 'hdfs://%s:9000/user/hive/warehouse/varchar_db1.db/tb04', + output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', + serde_name = 'tb04', + table_type = 'MANAGED_TABLE' +)" + +CREATE TABLE + +"CREATE TABLE ""test.gt_hive"".varchar_db1.tb05 ( + id integer, + name varchar(65535) +) +COMMENT '' +WITH ( + input_format = 'org.apache.hadoop.mapred.TextInputFormat', + location = 'hdfs://%s:9000/user/hive/warehouse/varchar_db1.db/tb05', + output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', + serde_name = 'tb05', + table_type = 'MANAGED_TABLE' +)" + +CREATE TABLE + +"CREATE TABLE ""test.gt_hive"".varchar_db1.tb06 ( + id integer, + name char(1) +) +COMMENT '' +WITH ( + input_format = 'org.apache.hadoop.mapred.TextInputFormat', + location = 'hdfs://%s:9000/user/hive/warehouse/varchar_db1.db/tb06', + output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', + serde_name = 'tb06', + table_type = 'MANAGED_TABLE' +)" + +<QUERY_FAILED> Hive does not support the datatype VARCHAR without length + +<QUERY_FAILED> Hive does not support the datatype VARCHAR with the length greater than 65535 + + +DROP TABLE + +DROP TABLE + +DROP TABLE + +DROP TABLE + +DROP TABLE + +DROP SCHEMA diff --git a/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/hive/HiveDataTypeTransformer.java b/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/hive/HiveDataTypeTransformer.java index 18d5c07ac89..56e54a5ee0b 100644 --- a/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/hive/HiveDataTypeTransformer.java +++ b/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/hive/HiveDataTypeTransformer.java @@ -6,28 +6,52 @@ package com.datastrato.gravitino.trino.connector.catalog.hive; import com.datastrato.gravitino.rel.types.Type; -import com.datastrato.gravitino.rel.types.Type.Name; import com.datastrato.gravitino.rel.types.Types; +import com.datastrato.gravitino.trino.connector.GravitinoErrorCode; import com.datastrato.gravitino.trino.connector.util.GeneralDataTypeTransformer; +import io.trino.spi.TrinoException; +import io.trino.spi.type.VarcharType; /** Type transformer between Hive and Trino */ public class HiveDataTypeTransformer extends GeneralDataTypeTransformer { // Max length of Hive varchar is 65535 private static final int HIVE_VARCHAR_MAX_LENGTH = 65535; + private static final int HIVE_CHAR_MAX_LENGTH = 255; @Override public Type getGravitinoType(io.trino.spi.type.Type type) { - Type gravitinoType = super.getGravitinoType(type); - if (gravitinoType.name() == Name.VARCHAR - && ((Types.VarCharType) gravitinoType).length() > HIVE_VARCHAR_MAX_LENGTH) { - return Types.VarCharType.of(HIVE_VARCHAR_MAX_LENGTH); - } + Class<? extends io.trino.spi.type.Type> typeClass = type.getClass(); + if (typeClass == VarcharType.class) { + VarcharType varcharType = (VarcharType) type; + if (varcharType.getLength().isEmpty()) { + // It's was creating a table with column type 'varchar' NOT 'varchar(n)', We not support + // this case + throw new TrinoException( + GravitinoErrorCode.GRAVITINO_ILLEGAL_ARGUMENT, + "Hive does not support the datatype VARCHAR without length"); + } + + int length = varcharType.getLength().get(); + if (length > HIVE_VARCHAR_MAX_LENGTH) { + throw new TrinoException( + GravitinoErrorCode.GRAVITINO_ILLEGAL_ARGUMENT, + "Hive does not support the datatype VARCHAR with the length greater than " + + HIVE_VARCHAR_MAX_LENGTH); + } + + return Types.VarCharType.of(length); + } else if (typeClass == io.trino.spi.type.CharType.class) { + io.trino.spi.type.CharType charType = (io.trino.spi.type.CharType) type; + if (charType.getLength() > HIVE_CHAR_MAX_LENGTH) { + throw new TrinoException( + GravitinoErrorCode.GRAVITINO_ILLEGAL_ARGUMENT, + "Hive does not support the datatype CHAR with the length greater than " + + HIVE_CHAR_MAX_LENGTH); + } - if (gravitinoType.name() == Name.FIXEDCHAR - && ((Types.FixedCharType) gravitinoType).length() > HIVE_VARCHAR_MAX_LENGTH) { - return Types.FixedCharType.of(HIVE_VARCHAR_MAX_LENGTH); + return Types.FixedCharType.of(charType.getLength()); } - return gravitinoType; + return super.getGravitinoType(type); } } diff --git a/trino-connector/src/test/java/com/datastrato/gravitino/trino/connector/catalog/hive/TestHiveDataTypeConverter.java b/trino-connector/src/test/java/com/datastrato/gravitino/trino/connector/catalog/hive/TestHiveDataTypeConverter.java new file mode 100644 index 00000000000..bc49cc8c036 --- /dev/null +++ b/trino-connector/src/test/java/com/datastrato/gravitino/trino/connector/catalog/hive/TestHiveDataTypeConverter.java @@ -0,0 +1,68 @@ +/* + * Copyright 2024 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ + +package com.datastrato.gravitino.trino.connector.catalog.hive; + +import com.datastrato.gravitino.rel.types.Types; +import com.datastrato.gravitino.trino.connector.util.GeneralDataTypeTransformer; +import io.trino.spi.TrinoException; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class TestHiveDataTypeConverter { + + @Test + public void testTrinoTypeToGravitinoType() { + GeneralDataTypeTransformer generalDataTypeTransformer = new HiveDataTypeTransformer(); + io.trino.spi.type.Type charTypeWithLengthOne = io.trino.spi.type.CharType.createCharType(1); + Assert.assertEquals( + generalDataTypeTransformer.getGravitinoType(charTypeWithLengthOne), + Types.FixedCharType.of(1)); + + io.trino.spi.type.Type charTypeWithLength = io.trino.spi.type.CharType.createCharType(255); + Assert.assertEquals( + generalDataTypeTransformer.getGravitinoType(charTypeWithLength), + Types.FixedCharType.of(255)); + + io.trino.spi.type.Type charLengthIsOverflow = io.trino.spi.type.CharType.createCharType(256); + Exception e = + Assert.expectThrows( + TrinoException.class, + () -> generalDataTypeTransformer.getGravitinoType(charLengthIsOverflow)); + Assert.assertTrue( + e.getMessage() + .contains("Hive does not support the datatype CHAR with the length greater than 255")); + + io.trino.spi.type.Type varcharType = io.trino.spi.type.VarcharType.createVarcharType(1); + Assert.assertEquals( + generalDataTypeTransformer.getGravitinoType(varcharType), Types.VarCharType.of(1)); + + io.trino.spi.type.Type varcharTypeWithLength = + io.trino.spi.type.VarcharType.createVarcharType(65535); + Assert.assertEquals( + generalDataTypeTransformer.getGravitinoType(varcharTypeWithLength), + Types.VarCharType.of(65535)); + + io.trino.spi.type.Type varcharLengthIsOverflow = + io.trino.spi.type.VarcharType.createVarcharType(65536); + e = + Assert.expectThrows( + TrinoException.class, + () -> generalDataTypeTransformer.getGravitinoType(varcharLengthIsOverflow)); + Assert.assertTrue( + e.getMessage() + .contains( + "Hive does not support the datatype VARCHAR with the length greater than 65535")); + + io.trino.spi.type.Type varcharTypeWithoutLength = + io.trino.spi.type.VarcharType.createUnboundedVarcharType(); + e = + Assert.expectThrows( + TrinoException.class, + () -> generalDataTypeTransformer.getGravitinoType(varcharTypeWithoutLength)); + Assert.assertTrue( + e.getMessage().contains("Hive does not support the datatype VARCHAR without length")); + } +} From 54db1ea36e8235640a0afb1e3e51542f45fdb7c1 Mon Sep 17 00:00:00 2001 From: yuqi <yuqi@datastrato.com> Date: Mon, 18 Mar 2024 21:00:53 +0800 Subject: [PATCH 2/6] Code format changes --- .../trino/connector/catalog/hive/HiveDataTypeTransformer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/hive/HiveDataTypeTransformer.java b/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/hive/HiveDataTypeTransformer.java index 56e54a5ee0b..f8a66e76262 100644 --- a/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/hive/HiveDataTypeTransformer.java +++ b/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/hive/HiveDataTypeTransformer.java @@ -24,7 +24,7 @@ public Type getGravitinoType(io.trino.spi.type.Type type) { if (typeClass == VarcharType.class) { VarcharType varcharType = (VarcharType) type; if (varcharType.getLength().isEmpty()) { - // It's was creating a table with column type 'varchar' NOT 'varchar(n)', We not support + // It was creating a table with column type 'varchar' NOT 'varchar(n)', We do not support // this case throw new TrinoException( GravitinoErrorCode.GRAVITINO_ILLEGAL_ARGUMENT, From 81af4e2bc77e28f1d482b12d421e1da6b852fd2d Mon Sep 17 00:00:00 2001 From: yuqi <yuqi@datastrato.com> Date: Mon, 18 Mar 2024 22:13:39 +0800 Subject: [PATCH 3/6] Fix test. --- .../trino-ci-testset/testsets/hive/00007_varchar.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.txt b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.txt index 44a98f74c08..83d31c073a1 100644 --- a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.txt +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.txt @@ -11,7 +11,7 @@ CREATE TABLE COMMENT '' WITH ( input_format = 'org.apache.hadoop.mapred.TextInputFormat', - location = 'hdfs://%s:9000/user/hive/warehouse/varchar_db1.db/tb01', + location = 'hdfs://%:9000/user/hive/warehouse/varchar_db1.db/tb01', output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', serde_name = 'tb01', @@ -27,7 +27,7 @@ CREATE TABLE COMMENT '' WITH ( input_format = 'org.apache.hadoop.mapred.TextInputFormat', - location = 'hdfs://%s:9000/user/hive/warehouse/varchar_db1.db/tb02', + location = 'hdfs://%:9000/user/hive/warehouse/varchar_db1.db/tb02', output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', serde_name = 'tb02', @@ -45,7 +45,7 @@ CREATE TABLE COMMENT '' WITH ( input_format = 'org.apache.hadoop.mapred.TextInputFormat', - location = 'hdfs://%s:9000/user/hive/warehouse/varchar_db1.db/tb04', + location = 'hdfs://%:9000/user/hive/warehouse/varchar_db1.db/tb04', output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', serde_name = 'tb04', @@ -61,7 +61,7 @@ CREATE TABLE COMMENT '' WITH ( input_format = 'org.apache.hadoop.mapred.TextInputFormat', - location = 'hdfs://%s:9000/user/hive/warehouse/varchar_db1.db/tb05', + location = 'hdfs://%:9000/user/hive/warehouse/varchar_db1.db/tb05', output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', serde_name = 'tb05', @@ -77,7 +77,7 @@ CREATE TABLE COMMENT '' WITH ( input_format = 'org.apache.hadoop.mapred.TextInputFormat', - location = 'hdfs://%s:9000/user/hive/warehouse/varchar_db1.db/tb06', + location = 'hdfs://%:9000/user/hive/warehouse/varchar_db1.db/tb06', output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', serde_name = 'tb06', From 9f3337dad836ae13352f7944e405a8f64ac2e32b Mon Sep 17 00:00:00 2001 From: yuqi <yuqi@datastrato.com> Date: Wed, 20 Mar 2024 14:36:28 +0800 Subject: [PATCH 4/6] Revert changes that do not support varchar without length. --- .../test/trino/TrinoConnectorIT.java | 46 +++++++++---------- .../testsets/hive/00000_create_table.sql | 2 +- .../testsets/hive/00001_select_table.sql | 4 +- .../testsets/hive/00002_alter_table.sql | 4 +- .../testsets/hive/00002_alter_table.txt | 6 +-- .../testsets/hive/00007_varchar.sql | 2 + .../testsets/hive/00007_varchar.txt | 3 +- .../catalog/hive/HiveDataTypeTransformer.java | 6 +-- 8 files changed, 36 insertions(+), 37 deletions(-) diff --git a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/trino/TrinoConnectorIT.java b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/trino/TrinoConnectorIT.java index c2e74078228..5ccfed3d8bf 100644 --- a/integration-test/src/test/java/com/datastrato/gravitino/integration/test/trino/TrinoConnectorIT.java +++ b/integration-test/src/test/java/com/datastrato/gravitino/integration/test/trino/TrinoConnectorIT.java @@ -175,9 +175,9 @@ public void testCreateTable() throws TException, InterruptedException { String sql3 = String.format( "CREATE TABLE \"%s.%s\".%s.%s (\n" - + " col1 varchar(20),\n" - + " col2 varchar(20),\n" - + " col3 varchar(20)\n" + + " col1 varchar,\n" + + " col2 varchar,\n" + + " col3 varchar\n" + ")\n" + "WITH (\n" + " format = 'TEXTFILE'\n" @@ -230,15 +230,15 @@ public void testScenarioTable1() throws TException, InterruptedException { String sql3 = String.format( "CREATE TABLE \"%s.%s\".%s.%s (\n" - + " user_name varchar(20),\n" - + " gender varchar(20),\n" - + " age varchar(20),\n" - + " phone varchar(20),\n" - + " email varchar(20),\n" - + " address varchar(20),\n" - + " birthday varchar(20),\n" - + " create_time varchar(20),\n" - + " update_time varchar(20)\n" + + " user_name varchar,\n" + + " gender varchar,\n" + + " age varchar,\n" + + " phone varchar,\n" + + " email varchar,\n" + + " address varchar,\n" + + " birthday varchar,\n" + + " create_time varchar,\n" + + " update_time varchar\n" + ")\n" + "WITH (\n" + " format = 'TEXTFILE'\n" @@ -292,12 +292,12 @@ public void testScenarioTable2() throws TException, InterruptedException { String sql4 = String.format( "CREATE TABLE \"%s.%s\".%s.%s (\n" - + " user_name varchar(20),\n" - + " consumer varchar(20),\n" - + " recharge varchar(20),\n" - + " event_time varchar(20),\n" - + " create_time varchar(20),\n" - + " update_time varchar(20)\n" + + " user_name varchar,\n" + + " consumer varchar,\n" + + " recharge varchar,\n" + + " event_time varchar,\n" + + " create_time varchar,\n" + + " update_time varchar\n" + ")\n" + "WITH (\n" + " format = 'TEXTFILE'\n" @@ -406,7 +406,7 @@ void testHiveTableCreatedByTrino() { String createTableSql = String.format( - "CREATE TABLE \"%s.%s\".%s.%s (id int, name varchar(20))" + "CREATE TABLE \"%s.%s\".%s.%s (id int, name varchar)" + " with ( serde_name = '123455', location = 'hdfs://localhost:9000/user/hive/warehouse/hive_schema.db/hive_table'" + ", partitioned_by = ARRAY['name'], bucketed_by = ARRAY['id'], bucket_count = 50, sorted_by = ARRAY['name']" + ")", @@ -781,7 +781,7 @@ void testHiveTableCreatedByGravitino() throws InterruptedException { tableName = GravitinoITUtils.genRandomName("table_format1").toLowerCase(); sql = String.format( - "CREATE TABLE \"%s.%s\".%s.%s (id int, name varchar(20)) with (format = 'ORC')", + "CREATE TABLE \"%s.%s\".%s.%s (id int, name varchar) with (format = 'ORC')", metalakeName, catalogName, schemaName, tableName); containerSuite.getTrinoContainer().executeUpdateSQL(sql); @@ -801,7 +801,7 @@ void testHiveTableCreatedByGravitino() throws InterruptedException { tableName = GravitinoITUtils.genRandomName("table_format2").toLowerCase(); sql = String.format( - "CREATE TABLE \"%s.%s\".%s.%s (id int, name varchar(20)) with (format = 'ORC', input_format = 'org.apache.hadoop.mapred.TextInputFormat')", + "CREATE TABLE \"%s.%s\".%s.%s (id int, name varchar) with (format = 'ORC', input_format = 'org.apache.hadoop.mapred.TextInputFormat')", metalakeName, catalogName, schemaName, tableName); containerSuite.getTrinoContainer().executeUpdateSQL(sql); sql = @@ -820,7 +820,7 @@ void testHiveTableCreatedByGravitino() throws InterruptedException { tableName = GravitinoITUtils.genRandomName("table_format3").toLowerCase(); sql = String.format( - "CREATE TABLE \"%s.%s\".%s.%s (id int, name varchar(20)) with (format = 'ORC', output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat')", + "CREATE TABLE \"%s.%s\".%s.%s (id int, name varchar) with (format = 'ORC', output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat')", metalakeName, catalogName, schemaName, tableName); containerSuite.getTrinoContainer().executeUpdateSQL(sql); sql = @@ -1016,7 +1016,7 @@ void testIcebergTableAndSchemaCreatedByTrino() { String createTableSql = String.format( - "CREATE TABLE \"%s.%s\".%s.%s (id int, name varchar(10))", + "CREATE TABLE \"%s.%s\".%s.%s (id int, name varchar)", metalakeName, catalogName, schemaName, tableName); containerSuite.getTrinoContainer().executeUpdateSQL(createTableSql); diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00000_create_table.sql b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00000_create_table.sql index e2677d22c96..5df0dda6cd9 100644 --- a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00000_create_table.sql +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00000_create_table.sql @@ -1,7 +1,7 @@ CREATE SCHEMA "test.gt_hive".gt_db1; CREATE TABLE "test.gt_hive".gt_db1.tb01 ( - name varchar(20), + name varchar, salary int ) WITH ( diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00001_select_table.sql b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00001_select_table.sql index 11d5650bce3..5bd054d44d9 100644 --- a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00001_select_table.sql +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00001_select_table.sql @@ -1,7 +1,7 @@ CREATE SCHEMA "test.gt_hive".gt_db1; CREATE TABLE "test.gt_hive".gt_db1.tb01 ( - name varchar(20), + name varchar, salary int ) WITH ( @@ -15,7 +15,7 @@ insert into "test.gt_hive".gt_db1.tb01(name, salary) values ('bob', 14), ('tom', select * from "test.gt_hive".gt_db1.tb01 order by name; CREATE TABLE "test.gt_hive".gt_db1.tb02 ( - name varchar(20), + name varchar, salary int ) WITH ( diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00002_alter_table.sql b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00002_alter_table.sql index fc77b289123..dfca4f16e19 100644 --- a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00002_alter_table.sql +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00002_alter_table.sql @@ -1,7 +1,7 @@ CREATE SCHEMA "test.gt_hive".gt_db1; CREATE TABLE "test.gt_hive".gt_db1.tb01 ( - name varchar(20), + name varchar, salary int, city int ) @@ -30,7 +30,7 @@ show create table "test.gt_hive".gt_db1.tb01; comment on column "test.gt_hive".gt_db1.tb01.s is 'test column comments'; show create table "test.gt_hive".gt_db1.tb01; -alter table "test.gt_hive".gt_db1.tb01 add column city varchar(100) comment 'aaa'; +alter table "test.gt_hive".gt_db1.tb01 add column city varchar comment 'aaa'; show create table "test.gt_hive".gt_db1.tb01; drop table "test.gt_hive".gt_db1.tb01; diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00002_alter_table.txt b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00002_alter_table.txt index 73055cbd59e..c759dfe1c95 100644 --- a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00002_alter_table.txt +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00002_alter_table.txt @@ -13,7 +13,7 @@ RENAME TABLE DROP COLUMN "CREATE TABLE ""test.gt_hive"".gt_db1.tb01 ( - name varchar(20), + name varchar, salary integer ) COMMENT '' @@ -31,7 +31,7 @@ WITH ( RENAME COLUMN "CREATE TABLE ""test.gt_hive"".gt_db1.tb01 ( - s varchar(20), + s varchar, salary integer ) COMMENT '' @@ -105,7 +105,7 @@ ADD COLUMN "CREATE TABLE ""test.gt_hive"".gt_db1.tb01 ( s varchar(256) COMMENT 'test column comments', salary integer, - city varchar(100) COMMENT 'aaa' + city varchar COMMENT 'aaa' ) COMMENT 'test table comments' WITH ( diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.sql b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.sql index 4f70aa0ab1e..d5c9b904803 100644 --- a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.sql +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.sql @@ -39,5 +39,7 @@ drop table "test.gt_hive".varchar_db1.tb05; drop table "test.gt_hive".varchar_db1.tb06; +drop table "test.gt_hive".varchar_db1.tb07; + drop schema "test.gt_hive".varchar_db1; diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.txt b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.txt index 83d31c073a1..94390412668 100644 --- a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.txt +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.txt @@ -84,10 +84,11 @@ WITH ( table_type = 'MANAGED_TABLE' )" -<QUERY_FAILED> Hive does not support the datatype VARCHAR without length +CREATE TABLE <QUERY_FAILED> Hive does not support the datatype VARCHAR with the length greater than 65535 +DROP TABLE DROP TABLE diff --git a/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/hive/HiveDataTypeTransformer.java b/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/hive/HiveDataTypeTransformer.java index f8a66e76262..6658fc167e1 100644 --- a/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/hive/HiveDataTypeTransformer.java +++ b/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/hive/HiveDataTypeTransformer.java @@ -24,11 +24,7 @@ public Type getGravitinoType(io.trino.spi.type.Type type) { if (typeClass == VarcharType.class) { VarcharType varcharType = (VarcharType) type; if (varcharType.getLength().isEmpty()) { - // It was creating a table with column type 'varchar' NOT 'varchar(n)', We do not support - // this case - throw new TrinoException( - GravitinoErrorCode.GRAVITINO_ILLEGAL_ARGUMENT, - "Hive does not support the datatype VARCHAR without length"); + return Types.StringType.get(); } int length = varcharType.getLength().get(); From ee89712473c88aa17708451c009962d7148ae3ec Mon Sep 17 00:00:00 2001 From: yuqi <yuqi@datastrato.com> Date: Wed, 20 Mar 2024 14:48:14 +0800 Subject: [PATCH 5/6] Fix test error. --- .../catalog/hive/TestHiveDataTypeConverter.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/trino-connector/src/test/java/com/datastrato/gravitino/trino/connector/catalog/hive/TestHiveDataTypeConverter.java b/trino-connector/src/test/java/com/datastrato/gravitino/trino/connector/catalog/hive/TestHiveDataTypeConverter.java index bc49cc8c036..9aaba6b9942 100644 --- a/trino-connector/src/test/java/com/datastrato/gravitino/trino/connector/catalog/hive/TestHiveDataTypeConverter.java +++ b/trino-connector/src/test/java/com/datastrato/gravitino/trino/connector/catalog/hive/TestHiveDataTypeConverter.java @@ -58,11 +58,9 @@ public void testTrinoTypeToGravitinoType() { io.trino.spi.type.Type varcharTypeWithoutLength = io.trino.spi.type.VarcharType.createUnboundedVarcharType(); - e = - Assert.expectThrows( - TrinoException.class, - () -> generalDataTypeTransformer.getGravitinoType(varcharTypeWithoutLength)); - Assert.assertTrue( - e.getMessage().contains("Hive does not support the datatype VARCHAR without length")); + + Assert.assertEquals( + generalDataTypeTransformer.getGravitinoType(varcharTypeWithoutLength), + Types.StringType.get()); } } From f8f6df54dbae76f2bd2531ebc62357ce14c9c65d Mon Sep 17 00:00:00 2001 From: yuqi <yuqi@datastrato.com> Date: Wed, 20 Mar 2024 20:43:21 +0800 Subject: [PATCH 6/6] Fix --- .../testsets/hive/00007_varchar.sql | 2 ++ .../testsets/hive/00007_varchar.txt | 16 +++++++++++++++- .../catalog/hive/HiveDataTypeTransformer.java | 3 ++- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.sql b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.sql index d5c9b904803..54dfd8b6f67 100644 --- a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.sql +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.sql @@ -26,6 +26,8 @@ SHOW CREATE TABLE "test.gt_hive".varchar_db1.tb06; CREATE TABLE tb07 (id int, name varchar); +SHOW CREATE TABLE "test.gt_hive".varchar_db1.tb07; + CREATE TABLE tb08 (id int, name varchar(65536)); diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.txt b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.txt index 94390412668..b69411fb752 100644 --- a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.txt +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.txt @@ -86,7 +86,21 @@ WITH ( CREATE TABLE -<QUERY_FAILED> Hive does not support the datatype VARCHAR with the length greater than 65535 +"CREATE TABLE ""test.gt_hive"".varchar_db1.tb07 ( + id integer, + name varchar +) +COMMENT '' +WITH ( + input_format = 'org.apache.hadoop.mapred.TextInputFormat', + location = 'hdfs://%:9000/user/hive/warehouse/varchar_db1.db/tb07', + output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', + serde_name = 'tb07', + table_type = 'MANAGED_TABLE' +)" + +<QUERY_FAILED> Hive does not support the datatype VARCHAR with the length greater than 65535, you can use varchar without length instead DROP TABLE diff --git a/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/hive/HiveDataTypeTransformer.java b/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/hive/HiveDataTypeTransformer.java index 6658fc167e1..e1a54bd235a 100644 --- a/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/hive/HiveDataTypeTransformer.java +++ b/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/hive/HiveDataTypeTransformer.java @@ -32,7 +32,8 @@ public Type getGravitinoType(io.trino.spi.type.Type type) { throw new TrinoException( GravitinoErrorCode.GRAVITINO_ILLEGAL_ARGUMENT, "Hive does not support the datatype VARCHAR with the length greater than " - + HIVE_VARCHAR_MAX_LENGTH); + + HIVE_VARCHAR_MAX_LENGTH + + ", you can use varchar without length instead"); } return Types.VarCharType.of(length);