diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00002_alter_table.txt b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00002_alter_table.txt index 97f7b0134be..c759dfe1c95 100644 --- a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00002_alter_table.txt +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00002_alter_table.txt @@ -13,7 +13,7 @@ RENAME TABLE DROP COLUMN "CREATE TABLE ""test.gt_hive"".gt_db1.tb01 ( - name varchar(65535), + name varchar, salary integer ) COMMENT '' @@ -31,7 +31,7 @@ WITH ( RENAME COLUMN "CREATE TABLE ""test.gt_hive"".gt_db1.tb01 ( - s varchar(65535), + s varchar, salary integer ) COMMENT '' @@ -105,7 +105,7 @@ ADD COLUMN "CREATE TABLE ""test.gt_hive"".gt_db1.tb01 ( s varchar(256) COMMENT 'test column comments', salary integer, - city varchar(65535) COMMENT 'aaa' + city varchar COMMENT 'aaa' ) COMMENT 'test table comments' WITH ( diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.sql b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.sql new file mode 100644 index 00000000000..54dfd8b6f67 --- /dev/null +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.sql @@ -0,0 +1,47 @@ +CREATE SCHEMA "test.gt_hive".varchar_db1; + +USE "test.gt_hive".varchar_db1; + +CREATE TABLE tb01 (id int, name char(20)); + +SHOW CREATE TABLE "test.gt_hive".varchar_db1.tb01; + +CREATE TABLE tb02 (id int, name char(255)); + +SHOW CREATE TABLE "test.gt_hive".varchar_db1.tb02; + +CREATE TABLE tb03 (id int, name char(256)); + +CREATE TABLE tb04 (id int, name varchar(250)); + +SHOW CREATE TABLE "test.gt_hive".varchar_db1.tb04; + +CREATE TABLE tb05 (id int, name varchar(65535)); + +SHOW CREATE TABLE "test.gt_hive".varchar_db1.tb05; + +CREATE TABLE tb06 (id int, name char); + +SHOW CREATE TABLE "test.gt_hive".varchar_db1.tb06; + +CREATE TABLE tb07 (id int, name varchar); + +SHOW CREATE TABLE "test.gt_hive".varchar_db1.tb07; + +CREATE TABLE tb08 (id int, name varchar(65536)); + + +drop table "test.gt_hive".varchar_db1.tb01; + +drop table "test.gt_hive".varchar_db1.tb02; + +drop table "test.gt_hive".varchar_db1.tb04; + +drop table "test.gt_hive".varchar_db1.tb05; + +drop table "test.gt_hive".varchar_db1.tb06; + +drop table "test.gt_hive".varchar_db1.tb07; + +drop schema "test.gt_hive".varchar_db1; + diff --git a/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.txt b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.txt new file mode 100644 index 00000000000..b69411fb752 --- /dev/null +++ b/integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.txt @@ -0,0 +1,117 @@ +CREATE SCHEMA + +USE + +CREATE TABLE + +"CREATE TABLE ""test.gt_hive"".varchar_db1.tb01 ( + id integer, + name char(20) +) +COMMENT '' +WITH ( + input_format = 'org.apache.hadoop.mapred.TextInputFormat', + location = 'hdfs://%:9000/user/hive/warehouse/varchar_db1.db/tb01', + output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', + serde_name = 'tb01', + table_type = 'MANAGED_TABLE' +)" + +CREATE TABLE + +"CREATE TABLE ""test.gt_hive"".varchar_db1.tb02 ( + id integer, + name char(255) +) +COMMENT '' +WITH ( + input_format = 'org.apache.hadoop.mapred.TextInputFormat', + location = 'hdfs://%:9000/user/hive/warehouse/varchar_db1.db/tb02', + output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', + serde_name = 'tb02', + table_type = 'MANAGED_TABLE' +)" + + Hive does not support the datatype CHAR with the length greater than 255 + +CREATE TABLE + +"CREATE TABLE ""test.gt_hive"".varchar_db1.tb04 ( + id integer, + name varchar(250) +) +COMMENT '' +WITH ( + input_format = 'org.apache.hadoop.mapred.TextInputFormat', + location = 'hdfs://%:9000/user/hive/warehouse/varchar_db1.db/tb04', + output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', + serde_name = 'tb04', + table_type = 'MANAGED_TABLE' +)" + +CREATE TABLE + +"CREATE TABLE ""test.gt_hive"".varchar_db1.tb05 ( + id integer, + name varchar(65535) +) +COMMENT '' +WITH ( + input_format = 'org.apache.hadoop.mapred.TextInputFormat', + location = 'hdfs://%:9000/user/hive/warehouse/varchar_db1.db/tb05', + output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', + serde_name = 'tb05', + table_type = 'MANAGED_TABLE' +)" + +CREATE TABLE + +"CREATE TABLE ""test.gt_hive"".varchar_db1.tb06 ( + id integer, + name char(1) +) +COMMENT '' +WITH ( + input_format = 'org.apache.hadoop.mapred.TextInputFormat', + location = 'hdfs://%:9000/user/hive/warehouse/varchar_db1.db/tb06', + output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', + serde_name = 'tb06', + table_type = 'MANAGED_TABLE' +)" + +CREATE TABLE + +"CREATE TABLE ""test.gt_hive"".varchar_db1.tb07 ( + id integer, + name varchar +) +COMMENT '' +WITH ( + input_format = 'org.apache.hadoop.mapred.TextInputFormat', + location = 'hdfs://%:9000/user/hive/warehouse/varchar_db1.db/tb07', + output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', + serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', + serde_name = 'tb07', + table_type = 'MANAGED_TABLE' +)" + + Hive does not support the datatype VARCHAR with the length greater than 65535, you can use varchar without length instead + +DROP TABLE + +DROP TABLE + +DROP TABLE + +DROP TABLE + +DROP TABLE + +DROP TABLE + +DROP SCHEMA diff --git a/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/hive/HiveDataTypeTransformer.java b/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/hive/HiveDataTypeTransformer.java index 18d5c07ac89..e1a54bd235a 100644 --- a/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/hive/HiveDataTypeTransformer.java +++ b/trino-connector/src/main/java/com/datastrato/gravitino/trino/connector/catalog/hive/HiveDataTypeTransformer.java @@ -6,28 +6,49 @@ package com.datastrato.gravitino.trino.connector.catalog.hive; import com.datastrato.gravitino.rel.types.Type; -import com.datastrato.gravitino.rel.types.Type.Name; import com.datastrato.gravitino.rel.types.Types; +import com.datastrato.gravitino.trino.connector.GravitinoErrorCode; import com.datastrato.gravitino.trino.connector.util.GeneralDataTypeTransformer; +import io.trino.spi.TrinoException; +import io.trino.spi.type.VarcharType; /** Type transformer between Hive and Trino */ public class HiveDataTypeTransformer extends GeneralDataTypeTransformer { // Max length of Hive varchar is 65535 private static final int HIVE_VARCHAR_MAX_LENGTH = 65535; + private static final int HIVE_CHAR_MAX_LENGTH = 255; @Override public Type getGravitinoType(io.trino.spi.type.Type type) { - Type gravitinoType = super.getGravitinoType(type); - if (gravitinoType.name() == Name.VARCHAR - && ((Types.VarCharType) gravitinoType).length() > HIVE_VARCHAR_MAX_LENGTH) { - return Types.VarCharType.of(HIVE_VARCHAR_MAX_LENGTH); - } + Class typeClass = type.getClass(); + if (typeClass == VarcharType.class) { + VarcharType varcharType = (VarcharType) type; + if (varcharType.getLength().isEmpty()) { + return Types.StringType.get(); + } + + int length = varcharType.getLength().get(); + if (length > HIVE_VARCHAR_MAX_LENGTH) { + throw new TrinoException( + GravitinoErrorCode.GRAVITINO_ILLEGAL_ARGUMENT, + "Hive does not support the datatype VARCHAR with the length greater than " + + HIVE_VARCHAR_MAX_LENGTH + + ", you can use varchar without length instead"); + } + + return Types.VarCharType.of(length); + } else if (typeClass == io.trino.spi.type.CharType.class) { + io.trino.spi.type.CharType charType = (io.trino.spi.type.CharType) type; + if (charType.getLength() > HIVE_CHAR_MAX_LENGTH) { + throw new TrinoException( + GravitinoErrorCode.GRAVITINO_ILLEGAL_ARGUMENT, + "Hive does not support the datatype CHAR with the length greater than " + + HIVE_CHAR_MAX_LENGTH); + } - if (gravitinoType.name() == Name.FIXEDCHAR - && ((Types.FixedCharType) gravitinoType).length() > HIVE_VARCHAR_MAX_LENGTH) { - return Types.FixedCharType.of(HIVE_VARCHAR_MAX_LENGTH); + return Types.FixedCharType.of(charType.getLength()); } - return gravitinoType; + return super.getGravitinoType(type); } } diff --git a/trino-connector/src/test/java/com/datastrato/gravitino/trino/connector/catalog/hive/TestHiveDataTypeConverter.java b/trino-connector/src/test/java/com/datastrato/gravitino/trino/connector/catalog/hive/TestHiveDataTypeConverter.java new file mode 100644 index 00000000000..9aaba6b9942 --- /dev/null +++ b/trino-connector/src/test/java/com/datastrato/gravitino/trino/connector/catalog/hive/TestHiveDataTypeConverter.java @@ -0,0 +1,66 @@ +/* + * Copyright 2024 Datastrato Pvt Ltd. + * This software is licensed under the Apache License version 2. + */ + +package com.datastrato.gravitino.trino.connector.catalog.hive; + +import com.datastrato.gravitino.rel.types.Types; +import com.datastrato.gravitino.trino.connector.util.GeneralDataTypeTransformer; +import io.trino.spi.TrinoException; +import org.testng.Assert; +import org.testng.annotations.Test; + +public class TestHiveDataTypeConverter { + + @Test + public void testTrinoTypeToGravitinoType() { + GeneralDataTypeTransformer generalDataTypeTransformer = new HiveDataTypeTransformer(); + io.trino.spi.type.Type charTypeWithLengthOne = io.trino.spi.type.CharType.createCharType(1); + Assert.assertEquals( + generalDataTypeTransformer.getGravitinoType(charTypeWithLengthOne), + Types.FixedCharType.of(1)); + + io.trino.spi.type.Type charTypeWithLength = io.trino.spi.type.CharType.createCharType(255); + Assert.assertEquals( + generalDataTypeTransformer.getGravitinoType(charTypeWithLength), + Types.FixedCharType.of(255)); + + io.trino.spi.type.Type charLengthIsOverflow = io.trino.spi.type.CharType.createCharType(256); + Exception e = + Assert.expectThrows( + TrinoException.class, + () -> generalDataTypeTransformer.getGravitinoType(charLengthIsOverflow)); + Assert.assertTrue( + e.getMessage() + .contains("Hive does not support the datatype CHAR with the length greater than 255")); + + io.trino.spi.type.Type varcharType = io.trino.spi.type.VarcharType.createVarcharType(1); + Assert.assertEquals( + generalDataTypeTransformer.getGravitinoType(varcharType), Types.VarCharType.of(1)); + + io.trino.spi.type.Type varcharTypeWithLength = + io.trino.spi.type.VarcharType.createVarcharType(65535); + Assert.assertEquals( + generalDataTypeTransformer.getGravitinoType(varcharTypeWithLength), + Types.VarCharType.of(65535)); + + io.trino.spi.type.Type varcharLengthIsOverflow = + io.trino.spi.type.VarcharType.createVarcharType(65536); + e = + Assert.expectThrows( + TrinoException.class, + () -> generalDataTypeTransformer.getGravitinoType(varcharLengthIsOverflow)); + Assert.assertTrue( + e.getMessage() + .contains( + "Hive does not support the datatype VARCHAR with the length greater than 65535")); + + io.trino.spi.type.Type varcharTypeWithoutLength = + io.trino.spi.type.VarcharType.createUnboundedVarcharType(); + + Assert.assertEquals( + generalDataTypeTransformer.getGravitinoType(varcharTypeWithoutLength), + Types.StringType.get()); + } +}