-
Notifications
You must be signed in to change notification settings - Fork 379
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[#2568] improvement(trino): Optimize char/varchar mapping for Hive catalog between Gravitino and Trino (#2576)
### What changes were proposed in this pull request? Change the char/varchar conversion between Gravitino and Trino. ### Why are the changes needed? We need to make the type conversion more exact. Fix: #2568 ### Does this PR introduce _any_ user-facing change? N/A. ### How was this patch tested? Unit tests (UT) and integration tests (IT).
- Loading branch information
Showing
5 changed files
with
264 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
47 changes: 47 additions & 0 deletions
47
integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
-- Exercises char/varchar column mapping for the Hive catalog at the length boundaries. | ||
-- Statement order must stay in step with the expected results in 00007_varchar.txt. | ||
-- NOTE(review): confirm the testset runner tolerates line comments in this file. | ||
CREATE SCHEMA "test.gt_hive".varchar_db1; | ||
|
||
USE "test.gt_hive".varchar_db1; | ||
|
||
-- char with an explicit length below the limit | ||
CREATE TABLE tb01 (id int, name char(20)); | ||
|
||
SHOW CREATE TABLE "test.gt_hive".varchar_db1.tb01; | ||
|
||
-- char at the largest accepted length (255) | ||
CREATE TABLE tb02 (id int, name char(255)); | ||
|
||
SHOW CREATE TABLE "test.gt_hive".varchar_db1.tb02; | ||
|
||
-- Expected to fail: char longer than 255 is rejected, so tb03 is never created or dropped | ||
CREATE TABLE tb03 (id int, name char(256)); | ||
|
||
-- varchar with an explicit length below the limit | ||
CREATE TABLE tb04 (id int, name varchar(250)); | ||
|
||
SHOW CREATE TABLE "test.gt_hive".varchar_db1.tb04; | ||
|
||
-- varchar at the largest accepted length (65535) | ||
CREATE TABLE tb05 (id int, name varchar(65535)); | ||
|
||
SHOW CREATE TABLE "test.gt_hive".varchar_db1.tb05; | ||
|
||
-- char without a length, expected to be shown as char(1) | ||
CREATE TABLE tb06 (id int, name char); | ||
|
||
SHOW CREATE TABLE "test.gt_hive".varchar_db1.tb06; | ||
|
||
-- varchar without a length, expected to be shown as plain varchar | ||
CREATE TABLE tb07 (id int, name varchar); | ||
|
||
SHOW CREATE TABLE "test.gt_hive".varchar_db1.tb07; | ||
|
||
-- Expected to fail: varchar longer than 65535 is rejected, so tb08 is never created or dropped | ||
CREATE TABLE tb08 (id int, name varchar(65536)); | ||
|
||
||
|
||
-- Cleanup: only the tables whose creation succeeded are dropped | ||
DROP TABLE "test.gt_hive".varchar_db1.tb01; | ||
|
||
DROP TABLE "test.gt_hive".varchar_db1.tb02; | ||
|
||
DROP TABLE "test.gt_hive".varchar_db1.tb04; | ||
|
||
DROP TABLE "test.gt_hive".varchar_db1.tb05; | ||
|
||
DROP TABLE "test.gt_hive".varchar_db1.tb06; | ||
|
||
DROP TABLE "test.gt_hive".varchar_db1.tb07; | ||
|
||
DROP SCHEMA "test.gt_hive".varchar_db1; | ||
|
117 changes: 117 additions & 0 deletions
117
integration-test/src/test/resources/trino-ci-testset/testsets/hive/00007_varchar.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
CREATE SCHEMA | ||
|
||
USE | ||
|
||
CREATE TABLE | ||
|
||
"CREATE TABLE ""test.gt_hive"".varchar_db1.tb01 ( | ||
id integer, | ||
name char(20) | ||
) | ||
COMMENT '' | ||
WITH ( | ||
input_format = 'org.apache.hadoop.mapred.TextInputFormat', | ||
location = 'hdfs://%:9000/user/hive/warehouse/varchar_db1.db/tb01', | ||
output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', | ||
serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', | ||
serde_name = 'tb01', | ||
table_type = 'MANAGED_TABLE' | ||
)" | ||
|
||
CREATE TABLE | ||
|
||
"CREATE TABLE ""test.gt_hive"".varchar_db1.tb02 ( | ||
id integer, | ||
name char(255) | ||
) | ||
COMMENT '' | ||
WITH ( | ||
input_format = 'org.apache.hadoop.mapred.TextInputFormat', | ||
location = 'hdfs://%:9000/user/hive/warehouse/varchar_db1.db/tb02', | ||
output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', | ||
serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', | ||
serde_name = 'tb02', | ||
table_type = 'MANAGED_TABLE' | ||
)" | ||
|
||
<QUERY_FAILED> Hive does not support the datatype CHAR with the length greater than 255 | ||
|
||
CREATE TABLE | ||
|
||
"CREATE TABLE ""test.gt_hive"".varchar_db1.tb04 ( | ||
id integer, | ||
name varchar(250) | ||
) | ||
COMMENT '' | ||
WITH ( | ||
input_format = 'org.apache.hadoop.mapred.TextInputFormat', | ||
location = 'hdfs://%:9000/user/hive/warehouse/varchar_db1.db/tb04', | ||
output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', | ||
serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', | ||
serde_name = 'tb04', | ||
table_type = 'MANAGED_TABLE' | ||
)" | ||
|
||
CREATE TABLE | ||
|
||
"CREATE TABLE ""test.gt_hive"".varchar_db1.tb05 ( | ||
id integer, | ||
name varchar(65535) | ||
) | ||
COMMENT '' | ||
WITH ( | ||
input_format = 'org.apache.hadoop.mapred.TextInputFormat', | ||
location = 'hdfs://%:9000/user/hive/warehouse/varchar_db1.db/tb05', | ||
output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', | ||
serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', | ||
serde_name = 'tb05', | ||
table_type = 'MANAGED_TABLE' | ||
)" | ||
|
||
CREATE TABLE | ||
|
||
"CREATE TABLE ""test.gt_hive"".varchar_db1.tb06 ( | ||
id integer, | ||
name char(1) | ||
) | ||
COMMENT '' | ||
WITH ( | ||
input_format = 'org.apache.hadoop.mapred.TextInputFormat', | ||
location = 'hdfs://%:9000/user/hive/warehouse/varchar_db1.db/tb06', | ||
output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', | ||
serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', | ||
serde_name = 'tb06', | ||
table_type = 'MANAGED_TABLE' | ||
)" | ||
|
||
CREATE TABLE | ||
|
||
"CREATE TABLE ""test.gt_hive"".varchar_db1.tb07 ( | ||
id integer, | ||
name varchar | ||
) | ||
COMMENT '' | ||
WITH ( | ||
input_format = 'org.apache.hadoop.mapred.TextInputFormat', | ||
location = 'hdfs://%:9000/user/hive/warehouse/varchar_db1.db/tb07', | ||
output_format = 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', | ||
serde_lib = 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', | ||
serde_name = 'tb07', | ||
table_type = 'MANAGED_TABLE' | ||
)" | ||
|
||
<QUERY_FAILED> Hive does not support the datatype VARCHAR with the length greater than 65535, you can use varchar without length instead | ||
|
||
DROP TABLE | ||
|
||
DROP TABLE | ||
|
||
DROP TABLE | ||
|
||
DROP TABLE | ||
|
||
DROP TABLE | ||
|
||
DROP TABLE | ||
|
||
DROP SCHEMA |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
66 changes: 66 additions & 0 deletions
66
...java/com/datastrato/gravitino/trino/connector/catalog/hive/TestHiveDataTypeConverter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
/* | ||
* Copyright 2024 Datastrato Pvt Ltd. | ||
* This software is licensed under the Apache License version 2. | ||
*/ | ||
|
||
package com.datastrato.gravitino.trino.connector.catalog.hive; | ||
|
||
import com.datastrato.gravitino.rel.types.Types; | ||
import com.datastrato.gravitino.trino.connector.util.GeneralDataTypeTransformer; | ||
import io.trino.spi.TrinoException; | ||
import org.testng.Assert; | ||
import org.testng.annotations.Test; | ||
|
||
public class TestHiveDataTypeConverter { | ||
|
||
@Test | ||
public void testTrinoTypeToGravitinoType() { | ||
GeneralDataTypeTransformer generalDataTypeTransformer = new HiveDataTypeTransformer(); | ||
io.trino.spi.type.Type charTypeWithLengthOne = io.trino.spi.type.CharType.createCharType(1); | ||
Assert.assertEquals( | ||
generalDataTypeTransformer.getGravitinoType(charTypeWithLengthOne), | ||
Types.FixedCharType.of(1)); | ||
|
||
io.trino.spi.type.Type charTypeWithLength = io.trino.spi.type.CharType.createCharType(255); | ||
Assert.assertEquals( | ||
generalDataTypeTransformer.getGravitinoType(charTypeWithLength), | ||
Types.FixedCharType.of(255)); | ||
|
||
io.trino.spi.type.Type charLengthIsOverflow = io.trino.spi.type.CharType.createCharType(256); | ||
Exception e = | ||
Assert.expectThrows( | ||
TrinoException.class, | ||
() -> generalDataTypeTransformer.getGravitinoType(charLengthIsOverflow)); | ||
Assert.assertTrue( | ||
e.getMessage() | ||
.contains("Hive does not support the datatype CHAR with the length greater than 255")); | ||
|
||
io.trino.spi.type.Type varcharType = io.trino.spi.type.VarcharType.createVarcharType(1); | ||
Assert.assertEquals( | ||
generalDataTypeTransformer.getGravitinoType(varcharType), Types.VarCharType.of(1)); | ||
|
||
io.trino.spi.type.Type varcharTypeWithLength = | ||
io.trino.spi.type.VarcharType.createVarcharType(65535); | ||
Assert.assertEquals( | ||
generalDataTypeTransformer.getGravitinoType(varcharTypeWithLength), | ||
Types.VarCharType.of(65535)); | ||
|
||
io.trino.spi.type.Type varcharLengthIsOverflow = | ||
io.trino.spi.type.VarcharType.createVarcharType(65536); | ||
e = | ||
Assert.expectThrows( | ||
TrinoException.class, | ||
() -> generalDataTypeTransformer.getGravitinoType(varcharLengthIsOverflow)); | ||
Assert.assertTrue( | ||
e.getMessage() | ||
.contains( | ||
"Hive does not support the datatype VARCHAR with the length greater than 65535")); | ||
|
||
io.trino.spi.type.Type varcharTypeWithoutLength = | ||
io.trino.spi.type.VarcharType.createUnboundedVarcharType(); | ||
|
||
Assert.assertEquals( | ||
generalDataTypeTransformer.getGravitinoType(varcharTypeWithoutLength), | ||
Types.StringType.get()); | ||
} | ||
} |