Skip to content

Commit

Permalink
[#2569] improvement(trino): Optimize char/varchar mapping for Iceberg…
Browse files Browse the repository at this point in the history
… catalog between Gravitino and Trino (#2579)

### What changes were proposed in this pull request?

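Change the char/varchar type conversion for the Iceberg catalog between
Gravitino and Trino: CHAR and length-bounded VARCHAR are now rejected with an
explicit error, and unbounded VARCHAR maps to the Gravitino string type.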

### Why are the changes needed?

We need to make the type conversion more exact.

Fix: #2569 

### Does this PR introduce _any_ user-facing change?

N/A.

### How was this patch tested?

Unit tests (UTs) and integration tests (ITs).
  • Loading branch information
yuqi1129 authored Mar 19, 2024
1 parent 057b4c7 commit b53ff6b
Show file tree
Hide file tree
Showing 7 changed files with 146 additions and 55 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ USE "test.gt_iceberg".gt_db2;

-- Unsupported Type: TINYINT, SMALLINT
CREATE TABLE tb01 (
f1 VARCHAR(200),
f2 CHAR(20),
f1 VARCHAR,
f3 VARBINARY,
f4 DECIMAL(10, 3),
f5 REAL,
Expand All @@ -21,17 +20,16 @@ CREATE TABLE tb01 (

SHOW CREATE TABLE tb01;

INSERT INTO tb01 (f1, f2, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15)
VALUES ('Sample text 1', 'Text1', x'65', 123.456, 7.89, 12.34, true, 1000, 1000, 100000, DATE '2024-01-01', TIME '08:00:00', TIMESTAMP '2024-01-01 08:00:00');
INSERT INTO tb01 (f1, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15)
VALUES ('Sample text 1', x'65', 123.456, 7.89, 12.34, true, 1000, 1000, 100000, DATE '2024-01-01', TIME '08:00:00', TIMESTAMP '2024-01-01 08:00:00');

INSERT INTO tb01 (f1, f2, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15)
VALUES (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO tb01 (f1, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15)
VALUES (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);

select * from tb01 order by f1;

CREATE TABLE tb02 (
f1 VARCHAR(200) not null ,
f2 CHAR(20) not null ,
f1 VARCHAR not null ,
f3 VARBINARY not null ,
f4 DECIMAL(10, 3) not null ,
f5 REAL not null ,
Expand All @@ -47,20 +45,20 @@ CREATE TABLE tb02 (

show create table tb02;

INSERT INTO tb02 (f1, f2, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15)
VALUES ('Sample text 1', 'Text1', x'65', 123.456, 7.89, 12.34, true, 1000, 1000, 100000, DATE '2024-01-01', TIME '08:00:00', TIMESTAMP '2024-01-01 08:00:00');
INSERT INTO tb02 (f1, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15)
VALUES ('Sample text 1', x'65', 123.456, 7.89, 12.34, true, 1000, 1000, 100000, DATE '2024-01-01', TIME '08:00:00', TIMESTAMP '2024-01-01 08:00:00');

INSERT INTO tb02 (f1, f2, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15)
VALUES (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
INSERT INTO tb02 (f1, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15)
VALUES (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);

INSERT INTO tb02 (f1, f2, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15)
VALUES ('Sample text 1', NULL, x'65', 123.456, 7.89, 12.34, true, 1000, 1000, 100000, DATE '2024-01-01', TIME '08:00:00', TIMESTAMP '2024-01-01 08:00:00');
INSERT INTO tb02 (f1, f3, f4, f5, f6, f10, f11, f12, f13, f14, f15)
VALUES ('Sample text 1', x'65', 123.456, 7.89, 12.34, 1000, 1000, 100000, DATE '2024-01-01', TIME '08:00:00', TIMESTAMP '2024-01-01 08:00:00');

INSERT INTO tb02 (f1, f2, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15)
VALUES ('Sample text 1', 'same3', x'65', 123.456, 7.89, 12.34, true, 1000, 1000, NULl, DATE '2024-01-01', TIME '08:00:00', TIMESTAMP '2024-01-01 08:00:00');
INSERT INTO tb02 (f1, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15)
VALUES ('Sample text 1', x'65', 123.456, 7.89, 12.34, true, 1000, 1000, NULL, DATE '2024-01-01', TIME '08:00:00', TIMESTAMP '2024-01-01 08:00:00');

INSERT INTO tb02 (f1, f2, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15)
VALUES ('Sample text 1', 'same9', x'65', 123.456, 7.89, 12.34, true, 1000, 1000, 1992382342, DATE '2024-01-01', NULL, TIMESTAMP '2024-01-01 08:00:00');
INSERT INTO tb02 (f1, f3, f4, f5, f6, f7, f10, f11, f12, f13, f14, f15)
VALUES ('Sample text 1', x'65', 123.456, 7.89, 12.34, true, 1000, 1000, 1992382342, DATE '2024-01-01', NULL, TIMESTAMP '2024-01-01 08:00:00');

drop table tb01;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ CREATE TABLE

"CREATE TABLE ""test.gt_iceberg"".gt_db2.tb01 (
f1 varchar,
f2 varchar,
f3 varbinary,
f4 decimal(10, 3),
f5 real,
Expand All @@ -26,14 +25,13 @@ INSERT: 1 row

INSERT: 1 row

"Sample text 1","Text1","65","123.456","7.89","12.34","true","1000","1000","100000","2024-01-01","08:00:00.000","2024-01-01 08:00:00.000"
"","","","","","","","","","","","",""
"Sample text 1","65","123.456","7.89","12.34","true","1000","1000","100000","2024-01-01","08:00:00.000","2024-01-01 08:00:00.000"
"","","","","","","","","","","",""

CREATE TABLE

"CREATE TABLE ""test.gt_iceberg"".gt_db2.tb02 (
f1 varchar NOT NULL,
f2 varchar NOT NULL,
f3 varbinary NOT NULL,
f4 decimal(10, 3) NOT NULL,
f5 real NOT NULL,
Expand All @@ -52,7 +50,7 @@ INSERT: 1 row

<QUERY_FAILED> NULL value not allowed for NOT NULL column: f1

<QUERY_FAILED> NULL value not allowed for NOT NULL column: f2
<QUERY_FAILED> NULL value not allowed for NOT NULL column: f7

<QUERY_FAILED> NULL value not allowed for NOT NULL column: f12

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Scratch schema used only by the char/varchar mapping statements below
CREATE SCHEMA "test.gt_iceberg".varchar_db1;

USE "test.gt_iceberg".varchar_db1;

-- CHAR with an explicit length
CREATE TABLE tb01 (id INT, name CHAR(20));

-- CHAR without a length
CREATE TABLE tb02 (id INT, name CHAR);

-- VARCHAR with an explicit length
CREATE TABLE tb03 (id INT, name VARCHAR(233));

-- VARCHAR without a length, the only table in this script that is inspected and dropped afterwards
CREATE TABLE tb04 (id INT, name VARCHAR);

SHOW CREATE TABLE "test.gt_iceberg".varchar_db1.tb04;

-- Clean up so the script leaves nothing behind
DROP TABLE "test.gt_iceberg".varchar_db1.tb04;

DROP SCHEMA "test.gt_iceberg".varchar_db1;

Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
CREATE SCHEMA

USE

<QUERY_FAILED> Iceberg does not support the datatype CHAR

<QUERY_FAILED> Iceberg does not support the datatype CHAR

<QUERY_FAILED> Iceberg does not support the datatype VARCHAR with length

CREATE TABLE

"CREATE TABLE ""test.gt_iceberg"".varchar_db1.tb04 (
id integer,
name varchar
)
COMMENT ''"

DROP TABLE

DROP SCHEMA
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@ use "test.gt_iceberg2".gt_tpch2;

CREATE TABLE customer (
custkey bigint,
name varchar(25),
address varchar(40),
name varchar,
address varchar,
nationkey bigint,
phone varchar(15),
phone varchar,
acctbal decimal(12, 2),
mktsegment varchar(10),
comment varchar(117)
mktsegment varchar,
comment varchar
);

CREATE TABLE lineitem (
Expand All @@ -30,69 +30,69 @@ CREATE TABLE lineitem (
extendedprice decimal(12, 2),
discount decimal(12, 2),
tax decimal(12, 2),
returnflag varchar(1),
linestatus varchar(1),
returnflag varchar,
linestatus varchar,
shipdate date,
commitdate date,
receiptdate date,
shipinstruct varchar(25),
shipmode varchar(10),
comment varchar(44)
shipinstruct varchar,
shipmode varchar,
comment varchar
);

CREATE TABLE nation (
nationkey bigint,
name varchar(25),
name varchar,
regionkey bigint,
comment varchar(152)
comment varchar
);

CREATE TABLE orders (
orderkey bigint,
custkey bigint,
orderstatus varchar(1),
orderstatus varchar,
totalprice decimal(12, 2),
orderdate date,
orderpriority varchar(15),
clerk varchar(15),
orderpriority varchar,
clerk varchar,
shippriority integer,
comment varchar(79)
comment varchar
);

CREATE TABLE part (
partkey bigint,
name varchar(55),
mfgr varchar(25),
brand varchar(10),
type varchar(25),
name varchar,
mfgr varchar,
brand varchar,
type varchar,
size integer,
container varchar(10),
container varchar,
retailprice decimal(12, 2),
comment varchar(23)
comment varchar
);

CREATE TABLE partsupp (
partkey bigint,
suppkey bigint,
availqty integer,
supplycost decimal(12, 2),
comment varchar(199)
comment varchar
);

CREATE TABLE region (
regionkey bigint,
name varchar(25),
comment varchar(152)
name varchar,
comment varchar
);

CREATE TABLE supplier (
suppkey bigint,
name varchar(25),
address varchar(40),
name varchar,
address varchar,
nationkey bigint,
phone varchar(15),
phone varchar,
acctbal decimal(12, 2),
comment varchar(101)
comment varchar
);

insert into customer select * from tpch.tiny.customer;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,34 @@
import com.datastrato.gravitino.rel.types.Type;
import com.datastrato.gravitino.rel.types.Type.Name;
import com.datastrato.gravitino.rel.types.Types;
import com.datastrato.gravitino.trino.connector.GravitinoErrorCode;
import com.datastrato.gravitino.trino.connector.util.GeneralDataTypeTransformer;
import io.trino.spi.TrinoException;
import io.trino.spi.type.VarbinaryType;
import io.trino.spi.type.VarcharType;

/** Type transformer between Iceberg and Trino */
public class IcebergDataTypeTransformer extends GeneralDataTypeTransformer {

@Override
public Type getGravitinoType(io.trino.spi.type.Type type) {
Type gravitinoType = super.getGravitinoType(type);
if (gravitinoType.name() == Name.VARCHAR || gravitinoType.name() == Name.FIXEDCHAR) {
Class<? extends io.trino.spi.type.Type> typeClass = type.getClass();
if (typeClass == io.trino.spi.type.CharType.class) {
throw new TrinoException(
GravitinoErrorCode.GRAVITINO_ILLEGAL_ARGUMENT,
"Iceberg does not support the datatype CHAR");
} else if (typeClass == io.trino.spi.type.VarcharType.class) {
VarcharType varCharType = (VarcharType) type;
if (varCharType.getLength().isPresent()) {
throw new TrinoException(
GravitinoErrorCode.GRAVITINO_ILLEGAL_ARGUMENT,
"Iceberg does not support the datatype VARCHAR with length");
}

return Types.StringType.get();
}
return gravitinoType;

return super.getGravitinoType(type);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright 2024 Datastrato Pvt Ltd.
* This software is licensed under the Apache License version 2.
*/

package com.datastrato.gravitino.trino.connector.catalog.iceberg;

import com.datastrato.gravitino.rel.types.Types;
import com.datastrato.gravitino.trino.connector.util.GeneralDataTypeTransformer;
import io.trino.spi.TrinoException;
import io.trino.spi.type.VarcharType;
import org.testng.Assert;
import org.testng.annotations.Test;

/**
 * Checks how {@link IcebergDataTypeTransformer} converts Trino character types into Gravitino
 * types.
 */
public class TestIcebergDataTypeTransformer {

  /**
   * Exercises three inputs in turn: CHAR(1) and VARCHAR(1) must each raise a {@link
   * TrinoException} carrying the matching "does not support" message, while the unbounded VARCHAR
   * must convert to the Gravitino string type.
   */
  @Test
  public void testTrinoTypeToGravitinoType() {
    GeneralDataTypeTransformer transformer = new IcebergDataTypeTransformer();

    // Case 1: a fixed-length CHAR is rejected.
    io.trino.spi.type.Type fixedChar = io.trino.spi.type.CharType.createCharType(1);
    TrinoException charFailure =
        Assert.expectThrows(TrinoException.class, () -> transformer.getGravitinoType(fixedChar));
    Assert.assertTrue(
        charFailure.getMessage().contains("Iceberg does not support the datatype CHAR"));

    // Case 2: a VARCHAR that carries a length is rejected.
    io.trino.spi.type.Type boundedVarchar = io.trino.spi.type.VarcharType.createVarcharType(1);
    TrinoException varcharFailure =
        Assert.expectThrows(
            TrinoException.class, () -> transformer.getGravitinoType(boundedVarchar));
    Assert.assertTrue(
        varcharFailure
            .getMessage()
            .contains("Iceberg does not support the datatype VARCHAR with length"));

    // Case 3: the length-less VARCHAR converts to the string type.
    io.trino.spi.type.Type unboundedVarchar = VarcharType.VARCHAR;
    Assert.assertEquals(transformer.getGravitinoType(unboundedVarchar), Types.StringType.get());
  }
}

0 comments on commit b53ff6b

Please sign in to comment.