From 00f6a006b8b85a36ee58ef92e205f77c660d1495 Mon Sep 17 00:00:00 2001 From: SteNicholas Date: Thu, 22 Feb 2024 17:20:02 +0800 Subject: [PATCH] [#2117] improvement(api): Type adds UNKNOWN column data type to handle an unresolvable type from the catalog --- .../datastrato/gravitino/rel/types/Type.java | 5 +- .../datastrato/gravitino/rel/types/Types.java | 55 +++++++++++++++++++ .../datastrato/gravitino/rel/TestTypes.java | 8 +++ .../catalog/hive/converter/FromHiveType.java | 14 ++--- .../catalog/converter/TestTypeConverter.java | 15 +++++ .../mysql/converter/MysqlTypeConverter.java | 2 +- .../converter/TestMysqlTypeConverter.java | 3 + .../converter/PostgreSqlTypeConverter.java | 2 +- .../TestPostgreSqlTypeConverter.java | 3 + .../datastrato/gravitino/json/JsonUtils.java | 27 +++++++-- .../gravitino/json/TestJsonUtils.java | 6 ++ 11 files changed, 125 insertions(+), 15 deletions(-) diff --git a/api/src/main/java/com/datastrato/gravitino/rel/types/Type.java b/api/src/main/java/com/datastrato/gravitino/rel/types/Type.java index 0be8ecf83c2..d7b36d54171 100644 --- a/api/src/main/java/com/datastrato/gravitino/rel/types/Type.java +++ b/api/src/main/java/com/datastrato/gravitino/rel/types/Type.java @@ -73,7 +73,10 @@ enum Name { UNION, /** The null type. A null type represents a value that is null. */ - NULL + NULL, + + /** The unparsed type. An unparsed type represents an unresolvable type. */ + UNPARSED } /** The base type of all primitive types. */ diff --git a/api/src/main/java/com/datastrato/gravitino/rel/types/Types.java b/api/src/main/java/com/datastrato/gravitino/rel/types/Types.java index 40622d7d140..50701413f70 100644 --- a/api/src/main/java/com/datastrato/gravitino/rel/types/Types.java +++ b/api/src/main/java/com/datastrato/gravitino/rel/types/Types.java @@ -1017,6 +1017,61 @@ public int hashCode() { } } + /** + * Represents a type that is not parsed yet. The parsed type is represented by other types of + * {@link Types}. + */ + public static class UnparsedType implements Type { + + /** + * Creates a new {@link UnparsedType} with the given unparsed type. + * + * @param unparsedType The unparsed type. + * @return A new {@link UnparsedType} with the given unparsed type. + */ + public static UnparsedType of(String unparsedType) { + return new UnparsedType(unparsedType); + } + + private final String unparsedType; + + private UnparsedType(String unparsedType) { + this.unparsedType = unparsedType; + } + + /** @return The unparsed type as a string. */ + public String unparsedType() { + return unparsedType; + } + + @Override + public Name name() { + return Name.UNPARSED; + } + + @Override + public String simpleString() { + return String.format("unparsed(%s)", unparsedType); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + UnparsedType that = (UnparsedType) o; + return Objects.equals(unparsedType, that.unparsedType); + } + + @Override + public int hashCode() { + return Objects.hash(unparsedType); + } + } + /** * @param dataType The data type to check. * @return True if the given data type is allowed to be an auto-increment column. diff --git a/api/src/test/java/com/datastrato/gravitino/rel/TestTypes.java b/api/src/test/java/com/datastrato/gravitino/rel/TestTypes.java index 830e44851de..265cadea805 100644 --- a/api/src/test/java/com/datastrato/gravitino/rel/TestTypes.java +++ b/api/src/test/java/com/datastrato/gravitino/rel/TestTypes.java @@ -179,4 +179,12 @@ public void testComplexTypes() { Assertions.assertEquals("union", unionType.simpleString()); Assertions.assertEquals(unionType, Types.UnionType.of(unionType.types())); } + + @Test + public void testUnparsedType() { + Types.UnparsedType unparsedType = Types.UnparsedType.of("bit"); + Assertions.assertEquals(Type.Name.UNPARSED, unparsedType.name()); + Assertions.assertEquals("unparsed(bit)", unparsedType.simpleString()); + Assertions.assertEquals(unparsedType, Types.UnparsedType.of("bit")); + } } diff --git a/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/converter/FromHiveType.java b/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/converter/FromHiveType.java index 60a7214b9f2..31aaa4db244 100644 --- a/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/converter/FromHiveType.java +++ b/catalogs/catalog-hive/src/main/java/com/datastrato/gravitino/catalog/hive/converter/FromHiveType.java @@ -21,6 +21,7 @@ import com.datastrato.gravitino.rel.types.Type; import com.datastrato.gravitino.rel.types.Types; +import com.google.common.annotations.VisibleForTesting; import java.util.ArrayList; import java.util.stream.IntStream; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; @@ -40,9 +41,8 @@ public class FromHiveType { * * @param hiveType The Hive data type string to convert. * @return The equivalent Gravitino data type. - * @throws IllegalArgumentException If the Hive data type is unknown or unsupported. */ - public static Type convert(String hiveType) throws IllegalArgumentException { + public static Type convert(String hiveType) { TypeInfo hiveTypeInfo = getTypeInfoFromTypeString(hiveType); return toGravitinoType(hiveTypeInfo); } @@ -52,9 +52,9 @@ public static Type convert(String hiveType) throws IllegalArgumentException { * * @param hiveTypeInfo The Hive TypeInfo object to convert. * @return The equivalent Gravitino Type. - * @throws IllegalArgumentException if the Hive data type category is unknown or unsupported. */ - private static Type toGravitinoType(TypeInfo hiveTypeInfo) throws IllegalArgumentException { + @VisibleForTesting + public static Type toGravitinoType(TypeInfo hiveTypeInfo) { switch (hiveTypeInfo.getCategory()) { case PRIMITIVE: switch (hiveTypeInfo.getTypeName()) { @@ -98,8 +98,7 @@ private static Type toGravitinoType(TypeInfo hiveTypeInfo) throws IllegalArgumen return Types.DecimalType.of(decimalTypeInfo.precision(), decimalTypeInfo.scale()); } - throw new IllegalArgumentException( - "Unknown Hive type: " + hiveTypeInfo.getQualifiedName()); + return Types.UnparsedType.of(hiveTypeInfo.getQualifiedName()); } case LIST: return Types.ListType.nullable( @@ -128,8 +127,7 @@ private static Type toGravitinoType(TypeInfo hiveTypeInfo) throws IllegalArgumen .map(FromHiveType::toGravitinoType) .toArray(Type[]::new)); default: - throw new IllegalArgumentException( - "Unknown category of Hive type: " + hiveTypeInfo.getCategory()); + return Types.UnparsedType.of(hiveTypeInfo.getQualifiedName()); } } diff --git a/catalogs/catalog-hive/src/test/java/com/datastrato/gravitino/catalog/converter/TestTypeConverter.java b/catalogs/catalog-hive/src/test/java/com/datastrato/gravitino/catalog/converter/TestTypeConverter.java index b4ef1bb5b66..d8ecaa42a96 100644 --- a/catalogs/catalog-hive/src/test/java/com/datastrato/gravitino/catalog/converter/TestTypeConverter.java +++ b/catalogs/catalog-hive/src/test/java/com/datastrato/gravitino/catalog/converter/TestTypeConverter.java @@ -29,12 +29,17 @@ import com.datastrato.gravitino.catalog.hive.converter.FromHiveType; import com.datastrato.gravitino.catalog.hive.converter.ToHiveType; +import com.datastrato.gravitino.rel.types.Types; import java.util.Arrays; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; public class TestTypeConverter { + + private static final String USER_DEFINED_TYPE = "user-defined"; + @Test public void testTypeConverter() { testConverter(BOOLEAN_TYPE_NAME); @@ -69,6 +74,9 @@ public void testTypeConverter() { Arrays.asList( getPrimitiveTypeInfo(STRING_TYPE_NAME), getPrimitiveTypeInfo(INT_TYPE_NAME))) .getTypeName()); + Assertions.assertEquals( + Types.UnparsedType.of(USER_DEFINED_TYPE), + FromHiveType.toGravitinoType(new UserDefinedTypeInfo())); } private void testConverter(String typeName) { @@ -76,4 +84,11 @@ private void testConverter(String typeName) { TypeInfo convertedType = ToHiveType.convert(FromHiveType.convert(hiveType.getTypeName())); Assertions.assertEquals(hiveType, convertedType); } + + static class UserDefinedTypeInfo extends PrimitiveTypeInfo { + @Override + public String getTypeName() { + return USER_DEFINED_TYPE; + } + } } diff --git a/catalogs/catalog-jdbc-mysql/src/main/java/com/datastrato/gravitino/catalog/mysql/converter/MysqlTypeConverter.java b/catalogs/catalog-jdbc-mysql/src/main/java/com/datastrato/gravitino/catalog/mysql/converter/MysqlTypeConverter.java index 3e18cabb2f2..bd91eb5323a 100644 --- a/catalogs/catalog-jdbc-mysql/src/main/java/com/datastrato/gravitino/catalog/mysql/converter/MysqlTypeConverter.java +++ b/catalogs/catalog-jdbc-mysql/src/main/java/com/datastrato/gravitino/catalog/mysql/converter/MysqlTypeConverter.java @@ -55,7 +55,7 @@ public Type toGravitinoType(JdbcTypeBean typeBean) { case BINARY: return Types.BinaryType.get(); default: - throw new IllegalArgumentException("Not a supported type: " + typeBean.getTypeName()); + return Types.UnparsedType.of(typeBean.getTypeName()); } } diff --git a/catalogs/catalog-jdbc-mysql/src/test/java/com/datastrato/gravitino/catalog/mysql/converter/TestMysqlTypeConverter.java b/catalogs/catalog-jdbc-mysql/src/test/java/com/datastrato/gravitino/catalog/mysql/converter/TestMysqlTypeConverter.java index 8059896bc9f..dd94f3cfaaa 100644 --- a/catalogs/catalog-jdbc-mysql/src/test/java/com/datastrato/gravitino/catalog/mysql/converter/TestMysqlTypeConverter.java +++ b/catalogs/catalog-jdbc-mysql/src/test/java/com/datastrato/gravitino/catalog/mysql/converter/TestMysqlTypeConverter.java @@ -28,6 +28,7 @@ public class TestMysqlTypeConverter { private static final MysqlTypeConverter MYSQL_TYPE_CONVERTER = new MysqlTypeConverter(); + private static final String USER_DEFINED_TYPE = "user-defined"; @Test public void testToGravitinoType() { @@ -44,6 +45,8 @@ public void testToGravitinoType() { checkJdbcTypeToGravitinoType(Types.FixedCharType.of(20), CHAR, "20", null); checkJdbcTypeToGravitinoType(Types.StringType.get(), TEXT, null, null); checkJdbcTypeToGravitinoType(Types.BinaryType.get(), BINARY, null, null); + checkJdbcTypeToGravitinoType( + Types.UnparsedType.of(USER_DEFINED_TYPE), USER_DEFINED_TYPE, null, null); } @Test diff --git a/catalogs/catalog-jdbc-postgresql/src/main/java/com/datastrato/gravitino/catalog/postgresql/converter/PostgreSqlTypeConverter.java b/catalogs/catalog-jdbc-postgresql/src/main/java/com/datastrato/gravitino/catalog/postgresql/converter/PostgreSqlTypeConverter.java index bc3f295f00f..1cb758d3c19 100644 --- a/catalogs/catalog-jdbc-postgresql/src/main/java/com/datastrato/gravitino/catalog/postgresql/converter/PostgreSqlTypeConverter.java +++ b/catalogs/catalog-jdbc-postgresql/src/main/java/com/datastrato/gravitino/catalog/postgresql/converter/PostgreSqlTypeConverter.java @@ -59,7 +59,7 @@ public Type toGravitinoType(JdbcTypeBean typeBean) { case BYTEA: return Types.BinaryType.get(); default: - throw new IllegalArgumentException("Not a supported type: " + typeBean); + return Types.UnparsedType.of(typeBean.getTypeName()); } } diff --git a/catalogs/catalog-jdbc-postgresql/src/test/java/com/datastrato/gravitino/catalog/postgresql/converter/TestPostgreSqlTypeConverter.java b/catalogs/catalog-jdbc-postgresql/src/test/java/com/datastrato/gravitino/catalog/postgresql/converter/TestPostgreSqlTypeConverter.java index 667f4d19262..d2952480849 100644 --- a/catalogs/catalog-jdbc-postgresql/src/test/java/com/datastrato/gravitino/catalog/postgresql/converter/TestPostgreSqlTypeConverter.java +++ b/catalogs/catalog-jdbc-postgresql/src/test/java/com/datastrato/gravitino/catalog/postgresql/converter/TestPostgreSqlTypeConverter.java @@ -30,6 +30,7 @@ public class TestPostgreSqlTypeConverter { private static final PostgreSqlTypeConverter POSTGRE_SQL_TYPE_CONVERTER = new PostgreSqlTypeConverter(); + private static final String USER_DEFINED_TYPE = "user-defined"; @Test public void testToGravitinoType() { @@ -47,6 +48,8 @@ public void testToGravitinoType() { checkJdbcTypeToGravitinoType(Types.FixedCharType.of(20), BPCHAR, "20", null); checkJdbcTypeToGravitinoType(Types.StringType.get(), TEXT, null, null); checkJdbcTypeToGravitinoType(Types.BinaryType.get(), BYTEA, null, null); + checkJdbcTypeToGravitinoType( + Types.UnparsedType.of(USER_DEFINED_TYPE), USER_DEFINED_TYPE, null, null); } @Test diff --git a/common/src/main/java/com/datastrato/gravitino/json/JsonUtils.java b/common/src/main/java/com/datastrato/gravitino/json/JsonUtils.java index d81e345ba6f..eb3c6f2973f 100644 --- a/common/src/main/java/com/datastrato/gravitino/json/JsonUtils.java +++ b/common/src/main/java/com/datastrato/gravitino/json/JsonUtils.java @@ -99,6 +99,8 @@ public class JsonUtils { private static final String LIST = "list"; private static final String MAP = "map"; private static final String UNION = "union"; + private static final String UNPARSED = "unparsed"; + private static final String UNPARSED_TYPE = "unparsedType"; private static final String FIELDS = "fields"; private static final String UNION_TYPES = "types"; private static final String STRUCT_FIELD_NAME = "name"; @@ -489,7 +491,7 @@ private static void writeDataType(Type dataType, JsonGenerator gen) throws IOExc writeUnionType((Types.UnionType) dataType, gen); break; default: - throw new IOException("Cannot serialize unknown type: " + dataType); + writeUnparsedType(dataType.simpleString(), gen); } } @@ -510,9 +512,8 @@ private static Type readDataType(JsonNode node) { : fromPrimitiveTypeString(text); } - if (node.isObject() && node.get(TYPE) != null) { - JsonNode typeField = node.get(TYPE); - String type = typeField.asText(); + if (node.isObject() && node.has(TYPE)) { + String type = node.get(TYPE).asText(); if (STRUCT.equals(type)) { return readStructType(node); @@ -529,6 +530,10 @@ private static Type readDataType(JsonNode node) { if (UNION.equals(type)) { return readUnionType(node); } + + if (UNPARSED.equals(type)) { + return readUnparsedType(node); + } } throw new IllegalArgumentException("Cannot parse type from JSON: " + node); @@ -599,6 +604,13 @@ private static void writeStructField(Types.StructType.Field field, JsonGenerator gen.writeEndObject(); } + private static void writeUnparsedType(String unparsedType, JsonGenerator gen) throws IOException { + gen.writeStartObject(); + gen.writeStringField(TYPE, UNPARSED); + gen.writeStringField(UNPARSED_TYPE, unparsedType); + gen.writeEndObject(); + } + private static Type.PrimitiveType fromPrimitiveTypeString(String typeString) { Type.PrimitiveType primitiveType = TYPES.get(typeString); if (primitiveType != null) { @@ -701,6 +713,13 @@ private static Types.StructType.Field readStructField(JsonNode node) { return Types.StructType.Field.of(name, type, nullable, comment); } + private static Types.UnparsedType readUnparsedType(JsonNode node) { + Preconditions.checkArgument( + node.has(UNPARSED_TYPE), "Cannot parse unparsed type from missing unparsed type: %s", node); + + return Types.UnparsedType.of(node.get(UNPARSED_TYPE).asText()); + } + // Nested classes for custom serialization and deserialization /** Custom JSON serializer for Gravitino Type objects. */ diff --git a/common/src/test/java/com/datastrato/gravitino/json/TestJsonUtils.java b/common/src/test/java/com/datastrato/gravitino/json/TestJsonUtils.java index 2cb31832fe2..4b80dd054cf 100644 --- a/common/src/test/java/com/datastrato/gravitino/json/TestJsonUtils.java +++ b/common/src/test/java/com/datastrato/gravitino/json/TestJsonUtils.java @@ -154,6 +154,12 @@ public void testTypeSerDe() throws Exception { + " ]\n" + "}"; Assertions.assertEquals(objectMapper.readTree(expected), objectMapper.readTree(jsonValue)); + + type = Types.UnparsedType.of("user-defined"); + jsonValue = JsonUtils.objectMapper().writeValueAsString(type); + expected = + "{\n" + " \"type\": \"unparsed\",\n" + " \"unparsed_type\": \"user-defined\"\n" + "}"; + Assertions.assertEquals(objectMapper.readTree(expected), objectMapper.readTree(jsonValue)); } @Test