Skip to content

Commit

Permalink
[apache#3589] improvement(relational catalog, core): add data type converter api (apache#3590)
Browse files Browse the repository at this point in the history

### What changes were proposed in this pull request?

 - Add `DataTypeConverter` interface for catalogs usage

### Why are the changes needed?

Fix: apache#3589 

### Does this PR introduce _any_ user-facing change?

no

### How was this patch tested?

existing tests

Co-authored-by: Jerry Shao <[email protected]>
  • Loading branch information
mchades and jerryshao authored Jun 5, 2024
1 parent c5c9bef commit 42ba402
Show file tree
Hide file tree
Showing 26 changed files with 250 additions and 283 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@
import static com.datastrato.gravitino.catalog.hive.HiveTable.TABLE_TYPE_PROP;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.COMMENT;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TABLE_TYPE;
import static com.datastrato.gravitino.catalog.hive.converter.HiveDataTypeConverter.CONVERTER;
import static com.datastrato.gravitino.connector.BaseCatalog.CATALOG_BYPASS_PREFIX;
import static org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE;

import com.datastrato.gravitino.NameIdentifier;
import com.datastrato.gravitino.Namespace;
import com.datastrato.gravitino.SchemaChange;
import com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TableType;
import com.datastrato.gravitino.catalog.hive.converter.ToHiveType;
import com.datastrato.gravitino.connector.CatalogInfo;
import com.datastrato.gravitino.connector.CatalogOperations;
import com.datastrato.gravitino.connector.HasPropertyMetadata;
Expand Down Expand Up @@ -983,7 +983,7 @@ private void doAddColumn(List<FieldSchema> cols, TableChange.AddColumn change) {
targetPosition,
new FieldSchema(
change.fieldName()[0],
ToHiveType.convert(change.getDataType()).getQualifiedName(),
CONVERTER.fromGravitino(change.getDataType()).getQualifiedName(),
change.getComment()));
}

Expand Down Expand Up @@ -1031,7 +1031,8 @@ private void doUpdateColumnType(List<FieldSchema> cols, TableChange.UpdateColumn
if (indexOfColumn == -1) {
throw new IllegalArgumentException("UpdateColumnType does not exist: " + columnName);
}
cols.get(indexOfColumn).setType(ToHiveType.convert(change.getNewDataType()).getQualifiedName());
cols.get(indexOfColumn)
.setType(CONVERTER.fromGravitino(change.getNewDataType()).getQualifiedName());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,10 @@
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TABLE_TYPE;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TableType.EXTERNAL_TABLE;
import static com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TableType.MANAGED_TABLE;
import static com.datastrato.gravitino.catalog.hive.converter.HiveDataTypeConverter.CONVERTER;
import static com.datastrato.gravitino.rel.expressions.transforms.Transforms.identity;

import com.datastrato.gravitino.catalog.hive.HiveTablePropertiesMetadata.TableType;
import com.datastrato.gravitino.catalog.hive.converter.FromHiveType;
import com.datastrato.gravitino.catalog.hive.converter.ToHiveType;
import com.datastrato.gravitino.connector.BaseTable;
import com.datastrato.gravitino.connector.PropertiesMetadata;
import com.datastrato.gravitino.connector.TableOperations;
Expand Down Expand Up @@ -114,15 +113,15 @@ public static HiveTable.Builder fromHiveTable(Table table) {
f ->
HiveColumn.builder()
.withName(f.getName())
.withType(FromHiveType.convert(f.getType()))
.withType(CONVERTER.toGravitino(f.getType()))
.withComment(f.getComment())
.build()),
table.getPartitionKeys().stream()
.map(
p ->
HiveColumn.builder()
.withName(p.getName())
.withType(FromHiveType.convert(p.getType()))
.withType(CONVERTER.toGravitino(p.getType()))
.withComment(p.getComment())
.build()))
.toArray(Column[]::new);
Expand Down Expand Up @@ -241,7 +240,7 @@ private FieldSchema getPartitionKey(String[] fieldName) {
.collect(Collectors.toList());
return new FieldSchema(
partitionColumns.get(0).name(),
ToHiveType.convert(partitionColumns.get(0).dataType()).getQualifiedName(),
CONVERTER.fromGravitino(partitionColumns.get(0).dataType()).getQualifiedName(),
partitionColumns.get(0).comment());
}

Expand All @@ -256,7 +255,9 @@ private StorageDescriptor buildStorageDescriptor(
.map(
c ->
new FieldSchema(
c.name(), ToHiveType.convert(c.dataType()).getQualifiedName(), c.comment()))
c.name(),
CONVERTER.fromGravitino(c.dataType()).getQualifiedName(),
c.comment()))
.collect(Collectors.toList()));

// `location` must not be null, otherwise it will result in an NPE when calling HMS `alterTable`
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2023 Datastrato Pvt Ltd.
* Copyright 2024 Datastrato Pvt Ltd.
* This software is licensed under the Apache License version 2.
*/
package com.datastrato.gravitino.catalog.hive.converter;
Expand All @@ -17,12 +17,23 @@
import static org.apache.hadoop.hive.serde.serdeConstants.STRING_TYPE_NAME;
import static org.apache.hadoop.hive.serde.serdeConstants.TIMESTAMP_TYPE_NAME;
import static org.apache.hadoop.hive.serde.serdeConstants.TINYINT_TYPE_NAME;
import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getCharTypeInfo;
import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getDecimalTypeInfo;
import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getListTypeInfo;
import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getMapTypeInfo;
import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getPrimitiveTypeInfo;
import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo;
import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getUnionTypeInfo;
import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getVarcharTypeInfo;
import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString;

import com.datastrato.gravitino.connector.DataTypeConverter;
import com.datastrato.gravitino.rel.types.Type;
import com.datastrato.gravitino.rel.types.Types;
import com.google.common.annotations.VisibleForTesting;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
Expand All @@ -33,28 +44,77 @@
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;

/** Converts Hive data types to corresponding Gravitino data types. */
public class FromHiveType {
public class HiveDataTypeConverter implements DataTypeConverter<TypeInfo, String> {
public static final HiveDataTypeConverter CONVERTER = new HiveDataTypeConverter();

/**
* Converts a Hive data type string to the corresponding Gravitino data type.
*
* @param hiveType The Hive data type string to convert.
* @return The equivalent Gravitino data type.
*/
public static Type convert(String hiveType) {
TypeInfo hiveTypeInfo = getTypeInfoFromTypeString(hiveType);
return toGravitinoType(hiveTypeInfo);
/**
 * Converts a Gravitino {@link Type} to the corresponding Hive {@code TypeInfo}.
 *
 * <p>Complex types (list, map, struct, union) are converted recursively via their
 * element/key/value/field types.
 *
 * @param type the Gravitino type to convert
 * @return the equivalent Hive TypeInfo
 * @throws UnsupportedOperationException if the Gravitino type has no Hive counterpart
 *     (e.g. an external type)
 */
@Override
public TypeInfo fromGravitino(Type type) {
  switch (type.name()) {
    case BOOLEAN:
      return getPrimitiveTypeInfo(BOOLEAN_TYPE_NAME);
    case BYTE:
      // Gravitino BYTE maps to Hive TINYINT (both 8-bit signed).
      return getPrimitiveTypeInfo(TINYINT_TYPE_NAME);
    case SHORT:
      return getPrimitiveTypeInfo(SMALLINT_TYPE_NAME);
    case INTEGER:
      return getPrimitiveTypeInfo(INT_TYPE_NAME);
    case LONG:
      return getPrimitiveTypeInfo(BIGINT_TYPE_NAME);
    case FLOAT:
      return getPrimitiveTypeInfo(FLOAT_TYPE_NAME);
    case DOUBLE:
      return getPrimitiveTypeInfo(DOUBLE_TYPE_NAME);
    case STRING:
      return getPrimitiveTypeInfo(STRING_TYPE_NAME);
    case VARCHAR:
      // Length-parameterized types carry their length into the Hive TypeInfo.
      return getVarcharTypeInfo(((Types.VarCharType) type).length());
    case FIXEDCHAR:
      return getCharTypeInfo(((Types.FixedCharType) type).length());
    case DATE:
      return getPrimitiveTypeInfo(DATE_TYPE_NAME);
    case TIMESTAMP:
      return getPrimitiveTypeInfo(TIMESTAMP_TYPE_NAME);
    case DECIMAL:
      // Precision and scale are preserved in the Hive decimal type.
      Types.DecimalType decimalType = (Types.DecimalType) type;
      return getDecimalTypeInfo(decimalType.precision(), decimalType.scale());
    case BINARY:
      return getPrimitiveTypeInfo(BINARY_TYPE_NAME);
    case INTERVAL_YEAR:
      return getPrimitiveTypeInfo(INTERVAL_YEAR_MONTH_TYPE_NAME);
    case INTERVAL_DAY:
      return getPrimitiveTypeInfo(INTERVAL_DAY_TIME_TYPE_NAME);
    case LIST:
      // Recurse on the element type.
      return getListTypeInfo(fromGravitino(((Types.ListType) type).elementType()));
    case MAP:
      // Recurse on both key and value types.
      Types.MapType mapType = (Types.MapType) type;
      return getMapTypeInfo(fromGravitino(mapType.keyType()), fromGravitino(mapType.valueType()));
    case STRUCT:
      // Struct fields are converted positionally: names and TypeInfos must stay aligned.
      Types.StructType structType = (Types.StructType) type;
      List<TypeInfo> typeInfos =
          Arrays.stream(structType.fields())
              .map(t -> fromGravitino(t.type()))
              .collect(Collectors.toList());
      List<String> names =
          Arrays.stream(structType.fields())
              .map(Types.StructType.Field::name)
              .collect(Collectors.toList());
      return getStructTypeInfo(names, typeInfos);
    case UNION:
      return getUnionTypeInfo(
          Arrays.stream(((Types.UnionType) type).types())
              .map(this::fromGravitino)
              .collect(Collectors.toList()));
    default:
      // No Hive equivalent (e.g. Types.ExternalType) — caller must handle this.
      throw new UnsupportedOperationException("Unsupported conversion to Hive type: " + type);
  }
}

/**
 * Converts a Hive data type string (e.g. {@code "array<int>"}) to the corresponding
 * Gravitino {@link Type}.
 *
 * @param hiveType the Hive type string to parse and convert
 * @return the equivalent Gravitino type
 */
@Override
public Type toGravitino(String hiveType) {
  TypeInfo parsedTypeInfo = getTypeInfoFromTypeString(hiveType);
  return toGravitino(parsedTypeInfo);
}

/**
* Converts a Hive TypeInfo object to the corresponding Gravitino Type.
*
* @param hiveTypeInfo The Hive TypeInfo object to convert.
* @return The equivalent Gravitino Type.
*/
@VisibleForTesting
public static Type toGravitinoType(TypeInfo hiveTypeInfo) {
private Type toGravitino(TypeInfo hiveTypeInfo) {
switch (hiveTypeInfo.getCategory()) {
case PRIMITIVE:
switch (hiveTypeInfo.getTypeName()) {
Expand Down Expand Up @@ -102,12 +162,12 @@ public static Type toGravitinoType(TypeInfo hiveTypeInfo) {
}
case LIST:
return Types.ListType.nullable(
toGravitinoType(((ListTypeInfo) hiveTypeInfo).getListElementTypeInfo()));
toGravitino(((ListTypeInfo) hiveTypeInfo).getListElementTypeInfo()));
case MAP:
MapTypeInfo mapTypeInfo = (MapTypeInfo) hiveTypeInfo;
return Types.MapType.valueNullable(
toGravitinoType(mapTypeInfo.getMapKeyTypeInfo()),
toGravitinoType(mapTypeInfo.getMapValueTypeInfo()));
toGravitino(mapTypeInfo.getMapKeyTypeInfo()),
toGravitino(mapTypeInfo.getMapValueTypeInfo()));
case STRUCT:
StructTypeInfo structTypeInfo = (StructTypeInfo) hiveTypeInfo;
ArrayList<String> fieldNames = structTypeInfo.getAllStructFieldNames();
Expand All @@ -117,19 +177,17 @@ public static Type toGravitinoType(TypeInfo hiveTypeInfo) {
.mapToObj(
i ->
Types.StructType.Field.nullableField(
fieldNames.get(i), toGravitinoType(typeInfos.get(i))))
fieldNames.get(i), toGravitino(typeInfos.get(i))))
.toArray(Types.StructType.Field[]::new);
return Types.StructType.of(fields);
case UNION:
UnionTypeInfo unionTypeInfo = (UnionTypeInfo) hiveTypeInfo;
return Types.UnionType.of(
unionTypeInfo.getAllUnionObjectTypeInfos().stream()
.map(FromHiveType::toGravitinoType)
.map(this::toGravitino)
.toArray(Type[]::new));
default:
return Types.ExternalType.of(hiveTypeInfo.getQualifiedName());
}
}

private FromHiveType() {}
}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
*/
package com.datastrato.gravitino.catalog.hive.converter;

import static com.datastrato.gravitino.catalog.hive.converter.HiveDataTypeConverter.CONVERTER;
import static org.apache.hadoop.hive.serde.serdeConstants.BIGINT_TYPE_NAME;
import static org.apache.hadoop.hive.serde.serdeConstants.BINARY_TYPE_NAME;
import static org.apache.hadoop.hive.serde.serdeConstants.BOOLEAN_TYPE_NAME;
Expand All @@ -29,7 +30,6 @@

import com.datastrato.gravitino.rel.types.Types;
import java.util.Arrays;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
Expand Down Expand Up @@ -72,24 +72,14 @@ public void testTypeConverter() {
Arrays.asList(
getPrimitiveTypeInfo(STRING_TYPE_NAME), getPrimitiveTypeInfo(INT_TYPE_NAME)))
.getTypeName());
Assertions.assertEquals(
Types.ExternalType.of(USER_DEFINED_TYPE),
FromHiveType.toGravitinoType(new UserDefinedTypeInfo()));
Assertions.assertThrows(
UnsupportedOperationException.class,
() -> ToHiveType.convert(Types.ExternalType.of(USER_DEFINED_TYPE)));
() -> CONVERTER.fromGravitino(Types.ExternalType.of(USER_DEFINED_TYPE)));
}

// Round-trip check: Hive type string -> Gravitino type -> Hive TypeInfo must be lossless.
private void testConverter(String typeName) {
  TypeInfo expectedType = getTypeInfoFromTypeString(typeName);
  TypeInfo roundTrippedType =
      CONVERTER.fromGravitino(CONVERTER.toGravitino(expectedType.getTypeName()));
  Assertions.assertEquals(expectedType, roundTrippedType);
}

// Test fixture: a Hive TypeInfo with a type name unknown to the converter, used to
// exercise the external/unsupported-type paths.
// NOTE(review): this diff shows the class being removed by the commit — confirm it is
// no longer referenced before keeping it.
static class UserDefinedTypeInfo extends PrimitiveTypeInfo {
  @Override
  public String getTypeName() {
    return USER_DEFINED_TYPE;
  }
}
}
Loading

0 comments on commit 42ba402

Please sign in to comment.