Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cache Iceberg column types in Glue for faster access #18315

Merged
merged 12 commits into from
Aug 1, 2023
6 changes: 6 additions & 0 deletions .mvn/modernizer/violations.xml
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,12 @@
<comment>Table type is nullable in Glue model, which is too easy to forget about. Prefer GlueToTrinoConverter.getTableType</comment>
</violation>

<violation>
<name>com/amazonaws/services/glue/model/Column.getParameters:()Ljava/util/Map;</name>
<version>1.1</version>
<comment>Column parameters map is nullable in Glue model, which is too easy to forget about. Prefer GlueToTrinoConverter.getColumnParameters</comment>
</violation>

<violation>
<name>com/amazonaws/services/glue/model/Table.getParameters:()Ljava/util/Map;</name>
<version>1.1</version>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,11 @@

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.ImmutableMap;
import com.google.errorprone.annotations.Immutable;
import io.trino.plugin.hive.HiveType;

import java.util.Map;
import java.util.Objects;
import java.util.Optional;

Expand All @@ -30,16 +32,28 @@ public class Column
private final String name;
private final HiveType type;
private final Optional<String> comment;
private final Map<String, String> properties;

@Deprecated
public Column(
String name,
HiveType type,
Optional<String> comment)
{
this(name, type, comment, ImmutableMap.of());
}

@JsonCreator
public Column(
@JsonProperty("name") String name,
@JsonProperty("type") HiveType type,
@JsonProperty("comment") Optional<String> comment)
@JsonProperty("comment") Optional<String> comment,
@JsonProperty("properties") Map<String, String> properties)
{
this.name = requireNonNull(name, "name is null");
this.type = requireNonNull(type, "type is null");
this.comment = requireNonNull(comment, "comment is null");
this.properties = ImmutableMap.copyOf(requireNonNull(properties, "properties is null"));
}

@JsonProperty
Expand All @@ -60,6 +74,12 @@ public Optional<String> getComment()
return comment;
}

@JsonProperty
public Map<String, String> getProperties()
{
return properties;
}

@Override
public String toString()
{
Expand All @@ -82,12 +102,13 @@ public boolean equals(Object o)
Column column = (Column) o;
return Objects.equals(name, column.name) &&
Objects.equals(type, column.type) &&
Objects.equals(comment, column.comment);
Objects.equals(comment, column.comment) &&
Objects.equals(properties, column.properties);
}

@Override
public int hashCode()
{
return Objects.hash(name, type, comment);
return Objects.hash(name, type, comment, properties);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.plugin.hive.metastore.file;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.ImmutableMap;
import com.google.errorprone.annotations.Immutable;
import io.trino.plugin.hive.HiveType;

import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;

import static com.google.common.base.MoreObjects.toStringHelper;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.util.Objects.requireNonNull;

@Immutable
public class Column
findepi marked this conversation as resolved.
Show resolved Hide resolved
{
private final String name;
private final HiveType type;
private final Optional<String> comment;
private final Map<String, String> properties;

@JsonCreator
public Column(
@JsonProperty("name") String name,
@JsonProperty("type") HiveType type,
@JsonProperty("comment") Optional<String> comment,
@JsonProperty("properties") Optional<Map<String, String>> properties)
{
this(
name,
type,
comment,
properties.orElse(ImmutableMap.of()));
}

public Column(
String name,
HiveType type,
Optional<String> comment,
Map<String, String> properties)
{
this.name = requireNonNull(name, "name is null");
this.type = requireNonNull(type, "type is null");
this.comment = requireNonNull(comment, "comment is null");
this.properties = ImmutableMap.copyOf(requireNonNull(properties, "properties is null"));
}

@JsonProperty
public String getName()
{
return name;
}

@JsonProperty
public HiveType getType()
{
return type;
}

@JsonProperty
public Optional<String> getComment()
{
return comment;
}

@JsonProperty
public Map<String, String> getProperties()
{
return properties;
}

@Override
public String toString()
{
return toStringHelper(this)
.add("name", name)
.add("type", type)
.toString();
}

@Override
public boolean equals(Object o)
{
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}

Column column = (Column) o;
return Objects.equals(name, column.name) &&
Objects.equals(type, column.type) &&
Objects.equals(comment, column.comment) &&
Objects.equals(properties, column.properties);
}

@Override
public int hashCode()
{
return Objects.hash(name, type, comment, properties);
}

public static List<Column> fromMetastoreModel(List<io.trino.plugin.hive.metastore.Column> metastoreColumns)
{
return metastoreColumns.stream()
.map(Column::fromMetastoreModel)
.collect(toImmutableList());
}

public static Column fromMetastoreModel(io.trino.plugin.hive.metastore.Column metastoreColumn)
{
return new Column(
metastoreColumn.getName(),
metastoreColumn.getType(),
metastoreColumn.getComment(),
metastoreColumn.getProperties());
}

public static List<io.trino.plugin.hive.metastore.Column> toMetastoreModel(List<Column> fileMetastoreColumns)
{
return fileMetastoreColumns.stream()
.map(Column::toMetastoreModel)
.collect(toImmutableList());
}

public static io.trino.plugin.hive.metastore.Column toMetastoreModel(Column fileMetastoreColumn)
{
return new io.trino.plugin.hive.metastore.Column(
fileMetastoreColumn.getName(),
fileMetastoreColumn.getType(),
fileMetastoreColumn.getComment(),
fileMetastoreColumn.getProperties());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
import io.trino.plugin.hive.TableAlreadyExistsException;
import io.trino.plugin.hive.TableType;
import io.trino.plugin.hive.acid.AcidTransaction;
import io.trino.plugin.hive.metastore.Column;
import io.trino.plugin.hive.metastore.Database;
import io.trino.plugin.hive.metastore.HiveColumnStatistics;
import io.trino.plugin.hive.metastore.HiveMetastore;
Expand Down Expand Up @@ -680,7 +679,7 @@ public synchronized void commentColumn(String databaseName, String tableName, St
ImmutableList.Builder<Column> newDataColumns = ImmutableList.builder();
for (Column fieldSchema : oldTable.getDataColumns()) {
if (fieldSchema.getName().equals(columnName)) {
newDataColumns.add(new Column(columnName, fieldSchema.getType(), comment));
newDataColumns.add(new Column(columnName, fieldSchema.getType(), comment, fieldSchema.getProperties()));
}
else {
newDataColumns.add(fieldSchema);
Expand All @@ -703,7 +702,7 @@ public synchronized void addColumn(String databaseName, String tableName, String
currentVersion,
ImmutableList.<Column>builder()
.addAll(oldTable.getDataColumns())
.add(new Column(columnName, columnType, Optional.ofNullable(columnComment)))
.add(new Column(columnName, columnType, Optional.ofNullable(columnComment), ImmutableMap.of()))
.build());
});
}
Expand All @@ -728,7 +727,7 @@ public synchronized void renameColumn(String databaseName, String tableName, Str
ImmutableList.Builder<Column> newDataColumns = ImmutableList.builder();
for (Column fieldSchema : oldTable.getDataColumns()) {
if (fieldSchema.getName().equals(oldColumnName)) {
newDataColumns.add(new Column(newColumnName, fieldSchema.getType(), fieldSchema.getComment()));
newDataColumns.add(new Column(newColumnName, fieldSchema.getType(), fieldSchema.getComment(), fieldSchema.getProperties()));
}
else {
newDataColumns.add(fieldSchema);
Expand Down Expand Up @@ -1084,7 +1083,7 @@ private boolean isValidPartition(Table table, String partitionName)
}
}

private List<ArrayDeque<String>> listPartitions(Path director, List<Column> partitionColumns)
private List<ArrayDeque<String>> listPartitions(Path director, List<io.trino.plugin.hive.metastore.Column> partitionColumns)
{
if (partitionColumns.isEmpty()) {
return ImmutableList.of();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import com.google.common.collect.ImmutableMap;
import io.trino.plugin.hive.HiveBucketProperty;
import io.trino.plugin.hive.HiveStorageFormat;
import io.trino.plugin.hive.metastore.Column;
import io.trino.plugin.hive.metastore.HiveColumnStatistics;
import io.trino.plugin.hive.metastore.Storage;
import io.trino.plugin.hive.metastore.StorageFormat;
Expand Down Expand Up @@ -105,8 +104,8 @@ public TableMetadata(String currentVersion, Table table)
writerVersion = Optional.of(requireNonNull(currentVersion, "currentVersion is null"));
owner = table.getOwner();
tableType = table.getTableType();
dataColumns = table.getDataColumns();
partitionColumns = table.getPartitionColumns();
dataColumns = Column.fromMetastoreModel(table.getDataColumns());
partitionColumns = Column.fromMetastoreModel(table.getPartitionColumns());
parameters = table.getParameters();

StorageFormat tableFormat = table.getStorage().getStorageFormat();
Expand Down Expand Up @@ -305,8 +304,8 @@ public Table toTable(String databaseName, String tableName, String location)
.setBucketProperty(bucketProperty)
.setSerdeParameters(serdeParameters)
.build(),
dataColumns,
partitionColumns,
Column.toMetastoreModel(dataColumns),
Column.toMetastoreModel(partitionColumns),
parameters,
viewOriginalText,
viewExpandedText,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,12 @@ private static StorageDescriptor convertStorage(Storage storage, List<Column> co
return sd;
}

private static com.amazonaws.services.glue.model.Column convertColumn(Column prestoColumn)
private static com.amazonaws.services.glue.model.Column convertColumn(Column trinoColumn)
{
return new com.amazonaws.services.glue.model.Column()
.withName(prestoColumn.getName())
.withType(prestoColumn.getType().toString())
.withComment(prestoColumn.getComment().orElse(null));
.withName(trinoColumn.getName())
.withType(trinoColumn.getType().toString())
.withComment(trinoColumn.getComment().orElse(null))
.withParameters(trinoColumn.getProperties());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ public final class GlueToTrinoConverter

private GlueToTrinoConverter() {}

@SuppressModernizer // Usage of `Column.getParameters` is not allowed. Only this method can call that.
public static Map<String, String> getColumnParameters(com.amazonaws.services.glue.model.Column glueColumn)
{
return firstNonNull(glueColumn.getParameters(), ImmutableMap.of());
}

public static String getTableType(com.amazonaws.services.glue.model.Table glueTable)
{
// Athena treats missing table type as EXTERNAL_TABLE.
Expand Down Expand Up @@ -132,7 +138,7 @@ public static Table convertTable(com.amazonaws.services.glue.model.Table glueTab
// Iceberg tables do not need to read the StorageDescriptor field, but we still need to return dummy properties for compatibility
// Delta Lake tables only need to provide a dummy properties if a StorageDescriptor was not explicitly configured.
// Materialized views do not need to read the StorageDescriptor, but we still need to return dummy properties for compatibility
tableBuilder.setDataColumns(ImmutableList.of(new Column("dummy", HIVE_INT, Optional.empty())));
tableBuilder.setDataColumns(ImmutableList.of(new Column("dummy", HIVE_INT, Optional.empty(), ImmutableMap.of())));
tableBuilder.getStorageBuilder().setStorageFormat(StorageFormat.fromHiveStorageFormat(HiveStorageFormat.PARQUET));
}
else {
Expand All @@ -159,9 +165,9 @@ private static Column convertColumn(SchemaTableName table, com.amazonaws.service
// to string to avoid cast exceptions.
if (HiveStorageFormat.CSV.getSerde().equals(serde)) {
//TODO(https://github.com/trinodb/trino/issues/7240) Add tests
return new Column(glueColumn.getName(), HiveType.HIVE_STRING, Optional.ofNullable(glueColumn.getComment()));
return new Column(glueColumn.getName(), HiveType.HIVE_STRING, Optional.ofNullable(glueColumn.getComment()), getColumnParameters(glueColumn));
}
return new Column(glueColumn.getName(), convertType(table, glueColumn), Optional.ofNullable(glueColumn.getComment()));
return new Column(glueColumn.getName(), convertType(table, glueColumn), Optional.ofNullable(glueColumn.getComment()), getColumnParameters(glueColumn));
}

private static HiveType convertType(SchemaTableName table, com.amazonaws.services.glue.model.Column column)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ public class HiveMetastoreRecording

@Inject
public HiveMetastoreRecording(RecordingMetastoreConfig config, JsonCodec<Recording> recordingCodec)
throws IOException
{
this.recordingCodec = recordingCodec;
this.recordingPath = Paths.get(requireNonNull(config.getRecordingPath(), "recordingPath is null"));
Expand Down Expand Up @@ -117,12 +116,14 @@ public HiveMetastoreRecording(RecordingMetastoreConfig config, JsonCodec<Recordi

@VisibleForTesting
void loadRecording()
throws IOException
{
Recording recording;
try (GZIPInputStream inputStream = new GZIPInputStream(Files.newInputStream(recordingPath))) {
recording = recordingCodec.fromJson(inputStream.readAllBytes());
}
catch (RuntimeException | IOException e) {
throw new RuntimeException("Failed to load recording from: " + recordingPath, e);
}

allDatabases = recording.getAllDatabases();
allRoles = recording.getAllRoles();
Expand Down
5 changes: 5 additions & 0 deletions plugin/trino-iceberg/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,11 @@
<artifactId>jjwt-jackson</artifactId>
</dependency>

<dependency>
<groupId>io.trino</groupId>
<artifactId>trino-cache</artifactId>
</dependency>

<dependency>
<groupId>io.trino</groupId>
<artifactId>trino-filesystem</artifactId>
Expand Down
Loading
Loading