Add hidden column $file_size for Hive tables
agrawalreetika committed Jul 30, 2021
1 parent 587cf72 commit aab0ed2
Showing 5 changed files with 87 additions and 7 deletions.
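
For orientation, a minimal, hedged sketch of what this change enables once applied: the new $file_size hidden column is not returned by SELECT * and has to be referenced explicitly as a quoted identifier, in the same way as the existing $path and $bucket hidden columns. The table name below is illustrative, not part of this commit.

    -- Read the size (in bytes) of the data file backing each row; table name is illustrative.
    SELECT orderkey, "$path", "$file_size"
    FROM hive.tpch.orders
    LIMIT 10;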
@@ -52,6 +52,11 @@ public class HiveColumnHandle
public static final HiveType BUCKET_HIVE_TYPE = HIVE_INT;
public static final TypeSignature BUCKET_TYPE_SIGNATURE = BUCKET_HIVE_TYPE.getTypeSignature();

public static final int FILE_SIZE_COLUMN_INDEX = -15;
public static final String FILE_SIZE_COLUMN_NAME = "$file_size";
public static final HiveType FILE_SIZE_TYPE = HIVE_LONG;
public static final TypeSignature FILE_SIZE_TYPE_SIGNATURE = FILE_SIZE_TYPE.getTypeSignature();

private static final String UPDATE_ROW_ID_COLUMN_NAME = "$shard_row_id";

// Ids <= this can be used for distinguishing between different prefilled columns.
@@ -243,6 +248,11 @@ public static HiveColumnHandle bucketColumnHandle()
return new HiveColumnHandle(BUCKET_COLUMN_NAME, BUCKET_HIVE_TYPE, BUCKET_TYPE_SIGNATURE, BUCKET_COLUMN_INDEX, SYNTHESIZED, Optional.empty(), ImmutableList.of(), Optional.empty());
}

public static HiveColumnHandle fileSizeColumnHandle()
{
return new HiveColumnHandle(FILE_SIZE_COLUMN_NAME, FILE_SIZE_TYPE, FILE_SIZE_TYPE_SIGNATURE, FILE_SIZE_COLUMN_INDEX, SYNTHESIZED, Optional.empty(), ImmutableList.of(), Optional.empty());
}

public static boolean isPathColumnHandle(HiveColumnHandle column)
{
return column.getHiveColumnIndex() == PATH_COLUMN_INDEX;
@@ -266,4 +276,9 @@ public static boolean isPushedDownSubfield(HiveColumnHandle column)
{
return column.getColumnType() == SYNTHESIZED && column.getRequiredSubfields().size() == 1;
}

public static boolean isFileSizeColumnHandle(HiveColumnHandle column)
{
return column.getHiveColumnIndex() == FILE_SIZE_COLUMN_INDEX;
}
}
@@ -179,6 +179,7 @@
import static com.facebook.presto.hive.HiveColumnHandle.ColumnType.PARTITION_KEY;
import static com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR;
import static com.facebook.presto.hive.HiveColumnHandle.ColumnType.SYNTHESIZED;
import static com.facebook.presto.hive.HiveColumnHandle.FILE_SIZE_COLUMN_NAME;
import static com.facebook.presto.hive.HiveColumnHandle.PATH_COLUMN_NAME;
import static com.facebook.presto.hive.HiveColumnHandle.updateRowIdHandle;
import static com.facebook.presto.hive.HiveErrorCode.HIVE_COLUMN_ORDER_MISMATCH;
@@ -3616,6 +3617,8 @@ private static Function<HiveColumnHandle, ColumnMetadata> columnMetadataGetter(T
builder.put(BUCKET_COLUMN_NAME, Optional.empty());
}

builder.put(FILE_SIZE_COLUMN_NAME, Optional.empty());

Map<String, Optional<String>> columnComment = builder.build();

return handle -> new ColumnMetadata(
@@ -268,7 +268,8 @@ private static Optional<ConnectorPageSource> createSelectivePageSource(
ImmutableList.of(),
split.getTableToPartitionMapping(),
path,
split.getTableBucketNumber());
split.getTableBucketNumber(),
split.getFileSize());

Optional<BucketAdaptation> bucketAdaptation = split.getBucketConversion().map(conversion -> toBucketAdaptation(conversion, columnMappings, split.getTableBucketNumber(), mapping -> mapping.getHiveColumnHandle().getHiveColumnIndex()));

@@ -380,7 +381,8 @@ public static Optional<ConnectorPageSource> createHivePageSource(
bucketConversion.map(BucketConversion::getBucketColumnHandles).orElse(ImmutableList.of()),
tableToPartitionMapping,
path,
tableBucketNumber);
tableBucketNumber,
fileSize);

Set<Integer> outputIndices = hiveColumns.stream()
.map(HiveColumnHandle::getHiveColumnIndex)
@@ -665,7 +667,8 @@ public static List<ColumnMapping> buildColumnMappings(
List<HiveColumnHandle> requiredInterimColumns,
TableToPartitionMapping tableToPartitionMapping,
Path path,
OptionalInt bucketNumber)
OptionalInt bucketNumber,
long fileSize)
{
Map<String, HivePartitionKey> partitionKeysByName = uniqueIndex(partitionKeys, HivePartitionKey::getName);
int regularIndex = 0;
@@ -703,7 +706,7 @@ else if (isPushedDownSubfield(column)) {
else {
columnMappings.add(prefilled(
column,
getPrefilledColumnValue(column, partitionKeysByName.get(column.getName()), path, bucketNumber),
getPrefilledColumnValue(column, partitionKeysByName.get(column.getName()), path, bucketNumber, fileSize),
coercionFrom));
}
}
@@ -130,7 +130,9 @@
import static com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR;
import static com.facebook.presto.hive.HiveColumnHandle.MAX_PARTITION_KEY_COLUMN_INDEX;
import static com.facebook.presto.hive.HiveColumnHandle.bucketColumnHandle;
import static com.facebook.presto.hive.HiveColumnHandle.fileSizeColumnHandle;
import static com.facebook.presto.hive.HiveColumnHandle.isBucketColumnHandle;
import static com.facebook.presto.hive.HiveColumnHandle.isFileSizeColumnHandle;
import static com.facebook.presto.hive.HiveColumnHandle.isPathColumnHandle;
import static com.facebook.presto.hive.HiveColumnHandle.pathColumnHandle;
import static com.facebook.presto.hive.HiveErrorCode.HIVE_BAD_DATA;
@@ -887,6 +889,7 @@ public static List<HiveColumnHandle> hiveColumnHandles(Table table)
if (table.getStorage().getBucketProperty().isPresent()) {
columns.add(bucketColumnHandle());
}
columns.add(fileSizeColumnHandle());

return columns.build();
}
@@ -931,7 +934,7 @@ public static String columnExtraInfo(boolean partitionKey)
return partitionKey ? "partition key" : null;
}

public static Optional<String> getPrefilledColumnValue(HiveColumnHandle columnHandle, HivePartitionKey partitionKey, Path path, OptionalInt bucketNumber)
public static Optional<String> getPrefilledColumnValue(HiveColumnHandle columnHandle, HivePartitionKey partitionKey, Path path, OptionalInt bucketNumber, long fileSize)
{
if (partitionKey != null) {
return partitionKey.getValue();
@@ -945,6 +948,10 @@ public static Optional<String> getPrefilledColumnValue(HiveColumnHandle columnHa
}
return Optional.of(String.valueOf(bucketNumber.getAsInt()));
}
if (isFileSizeColumnHandle(columnHandle)) {
return Optional.of(String.valueOf(fileSize));
}

throw new PrestoException(NOT_SUPPORTED, "unsupported hidden column: " + columnHandle);
}

@@ -94,6 +94,7 @@
import static com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType;
import static com.facebook.presto.common.type.VarcharType.createVarcharType;
import static com.facebook.presto.hive.HiveColumnHandle.BUCKET_COLUMN_NAME;
import static com.facebook.presto.hive.HiveColumnHandle.FILE_SIZE_COLUMN_NAME;
import static com.facebook.presto.hive.HiveColumnHandle.PATH_COLUMN_NAME;
import static com.facebook.presto.hive.HiveQueryRunner.HIVE_CATALOG;
import static com.facebook.presto.hive.HiveQueryRunner.TPCH_SCHEMA;
@@ -2744,7 +2745,7 @@ private void testPathHiddenColumn(Session session, HiveStorageFormat storageForm
TableMetadata tableMetadata = getTableMetadata(catalog, TPCH_SCHEMA, "test_path");
assertEquals(tableMetadata.getMetadata().getProperties().get(STORAGE_FORMAT_PROPERTY), storageFormat);

List<String> columnNames = ImmutableList.of("col0", "col1", PATH_COLUMN_NAME);
List<String> columnNames = ImmutableList.of("col0", "col1", PATH_COLUMN_NAME, FILE_SIZE_COLUMN_NAME);
List<ColumnMetadata> columnMetadatas = tableMetadata.getColumns();
assertEquals(columnMetadatas.size(), columnNames.size());
for (int i = 0; i < columnMetadatas.size(); i++) {
@@ -2802,7 +2803,7 @@ public void testBucketHiddenColumn()
assertEquals(tableMetadata.getMetadata().getProperties().get(BUCKETED_BY_PROPERTY), ImmutableList.of("col0"));
assertEquals(tableMetadata.getMetadata().getProperties().get(BUCKET_COUNT_PROPERTY), 2);

List<String> columnNames = ImmutableList.of("col0", "col1", PATH_COLUMN_NAME, BUCKET_COLUMN_NAME);
List<String> columnNames = ImmutableList.of("col0", "col1", PATH_COLUMN_NAME, BUCKET_COLUMN_NAME, FILE_SIZE_COLUMN_NAME);
List<ColumnMetadata> columnMetadatas = tableMetadata.getColumns();
assertEquals(columnMetadatas.size(), columnNames.size());
for (int i = 0; i < columnMetadatas.size(); i++) {
@@ -2835,6 +2836,57 @@ public void testBucketHiddenColumn()
assertFalse(getQueryRunner().tableExists(getSession(), "test_bucket_hidden_column"));
}

@Test
public void testFileSizeHiddenColumn()
{
@Language("SQL") String createTable = "CREATE TABLE test_file_size " +
"WITH (" +
"partitioned_by = ARRAY['col1']" +
") AS " +
"SELECT * FROM (VALUES " +
"(0, 0), (3, 0), (6, 0), " +
"(1, 1), (4, 1), (7, 1), " +
"(2, 2), (5, 2) " +
" ) t(col0, col1) ";
assertUpdate(createTable, 8);
assertTrue(getQueryRunner().tableExists(getSession(), "test_file_size"));

TableMetadata tableMetadata = getTableMetadata(catalog, TPCH_SCHEMA, "test_file_size");

List<String> columnNames = ImmutableList.of("col0", "col1", PATH_COLUMN_NAME, FILE_SIZE_COLUMN_NAME);
List<ColumnMetadata> columnMetadatas = tableMetadata.getColumns();
assertEquals(columnMetadatas.size(), columnNames.size());
for (int i = 0; i < columnMetadatas.size(); i++) {
ColumnMetadata columnMetadata = columnMetadatas.get(i);
assertEquals(columnMetadata.getName(), columnNames.get(i));
if (columnMetadata.getName().equals(FILE_SIZE_COLUMN_NAME)) {
assertTrue(columnMetadata.isHidden());
}
}
assertEquals(getPartitions("test_file_size").size(), 3);

MaterializedResult results = computeActual(format("SELECT *, \"%s\" FROM test_file_size", FILE_SIZE_COLUMN_NAME));
Map<Integer, Long> fileSizeMap = new HashMap<>();
for (int i = 0; i < results.getRowCount(); i++) {
MaterializedRow row = results.getMaterializedRows().get(i);
int col0 = (int) row.getField(0);
int col1 = (int) row.getField(1);
long fileSize = (Long) row.getField(2);

assertTrue(fileSize > 0);
assertEquals(col0 % 3, col1);
if (fileSizeMap.containsKey(col1)) {
assertEquals(fileSizeMap.get(col1).longValue(), fileSize);
}
else {
fileSizeMap.put(col1, fileSize);
}
}
assertEquals(fileSizeMap.size(), 3);

assertUpdate("DROP TABLE test_file_size");
}

@Test
public void testDeleteAndInsert()
{
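
As a follow-on usage note, a hedged sketch that applies the new column to the table created in testFileSizeHiddenColumn above: one row per partition value and data file, with sizes supplied by $file_size. The query itself is illustrative and not part of the commit.

    -- List the data files behind each partition of test_file_size with their sizes.
    SELECT DISTINCT col1, "$path", "$file_size"
    FROM test_file_size
    ORDER BY col1;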
