Skip to content

Commit

Permalink
Do case insensitive comparison between dereferenced fields and intern…
Browse files Browse the repository at this point in the history
…al ORC field names
  • Loading branch information
willmostly authored and findepi committed Apr 9, 2021
1 parent 987b1dc commit 8bac015
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -298,16 +298,14 @@ private static ConnectorPageSource createOrcPageSource(
.collect(Collectors.groupingBy(
HiveColumnHandle::getBaseColumnName,
mapping(
column -> column.getHiveColumnProjectionInfo().map(HiveColumnProjectionInfo::getDereferenceNames).orElse(ImmutableList.<String>of()),
toList())));
OrcPageSourceFactory::getDereferencesAsList, toList())));
}
else {
projectionsByColumnIndex = projections.stream()
.collect(Collectors.groupingBy(
HiveColumnHandle::getBaseHiveColumnIndex,
mapping(
column -> column.getHiveColumnProjectionInfo().map(HiveColumnProjectionInfo::getDereferenceNames).orElse(ImmutableList.<String>of()),
toList())));
OrcPageSourceFactory::getDereferencesAsList, toList())));
}

TupleDomainOrcPredicateBuilder predicateBuilder = TupleDomainOrcPredicate.builder()
Expand Down Expand Up @@ -540,4 +538,13 @@ private static OrcColumn getNestedColumn(OrcColumn baseColumn, Optional<HiveColu
}
return current;
}

/**
 * Returns the dereference names of the given column's projection, each converted
 * to lower case using the {@code ENGLISH} locale.
 *
 * @param column column handle whose optional projection info is inspected
 * @return an immutable list of lower-cased dereference names, in their original order;
 *         an empty list when the column carries no projection info
 */
private static List<String> getDereferencesAsList(HiveColumnHandle column)
{
    return column.getHiveColumnProjectionInfo()
            .map(projection -> {
                ImmutableList.Builder<String> lowercaseNames = ImmutableList.builder();
                for (String name : projection.getDereferenceNames()) {
                    // Lower-case each name so later lookups do not depend on the caller's casing
                    lowercaseNames.add(name.toLowerCase(ENGLISH));
                }
                return lowercaseNames.build();
            })
            .orElse(ImmutableList.of());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,42 @@ public void testSnappyCompressedParquetTableCreatedInHive()
onHive().executeQuery("DROP TABLE " + tableName);
}

/**
 * Verifies that fields of an ORC struct declared in Hive with mixed-case names
 * ({@code testCustId}, {@code requestDate}) can be dereferenced from Trino using
 * either the declared spelling or an all-lowercase spelling, with the
 * {@code hive.projection_pushdown_enabled} session property set to both
 * {@code true} and {@code false}.
 *
 * @throws SQLException if setting the session property on the Trino connection fails
 */
@Test
public void testOrcStructsWithNonLowercaseFields()
        throws SQLException
{
    String tableName = "orc_structs_with_non_lowercase";

    ensureDummyExists();
    onHive().executeQuery("DROP TABLE IF EXISTS " + tableName);

    onHive().executeQuery(format(
            "CREATE TABLE %s (" +
                    "   c_bigint BIGINT," +
                    "   c_struct struct<testCustId:string, requestDate:string>)" +
                    "STORED AS ORC ",
            tableName));

    onHive().executeQuery(format(
            "INSERT INTO %s"
                    // insert with SELECT because hive does not support array/map/struct functions in VALUES
                    + " SELECT"
                    + "   1,"
                    + "   named_struct('testCustId', '1234', 'requestDate', 'some day')"
                    // some hive versions don't allow INSERT from SELECT without FROM
                    + " FROM dummy",
            tableName));

    // Run the same dereference checks with pushdown on, then off, so both read paths are exercised
    for (String pushdownEnabled : new String[] {"true", "false"}) {
        setSessionProperty(onTrino().getConnection(), "hive.projection_pushdown_enabled", pushdownEnabled);
        assertThat(onTrino().executeQuery("SELECT c_struct.testCustId FROM " + tableName)).containsOnly(row("1234"));
        assertThat(onTrino().executeQuery("SELECT c_struct.testcustid FROM " + tableName)).containsOnly(row("1234"));
        assertThat(onTrino().executeQuery("SELECT c_struct.requestDate FROM " + tableName)).containsOnly(row("some day"));
    }

    // Clean up the table created above so it does not linger in the shared metastore
    onHive().executeQuery("DROP TABLE " + tableName);
}

@Test(dataProvider = "storageFormatsWithNanosecondPrecision")
public void testTimestampCreatedFromHive(StorageFormat storageFormat)
{
Expand Down

0 comments on commit 8bac015

Please sign in to comment.