Skip to content

Commit

Permalink
Handle nested types when parsing sort fields
Browse files Browse the repository at this point in the history
Previously, `parseSortFields` collected field ids only from the
top-level columns and did not consider the nested fields of nested types.
As a result, when a query's `sorted_by` property used a nested field of a
nested type, Trino would throw an exception saying the column doesn't exist,
because the field id of the nested column was missing from the
`baseColumnFieldIds` set.

This commit fixes the issue by recursively collecting the field ids of
table columns whose type is a nested type.

Fix: #19620
  • Loading branch information
mattheusv committed May 24, 2024
1 parent b061560 commit feefe77
Showing 1 changed file with 26 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@
import org.apache.iceberg.SortField;
import org.apache.iceberg.SortOrder;
import org.apache.iceberg.SortOrderBuilder;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Type.NestedType;
import org.apache.iceberg.types.Types.NestedField;

import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
Expand All @@ -29,7 +32,6 @@
import static com.google.common.base.MoreObjects.firstNonNull;
import static com.google.common.base.Verify.verify;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static io.trino.plugin.iceberg.IcebergTableProperties.SORTED_BY_PROPERTY;
import static io.trino.plugin.iceberg.PartitionFields.fromIdentifierToColumn;
import static io.trino.plugin.iceberg.PartitionFields.quotedName;
Expand Down Expand Up @@ -60,9 +62,8 @@ public static SortOrder parseSortFields(Schema schema, List<String> fields)
throw new TrinoException(INVALID_TABLE_PROPERTY, "Invalid " + SORTED_BY_PROPERTY + " definition", e);
}

Set<Integer> baseColumnFieldIds = schema.columns().stream()
.map(Types.NestedField::fieldId)
.collect(toImmutableSet());
Set<Integer> baseColumnFieldIds = collectBaseColumnFieldIds(schema);

for (SortField field : sortOrder.fields()) {
if (!baseColumnFieldIds.contains(field.sourceId())) {
throw new TrinoException(COLUMN_NOT_FOUND, "Column not found: " + schema.findColumnName(field.sourceId()));
Expand All @@ -72,6 +73,26 @@ public static SortOrder parseSortFields(Schema schema, List<String> fields)
return sortOrder;
}

/**
 * Gathers the field ids reachable from the columns of {@code schema}.
 *
 * <p>Each top-level column is handed to {@code addNestedField}, which records the
 * column's own id and descends into nested types, so the result also contains the
 * ids of fields declared inside nested-typed columns.
 *
 * @param schema the schema whose columns are inspected
 * @return a freshly created, mutable set of the collected field ids
 */
private static Set<Integer> collectBaseColumnFieldIds(Schema schema)
{
    Set<Integer> fieldIds = new HashSet<>();
    for (NestedField column : schema.columns()) {
        addNestedField(fieldIds, column);
    }
    return fieldIds;
}

/**
 * Records the id of {@code field} in {@code baseColumnFieldIds} and, when the field's
 * type is a nested type, does the same recursively for every field of that type.
 *
 * @param baseColumnFieldIds the set that receives the field ids; it is mutated in place
 * @param field the field to record and, if nested, to descend into
 */
private static void addNestedField(Set<Integer> baseColumnFieldIds, NestedField field)
{
    baseColumnFieldIds.add(field.fieldId());

    Type fieldType = field.type();
    if (!fieldType.isNestedType()) {
        // Non-nested type: there are no child fields to visit.
        return;
    }

    NestedType container = fieldType.asNestedType();
    container.fields().forEach(child -> addNestedField(baseColumnFieldIds, child));
}

public static void parseSortFields(SortOrderBuilder<?> sortOrderBuilder, List<String> fields)
{
fields.forEach(field -> parseSortField(sortOrderBuilder, field));
Expand Down

0 comments on commit feefe77

Please sign in to comment.