diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index 56f2242514..91745a5864 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -1876,6 +1876,7 @@ def data_file_statistics_from_parquet_metadata( col_aggs = {} + invalidate_col: Set[int] = set() for r in range(parquet_metadata.num_row_groups): # References: # https://github.com/apache/iceberg/blob/fc381a81a1fdb8f51a0637ca27cd30673bd7aad3/parquet/src/main/java/org/apache/iceberg/parquet/ParquetUtil.java#L232 @@ -1891,8 +1892,6 @@ def data_file_statistics_from_parquet_metadata( else: split_offsets.append(data_offset) - invalidate_col: Set[int] = set() - for pos in range(parquet_metadata.num_columns): column = row_group.column(pos) field_id = parquet_column_mapping[column.path_in_schema]