Skip to content

Commit

Permalink
temp ugly fix
Browse files Browse the repository at this point in the history
  • Loading branch information
mapleFU committed Sep 14, 2023
1 parent 15a8ac3 commit 17a4922
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 8 deletions.
13 changes: 7 additions & 6 deletions cpp/src/arrow/dataset/file_parquet.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ bool IsNan(const Scalar& value) {
}

std::optional<compute::Expression> ColumnChunkStatisticsAsExpression(
const SchemaField& schema_field, const parquet::RowGroupMetaData& metadata) {
const SchemaField& schema_field, const parquet::RowGroupMetaData& metadata, const arrow::Field& dest_field) {
// For the remainder of this function, failures to extract/parse statistics
// are ignored by returning std::nullopt. The goal is twofold. First,
// avoid an optimization which breaks the computation. Second, allow the
Expand All @@ -131,7 +131,7 @@ std::optional<compute::Expression> ColumnChunkStatisticsAsExpression(
return std::nullopt;
}

return ParquetFileFragment::EvaluateStatisticsAsExpression(*field, *statistics);
return ParquetFileFragment::EvaluateStatisticsAsExpression(*field, *statistics, dest_field);
}

void AddColumnIndices(const SchemaField& schema_field,
Expand Down Expand Up @@ -311,7 +311,7 @@ Result<bool> IsSupportedParquetFile(const ParquetFileFormat& format,
} // namespace

std::optional<compute::Expression> ParquetFileFragment::EvaluateStatisticsAsExpression(
const Field& field, const parquet::Statistics& statistics) {
const Field& field, const parquet::Statistics& statistics, const Field& dest_field) {
auto field_expr = compute::field_ref(field.name());

// Optimize for corner case where all values are nulls
Expand All @@ -324,8 +324,8 @@ std::optional<compute::Expression> ParquetFileFragment::EvaluateStatisticsAsExpr
return std::nullopt;
}

auto maybe_min = min->CastTo(field.type());
auto maybe_max = max->CastTo(field.type());
auto maybe_min = min->CastTo(dest_field.type());
auto maybe_max = max->CastTo(dest_field.type());

if (maybe_min.ok() && maybe_max.ok()) {
min = maybe_min.MoveValueUnsafe();
Expand Down Expand Up @@ -799,12 +799,13 @@ Result<std::vector<compute::Expression>> ParquetFileFragment::TestRowGroups(
statistics_expressions_complete_[match[0]] = true;

const SchemaField& schema_field = manifest_->schema_fields[match[0]];
auto arrow_field = physical_schema_->field(match[0]);
int i = 0;
for (int row_group : *row_groups_) {
auto row_group_metadata = metadata_->RowGroup(row_group);

if (auto minmax =
ColumnChunkStatisticsAsExpression(schema_field, *row_group_metadata)) {
ColumnChunkStatisticsAsExpression(schema_field, *row_group_metadata, *arrow_field)) {
FoldingAnd(&statistics_expressions_[i], std::move(*minmax));
ARROW_ASSIGN_OR_RAISE(statistics_expressions_[i],
statistics_expressions_[i].Bind(*physical_schema_));
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/dataset/file_parquet.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ class ARROW_DS_EXPORT ParquetFileFragment : public FileFragment {
Result<std::shared_ptr<Fragment>> Subset(std::vector<int> row_group_ids);

static std::optional<compute::Expression> EvaluateStatisticsAsExpression(
const Field& field, const parquet::Statistics& statistics);
const Field& field, const parquet::Statistics& statistics, const Field& dest_field);

private:
ParquetFileFragment(FileSource source, std::shared_ptr<FileFormat> format,
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/dataset/file_parquet_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,7 @@ TEST(TestParquetStatistics, NullMax) {
parquet::ParquetFileReader::OpenFile(dir_string + "/nan_in_stats.parquet");
auto statistics = reader->RowGroup(0)->metadata()->ColumnChunk(0)->statistics();
auto stat_expression =
ParquetFileFragment::EvaluateStatisticsAsExpression(*field, *statistics);
ParquetFileFragment::EvaluateStatisticsAsExpression(*field, *statistics, *field);
EXPECT_EQ(stat_expression->ToString(), "(x >= 1)");
}

Expand Down

0 comments on commit 17a4922

Please sign in to comment.