Skip to content

Commit

Permalink
Separate def level and rep level handling
Browse files Browse the repository at this point in the history
  • Loading branch information
mapleFU committed May 9, 2024
1 parent 2caffa0 commit e4a8c51
Showing 1 changed file with 6 additions and 5 deletions.
11 changes: 6 additions & 5 deletions cpp/src/parquet/column_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1699,27 +1699,28 @@ class TypedRecordReader : public TypedColumnReaderImpl<DType>,

// Count logical records and number of non-null values to read
ARROW_DCHECK(!at_record_start_);
// Scan repetition levels to find record end
int64_t level = levels_position_;
while (levels_position_ < levels_written_) {
int64_t stride =
std::min(levels_written_ - levels_position_, num_records - records_read);
const int64_t position_end = levels_position_ + stride;
for (int64_t i = levels_position_; i < position_end; ++i) {
records_read += rep_levels[i] == 0;
values_to_read += def_levels[i] == this->max_def_level_;
}
levels_position_ = position_end;
if (records_read == num_records) {
// Last rep_level reaches the boundary
ARROW_CHECK_EQ(rep_levels[levels_position_ - 1], 0);
// We've found the number of records we were looking for. Set
// at_record_start_ to true and break
at_record_start_ = true;
// Remove last value if we have reached the end of the record
levels_position_ = levels_position_ - 1;
values_to_read -= def_levels[levels_position_] == this->max_def_level_;
break;
}
levels_position_ = position_end;
}
// Scan definition levels to find number of physical values
values_to_read += std::count(def_levels + level, def_levels + levels_position_,
this->max_def_level_);
*values_seen = values_to_read;
return records_read;
}
Expand Down

0 comments on commit e4a8c51

Please sign in to comment.