Skip to content

Commit

Permalink
Minor: style enhancement for parquet FileMetadata Subset
Browse files Browse the repository at this point in the history
  • Loading branch information
mapleFU committed Dec 21, 2023
1 parent 2308cdf commit ac0c569
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 20 deletions.
2 changes: 1 addition & 1 deletion cpp/src/arrow/util/bit_stream_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ class BitReader {

/// Returns the number of bytes left in the stream, not including the current
/// byte (i.e., there may be an additional fraction of a byte).
int bytes_left() {
int bytes_left() const {
return max_bytes_ -
(byte_offset_ + static_cast<int>(bit_util::BytesForBits(bit_offset_)));
}
Expand Down
30 changes: 12 additions & 18 deletions cpp/src/parquet/metadata.cc
Original file line number Diff line number Diff line change
Expand Up @@ -793,26 +793,19 @@ class FileMetaData::FileMetaDataImpl {

std::shared_ptr<FileMetaData> out(new FileMetaData());
out->impl_ = std::make_unique<FileMetaDataImpl>();
out->impl_->metadata_ = std::make_unique<format::FileMetaData>();
out->impl_->metadata_ = std::make_unique<format::FileMetaData>(*metadata_);
auto output_metadata = out->impl_->metadata_.get();

auto metadata = out->impl_->metadata_.get();
metadata->version = metadata_->version;
metadata->schema = metadata_->schema;

metadata->row_groups.resize(row_groups.size());
// Discard row groups that are not in the subset
output_metadata->num_rows = 0;
output_metadata->row_groups.clear();
output_metadata->row_groups.resize(row_groups.size());
int i = 0;
for (int selected_index : row_groups) {
metadata->num_rows += row_group(selected_index).num_rows;
metadata->row_groups[i++] = row_group(selected_index);
output_metadata->num_rows += row_group(selected_index).num_rows;
output_metadata->row_groups[i++] = row_group(selected_index);
}

metadata->key_value_metadata = metadata_->key_value_metadata;
metadata->created_by = metadata_->created_by;
metadata->column_orders = metadata_->column_orders;
metadata->encryption_algorithm = metadata_->encryption_algorithm;
metadata->footer_signing_key_metadata = metadata_->footer_signing_key_metadata;
metadata->__isset = metadata_->__isset;

out->impl_->schema_ = schema_;
out->impl_->writer_version_ = writer_version_;
out->impl_->key_value_metadata_ = key_value_metadata_;
Expand Down Expand Up @@ -886,13 +879,14 @@ std::shared_ptr<FileMetaData> FileMetaData::Make(
const void* metadata, uint32_t* metadata_len,
std::shared_ptr<InternalFileDecryptor> file_decryptor) {
return std::shared_ptr<FileMetaData>(new FileMetaData(
metadata, metadata_len, default_reader_properties(), file_decryptor));
metadata, metadata_len, default_reader_properties(), std::move(file_decryptor)));
}

FileMetaData::FileMetaData(const void* metadata, uint32_t* metadata_len,
const ReaderProperties& properties,
std::shared_ptr<InternalFileDecryptor> file_decryptor)
: impl_(new FileMetaDataImpl(metadata, metadata_len, properties, file_decryptor)) {}
: impl_(new FileMetaDataImpl(metadata, metadata_len, properties,
std::move(file_decryptor))) {}

FileMetaData::FileMetaData() : impl_(new FileMetaDataImpl()) {}

Expand Down Expand Up @@ -942,7 +936,7 @@ const std::string& FileMetaData::footer_signing_key_metadata() const {

void FileMetaData::set_file_decryptor(
std::shared_ptr<InternalFileDecryptor> file_decryptor) {
impl_->set_file_decryptor(file_decryptor);
impl_->set_file_decryptor(std::move(file_decryptor));
}

ParquetVersion::type FileMetaData::version() const {
Expand Down
8 changes: 7 additions & 1 deletion cpp/src/parquet/metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -306,9 +306,15 @@ class PARQUET_EXPORT FileMetaData {
int num_schema_elements() const;

/// \brief The total number of rows.
///
/// If the FileMetaData was generated by `Subset()`, the number of rows
/// is the sum of the row counts of the remaining RowGroups.
int64_t num_rows() const;

/// \brief The number of row groups in the file.
///
/// If the FileMetaData was generated by `Subset()`, the number of
/// row groups is the number of remaining RowGroups.
int num_row_groups() const;

/// \brief Return the RowGroupMetaData of the corresponding row group ordinal.
Expand Down Expand Up @@ -338,7 +344,7 @@ class PARQUET_EXPORT FileMetaData {
/// \brief Size of the original thrift encoded metadata footer.
uint32_t size() const;

/// \brief Indicate if all of the FileMetadata's RowGroups can be decompressed.
/// \brief Indicate if all of the FileMetaData's RowGroups can be decompressed.
///
/// This will return false if any of the RowGroup's page is compressed with a
/// compression format which is not compiled in the current parquet library.
Expand Down

0 comments on commit ac0c569

Please sign in to comment.