From 05d82b4ea8ffcb38f9a6b6ce80e890a3de0b4b65 Mon Sep 17 00:00:00 2001 From: mwish Date: Tue, 16 May 2023 00:32:37 +0800 Subject: [PATCH] GH-35519: [C++][Parquet] Fixing exception handling in parquet FileSerializer (#35520) ### Rationale for this change Mentioned in https://github.com/apache/arrow/issues/35519 ### What changes are included in this PR? Exception handling in `CheckRowsWritten` ### Are these changes tested? No, I don't know how to mock it currently ### Are there any user-facing changes? no * Closes: #35519 Authored-by: mwish Signed-off-by: Antoine Pitrou --- cpp/src/parquet/file_writer.cc | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/cpp/src/parquet/file_writer.cc b/cpp/src/parquet/file_writer.cc index f1098e4a74bc5..913c99e0910ea 100644 --- a/cpp/src/parquet/file_writer.cc +++ b/cpp/src/parquet/file_writer.cc @@ -219,17 +219,16 @@ class RowGroupSerializer : public RowGroupWriter::Contents { closed_ = true; CheckRowsWritten(); - for (size_t i = 0; i < column_writers_.size(); i++) { - if (column_writers_[i]) { - total_bytes_written_ += column_writers_[i]->Close(); + // Avoid invalid state if ColumnWriter::Close() throws internally. + auto column_writers = std::move(column_writers_); + for (size_t i = 0; i < column_writers.size(); i++) { + if (column_writers[i]) { + total_bytes_written_ += column_writers[i]->Close(); total_compressed_bytes_written_ += - column_writers_[i]->total_compressed_bytes_written(); - column_writers_[i].reset(); + column_writers[i]->total_compressed_bytes_written(); } } - column_writers_.clear(); - // Ensures all columns have been written metadata_->set_num_rows(num_rows_); metadata_->Finish(total_bytes_written_, row_group_ordinal_); @@ -261,8 +260,10 @@ class RowGroupSerializer : public RowGroupWriter::Contents { } } else if (buffered_row_group_ && column_writers_.size() > 0) { // when buffered_row_group = true + DCHECK(column_writers_[0] != nullptr); int64_t current_col_rows = column_writers_[0]->rows_written(); for (int i = 1; i < static_cast(column_writers_.size()); i++) { + DCHECK(column_writers_[i] != nullptr); int64_t current_col_rows_i = column_writers_[i]->rows_written(); if (current_col_rows != current_col_rows_i) { ThrowRowsMisMatchError(i, current_col_rows_i, current_col_rows); @@ -380,7 +381,7 @@ class FileSerializer : public ParquetFileWriter::Contents { void AddKeyValueMetadata( const std::shared_ptr& key_value_metadata) override { if (key_value_metadata_ == nullptr) { - key_value_metadata_ = std::move(key_value_metadata); + key_value_metadata_ = key_value_metadata; } else if (key_value_metadata != nullptr) { key_value_metadata_ = key_value_metadata_->Merge(*key_value_metadata); } @@ -388,7 +389,7 @@ class FileSerializer : public ParquetFileWriter::Contents { ~FileSerializer() override { try { - Close(); + FileSerializer::Close(); } catch (...) { } }