diff --git a/be/src/formats/parquet/file_writer.cpp b/be/src/formats/parquet/file_writer.cpp
index bb0ecc89b0ae43..0b0a2bdd331874 100644
--- a/be/src/formats/parquet/file_writer.cpp
+++ b/be/src/formats/parquet/file_writer.cpp
@@ -377,11 +377,7 @@ Status SyncFileWriter::_flush_row_group() {
             _chunk_writer->close();
         } catch (const ::parquet::ParquetStatusException& e) {
             _chunk_writer.reset();
-
-            // this is to avoid calling ParquetFileWriter.Close which incurs segfault
             _closed = true;
-            _writer.release();
-
             auto st = Status::IOError(fmt::format("{}: {}", "flush rowgroup error", e.what()));
             LOG(WARNING) << st;
             return st;
diff --git a/thirdparty/download-thirdparty.sh b/thirdparty/download-thirdparty.sh
index ba1a0d7ccd7d1a..e83726150d9f73 100755
--- a/thirdparty/download-thirdparty.sh
+++ b/thirdparty/download-thirdparty.sh
@@ -475,13 +475,17 @@ fi
 echo "Finished patching $SERDES_SOURCE"
 cd -
 
-# patch arrows to use our built jemalloc
+# patch arrows
 if [[ -d $TP_SOURCE_DIR/$ARROW_SOURCE ]] ; then
     cd $TP_SOURCE_DIR/$ARROW_SOURCE
     if [ ! -f $PATCHED_MARK ] && [ $ARROW_SOURCE = "arrow-apache-arrow-5.0.0" ] ; then
+        # use our built jemalloc
         patch -p1 < $TP_PATCH_DIR/arrow-5.0.0-force-use-external-jemalloc.patch
+        # fix arrow parquet exception handling; must share the guard above,
+        # a second "! -f $PATCHED_MARK" check after the touch would never pass
+        patch -p1 < $TP_PATCH_DIR/arrow-5.0.0-fix-exception-handling.patch
         touch $PATCHED_MARK
     fi
     cd -
     echo "Finished patching $ARROW_SOURCE"
 fi
diff --git a/thirdparty/patches/arrow-5.0.0-fix-exception-handling.patch b/thirdparty/patches/arrow-5.0.0-fix-exception-handling.patch
new file mode 100644
index 00000000000000..e8e78ed992f1dc
--- /dev/null
+++ b/thirdparty/patches/arrow-5.0.0-fix-exception-handling.patch
@@ -0,0 +1,26 @@
+diff --git a/cpp/src/parquet/file_writer.cc b/cpp/src/parquet/file_writer.cc
+index deac9586e..836f9e93c 100644
+--- a/cpp/src/parquet/file_writer.cc
++++ b/cpp/src/parquet/file_writer.cc
+@@ -181,15 +181,15 @@ class RowGroupSerializer : public RowGroupWriter::Contents {
+       closed_ = true;
+       CheckRowsWritten();
+ 
+-      for (size_t i = 0; i < column_writers_.size(); i++) {
+-        if (column_writers_[i]) {
+-          total_bytes_written_ += column_writers_[i]->Close();
+-          column_writers_[i].reset();
++      // Detach the writers from the member before closing them, so the member
++      // is already empty if Close() throws part-way through the loop.
++      auto column_writers = std::move(column_writers_);
++      for (size_t i = 0; i < column_writers.size(); i++) {
++        if (column_writers[i]) {
++          total_bytes_written_ += column_writers[i]->Close();
+         }
+       }
+ 
+-      column_writers_.clear();
+-
+       // Ensures all columns have been written
+       metadata_->set_num_rows(num_rows_);
+       metadata_->Finish(total_bytes_written_, row_group_ordinal_);