From 9dce377cd04441af5ea1996b47031041cc4f6465 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 19 Nov 2024 17:22:58 +0900 Subject: [PATCH] GH-44784: [C++][Parquet] Add `arrow::Result` version of `parquet::arrow::OpenFile()` --- cpp/examples/arrow/parquet_read_write.cc | 2 +- .../parquet/parquet_arrow/reader_writer.cc | 16 +++--- .../parquet/arrow/arrow_reader_writer_test.cc | 50 ++++++++----------- cpp/src/parquet/arrow/reader.cc | 7 ++- cpp/src/parquet/arrow/reader.h | 10 ++++ .../arrow/reconstruct_internal_test.cc | 4 +- 6 files changed, 49 insertions(+), 40 deletions(-) diff --git a/cpp/examples/arrow/parquet_read_write.cc b/cpp/examples/arrow/parquet_read_write.cc index 3b8b4c2212b75..a07c10fda5af8 100644 --- a/cpp/examples/arrow/parquet_read_write.cc +++ b/cpp/examples/arrow/parquet_read_write.cc @@ -34,7 +34,7 @@ arrow::Status ReadFullFile(std::string path_to_file) { // Open Parquet file reader std::unique_ptr arrow_reader; - ARROW_RETURN_NOT_OK(parquet::arrow::OpenFile(input, pool, &arrow_reader)); + ARROW_ASSIGN_OR_RAISE(arrow_reader, parquet::arrow::OpenFile(input, pool)); // Read entire file as a single Arrow table std::shared_ptr table; diff --git a/cpp/examples/parquet/parquet_arrow/reader_writer.cc b/cpp/examples/parquet/parquet_arrow/reader_writer.cc index f5d96ec16ca64..448c9ecfb88cb 100644 --- a/cpp/examples/parquet/parquet_arrow/reader_writer.cc +++ b/cpp/examples/parquet/parquet_arrow/reader_writer.cc @@ -68,8 +68,8 @@ void read_whole_file() { arrow::default_memory_pool())); std::unique_ptr reader; - PARQUET_THROW_NOT_OK( - parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader)); + PARQUET_ASSIGN_OR_THROW(reader, + parquet::arrow::OpenFile(infile, arrow::default_memory_pool())); std::shared_ptr table; PARQUET_THROW_NOT_OK(reader->ReadTable(&table)); std::cout << "Loaded " << table->num_rows() << " rows in " << table->num_columns() @@ -85,8 +85,8 @@ void read_single_rowgroup() { arrow::default_memory_pool())); std::unique_ptr reader; - PARQUET_THROW_NOT_OK( - parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader)); + PARQUET_ASSIGN_OR_THROW(reader, + parquet::arrow::OpenFile(infile, arrow::default_memory_pool())); std::shared_ptr table; PARQUET_THROW_NOT_OK(reader->RowGroup(0)->ReadTable(&table)); std::cout << "Loaded " << table->num_rows() << " rows in " << table->num_columns() @@ -102,8 +102,8 @@ void read_single_column() { arrow::default_memory_pool())); std::unique_ptr reader; - PARQUET_THROW_NOT_OK( - parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader)); + PARQUET_ASSIGN_OR_THROW(reader, + parquet::arrow::OpenFile(infile, arrow::default_memory_pool())); std::shared_ptr array; PARQUET_THROW_NOT_OK(reader->ReadColumn(0, &array)); PARQUET_THROW_NOT_OK(arrow::PrettyPrint(*array, 4, &std::cout)); @@ -122,8 +122,8 @@ void read_single_column_chunk() { arrow::default_memory_pool())); std::unique_ptr reader; - PARQUET_THROW_NOT_OK( - parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader)); + PARQUET_ASSIGN_OR_THROW(reader, + parquet::arrow::OpenFile(infile, arrow::default_memory_pool())); std::shared_ptr array; PARQUET_THROW_NOT_OK(reader->RowGroup(0)->Column(0)->Read(&array)); PARQUET_THROW_NOT_OK(arrow::PrettyPrint(*array, 4, &std::cout)); diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc index 73974f9b2a888..b5fd2bc2553d9 100644 --- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc +++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc @@ -448,9 +448,8 @@ void DoSimpleRoundtrip(const std::shared_ptr& table, bool use_threads, ASSERT_NO_FATAL_FAILURE( WriteTableToBuffer(table, row_group_size, arrow_properties, &buffer)); - std::unique_ptr reader; - ASSERT_OK_NO_THROW(OpenFile(std::make_shared(buffer), - ::arrow::default_memory_pool(), &reader)); + ASSERT_OK_AND_ASSIGN(auto reader, OpenFile(std::make_shared(buffer), + ::arrow::default_memory_pool())); reader->set_use_threads(use_threads); if (column_subset.size() > 0) { @@ -1095,8 +1094,7 @@ TYPED_TEST(TestParquetIO, SingleColumnTableRequiredChunkedWriteArrowIO) { auto source = std::make_shared(pbuffer); std::shared_ptr<::arrow::Table> out; - std::unique_ptr reader; - ASSERT_OK_NO_THROW(OpenFile(source, ::arrow::default_memory_pool(), &reader)); + ASSERT_OK_AND_ASSIGN(auto reader, OpenFile(source, ::arrow::default_memory_pool())); ASSERT_NO_FATAL_FAILURE(this->ReadTableFromFile(std::move(reader), &out)); ASSERT_EQ(1, out->num_columns()); ASSERT_EQ(values->length(), out->num_rows()); @@ -2295,9 +2293,8 @@ TEST(TestArrowReadWrite, ReadSingleRowGroup) { ASSERT_NO_FATAL_FAILURE(WriteTableToBuffer(table, num_rows / 2, default_arrow_writer_properties(), &buffer)); - std::unique_ptr reader; - ASSERT_OK_NO_THROW(OpenFile(std::make_shared(buffer), - ::arrow::default_memory_pool(), &reader)); + ASSERT_OK_AND_ASSIGN(auto reader, OpenFile(std::make_shared(buffer), + ::arrow::default_memory_pool())); ASSERT_EQ(2, reader->num_row_groups()); @@ -2357,9 +2354,8 @@ TEST(TestArrowReadWrite, ReadTableManually) { ASSERT_NO_FATAL_FAILURE(WriteTableToBuffer(expected, num_rows / 2, default_arrow_writer_properties(), &buffer)); - std::unique_ptr reader; - ASSERT_OK_NO_THROW(OpenFile(std::make_shared(buffer), - ::arrow::default_memory_pool(), &reader)); + ASSERT_OK_AND_ASSIGN(auto reader, OpenFile(std::make_shared(buffer), + ::arrow::default_memory_pool())); ASSERT_EQ(2, reader->num_row_groups()); @@ -2476,9 +2472,8 @@ TEST(TestArrowReadWrite, CoalescedReadsAndNonCoalescedReads) { ASSERT_NO_FATAL_FAILURE(WriteTableToBuffer(expected, num_rows / 2, default_arrow_writer_properties(), &buffer)); - std::unique_ptr reader; - ASSERT_OK_NO_THROW(OpenFile(std::make_shared(buffer), - ::arrow::default_memory_pool(), &reader)); + ASSERT_OK_AND_ASSIGN(auto reader, OpenFile(std::make_shared(buffer), + ::arrow::default_memory_pool())); ASSERT_EQ(2, reader->num_row_groups()); @@ -2594,9 +2589,8 @@ TEST(TestArrowReadWrite, ScanContents) { ASSERT_NO_FATAL_FAILURE(WriteTableToBuffer(table, num_rows / 2, default_arrow_writer_properties(), &buffer)); - std::unique_ptr reader; - ASSERT_OK_NO_THROW(OpenFile(std::make_shared(buffer), - ::arrow::default_memory_pool(), &reader)); + ASSERT_OK_AND_ASSIGN(auto reader, OpenFile(std::make_shared(buffer), + ::arrow::default_memory_pool())); int64_t num_rows_returned = 0; ASSERT_OK_NO_THROW(reader->ScanContents({}, 256, &num_rows_returned)); @@ -2689,9 +2683,8 @@ TEST(TestArrowReadWrite, ListLargeRecords) { ASSERT_NO_FATAL_FAILURE(WriteTableToBuffer(table, row_group_size, default_arrow_writer_properties(), &buffer)); - std::unique_ptr reader; - ASSERT_OK_NO_THROW(OpenFile(std::make_shared(buffer), - ::arrow::default_memory_pool(), &reader)); + ASSERT_OK_AND_ASSIGN(auto reader, OpenFile(std::make_shared(buffer), + ::arrow::default_memory_pool())); // Read everything std::shared_ptr
result; @@ -2699,8 +2692,8 @@ TEST(TestArrowReadWrite, ListLargeRecords) { ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*table, *result)); // Read 1 record at a time - ASSERT_OK_NO_THROW(OpenFile(std::make_shared(buffer), - ::arrow::default_memory_pool(), &reader)); + ASSERT_OK_AND_ASSIGN(reader, OpenFile(std::make_shared(buffer), + ::arrow::default_memory_pool())); std::unique_ptr col_reader; ASSERT_OK(reader->GetColumn(0, &col_reader)); @@ -2974,9 +2967,8 @@ TEST(ArrowReadWrite, DecimalStats) { ASSERT_NO_FATAL_FAILURE(WriteTableToBuffer(table, /*row_group_size=*/100, default_arrow_writer_properties(), &buffer)); - std::unique_ptr reader; - ASSERT_OK_NO_THROW(OpenFile(std::make_shared(buffer), - ::arrow::default_memory_pool(), &reader)); + ASSERT_OK_AND_ASSIGN(auto reader, OpenFile(std::make_shared(buffer), + ::arrow::default_memory_pool())); std::shared_ptr min, max; ReadSingleColumnFileStatistics(std::move(reader), &min, &max); @@ -3575,8 +3567,8 @@ class TestNestedSchemaRead : public ::testing::TestWithParam { void InitReader() { ASSERT_OK_AND_ASSIGN(auto buffer, nested_parquet_->Finish()); - ASSERT_OK_NO_THROW(OpenFile(std::make_shared(buffer), - ::arrow::default_memory_pool(), &reader_)); + ASSERT_OK_AND_ASSIGN(reader_, OpenFile(std::make_shared(buffer), + ::arrow::default_memory_pool())); } void InitNewParquetFile(const std::shared_ptr& schema, int num_rows) { @@ -5344,8 +5336,8 @@ TEST(TestArrowReadWrite, MultithreadedWrite) { // Read to verify the data. std::shared_ptr
result; - std::unique_ptr reader; - ASSERT_OK_NO_THROW(OpenFile(std::make_shared(buffer), pool, &reader)); + ASSERT_OK_AND_ASSIGN(auto reader, + OpenFile(std::make_shared(buffer), pool)); ASSERT_OK_NO_THROW(reader->ReadTable(&result)); ASSERT_NO_FATAL_FAILURE(::arrow::AssertTablesEqual(*table, *result)); } diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc index 4f57c3f4f56f7..3002d90b5fff8 100644 --- a/cpp/src/parquet/arrow/reader.cc +++ b/cpp/src/parquet/arrow/reader.cc @@ -1372,9 +1372,14 @@ Result> FileReaderBuilder::Build() { Status OpenFile(std::shared_ptr<::arrow::io::RandomAccessFile> file, MemoryPool* pool, std::unique_ptr* reader) { + return OpenFile(std::move(file), pool).Value(reader); +} + +Result> OpenFile( + std::shared_ptr<::arrow::io::RandomAccessFile> file, MemoryPool* pool) { FileReaderBuilder builder; RETURN_NOT_OK(builder.Open(std::move(file))); - return builder.memory_pool(pool)->Build(reader); + return builder.memory_pool(pool)->Build(); } namespace internal { diff --git a/cpp/src/parquet/arrow/reader.h b/cpp/src/parquet/arrow/reader.h index 6e46ca43f7b18..ec996a5afa615 100644 --- a/cpp/src/parquet/arrow/reader.h +++ b/cpp/src/parquet/arrow/reader.h @@ -357,11 +357,21 @@ class PARQUET_EXPORT FileReaderBuilder { /// \brief Build FileReader from Arrow file and MemoryPool /// /// Advanced settings are supported through the FileReaderBuilder class. +/// +/// \deprecated Deprecated in 19.0.0. Use arrow::Result version instead. +ARROW_DEPRECATED("Deprecated in 19.0.0. Use arrow::Result version instead.") PARQUET_EXPORT ::arrow::Status OpenFile(std::shared_ptr<::arrow::io::RandomAccessFile>, ::arrow::MemoryPool* allocator, std::unique_ptr* reader); +/// \brief Build FileReader from Arrow file and MemoryPool +/// +/// Advanced settings are supported through the FileReaderBuilder class. +PARQUET_EXPORT +::arrow::Result> OpenFile( + std::shared_ptr<::arrow::io::RandomAccessFile>, ::arrow::MemoryPool* allocator); + /// @} PARQUET_EXPORT diff --git a/cpp/src/parquet/arrow/reconstruct_internal_test.cc b/cpp/src/parquet/arrow/reconstruct_internal_test.cc index 4e1f421498e85..ecdbcc5a3da14 100644 --- a/cpp/src/parquet/arrow/reconstruct_internal_test.cc +++ b/cpp/src/parquet/arrow/reconstruct_internal_test.cc @@ -189,7 +189,9 @@ class FileTester { protected: Status Open(std::shared_ptr buffer, MemoryPool* pool) { pool_ = pool; - return OpenFile(std::make_shared(buffer), pool_, &file_reader_); + ARROW_ASSIGN_OR_RAISE(file_reader_, + OpenFile(std::make_shared(buffer), pool_)); + return Status::OK(); } MemoryPool* pool_;