diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc index d16b6cfd2e97d..51cca497485ce 100644 --- a/cpp/src/arrow/adapters/orc/adapter.cc +++ b/cpp/src/arrow/adapters/orc/adapter.cc @@ -145,7 +145,10 @@ class OrcStripeReader : public RecordBatchReader { Status ReadNext(std::shared_ptr* out) override { std::unique_ptr batch; - ORC_CATCH_NOT_OK(batch = row_reader_->createRowBatch(batch_size_)); + std::unique_ptr builder; + + ORC_BEGIN_CATCH_NOT_OK + batch = row_reader_->createRowBatch(batch_size_); const liborc::Type& type = row_reader_->getSelectedType(); if (!row_reader_->next(*batch)) { @@ -153,10 +156,8 @@ class OrcStripeReader : public RecordBatchReader { return Status::OK(); } - std::unique_ptr builder; ARROW_ASSIGN_OR_RAISE(builder, RecordBatchBuilder::Make(schema_, pool_, batch->numElements)); - // The top-level type must be a struct to read into an arrow table const auto& struct_batch = checked_cast(*batch); @@ -164,9 +165,9 @@ class OrcStripeReader : public RecordBatchReader { RETURN_NOT_OK(AppendBatch(type.getSubtype(i), struct_batch.fields[i], 0, batch->numElements, builder->GetField(i))); } + ORC_END_CATCH_NOT_OK - ARROW_ASSIGN_OR_RAISE(*out, builder->Flush()); - return Status::OK(); + return builder->Flush().Value(out); } private: @@ -470,15 +471,13 @@ class ORCFileReader::Impl { int64_t nrows) { std::unique_ptr row_reader; std::unique_ptr batch; + std::unique_ptr builder; ORC_BEGIN_CATCH_NOT_OK row_reader = reader_->createRowReader(opts); batch = row_reader->createRowBatch(std::min(nrows, kReadRowsBatch)); - ORC_END_CATCH_NOT_OK - std::unique_ptr builder; ARROW_ASSIGN_OR_RAISE(builder, RecordBatchBuilder::Make(schema, pool_, nrows)); - // The top-level type must be a struct to read into an arrow table const auto& struct_batch = checked_cast(*batch); @@ -489,6 +488,7 @@ class ORCFileReader::Impl { batch->numElements, builder->GetField(i))); } } + ORC_END_CATCH_NOT_OK return builder->Flush(); } diff --git 
def test_timezone_database_absent(datadir):
    """Reading must fail gracefully when no timezone database can be found.

    The example file relies on the timezone "US/Pacific". With TZDIR pointing
    at a non-existent directory, reading should raise an exception rather
    than crash. The check runs in a child interpreter, which sets TZDIR
    before it imports pyarrow.
    """
    orc_path_repr = repr(str(datadir / 'TestOrcFile.testDate1900.orc'))
    child_script = f"""if 1:
        import os
        os.environ['TZDIR'] = '/tmp/non_existent'

        from pyarrow import orc
        try:
            orc_file = orc.ORCFile({orc_path_repr})
            orc_file.read()
        except Exception as e:
            assert "time zone database" in str(e).lower(), e
        else:
            assert False, "Should have raised exception"
    """
    child_argv = [sys.executable, "-c", child_script]
    # check=True turns a non-zero exit status of the child into a test failure.
    subprocess.run(child_argv, check=True)


def test_timezone_absent(datadir, tmpdir):
    """Reading must fail gracefully when one timezone is missing (GH-40633).

    The example file relies on the timezone "US/Pacific". Here the timezone
    database is present but that timezone's entry has been removed from a
    private copy; reading should raise an exception rather than crash.
    """
    system_zoneinfo = Path('/usr/share/zoneinfo')
    if not system_zoneinfo.exists():
        pytest.skip(f"Test needs timezone database in {system_zoneinfo}")

    # Work on a private copy so the system database is left untouched.
    private_zoneinfo = Path(tmpdir / 'zoneinfo')
    try:
        shutil.copytree(system_zoneinfo, private_zoneinfo, symlinks=True)
    except OSError as copy_error:
        pytest.skip(f"Failed to copy timezone database: {copy_error}")
    private_zoneinfo.joinpath('US', 'Pacific').unlink(missing_ok=True)

    tzdir_repr = repr(str(private_zoneinfo))
    orc_path_repr = repr(str(datadir / 'TestOrcFile.testDate1900.orc'))
    child_script = f"""if 1:
        import os
        os.environ['TZDIR'] = {tzdir_repr}

        from pyarrow import orc
        orc_file = orc.ORCFile({orc_path_repr})
        try:
            orc_file.read()
        except Exception as e:
            assert "zoneinfo/US/Pacific" in str(e), e
        else:
            assert False, "Should have raised exception"
    """
    child_argv = [sys.executable, "-c", child_script]
    # check=True turns a non-zero exit status of the child into a test failure.
    subprocess.run(child_argv, check=True)