From d42ed9611f7043a30d102775c7de7b3edee026be Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 29 Feb 2024 15:45:58 +0100 Subject: [PATCH] GH-40153: [C++][Python] Fix test_gdb failures on 32-bit --- cpp/gdb_arrow.py | 3 +- python/pyarrow/tests/test_gdb.py | 81 ++++++++++++++++++++++---------- 2 files changed, 57 insertions(+), 27 deletions(-) diff --git a/cpp/gdb_arrow.py b/cpp/gdb_arrow.py index e6180f2ff0eeb..c3f5ab62981ec 100644 --- a/cpp/gdb_arrow.py +++ b/cpp/gdb_arrow.py @@ -304,7 +304,8 @@ def format_timestamp(val, unit): seconds, subseconds = divmod(val, traits.multiplier) try: dt = datetime.datetime.utcfromtimestamp(seconds) - except (ValueError, OSError): # value out of range for datetime.datetime + except (ValueError, OSError, OverflowError): + # value out of range for datetime.datetime pretty = "too large to represent" else: pretty = dt.isoformat().replace('T', ' ') diff --git a/python/pyarrow/tests/test_gdb.py b/python/pyarrow/tests/test_gdb.py index d0d241cc56438..0d12d710dcf64 100644 --- a/python/pyarrow/tests/test_gdb.py +++ b/python/pyarrow/tests/test_gdb.py @@ -885,32 +885,61 @@ def test_arrays_heap(gdb_arrow): ("arrow::DurationArray of type arrow::duration" "(arrow::TimeUnit::NANO), length 2, offset 0, null count 1 = {" "[0] = null, [1] = -1234567890123456789ns}")) - check_heap_repr( - gdb_arrow, "heap_timestamp_array_s", - ("arrow::TimestampArray of type arrow::timestamp" - "(arrow::TimeUnit::SECOND), length 4, offset 0, null count 1 = {" - "[0] = null, [1] = 0s [1970-01-01 00:00:00], " - "[2] = -2203932304s [1900-02-28 12:34:56], " - "[3] = 63730281600s [3989-07-14 00:00:00]}")) - check_heap_repr( - gdb_arrow, "heap_timestamp_array_ms", - ("arrow::TimestampArray of type arrow::timestamp" - "(arrow::TimeUnit::MILLI), length 3, offset 0, null count 1 = {" - "[0] = null, [1] = -2203932303877ms [1900-02-28 12:34:56.123], " - "[2] = 63730281600789ms [3989-07-14 00:00:00.789]}")) - check_heap_repr( - gdb_arrow, "heap_timestamp_array_us", - ("arrow::TimestampArray of type arrow::timestamp" - "(arrow::TimeUnit::MICRO), length 3, offset 0, null count 1 = {" - "[0] = null, " - "[1] = -2203932303345679us [1900-02-28 12:34:56.654321], " - "[2] = 63730281600456789us [3989-07-14 00:00:00.456789]}")) - check_heap_repr( - gdb_arrow, "heap_timestamp_array_ns", - ("arrow::TimestampArray of type arrow::timestamp" - "(arrow::TimeUnit::NANO), length 2, offset 0, null count 1 = {" - "[0] = null, " - "[1] = -2203932303012345679ns [1900-02-28 12:34:56.987654321]}")) + if sys.maxsize > 2**32: + check_heap_repr( + gdb_arrow, "heap_timestamp_array_s", + ("arrow::TimestampArray of type arrow::timestamp" + "(arrow::TimeUnit::SECOND), length 4, offset 0, null count 1 = {" + "[0] = null, [1] = 0s [1970-01-01 00:00:00], " + "[2] = -2203932304s [1900-02-28 12:34:56], " + "[3] = 63730281600s [3989-07-14 00:00:00]}")) + check_heap_repr( + gdb_arrow, "heap_timestamp_array_ms", + ("arrow::TimestampArray of type arrow::timestamp" + "(arrow::TimeUnit::MILLI), length 3, offset 0, null count 1 = {" + "[0] = null, [1] = -2203932303877ms [1900-02-28 12:34:56.123], " + "[2] = 63730281600789ms [3989-07-14 00:00:00.789]}")) + check_heap_repr( + gdb_arrow, "heap_timestamp_array_us", + ("arrow::TimestampArray of type arrow::timestamp" + "(arrow::TimeUnit::MICRO), length 3, offset 0, null count 1 = {" + "[0] = null, " + "[1] = -2203932303345679us [1900-02-28 12:34:56.654321], " + "[2] = 63730281600456789us [3989-07-14 00:00:00.456789]}")) + check_heap_repr( + gdb_arrow, "heap_timestamp_array_ns", + ("arrow::TimestampArray of type arrow::timestamp" + "(arrow::TimeUnit::NANO), length 2, offset 0, null count 1 = {" + "[0] = null, " + "[1] = -2203932303012345679ns [1900-02-28 12:34:56.987654321]}")) + else: + # Python's datetime is limited to smaller timestamps on 32-bit platforms + check_heap_repr( + gdb_arrow, "heap_timestamp_array_s", + ("arrow::TimestampArray of type arrow::timestamp" + "(arrow::TimeUnit::SECOND), length 4, offset 0, null count 1 = {" + "[0] = null, [1] = 0s [1970-01-01 00:00:00], " + "[2] = -2203932304s [too large to represent], " + "[3] = 63730281600s [too large to represent]}")) + check_heap_repr( + gdb_arrow, "heap_timestamp_array_ms", + ("arrow::TimestampArray of type arrow::timestamp" + "(arrow::TimeUnit::MILLI), length 3, offset 0, null count 1 = {" + "[0] = null, [1] = -2203932303877ms [too large to represent], " + "[2] = 63730281600789ms [too large to represent]}")) + check_heap_repr( + gdb_arrow, "heap_timestamp_array_us", + ("arrow::TimestampArray of type arrow::timestamp" + "(arrow::TimeUnit::MICRO), length 3, offset 0, null count 1 = {" + "[0] = null, " + "[1] = -2203932303345679us [too large to represent], " + "[2] = 63730281600456789us [too large to represent]}")) + check_heap_repr( + gdb_arrow, "heap_timestamp_array_ns", + ("arrow::TimestampArray of type arrow::timestamp" + "(arrow::TimeUnit::NANO), length 2, offset 0, null count 1 = {" + "[0] = null, " + "[1] = -2203932303012345679ns [too large to represent]}")) # Decimal check_heap_repr(