From c78ef0f1a4adc9211aef78345415faa00cefa0bf Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 6 Jun 2023 17:24:04 +0200 Subject: [PATCH] GH-35040: [Python] Skip test_cast_timestamp_to_string on Windows because it requires tz database (#35735) ### Rationale for this change Fix up of https://github.com/apache/arrow/pull/35395, skipping one of the tests added in that PR on Windows, because the test requires access to a tz database. Authored-by: Joris Van den Bossche Signed-off-by: Joris Van den Bossche --- python/pyarrow/tests/test_compute.py | 44 +++++++++++++++------------- python/pyarrow/tests/test_scalars.py | 4 +++ python/pyarrow/tests/util.py | 9 ++++++ 3 files changed, 36 insertions(+), 21 deletions(-) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 875d0e613b6ca..f934edd3c3bcb 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -36,6 +36,8 @@ import pyarrow as pa import pyarrow.compute as pc from pyarrow.lib import ArrowNotImplementedError +from pyarrow.tests import util + all_array_types = [ ('bool', [True, False, False, True, True]), @@ -180,17 +182,19 @@ def test_option_class_equality(): pc.WeekOptions(week_starts_monday=True, count_from_zero=False, first_week_is_fully_in_year=False), ] - # TODO: We should test on windows once ARROW-13168 is resolved. 
- # Timezone database is not available on Windows yet - if sys.platform != 'win32': + # Timezone database might not be installed on Windows + if sys.platform != "win32" or util.windows_has_tzdata(): options.append(pc.AssumeTimezoneOptions("Europe/Ljubljana")) classes = {type(option) for option in options} for cls in exported_option_classes: - # Timezone database is not available on Windows yet - if cls not in classes and sys.platform != 'win32' and \ - cls != pc.AssumeTimezoneOptions: + # Timezone database might not be installed on Windows + if ( + cls not in classes + and (sys.platform != "win32" or util.windows_has_tzdata()) + and cls != pc.AssumeTimezoneOptions + ): try: options.append(cls()) except TypeError: @@ -1846,17 +1850,18 @@ def test_strptime(): assert got == pa.array([None, None, None], type=pa.timestamp('s')) -# TODO: We should test on windows once ARROW-13168 is resolved. @pytest.mark.pandas -@pytest.mark.skipif(sys.platform == 'win32', - reason="Timezone database is not available on Windows yet") +@pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(), + reason="Timezone database is not installed on Windows") def test_strftime(): times = ["2018-03-10 09:00", "2038-01-31 12:23", None] timezones = ["CET", "UTC", "Europe/Ljubljana"] - formats = ["%a", "%A", "%w", "%d", "%b", "%B", "%m", "%y", "%Y", "%H", - "%I", "%p", "%M", "%z", "%Z", "%j", "%U", "%W", "%c", "%x", - "%X", "%%", "%G", "%V", "%u"] + formats = ["%a", "%A", "%w", "%d", "%b", "%B", "%m", "%y", "%Y", "%H", "%I", + "%p", "%M", "%z", "%Z", "%j", "%U", "%W", "%%", "%G", "%V", "%u"] + if sys.platform != "win32": + # Locale-dependent formats don't match on Windows + formats.extend(["%c", "%x", "%X"]) for timezone in timezones: ts = pd.to_datetime(times).tz_localize(timezone) @@ -2029,18 +2034,16 @@ def test_extract_datetime_components(): _check_datetime_components(timestamps) # Test timezone aware timestamp array - if sys.platform == 'win32': - # TODO: We should test on 
windows once ARROW-13168 is resolved. - pytest.skip('Timezone database is not available on Windows yet') + if sys.platform == "win32" and not util.windows_has_tzdata(): + pytest.skip('Timezone database is not installed on Windows') else: for timezone in timezones: _check_datetime_components(timestamps, timezone) -# TODO: We should test on windows once ARROW-13168 is resolved. @pytest.mark.pandas -@pytest.mark.skipif(sys.platform == 'win32', - reason="Timezone database is not available on Windows yet") +@pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(), + reason="Timezone database is not installed on Windows") def test_assume_timezone(): ts_type = pa.timestamp("ns") timestamps = pd.to_datetime(["1970-01-01T00:00:59.123456789", @@ -2235,9 +2238,8 @@ def _check_temporal_rounding(ts, values, unit): np.testing.assert_array_equal(result, expected) -# TODO: We should test on windows once ARROW-13168 is resolved. -@pytest.mark.skipif(sys.platform == 'win32', - reason="Timezone database is not available on Windows yet") +@pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(), + reason="Timezone database is not installed on Windows") @pytest.mark.parametrize('unit', ("nanosecond", "microsecond", "millisecond", "second", "minute", "hour", "day")) @pytest.mark.pandas diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py index 1e6a3f29e0d7e..b7180e5250fdf 100644 --- a/python/pyarrow/tests/test_scalars.py +++ b/python/pyarrow/tests/test_scalars.py @@ -19,12 +19,14 @@ import decimal import pickle import pytest +import sys import weakref import numpy as np import pyarrow as pa import pyarrow.compute as pc +from pyarrow.tests import util @pytest.mark.parametrize(['value', 'ty', 'klass'], [ @@ -304,6 +306,8 @@ def test_cast(): pa.scalar('foo').cast('int32') +@pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(), + reason="Timezone database is not installed on Windows") def 
test_cast_timestamp_to_string(): # GH-35370 pytest.importorskip("pytz") diff --git a/python/pyarrow/tests/util.py b/python/pyarrow/tests/util.py index df7936371ee8f..0b69deb73ba28 100644 --- a/python/pyarrow/tests/util.py +++ b/python/pyarrow/tests/util.py @@ -448,3 +448,12 @@ def _configure_s3_limited_user(s3_server, policy): except FileNotFoundError: pytest.skip("Configuring limited s3 user failed") + + +def windows_has_tzdata(): + """ + Check for the tz database in the default location where tz.cpp will + look for it (until we make this configurable at run-time) + """ + tzdata_path = os.path.expandvars(r"%USERPROFILE%\Downloads\tzdata") + return os.path.exists(tzdata_path)