diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index bd3fa5e73cd11..b5926933544e8 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -8,7 +8,7 @@ import dateutil import numpy as np from dateutil.parser import parse -from datetime import datetime, date, time, timedelta +from datetime import datetime, date, time from distutils.version import LooseVersion import pandas as pd @@ -19,7 +19,6 @@ from pandas.errors import OutOfBoundsDatetime from pandas.compat import lmap, PY3 -from pandas.compat.numpy import np_array_datetime64_compat from pandas.core.dtypes.common import is_datetime64_ns_dtype from pandas.util import testing as tm import pandas.util._test_decorators as td @@ -803,6 +802,15 @@ def test_dataframe_dtypes(self, cache): class TestToDatetimeMisc(object): + def test_to_datetime_barely_out_of_bounds(self): + # GH#19529 + # GH#19382 close enough to bounds that dropping nanos would result + # in an in-bounds datetime + arr = np.array(['2262-04-11 23:47:16.854775808'], dtype=object) + + with pytest.raises(OutOfBoundsDatetime): + to_datetime(arr) + @pytest.mark.parametrize('cache', [True, False]) def test_to_datetime_iso8601(self, cache): result = to_datetime(["2012-01-01 00:00:00"], cache=cache) @@ -1464,180 +1472,6 @@ def test_parsers_timezone_minute_offsets_roundtrip(self, cache): converted_time = dt_time.tz_localize('UTC').tz_convert(tz) assert dt_string_repr == repr(converted_time) - def test_parsers_iso8601(self): - # GH 12060 - # test only the iso parser - flexibility to different - # separators and leadings 0s - # Timestamp construction falls back to dateutil - cases = {'2011-01-02': datetime(2011, 1, 2), - '2011-1-2': datetime(2011, 1, 2), - '2011-01': datetime(2011, 1, 1), - '2011-1': datetime(2011, 1, 1), - '2011 01 02': datetime(2011, 1, 2), - '2011.01.02': datetime(2011, 1, 2), - '2011/01/02': datetime(2011, 1, 2), - '2011\\01\\02': datetime(2011, 1, 2), - '2013-01-01 05:30:00': datetime(2013, 1, 1, 5, 30), - '2013-1-1 5:30:00': datetime(2013, 1, 1, 5, 30)} - for date_str, exp in compat.iteritems(cases): - actual = tslib._test_parse_iso8601(date_str) - assert actual == exp - - # separators must all match - YYYYMM not valid - invalid_cases = ['2011-01/02', '2011^11^11', - '201401', '201111', '200101', - # mixed separated and unseparated - '2005-0101', '200501-01', - '20010101 12:3456', '20010101 1234:56', - # HHMMSS must have two digits in each component - # if unseparated - '20010101 1', '20010101 123', '20010101 12345', - '20010101 12345Z', - # wrong separator for HHMMSS - '2001-01-01 12-34-56'] - for date_str in invalid_cases: - with pytest.raises(ValueError): - tslib._test_parse_iso8601(date_str) - # If no ValueError raised, let me know which case failed. - raise Exception(date_str) - - -class TestArrayToDatetime(object): - def test_coerce_out_of_bounds_utc(self): - # GH#19612 - ts = Timestamp('1900-01-01', tz='US/Pacific') - dt = ts.to_pydatetime() - timedelta(days=365 * 300) # ~1600AD - arr = np.array([dt]) - result = tslib.array_to_datetime(arr, utc=True, errors='coerce') - expected = np.array(['NaT'], dtype='datetime64[ns]') - tm.assert_numpy_array_equal(result, expected) - - def test_parsing_valid_dates(self): - arr = np.array(['01-01-2013', '01-02-2013'], dtype=object) - tm.assert_numpy_array_equal( - tslib.array_to_datetime(arr), - np_array_datetime64_compat( - [ - '2013-01-01T00:00:00.000000000-0000', - '2013-01-02T00:00:00.000000000-0000' - ], - dtype='M8[ns]' - ) - ) - - arr = np.array(['Mon Sep 16 2013', 'Tue Sep 17 2013'], dtype=object) - tm.assert_numpy_array_equal( - tslib.array_to_datetime(arr), - np_array_datetime64_compat( - [ - '2013-09-16T00:00:00.000000000-0000', - '2013-09-17T00:00:00.000000000-0000' - ], - dtype='M8[ns]' - ) - ) - - def test_parsing_timezone_offsets(self): - # All of these datetime strings with offsets are equivalent - # to the same datetime after the timezone offset is added - dt_strings = [ - '01-01-2013 08:00:00+08:00', - '2013-01-01T08:00:00.000000000+0800', - '2012-12-31T16:00:00.000000000-0800', - '12-31-2012 23:00:00-01:00' - ] - - expected_output = tslib.array_to_datetime(np.array( - ['01-01-2013 00:00:00'], dtype=object)) - - for dt_string in dt_strings: - tm.assert_numpy_array_equal( - tslib.array_to_datetime( - np.array([dt_string], dtype=object) - ), - expected_output - ) - - def test_number_looking_strings_not_into_datetime(self): - # #4601 - # These strings don't look like datetimes so they shouldn't be - # attempted to be converted - arr = np.array(['-352.737091', '183.575577'], dtype=object) - tm.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='ignore'), arr) - - arr = np.array(['1', '2', '3', '4', '5'], dtype=object) - tm.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='ignore'), arr) - - def test_coercing_dates_outside_of_datetime64_ns_bounds(self): - invalid_dates = [ - date(1000, 1, 1), - datetime(1000, 1, 1), - '1000-01-01', - 'Jan 1, 1000', - np.datetime64('1000-01-01'), - ] - - for invalid_date in invalid_dates: - pytest.raises(ValueError, - tslib.array_to_datetime, - np.array([invalid_date], dtype='object'), - errors='raise', ) - tm.assert_numpy_array_equal( - tslib.array_to_datetime( - np.array([invalid_date], dtype='object'), - errors='coerce'), - np.array([tslib.iNaT], dtype='M8[ns]') - ) - - arr = np.array(['1/1/1000', '1/1/2000'], dtype=object) - tm.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='coerce'), - np_array_datetime64_compat( - [ - tslib.iNaT, - '2000-01-01T00:00:00.000000000-0000' - ], - dtype='M8[ns]' - ) - ) - - def test_coerce_of_invalid_datetimes(self): - arr = np.array(['01-01-2013', 'not_a_date', '1'], dtype=object) - - # Without coercing, the presence of any invalid dates prevents - # any values from being converted - tm.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='ignore'), arr) - - # With coercing, the invalid dates becomes iNaT - tm.assert_numpy_array_equal( - tslib.array_to_datetime(arr, errors='coerce'), - np_array_datetime64_compat( - [ - '2013-01-01T00:00:00.000000000-0000', - tslib.iNaT, - tslib.iNaT - ], - dtype='M8[ns]' - ) - ) - - def test_to_datetime_barely_out_of_bounds(self): - # GH#19529 - # GH#19382 close enough to bounds that dropping nanos would result - # in an in-bounds datetime - arr = np.array(['2262-04-11 23:47:16.854775808'], dtype=object) - - with pytest.raises(OutOfBoundsDatetime): - to_datetime(arr) - - with pytest.raises(OutOfBoundsDatetime): - # Essentially the same as above, but more directly calling - # the relevant function - tslib.array_to_datetime(arr) - def test_normalize_date(): value = date(2012, 9, 7) diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py new file mode 100644 index 0000000000000..eb77e52e7c91d --- /dev/null +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -0,0 +1,145 @@ +# -*- coding: utf-8 -*- +from datetime import datetime, date + +import numpy as np +import pytest + +from pandas._libs import tslib +from pandas.compat.numpy import np_array_datetime64_compat +import pandas.util.testing as tm + + +class TestParseISO8601(object): + @pytest.mark.parametrize('date_str, exp', [ + ('2011-01-02', datetime(2011, 1, 2)), + ('2011-1-2', datetime(2011, 1, 2)), + ('2011-01', datetime(2011, 1, 1)), + ('2011-1', datetime(2011, 1, 1)), + ('2011 01 02', datetime(2011, 1, 2)), + ('2011.01.02', datetime(2011, 1, 2)), + ('2011/01/02', datetime(2011, 1, 2)), + ('2011\\01\\02', datetime(2011, 1, 2)), + ('2013-01-01 05:30:00', datetime(2013, 1, 1, 5, 30)), + ('2013-1-1 5:30:00', datetime(2013, 1, 1, 5, 30))]) + def test_parsers_iso8601(self, date_str, exp): + # GH#12060 + # test only the iso parser - flexibility to different + # separators and leadings 0s + # Timestamp construction falls back to dateutil + actual = tslib._test_parse_iso8601(date_str) + assert actual == exp + + @pytest.mark.parametrize( + 'date_str', + ['2011-01/02', '2011^11^11', + '201401', '201111', '200101', + # mixed separated and unseparated + '2005-0101', '200501-01', + '20010101 12:3456', + '20010101 1234:56', + # HHMMSS must have two digits in + # each component if unseparated + '20010101 1', '20010101 123', + '20010101 12345', '20010101 12345Z', + # wrong separator for HHMMSS + '2001-01-01 12-34-56']) + def test_parsers_iso8601_invalid(self, date_str): + # separators must all match - YYYYMM not valid + with pytest.raises(ValueError): + tslib._test_parse_iso8601(date_str) + + +class TestArrayToDatetime(object): + def test_parsing_valid_dates(self): + arr = np.array(['01-01-2013', '01-02-2013'], dtype=object) + result = tslib.array_to_datetime(arr) + expected = ['2013-01-01T00:00:00.000000000-0000', + '2013-01-02T00:00:00.000000000-0000'] + tm.assert_numpy_array_equal( + result, + np_array_datetime64_compat(expected, dtype='M8[ns]')) + + arr = np.array(['Mon Sep 16 2013', 'Tue Sep 17 2013'], dtype=object) + result = tslib.array_to_datetime(arr) + expected = ['2013-09-16T00:00:00.000000000-0000', + '2013-09-17T00:00:00.000000000-0000'] + tm.assert_numpy_array_equal( + result, + np_array_datetime64_compat(expected, dtype='M8[ns]')) + + @pytest.mark.parametrize('dt_string', [ + '01-01-2013 08:00:00+08:00', + '2013-01-01T08:00:00.000000000+0800', + '2012-12-31T16:00:00.000000000-0800', + '12-31-2012 23:00:00-01:00']) + def test_parsing_timezone_offsets(self, dt_string): + # All of these datetime strings with offsets are equivalent + # to the same datetime after the timezone offset is added + arr = np.array(['01-01-2013 00:00:00'], dtype=object) + expected = tslib.array_to_datetime(arr) + + arr = np.array([dt_string], dtype=object) + result = tslib.array_to_datetime(arr) + tm.assert_numpy_array_equal(result, expected) + + def test_number_looking_strings_not_into_datetime(self): + # GH#4601 + # These strings don't look like datetimes so they shouldn't be + # attempted to be converted + arr = np.array(['-352.737091', '183.575577'], dtype=object) + result = tslib.array_to_datetime(arr, errors='ignore') + tm.assert_numpy_array_equal(result, arr) + + arr = np.array(['1', '2', '3', '4', '5'], dtype=object) + result = tslib.array_to_datetime(arr, errors='ignore') + tm.assert_numpy_array_equal(result, arr) + + @pytest.mark.parametrize('invalid_date', [ + date(1000, 1, 1), + datetime(1000, 1, 1), + '1000-01-01', + 'Jan 1, 1000', + np.datetime64('1000-01-01')]) + def test_coerce_outside_ns_bounds(self, invalid_date): + arr = np.array([invalid_date], dtype='object') + with pytest.raises(ValueError): + tslib.array_to_datetime(arr, errors='raise') + + result = tslib.array_to_datetime(arr, errors='coerce') + expected = np.array([tslib.iNaT], dtype='M8[ns]') + tm.assert_numpy_array_equal(result, expected) + + def test_coerce_outside_ns_bounds_one_valid(self): + arr = np.array(['1/1/1000', '1/1/2000'], dtype=object) + result = tslib.array_to_datetime(arr, errors='coerce') + expected = [tslib.iNaT, + '2000-01-01T00:00:00.000000000-0000'] + tm.assert_numpy_array_equal( + result, + np_array_datetime64_compat(expected, dtype='M8[ns]')) + + def test_coerce_of_invalid_datetimes(self): + arr = np.array(['01-01-2013', 'not_a_date', '1'], dtype=object) + + # Without coercing, the presence of any invalid dates prevents + # any values from being converted + result = tslib.array_to_datetime(arr, errors='ignore') + tm.assert_numpy_array_equal(result, arr) + + # With coercing, the invalid dates becomes iNaT + result = tslib.array_to_datetime(arr, errors='coerce') + expected = ['2013-01-01T00:00:00.000000000-0000', + tslib.iNaT, + tslib.iNaT] + + tm.assert_numpy_array_equal( + result, + np_array_datetime64_compat(expected, dtype='M8[ns]')) + + def test_to_datetime_barely_out_of_bounds(self): + # GH#19529 + # GH#19382 close enough to bounds that dropping nanos would result + # in an in-bounds datetime + arr = np.array(['2262-04-11 23:47:16.854775808'], dtype=object) + with pytest.raises(tslib.OutOfBoundsDatetime): + tslib.array_to_datetime(arr)