Skip to content

Commit

Permalink
move array_to_datetime timests (pandas-dev#19640)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and harisbal committed Feb 28, 2018
1 parent 82f011b commit 067984a
Show file tree
Hide file tree
Showing 2 changed files with 155 additions and 176 deletions.
186 changes: 10 additions & 176 deletions pandas/tests/indexes/datetimes/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import dateutil
import numpy as np
from dateutil.parser import parse
from datetime import datetime, date, time, timedelta
from datetime import datetime, date, time
from distutils.version import LooseVersion

import pandas as pd
Expand All @@ -19,7 +19,6 @@

from pandas.errors import OutOfBoundsDatetime
from pandas.compat import lmap, PY3
from pandas.compat.numpy import np_array_datetime64_compat
from pandas.core.dtypes.common import is_datetime64_ns_dtype
from pandas.util import testing as tm
import pandas.util._test_decorators as td
Expand Down Expand Up @@ -803,6 +802,15 @@ def test_dataframe_dtypes(self, cache):


class TestToDatetimeMisc(object):
def test_to_datetime_barely_out_of_bounds(self):
# GH#19529
# GH#19382 close enough to bounds that dropping nanos would result
# in an in-bounds datetime
arr = np.array(['2262-04-11 23:47:16.854775808'], dtype=object)

with pytest.raises(OutOfBoundsDatetime):
to_datetime(arr)

@pytest.mark.parametrize('cache', [True, False])
def test_to_datetime_iso8601(self, cache):
result = to_datetime(["2012-01-01 00:00:00"], cache=cache)
Expand Down Expand Up @@ -1464,180 +1472,6 @@ def test_parsers_timezone_minute_offsets_roundtrip(self, cache):
converted_time = dt_time.tz_localize('UTC').tz_convert(tz)
assert dt_string_repr == repr(converted_time)

def test_parsers_iso8601(self):
# GH 12060
# test only the iso parser - flexibility to different
# separators and leadings 0s
# Timestamp construction falls back to dateutil
cases = {'2011-01-02': datetime(2011, 1, 2),
'2011-1-2': datetime(2011, 1, 2),
'2011-01': datetime(2011, 1, 1),
'2011-1': datetime(2011, 1, 1),
'2011 01 02': datetime(2011, 1, 2),
'2011.01.02': datetime(2011, 1, 2),
'2011/01/02': datetime(2011, 1, 2),
'2011\\01\\02': datetime(2011, 1, 2),
'2013-01-01 05:30:00': datetime(2013, 1, 1, 5, 30),
'2013-1-1 5:30:00': datetime(2013, 1, 1, 5, 30)}
for date_str, exp in compat.iteritems(cases):
actual = tslib._test_parse_iso8601(date_str)
assert actual == exp

# separators must all match - YYYYMM not valid
invalid_cases = ['2011-01/02', '2011^11^11',
'201401', '201111', '200101',
# mixed separated and unseparated
'2005-0101', '200501-01',
'20010101 12:3456', '20010101 1234:56',
# HHMMSS must have two digits in each component
# if unseparated
'20010101 1', '20010101 123', '20010101 12345',
'20010101 12345Z',
# wrong separator for HHMMSS
'2001-01-01 12-34-56']
for date_str in invalid_cases:
with pytest.raises(ValueError):
tslib._test_parse_iso8601(date_str)
# If no ValueError raised, let me know which case failed.
raise Exception(date_str)


class TestArrayToDatetime(object):
def test_coerce_out_of_bounds_utc(self):
# GH#19612
ts = Timestamp('1900-01-01', tz='US/Pacific')
dt = ts.to_pydatetime() - timedelta(days=365 * 300) # ~1600AD
arr = np.array([dt])
result = tslib.array_to_datetime(arr, utc=True, errors='coerce')
expected = np.array(['NaT'], dtype='datetime64[ns]')
tm.assert_numpy_array_equal(result, expected)

def test_parsing_valid_dates(self):
arr = np.array(['01-01-2013', '01-02-2013'], dtype=object)
tm.assert_numpy_array_equal(
tslib.array_to_datetime(arr),
np_array_datetime64_compat(
[
'2013-01-01T00:00:00.000000000-0000',
'2013-01-02T00:00:00.000000000-0000'
],
dtype='M8[ns]'
)
)

arr = np.array(['Mon Sep 16 2013', 'Tue Sep 17 2013'], dtype=object)
tm.assert_numpy_array_equal(
tslib.array_to_datetime(arr),
np_array_datetime64_compat(
[
'2013-09-16T00:00:00.000000000-0000',
'2013-09-17T00:00:00.000000000-0000'
],
dtype='M8[ns]'
)
)

def test_parsing_timezone_offsets(self):
# All of these datetime strings with offsets are equivalent
# to the same datetime after the timezone offset is added
dt_strings = [
'01-01-2013 08:00:00+08:00',
'2013-01-01T08:00:00.000000000+0800',
'2012-12-31T16:00:00.000000000-0800',
'12-31-2012 23:00:00-01:00'
]

expected_output = tslib.array_to_datetime(np.array(
['01-01-2013 00:00:00'], dtype=object))

for dt_string in dt_strings:
tm.assert_numpy_array_equal(
tslib.array_to_datetime(
np.array([dt_string], dtype=object)
),
expected_output
)

def test_number_looking_strings_not_into_datetime(self):
# #4601
# These strings don't look like datetimes so they shouldn't be
# attempted to be converted
arr = np.array(['-352.737091', '183.575577'], dtype=object)
tm.assert_numpy_array_equal(
tslib.array_to_datetime(arr, errors='ignore'), arr)

arr = np.array(['1', '2', '3', '4', '5'], dtype=object)
tm.assert_numpy_array_equal(
tslib.array_to_datetime(arr, errors='ignore'), arr)

def test_coercing_dates_outside_of_datetime64_ns_bounds(self):
invalid_dates = [
date(1000, 1, 1),
datetime(1000, 1, 1),
'1000-01-01',
'Jan 1, 1000',
np.datetime64('1000-01-01'),
]

for invalid_date in invalid_dates:
pytest.raises(ValueError,
tslib.array_to_datetime,
np.array([invalid_date], dtype='object'),
errors='raise', )
tm.assert_numpy_array_equal(
tslib.array_to_datetime(
np.array([invalid_date], dtype='object'),
errors='coerce'),
np.array([tslib.iNaT], dtype='M8[ns]')
)

arr = np.array(['1/1/1000', '1/1/2000'], dtype=object)
tm.assert_numpy_array_equal(
tslib.array_to_datetime(arr, errors='coerce'),
np_array_datetime64_compat(
[
tslib.iNaT,
'2000-01-01T00:00:00.000000000-0000'
],
dtype='M8[ns]'
)
)

def test_coerce_of_invalid_datetimes(self):
arr = np.array(['01-01-2013', 'not_a_date', '1'], dtype=object)

# Without coercing, the presence of any invalid dates prevents
# any values from being converted
tm.assert_numpy_array_equal(
tslib.array_to_datetime(arr, errors='ignore'), arr)

# With coercing, the invalid dates becomes iNaT
tm.assert_numpy_array_equal(
tslib.array_to_datetime(arr, errors='coerce'),
np_array_datetime64_compat(
[
'2013-01-01T00:00:00.000000000-0000',
tslib.iNaT,
tslib.iNaT
],
dtype='M8[ns]'
)
)

def test_to_datetime_barely_out_of_bounds(self):
# GH#19529
# GH#19382 close enough to bounds that dropping nanos would result
# in an in-bounds datetime
arr = np.array(['2262-04-11 23:47:16.854775808'], dtype=object)

with pytest.raises(OutOfBoundsDatetime):
to_datetime(arr)

with pytest.raises(OutOfBoundsDatetime):
# Essentially the same as above, but more directly calling
# the relevant function
tslib.array_to_datetime(arr)


def test_normalize_date():
value = date(2012, 9, 7)
Expand Down
145 changes: 145 additions & 0 deletions pandas/tests/tslibs/test_array_to_datetime.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# -*- coding: utf-8 -*-
from datetime import datetime, date

import numpy as np
import pytest

from pandas._libs import tslib
from pandas.compat.numpy import np_array_datetime64_compat
import pandas.util.testing as tm


class TestParseISO8601(object):
@pytest.mark.parametrize('date_str, exp', [
('2011-01-02', datetime(2011, 1, 2)),
('2011-1-2', datetime(2011, 1, 2)),
('2011-01', datetime(2011, 1, 1)),
('2011-1', datetime(2011, 1, 1)),
('2011 01 02', datetime(2011, 1, 2)),
('2011.01.02', datetime(2011, 1, 2)),
('2011/01/02', datetime(2011, 1, 2)),
('2011\\01\\02', datetime(2011, 1, 2)),
('2013-01-01 05:30:00', datetime(2013, 1, 1, 5, 30)),
('2013-1-1 5:30:00', datetime(2013, 1, 1, 5, 30))])
def test_parsers_iso8601(self, date_str, exp):
# GH#12060
# test only the iso parser - flexibility to different
# separators and leadings 0s
# Timestamp construction falls back to dateutil
actual = tslib._test_parse_iso8601(date_str)
assert actual == exp

@pytest.mark.parametrize(
'date_str',
['2011-01/02', '2011^11^11',
'201401', '201111', '200101',
# mixed separated and unseparated
'2005-0101', '200501-01',
'20010101 12:3456',
'20010101 1234:56',
# HHMMSS must have two digits in
# each component if unseparated
'20010101 1', '20010101 123',
'20010101 12345', '20010101 12345Z',
# wrong separator for HHMMSS
'2001-01-01 12-34-56'])
def test_parsers_iso8601_invalid(self, date_str):
# separators must all match - YYYYMM not valid
with pytest.raises(ValueError):
tslib._test_parse_iso8601(date_str)


class TestArrayToDatetime(object):
def test_parsing_valid_dates(self):
arr = np.array(['01-01-2013', '01-02-2013'], dtype=object)
result = tslib.array_to_datetime(arr)
expected = ['2013-01-01T00:00:00.000000000-0000',
'2013-01-02T00:00:00.000000000-0000']
tm.assert_numpy_array_equal(
result,
np_array_datetime64_compat(expected, dtype='M8[ns]'))

arr = np.array(['Mon Sep 16 2013', 'Tue Sep 17 2013'], dtype=object)
result = tslib.array_to_datetime(arr)
expected = ['2013-09-16T00:00:00.000000000-0000',
'2013-09-17T00:00:00.000000000-0000']
tm.assert_numpy_array_equal(
result,
np_array_datetime64_compat(expected, dtype='M8[ns]'))

@pytest.mark.parametrize('dt_string', [
'01-01-2013 08:00:00+08:00',
'2013-01-01T08:00:00.000000000+0800',
'2012-12-31T16:00:00.000000000-0800',
'12-31-2012 23:00:00-01:00'])
def test_parsing_timezone_offsets(self, dt_string):
# All of these datetime strings with offsets are equivalent
# to the same datetime after the timezone offset is added
arr = np.array(['01-01-2013 00:00:00'], dtype=object)
expected = tslib.array_to_datetime(arr)

arr = np.array([dt_string], dtype=object)
result = tslib.array_to_datetime(arr)
tm.assert_numpy_array_equal(result, expected)

def test_number_looking_strings_not_into_datetime(self):
# GH#4601
# These strings don't look like datetimes so they shouldn't be
# attempted to be converted
arr = np.array(['-352.737091', '183.575577'], dtype=object)
result = tslib.array_to_datetime(arr, errors='ignore')
tm.assert_numpy_array_equal(result, arr)

arr = np.array(['1', '2', '3', '4', '5'], dtype=object)
result = tslib.array_to_datetime(arr, errors='ignore')
tm.assert_numpy_array_equal(result, arr)

@pytest.mark.parametrize('invalid_date', [
date(1000, 1, 1),
datetime(1000, 1, 1),
'1000-01-01',
'Jan 1, 1000',
np.datetime64('1000-01-01')])
def test_coerce_outside_ns_bounds(self, invalid_date):
arr = np.array([invalid_date], dtype='object')
with pytest.raises(ValueError):
tslib.array_to_datetime(arr, errors='raise')

result = tslib.array_to_datetime(arr, errors='coerce')
expected = np.array([tslib.iNaT], dtype='M8[ns]')
tm.assert_numpy_array_equal(result, expected)

def test_coerce_outside_ns_bounds_one_valid(self):
arr = np.array(['1/1/1000', '1/1/2000'], dtype=object)
result = tslib.array_to_datetime(arr, errors='coerce')
expected = [tslib.iNaT,
'2000-01-01T00:00:00.000000000-0000']
tm.assert_numpy_array_equal(
result,
np_array_datetime64_compat(expected, dtype='M8[ns]'))

def test_coerce_of_invalid_datetimes(self):
arr = np.array(['01-01-2013', 'not_a_date', '1'], dtype=object)

# Without coercing, the presence of any invalid dates prevents
# any values from being converted
result = tslib.array_to_datetime(arr, errors='ignore')
tm.assert_numpy_array_equal(result, arr)

# With coercing, the invalid dates becomes iNaT
result = tslib.array_to_datetime(arr, errors='coerce')
expected = ['2013-01-01T00:00:00.000000000-0000',
tslib.iNaT,
tslib.iNaT]

tm.assert_numpy_array_equal(
result,
np_array_datetime64_compat(expected, dtype='M8[ns]'))

def test_to_datetime_barely_out_of_bounds(self):
# GH#19529
# GH#19382 close enough to bounds that dropping nanos would result
# in an in-bounds datetime
arr = np.array(['2262-04-11 23:47:16.854775808'], dtype=object)
with pytest.raises(tslib.OutOfBoundsDatetime):
tslib.array_to_datetime(arr)

0 comments on commit 067984a

Please sign in to comment.