diff --git a/.travis.yml b/.travis.yml
index fe1a2950dbf081..0f43e4cf54faa1 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -52,10 +52,6 @@ matrix:
- dist: trusty
env:
- JOB="3.5" TEST_ARGS="--skip-slow --skip-network" COVERAGE=true
- addons:
- apt:
- packages:
- - xsel
- dist: trusty
env:
- JOB="3.6" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true
@@ -66,7 +62,11 @@ matrix:
# In allow_failures
- dist: trusty
env:
- - JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true
+ - JOB="3.6_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true
+ addons:
+ apt:
+ packages:
+ - xsel
# In allow_failures
- dist: trusty
env:
@@ -75,17 +75,17 @@ matrix:
- dist: trusty
env:
- JOB="3.6_DOC" DOC=true
- addons:
- apt:
- packages:
- - xsel
allow_failures:
- dist: trusty
env:
- JOB="2.7_SLOW" SLOW=true
- dist: trusty
env:
- - JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true
+ - JOB="3.6_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true
+ addons:
+ apt:
+ packages:
+ - xsel
- dist: trusty
env:
- JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate"
@@ -102,8 +102,6 @@ before_install:
- uname -a
- git --version
- git tag
- - ci/before_install_travis.sh
- - export DISPLAY=":99.0"
install:
- echo "install start"
@@ -114,6 +112,8 @@ install:
before_script:
- ci/install_db_travis.sh
+ - export DISPLAY=":99.0"
+ - ci/before_script_travis.sh
script:
- echo "script start"
diff --git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py
index c112d1ef72eb80..0f8c8458628b15 100644
--- a/asv_bench/benchmarks/timedelta.py
+++ b/asv_bench/benchmarks/timedelta.py
@@ -40,3 +40,46 @@ def setup(self):
def test_add_td_ts(self):
self.td + self.ts
+
+
+class TimedeltaProperties(object):
+ goal_time = 0.2
+
+ def setup(self):
+ self.td = Timedelta(days=365, minutes=35, seconds=25, milliseconds=35)
+
+ def time_timedelta_days(self):
+ self.td.days
+
+ def time_timedelta_seconds(self):
+ self.td.seconds
+
+ def time_timedelta_microseconds(self):
+ self.td.microseconds
+
+ def time_timedelta_nanoseconds(self):
+ self.td.nanoseconds
+
+
+class DatetimeAccessor(object):
+ goal_time = 0.2
+
+ def setup(self):
+ self.N = 100000
+ self.series = pd.Series(
+ pd.timedelta_range('1 days', periods=self.N, freq='h')
+ )
+
+ def time_dt_accessor(self):
+ self.series.dt
+
+ def time_timedelta_dt_accessor_days(self):
+ self.series.dt.days
+
+ def time_timedelta_dt_accessor_seconds(self):
+ self.series.dt.seconds
+
+ def time_timedelta_dt_accessor_microseconds(self):
+ self.series.dt.microseconds
+
+ def time_timedelta_dt_accessor_nanoseconds(self):
+ self.series.dt.nanoseconds
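The new benchmarks above exercise scalar ``Timedelta`` component access and the vectorized ``.dt`` accessor on a timedelta ``Series``. A minimal sketch of the operations being timed (assuming a pandas build that includes the vectorized accessors from this changeset):

    import pandas as pd

    td = pd.Timedelta(days=365, minutes=35, seconds=25, milliseconds=35)
    td.days                 # scalar component access (TimedeltaProperties)
    td.nanoseconds

    s = pd.Series(pd.timedelta_range('1 days', periods=5, freq='h'))
    s.dt.days               # vectorized component access (DatetimeAccessor)
    s.dt.seconds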
diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py
index 779fc0bd20964a..9614a63332609a 100644
--- a/asv_bench/benchmarks/timeseries.py
+++ b/asv_bench/benchmarks/timeseries.py
@@ -346,17 +346,22 @@ class ToDatetime(object):
def setup(self):
self.rng = date_range(start='1/1/2000', periods=10000, freq='D')
- self.stringsD = Series((((self.rng.year * 10000) + (self.rng.month * 100)) + self.rng.day), dtype=np.int64).apply(str)
+ self.stringsD = Series(self.rng.strftime('%Y%m%d'))
self.rng = date_range(start='1/1/2000', periods=20000, freq='H')
- self.strings = [x.strftime('%Y-%m-%d %H:%M:%S') for x in self.rng]
- self.strings_nosep = [x.strftime('%Y%m%d %H:%M:%S') for x in self.rng]
+ self.strings = self.rng.strftime('%Y-%m-%d %H:%M:%S').tolist()
+ self.strings_nosep = self.rng.strftime('%Y%m%d %H:%M:%S').tolist()
self.strings_tz_space = [x.strftime('%Y-%m-%d %H:%M:%S') + ' -0800'
for x in self.rng]
self.s = Series((['19MAY11', '19MAY11:00:00:00'] * 100000))
self.s2 = self.s.str.replace(':\\S+$', '')
+ self.unique_numeric_seconds = range(10000)
+ self.dup_numeric_seconds = [1000] * 10000
+ self.dup_string_dates = ['2000-02-11'] * 10000
+ self.dup_string_with_tz = ['2000-02-11 15:00:00-0800'] * 10000
+
def time_format_YYYYMMDD(self):
to_datetime(self.stringsD, format='%Y%m%d')
@@ -381,6 +386,36 @@ def time_format_exact(self):
def time_format_no_exact(self):
to_datetime(self.s, format='%d%b%y', exact=False)
+ def time_cache_true_with_unique_seconds_and_unit(self):
+ to_datetime(self.unique_numeric_seconds, unit='s', cache=True)
+
+ def time_cache_false_with_unique_seconds_and_unit(self):
+ to_datetime(self.unique_numeric_seconds, unit='s', cache=False)
+
+ def time_cache_true_with_dup_seconds_and_unit(self):
+ to_datetime(self.dup_numeric_seconds, unit='s', cache=True)
+
+ def time_cache_false_with_dup_seconds_and_unit(self):
+ to_datetime(self.dup_numeric_seconds, unit='s', cache=False)
+
+ def time_cache_true_with_dup_string_dates(self):
+ to_datetime(self.dup_string_dates, cache=True)
+
+ def time_cache_false_with_dup_string_dates(self):
+ to_datetime(self.dup_string_dates, cache=False)
+
+ def time_cache_true_with_dup_string_dates_and_format(self):
+ to_datetime(self.dup_string_dates, format='%Y-%m-%d', cache=True)
+
+ def time_cache_false_with_dup_string_dates_and_format(self):
+ to_datetime(self.dup_string_dates, format='%Y-%m-%d', cache=False)
+
+ def time_cache_true_with_dup_string_tzoffset_dates(self):
+ to_datetime(self.dup_string_with_tz, cache=True)
+
+ def time_cache_false_with_dup_string_tzoffset_dates(self):
+ to_datetime(self.dup_string_with_tz, cache=False)
+
class Offsets(object):
goal_time = 0.2
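The cache benchmarks added above measure the new ``cache`` keyword of ``to_datetime``. A minimal usage sketch (assuming a build that includes the ``cache`` argument introduced in this changeset):

    import pandas as pd

    dup_dates = ['2000-02-11'] * 10000
    # With cache=True, each unique string is parsed once and the result reused,
    # which is what the time_cache_true_* benchmarks exercise.
    pd.to_datetime(dup_dates, cache=True)
    pd.to_datetime(dup_dates, format='%Y-%m-%d', cache=False)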
diff --git a/ci/before_install_travis.sh b/ci/before_script_travis.sh
similarity index 93%
rename from ci/before_install_travis.sh
rename to ci/before_script_travis.sh
index 2d0b4da6120dc3..0b3939b1906a24 100755
--- a/ci/before_install_travis.sh
+++ b/ci/before_script_travis.sh
@@ -4,6 +4,7 @@ echo "inside $0"
if [ "${TRAVIS_OS_NAME}" == "linux" ]; then
sh -e /etc/init.d/xvfb start
+ sleep 3
fi
# Never fail because bad things happened here.
diff --git a/ci/requirements-2.7_BUILD_TEST.build b/ci/requirements-2.7_BUILD_TEST.build
deleted file mode 100644
index aadec00cb7ebf2..00000000000000
--- a/ci/requirements-2.7_BUILD_TEST.build
+++ /dev/null
@@ -1,6 +0,0 @@
-python=2.7*
-dateutil
-pytz
-nomkl
-numpy
-cython
diff --git a/ci/requirements-3.6_BUILD_TEST.build b/ci/requirements-3.6_BUILD_TEST.build
new file mode 100644
index 00000000000000..1c4b46aea3865d
--- /dev/null
+++ b/ci/requirements-3.6_BUILD_TEST.build
@@ -0,0 +1,6 @@
+python=3.6*
+python-dateutil
+pytz
+nomkl
+numpy
+cython
diff --git a/ci/requirements-2.7_BUILD_TEST.pip b/ci/requirements-3.6_BUILD_TEST.pip
similarity index 100%
rename from ci/requirements-2.7_BUILD_TEST.pip
rename to ci/requirements-3.6_BUILD_TEST.pip
diff --git a/ci/requirements-2.7_BUILD_TEST.sh b/ci/requirements-3.6_BUILD_TEST.sh
old mode 100755
new mode 100644
similarity index 75%
rename from ci/requirements-2.7_BUILD_TEST.sh
rename to ci/requirements-3.6_BUILD_TEST.sh
index 78941fd0944e57..84dd27c50d587d
--- a/ci/requirements-2.7_BUILD_TEST.sh
+++ b/ci/requirements-3.6_BUILD_TEST.sh
@@ -2,6 +2,6 @@
source activate pandas
-echo "install 27 BUILD_TEST"
+echo "install 36 BUILD_TEST"
conda install -n pandas -c conda-forge pyarrow dask
diff --git a/ci/requirements-3.6_NUMPY_DEV.build.sh b/ci/requirements-3.6_NUMPY_DEV.build.sh
index bc92d8fca6b17f..fd79142c5cebbe 100644
--- a/ci/requirements-3.6_NUMPY_DEV.build.sh
+++ b/ci/requirements-3.6_NUMPY_DEV.build.sh
@@ -12,10 +12,7 @@ PRE_WHEELS="https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf
pip install --pre --upgrade --timeout=60 -f $PRE_WHEELS numpy scipy
# install dateutil from master
-
-# TODO(jreback), temp disable dateutil master has changed
-# pip install -U git+git://github.com/dateutil/dateutil.git
-pip install python-dateutil
+pip install -U git+git://github.com/dateutil/dateutil.git
# cython via pip
pip install cython
diff --git a/ci/script_multi.sh b/ci/script_multi.sh
index ee9fbcaad5ef5f..863613e14af98e 100755
--- a/ci/script_multi.sh
+++ b/ci/script_multi.sh
@@ -27,6 +27,11 @@ if [ "$BUILD_TEST" ]; then
echo "[running]"
cd /tmp
unset PYTHONPATH
+
+ echo "[build-test: single]"
+ python -c 'import pandas; pandas.test(["--skip-slow", "--skip-network", "-r xX", "-m single"])'
+
+ echo "[build-test: not single]"
python -c 'import pandas; pandas.test(["-n 2", "--skip-slow", "--skip-network", "-r xX", "-m not single"])'
elif [ "$DOC" ]; then
diff --git a/doc/source/api.rst b/doc/source/api.rst
index b5cf593ac0d1f9..ce88aed91823c5 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -1870,8 +1870,52 @@ Methods
Timedelta.to_timedelta64
Timedelta.total_seconds
+.. _api.frequencies:
+
+Frequencies
+-----------
+
+.. currentmodule:: pandas.tseries.frequencies
+
+
+.. autosummary::
+ :toctree: generated/
+
+ to_offset
+
+.. _api.offsets:
+
+Offsets
+-------
+
+.. currentmodule:: pandas.tseries.offsets
+
+.. autosummary::
+ :toctree: generated/
+
+ DateOffset
+ Week
+ Day
+ Hour
+ Minute
+ Second
+ Milli
+ Micro
+ Nano
+
+.. autosummary::
+ :toctree: generated/
+
+ MonthBegin
+ MonthEnd
+ QuarterBegin
+ QuarterEnd
+ YearBegin
+ YearEnd
+
Window
------
+
.. currentmodule:: pandas.core.window
Rolling objects are returned by ``.rolling`` calls: :func:`pandas.DataFrame.rolling`, :func:`pandas.Series.rolling`, etc.
diff --git a/doc/source/io.rst b/doc/source/io.rst
index 36f216601b4911..c94d5bc75d4fcc 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -4427,8 +4427,10 @@ Several caveats.
- This is a newer library, and the format, though stable, is not guaranteed to be backward compatible
to the earlier versions.
-- The format will NOT write an ``Index``, or ``MultiIndex`` for the ``DataFrame`` and will raise an
- error if a non-default one is provided. You can simply ``.reset_index()`` in order to store the index.
+- The format will NOT write an ``Index``, or ``MultiIndex`` for the
+ ``DataFrame`` and will raise an error if a non-default one is provided. You
+ can ``.reset_index()`` to store the index or ``.reset_index(drop=True)`` to
+ ignore it.
- Duplicate column names and non-string columns names are not supported
- Non supported types include ``Period`` and actual python object types. These will raise a helpful error message
on an attempt at serialization.
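A short sketch of the ``reset_index`` workaround described in the caveat above (hedged illustration; the file name is hypothetical and a parquet engine such as ``pyarrow`` or ``fastparquet`` is assumed to be installed):

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2, 3]},
                      index=pd.Index(['x', 'y', 'z'], name='key'))
    # Keep the index by turning it into an ordinary column ...
    df.reset_index().to_parquet('example.parquet')
    # ... or drop it entirely before writing.
    df.reset_index(drop=True).to_parquet('example.parquet')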
@@ -4491,8 +4493,10 @@ dtypes, including extension dtypes such as datetime with tz.
Several caveats.
-- The format will NOT write an ``Index``, or ``MultiIndex`` for the ``DataFrame`` and will raise an
- error if a non-default one is provided. You can simply ``.reset_index(drop=True)`` in order to store the index.
+- The format will NOT write an ``Index``, or ``MultiIndex`` for the
+ ``DataFrame`` and will raise an error if a non-default one is provided. You
+ can ``.reset_index()`` to store the index or ``.reset_index(drop=True)`` to
+ ignore it.
- Duplicate column names and non-string columns names are not supported
- Categorical dtypes can be serialized to parquet, but will de-serialize as ``object`` dtype.
- Non supported types include ``Period`` and actual python object types. These will raise a helpful error message
@@ -4538,7 +4542,7 @@ Read from a parquet file.
result.dtypes
-Read only certain columns of a parquet file.
+Read only certain columns of a parquet file.
.. ipython:: python
diff --git a/doc/source/release.rst b/doc/source/release.rst
index 6c3e7f847b485a..a3289b11448631 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -52,7 +52,7 @@ Highlights include:
- Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` function and :meth:`DataFrame.to_parquet` method, see :ref:`here `.
- New user-facing :class:`pandas.api.types.CategoricalDtype` for specifying
categoricals independent of the data, see :ref:`here `.
-- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck `__ is installed, see :ref:`here `.
+- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck `__ is installed, and ``sum`` and ``prod`` on empty Series now return NaN instead of 0, see :ref:`here `.
- Compatibility fixes for pypy, see :ref:`here `.
- Additions to the ``drop``, ``reindex`` and ``rename`` API to make them more consistent, see :ref:`here `.
- Addition of the new methods ``DataFrame.infer_objects`` (see :ref:`here `) and ``GroupBy.pipe`` (see :ref:`here `).
diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst
index 3385bafc264677..64cbe0b050a619 100644
--- a/doc/source/whatsnew.rst
+++ b/doc/source/whatsnew.rst
@@ -18,6 +18,10 @@ What's New
These are new features and improvements of note in each release.
+.. include:: whatsnew/v0.22.0.txt
+
+.. include:: whatsnew/v0.21.1.txt
+
.. include:: whatsnew/v0.21.0.txt
.. include:: whatsnew/v0.20.3.txt
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 4c460eeb85b82a..89e2d3006696c5 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -12,7 +12,7 @@ Highlights include:
- Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` function and :meth:`DataFrame.to_parquet` method, see :ref:`here `.
- New user-facing :class:`pandas.api.types.CategoricalDtype` for specifying
categoricals independent of the data, see :ref:`here `.
-- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck `__ is installed, see :ref:`here `.
+- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck `__ is installed, and ``sum`` and ``prod`` on empty Series now return NaN instead of 0, see :ref:`here `.
- Compatibility fixes for pypy, see :ref:`here `.
- Additions to the ``drop``, ``reindex`` and ``rename`` API to make them more consistent, see :ref:`here `.
- Addition of the new methods ``DataFrame.infer_objects`` (see :ref:`here `) and ``GroupBy.pipe`` (see :ref:`here `).
@@ -369,11 +369,11 @@ Additionally, support has been dropped for Python 3.4 (:issue:`15251`).
.. _whatsnew_0210.api_breaking.bottleneck:
-Sum/Prod of all-NaN Series/DataFrames is now consistently NaN
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Sum/Prod of all-NaN or empty Series/DataFrames is now consistently NaN
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames no longer depends on
-whether `bottleneck `__ is installed. (:issue:`9422`, :issue:`15507`).
+whether `bottleneck `__ is installed, and the return value of ``sum`` and ``prod`` on an empty Series has changed (:issue:`9422`, :issue:`15507`).
Calling ``sum`` or ``prod`` on an empty or all-``NaN`` ``Series``, or columns of a ``DataFrame``, will result in ``NaN``. See the :ref:`docs `.
@@ -381,35 +381,35 @@ Calling ``sum`` or ``prod`` on an empty or all-``NaN`` ``Series``, or columns of
s = Series([np.nan])
-Previously NO ``bottleneck``
+Previously WITHOUT ``bottleneck`` installed:
.. code-block:: ipython
In [2]: s.sum()
Out[2]: np.nan
-Previously WITH ``bottleneck``
+Previously WITH ``bottleneck``:
.. code-block:: ipython
In [2]: s.sum()
Out[2]: 0.0
-New Behavior, without regard to the bottleneck installation.
+New Behavior, without regard to the bottleneck installation:
.. ipython:: python
s.sum()
-Note that this also changes the sum of an empty ``Series``
-
-Previously regardless of ``bottlenck``
+Note that this also changes the sum of an empty ``Series``. Previously this always returned 0 regardless of a ``bottleneck`` installation:
.. code-block:: ipython
In [1]: pd.Series([]).sum()
Out[1]: 0
+but for consistency with the all-NaN case, this was changed to return NaN as well:
+
.. ipython:: python
pd.Series([]).sum()
@@ -877,6 +877,28 @@ New Behavior:
pd.interval_range(start=0, end=4)
+.. _whatsnew_0210.api.mpl_converters:
+
+No Automatic Matplotlib Converters
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Pandas no longer registers our ``date``, ``time``, ``datetime``,
+``datetime64``, and ``Period`` converters with matplotlib when pandas is
+imported. Matplotlib plot methods (``plt.plot``, ``ax.plot``, ...) will not
+nicely format the x-axis for ``DatetimeIndex`` or ``PeriodIndex`` values. You
+must explicitly register these converters:
+
+.. ipython:: python
+
+ from pandas.tseries import converter
+ converter.register()
+
+ fig, ax = plt.subplots()
+ plt.plot(pd.date_range('2017', periods=6), range(6))
+
+Pandas built-in ``Series.plot`` and ``DataFrame.plot`` *will* register these
+converters on first-use (:issue:`17710`).
+
.. _whatsnew_0210.api:
Other API Changes
@@ -900,8 +922,6 @@ Other API Changes
- Renamed non-functional ``index`` to ``index_col`` in :func:`read_stata` to improve API consistency (:issue:`16342`)
- Bug in :func:`DataFrame.drop` caused boolean labels ``False`` and ``True`` to be treated as labels 0 and 1 respectively when dropping indices from a numeric index. This will now raise a ValueError (:issue:`16877`)
- Restricted DateOffset keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`).
-- Pandas no longer registers matplotlib converters on import. The converters
- will be registered and used when the first plot is draw (:issue:`17710`)
.. _whatsnew_0210.deprecations:
diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt
index 0f6135ca2f045c..62d83069940535 100644
--- a/doc/source/whatsnew/v0.21.1.txt
+++ b/doc/source/whatsnew/v0.21.1.txt
@@ -61,7 +61,7 @@ Bug Fixes
- Bug in :class:`DatetimeIndex` subtracting datetimelike from DatetimeIndex could fail to overflow (:issue:`18020`)
- Bug in ``pd.Series.rolling.skew()`` and ``rolling.kurt()`` with all equal values has floating issue (:issue:`18044`)
- Bug in ``pd.DataFrameGroupBy.count()`` when counting over a datetimelike column (:issue:`13393`)
-- Bug in ``pd.Categorical.unique()`` returning read-only array when all categories were ``NaN`` (:issue:`18051`)
+- Bug in ``pd.concat`` when empty and non-empty DataFrames or Series are concatenated (:issue:`18178`, :issue:`18187`)
Conversion
^^^^^^^^^^
@@ -73,7 +73,8 @@ Conversion
Indexing
^^^^^^^^
--
+- Bug in a boolean comparison of a ``datetime.datetime`` and a ``datetime64[ns]`` dtype Series (:issue:`17965`)
+- Bug where a ``MultiIndex`` with more than a million records was not raising ``AttributeError`` when trying to access a missing attribute (:issue:`18165`)
-
-
@@ -85,6 +86,7 @@ I/O
- Bug in :func:`read_csv` for handling null values in index columns when specifying ``na_filter=False`` (:issue:`5239`)
- Bug in :meth:`DataFrame.to_csv` when the table had ``MultiIndex`` columns, and a list of strings was passed in for ``header`` (:issue:`5539`)
- :func:`read_parquet` now allows to specify the columns to read from a parquet file (:issue:`18154`)
+- :func:`read_parquet` now allows specifying additional keyword arguments that are passed to the respective engine (:issue:`18216`)
Plotting
^^^^^^^^
@@ -128,6 +130,7 @@ Categorical
- Error messages in the testing module have been improved when items have
different ``CategoricalDtype`` (:issue:`18069`)
- ``CategoricalIndex`` can now correctly take a ``pd.api.types.CategoricalDtype`` as its dtype (:issue:`18116`)
+- Bug in ``Categorical.unique()`` returning read-only array when all categories were ``NaN`` (:issue:`18051`)
Other
^^^^^
diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
index 943b6bb84fb47b..8afdd1b2e22b37 100644
--- a/doc/source/whatsnew/v0.22.0.txt
+++ b/doc/source/whatsnew/v0.22.0.txt
@@ -22,8 +22,8 @@ New features
Other Enhancements
^^^^^^^^^^^^^^^^^^
-- Better support for ``Dataframe.style.to_excel()`` output with the ``xlsxwriter`` engine. (:issue:`16149`)
--
+- Better support for :func:`DataFrame.style.to_excel` output with the ``xlsxwriter`` engine. (:issue:`16149`)
+- :func:`pandas.tseries.frequencies.to_offset` now accepts leading '+' signs e.g. '+1h'. (:issue:`18171`)
-
.. _whatsnew_0220.api_breaking:
@@ -41,10 +41,11 @@ Other API Changes
^^^^^^^^^^^^^^^^^
- ``NaT`` division with :class:`datetime.timedelta` will now return ``NaN`` instead of raising (:issue:`17876`)
-- All-NaN levels in ``MultiIndex`` are now assigned float rather than object dtype, coherently with flat indexes (:issue:`17929`).
-- :class:`Timestamp` will no longer silently ignore unused or invalid `tz` or `tzinfo` keyword arguments (:issue:`17690`)
-- :class:`Timestamp` will no longer silently ignore invalid `freq` arguments (:issue:`5168`)
-- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the `tseries.offsets` module (:issue:`17830`)
+- All-NaN levels in a ``MultiIndex`` are now assigned ``float`` rather than ``object`` dtype, promoting consistency with ``Index`` (:issue:`17929`).
+- :class:`Timestamp` will no longer silently ignore unused or invalid ``tz`` or ``tzinfo`` keyword arguments (:issue:`17690`)
+- :class:`Timestamp` will no longer silently ignore invalid ``freq`` arguments (:issue:`5168`)
+- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`)
+- ``tseries.frequencies.get_freq_group()`` and ``tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`)
.. _whatsnew_0220.deprecations:
@@ -60,8 +61,8 @@ Deprecations
Removal of prior version deprecations/changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
--
+- Warnings against the obsolete usage ``Categorical(codes, categories)``, which were emitted for instance when the first two arguments to ``Categorical()`` had different dtypes, and recommended the use of ``Categorical.from_codes``, have now been removed (:issue:`8074`)
+- The ``levels`` and ``labels`` attributes of a ``MultiIndex`` can no longer be set directly (:issue:`4039`).
-
.. _whatsnew_0220.performance:
@@ -69,8 +70,10 @@ Removal of prior version deprecations/changes
Performance Improvements
~~~~~~~~~~~~~~~~~~~~~~~~
-- Indexers on Series or DataFrame no longer create a reference cycle (:issue:`17956`)
--
+- Indexers on ``Series`` or ``DataFrame`` no longer create a reference cycle (:issue:`17956`)
+- Added a keyword argument, ``cache``, to :func:`to_datetime` that improves the performance of converting duplicate datetime arguments (:issue:`11665`)
+- :class:`DateOffset` arithmetic performance is improved (:issue:`18218`)
+- Converting a ``Series`` of ``Timedelta`` objects to days, seconds, etc. was sped up through vectorization of the underlying methods (:issue:`18092`)
-
.. _whatsnew_0220.docs:
@@ -87,9 +90,6 @@ Documentation Changes
Bug Fixes
~~~~~~~~~
-- Bug in ``pd.read_msgpack()`` with a non existent file is passed in Python 2 (:issue:`15296`)
-- Bug in ``DataFrame.groupby`` where key as tuple in a ``MultiIndex`` were interpreted as a list of keys (:issue:`17979`)
-- Bug in :func:`pd.read_csv` where a ``MultiIndex`` with duplicate columns was not being mangled appropriately (:issue:`18062`)
Conversion
^^^^^^^^^^
@@ -101,7 +101,8 @@ Conversion
Indexing
^^^^^^^^
-- Bug in :func:`PeriodIndex.truncate` which raises ``TypeError`` when ``PeriodIndex`` is monotonic (:issue:`17717`)
+- Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`)
+- Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`)
-
-
@@ -109,6 +110,9 @@ I/O
^^^
- :func:`read_html` now rewinds seekable IO objects after parse failure, before attempting to parse with a new parser. If a parser errors and the object is non-seekable, an informative error is raised suggesting the use of a different parser (:issue:`17975`)
+- Bug in :func:`read_msgpack` when a non-existent file is passed in Python 2 (:issue:`15296`)
+- Bug in :func:`read_csv` where a ``MultiIndex`` with duplicate columns was not being mangled appropriately (:issue:`18062`)
+- Bug in :func:`read_sas` where a file with 0 variables gave an ``AttributeError`` incorrectly. Now it gives an ``EmptyDataError`` (:issue:`18184`)
-
-
@@ -157,6 +161,6 @@ Categorical
Other
^^^^^
--
+- Improved error message when attempting to use a Python keyword as an identifier in a numexpr query (:issue:`18221`)
-
-
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 2fbbc81c4b5a12..e1312a40971f0d 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
# cython: profile=False
cimport numpy as cnp
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index 78eb7b3ae483e1..f5d8a0da0112b3 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -19,7 +19,7 @@ from hashtable cimport HashTable
from pandas._libs import algos, period as periodlib, hashtable as _hash
from pandas._libs.tslib import Timestamp, Timedelta
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, date
from cpython cimport PyTuple_Check, PyList_Check
@@ -549,7 +549,7 @@ cpdef convert_scalar(ndarray arr, object value):
if arr.descr.type_num == NPY_DATETIME:
if isinstance(value, np.ndarray):
pass
- elif isinstance(value, datetime):
+ elif isinstance(value, (datetime, np.datetime64, date)):
return Timestamp(value).value
elif value is None or value != value:
return iNaT
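The widened ``isinstance`` check above relates to boolean comparisons of datetime-like scalars against a ``datetime64[ns]`` Series (see the v0.21.1 Indexing note). A hedged sketch of the kind of comparison involved, not taken verbatim from the changeset:

    import datetime
    import numpy as np
    import pandas as pd

    s = pd.Series(pd.date_range('2017-01-01', periods=3))
    s > datetime.datetime(2017, 1, 1)   # datetime.datetime scalar
    s == np.datetime64('2017-01-02')    # np.datetime64 scalar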
diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx
index 72523a19b95952..bd21fb97ede206 100644
--- a/pandas/_libs/period.pyx
+++ b/pandas/_libs/period.pyx
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
+# cython: profile=False
from datetime import datetime, date, timedelta
import operator
@@ -27,14 +28,16 @@ from util cimport is_period_object, is_string_object, INT32_MIN
from lib cimport is_null_datetimelike
from pandas._libs import tslib
-from pandas._libs.tslib import Timestamp, iNaT, NaT
+from pandas._libs.tslib import Timestamp, iNaT
from tslibs.timezones cimport (
is_utc, is_tzlocal, get_utcoffset, get_dst_info, maybe_get_tz)
from tslibs.timedeltas cimport delta_to_nanoseconds
-from tslibs.parsing import parse_time_string, NAT_SENTINEL
+from tslibs.parsing import (parse_time_string, NAT_SENTINEL,
+ _get_rule_month, _MONTH_NUMBERS)
from tslibs.frequencies cimport get_freq_code
-from tslibs.nattype import nat_strings
+from tslibs.resolution import resolution, Resolution
+from tslibs.nattype import nat_strings, NaT
from tslibs.nattype cimport _nat_scalar_rules
from pandas.tseries import offsets
@@ -42,13 +45,6 @@ from pandas.tseries import frequencies
cdef int64_t NPY_NAT = util.get_nat()
-cdef int RESO_US = frequencies.RESO_US
-cdef int RESO_MS = frequencies.RESO_MS
-cdef int RESO_SEC = frequencies.RESO_SEC
-cdef int RESO_MIN = frequencies.RESO_MIN
-cdef int RESO_HR = frequencies.RESO_HR
-cdef int RESO_DAY = frequencies.RESO_DAY
-
cdef extern from "period_helper.h":
ctypedef struct date_info:
int64_t absdate
@@ -487,98 +483,10 @@ def extract_freq(ndarray[object] values):
raise ValueError('freq not specified and cannot be inferred')
-cpdef resolution(ndarray[int64_t] stamps, tz=None):
- cdef:
- Py_ssize_t i, n = len(stamps)
- pandas_datetimestruct dts
- int reso = RESO_DAY, curr_reso
-
- if tz is not None:
- tz = maybe_get_tz(tz)
- return _reso_local(stamps, tz)
- else:
- for i in range(n):
- if stamps[i] == NPY_NAT:
- continue
- dt64_to_dtstruct(stamps[i], &dts)
- curr_reso = _reso_stamp(&dts)
- if curr_reso < reso:
- reso = curr_reso
- return reso
-
-
-cdef inline int _reso_stamp(pandas_datetimestruct *dts):
- if dts.us != 0:
- if dts.us % 1000 == 0:
- return RESO_MS
- return RESO_US
- elif dts.sec != 0:
- return RESO_SEC
- elif dts.min != 0:
- return RESO_MIN
- elif dts.hour != 0:
- return RESO_HR
- return RESO_DAY
-
-cdef _reso_local(ndarray[int64_t] stamps, object tz):
- cdef:
- Py_ssize_t n = len(stamps)
- int reso = RESO_DAY, curr_reso
- ndarray[int64_t] trans, deltas, pos
- pandas_datetimestruct dts
-
- if is_utc(tz):
- for i in range(n):
- if stamps[i] == NPY_NAT:
- continue
- dt64_to_dtstruct(stamps[i], &dts)
- curr_reso = _reso_stamp(&dts)
- if curr_reso < reso:
- reso = curr_reso
- elif is_tzlocal(tz):
- for i in range(n):
- if stamps[i] == NPY_NAT:
- continue
- dt64_to_dtstruct(stamps[i], &dts)
- dt = datetime(dts.year, dts.month, dts.day, dts.hour,
- dts.min, dts.sec, dts.us, tz)
- delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000
- dt64_to_dtstruct(stamps[i] + delta, &dts)
- curr_reso = _reso_stamp(&dts)
- if curr_reso < reso:
- reso = curr_reso
- else:
- # Adjust datetime64 timestamp, recompute datetimestruct
- trans, deltas, typ = get_dst_info(tz)
-
- _pos = trans.searchsorted(stamps, side='right') - 1
- if _pos.dtype != np.int64:
- _pos = _pos.astype(np.int64)
- pos = _pos
-
- # statictzinfo
- if typ not in ['pytz', 'dateutil']:
- for i in range(n):
- if stamps[i] == NPY_NAT:
- continue
- dt64_to_dtstruct(stamps[i] + deltas[0], &dts)
- curr_reso = _reso_stamp(&dts)
- if curr_reso < reso:
- reso = curr_reso
- else:
- for i in range(n):
- if stamps[i] == NPY_NAT:
- continue
- dt64_to_dtstruct(stamps[i] + deltas[pos[i]], &dts)
- curr_reso = _reso_stamp(&dts)
- if curr_reso < reso:
- reso = curr_reso
-
- return reso
-
-
+# -----------------------------------------------------------------------
# period helpers
+
cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps,
int freq, object tz):
cdef:
@@ -1191,7 +1099,7 @@ class Period(_Period):
if freq is None:
try:
- freq = frequencies.Resolution.get_freq(reso)
+ freq = Resolution.get_freq(reso)
except KeyError:
raise ValueError(
"Invalid frequency or could not infer: %s" % reso)
@@ -1236,7 +1144,7 @@ def _quarter_to_myear(year, quarter, freq):
if quarter <= 0 or quarter > 4:
raise ValueError('Quarter must be 1 <= q <= 4')
- mnum = tslib._MONTH_NUMBERS[tslib._get_rule_month(freq)] + 1
+ mnum = _MONTH_NUMBERS[_get_rule_month(freq)] + 1
month = (mnum + (quarter - 1) * 3) % 12 + 1
if month > mnum:
year -= 1
diff --git a/pandas/_libs/properties.pyx b/pandas/_libs/properties.pyx
index 374da8067eedd3..4beb24f07c21cc 100644
--- a/pandas/_libs/properties.pyx
+++ b/pandas/_libs/properties.pyx
@@ -63,7 +63,14 @@ cdef class AxisProperty(object):
self.axis = axis
def __get__(self, obj, type):
- cdef list axes = obj._data.axes
+ cdef:
+ list axes
+
+ if obj is None:
+ # Only instances have _data, not classes
+ return None
+ else:
+ axes = obj._data.axes
return axes[self.axis]
def __set__(self, obj, value):
diff --git a/pandas/_libs/src/datetime/np_datetime.c b/pandas/_libs/src/datetime/np_datetime.c
index f8254ed9d84180..7278cbaff86caa 100644
--- a/pandas/_libs/src/datetime/np_datetime.c
+++ b/pandas/_libs/src/datetime/np_datetime.c
@@ -562,6 +562,17 @@ void pandas_datetime_to_datetimestruct(npy_datetime val, PANDAS_DATETIMEUNIT fr,
convert_datetime_to_datetimestruct(&meta, val, result);
}
+void pandas_timedelta_to_timedeltastruct(npy_timedelta val,
+ PANDAS_DATETIMEUNIT fr,
+ pandas_timedeltastruct *result) {
+ pandas_datetime_metadata meta;
+
+ meta.base = fr;
+ meta.num = 1;
+
+ convert_timedelta_to_timedeltastruct(&meta, val, result);
+}
+
PANDAS_DATETIMEUNIT get_datetime64_unit(PyObject *obj) {
return (PANDAS_DATETIMEUNIT)((PyDatetimeScalarObject *)obj)->obmeta.base;
}
@@ -980,3 +991,107 @@ int convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta,
return 0;
}
+
+/*
+ * Converts a raw timedelta value into a timedeltastruct, based
+ * on some metadata. The timedelta is assumed to be valid.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int convert_timedelta_to_timedeltastruct(pandas_timedelta_metadata *meta,
+ npy_timedelta td,
+ pandas_timedeltastruct *out) {
+ npy_int64 perday;
+ npy_int64 frac;
+ npy_int64 sfrac;
+ npy_int64 ifrac;
+ int sign;
+ npy_int64 DAY_NS = 86400000000000LL;
+
+ /* Initialize the output to all zeros */
+ memset(out, 0, sizeof(pandas_timedeltastruct));
+
+ switch (meta->base) {
+ case PANDAS_FR_ns:
+
+ // put frac in seconds
+ if (td < 0 && td % (1000LL * 1000LL * 1000LL) != 0)
+ frac = td / (1000LL * 1000LL * 1000LL) - 1;
+ else
+ frac = td / (1000LL * 1000LL * 1000LL);
+
+ if (frac < 0) {
+ sign = -1;
+
+ // even fraction
+ if ((-frac % 86400LL) != 0) {
+ out->days = -frac / 86400LL + 1;
+ frac += 86400LL * out->days;
+ } else {
+ frac = -frac;
+ }
+ } else {
+ sign = 1;
+ out->days = 0;
+ }
+
+ if (frac >= 86400) {
+ out->days += frac / 86400LL;
+ frac -= out->days * 86400LL;
+ }
+
+ if (frac >= 3600) {
+ out->hrs = frac / 3600LL;
+ frac -= out->hrs * 3600LL;
+ } else {
+ out->hrs = 0;
+ }
+
+ if (frac >= 60) {
+ out->min = frac / 60LL;
+ frac -= out->min * 60LL;
+ } else {
+ out->min = 0;
+ }
+
+ if (frac >= 0) {
+ out->sec = frac;
+ frac -= out->sec;
+ } else {
+ out->sec = 0;
+ }
+
+ sfrac = (out->hrs * 3600LL + out->min * 60LL
+ + out->sec) * (1000LL * 1000LL * 1000LL);
+
+ if (sign < 0)
+ out->days = -out->days;
+
+ ifrac = td - (out->days * DAY_NS + sfrac);
+
+ if (ifrac != 0) {
+ out->ms = ifrac / (1000LL * 1000LL);
+ ifrac -= out->ms * 1000LL * 1000LL;
+ out->us = ifrac / 1000LL;
+ ifrac -= out->us * 1000LL;
+ out->ns = ifrac;
+ } else {
+ out->ms = 0;
+ out->us = 0;
+ out->ns = 0;
+ }
+
+ out->seconds = out->hrs * 3600 + out->min * 60 + out->sec;
+ out->microseconds = out->ms * 1000 + out->us;
+ out->nanoseconds = out->ns;
+ break;
+
+ default:
+ PyErr_SetString(PyExc_RuntimeError,
+ "NumPy datetime metadata is corrupted with invalid "
+ "base unit");
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/pandas/_libs/src/datetime/np_datetime.h b/pandas/_libs/src/datetime/np_datetime.h
index af3d2e0f01c1b5..c51a4bddac82f0 100644
--- a/pandas/_libs/src/datetime/np_datetime.h
+++ b/pandas/_libs/src/datetime/np_datetime.h
@@ -49,11 +49,18 @@ typedef struct {
npy_int32 month, day, hour, min, sec, us, ps, as;
} pandas_datetimestruct;
+typedef struct {
+ npy_int64 days;
+ npy_int32 hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds;
+} pandas_timedeltastruct;
+
typedef struct {
PANDAS_DATETIMEUNIT base;
int num;
} pandas_datetime_metadata;
+typedef pandas_datetime_metadata pandas_timedelta_metadata;
+
extern const pandas_datetimestruct _NS_MIN_DTS;
extern const pandas_datetimestruct _NS_MAX_DTS;
@@ -71,6 +78,10 @@ npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr,
void pandas_datetime_to_datetimestruct(npy_datetime val, PANDAS_DATETIMEUNIT fr,
pandas_datetimestruct *result);
+void pandas_timedelta_to_timedeltastruct(npy_timedelta val,
+ PANDAS_DATETIMEUNIT fr,
+ pandas_timedeltastruct *result);
+
int dayofweek(int y, int m, int d);
extern const int days_per_month_table[2][12];
@@ -131,6 +142,11 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta,
npy_datetime dt,
pandas_datetimestruct *out);
+int
+convert_timedelta_to_timedeltastruct(pandas_timedelta_metadata *meta,
+ npy_timedelta td,
+ pandas_timedeltastruct *out);
+
PANDAS_DATETIMEUNIT get_datetime64_unit(PyObject *obj);
diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx
index f2edf48a6b8295..c432c40c8f6b3b 100644
--- a/pandas/_libs/src/inference.pyx
+++ b/pandas/_libs/src/inference.pyx
@@ -613,7 +613,7 @@ cdef class Validator:
self.dtype = dtype
self.skipna = skipna
- cdef bint validate(self, object[:] values) except -1:
+ cdef bint validate(self, ndarray values) except -1:
if not self.n:
return False
@@ -629,7 +629,7 @@ cdef class Validator:
@cython.wraparound(False)
@cython.boundscheck(False)
- cdef bint _validate(self, object[:] values) except -1:
+ cdef bint _validate(self, ndarray values) except -1:
cdef:
Py_ssize_t i
Py_ssize_t n = self.n
@@ -642,7 +642,7 @@ cdef class Validator:
@cython.wraparound(False)
@cython.boundscheck(False)
- cdef bint _validate_skipna(self, object[:] values) except -1:
+ cdef bint _validate_skipna(self, ndarray values) except -1:
cdef:
Py_ssize_t i
Py_ssize_t n = self.n
@@ -852,7 +852,7 @@ cdef class DatetimeValidator(TemporalValidator):
return is_null_datetime64(value)
-cpdef bint is_datetime_array(ndarray[object] values):
+cpdef bint is_datetime_array(ndarray values):
cdef:
DatetimeValidator validator = DatetimeValidator(
len(values),
@@ -876,7 +876,7 @@ cpdef bint is_datetime64_array(ndarray values):
return validator.validate(values)
-cpdef bint is_datetime_with_singletz_array(ndarray[object] values):
+cpdef bint is_datetime_with_singletz_array(ndarray values):
"""
Check values have the same tzinfo attribute.
Doesn't check values are datetime-like types.
@@ -959,7 +959,7 @@ cdef class DateValidator(Validator):
return is_date(value)
-cpdef bint is_date_array(ndarray[object] values, bint skipna=False):
+cpdef bint is_date_array(ndarray values, bint skipna=False):
cdef DateValidator validator = DateValidator(len(values), skipna=skipna)
return validator.validate(values)
@@ -970,7 +970,7 @@ cdef class TimeValidator(Validator):
return is_time(value)
-cpdef bint is_time_array(ndarray[object] values, bint skipna=False):
+cpdef bint is_time_array(ndarray values, bint skipna=False):
cdef TimeValidator validator = TimeValidator(len(values), skipna=skipna)
return validator.validate(values)
@@ -984,7 +984,7 @@ cdef class PeriodValidator(TemporalValidator):
return is_null_period(value)
-cpdef bint is_period_array(ndarray[object] values):
+cpdef bint is_period_array(ndarray values):
cdef PeriodValidator validator = PeriodValidator(len(values), skipna=True)
return validator.validate(values)
@@ -995,7 +995,7 @@ cdef class IntervalValidator(Validator):
return is_interval(value)
-cpdef bint is_interval_array(ndarray[object] values):
+cpdef bint is_interval_array(ndarray values):
cdef:
IntervalValidator validator = IntervalValidator(
len(values),
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index bf22a3a528259b..540a081bdda2ec 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -50,6 +50,7 @@ from datetime cimport (
# stdlib datetime imports
from datetime import time as datetime_time
+
from tslibs.np_datetime cimport (check_dts_bounds,
reverse_ops,
cmp_scalar,
@@ -61,12 +62,6 @@ from tslibs.np_datetime cimport (check_dts_bounds,
get_timedelta64_value)
from tslibs.np_datetime import OutOfBoundsDatetime
-from khash cimport (
- khiter_t,
- kh_destroy_int64, kh_put_int64,
- kh_init_int64, kh_int64_t,
- kh_resize_int64, kh_get_int64)
-
from .tslibs.parsing import parse_datetime_string
cimport cython
@@ -97,9 +92,8 @@ from tslibs.conversion cimport (tz_convert_single, _TSObject,
convert_to_tsobject,
convert_datetime_to_tsobject,
get_datetime64_nanos)
-from tslibs.conversion import (
- tz_localize_to_utc, tz_convert,
- tz_convert_single)
+from tslibs.conversion import (tz_localize_to_utc,
+ tz_convert_single, date_normalize)
from tslibs.nattype import NaT, nat_strings
from tslibs.nattype cimport _checknull_with_nat
@@ -878,33 +872,6 @@ Timestamp.min = Timestamp(_NS_LOWER_BOUND)
Timestamp.max = Timestamp(_NS_UPPER_BOUND)
-# ----------------------------------------------------------------------
-# Frequency inference
-
-def unique_deltas(ndarray[int64_t] arr):
- cdef:
- Py_ssize_t i, n = len(arr)
- int64_t val
- khiter_t k
- kh_int64_t *table
- int ret = 0
- list uniques = []
-
- table = kh_init_int64()
- kh_resize_int64(table, 10)
- for i in range(n - 1):
- val = arr[i + 1] - arr[i]
- k = kh_get_int64(table, val)
- if k == table.n_buckets:
- kh_put_int64(table, val, &ret)
- uniques.append(val)
- kh_destroy_int64(table)
-
- result = np.array(uniques, dtype=np.int64)
- result.sort()
- return result
-
-
cdef str _NDIM_STRING = "ndim"
# This is PITA. Because we inherit from datetime, which has very specific
@@ -1389,27 +1356,6 @@ _MONTH_NUMBERS = {k: i for i, k in enumerate(_MONTHS)}
_MONTH_ALIASES = {(k + 1): v for k, v in enumerate(_MONTHS)}
-cpdef object _get_rule_month(object source, object default='DEC'):
- """
- Return starting month of given freq, default is December.
-
- Example
- -------
- >>> _get_rule_month('D')
- 'DEC'
-
- >>> _get_rule_month('A-JAN')
- 'JAN'
- """
- if hasattr(source, 'freqstr'):
- source = source.freqstr
- source = source.upper()
- if '-' not in source:
- return default
- else:
- return source.split('-')[1]
-
-
cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
"""
convert the ndarray according to the unit
@@ -1849,26 +1795,6 @@ cdef inline _to_i8(object val):
return val
-cpdef pydt_to_i8(object pydt):
- """
- Convert to int64 representation compatible with numpy datetime64; converts
- to UTC
- """
- cdef:
- _TSObject ts
-
- ts = convert_to_tsobject(pydt, None, None, 0, 0)
-
- return ts.value
-
-
-def i8_to_pydt(int64_t i8, object tzinfo=None):
- """
- Inverse of pydt_to_i8
- """
- return Timestamp(i8)
-
-
# ----------------------------------------------------------------------
# Accessors
@@ -1892,130 +1818,6 @@ def get_time_micros(ndarray[int64_t] dtindex):
return micros
-cdef int64_t DAY_NS = 86400000000000LL
-
-
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def date_normalize(ndarray[int64_t] stamps, tz=None):
- cdef:
- Py_ssize_t i, n = len(stamps)
- pandas_datetimestruct dts
- ndarray[int64_t] result = np.empty(n, dtype=np.int64)
-
- if tz is not None:
- tz = maybe_get_tz(tz)
- result = _normalize_local(stamps, tz)
- else:
- with nogil:
- for i in range(n):
- if stamps[i] == NPY_NAT:
- result[i] = NPY_NAT
- continue
- dt64_to_dtstruct(stamps[i], &dts)
- result[i] = _normalized_stamp(&dts)
-
- return result
-
-
-@cython.wraparound(False)
-@cython.boundscheck(False)
-cdef _normalize_local(ndarray[int64_t] stamps, object tz):
- cdef:
- Py_ssize_t n = len(stamps)
- ndarray[int64_t] result = np.empty(n, dtype=np.int64)
- ndarray[int64_t] trans, deltas, pos
- pandas_datetimestruct dts
-
- if is_utc(tz):
- with nogil:
- for i in range(n):
- if stamps[i] == NPY_NAT:
- result[i] = NPY_NAT
- continue
- dt64_to_dtstruct(stamps[i], &dts)
- result[i] = _normalized_stamp(&dts)
- elif is_tzlocal(tz):
- for i in range(n):
- if stamps[i] == NPY_NAT:
- result[i] = NPY_NAT
- continue
- dt64_to_dtstruct(stamps[i], &dts)
- dt = datetime(dts.year, dts.month, dts.day, dts.hour,
- dts.min, dts.sec, dts.us, tz)
- delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000
- dt64_to_dtstruct(stamps[i] + delta, &dts)
- result[i] = _normalized_stamp(&dts)
- else:
- # Adjust datetime64 timestamp, recompute datetimestruct
- trans, deltas, typ = get_dst_info(tz)
-
- _pos = trans.searchsorted(stamps, side='right') - 1
- if _pos.dtype != np.int64:
- _pos = _pos.astype(np.int64)
- pos = _pos
-
- # statictzinfo
- if typ not in ['pytz', 'dateutil']:
- for i in range(n):
- if stamps[i] == NPY_NAT:
- result[i] = NPY_NAT
- continue
- dt64_to_dtstruct(stamps[i] + deltas[0], &dts)
- result[i] = _normalized_stamp(&dts)
- else:
- for i in range(n):
- if stamps[i] == NPY_NAT:
- result[i] = NPY_NAT
- continue
- dt64_to_dtstruct(stamps[i] + deltas[pos[i]], &dts)
- result[i] = _normalized_stamp(&dts)
-
- return result
-
-cdef inline int64_t _normalized_stamp(pandas_datetimestruct *dts) nogil:
- dts.hour = 0
- dts.min = 0
- dts.sec = 0
- dts.us = 0
- dts.ps = 0
- return dtstruct_to_dt64(dts)
-
-
-def dates_normalized(ndarray[int64_t] stamps, tz=None):
- cdef:
- Py_ssize_t i, n = len(stamps)
- ndarray[int64_t] trans, deltas
- pandas_datetimestruct dts
-
- if tz is None or is_utc(tz):
- for i in range(n):
- dt64_to_dtstruct(stamps[i], &dts)
- if (dts.hour + dts.min + dts.sec + dts.us) > 0:
- return False
- elif is_tzlocal(tz):
- for i in range(n):
- dt64_to_dtstruct(stamps[i], &dts)
- dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min,
- dts.sec, dts.us, tz)
- dt = dt + tz.utcoffset(dt)
- if (dt.hour + dt.minute + dt.second + dt.microsecond) > 0:
- return False
- else:
- trans, deltas, typ = get_dst_info(tz)
-
- for i in range(n):
- # Adjust datetime64 timestamp, recompute datetimestruct
- pos = trans.searchsorted(stamps[i]) - 1
- inf = tz._transition_info[pos]
-
- dt64_to_dtstruct(stamps[i] + deltas[pos], &dts)
- if (dts.hour + dts.min + dts.sec + dts.us) > 0:
- return False
-
- return True
-
-
# ----------------------------------------------------------------------
# Some general helper functions
diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd
index 843a688a2630c9..ad817ce8852f25 100644
--- a/pandas/_libs/tslibs/conversion.pxd
+++ b/pandas/_libs/tslibs/conversion.pxd
@@ -26,3 +26,5 @@ cdef void _localize_tso(_TSObject obj, object tz)
cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2)
cdef int64_t get_datetime64_nanos(object val) except? -1
+
+cpdef int64_t pydt_to_i8(object pydt) except? -1
diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
index 61efc865112a9d..c175769dc725ee 100644
--- a/pandas/_libs/tslibs/conversion.pyx
+++ b/pandas/_libs/tslibs/conversion.pyx
@@ -53,7 +53,6 @@ UTC = pytz.UTC
# ----------------------------------------------------------------------
# Misc Helpers
-
# TODO: How to declare np.datetime64 as the input type?
cdef inline int64_t get_datetime64_nanos(object val) except? -1:
"""
@@ -90,6 +89,27 @@ cdef class _TSObject:
return self.value
+cpdef int64_t pydt_to_i8(object pydt) except? -1:
+ """
+ Convert to int64 representation compatible with numpy datetime64; converts
+ to UTC
+
+ Parameters
+ ----------
+ pydt : object
+
+ Returns
+ -------
+ i8value : np.int64
+ """
+ cdef:
+ _TSObject ts
+
+ ts = convert_to_tsobject(pydt, None, None, 0, 0)
+
+ return ts.value
+
+
cdef convert_to_tsobject(object ts, object tz, object unit,
bint dayfirst, bint yearfirst):
"""
@@ -334,18 +354,18 @@ cdef inline void _localize_tso(_TSObject obj, object tz):
Py_ssize_t delta, posn
datetime dt
+ assert obj.tzinfo is None
+
if is_utc(tz):
- obj.tzinfo = tz
+ pass
+ elif obj.value == NPY_NAT:
+ pass
elif is_tzlocal(tz):
dt64_to_dtstruct(obj.value, &obj.dts)
dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, obj.dts.hour,
obj.dts.min, obj.dts.sec, obj.dts.us, tz)
delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000
- if obj.value != NPY_NAT:
- dt64_to_dtstruct(obj.value + delta, &obj.dts)
- else:
- dt64_to_dtstruct(obj.value, &obj.dts)
- obj.tzinfo = tz
+ dt64_to_dtstruct(obj.value + delta, &obj.dts)
else:
# Adjust datetime64 timestamp, recompute datetimestruct
trans, deltas, typ = get_dst_info(tz)
@@ -355,26 +375,17 @@ cdef inline void _localize_tso(_TSObject obj, object tz):
# static/pytz/dateutil specific code
if is_fixed_offset(tz):
# statictzinfo
- if len(deltas) > 0 and obj.value != NPY_NAT:
- dt64_to_dtstruct(obj.value + deltas[0], &obj.dts)
- else:
- dt64_to_dtstruct(obj.value, &obj.dts)
- obj.tzinfo = tz
+ assert len(deltas) == 1, len(deltas)
+ dt64_to_dtstruct(obj.value + deltas[0], &obj.dts)
elif treat_tz_as_pytz(tz):
- inf = tz._transition_info[pos]
- if obj.value != NPY_NAT:
- dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts)
- else:
- dt64_to_dtstruct(obj.value, &obj.dts)
- obj.tzinfo = tz._tzinfos[inf]
+ tz = tz._tzinfos[tz._transition_info[pos]]
+ dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts)
elif treat_tz_as_dateutil(tz):
- if obj.value != NPY_NAT:
- dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts)
- else:
- dt64_to_dtstruct(obj.value, &obj.dts)
- obj.tzinfo = tz
+ dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts)
else:
- obj.tzinfo = tz
+ pass
+
+ obj.tzinfo = tz
cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz):
@@ -401,7 +412,7 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2):
"""
Convert the val (in i8) from timezone1 to timezone2
- This is a single timezone versoin of tz_convert
+ This is a single timezone version of tz_convert
Parameters
----------
@@ -422,6 +433,9 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2):
pandas_datetimestruct dts
datetime dt
+ # See GH#17734 We should always be converting either from UTC or to UTC
+ assert (is_utc(tz1) or tz1 == 'UTC') or (is_utc(tz2) or tz2 == 'UTC')
+
if val == NPY_NAT:
return val
@@ -444,8 +458,8 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2):
if get_timezone(tz2) == 'UTC':
return utc_date
- if is_tzlocal(tz2):
- dt64_to_dtstruct(val, &dts)
+ elif is_tzlocal(tz2):
+ dt64_to_dtstruct(utc_date, &dts)
dt = datetime(dts.year, dts.month, dts.day, dts.hour,
dts.min, dts.sec, dts.us, tz2)
delta = int(get_utcoffset(tz2, dt).total_seconds()) * 1000000000
@@ -782,3 +796,183 @@ cdef inline str _render_tstamp(int64_t val):
""" Helper function to render exception messages"""
from pandas._libs.tslib import Timestamp
return str(Timestamp(val))
+
+
+# ----------------------------------------------------------------------
+# Normalization
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def date_normalize(ndarray[int64_t] stamps, tz=None):
+ """
+ Normalize each of the (nanosecond) timestamps in the given array by
+ rounding down to the beginning of the day (i.e. midnight). If `tz`
+ is not None, then this is midnight for this timezone.
+
+ Parameters
+ ----------
+ stamps : int64 ndarray
+ tz : tzinfo or None
+
+ Returns
+ -------
+ result : int64 ndarray of normalized nanosecond timestamps
+ """
+ cdef:
+ Py_ssize_t i, n = len(stamps)
+ pandas_datetimestruct dts
+ ndarray[int64_t] result = np.empty(n, dtype=np.int64)
+
+ if tz is not None:
+ tz = maybe_get_tz(tz)
+ result = _normalize_local(stamps, tz)
+ else:
+ with nogil:
+ for i in range(n):
+ if stamps[i] == NPY_NAT:
+ result[i] = NPY_NAT
+ continue
+ dt64_to_dtstruct(stamps[i], &dts)
+ result[i] = _normalized_stamp(&dts)
+
+ return result
+
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+cdef ndarray[int64_t] _normalize_local(ndarray[int64_t] stamps, object tz):
+ """
+ Normalize each of the (nanosecond) timestamps in the given array by
+ rounding down to the beginning of the day (i.e. midnight) for the
+ given timezone `tz`.
+
+ Parameters
+ ----------
+ stamps : int64 ndarray
+ tz : tzinfo or None
+
+ Returns
+ -------
+ result : int64 ndarray of normalized nanosecond timestamps
+ """
+ cdef:
+ Py_ssize_t n = len(stamps)
+ ndarray[int64_t] result = np.empty(n, dtype=np.int64)
+ ndarray[int64_t] trans, deltas, pos
+ pandas_datetimestruct dts
+ datetime dt
+
+ if is_utc(tz):
+ with nogil:
+ for i in range(n):
+ if stamps[i] == NPY_NAT:
+ result[i] = NPY_NAT
+ continue
+ dt64_to_dtstruct(stamps[i], &dts)
+ result[i] = _normalized_stamp(&dts)
+ elif is_tzlocal(tz):
+ for i in range(n):
+ if stamps[i] == NPY_NAT:
+ result[i] = NPY_NAT
+ continue
+ dt64_to_dtstruct(stamps[i], &dts)
+ dt = datetime(dts.year, dts.month, dts.day, dts.hour,
+ dts.min, dts.sec, dts.us, tz)
+ delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000
+ dt64_to_dtstruct(stamps[i] + delta, &dts)
+ result[i] = _normalized_stamp(&dts)
+ else:
+ # Adjust datetime64 timestamp, recompute datetimestruct
+ trans, deltas, typ = get_dst_info(tz)
+
+ _pos = trans.searchsorted(stamps, side='right') - 1
+ if _pos.dtype != np.int64:
+ _pos = _pos.astype(np.int64)
+ pos = _pos
+
+ # statictzinfo
+ if typ not in ['pytz', 'dateutil']:
+ for i in range(n):
+ if stamps[i] == NPY_NAT:
+ result[i] = NPY_NAT
+ continue
+ dt64_to_dtstruct(stamps[i] + deltas[0], &dts)
+ result[i] = _normalized_stamp(&dts)
+ else:
+ for i in range(n):
+ if stamps[i] == NPY_NAT:
+ result[i] = NPY_NAT
+ continue
+ dt64_to_dtstruct(stamps[i] + deltas[pos[i]], &dts)
+ result[i] = _normalized_stamp(&dts)
+
+ return result
+
+
+cdef inline int64_t _normalized_stamp(pandas_datetimestruct *dts) nogil:
+ """
+ Normalize the given datetimestruct to midnight, then convert to int64_t.
+
+ Parameters
+ ----------
+ *dts : pointer to pandas_datetimestruct
+
+ Returns
+ -------
+ stamp : int64
+ """
+ dts.hour = 0
+ dts.min = 0
+ dts.sec = 0
+ dts.us = 0
+ dts.ps = 0
+ return dtstruct_to_dt64(dts)
+
+
+def is_date_array_normalized(ndarray[int64_t] stamps, tz=None):
+ """
+ Check if all of the given (nanosecond) timestamps are normalized to
+ midnight, i.e. hour == minute == second == 0. If the optional timezone
+ `tz` is not None, then this is midnight for this timezone.
+
+ Parameters
+ ----------
+ stamps : int64 ndarray
+ tz : tzinfo or None
+
+ Returns
+ -------
+ is_normalized : bool True if all stamps are normalized
+ """
+ cdef:
+ Py_ssize_t i, n = len(stamps)
+ ndarray[int64_t] trans, deltas
+ pandas_datetimestruct dts
+ datetime dt
+
+ if tz is None or is_utc(tz):
+ for i in range(n):
+ dt64_to_dtstruct(stamps[i], &dts)
+ if (dts.hour + dts.min + dts.sec + dts.us) > 0:
+ return False
+ elif is_tzlocal(tz):
+ for i in range(n):
+ dt64_to_dtstruct(stamps[i], &dts)
+ dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min,
+ dts.sec, dts.us, tz)
+ dt = dt + tz.utcoffset(dt)
+ if (dt.hour + dt.minute + dt.second + dt.microsecond) > 0:
+ return False
+ else:
+ trans, deltas, typ = get_dst_info(tz)
+
+ for i in range(n):
+ # Adjust datetime64 timestamp, recompute datetimestruct
+ pos = trans.searchsorted(stamps[i]) - 1
+ inf = tz._transition_info[pos]
+
+ dt64_to_dtstruct(stamps[i] + deltas[pos], &dts)
+ if (dts.hour + dts.min + dts.sec + dts.us) > 0:
+ return False
+
+ return True
diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx
index b40646295cce57..3ab84853dfc4ac 100644
--- a/pandas/_libs/tslibs/fields.pyx
+++ b/pandas/_libs/tslibs/fields.pyx
@@ -17,7 +17,8 @@ from numpy cimport ndarray, int64_t, int32_t, int8_t
np.import_array()
-from np_datetime cimport pandas_datetimestruct, dt64_to_dtstruct
+from np_datetime cimport (pandas_datetimestruct, pandas_timedeltastruct,
+ dt64_to_dtstruct, td64_to_tdstruct)
from datetime cimport (
days_per_month_table,
@@ -545,6 +546,123 @@ def get_date_field(ndarray[int64_t] dtindex, object field):
raise ValueError("Field %s not supported" % field)
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def get_timedelta_field(ndarray[int64_t] tdindex, object field):
+ """
+ Given an int64-based timedelta index, extract the requested field
+ (days, hrs, sec., ...) and return an array of these values.
+ """
+ cdef:
+ Py_ssize_t i, count = 0
+ ndarray[int32_t] out
+ pandas_timedeltastruct tds
+
+ count = len(tdindex)
+ out = np.empty(count, dtype='i4')
+
+ if field == 'days':
+ with nogil:
+ for i in range(count):
+ if tdindex[i] == NPY_NAT:
+ out[i] = -1
+ continue
+
+ td64_to_tdstruct(tdindex[i], &tds)
+ out[i] = tds.days
+ return out
+
+ elif field == 'h':
+ with nogil:
+ for i in range(count):
+ if tdindex[i] == NPY_NAT:
+ out[i] = -1
+ continue
+
+ td64_to_tdstruct(tdindex[i], &tds)
+ out[i] = tds.hrs
+ return out
+
+ elif field == 's':
+ with nogil:
+ for i in range(count):
+ if tdindex[i] == NPY_NAT:
+ out[i] = -1
+ continue
+
+ td64_to_tdstruct(tdindex[i], &tds)
+ out[i] = tds.sec
+ return out
+
+ elif field == 'seconds':
+ with nogil:
+ for i in range(count):
+ if tdindex[i] == NPY_NAT:
+ out[i] = -1
+ continue
+
+ td64_to_tdstruct(tdindex[i], &tds)
+ out[i] = tds.seconds
+ return out
+
+ elif field == 'ms':
+ with nogil:
+ for i in range(count):
+ if tdindex[i] == NPY_NAT:
+ out[i] = -1
+ continue
+
+ td64_to_tdstruct(tdindex[i], &tds)
+ out[i] = tds.ms
+ return out
+
+ elif field == 'microseconds':
+ with nogil:
+ for i in range(count):
+ if tdindex[i] == NPY_NAT:
+ out[i] = -1
+ continue
+
+ td64_to_tdstruct(tdindex[i], &tds)
+ out[i] = tds.microseconds
+ return out
+
+ elif field == 'us':
+ with nogil:
+ for i in range(count):
+ if tdindex[i] == NPY_NAT:
+ out[i] = -1
+ continue
+
+ td64_to_tdstruct(tdindex[i], &tds)
+ out[i] = tds.us
+ return out
+
+ elif field == 'ns':
+ with nogil:
+ for i in range(count):
+ if tdindex[i] == NPY_NAT:
+ out[i] = -1
+ continue
+
+ td64_to_tdstruct(tdindex[i], &tds)
+ out[i] = tds.ns
+ return out
+
+ elif field == 'nanoseconds':
+ with nogil:
+ for i in range(count):
+ if tdindex[i] == NPY_NAT:
+ out[i] = -1
+ continue
+
+ td64_to_tdstruct(tdindex[i], &tds)
+ out[i] = tds.nanoseconds
+ return out
+
+ raise ValueError("Field %s not supported" % field)
+
+
cdef inline int days_in_month(pandas_datetimestruct dts) nogil:
return days_per_month_table[is_leapyear(dts.year)][dts.month - 1]
diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx
index 9d810bfb411afe..2a700d52eaaf3f 100644
--- a/pandas/_libs/tslibs/frequencies.pyx
+++ b/pandas/_libs/tslibs/frequencies.pyx
@@ -15,7 +15,7 @@ from util cimport is_integer_object
# hack to handle WOM-1MON
opattern = re.compile(
- r'([\-]?\d*|[\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)'
+ r'([+\-]?\d*|[+\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)'
)
_INVALID_FREQ_ERROR = "Invalid frequency: {0}"
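The widened ``opattern`` above is what allows a leading '+' in frequency strings, matching the v0.22.0 enhancement note for ``to_offset``. A minimal usage sketch:

    from pandas.tseries.frequencies import to_offset

    to_offset('+1h')   # now parsed the same as '1h'
    to_offset('-2D')   # a leading '-' was already accepted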
diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd
index 1ae0499f90c0dd..3692822ada135f 100644
--- a/pandas/_libs/tslibs/np_datetime.pxd
+++ b/pandas/_libs/tslibs/np_datetime.pxd
@@ -30,6 +30,10 @@ cdef extern from "../src/datetime/np_datetime.h":
int64_t year
int32_t month, day, hour, min, sec, us, ps, as
+ ctypedef struct pandas_timedeltastruct:
+ int64_t days
+ int32_t hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds
+
ctypedef enum PANDAS_DATETIMEUNIT:
PANDAS_FR_Y
PANDAS_FR_M
@@ -54,6 +58,7 @@ cdef check_dts_bounds(pandas_datetimestruct *dts)
cdef int64_t dtstruct_to_dt64(pandas_datetimestruct* dts) nogil
cdef void dt64_to_dtstruct(int64_t dt64, pandas_datetimestruct* out) nogil
+cdef void td64_to_tdstruct(int64_t td64, pandas_timedeltastruct* out) nogil
cdef int64_t pydatetime_to_dt64(datetime val, pandas_datetimestruct *dts)
cdef int64_t pydate_to_dt64(date val, pandas_datetimestruct *dts)
diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx
index abd6c59ea62443..72c028161a9378 100644
--- a/pandas/_libs/tslibs/np_datetime.pyx
+++ b/pandas/_libs/tslibs/np_datetime.pyx
@@ -26,6 +26,11 @@ cdef extern from "../src/datetime/np_datetime.h":
PANDAS_DATETIMEUNIT fr,
pandas_datetimestruct *result) nogil
+ void pandas_timedelta_to_timedeltastruct(npy_timedelta val,
+ PANDAS_DATETIMEUNIT fr,
+ pandas_timedeltastruct *result
+ ) nogil
+
pandas_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS
# ----------------------------------------------------------------------
@@ -127,6 +132,13 @@ cdef inline void dt64_to_dtstruct(int64_t dt64,
pandas_datetime_to_datetimestruct(dt64, PANDAS_FR_ns, out)
return
+cdef inline void td64_to_tdstruct(int64_t td64,
+ pandas_timedeltastruct* out) nogil:
+ """Convenience function to call pandas_timedelta_to_timedeltastruct
+ with the by-far-most-common frequency PANDAS_FR_ns"""
+ pandas_timedelta_to_timedeltastruct(td64, PANDAS_FR_ns, out)
+ return
+
cdef inline int64_t pydatetime_to_dt64(datetime val,
pandas_datetimestruct *dts):
diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx
index 87be9fa9101012..2d8ce4c59fedcc 100644
--- a/pandas/_libs/tslibs/offsets.pyx
+++ b/pandas/_libs/tslibs/offsets.pyx
@@ -4,7 +4,7 @@
cimport cython
import time
-from cpython.datetime cimport timedelta, time as dt_time
+from cpython.datetime cimport datetime, timedelta, time as dt_time
from dateutil.relativedelta import relativedelta
@@ -13,12 +13,12 @@ cimport numpy as np
np.import_array()
-from util cimport is_string_object
+from util cimport is_string_object, is_integer_object
-from pandas._libs.tslib import pydt_to_i8
+from pandas._libs.tslib import monthrange
+from conversion cimport tz_convert_single, pydt_to_i8
from frequencies cimport get_freq_code
-from conversion cimport tz_convert_single
# ---------------------------------------------------------------------
# Constants
@@ -375,3 +375,56 @@ class BaseOffset(_BaseOffset):
# i.e. isinstance(other, (ABCDatetimeIndex, ABCSeries))
return other - self
return -self + other
+
+
+# ----------------------------------------------------------------------
+# RelativeDelta Arithmetic
+
+
+cpdef datetime shift_month(datetime stamp, int months, object day_opt=None):
+ """
+ Given a datetime (or Timestamp) `stamp`, an integer `months` and an
+ option `day_opt`, return a new datetimelike that many months later,
+ with day determined by `day_opt` using relativedelta semantics.
+
+ Scalar analogue of tslib.shift_months
+
+ Parameters
+ ----------
+ stamp : datetime or Timestamp
+ months : int
+ day_opt : None, 'start', 'end', or an integer
+ None: returned datetimelike has the same day as the input, or the
+ last day of the month if the new month is too short
+ 'start': returned datetimelike has day=1
+ 'end': returned datetimelike has day on the last day of the month
+ int: returned datetimelike has day equal to day_opt
+
+ Returns
+ -------
+ shifted : datetime or Timestamp (same as input `stamp`)
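+
+    Examples
+    --------
+    Illustrative only; ``datetime`` is the stdlib ``datetime.datetime``.
+
+    >>> shift_month(datetime(2017, 1, 31), 1)
+    datetime.datetime(2017, 2, 28, 0, 0)
+    >>> shift_month(datetime(2017, 1, 31), 1, day_opt='start')
+    datetime.datetime(2017, 2, 1, 0, 0)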
+ """
+ cdef:
+ int year, month, day
+ int dim, dy
+
+ dy = (stamp.month + months) // 12
+ month = (stamp.month + months) % 12
+
+ if month == 0:
+ month = 12
+ dy -= 1
+ year = stamp.year + dy
+
+ dim = monthrange(year, month)[1]
+ if day_opt is None:
+ day = min(stamp.day, dim)
+ elif day_opt == 'start':
+ day = 1
+ elif day_opt == 'end':
+ day = dim
+ elif is_integer_object(day_opt):
+ day = min(day_opt, dim)
+ else:
+ raise ValueError(day_opt)
+ return stamp.replace(year=year, month=month, day=day)
diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx
new file mode 100644
index 00000000000000..b590121b9021ad
--- /dev/null
+++ b/pandas/_libs/tslibs/resolution.pyx
@@ -0,0 +1,652 @@
+# -*- coding: utf-8 -*-
+# cython: profile=False
+
+from cython cimport Py_ssize_t
+
+import numpy as np
+cimport numpy as np
+from numpy cimport ndarray, int64_t
+np.import_array()
+
+from util cimport is_string_object, get_nat
+
+from khash cimport (
+ khiter_t,
+ kh_destroy_int64, kh_put_int64,
+ kh_init_int64, kh_int64_t,
+ kh_resize_int64, kh_get_int64)
+
+from cpython.datetime cimport datetime
+
+from np_datetime cimport (pandas_datetimestruct,
+ dtstruct_to_dt64, dt64_to_dtstruct)
+from frequencies cimport get_freq_code
+from timezones cimport (
+ is_utc, is_tzlocal,
+ maybe_get_tz, get_dst_info, get_utcoffset)
+from fields import build_field_sarray
+from conversion import tz_convert
+
+from pandas._libs.properties import cache_readonly
+from pandas._libs.tslib import Timestamp
+
+from pandas.core.algorithms import unique # TODO: Avoid this non-cython import
+
+# ----------------------------------------------------------------------
+# Constants
+
+cdef int64_t NPY_NAT = get_nat()
+
+cdef int RESO_NS = 0
+cdef int RESO_US = 1
+cdef int RESO_MS = 2
+cdef int RESO_SEC = 3
+cdef int RESO_MIN = 4
+cdef int RESO_HR = 5
+cdef int RESO_DAY = 6
+
+_ONE_MICRO = 1000L
+_ONE_MILLI = _ONE_MICRO * 1000
+_ONE_SECOND = _ONE_MILLI * 1000
+_ONE_MINUTE = 60 * _ONE_SECOND
+_ONE_HOUR = 60 * _ONE_MINUTE
+_ONE_DAY = 24 * _ONE_HOUR
+
+DAYS = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN']
+_weekday_rule_aliases = dict((k, v) for k, v in enumerate(DAYS))
+
+_MONTHS = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL',
+ 'AUG', 'SEP', 'OCT', 'NOV', 'DEC']
+_MONTH_ALIASES = {(k + 1): v for k, v in enumerate(_MONTHS)}
+
+# ----------------------------------------------------------------------
+
+cpdef resolution(ndarray[int64_t] stamps, tz=None):
+ cdef:
+ Py_ssize_t i, n = len(stamps)
+ pandas_datetimestruct dts
+ int reso = RESO_DAY, curr_reso
+
+ if tz is not None:
+ tz = maybe_get_tz(tz)
+ return _reso_local(stamps, tz)
+ else:
+ for i in range(n):
+ if stamps[i] == NPY_NAT:
+ continue
+ dt64_to_dtstruct(stamps[i], &dts)
+ curr_reso = _reso_stamp(&dts)
+ if curr_reso < reso:
+ reso = curr_reso
+ return reso
+
+
+cdef _reso_local(ndarray[int64_t] stamps, object tz):
+ cdef:
+ Py_ssize_t n = len(stamps)
+ int reso = RESO_DAY, curr_reso
+ ndarray[int64_t] trans, deltas, pos
+ pandas_datetimestruct dts
+
+ if is_utc(tz):
+ for i in range(n):
+ if stamps[i] == NPY_NAT:
+ continue
+ dt64_to_dtstruct(stamps[i], &dts)
+ curr_reso = _reso_stamp(&dts)
+ if curr_reso < reso:
+ reso = curr_reso
+ elif is_tzlocal(tz):
+ for i in range(n):
+ if stamps[i] == NPY_NAT:
+ continue
+ dt64_to_dtstruct(stamps[i], &dts)
+ dt = datetime(dts.year, dts.month, dts.day, dts.hour,
+ dts.min, dts.sec, dts.us, tz)
+ delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000
+ dt64_to_dtstruct(stamps[i] + delta, &dts)
+ curr_reso = _reso_stamp(&dts)
+ if curr_reso < reso:
+ reso = curr_reso
+ else:
+ # Adjust datetime64 timestamp, recompute datetimestruct
+ trans, deltas, typ = get_dst_info(tz)
+
+ _pos = trans.searchsorted(stamps, side='right') - 1
+ if _pos.dtype != np.int64:
+ _pos = _pos.astype(np.int64)
+ pos = _pos
+
+ # statictzinfo
+ if typ not in ['pytz', 'dateutil']:
+ for i in range(n):
+ if stamps[i] == NPY_NAT:
+ continue
+ dt64_to_dtstruct(stamps[i] + deltas[0], &dts)
+ curr_reso = _reso_stamp(&dts)
+ if curr_reso < reso:
+ reso = curr_reso
+ else:
+ for i in range(n):
+ if stamps[i] == NPY_NAT:
+ continue
+ dt64_to_dtstruct(stamps[i] + deltas[pos[i]], &dts)
+ curr_reso = _reso_stamp(&dts)
+ if curr_reso < reso:
+ reso = curr_reso
+
+ return reso
+
+
+cdef inline int _reso_stamp(pandas_datetimestruct *dts):
+ if dts.us != 0:
+ if dts.us % 1000 == 0:
+ return RESO_MS
+ return RESO_US
+ elif dts.sec != 0:
+ return RESO_SEC
+ elif dts.min != 0:
+ return RESO_MIN
+ elif dts.hour != 0:
+ return RESO_HR
+ return RESO_DAY
+
+
+def get_freq_group(freq):
+ """
+ Return frequency code group of given frequency str or offset.
+
+ Example
+ -------
+ >>> get_freq_group('W-MON')
+ 4000
+
+ >>> get_freq_group('W-FRI')
+ 4000
+ """
+ if getattr(freq, '_typ', None) == 'dateoffset':
+ freq = freq.rule_code
+
+ if is_string_object(freq):
+ base, mult = get_freq_code(freq)
+ freq = base
+ elif isinstance(freq, int):
+ pass
+ else:
+ raise ValueError('input must be str, offset or int')
+ return (freq // 1000) * 1000
+
+
+class Resolution(object):
+
+ # Note: cython won't allow us to reference the cdef versions at the
+ # module level
+ RESO_NS = 0
+ RESO_US = 1
+ RESO_MS = 2
+ RESO_SEC = 3
+ RESO_MIN = 4
+ RESO_HR = 5
+ RESO_DAY = 6
+
+ _reso_str_map = {
+ RESO_NS: 'nanosecond',
+ RESO_US: 'microsecond',
+ RESO_MS: 'millisecond',
+ RESO_SEC: 'second',
+ RESO_MIN: 'minute',
+ RESO_HR: 'hour',
+ RESO_DAY: 'day'}
+
+ # factor to multiply a value by to convert it to the next finer grained
+ # resolution
+ _reso_mult_map = {
+ RESO_NS: None,
+ RESO_US: 1000,
+ RESO_MS: 1000,
+ RESO_SEC: 1000,
+ RESO_MIN: 60,
+ RESO_HR: 60,
+ RESO_DAY: 24}
+
+ _reso_str_bump_map = {
+ 'D': 'H',
+ 'H': 'T',
+ 'T': 'S',
+ 'S': 'L',
+ 'L': 'U',
+ 'U': 'N',
+ 'N': None}
+
+ _str_reso_map = dict([(v, k) for k, v in _reso_str_map.items()])
+
+ _reso_freq_map = {
+ 'year': 'A',
+ 'quarter': 'Q',
+ 'month': 'M',
+ 'day': 'D',
+ 'hour': 'H',
+ 'minute': 'T',
+ 'second': 'S',
+ 'millisecond': 'L',
+ 'microsecond': 'U',
+ 'nanosecond': 'N'}
+
+ _freq_reso_map = dict([(v, k)
+ for k, v in _reso_freq_map.items()])
+
+ @classmethod
+ def get_str(cls, reso):
+ """
+ Return resolution str against resolution code.
+
+ Example
+ -------
+ >>> Resolution.get_str(Resolution.RESO_SEC)
+ 'second'
+ """
+ return cls._reso_str_map.get(reso, 'day')
+
+ @classmethod
+ def get_reso(cls, resostr):
+ """
+        Return resolution code against resolution str.
+
+ Example
+ -------
+ >>> Resolution.get_reso('second')
+        3
+
+ >>> Resolution.get_reso('second') == Resolution.RESO_SEC
+ True
+ """
+ return cls._str_reso_map.get(resostr, cls.RESO_DAY)
+
+ @classmethod
+ def get_freq_group(cls, resostr):
+ """
+        Return frequency group code of given resolution str.
+
+ Example
+ -------
+        >>> Resolution.get_freq_group('day')
+        6000
+ """
+ return get_freq_group(cls.get_freq(resostr))
+
+ @classmethod
+ def get_freq(cls, resostr):
+ """
+ Return frequency str against resolution str.
+
+ Example
+ -------
+        >>> Resolution.get_freq('day')
+ 'D'
+ """
+ return cls._reso_freq_map[resostr]
+
+ @classmethod
+ def get_str_from_freq(cls, freq):
+ """
+ Return resolution str against frequency str.
+
+ Example
+ -------
+ >>> Resolution.get_str_from_freq('H')
+ 'hour'
+ """
+ return cls._freq_reso_map.get(freq, 'day')
+
+ @classmethod
+ def get_reso_from_freq(cls, freq):
+ """
+ Return resolution code against frequency str.
+
+ Example
+ -------
+ >>> Resolution.get_reso_from_freq('H')
+        5
+
+ >>> Resolution.get_reso_from_freq('H') == Resolution.RESO_HR
+ True
+ """
+ return cls.get_reso(cls.get_str_from_freq(freq))
+
+ @classmethod
+ def get_stride_from_decimal(cls, value, freq):
+ """
+ Convert freq with decimal stride into a higher freq with integer stride
+
+ Parameters
+ ----------
+ value : integer or float
+ freq : string
+ Frequency string
+
+ Raises
+ ------
+ ValueError
+ If the float cannot be converted to an integer at any resolution.
+
+ Example
+ -------
+ >>> Resolution.get_stride_from_decimal(1.5, 'T')
+ (90, 'S')
+
+ >>> Resolution.get_stride_from_decimal(1.04, 'H')
+ (3744, 'S')
+
+ >>> Resolution.get_stride_from_decimal(1, 'D')
+ (1, 'D')
+ """
+ if np.isclose(value % 1, 0):
+ return int(value), freq
+ else:
+ start_reso = cls.get_reso_from_freq(freq)
+ if start_reso == 0:
+ raise ValueError("Could not convert to integer offset "
+ "at any resolution")
+
+ next_value = cls._reso_mult_map[start_reso] * value
+ next_name = cls._reso_str_bump_map[freq]
+ return cls.get_stride_from_decimal(next_value, next_name)
+
+
+# ----------------------------------------------------------------------
+# Frequency Inference
+
+
+# TODO: this logic is non-performant (and duplicative); it should simply
+# call unique_1d directly, and there is no reason to depend on khash
+# directly
+cdef unique_deltas(ndarray[int64_t] arr):
+ cdef:
+ Py_ssize_t i, n = len(arr)
+ int64_t val
+ khiter_t k
+ kh_int64_t *table
+ int ret = 0
+ list uniques = []
+
+ table = kh_init_int64()
+ kh_resize_int64(table, 10)
+ for i in range(n - 1):
+ val = arr[i + 1] - arr[i]
+ k = kh_get_int64(table, val)
+ if k == table.n_buckets:
+ kh_put_int64(table, val, &ret)
+ uniques.append(val)
+ kh_destroy_int64(table)
+
+ result = np.array(uniques, dtype=np.int64)
+ result.sort()
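+    # Illustrative note: for arr = [10, 12, 15, 17] the consecutive
+    # differences are [2, 3, 2], so the result is array([2, 3]).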
+ return result
+
+
+def _is_multiple(us, mult):
+ return us % mult == 0
+
+
+def _maybe_add_count(base, count):
+ if count != 1:
+ return '{count}{base}'.format(count=int(count), base=base)
+ else:
+ return base
+
+
+class _FrequencyInferer(object):
+ """
+ Not sure if I can avoid the state machine here
+ """
+
+ def __init__(self, index, warn=True):
+ self.index = index
+ self.values = np.asarray(index).view('i8')
+
+        # This moves the values, which are implicitly in UTC, to the
+        # index's timezone so they are in local time
+ if hasattr(index, 'tz'):
+ if index.tz is not None:
+ self.values = tz_convert(self.values, 'UTC', index.tz)
+
+ self.warn = warn
+
+ if len(index) < 3:
+ raise ValueError('Need at least 3 dates to infer frequency')
+
+ self.is_monotonic = (self.index.is_monotonic_increasing or
+ self.index.is_monotonic_decreasing)
+
+ @cache_readonly
+ def deltas(self):
+ return unique_deltas(self.values)
+
+ @cache_readonly
+ def deltas_asi8(self):
+ return unique_deltas(self.index.asi8)
+
+ @cache_readonly
+ def is_unique(self):
+ return len(self.deltas) == 1
+
+ @cache_readonly
+ def is_unique_asi8(self):
+ return len(self.deltas_asi8) == 1
+
+ def get_freq(self):
+ if not self.is_monotonic or not self.index.is_unique:
+ return None
+
+ delta = self.deltas[0]
+ if _is_multiple(delta, _ONE_DAY):
+ return self._infer_daily_rule()
+ else:
+ # Business hourly, maybe. 17: one day / 65: one weekend
+ if self.hour_deltas in ([1, 17], [1, 65], [1, 17, 65]):
+ return 'BH'
+ # Possibly intraday frequency. Here we use the
+ # original .asi8 values as the modified values
+ # will not work around DST transitions. See #8772
+ elif not self.is_unique_asi8:
+ return None
+ delta = self.deltas_asi8[0]
+ if _is_multiple(delta, _ONE_HOUR):
+ # Hours
+ return _maybe_add_count('H', delta / _ONE_HOUR)
+ elif _is_multiple(delta, _ONE_MINUTE):
+ # Minutes
+ return _maybe_add_count('T', delta / _ONE_MINUTE)
+ elif _is_multiple(delta, _ONE_SECOND):
+ # Seconds
+ return _maybe_add_count('S', delta / _ONE_SECOND)
+ elif _is_multiple(delta, _ONE_MILLI):
+ # Milliseconds
+ return _maybe_add_count('L', delta / _ONE_MILLI)
+ elif _is_multiple(delta, _ONE_MICRO):
+ # Microseconds
+ return _maybe_add_count('U', delta / _ONE_MICRO)
+ else:
+ # Nanoseconds
+ return _maybe_add_count('N', delta)
+
+ @cache_readonly
+ def day_deltas(self):
+ return [x / _ONE_DAY for x in self.deltas]
+
+ @cache_readonly
+ def hour_deltas(self):
+ return [x / _ONE_HOUR for x in self.deltas]
+
+ @cache_readonly
+ def fields(self):
+ return build_field_sarray(self.values)
+
+ @cache_readonly
+ def rep_stamp(self):
+ return Timestamp(self.values[0])
+
+ def month_position_check(self):
+ # TODO: cythonize this, very slow
+ calendar_end = True
+ business_end = True
+ calendar_start = True
+ business_start = True
+
+ years = self.fields['Y']
+ months = self.fields['M']
+ days = self.fields['D']
+ weekdays = self.index.dayofweek
+
+ from calendar import monthrange
+ for y, m, d, wd in zip(years, months, days, weekdays):
+
+ if calendar_start:
+ calendar_start &= d == 1
+ if business_start:
+ business_start &= d == 1 or (d <= 3 and wd == 0)
+
+ if calendar_end or business_end:
+ _, daysinmonth = monthrange(y, m)
+ cal = d == daysinmonth
+ if calendar_end:
+ calendar_end &= cal
+ if business_end:
+ business_end &= cal or (daysinmonth - d < 3 and wd == 4)
+ elif not calendar_start and not business_start:
+ break
+
+ if calendar_end:
+ return 'ce'
+ elif business_end:
+ return 'be'
+ elif calendar_start:
+ return 'cs'
+ elif business_start:
+ return 'bs'
+ else:
+ return None
+
+ @cache_readonly
+ def mdiffs(self):
+ nmonths = self.fields['Y'] * 12 + self.fields['M']
+ return unique_deltas(nmonths.astype('i8'))
+
+ @cache_readonly
+ def ydiffs(self):
+ return unique_deltas(self.fields['Y'].astype('i8'))
+
+ def _infer_daily_rule(self):
+ annual_rule = self._get_annual_rule()
+ if annual_rule:
+ nyears = self.ydiffs[0]
+ month = _MONTH_ALIASES[self.rep_stamp.month]
+ alias = '{prefix}-{month}'.format(prefix=annual_rule, month=month)
+ return _maybe_add_count(alias, nyears)
+
+ quarterly_rule = self._get_quarterly_rule()
+ if quarterly_rule:
+ nquarters = self.mdiffs[0] / 3
+ mod_dict = {0: 12, 2: 11, 1: 10}
+ month = _MONTH_ALIASES[mod_dict[self.rep_stamp.month % 3]]
+ alias = '{prefix}-{month}'.format(prefix=quarterly_rule,
+ month=month)
+ return _maybe_add_count(alias, nquarters)
+
+ monthly_rule = self._get_monthly_rule()
+ if monthly_rule:
+ return _maybe_add_count(monthly_rule, self.mdiffs[0])
+
+ if self.is_unique:
+ days = self.deltas[0] / _ONE_DAY
+ if days % 7 == 0:
+ # Weekly
+ day = _weekday_rule_aliases[self.rep_stamp.weekday()]
+ return _maybe_add_count('W-{day}'.format(day=day), days / 7)
+ else:
+ return _maybe_add_count('D', days)
+
+ if self._is_business_daily():
+ return 'B'
+
+ wom_rule = self._get_wom_rule()
+ if wom_rule:
+ return wom_rule
+
+ def _get_annual_rule(self):
+ if len(self.ydiffs) > 1:
+ return None
+
+ if len(unique(self.fields['M'])) > 1:
+ return None
+
+ pos_check = self.month_position_check()
+ return {'cs': 'AS', 'bs': 'BAS',
+ 'ce': 'A', 'be': 'BA'}.get(pos_check)
+
+ def _get_quarterly_rule(self):
+ if len(self.mdiffs) > 1:
+ return None
+
+ if not self.mdiffs[0] % 3 == 0:
+ return None
+
+ pos_check = self.month_position_check()
+ return {'cs': 'QS', 'bs': 'BQS',
+ 'ce': 'Q', 'be': 'BQ'}.get(pos_check)
+
+ def _get_monthly_rule(self):
+ if len(self.mdiffs) > 1:
+ return None
+ pos_check = self.month_position_check()
+ return {'cs': 'MS', 'bs': 'BMS',
+ 'ce': 'M', 'be': 'BM'}.get(pos_check)
+
+ def _is_business_daily(self):
+ # quick check: cannot be business daily
+ if self.day_deltas != [1, 3]:
+ return False
+
+ # probably business daily, but need to confirm
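+        # (every 3-day shift must span a weekend and land on a Monday;
+        # every other shift must be exactly one day and land on Tue-Fri)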
+ first_weekday = self.index[0].weekday()
+ shifts = np.diff(self.index.asi8)
+ shifts = np.floor_divide(shifts, _ONE_DAY)
+ weekdays = np.mod(first_weekday + np.cumsum(shifts), 7)
+ return np.all(((weekdays == 0) & (shifts == 3)) |
+ ((weekdays > 0) & (weekdays <= 4) & (shifts == 1)))
+
+ def _get_wom_rule(self):
+ # wdiffs = unique(np.diff(self.index.week))
+ # We also need -47, -49, -48 to catch index spanning year boundary
+ # if not lib.ismember(wdiffs, set([4, 5, -47, -49, -48])).all():
+ # return None
+
+ weekdays = unique(self.index.weekday)
+ if len(weekdays) > 1:
+ return None
+
+ week_of_months = unique((self.index.day - 1) // 7)
+ # Only attempt to infer up to WOM-4. See #9425
+ week_of_months = week_of_months[week_of_months < 4]
+ if len(week_of_months) == 0 or len(week_of_months) > 1:
+ return None
+
+ # get which week
+ week = week_of_months[0] + 1
+ wd = _weekday_rule_aliases[weekdays[0]]
+
+ return 'WOM-{week}{weekday}'.format(week=week, weekday=wd)
+
+
+class _TimedeltaFrequencyInferer(_FrequencyInferer):
+
+ def _infer_daily_rule(self):
+ if self.is_unique:
+ days = self.deltas[0] / _ONE_DAY
+ if days % 7 == 0:
+ # Weekly
+ wd = _weekday_rule_aliases[self.rep_stamp.weekday()]
+ alias = 'W-{weekday}'.format(weekday=wd)
+ return _maybe_add_count(alias, days / 7)
+ else:
+ return _maybe_add_count('D', days)
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
index 869ff5ee77bda9..aba213122ea31c 100644
--- a/pandas/_libs/tslibs/timedeltas.pyx
+++ b/pandas/_libs/tslibs/timedeltas.pyx
@@ -26,7 +26,8 @@ from util cimport (is_timedelta64_object, is_datetime64_object,
is_integer_object, is_float_object,
is_string_object)
-from np_datetime cimport cmp_scalar, reverse_ops
+from np_datetime cimport (cmp_scalar, reverse_ops, td64_to_tdstruct,
+ pandas_timedeltastruct)
from nattype import nat_strings, NaT
from nattype cimport _checknull_with_nat
@@ -584,65 +585,26 @@ cdef class _Timedelta(timedelta):
"""
compute the components
"""
- cdef int64_t sfrac, ifrac, frac, ivalue = self.value
-
if self.is_populated:
return
- # put frac in seconds
- frac = ivalue / (1000 * 1000 * 1000)
- if frac < 0:
- self._sign = -1
+ cdef:
+ pandas_timedeltastruct tds
- # even fraction
- if (-frac % 86400) != 0:
- self._d = -frac / 86400 + 1
- frac += 86400 * self._d
- else:
- frac = -frac
+ td64_to_tdstruct(self.value, &tds)
+ self._d = tds.days
+ if self._d < 0:
+ self._sign = -1
else:
self._sign = 1
- self._d = 0
-
- if frac >= 86400:
- self._d += frac / 86400
- frac -= self._d * 86400
-
- if frac >= 3600:
- self._h = frac / 3600
- frac -= self._h * 3600
- else:
- self._h = 0
-
- if frac >= 60:
- self._m = frac / 60
- frac -= self._m * 60
- else:
- self._m = 0
-
- if frac >= 0:
- self._s = frac
- frac -= self._s
- else:
- self._s = 0
-
- sfrac = (self._h * 3600 + self._m * 60
- + self._s) * (1000 * 1000 * 1000)
- if self._sign < 0:
- ifrac = ivalue + self._d * DAY_NS - sfrac
- else:
- ifrac = ivalue - (self._d * DAY_NS + sfrac)
-
- if ifrac != 0:
- self._ms = ifrac / (1000 * 1000)
- ifrac -= self._ms * 1000 * 1000
- self._us = ifrac / 1000
- ifrac -= self._us * 1000
- self._ns = ifrac
- else:
- self._ms = 0
- self._us = 0
- self._ns = 0
+ self._h = tds.hrs
+ self._m = tds.min
+ self._s = tds.sec
+ self._ms = tds.ms
+ self._us = tds.us
+ self._ns = tds.ns
+ self._seconds = tds.seconds
+ self._microseconds = tds.microseconds
self.is_populated = 1
@@ -671,10 +633,6 @@ cdef class _Timedelta(timedelta):
def components(self):
""" Return a Components NamedTuple-like """
self._ensure_components()
- if self._sign < 0:
- return Components(-self._d, self._h, self._m, self._s,
- self._ms, self._us, self._ns)
-
# return the named tuple
return Components(self._d, self._h, self._m, self._s,
self._ms, self._us, self._ns)
@@ -717,8 +675,6 @@ cdef class _Timedelta(timedelta):
.components will return the shown components
"""
self._ensure_components()
- if self._sign < 0:
- return -1 * self._d
return self._d
@property
@@ -729,7 +685,7 @@ cdef class _Timedelta(timedelta):
.components will return the shown components
"""
self._ensure_components()
- return self._h * 3600 + self._m * 60 + self._s
+ return self._seconds
@property
def microseconds(self):
@@ -739,7 +695,7 @@ cdef class _Timedelta(timedelta):
.components will return the shown components
"""
self._ensure_components()
- return self._ms * 1000 + self._us
+ return self._microseconds
@property
def nanoseconds(self):
@@ -778,9 +734,9 @@ cdef class _Timedelta(timedelta):
if format == 'all':
seconds_pretty = "%02d.%03d%03d%03d" % (
self._s, self._ms, self._us, self._ns)
- return "%s%d days%s%02d:%02d:%s" % (sign_pretty, self._d,
- sign2_pretty, self._h,
- self._m, seconds_pretty)
+ return "%d days%s%02d:%02d:%s" % (self._d,
+ sign2_pretty, self._h,
+ self._m, seconds_pretty)
# by default not showing nano
if self._ms or self._us or self._ns:
@@ -794,7 +750,7 @@ cdef class _Timedelta(timedelta):
if format == 'even_day':
if not subs:
- return "%s%d days" % (sign_pretty, self._d)
+ return "%d days" % (self._d)
elif format == 'sub_day':
if not self._d:
@@ -806,10 +762,10 @@ cdef class _Timedelta(timedelta):
self._h, self._m, seconds_pretty)
if subs or format=='long':
- return "%s%d days%s%02d:%02d:%s" % (sign_pretty, self._d,
- sign2_pretty, self._h,
- self._m, seconds_pretty)
- return "%s%d days" % (sign_pretty, self._d)
+ return "%d days%s%02d:%02d:%s" % (self._d,
+ sign2_pretty, self._h,
+ self._m, seconds_pretty)
+ return "%d days" % (self._d)
def __repr__(self):
return "Timedelta('{0}')".format(self._repr_base(format='long'))
diff --git a/pandas/_version.py b/pandas/_version.py
index 4695b512feff5f..0fdb0efde1f055 100644
--- a/pandas/_version.py
+++ b/pandas/_version.py
@@ -75,7 +75,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
if e.errno == errno.ENOENT:
continue
if verbose:
- print("unable to run %s" % dispcmd)
+ print("unable to run {dispcmd}".format(dispcmd=dispcmd))
print(e)
return None
else:
@@ -87,7 +87,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False):
stdout = stdout.decode()
if p.returncode != 0:
if verbose:
- print("unable to run %s (error)" % dispcmd)
+ print("unable to run {dispcmd} (error)".format(dispcmd=dispcmd))
return None
return stdout
@@ -98,8 +98,10 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
dirname = os.path.basename(root)
if not dirname.startswith(parentdir_prefix):
if verbose:
- print("guessing rootdir is '%s', but '%s' doesn't start with "
- "prefix '%s'" % (root, dirname, parentdir_prefix))
+ print("guessing rootdir is '{root}', but '{dirname}' "
+ "doesn't start with prefix '{parentdir_prefix}'".format(
+ root=root, dirname=dirname,
+ parentdir_prefix=parentdir_prefix))
raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
return {"version": dirname[len(parentdir_prefix):],
"full-revisionid": None,
@@ -154,15 +156,15 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# "stabilization", as well as "HEAD" and "master".
tags = set([r for r in refs if re.search(r'\d', r)])
if verbose:
- print("discarding '%s', no digits" % ",".join(refs - tags))
+ print("discarding '{}', no digits".format(",".join(refs - tags)))
if verbose:
- print("likely tags: %s" % ",".join(sorted(tags)))
+ print("likely tags: {}".format(",".join(sorted(tags))))
for ref in sorted(tags):
# sorting will prefer e.g. "2.0" over "2.0rc1"
if ref.startswith(tag_prefix):
r = ref[len(tag_prefix):]
if verbose:
- print("picking %s" % r)
+ print("picking {r}".format(r=r))
return {"version": r,
"full-revisionid": keywords["full"].strip(),
"dirty": False, "error": None
@@ -184,7 +186,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
if not os.path.exists(os.path.join(root, ".git")):
if verbose:
- print("no .git in %s" % root)
+ print("no .git in {root}".format(root=root))
raise NotThisMethod("no .git directory")
GITS = ["git"]
@@ -226,18 +228,21 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
if not mo:
# unparseable. Maybe git-describe is misbehaving?
- pieces["error"] = ("unable to parse git-describe output: '%s'"
- % describe_out)
+ pieces["error"] = ("unable to parse git-describe output: "
+ "'{describe_out}'".format(
+ describe_out=describe_out))
return pieces
# tag
full_tag = mo.group(1)
if not full_tag.startswith(tag_prefix):
if verbose:
- fmt = "tag '%s' doesn't start with prefix '%s'"
- print(fmt % (full_tag, tag_prefix))
- pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
- % (full_tag, tag_prefix))
+ fmt = "tag '{full_tag}' doesn't start with prefix " \
+ "'{tag_prefix}'"
+ print(fmt.format(full_tag=full_tag, tag_prefix=tag_prefix))
+ pieces["error"] = ("tag '{full_tag}' doesn't start with "
+ "prefix '{tag_prefix}'".format(
+                                   full_tag=full_tag, tag_prefix=tag_prefix))
return pieces
pieces["closest-tag"] = full_tag[len(tag_prefix):]
@@ -275,13 +280,13 @@ def render_pep440(pieces):
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += plus_or_dot(pieces)
- rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
+ rendered += "{:d}.g{}".format(pieces["distance"], pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
else:
# exception #1
- rendered = "0+untagged.%d.g%s" % (pieces["distance"],
- pieces["short"])
+ rendered = "0+untagged.{:d}.g{}".format(pieces["distance"],
+ pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
return rendered
@@ -315,17 +320,17 @@ def render_pep440_post(pieces):
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
- rendered += ".post%d" % pieces["distance"]
+ rendered += ".post{:d}".format(pieces["distance"])
if pieces["dirty"]:
rendered += ".dev0"
rendered += plus_or_dot(pieces)
- rendered += "g%s" % pieces["short"]
+ rendered += "g{}".format(pieces["short"])
else:
# exception #1
rendered = "0.post%d" % pieces["distance"]
if pieces["dirty"]:
rendered += ".dev0"
- rendered += "+g%s" % pieces["short"]
+ rendered += "+g{}".format(pieces["short"])
return rendered
@@ -359,7 +364,7 @@ def render_git_describe(pieces):
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"]:
- rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+ rendered += "-{:d}-g{}".format(pieces["distance"], pieces["short"])
else:
# exception #1
rendered = pieces["short"]
@@ -377,7 +382,7 @@ def render_git_describe_long(pieces):
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
- rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+ rendered += "-{:d}-g{}".format(pieces["distance"], pieces["short"])
else:
# exception #1
rendered = pieces["short"]
@@ -409,7 +414,7 @@ def render(pieces, style):
elif style == "git-describe-long":
rendered = render_git_describe_long(pieces)
else:
- raise ValueError("unknown style '%s'" % style)
+ raise ValueError("unknown style '{style}'".format(style=style))
return {"version": rendered, "full-revisionid": pieces["long"],
"dirty": pieces["dirty"], "error": None}
diff --git a/pandas/conftest.py b/pandas/conftest.py
index 90e5ac864e96f4..b9d0087b503068 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -1,8 +1,10 @@
import pytest
+from distutils.version import LooseVersion
import numpy
import pandas
import pandas.util.testing as tm
+import dateutil
def pytest_addoption(parser):
@@ -65,3 +67,11 @@ def ip():
pytest.importorskip('IPython', minversion="6.0.0")
from IPython.core.interactiveshell import InteractiveShell
return InteractiveShell()
+
+
+is_dateutil_le_261 = pytest.mark.skipif(
+ LooseVersion(dateutil.__version__) > '2.6.1',
+ reason="dateutil api change version")
+is_dateutil_gt_261 = pytest.mark.skipif(
+ LooseVersion(dateutil.__version__) <= '2.6.1',
+ reason="dateutil stable version")
diff --git a/pandas/core/api.py b/pandas/core/api.py
index 2f818a400162b3..1f46aaa40e9eb9 100644
--- a/pandas/core/api.py
+++ b/pandas/core/api.py
@@ -24,8 +24,8 @@
from pandas.core.panel import Panel, WidePanel
from pandas.core.panel4d import Panel4D
from pandas.core.reshape.reshape import (
- pivot_simple as pivot, get_dummies,
- lreshape, wide_to_long)
+ pivot_simple as pivot, get_dummies)
+from pandas.core.reshape.melt import lreshape, wide_to_long
from pandas.core.indexing import IndexSlice
from pandas.core.tools.numeric import to_numeric
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index 13ea0eaf649303..645921bb007a16 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -25,7 +25,6 @@
is_timedelta64_dtype,
is_categorical,
is_categorical_dtype,
- is_integer_dtype,
is_list_like, is_sequence,
is_scalar,
is_dict_like)
@@ -261,6 +260,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
# c.) infer from values
if dtype is not None:
+ # The dtype argument takes precedence over values.dtype (if any)
if isinstance(dtype, compat.string_types):
if dtype == 'category':
dtype = CategoricalDtype(categories, ordered)
@@ -275,9 +275,12 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
ordered = dtype.ordered
elif is_categorical(values):
+ # If no "dtype" was passed, use the one from "values", but honor
+ # the "ordered" and "categories" arguments
dtype = values.dtype._from_categorical_dtype(values.dtype,
categories, ordered)
else:
+ # If dtype=None and values is not categorical, create a new dtype
dtype = CategoricalDtype(categories, ordered)
# At this point, dtype is always a CategoricalDtype
@@ -294,28 +297,12 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
# sanitize input
if is_categorical_dtype(values):
+ if dtype.categories is None:
+ dtype = CategoricalDtype(values.categories, dtype.ordered)
- # we are either a Series or a CategoricalIndex
- if isinstance(values, (ABCSeries, ABCCategoricalIndex)):
- values = values._values
-
- if ordered is None:
- ordered = values.ordered
- if categories is None:
- categories = values.categories
- values = values.get_values()
-
- elif isinstance(values, (ABCIndexClass, ABCSeries)):
- # we'll do inference later
- pass
-
- else:
-
- # on numpy < 1.6 datetimelike get inferred to all i8 by
- # _sanitize_array which is fine, but since factorize does this
- # correctly no need here this is an issue because _sanitize_array
- # also coerces np.nan to a string under certain versions of numpy
- # as well
+ elif not isinstance(values, (ABCIndexClass, ABCSeries)):
+ # _sanitize_array coerces np.nan to a string under certain versions
+ # of numpy
values = maybe_infer_to_datetimelike(values, convert_dates=True)
if not isinstance(values, np.ndarray):
values = _convert_to_list_like(values)
@@ -335,7 +322,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
codes, categories = factorize(values, sort=True)
except TypeError:
codes, categories = factorize(values, sort=False)
- if ordered:
+ if dtype.ordered:
# raise, as we don't have a sortable data structure and so
# the user should give us one by specifying categories
raise TypeError("'values' is not ordered, please "
@@ -347,34 +334,18 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
raise NotImplementedError("> 1 ndim Categorical are not "
"supported at this time")
- if dtype.categories is None:
- # we're inferring from values
- dtype = CategoricalDtype(categories, ordered)
+ # we're inferring from values
+ dtype = CategoricalDtype(categories, dtype.ordered)
- else:
- # there were two ways if categories are present
- # - the old one, where each value is a int pointer to the levels
- # array -> not anymore possible, but code outside of pandas could
- # call us like that, so make some checks
- # - the new one, where each value is also in the categories array
- # (or np.nan)
+ elif is_categorical_dtype(values):
+ old_codes = (values.cat.codes if isinstance(values, ABCSeries)
+ else values.codes)
+ codes = _recode_for_categories(old_codes, values.dtype.categories,
+ dtype.categories)
+ else:
codes = _get_codes_for_values(values, dtype.categories)
- # TODO: check for old style usage. These warnings should be removes
- # after 0.18/ in 2016
- if (is_integer_dtype(values) and
- not is_integer_dtype(dtype.categories)):
- warn("Values and categories have different dtypes. Did you "
- "mean to use\n'Categorical.from_codes(codes, "
- "categories)'?", RuntimeWarning, stacklevel=2)
-
- if (len(values) and is_integer_dtype(values) and
- (codes == -1).all()):
- warn("None of the categories were found in values. Did you "
- "mean to use\n'Categorical.from_codes(codes, "
- "categories)'?", RuntimeWarning, stacklevel=2)
-
if null_mask.any():
# Reinsert -1 placeholders for previously removed missing values
full_codes = - np.ones(null_mask.shape, dtype=codes.dtype)
diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py
index 196f4b26795768..f44fa347cb053b 100644
--- a/pandas/core/computation/eval.py
+++ b/pandas/core/computation/eval.py
@@ -3,6 +3,7 @@
"""Top level ``eval`` module.
"""
+import warnings
import tokenize
from pandas.io.formats.printing import pprint_thing
from pandas.core.computation.scope import _ensure_scope
@@ -303,7 +304,8 @@ def eval(expr, parser='pandas', engine=None, truediv=True,
"if there is no assignment")
# assign if needed
- if env.target is not None and parsed_expr.assigner is not None:
+ assigner = parsed_expr.assigner
+ if env.target is not None and assigner is not None:
target_modified = True
# if returning a copy, copy only on the first assignment
@@ -317,22 +319,25 @@ def eval(expr, parser='pandas', engine=None, truediv=True,
# TypeError is most commonly raised (e.g. int, list), but you
# get IndexError if you try to do this assignment on np.ndarray.
+ # we will ignore numpy warnings here; e.g. if trying
+ # to use a non-numeric indexer
try:
- target[parsed_expr.assigner] = ret
+ with warnings.catch_warnings(record=True):
+ target[assigner] = ret
except (TypeError, IndexError):
raise ValueError("Cannot assign expression output to target")
if not resolvers:
- resolvers = ({parsed_expr.assigner: ret},)
+ resolvers = ({assigner: ret},)
else:
# existing resolver needs updated to handle
# case of mutating existing column in copy
for resolver in resolvers:
- if parsed_expr.assigner in resolver:
- resolver[parsed_expr.assigner] = ret
+ if assigner in resolver:
+ resolver[assigner] = ret
break
else:
- resolvers += ({parsed_expr.assigner: ret},)
+ resolvers += ({assigner: ret},)
ret = None
first_expr = False
diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py
index ae956bce113294..23abfa8b3fca14 100644
--- a/pandas/core/computation/expr.py
+++ b/pandas/core/computation/expr.py
@@ -307,7 +307,14 @@ def __init__(self, env, engine, parser, preparser=_preparse):
def visit(self, node, **kwargs):
if isinstance(node, string_types):
clean = self.preparser(node)
- node = ast.fix_missing_locations(ast.parse(clean))
+ try:
+ node = ast.fix_missing_locations(ast.parse(clean))
+ except SyntaxError as e:
+ from keyword import iskeyword
+ if any(iskeyword(x) for x in clean.split()):
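+                    # e.g. df.query('class == 1') ends up here, because
+                    # 'class' is a Python keyword (illustrative example)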
+ e.msg = ("Python keyword not valid identifier"
+ " in numexpr query")
+ raise e
method = 'visit_' + node.__class__.__name__
visitor = getattr(self, method)
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index f3b11e52cdd7ad..eae283e9bc00da 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -136,7 +136,7 @@ def trans(x): # noqa
try:
if np.allclose(new_result, result, rtol=0):
return new_result
- except:
+ except Exception:
# comparison of an object dtype with a number type could
# hit here
@@ -151,14 +151,14 @@ def trans(x): # noqa
elif dtype.kind in ['M', 'm'] and result.dtype.kind in ['i', 'f']:
try:
result = result.astype(dtype)
- except:
+ except Exception:
if dtype.tz:
# convert to datetime and change timezone
from pandas import to_datetime
result = to_datetime(result).tz_localize('utc')
result = result.tz_convert(dtype.tz)
- except:
+ except Exception:
pass
return result
@@ -210,7 +210,7 @@ def changeit():
new_result[mask] = om_at
result[:] = new_result
return result, False
- except:
+ except Exception:
pass
# we are forced to change the dtype of the result as the input
@@ -243,7 +243,7 @@ def changeit():
try:
np.place(result, mask, other)
- except:
+ except Exception:
return changeit()
return result, False
@@ -274,14 +274,14 @@ def maybe_promote(dtype, fill_value=np.nan):
if issubclass(dtype.type, np.datetime64):
try:
fill_value = tslib.Timestamp(fill_value).value
- except:
+ except Exception:
# the proper thing to do here would probably be to upcast
# to object (but numpy 1.6.1 doesn't do this properly)
fill_value = iNaT
elif issubclass(dtype.type, np.timedelta64):
try:
fill_value = lib.Timedelta(fill_value).value
- except:
+ except Exception:
# as for datetimes, cannot upcast to object
fill_value = iNaT
else:
@@ -592,12 +592,12 @@ def maybe_convert_scalar(values):
def coerce_indexer_dtype(indexer, categories):
""" coerce the indexer input array to the smallest dtype possible """
- l = len(categories)
- if l < _int8_max:
+ length = len(categories)
+ if length < _int8_max:
return _ensure_int8(indexer)
- elif l < _int16_max:
+ elif length < _int16_max:
return _ensure_int16(indexer)
- elif l < _int32_max:
+ elif length < _int32_max:
return _ensure_int32(indexer)
return _ensure_int64(indexer)
@@ -629,7 +629,7 @@ def conv(r, dtype):
r = float(r)
elif dtype.kind == 'i':
r = int(r)
- except:
+ except Exception:
pass
return r
@@ -756,7 +756,7 @@ def maybe_convert_objects(values, convert_dates=True, convert_numeric=True,
if not isna(new_values).all():
values = new_values
- except:
+ except Exception:
pass
else:
# soft-conversion
@@ -817,7 +817,7 @@ def soft_convert_objects(values, datetime=True, numeric=True, timedelta=True,
# If all NaNs, then do not-alter
values = converted if not isna(converted).all() else values
values = values.copy() if copy else values
- except:
+ except Exception:
pass
return values
@@ -888,10 +888,10 @@ def try_datetime(v):
try:
from pandas import to_datetime
return to_datetime(v)
- except:
+ except Exception:
pass
- except:
+ except Exception:
pass
return v.reshape(shape)
@@ -903,7 +903,7 @@ def try_timedelta(v):
from pandas import to_timedelta
try:
return to_timedelta(v)._values.reshape(shape)
- except:
+ except Exception:
return v.reshape(shape)
inferred_type = lib.infer_datetimelike_array(_ensure_object(v))
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 4e15aa50e43194..23884869a4d9f6 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -571,12 +571,14 @@ def _concat_rangeindex_same_dtype(indexes):
indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6)
indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5])
"""
+ from pandas import Int64Index, RangeIndex
start = step = next = None
- for obj in indexes:
- if not len(obj):
- continue
+    # Filter out the empty indexes
+ non_empty_indexes = [obj for obj in indexes if len(obj)]
+
+ for obj in non_empty_indexes:
if start is None:
# This is set by the first non-empty index
@@ -586,21 +588,23 @@ def _concat_rangeindex_same_dtype(indexes):
elif step is None:
# First non-empty index had only one element
if obj._start == start:
- from pandas import Int64Index
return _concat_index_same_dtype(indexes, klass=Int64Index)
step = obj._start - start
non_consecutive = ((step != obj._step and len(obj) > 1) or
(next is not None and obj._start != next))
if non_consecutive:
- from pandas import Int64Index
return _concat_index_same_dtype(indexes, klass=Int64Index)
if step is not None:
next = obj[-1] + step
- if start is None:
- start = obj._start
- step = obj._step
- stop = obj._stop if next is None else next
- return indexes[0].__class__(start, stop, step)
+ if non_empty_indexes:
+ # Get the stop value from "next" or alternatively
+ # from the last non-empty index
+ stop = non_empty_indexes[-1]._stop if next is None else next
+ return RangeIndex(start, stop, step)
+
+ # Here all "indexes" had 0 length, i.e. were empty.
+ # In this case return an empty range index.
+ return RangeIndex(0, 0)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 70f1ff0a5380dc..982b27fd21fb55 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2267,7 +2267,8 @@ def query(self, expr, inplace=False, **kwargs):
by default, which allows you to treat both the index and columns of the
frame as a column in the frame.
The identifier ``index`` is used for the frame index; you can also
- use the name of the index to identify it in a query.
+ use the name of the index to identify it in a query. Please note that
+ Python keywords may not be used as identifiers.
For further details and examples see the ``query`` documentation in
:ref:`indexing `.
@@ -4028,6 +4029,8 @@ def combine(self, other, func, fill_value=None, overwrite=True):
----------
other : DataFrame
func : function
+        Function that takes two Series as inputs and returns a Series or a
+        scalar
fill_value : scalar value
overwrite : boolean, default True
If True then overwrite values for common keys in the calling frame
@@ -4035,8 +4038,21 @@ def combine(self, other, func, fill_value=None, overwrite=True):
Returns
-------
result : DataFrame
- """
+ Examples
+ --------
+ >>> df1 = DataFrame({'A': [0, 0], 'B': [4, 4]})
+ >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]})
+ >>> df1.combine(df2, lambda s1, s2: s1 if s1.sum() < s2.sum() else s2)
+ A B
+ 0 0 3
+ 1 0 3
+
+ See Also
+ --------
+ DataFrame.combine_first : Combine two DataFrame objects and default to
+ non-null values in frame calling the method
+ """
other_idxlen = len(other.index) # save for compare
this, other = self.align(other, copy=False)
@@ -4124,16 +4140,24 @@ def combine_first(self, other):
----------
other : DataFrame
+ Returns
+ -------
+ combined : DataFrame
+
Examples
--------
- a's values prioritized, use values from b to fill holes:
-
- >>> a.combine_first(b)
+        df1's values are prioritized; df2's values are used to fill holes:
+
+ >>> df1 = pd.DataFrame([[1, np.nan]])
+ >>> df2 = pd.DataFrame([[3, 4]])
+ >>> df1.combine_first(df2)
+ 0 1
+ 0 1 4.0
- Returns
- -------
- combined : DataFrame
+ See Also
+ --------
+ DataFrame.combine : Perform series-wise operation on two DataFrames
+ using a given function
"""
import pandas.core.computation.expressions as expressions
@@ -4637,7 +4661,7 @@ def unstack(self, level=-1, fill_value=None):
other='melt'))
def melt(self, id_vars=None, value_vars=None, var_name=None,
value_name='value', col_level=None):
- from pandas.core.reshape.reshape import melt
+ from pandas.core.reshape.melt import melt
return melt(self, id_vars=id_vars, value_vars=value_vars,
var_name=var_name, value_name=value_name,
col_level=col_level)
@@ -5781,7 +5805,12 @@ def idxmin(self, axis=0, skipna=True):
0 or 'index' for row-wise, 1 or 'columns' for column-wise
skipna : boolean, default True
Exclude NA/null values. If an entire row/column is NA, the result
- will be NA
+ will be NA.
+
+ Raises
+ ------
+ ValueError
+ * If the row/column is empty
Returns
-------
@@ -5812,7 +5841,12 @@ def idxmax(self, axis=0, skipna=True):
0 or 'index' for row-wise, 1 or 'columns' for column-wise
skipna : boolean, default True
Exclude NA/null values. If an entire row/column is NA, the result
- will be first index.
+ will be NA.
+
+ Raises
+ ------
+ ValueError
+ * If the row/column is empty
Returns
-------
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index f1edfe276dfad8..8b2a15e6d16668 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -5092,14 +5092,15 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True,
Parameters
----------
- by : mapping, function, str, or iterable
+ by : mapping, function, label, or list of labels
Used to determine the groups for the groupby.
If ``by`` is a function, it's called on each value of the object's
index. If a dict or Series is passed, the Series or dict VALUES
will be used to determine the groups (the Series' values are first
aligned; see ``.align()`` method). If an ndarray is passed, the
- values are used as-is determine the groups. A str or list of strs
- may be passed to group by the columns in ``self``
+            values are used as-is to determine the groups. A label or list
+            of labels may be passed to group by the columns in ``self``.
+            Notice that a tuple is interpreted as a (single) key.
axis : int, default 0
level : int, level name, or sequence of such, default None
If the axis is a MultiIndex (hierarchical), group by a particular
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 8db75accc84e52..7a58b7d358fbb4 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -1913,7 +1913,10 @@ def size(self):
"""
ids, _, ngroup = self.group_info
ids = _ensure_platform_int(ids)
- out = np.bincount(ids[ids != -1], minlength=ngroup or None)
+ if ngroup:
+ out = np.bincount(ids[ids != -1], minlength=ngroup)
+ else:
+ out = ids
return Series(out,
index=self.result_index,
dtype='int64')
@@ -2704,7 +2707,6 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
"""
group_axis = obj._get_axis(axis)
- is_axis_multiindex = isinstance(obj._info_axis, MultiIndex)
# validate that the passed single level is compatible with the passed
# axis of the object
@@ -2765,9 +2767,8 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
elif isinstance(key, BaseGrouper):
return key, [], obj
- # when MultiIndex, allow tuple to be a key
- if not isinstance(key, (tuple, list)) or \
- (isinstance(key, tuple) and is_axis_multiindex):
+ # Everything which is not a list is a key (including tuples):
+ if not isinstance(key, list):
keys = [key]
match_axis_length = False
else:
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 57d2d07294a53c..eb96cbad70099a 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2032,7 +2032,7 @@ def equals(self, other):
try:
return array_equivalent(_values_from_object(self),
_values_from_object(other))
- except:
+ except Exception:
return False
def identical(self, other):
@@ -2315,7 +2315,7 @@ def intersection(self, other):
try:
indexer = Index(other._values).get_indexer(self._values)
indexer = indexer.take((indexer != -1).nonzero()[0])
- except:
+ except Exception:
# duplicates
indexer = algos.unique1d(
Index(other._values).get_indexer_non_unique(self._values)[0])
@@ -3022,13 +3022,13 @@ def _reindex_non_unique(self, target):
new_indexer = None
if len(missing):
- l = np.arange(len(indexer))
+ length = np.arange(len(indexer))
missing = _ensure_platform_int(missing)
missing_labels = target.take(missing)
- missing_indexer = _ensure_int64(l[~check])
+ missing_indexer = _ensure_int64(length[~check])
cur_labels = self.take(indexer[check]).values
- cur_indexer = _ensure_int64(l[check])
+ cur_indexer = _ensure_int64(length[check])
new_labels = np.empty(tuple([len(indexer)]), dtype=object)
new_labels[cur_indexer] = cur_labels
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
index 78869de318dce9..2e022cb1510085 100644
--- a/pandas/core/indexes/datetimes.py
+++ b/pandas/core/indexes/datetimes.py
@@ -55,8 +55,7 @@
from pandas._libs import (lib, index as libindex, tslib as libts,
algos as libalgos, join as libjoin,
Timestamp, period as libperiod)
-from pandas._libs.tslibs import timezones
-
+from pandas._libs.tslibs import timezones, conversion
# -------- some conversion wrapper functions
@@ -384,8 +383,8 @@ def __new__(cls, data=None,
getattr(data, 'tz', None) is None):
# Convert tz-naive to UTC
ints = subarr.view('i8')
- subarr = libts.tz_localize_to_utc(ints, tz,
- ambiguous=ambiguous)
+ subarr = conversion.tz_localize_to_utc(ints, tz,
+ ambiguous=ambiguous)
subarr = subarr.view(_NS_DTYPE)
subarr = cls._simple_new(subarr, name=name, freq=freq, tz=tz)
@@ -449,7 +448,7 @@ def _generate(cls, start, end, periods, name, offset,
try:
inferred_tz = timezones.infer_tzinfo(start, end)
- except:
+ except Exception:
raise TypeError('Start and end cannot both be tz-aware with '
'different timezones')
@@ -531,8 +530,8 @@ def _generate(cls, start, end, periods, name, offset,
index = _generate_regular_range(start, end, periods, offset)
if tz is not None and getattr(index, 'tz', None) is None:
- index = libts.tz_localize_to_utc(_ensure_int64(index), tz,
- ambiguous=ambiguous)
+ index = conversion.tz_localize_to_utc(_ensure_int64(index), tz,
+ ambiguous=ambiguous)
index = index.view(_NS_DTYPE)
# index is localized datetime64 array -> have to convert
@@ -561,11 +560,11 @@ def _convert_for_op(self, value):
def _local_timestamps(self):
if self.is_monotonic:
- return libts.tz_convert(self.asi8, utc, self.tz)
+ return conversion.tz_convert(self.asi8, utc, self.tz)
else:
values = self.asi8
indexer = values.argsort()
- result = libts.tz_convert(values.take(indexer), utc, self.tz)
+ result = conversion.tz_convert(values.take(indexer), utc, self.tz)
n = len(indexer)
reverse = np.empty(n, dtype=np.int_)
@@ -1176,12 +1175,12 @@ def __iter__(self):
# convert in chunks of 10k for efficiency
data = self.asi8
- l = len(self)
+ length = len(self)
chunksize = 10000
- chunks = int(l / chunksize) + 1
+ chunks = int(length / chunksize) + 1
for i in range(chunks):
start_i = i * chunksize
- end_i = min((i + 1) * chunksize, l)
+ end_i = min((i + 1) * chunksize, length)
converted = libts.ints_to_pydatetime(data[start_i:end_i],
tz=self.tz, freq=self.freq,
box=True)
@@ -1644,7 +1643,7 @@ def normalize(self):
-------
normalized : DatetimeIndex
"""
- new_values = libts.date_normalize(self.asi8, self.tz)
+ new_values = conversion.date_normalize(self.asi8, self.tz)
return DatetimeIndex(new_values, freq='infer', name=self.name,
tz=self.tz)
@@ -1683,7 +1682,7 @@ def is_normalized(self):
"""
Returns True if all of the dates are at midnight ("no time")
"""
- return libts.dates_normalized(self.asi8, self.tz)
+ return conversion.is_date_array_normalized(self.asi8, self.tz)
@cache_readonly
def _resolution(self):
@@ -1724,7 +1723,7 @@ def insert(self, loc, item):
new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)],
self[loc:].asi8))
if self.tz is not None:
- new_dates = libts.tz_convert(new_dates, 'UTC', self.tz)
+ new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz)
return DatetimeIndex(new_dates, name=self.name, freq=freq,
tz=self.tz)
@@ -1764,7 +1763,7 @@ def delete(self, loc):
freq = self.freq
if self.tz is not None:
- new_dates = libts.tz_convert(new_dates, 'UTC', self.tz)
+ new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz)
return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz)
def tz_convert(self, tz):
@@ -1844,16 +1843,16 @@ def tz_localize(self, tz, ambiguous='raise', errors='raise'):
"""
if self.tz is not None:
if tz is None:
- new_dates = libts.tz_convert(self.asi8, 'UTC', self.tz)
+ new_dates = conversion.tz_convert(self.asi8, 'UTC', self.tz)
else:
raise TypeError("Already tz-aware, use tz_convert to convert.")
else:
tz = timezones.maybe_get_tz(tz)
# Convert to UTC
- new_dates = libts.tz_localize_to_utc(self.asi8, tz,
- ambiguous=ambiguous,
- errors=errors)
+ new_dates = conversion.tz_localize_to_utc(self.asi8, tz,
+ ambiguous=ambiguous,
+ errors=errors)
new_dates = new_dates.view(_NS_DTYPE)
return self._shallow_copy(new_dates, tz=tz)
@@ -2194,7 +2193,7 @@ def _to_m8(key, tz=None):
# this also converts strings
key = Timestamp(key, tz=tz)
- return np.int64(libts.pydt_to_i8(key)).view(_NS_DTYPE)
+ return np.int64(conversion.pydt_to_i8(key)).view(_NS_DTYPE)
_CACHE_START = Timestamp(datetime(1950, 1, 1))
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 4cc59f52970589..e6294f7d47aff2 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -2,7 +2,6 @@
# pylint: disable=E1101,E1103,W0232
import datetime
import warnings
-from functools import partial
from sys import getsizeof
import numpy as np
@@ -28,8 +27,7 @@
is_true_slices)
import pandas.core.base as base
-from pandas.util._decorators import (Appender, cache_readonly,
- deprecate, deprecate_kwarg)
+from pandas.util._decorators import Appender, cache_readonly, deprecate_kwarg
import pandas.core.common as com
import pandas.core.missing as missing
import pandas.core.algorithms as algos
@@ -177,7 +175,8 @@ def _verify_integrity(self, labels=None, levels=None):
" inconsistent state" % (i, label.max(),
len(level)))
- def _get_levels(self):
+ @property
+ def levels(self):
return self._levels
def _set_levels(self, levels, level=None, copy=False, validate=True,
@@ -279,14 +278,8 @@ def set_levels(self, levels, level=None, inplace=False,
if not inplace:
return idx
- # remove me in 0.14 and change to read only property
- __set_levels = deprecate("setting `levels` directly",
- partial(set_levels, inplace=True,
- verify_integrity=True),
- alt_name="set_levels")
- levels = property(fget=_get_levels, fset=__set_levels)
-
- def _get_labels(self):
+ @property
+ def labels(self):
return self._labels
def _set_labels(self, labels, level=None, copy=False, validate=True,
@@ -379,13 +372,6 @@ def set_labels(self, labels, level=None, inplace=False,
if not inplace:
return idx
- # remove me in 0.14 and change to readonly property
- __set_labels = deprecate("setting labels directly",
- partial(set_labels, inplace=True,
- verify_integrity=True),
- alt_name="set_labels")
- labels = property(fget=_get_labels, fset=__set_labels)
-
def copy(self, names=None, dtype=None, levels=None, labels=None,
deep=False, _set_identity=False, **kwargs):
"""
@@ -446,6 +432,17 @@ def _shallow_copy_with_infer(self, values=None, **kwargs):
**kwargs)
return self._shallow_copy(values, **kwargs)
+ @Appender(_index_shared_docs['__contains__'] % _index_doc_kwargs)
+ def __contains__(self, key):
+ hash(key)
+ try:
+ self.get_loc(key)
+ return True
+ except (LookupError, TypeError):
+ return False
+
+ contains = __contains__
+
@Appender(_index_shared_docs['_shallow_copy'])
def _shallow_copy(self, values=None, **kwargs):
if values is not None:
@@ -809,9 +806,10 @@ def duplicated(self, keep='first'):
return duplicated_int64(ids, keep)
- @Appender(ibase._index_shared_docs['fillna'])
def fillna(self, value=None, downcast=None):
- # isna is not implemented for MultiIndex
+ """
+ fillna is not implemented for MultiIndex
+ """
raise NotImplementedError('isna is not defined for MultiIndex')
@Appender(_index_shared_docs['dropna'])
@@ -1370,17 +1368,6 @@ def nlevels(self):
def levshape(self):
return tuple(len(x) for x in self.levels)
- @Appender(_index_shared_docs['__contains__'] % _index_doc_kwargs)
- def __contains__(self, key):
- hash(key)
- try:
- self.get_loc(key)
- return True
- except LookupError:
- return False
-
- contains = __contains__
-
def __reduce__(self):
"""Necessary for making this object picklable"""
d = dict(levels=[lev for lev in self.levels],
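In multi.py, `levels` and `labels` become plain read-only properties (the long-deprecated direct setters are removed) and the relocated `__contains__` now also treats a TypeError from get_loc as "not present". A small sketch of the user-visible effect, assuming this branch:

import pandas as pd

mi = pd.MultiIndex.from_tuples([('a', 1), ('b', 2)])
assert ('a', 1) in mi                         # membership still works as before
try:
    mi.levels = [['x', 'y'], [1, 2]]          # direct assignment is gone with the deprecated setter
except AttributeError:
    mi = mi.set_levels([['x', 'y'], [1, 2]])  # the supported spelling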
diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
index a6d5690767c10b..df242e657c9d7b 100644
--- a/pandas/core/indexes/period.py
+++ b/pandas/core/indexes/period.py
@@ -36,6 +36,7 @@
get_period_field_arr, _validate_end_alias,
_quarter_to_myear)
from pandas._libs.tslibs.fields import isleapyear_arr
+from pandas._libs.tslibs import resolution
from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds
from pandas.core.base import _shared_docs
@@ -752,8 +753,8 @@ def get_value(self, series, key):
except (KeyError, IndexError):
try:
asdt, parsed, reso = parse_time_string(key, self.freq)
- grp = frequencies.Resolution.get_freq_group(reso)
- freqn = frequencies.get_freq_group(self.freq)
+ grp = resolution.Resolution.get_freq_group(reso)
+ freqn = resolution.get_freq_group(self.freq)
vals = self._values
@@ -912,8 +913,8 @@ def _get_string_slice(self, key):
'ordered time series')
key, parsed, reso = parse_time_string(key, self.freq)
- grp = frequencies.Resolution.get_freq_group(reso)
- freqn = frequencies.get_freq_group(self.freq)
+ grp = resolution.Resolution.get_freq_group(reso)
+ freqn = resolution.get_freq_group(self.freq)
if reso in ['day', 'hour', 'minute', 'second'] and not grp < freqn:
raise KeyError(key)
diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py
index a4a5f7df9aa0f8..e4bc46fb7bdbed 100644
--- a/pandas/core/indexes/timedeltas.py
+++ b/pandas/core/indexes/timedeltas.py
@@ -35,20 +35,15 @@
from pandas._libs import (lib, index as libindex, tslib as libts,
join as libjoin, Timedelta, NaT, iNaT)
from pandas._libs.tslibs.timedeltas import array_to_timedelta64
+from pandas._libs.tslibs.fields import get_timedelta_field
def _field_accessor(name, alias, docstring=None):
def f(self):
+ values = self.asi8
+ result = get_timedelta_field(values, alias)
if self.hasnans:
- result = np.empty(len(self), dtype='float64')
- mask = self._isnan
- imask = ~mask
- result.flat[imask] = np.array([getattr(Timedelta(val), alias)
- for val in self.asi8[imask]])
- result[mask] = np.nan
- else:
- result = np.array([getattr(Timedelta(val), alias)
- for val in self.asi8], dtype='int64')
+ result = self._maybe_mask_results(result, convert='float64')
return Index(result, name=self.name)
@@ -841,7 +836,7 @@ def insert(self, loc, item):
if _is_convertible_to_td(item):
try:
item = Timedelta(item)
- except:
+ except Exception:
pass
freq = None
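The rewritten `_field_accessor` replaces the per-element Timedelta loop with the cython `get_timedelta_field` call and masks NaT positions afterwards. A sketch of the expected result, assuming this branch (the exact Index subclass in the repr may differ):

import pandas as pd

tdi = pd.TimedeltaIndex(['1 days 02:00:00', pd.NaT])
tdi.days      # 1.0 and NaN: NaT rows are masked to NaN, so the result becomes float
tdi.seconds   # 7200.0 and NaN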
diff --git a/pandas/core/reshape/api.py b/pandas/core/reshape/api.py
index c75e0341918bb6..99286d807a2053 100644
--- a/pandas/core/reshape/api.py
+++ b/pandas/core/reshape/api.py
@@ -1,7 +1,7 @@
# flake8: noqa
from pandas.core.reshape.concat import concat
-from pandas.core.reshape.reshape import melt
+from pandas.core.reshape.melt import melt
from pandas.core.reshape.merge import (
merge, ordered_merge, merge_ordered, merge_asof)
from pandas.core.reshape.pivot import pivot_table, crosstab
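`melt` (together with `lreshape` and `wide_to_long` below) moves into its own `pandas.core.reshape.melt` module; the public entry points are meant to stay identical. A quick equivalence check, assuming this branch is installed:

import pandas as pd
from pandas.core.reshape.melt import melt   # new internal home

df = pd.DataFrame({'id': [1, 2], 'x': [10, 20], 'y': [30, 40]})
assert melt(df, id_vars='id').equals(pd.melt(df, id_vars='id'))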
diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py
new file mode 100644
index 00000000000000..846d04221fe7f6
--- /dev/null
+++ b/pandas/core/reshape/melt.py
@@ -0,0 +1,386 @@
+# pylint: disable=E1101,E1103
+# pylint: disable=W0703,W0622,W0613,W0201
+import numpy as np
+
+from pandas.core.dtypes.common import is_list_like
+from pandas import compat
+from pandas.core.categorical import Categorical
+
+from pandas.core.frame import DataFrame
+from pandas.core.index import MultiIndex
+
+from pandas.core.frame import _shared_docs
+from pandas.util._decorators import Appender
+
+import re
+import pandas.core.dtypes.concat as _concat
+from pandas.core.dtypes.missing import notna
+
+
+@Appender(_shared_docs['melt'] %
+ dict(caller='pd.melt(df, ',
+ versionadded="",
+ other='DataFrame.melt'))
+def melt(frame, id_vars=None, value_vars=None, var_name=None,
+ value_name='value', col_level=None):
+ # TODO: what about the existing index?
+ if id_vars is not None:
+ if not is_list_like(id_vars):
+ id_vars = [id_vars]
+ elif (isinstance(frame.columns, MultiIndex) and
+ not isinstance(id_vars, list)):
+ raise ValueError('id_vars must be a list of tuples when columns'
+ ' are a MultiIndex')
+ else:
+ id_vars = list(id_vars)
+ else:
+ id_vars = []
+
+ if value_vars is not None:
+ if not is_list_like(value_vars):
+ value_vars = [value_vars]
+ elif (isinstance(frame.columns, MultiIndex) and
+ not isinstance(value_vars, list)):
+ raise ValueError('value_vars must be a list of tuples when'
+ ' columns are a MultiIndex')
+ else:
+ value_vars = list(value_vars)
+ frame = frame.loc[:, id_vars + value_vars]
+ else:
+ frame = frame.copy()
+
+ if col_level is not None: # allow list or other?
+ # frame is a copy
+ frame.columns = frame.columns.get_level_values(col_level)
+
+ if var_name is None:
+ if isinstance(frame.columns, MultiIndex):
+ if len(frame.columns.names) == len(set(frame.columns.names)):
+ var_name = frame.columns.names
+ else:
+ var_name = ['variable_{i}'.format(i=i)
+ for i in range(len(frame.columns.names))]
+ else:
+ var_name = [frame.columns.name if frame.columns.name is not None
+ else 'variable']
+ if isinstance(var_name, compat.string_types):
+ var_name = [var_name]
+
+ N, K = frame.shape
+ K -= len(id_vars)
+
+ mdata = {}
+ for col in id_vars:
+ mdata[col] = np.tile(frame.pop(col).values, K)
+
+ mcolumns = id_vars + var_name + [value_name]
+
+ mdata[value_name] = frame.values.ravel('F')
+ for i, col in enumerate(var_name):
+ # asanyarray will keep the columns as an Index
+ mdata[col] = np.asanyarray(frame.columns
+ ._get_level_values(i)).repeat(N)
+
+ return DataFrame(mdata, columns=mcolumns)
+
+
+def lreshape(data, groups, dropna=True, label=None):
+ """
+ Reshape long-format data to wide. Generalized inverse of DataFrame.pivot
+
+ Parameters
+ ----------
+ data : DataFrame
+ groups : dict
+ {new_name : list_of_columns}
+ dropna : boolean, default True
+
+ Examples
+ --------
+ >>> import pandas as pd
+ >>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526],
+ ... 'team': ['Red Sox', 'Yankees'],
+ ... 'year1': [2007, 2007], 'year2': [2008, 2008]})
+ >>> data
+ hr1 hr2 team year1 year2
+ 0 514 545 Red Sox 2007 2008
+ 1 573 526 Yankees 2007 2008
+
+ >>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']})
+ team year hr
+ 0 Red Sox 2007 514
+ 1 Yankees 2007 573
+ 2 Red Sox 2008 545
+ 3 Yankees 2008 526
+
+ Returns
+ -------
+ reshaped : DataFrame
+ """
+ if isinstance(groups, dict):
+ keys = list(groups.keys())
+ values = list(groups.values())
+ else:
+ keys, values = zip(*groups)
+
+ all_cols = list(set.union(*[set(x) for x in values]))
+ id_cols = list(data.columns.difference(all_cols))
+
+ K = len(values[0])
+
+ for seq in values:
+ if len(seq) != K:
+ raise ValueError('All column lists must be same length')
+
+ mdata = {}
+ pivot_cols = []
+
+ for target, names in zip(keys, values):
+ to_concat = [data[col].values for col in names]
+ mdata[target] = _concat._concat_compat(to_concat)
+ pivot_cols.append(target)
+
+ for col in id_cols:
+ mdata[col] = np.tile(data[col].values, K)
+
+ if dropna:
+ mask = np.ones(len(mdata[pivot_cols[0]]), dtype=bool)
+ for c in pivot_cols:
+ mask &= notna(mdata[c])
+ if not mask.all():
+ mdata = dict((k, v[mask]) for k, v in compat.iteritems(mdata))
+
+ return DataFrame(mdata, columns=id_cols + pivot_cols)
+
+
+def wide_to_long(df, stubnames, i, j, sep="", suffix=r'\d+'):
+ r"""
+ Wide panel to long format. Less flexible but more user-friendly than melt.
+
+ With stubnames ['A', 'B'], this function expects to find one or more
+ groups of columns with format Asuffix1, Asuffix2,..., Bsuffix1, Bsuffix2,...
+ You specify what you want to call this suffix in the resulting long format
+ with `j` (for example `j='year'`)
+
+ Each row of these wide variables is assumed to be uniquely identified by
+ `i` (can be a single column name or a list of column names)
+
+ All remaining variables in the data frame are left intact.
+
+ Parameters
+ ----------
+ df : DataFrame
+ The wide-format DataFrame
+ stubnames : str or list-like
+ The stub name(s). The wide format variables are assumed to
+ start with the stub names.
+ i : str or list-like
+ Column(s) to use as id variable(s)
+ j : str
+ The name of the subobservation variable. What you wish to name your
+ suffix in the long format.
+ sep : str, default ""
+ A character indicating the separation of the variable names
+ in the wide format, to be stripped from the names in the long format.
+ For example, if your column names are A-suffix1, A-suffix2, you
+ can strip the hyphen by specifying `sep='-'`
+
+ .. versionadded:: 0.20.0
+
+ suffix : str, default '\\d+'
+ A regular expression capturing the wanted suffixes. '\\d+' captures
+ numeric suffixes. Suffixes with no numbers could be specified with the
+ negated character class '\\D+'. You can also further disambiguate
+ suffixes, for example, if your wide variables are of the form
+ Aone, Btwo,.., and you have an unrelated column Arating, you can
+ ignore the last one by specifying `suffix='(!?one|two)'`
+
+ .. versionadded:: 0.20.0
+
+ Returns
+ -------
+ DataFrame
+ A DataFrame that contains each stub name as a variable, with new index
+ (i, j)
+
+ Examples
+ --------
+ >>> import pandas as pd
+ >>> import numpy as np
+ >>> np.random.seed(123)
+ >>> df = pd.DataFrame({"A1970" : {0 : "a", 1 : "b", 2 : "c"},
+ ... "A1980" : {0 : "d", 1 : "e", 2 : "f"},
+ ... "B1970" : {0 : 2.5, 1 : 1.2, 2 : .7},
+ ... "B1980" : {0 : 3.2, 1 : 1.3, 2 : .1},
+ ... "X" : dict(zip(range(3), np.random.randn(3)))
+ ... })
+ >>> df["id"] = df.index
+ >>> df
+ A1970 A1980 B1970 B1980 X id
+ 0 a d 2.5 3.2 -1.085631 0
+ 1 b e 1.2 1.3 0.997345 1
+ 2 c f 0.7 0.1 0.282978 2
+ >>> pd.wide_to_long(df, ["A", "B"], i="id", j="year")
+ ... # doctest: +NORMALIZE_WHITESPACE
+ X A B
+ id year
+ 0 1970 -1.085631 a 2.5
+ 1 1970 0.997345 b 1.2
+ 2 1970 0.282978 c 0.7
+ 0 1980 -1.085631 d 3.2
+ 1 1980 0.997345 e 1.3
+ 2 1980 0.282978 f 0.1
+
+ With multiple id columns
+
+ >>> df = pd.DataFrame({
+ ... 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3],
+ ... 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3],
+ ... 'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
+ ... 'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9]
+ ... })
+ >>> df
+ birth famid ht1 ht2
+ 0 1 1 2.8 3.4
+ 1 2 1 2.9 3.8
+ 2 3 1 2.2 2.9
+ 3 1 2 2.0 3.2
+ 4 2 2 1.8 2.8
+ 5 3 2 1.9 2.4
+ 6 1 3 2.2 3.3
+ 7 2 3 2.3 3.4
+ 8 3 3 2.1 2.9
+ >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age')
+ >>> l
+ ... # doctest: +NORMALIZE_WHITESPACE
+ ht
+ famid birth age
+ 1 1 1 2.8
+ 2 3.4
+ 2 1 2.9
+ 2 3.8
+ 3 1 2.2
+ 2 2.9
+ 2 1 1 2.0
+ 2 3.2
+ 2 1 1.8
+ 2 2.8
+ 3 1 1.9
+ 2 2.4
+ 3 1 1 2.2
+ 2 3.3
+ 2 1 2.3
+ 2 3.4
+ 3 1 2.1
+ 2 2.9
+
+ Going from long back to wide just takes some creative use of `unstack`
+
+ >>> w = l.reset_index().set_index(['famid', 'birth', 'age']).unstack()
+ >>> w.columns = pd.Index(w.columns).str.join('')
+ >>> w.reset_index()
+ famid birth ht1 ht2
+ 0 1 1 2.8 3.4
+ 1 1 2 2.9 3.8
+ 2 1 3 2.2 2.9
+ 3 2 1 2.0 3.2
+ 4 2 2 1.8 2.8
+ 5 2 3 1.9 2.4
+ 6 3 1 2.2 3.3
+ 7 3 2 2.3 3.4
+ 8 3 3 2.1 2.9
+
+ Unwieldy column names are also handled
+
+ >>> np.random.seed(0)
+ >>> df = pd.DataFrame({'A(quarterly)-2010': np.random.rand(3),
+ ... 'A(quarterly)-2011': np.random.rand(3),
+ ... 'B(quarterly)-2010': np.random.rand(3),
+ ... 'B(quarterly)-2011': np.random.rand(3),
+ ... 'X' : np.random.randint(3, size=3)})
+ >>> df['id'] = df.index
+ >>> df # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
+ A(quarterly)-2010 A(quarterly)-2011 B(quarterly)-2010 ...
+ 0 0.548814 0.544883 0.437587 ...
+ 1 0.715189 0.423655 0.891773 ...
+ 2 0.602763 0.645894 0.963663 ...
+ X id
+ 0 0 0
+ 1 1 1
+ 2 1 2
+
+ >>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'], i='id',
+ ... j='year', sep='-')
+ ... # doctest: +NORMALIZE_WHITESPACE
+ X A(quarterly) B(quarterly)
+ id year
+ 0 2010 0 0.548814 0.437587
+ 1 2010 1 0.715189 0.891773
+ 2 2010 1 0.602763 0.963663
+ 0 2011 0 0.544883 0.383442
+ 1 2011 1 0.423655 0.791725
+ 2 2011 1 0.645894 0.528895
+
+ If we have many columns, we could also use a regex to find our
+ stubnames and pass that list on to wide_to_long
+
+ >>> stubnames = sorted(
+ ... set([match[0] for match in df.columns.str.findall(
+ ... r'[A-B]\(.*\)').values if match != [] ])
+ ... )
+ >>> list(stubnames)
+ ['A(quarterly)', 'B(quarterly)']
+
+ Notes
+ -----
+ All extra variables are left untouched. This simply uses
+ `pandas.melt` under the hood, but is hard-coded to "do the right thing"
+ in a typical case.
+ """
+ def get_var_names(df, stub, sep, suffix):
+ regex = "^{stub}{sep}{suffix}".format(
+ stub=re.escape(stub), sep=re.escape(sep), suffix=suffix)
+ return df.filter(regex=regex).columns.tolist()
+
+ def melt_stub(df, stub, i, j, value_vars, sep):
+ newdf = melt(df, id_vars=i, value_vars=value_vars,
+ value_name=stub.rstrip(sep), var_name=j)
+ newdf[j] = Categorical(newdf[j])
+ newdf[j] = newdf[j].str.replace(re.escape(stub + sep), "")
+
+ return newdf.set_index(i + [j])
+
+ if any(map(lambda s: s in df.columns.tolist(), stubnames)):
+ raise ValueError("stubname can't be identical to a column name")
+
+ if not is_list_like(stubnames):
+ stubnames = [stubnames]
+ else:
+ stubnames = list(stubnames)
+
+ if not is_list_like(i):
+ i = [i]
+ else:
+ i = list(i)
+
+ if df[i].duplicated().any():
+ raise ValueError("the id variables need to uniquely identify each row")
+
+ value_vars = list(map(lambda stub:
+ get_var_names(df, stub, sep, suffix), stubnames))
+
+ value_vars_flattened = [e for sublist in value_vars for e in sublist]
+ id_vars = list(set(df.columns.tolist()).difference(value_vars_flattened))
+
+ melted = []
+ for s, v in zip(stubnames, value_vars):
+ melted.append(melt_stub(df, s, i, j, v, sep))
+ melted = melted[0].join(melted[1:], how='outer')
+
+ if len(i) == 1:
+ new = df[id_vars].set_index(i).join(melted)
+ return new
+
+ new = df[id_vars].merge(melted.reset_index(), on=i).set_index(i + [j])
+
+ return new
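One guard in `wide_to_long` above has no doctest: a stub name that is itself a column name is rejected up front. A minimal illustration with made-up column names:

import pandas as pd

df = pd.DataFrame({'A1970': ['a'], 'A1980': ['b'], 'A': [0], 'id': [0]})
try:
    pd.wide_to_long(df, ['A'], i='id', j='year')
except ValueError as err:
    print(err)    # stubname can't be identical to a column name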
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 0234a5563326c5..412c00dc95ec00 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -126,7 +126,7 @@ def _groupby_and_merge(by, on, left, right, _merge_pieces,
try:
if k in merged:
merged[k] = key
- except:
+ except KeyError:
pass
pieces.append(merged)
@@ -1268,8 +1268,10 @@ def _get_merge_keys(self):
else:
lt = left_join_keys[-1]
- msg = "incompatible tolerance, must be compat " \
- "with type {lt}".format(lt=type(lt))
+ msg = ("incompatible tolerance {tolerance}, must be compat "
+ "with type {lkdtype}".format(
+ tolerance=type(self.tolerance),
+ lkdtype=lt.dtype))
if is_datetime64_dtype(lt) or is_datetime64tz_dtype(lt):
if not isinstance(self.tolerance, Timedelta):
@@ -1505,12 +1507,12 @@ def _sort_labels(uniques, left, right):
# tuplesafe
uniques = Index(uniques).values
- l = len(left)
+ llength = len(left)
labels = np.concatenate([left, right])
_, new_labels = sorting.safe_sort(uniques, labels, na_sentinel=-1)
new_labels = _ensure_int64(new_labels)
- new_left, new_right = new_labels[:l], new_labels[l:]
+ new_left, new_right = new_labels[:llength], new_labels[llength:]
return new_left, new_right
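The reworded merge_asof message now carries both the tolerance type and the join-key dtype. A sketch of when it appears; the exception class and exact wording follow the template above and may differ slightly in practice:

import pandas as pd

left = pd.DataFrame({'time': pd.to_datetime(['2017-01-01']), 'v': [1]})
right = pd.DataFrame({'time': pd.to_datetime(['2017-01-02']), 'w': [2]})
try:
    pd.merge_asof(left, right, on='time', tolerance=1)   # int tolerance against a datetime64[ns] key
except Exception as err:
    print(err)   # incompatible tolerance <class 'int'>, must be compat with type datetime64[ns]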
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index b8885820f4a49d..96738afbca9e3c 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -4,7 +4,6 @@
from pandas import compat
from functools import partial
import itertools
-import re
import numpy as np
@@ -14,7 +13,6 @@
needs_i8_conversion, is_sparse)
from pandas.core.dtypes.cast import maybe_promote
from pandas.core.dtypes.missing import notna
-import pandas.core.dtypes.concat as _concat
from pandas.core.series import Series
from pandas.core.frame import DataFrame
@@ -30,8 +28,6 @@
import pandas.core.algorithms as algos
from pandas._libs import algos as _algos, reshape as _reshape
-from pandas.core.frame import _shared_docs
-from pandas.util._decorators import Appender
from pandas.core.index import Index, MultiIndex, _get_na_value
@@ -700,375 +696,6 @@ def _convert_level_number(level_num, columns):
return result
-@Appender(_shared_docs['melt'] %
- dict(caller='pd.melt(df, ',
- versionadded="",
- other='DataFrame.melt'))
-def melt(frame, id_vars=None, value_vars=None, var_name=None,
- value_name='value', col_level=None):
- # TODO: what about the existing index?
- if id_vars is not None:
- if not is_list_like(id_vars):
- id_vars = [id_vars]
- elif (isinstance(frame.columns, MultiIndex) and
- not isinstance(id_vars, list)):
- raise ValueError('id_vars must be a list of tuples when columns'
- ' are a MultiIndex')
- else:
- id_vars = list(id_vars)
- else:
- id_vars = []
-
- if value_vars is not None:
- if not is_list_like(value_vars):
- value_vars = [value_vars]
- elif (isinstance(frame.columns, MultiIndex) and
- not isinstance(value_vars, list)):
- raise ValueError('value_vars must be a list of tuples when'
- ' columns are a MultiIndex')
- else:
- value_vars = list(value_vars)
- frame = frame.loc[:, id_vars + value_vars]
- else:
- frame = frame.copy()
-
- if col_level is not None: # allow list or other?
- # frame is a copy
- frame.columns = frame.columns.get_level_values(col_level)
-
- if var_name is None:
- if isinstance(frame.columns, MultiIndex):
- if len(frame.columns.names) == len(set(frame.columns.names)):
- var_name = frame.columns.names
- else:
- var_name = ['variable_{i}'.format(i=i)
- for i in range(len(frame.columns.names))]
- else:
- var_name = [frame.columns.name if frame.columns.name is not None
- else 'variable']
- if isinstance(var_name, compat.string_types):
- var_name = [var_name]
-
- N, K = frame.shape
- K -= len(id_vars)
-
- mdata = {}
- for col in id_vars:
- mdata[col] = np.tile(frame.pop(col).values, K)
-
- mcolumns = id_vars + var_name + [value_name]
-
- mdata[value_name] = frame.values.ravel('F')
- for i, col in enumerate(var_name):
- # asanyarray will keep the columns as an Index
- mdata[col] = np.asanyarray(frame.columns
- ._get_level_values(i)).repeat(N)
-
- return DataFrame(mdata, columns=mcolumns)
-
-
-def lreshape(data, groups, dropna=True, label=None):
- """
- Reshape long-format data to wide. Generalized inverse of DataFrame.pivot
-
- Parameters
- ----------
- data : DataFrame
- groups : dict
- {new_name : list_of_columns}
- dropna : boolean, default True
-
- Examples
- --------
- >>> import pandas as pd
- >>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526],
- ... 'team': ['Red Sox', 'Yankees'],
- ... 'year1': [2007, 2007], 'year2': [2008, 2008]})
- >>> data
- hr1 hr2 team year1 year2
- 0 514 545 Red Sox 2007 2008
- 1 573 526 Yankees 2007 2008
-
- >>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']})
- team year hr
- 0 Red Sox 2007 514
- 1 Yankees 2007 573
- 2 Red Sox 2008 545
- 3 Yankees 2008 526
-
- Returns
- -------
- reshaped : DataFrame
- """
- if isinstance(groups, dict):
- keys = list(groups.keys())
- values = list(groups.values())
- else:
- keys, values = zip(*groups)
-
- all_cols = list(set.union(*[set(x) for x in values]))
- id_cols = list(data.columns.difference(all_cols))
-
- K = len(values[0])
-
- for seq in values:
- if len(seq) != K:
- raise ValueError('All column lists must be same length')
-
- mdata = {}
- pivot_cols = []
-
- for target, names in zip(keys, values):
- to_concat = [data[col].values for col in names]
- mdata[target] = _concat._concat_compat(to_concat)
- pivot_cols.append(target)
-
- for col in id_cols:
- mdata[col] = np.tile(data[col].values, K)
-
- if dropna:
- mask = np.ones(len(mdata[pivot_cols[0]]), dtype=bool)
- for c in pivot_cols:
- mask &= notna(mdata[c])
- if not mask.all():
- mdata = dict((k, v[mask]) for k, v in compat.iteritems(mdata))
-
- return DataFrame(mdata, columns=id_cols + pivot_cols)
-
-
-def wide_to_long(df, stubnames, i, j, sep="", suffix=r'\d+'):
- r"""
- Wide panel to long format. Less flexible but more user-friendly than melt.
-
- With stubnames ['A', 'B'], this function expects to find one or more
- group of columns with format Asuffix1, Asuffix2,..., Bsuffix1, Bsuffix2,...
- You specify what you want to call this suffix in the resulting long format
- with `j` (for example `j='year'`)
-
- Each row of these wide variables are assumed to be uniquely identified by
- `i` (can be a single column name or a list of column names)
-
- All remaining variables in the data frame are left intact.
-
- Parameters
- ----------
- df : DataFrame
- The wide-format DataFrame
- stubnames : str or list-like
- The stub name(s). The wide format variables are assumed to
- start with the stub names.
- i : str or list-like
- Column(s) to use as id variable(s)
- j : str
- The name of the subobservation variable. What you wish to name your
- suffix in the long format.
- sep : str, default ""
- A character indicating the separation of the variable names
- in the wide format, to be stripped from the names in the long format.
- For example, if your column names are A-suffix1, A-suffix2, you
- can strip the hypen by specifying `sep='-'`
-
- .. versionadded:: 0.20.0
-
- suffix : str, default '\\d+'
- A regular expression capturing the wanted suffixes. '\\d+' captures
- numeric suffixes. Suffixes with no numbers could be specified with the
- negated character class '\\D+'. You can also further disambiguate
- suffixes, for example, if your wide variables are of the form
- Aone, Btwo,.., and you have an unrelated column Arating, you can
- ignore the last one by specifying `suffix='(!?one|two)'`
-
- .. versionadded:: 0.20.0
-
- Returns
- -------
- DataFrame
- A DataFrame that contains each stub name as a variable, with new index
- (i, j)
-
- Examples
- --------
- >>> import pandas as pd
- >>> import numpy as np
- >>> np.random.seed(123)
- >>> df = pd.DataFrame({"A1970" : {0 : "a", 1 : "b", 2 : "c"},
- ... "A1980" : {0 : "d", 1 : "e", 2 : "f"},
- ... "B1970" : {0 : 2.5, 1 : 1.2, 2 : .7},
- ... "B1980" : {0 : 3.2, 1 : 1.3, 2 : .1},
- ... "X" : dict(zip(range(3), np.random.randn(3)))
- ... })
- >>> df["id"] = df.index
- >>> df
- A1970 A1980 B1970 B1980 X id
- 0 a d 2.5 3.2 -1.085631 0
- 1 b e 1.2 1.3 0.997345 1
- 2 c f 0.7 0.1 0.282978 2
- >>> pd.wide_to_long(df, ["A", "B"], i="id", j="year")
- ... # doctest: +NORMALIZE_WHITESPACE
- X A B
- id year
- 0 1970 -1.085631 a 2.5
- 1 1970 0.997345 b 1.2
- 2 1970 0.282978 c 0.7
- 0 1980 -1.085631 d 3.2
- 1 1980 0.997345 e 1.3
- 2 1980 0.282978 f 0.1
-
- With multuple id columns
-
- >>> df = pd.DataFrame({
- ... 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3],
- ... 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3],
- ... 'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
- ... 'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9]
- ... })
- >>> df
- birth famid ht1 ht2
- 0 1 1 2.8 3.4
- 1 2 1 2.9 3.8
- 2 3 1 2.2 2.9
- 3 1 2 2.0 3.2
- 4 2 2 1.8 2.8
- 5 3 2 1.9 2.4
- 6 1 3 2.2 3.3
- 7 2 3 2.3 3.4
- 8 3 3 2.1 2.9
- >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age')
- >>> l
- ... # doctest: +NORMALIZE_WHITESPACE
- ht
- famid birth age
- 1 1 1 2.8
- 2 3.4
- 2 1 2.9
- 2 3.8
- 3 1 2.2
- 2 2.9
- 2 1 1 2.0
- 2 3.2
- 2 1 1.8
- 2 2.8
- 3 1 1.9
- 2 2.4
- 3 1 1 2.2
- 2 3.3
- 2 1 2.3
- 2 3.4
- 3 1 2.1
- 2 2.9
-
- Going from long back to wide just takes some creative use of `unstack`
-
- >>> w = l.reset_index().set_index(['famid', 'birth', 'age']).unstack()
- >>> w.columns = pd.Index(w.columns).str.join('')
- >>> w.reset_index()
- famid birth ht1 ht2
- 0 1 1 2.8 3.4
- 1 1 2 2.9 3.8
- 2 1 3 2.2 2.9
- 3 2 1 2.0 3.2
- 4 2 2 1.8 2.8
- 5 2 3 1.9 2.4
- 6 3 1 2.2 3.3
- 7 3 2 2.3 3.4
- 8 3 3 2.1 2.9
-
- Less wieldy column names are also handled
-
- >>> np.random.seed(0)
- >>> df = pd.DataFrame({'A(quarterly)-2010': np.random.rand(3),
- ... 'A(quarterly)-2011': np.random.rand(3),
- ... 'B(quarterly)-2010': np.random.rand(3),
- ... 'B(quarterly)-2011': np.random.rand(3),
- ... 'X' : np.random.randint(3, size=3)})
- >>> df['id'] = df.index
- >>> df # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
- A(quarterly)-2010 A(quarterly)-2011 B(quarterly)-2010 ...
- 0 0.548814 0.544883 0.437587 ...
- 1 0.715189 0.423655 0.891773 ...
- 2 0.602763 0.645894 0.963663 ...
- X id
- 0 0 0
- 1 1 1
- 2 1 2
-
- >>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'], i='id',
- ... j='year', sep='-')
- ... # doctest: +NORMALIZE_WHITESPACE
- X A(quarterly) B(quarterly)
- id year
- 0 2010 0 0.548814 0.437587
- 1 2010 1 0.715189 0.891773
- 2 2010 1 0.602763 0.963663
- 0 2011 0 0.544883 0.383442
- 1 2011 1 0.423655 0.791725
- 2 2011 1 0.645894 0.528895
-
- If we have many columns, we could also use a regex to find our
- stubnames and pass that list on to wide_to_long
-
- >>> stubnames = sorted(
- ... set([match[0] for match in df.columns.str.findall(
- ... r'[A-B]\(.*\)').values if match != [] ])
- ... )
- >>> list(stubnames)
- ['A(quarterly)', 'B(quarterly)']
-
- Notes
- -----
- All extra variables are left untouched. This simply uses
- `pandas.melt` under the hood, but is hard-coded to "do the right thing"
- in a typicaly case.
- """
- def get_var_names(df, stub, sep, suffix):
- regex = "^{stub}{sep}{suffix}".format(
- stub=re.escape(stub), sep=re.escape(sep), suffix=suffix)
- return df.filter(regex=regex).columns.tolist()
-
- def melt_stub(df, stub, i, j, value_vars, sep):
- newdf = melt(df, id_vars=i, value_vars=value_vars,
- value_name=stub.rstrip(sep), var_name=j)
- newdf[j] = Categorical(newdf[j])
- newdf[j] = newdf[j].str.replace(re.escape(stub + sep), "")
-
- return newdf.set_index(i + [j])
-
- if any(map(lambda s: s in df.columns.tolist(), stubnames)):
- raise ValueError("stubname can't be identical to a column name")
-
- if not is_list_like(stubnames):
- stubnames = [stubnames]
- else:
- stubnames = list(stubnames)
-
- if not is_list_like(i):
- i = [i]
- else:
- i = list(i)
-
- if df[i].duplicated().any():
- raise ValueError("the id variables need to uniquely identify each row")
-
- value_vars = list(map(lambda stub:
- get_var_names(df, stub, sep, suffix), stubnames))
-
- value_vars_flattened = [e for sublist in value_vars for e in sublist]
- id_vars = list(set(df.columns.tolist()).difference(value_vars_flattened))
-
- melted = []
- for s, v in zip(stubnames, value_vars):
- melted.append(melt_stub(df, s, i, j, v, sep))
- melted = melted[0].join(melted[1:], how='outer')
-
- if len(i) == 1:
- new = df[id_vars].set_index(i).join(melted)
- return new
-
- new = df[id_vars].merge(melted.reset_index(), on=i).set_index(i + [j])
-
- return new
-
-
def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False,
columns=None, sparse=False, drop_first=False):
"""
diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py
index fda339aa304612..2adf17a227a598 100644
--- a/pandas/core/reshape/tile.py
+++ b/pandas/core/reshape/tile.py
@@ -148,7 +148,7 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'):
Parameters
----------
- x : ndarray or Series
+ x : 1d ndarray or Series
q : integer or array of quantiles
Number of quantiles. 10 for deciles, 4 for quartiles, etc. Alternately
array of quantiles, e.g. [0, .25, .5, .75, 1.] for quartiles
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 1c92c4b8850ee4..dd86e51ee8154c 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -597,7 +597,7 @@ def _ixs(self, i, axis=0):
return values[i]
except IndexError:
raise
- except:
+ except Exception:
if isinstance(i, slice):
indexer = self.index._convert_slice_indexer(i, kind='iloc')
return self._get_values(indexer)
@@ -675,7 +675,7 @@ def _get_with(self, key):
if isinstance(key, tuple):
try:
return self._get_values_tuple(key)
- except:
+ except Exception:
if len(key) == 1:
key = key[0]
if isinstance(key, slice):
@@ -818,7 +818,7 @@ def _set_with(self, key, value):
if not isinstance(key, (list, Series, np.ndarray, Series)):
try:
key = list(key)
- except:
+ except Exception:
key = [key]
if isinstance(key, Index):
@@ -1306,7 +1306,13 @@ def idxmin(self, axis=None, skipna=True, *args, **kwargs):
Parameters
----------
skipna : boolean, default True
- Exclude NA/null values
+ Exclude NA/null values. If the entire Series is NA, the result
+ will be NA.
+
+ Raises
+ ------
+ ValueError
+ * If the Series is empty
Returns
-------
@@ -1336,7 +1342,13 @@ def idxmax(self, axis=None, skipna=True, *args, **kwargs):
Parameters
----------
skipna : boolean, default True
- Exclude NA/null values
+ Exclude NA/null values. If the entire Series is NA, the result
+ will be NA.
+
+ Raises
+ ------
+ ValueError
+ * If the Series is empty
Returns
-------
@@ -1361,13 +1373,13 @@ def idxmax(self, axis=None, skipna=True, *args, **kwargs):
# ndarray compat
argmin = deprecate('argmin', idxmin,
- msg="'argmin' is deprecated. Use 'idxmin' instead. "
+ msg="'argmin' is deprecated, use 'idxmin' instead. "
"The behavior of 'argmin' will be corrected to "
"return the positional minimum in the future. "
"Use 'series.values.argmin' to get the position of "
"the minimum now.")
argmax = deprecate('argmax', idxmax,
- msg="'argmax' is deprecated. Use 'idxmax' instead. "
+ msg="'argmax' is deprecated, use 'idxmax' instead. "
"The behavior of 'argmax' will be corrected to "
"return the positional maximum in the future. "
"Use 'series.values.argmax' to get the position of "
@@ -1731,11 +1743,26 @@ def combine(self, other, func, fill_value=np.nan):
----------
other : Series or scalar value
func : function
+ Function that takes two scalars as inputs and returns a scalar
fill_value : scalar value
Returns
-------
result : Series
+
+ Examples
+ --------
+ >>> s1 = Series([1, 2])
+ >>> s2 = Series([0, 3])
+ >>> s1.combine(s2, lambda x1, x2: x1 if x1 < x2 else x2)
+ 0 0
+ 1 2
+ dtype: int64
+
+ See Also
+ --------
+ Series.combine_first : Combine Series values, choosing the calling
+ Series's values first
"""
if isinstance(other, Series):
new_index = self.index.union(other.index)
@@ -1764,7 +1791,21 @@ def combine_first(self, other):
Returns
-------
- y : Series
+ combined : Series
+
+ Examples
+ --------
+ >>> s1 = pd.Series([1, np.nan])
+ >>> s2 = pd.Series([3, 4])
+ >>> s1.combine_first(s2)
+ 0 1.0
+ 1 4.0
+ dtype: float64
+
+ See Also
+ --------
+ Series.combine : Perform elementwise operation on two Series
+ using a given function
"""
new_index = self.index.union(other.index)
this = self.reindex(new_index, copy=False)
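The expanded idxmin/idxmax notes in words: an all-NA Series gives NA back, an empty Series raises. A small illustration, assuming the usual numpy/pandas imports:

import numpy as np
import pandas as pd

pd.Series([np.nan, np.nan]).idxmin()        # nan: skipna removes everything, so the result is NA
try:
    pd.Series([], dtype='float64').idxmin()
except ValueError as err:
    print(err)                              # empty Series raises, as documented above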
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index ae8aa275b2baea..19f7e459d0725f 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -36,9 +36,77 @@ def _guess_datetime_format_for_array(arr, **kwargs):
return _guess_datetime_format(arr[non_nan_elements[0]], **kwargs)
+def _maybe_cache(arg, format, cache, tz, convert_listlike):
+ """
+ Create a cache of unique dates from an array of dates
+
+ Parameters
+ ----------
+ arg : integer, float, string, datetime, list, tuple, 1-d array, Series
+ format : string
+ Strftime format to parse time
+ cache : boolean
+ True attempts to create a cache of converted values
+ tz : string
+ Timezone of the dates
+ convert_listlike : function
+ Conversion function to apply on dates
+
+ Returns
+ -------
+ cache_array : Series
+ Cache of converted, unique dates. Can be empty
+ """
+ from pandas import Series
+ cache_array = Series()
+ if cache:
+ # Perform a quicker unique check
+ from pandas import Index
+ if not Index(arg).is_unique:
+ unique_dates = algorithms.unique(arg)
+ cache_dates = convert_listlike(unique_dates, True, format, tz=tz)
+ cache_array = Series(cache_dates, index=unique_dates)
+ return cache_array
+
+
+def _convert_and_box_cache(arg, cache_array, box, errors, name=None):
+ """
+ Convert array of dates with a cache and box the result
+
+ Parameters
+ ----------
+ arg : integer, float, string, datetime, list, tuple, 1-d array, Series
+ cache_array : Series
+ Cache of converted, unique dates
+ box : boolean
+ True boxes result as an Index-like, False returns an ndarray
+ errors : string
+ 'ignore' plus box=True will convert result to Index
+ name : string, default None
+ Name for a DatetimeIndex
+
+ Returns
+ -------
+ result : datetime of converted dates
+ Returns:
+
+ - Index-like if box=True
+ - ndarray if box=False
+ """
+ from pandas import Series, DatetimeIndex, Index
+ result = Series(arg).map(cache_array)
+ if box:
+ if errors == 'ignore':
+ return Index(result)
+ else:
+ return DatetimeIndex(result, name=name)
+ return result.values
+
+
def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
utc=None, box=True, format=None, exact=True,
- unit=None, infer_datetime_format=False, origin='unix'):
+ unit=None, infer_datetime_format=False, origin='unix',
+ cache=False):
"""
Convert argument to datetime.
@@ -111,7 +179,12 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
origin.
.. versionadded: 0.20.0
+ cache : boolean, default False
+ If True, use a cache of unique, converted dates to apply the datetime
+ conversion. May produce sigificant speed-up when parsing duplicate date
+ strings, especially ones with timezone offsets.
+ .. versionadded:: 0.22.0
Returns
-------
ret : datetime if parsing succeeded.
@@ -369,15 +442,28 @@ def _convert_listlike(arg, box, format, name=None, tz=tz):
if isinstance(arg, tslib.Timestamp):
result = arg
elif isinstance(arg, ABCSeries):
- from pandas import Series
- values = _convert_listlike(arg._values, True, format)
- result = Series(values, index=arg.index, name=arg.name)
+ cache_array = _maybe_cache(arg, format, cache, tz, _convert_listlike)
+ if not cache_array.empty:
+ result = arg.map(cache_array)
+ else:
+ from pandas import Series
+ values = _convert_listlike(arg._values, True, format)
+ result = Series(values, index=arg.index, name=arg.name)
elif isinstance(arg, (ABCDataFrame, MutableMapping)):
result = _assemble_from_unit_mappings(arg, errors=errors)
elif isinstance(arg, ABCIndexClass):
- result = _convert_listlike(arg, box, format, name=arg.name)
+ cache_array = _maybe_cache(arg, format, cache, tz, _convert_listlike)
+ if not cache_array.empty:
+ result = _convert_and_box_cache(arg, cache_array, box, errors,
+ name=arg.name)
+ else:
+ result = _convert_listlike(arg, box, format, name=arg.name)
elif is_list_like(arg):
- result = _convert_listlike(arg, box, format)
+ cache_array = _maybe_cache(arg, format, cache, tz, _convert_listlike)
+ if not cache_array.empty:
+ result = _convert_and_box_cache(arg, cache_array, box, errors)
+ else:
+ result = _convert_listlike(arg, box, format)
else:
result = _convert_listlike(np.array([arg]), box, format)[0]
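The cache path in to_datetime only pays off when the input repeats values: the unique strings are converted once and then mapped back onto the original positions. A sketch of the intended call, with made-up data:

import pandas as pd

dates = ['2017-10-01', '2017-10-02'] * 5000
result = pd.to_datetime(dates, cache=True)   # two unique strings parsed once, then broadcast
assert len(result) == 10000 and result.nunique() == 2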
diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py
index be39f4baba0fb4..32bab09a0c4acf 100644
--- a/pandas/io/json/json.py
+++ b/pandas/io/json/json.py
@@ -764,7 +764,7 @@ def _parse_numpy(self):
if orient == "columns":
args = loads(json, dtype=None, numpy=True, labelled=True,
precise_float=self.precise_float)
- if args:
+ if len(args):
args = (args[0].T, args[2], args[1])
self.obj = DataFrame(*args)
elif orient == "split":
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index ef95e32cc241e8..4a13d2c9db9445 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -76,9 +76,10 @@ def write(self, df, path, compression='snappy',
table, path, compression=compression,
coerce_timestamps=coerce_timestamps, **kwargs)
- def read(self, path, columns=None):
+ def read(self, path, columns=None, **kwargs):
path, _, _ = get_filepath_or_buffer(path)
- return self.api.parquet.read_table(path, columns=columns).to_pandas()
+ return self.api.parquet.read_table(path, columns=columns,
+ **kwargs).to_pandas()
class FastParquetImpl(object):
@@ -115,9 +116,9 @@ def write(self, df, path, compression='snappy', **kwargs):
self.api.write(path, df,
compression=compression, **kwargs)
- def read(self, path, columns=None):
+ def read(self, path, columns=None, **kwargs):
path, _, _ = get_filepath_or_buffer(path)
- return self.api.ParquetFile(path).to_pandas(columns=columns)
+ return self.api.ParquetFile(path).to_pandas(columns=columns, **kwargs)
def to_parquet(df, path, engine='auto', compression='snappy', **kwargs):
@@ -175,7 +176,7 @@ def to_parquet(df, path, engine='auto', compression='snappy', **kwargs):
if df.columns.inferred_type not in valid_types:
raise ValueError("parquet must have string column names")
- return impl.write(df, path, compression=compression)
+ return impl.write(df, path, compression=compression, **kwargs)
def read_parquet(path, engine='auto', columns=None, **kwargs):
@@ -205,4 +206,4 @@ def read_parquet(path, engine='auto', columns=None, **kwargs):
"""
impl = get_engine(engine)
- return impl.read(path, columns=columns)
+ return impl.read(path, columns=columns, **kwargs)
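With the parquet changes, engine-specific options passed to to_parquet/read_parquet are now forwarded to the underlying engine. The option name below belongs to pyarrow rather than pandas, and the snippet assumes pyarrow is installed:

import pandas as pd

df = pd.DataFrame({'a': [1, 2, 3]})
df.to_parquet('example.parquet', engine='pyarrow')
back = pd.read_parquet('example.parquet', engine='pyarrow', use_pandas_metadata=True)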
diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py
index 2b3a91e2062b15..26e39f0df8b294 100644
--- a/pandas/io/sas/sas7bdat.py
+++ b/pandas/io/sas/sas7bdat.py
@@ -17,6 +17,7 @@
import pandas as pd
from pandas import compat
from pandas.io.common import get_filepath_or_buffer, BaseIterator
+from pandas.errors import EmptyDataError
import numpy as np
import struct
import pandas.io.sas.sas_constants as const
@@ -594,6 +595,10 @@ def read(self, nrows=None):
elif nrows is None:
nrows = self.row_count
+ if len(self.column_types) == 0:
+ self.close()
+ raise EmptyDataError("No columns to parse from file")
+
if self._current_row_in_file_index >= self.row_count:
return None
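The SAS reader change turns a file that defines zero columns into an explicit EmptyDataError instead of failing later during parsing. A sketch, with a hypothetical file name:

import pandas as pd
from pandas.errors import EmptyDataError

try:
    pd.read_sas('zero_variables.sas7bdat')   # hypothetical file with no columns
except EmptyDataError as err:
    print(err)                               # No columns to parse from file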
diff --git a/pandas/plotting/_converter.py b/pandas/plotting/_converter.py
index 47d15195315ba2..aadd5a1beb28b6 100644
--- a/pandas/plotting/_converter.py
+++ b/pandas/plotting/_converter.py
@@ -28,6 +28,7 @@
from pandas.core.indexes.datetimes import date_range
import pandas.core.tools.datetimes as tools
+from pandas._libs.tslibs import resolution
import pandas.tseries.frequencies as frequencies
from pandas.tseries.frequencies import FreqGroup
from pandas.core.indexes.period import Period, PeriodIndex
@@ -64,7 +65,7 @@ def time2num(d):
if isinstance(d, compat.string_types):
parsed = tools.to_datetime(d)
if not isinstance(parsed, datetime):
- raise ValueError('Could not parse time %s' % d)
+ raise ValueError('Could not parse time {d}'.format(d=d))
return _to_ordinalf(parsed.time())
if isinstance(d, pydt.time):
return _to_ordinalf(d)
@@ -166,7 +167,7 @@ def get_datevalue(date, freq):
return date
elif date is None:
return None
- raise ValueError("Unrecognizable date '%s'" % date)
+ raise ValueError("Unrecognizable date '{date}'".format(date=date))
def _dt_to_float_ordinal(dt):
@@ -351,10 +352,12 @@ def __call__(self):
estimate = (nmax - nmin) / (self._get_unit() * self._get_interval())
if estimate > self.MAXTICKS * 2:
- raise RuntimeError(('MillisecondLocator estimated to generate %d '
- 'ticks from %s to %s: exceeds Locator.MAXTICKS'
- '* 2 (%d) ') %
- (estimate, dmin, dmax, self.MAXTICKS * 2))
+ raise RuntimeError(('MillisecondLocator estimated to generate '
+ '{estimate:d} ticks from {dmin} to {dmax}: '
+ 'exceeds Locator.MAXTICKS'
+ '* 2 ({arg:d}) ').format(
+ estimate=estimate, dmin=dmin, dmax=dmax,
+ arg=self.MAXTICKS * 2))
freq = '%dL' % self._get_interval()
tz = self.tz.tzname(None)
@@ -505,7 +508,7 @@ def _daily_finder(vmin, vmax, freq):
elif freq == FreqGroup.FR_HR:
periodsperday = 24
else: # pragma: no cover
- raise ValueError("unexpected frequency: %s" % freq)
+ raise ValueError("unexpected frequency: {freq}".format(freq=freq))
periodsperyear = 365 * periodsperday
periodspermonth = 28 * periodsperday
@@ -515,7 +518,7 @@ def _daily_finder(vmin, vmax, freq):
elif freq == FreqGroup.FR_DAY:
periodsperyear = 365
periodspermonth = 28
- elif frequencies.get_freq_group(freq) == FreqGroup.FR_WK:
+ elif resolution.get_freq_group(freq) == FreqGroup.FR_WK:
periodsperyear = 52
periodspermonth = 3
else: # pragma: no cover
@@ -853,7 +856,7 @@ def _annual_finder(vmin, vmax, freq):
def get_finder(freq):
if isinstance(freq, compat.string_types):
freq = frequencies.get_freq(freq)
- fgroup = frequencies.get_freq_group(freq)
+ fgroup = resolution.get_freq_group(freq)
if fgroup == FreqGroup.FR_ANN:
return _annual_finder
@@ -864,7 +867,7 @@ def get_finder(freq):
elif ((freq >= FreqGroup.FR_BUS) or fgroup == FreqGroup.FR_WK):
return _daily_finder
else: # pragma: no cover
- errmsg = "Unsupported frequency: %s" % (freq)
+ errmsg = "Unsupported frequency: {freq}".format(freq=freq)
raise NotImplementedError(errmsg)
diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py
index 58f9b7ee6fc02d..62b2899f494134 100644
--- a/pandas/plotting/_core.py
+++ b/pandas/plotting/_core.py
@@ -749,7 +749,7 @@ def match_labels(data, e):
err = np.tile([err], (self.nseries, len(self.data)))
else:
- msg = "No valid %s detected" % label
+ msg = "No valid {label} detected".format(label=label)
raise ValueError(msg)
return err
@@ -1414,7 +1414,7 @@ def _plot(cls, ax, y, style=None, bw_method=None, ind=None,
gkde = gaussian_kde(y)
if bw_method is not None:
msg = ('bw_method was added in Scipy 0.11.0.' +
- ' Scipy version in use is %s.' % spv)
+ ' Scipy version in use is {spv}.'.format(spv=spv))
warnings.warn(msg)
y = gkde.evaluate(ind)
@@ -2452,7 +2452,7 @@ def _grouped_plot_by_column(plotf, data, columns=None, by=None,
result = axes
byline = by[0] if len(by) == 1 else by
- fig.suptitle('Boxplot grouped by %s' % byline)
+ fig.suptitle('Boxplot grouped by {byline}'.format(byline=byline))
fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2)
return result
diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py
index 54f87febdc2141..d6048f54993e6d 100644
--- a/pandas/plotting/_misc.py
+++ b/pandas/plotting/_misc.py
@@ -525,7 +525,7 @@ def lag_plot(series, lag=1, ax=None, **kwds):
if ax is None:
ax = plt.gca()
ax.set_xlabel("y(t)")
- ax.set_ylabel("y(t + %s)" % lag)
+ ax.set_ylabel("y(t + {lag})".format(lag=lag))
ax.scatter(y1, y2, **kwds)
return ax
diff --git a/pandas/plotting/_style.py b/pandas/plotting/_style.py
index 4c31ff0177488a..145597e52ae14e 100644
--- a/pandas/plotting/_style.py
+++ b/pandas/plotting/_style.py
@@ -131,7 +131,8 @@ def __getitem__(self, key):
self._warn_if_deprecated()
key = self._get_canonical_key(key)
if key not in self:
- raise ValueError('%s is not a valid pandas plotting option' % key)
+ raise ValueError(
+ '{key} is not a valid pandas plotting option'.format(key=key))
return super(_Options, self).__getitem__(key)
def __setitem__(self, key, value):
@@ -142,7 +143,8 @@ def __setitem__(self, key, value):
def __delitem__(self, key):
key = self._get_canonical_key(key)
if key in self._DEFAULT_KEYS:
- raise ValueError('Cannot remove default parameter %s' % key)
+ raise ValueError(
+ 'Cannot remove default parameter {key}'.format(key=key))
return super(_Options, self).__delitem__(key)
def __contains__(self, key):
diff --git a/pandas/plotting/_tools.py b/pandas/plotting/_tools.py
index 047a57ead72f87..816586fbb82f52 100644
--- a/pandas/plotting/_tools.py
+++ b/pandas/plotting/_tools.py
@@ -84,8 +84,9 @@ def _get_layout(nplots, layout=None, layout_type='box'):
raise ValueError(msg)
if nrows * ncols < nplots:
- raise ValueError('Layout of %sx%s must be larger than '
- 'required size %s' % (nrows, ncols, nplots))
+ raise ValueError('Layout of {nrows}x{ncols} must be larger '
+ 'than required size {nplots}'.format(
+ nrows=nrows, ncols=ncols, nplots=nplots))
return layout
diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py
index f6c3a08c6721ac..4e9e1b51e6fdab 100644
--- a/pandas/stats/moments.py
+++ b/pandas/stats/moments.py
@@ -458,7 +458,7 @@ def _rolling_func(name, desc, how=None, func_kw=None, additional_kw=''):
if how is None:
how_arg_str = 'None'
else:
- how_arg_str = "'%s" % how
+ how_arg_str = "'{how}".format(how=how)
@Substitution(desc, _unary_arg, _roll_kw % how_arg_str + additional_kw,
_type_of_input_retval, _roll_notes)
diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
index d2874b1606e729..c2d1eb8ae13725 100644
--- a/pandas/tests/computation/test_eval.py
+++ b/pandas/tests/computation/test_eval.py
@@ -718,6 +718,18 @@ def test_float_truncation(self):
expected = df.loc[[1], :]
tm.assert_frame_equal(expected, result)
+ def test_disallow_python_keywords(self):
+ # GH 18221
+ df = pd.DataFrame([[0, 0, 0]], columns=['foo', 'bar', 'class'])
+ msg = "Python keyword not valid identifier in numexpr query"
+ with tm.assert_raises_regex(SyntaxError, msg):
+ df.query('class == 0')
+
+ df = pd.DataFrame()
+ df.index.name = 'lambda'
+ with tm.assert_raises_regex(SyntaxError, msg):
+ df.query('lambda == 0')
+
class TestEvalNumexprPython(TestEvalNumexprPandas):
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
index 7195cb43a70dc0..34ed9d3142923e 100644
--- a/pandas/tests/dtypes/test_inference.py
+++ b/pandas/tests/dtypes/test_inference.py
@@ -39,7 +39,7 @@
from pandas.util import testing as tm
-@pytest.fixture(params=[True, False], ids=lambda val: str(val))
+@pytest.fixture(params=[True, False], ids=str)
def coerce(request):
return request.param
@@ -60,16 +60,20 @@ def __getitem__(self):
assert (not is_seq(A()))
-def test_is_list_like():
- passes = ([], [1], (1, ), (1, 2), {'a': 1}, set([1, 'a']), Series([1]),
- Series([]), Series(['a']).str)
- fails = (1, '2', object(), str)
+@pytest.mark.parametrize(
+ "ll",
+ [
+ [], [1], (1, ), (1, 2), {'a': 1},
+ set([1, 'a']), Series([1]),
+ Series([]), Series(['a']).str])
+def test_is_list_like_passes(ll):
+ assert inference.is_list_like(ll)
- for p in passes:
- assert inference.is_list_like(p)
- for f in fails:
- assert not inference.is_list_like(f)
+@pytest.mark.parametrize(
+ "ll", [1, '2', object(), str])
+def test_is_list_like_fails(ll):
+ assert not inference.is_list_like(ll)
@pytest.mark.parametrize('inner', [
@@ -93,15 +97,16 @@ def test_is_nested_list_like_fails(obj):
assert not inference.is_nested_list_like(obj)
-def test_is_dict_like():
- passes = [{}, {'A': 1}, Series([1])]
- fails = ['1', 1, [1, 2], (1, 2), range(2), Index([1])]
+@pytest.mark.parametrize(
+ "ll", [{}, {'A': 1}, Series([1])])
+def test_is_dict_like_passes(ll):
+ assert inference.is_dict_like(ll)
- for p in passes:
- assert inference.is_dict_like(p)
- for f in fails:
- assert not inference.is_dict_like(f)
+@pytest.mark.parametrize(
+ "ll", ['1', 1, [1, 2], (1, 2), range(2), Index([1])])
+def test_is_dict_like_fails(ll):
+ assert not inference.is_dict_like(ll)
def test_is_file_like():
@@ -148,15 +153,16 @@ class MockFile(object):
assert not is_file(mock.Mock())
-def test_is_named_tuple():
- passes = (collections.namedtuple('Test', list('abc'))(1, 2, 3), )
- fails = ((1, 2, 3), 'a', Series({'pi': 3.14}))
+@pytest.mark.parametrize(
+ "ll", [collections.namedtuple('Test', list('abc'))(1, 2, 3)])
+def test_is_names_tuple_passes(ll):
+ assert inference.is_named_tuple(ll)
- for p in passes:
- assert inference.is_named_tuple(p)
- for f in fails:
- assert not inference.is_named_tuple(f)
+@pytest.mark.parametrize(
+ "ll", [(1, 2, 3), 'a', Series({'pi': 3.14})])
+def test_is_names_tuple_fails(ll):
+ assert not inference.is_named_tuple(ll)
def test_is_hashable():
@@ -208,27 +214,32 @@ class OldStyleClass():
hash(c) # this will not raise
-def test_is_re():
- passes = re.compile('ad'),
- fails = 'x', 2, 3, object()
+@pytest.mark.parametrize(
+ "ll", [re.compile('ad')])
+def test_is_re_passes(ll):
+ assert inference.is_re(ll)
- for p in passes:
- assert inference.is_re(p)
- for f in fails:
- assert not inference.is_re(f)
+@pytest.mark.parametrize(
+ "ll", ['x', 2, 3, object()])
+def test_is_re_fails(ll):
+ assert not inference.is_re(ll)
-def test_is_recompilable():
- passes = (r'a', u('x'), r'asdf', re.compile('adsf'), u(r'\u2233\s*'),
- re.compile(r''))
- fails = 1, [], object()
+@pytest.mark.parametrize(
+ "ll", [r'a', u('x'),
+ r'asdf',
+ re.compile('adsf'),
+ u(r'\u2233\s*'),
+ re.compile(r'')])
+def test_is_recompilable_passes(ll):
+ assert inference.is_re_compilable(ll)
- for p in passes:
- assert inference.is_re_compilable(p)
- for f in fails:
- assert not inference.is_re_compilable(f)
+@pytest.mark.parametrize(
+ "ll", [1, [], object()])
+def test_is_recompilable_fails(ll):
+ assert not inference.is_re_compilable(ll)
class TestInference(object):
@@ -300,15 +311,14 @@ def test_maybe_convert_numeric_infinities(self):
np.array(['foo_' + infinity], dtype=object),
na_values, maybe_int)
- def test_maybe_convert_numeric_post_floatify_nan(self):
+ def test_maybe_convert_numeric_post_floatify_nan(self, coerce):
# see gh-13314
data = np.array(['1.200', '-999.000', '4.500'], dtype=object)
expected = np.array([1.2, np.nan, 4.5], dtype=np.float64)
nan_values = set([-999, -999.0])
- for coerce_type in (True, False):
- out = lib.maybe_convert_numeric(data, nan_values, coerce_type)
- tm.assert_numpy_array_equal(out, expected)
+ out = lib.maybe_convert_numeric(data, nan_values, coerce)
+ tm.assert_numpy_array_equal(out, expected)
def test_convert_infs(self):
arr = np.array(['inf', 'inf', 'inf'], dtype='O')
@@ -739,6 +749,36 @@ def test_is_datetimelike_array_all_nan_nat_like(self):
assert not lib.is_timedelta64_array(arr)
assert not lib.is_timedelta_or_timedelta64_array(arr)
+ assert lib.is_datetime_with_singletz_array(
+ np.array([pd.Timestamp('20130101', tz='US/Eastern'),
+ pd.Timestamp('20130102', tz='US/Eastern')],
+ dtype=object))
+ assert not lib.is_datetime_with_singletz_array(
+ np.array([pd.Timestamp('20130101', tz='US/Eastern'),
+ pd.Timestamp('20130102', tz='CET')],
+ dtype=object))
+
+ @pytest.mark.parametrize(
+ "func",
+ [
+ 'is_datetime_array',
+ 'is_datetime64_array',
+ 'is_bool_array',
+ 'is_timedelta_array',
+ 'is_timedelta64_array',
+ 'is_timedelta_or_timedelta64_array',
+ 'is_date_array',
+ 'is_time_array',
+ 'is_interval_array',
+ 'is_period_array'])
+ def test_other_dtypes_for_array(self, func):
+ func = getattr(lib, func)
+ arr = np.array(['foo', 'bar'])
+ assert not func(arr)
+
+ arr = np.array([1, 2])
+ assert not func(arr)
+
def test_date(self):
dates = [date(2012, 1, day) for day in range(1, 20)]
@@ -752,6 +792,24 @@ def test_date(self):
result = lib.infer_dtype(dates, skipna=True)
assert result == 'date'
+ def test_is_numeric_array(self):
+
+ assert lib.is_float_array(np.array([1, 2.0]))
+ assert lib.is_float_array(np.array([1, 2.0, np.nan]))
+ assert not lib.is_float_array(np.array([1, 2]))
+
+ assert lib.is_integer_array(np.array([1, 2]))
+ assert not lib.is_integer_array(np.array([1, 2.0]))
+
+ def test_is_string_array(self):
+
+ assert lib.is_string_array(np.array(['foo', 'bar']))
+ assert not lib.is_string_array(
+ np.array(['foo', 'bar', np.nan], dtype=object), skipna=False)
+ assert lib.is_string_array(
+ np.array(['foo', 'bar', np.nan], dtype=object), skipna=True)
+ assert not lib.is_string_array(np.array([1, 2]))
+
def test_to_object_array_tuples(self):
r = (5, 6)
values = [r]
diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py
index be6d81c63ae1ef..c50aa858a15b53 100644
--- a/pandas/tests/frame/test_api.py
+++ b/pandas/tests/frame/test_api.py
@@ -306,6 +306,11 @@ def test_axis_aliases(self):
result = f.sum(axis='columns')
assert_series_equal(result, expected)
+ def test_class_axis(self):
+ # https://github.com/pandas-dev/pandas/issues/18147
+ DataFrame.index # no exception!
+ DataFrame.columns # no exception!
+
def test_more_asMatrix(self):
values = self.mixed_frame.as_matrix()
assert values.shape[1] == len(self.mixed_frame.columns)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 2f750a76219052..03f780957b15ea 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -12,7 +12,7 @@
from pandas.errors import UnsupportedFunctionCall, PerformanceWarning
from pandas.util.testing import (assert_frame_equal, assert_index_equal,
assert_series_equal, assert_almost_equal)
-from pandas.compat import (range, long, lrange, StringIO, lmap, lzip, map, zip,
+from pandas.compat import (range, lrange, StringIO, lmap, lzip, map, zip,
builtins, OrderedDict)
from pandas import compat
from collections import defaultdict
@@ -264,7 +264,7 @@ def test_len(self):
df = pd.DataFrame(dict(a=[np.nan] * 3, b=[1, 2, 3]))
assert len(df.groupby(('a'))) == 0
assert len(df.groupby(('b'))) == 3
- assert len(df.groupby(('a', 'b'))) == 3
+ assert len(df.groupby(['a', 'b'])) == 3
def test_basic_regression(self):
# regression
@@ -2051,30 +2051,6 @@ def afunc(data):
assert_frame_equal(closure_bad, closure_good)
- def test_multiindex_columns_empty_level(self):
- l = [['count', 'values'], ['to filter', '']]
- midx = MultiIndex.from_tuples(l)
-
- df = DataFrame([[long(1), 'A']], columns=midx)
-
- grouped = df.groupby('to filter').groups
- assert grouped['A'] == [0]
-
- grouped = df.groupby([('to filter', '')]).groups
- assert grouped['A'] == [0]
-
- df = DataFrame([[long(1), 'A'], [long(2), 'B']], columns=midx)
-
- expected = df.groupby('to filter').groups
- result = df.groupby([('to filter', '')]).groups
- assert result == expected
-
- df = DataFrame([[long(1), 'A'], [long(2), 'A']], columns=midx)
-
- expected = df.groupby('to filter').groups
- result = df.groupby([('to filter', '')]).groups
- tm.assert_dict_equal(result, expected)
-
def test_cython_median(self):
df = DataFrame(np.random.randn(1000))
df.values[::2] = np.nan
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index 9e6de8749952f8..cc422f2d1cdeb7 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -9,7 +9,7 @@
Index, MultiIndex, DataFrame, Series)
from pandas.util.testing import (assert_panel_equal, assert_frame_equal,
assert_series_equal, assert_almost_equal)
-from pandas.compat import lrange
+from pandas.compat import lrange, long
from pandas import compat
import numpy as np
@@ -356,6 +356,30 @@ def test_multifunc_select_col_integer_cols(self):
# it works!
df.groupby(1, as_index=False)[2].agg({'Q': np.mean})
+ def test_multiindex_columns_empty_level(self):
+ lst = [['count', 'values'], ['to filter', '']]
+ midx = MultiIndex.from_tuples(lst)
+
+ df = DataFrame([[long(1), 'A']], columns=midx)
+
+ grouped = df.groupby('to filter').groups
+ assert grouped['A'] == [0]
+
+ grouped = df.groupby([('to filter', '')]).groups
+ assert grouped['A'] == [0]
+
+ df = DataFrame([[long(1), 'A'], [long(2), 'B']], columns=midx)
+
+ expected = df.groupby('to filter').groups
+ result = df.groupby([('to filter', '')]).groups
+ assert result == expected
+
+ df = DataFrame([[long(1), 'A'], [long(2), 'A']], columns=midx)
+
+ expected = df.groupby('to filter').groups
+ result = df.groupby([('to filter', '')]).groups
+ tm.assert_dict_equal(result, expected)
+
def test_groupby_multiindex_tuple(self):
# GH 17979
df = pd.DataFrame([[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]],
@@ -366,13 +390,18 @@ def test_groupby_multiindex_tuple(self):
result = df.groupby(('b', 1)).groups
tm.assert_dict_equal(expected, result)
- df2 = pd.DataFrame([[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]],
+ df2 = pd.DataFrame(df.values,
columns=pd.MultiIndex.from_arrays(
[['a', 'b', 'b', 'c'],
['d', 'd', 'e', 'e']]))
- df2.groupby([('b', 'd')]).groups
- expected = df.groupby([('b', 'd')]).groups
- result = df.groupby(('b', 'd')).groups
+ expected = df2.groupby([('b', 'd')]).groups
+ result = df.groupby(('b', 1)).groups
+ tm.assert_dict_equal(expected, result)
+
+ df3 = pd.DataFrame(df.values,
+ columns=[('a', 'd'), ('b', 'd'), ('b', 'e'), 'c'])
+ expected = df3.groupby([('b', 'd')]).groups
+ result = df.groupby(('b', 1)).groups
tm.assert_dict_equal(expected, result)
@pytest.mark.parametrize('sort', [True, False])
diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py
index 501fe63137cf47..2a408b85f0ed16 100644
--- a/pandas/tests/groupby/test_nth.py
+++ b/pandas/tests/groupby/test_nth.py
@@ -202,7 +202,7 @@ def test_nth(self):
freq='B')
df = DataFrame(1, index=business_dates, columns=['a', 'b'])
# get the first, fourth and last two business days for each month
- key = (df.index.year, df.index.month)
+ key = [df.index.year, df.index.month]
result = df.groupby(key, as_index=False).nth([0, 3, -2, -1])
expected_dates = pd.to_datetime(
['2014/4/1', '2014/4/4', '2014/4/29', '2014/4/30', '2014/5/1',
diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
index 3d7977c63eeb64..1434656115d187 100644
--- a/pandas/tests/groupby/test_value_counts.py
+++ b/pandas/tests/groupby/test_value_counts.py
@@ -43,7 +43,7 @@ def seed_df(seed_nans, n, m):
df = seed_df(seed_nans, n, m)
bins = None, np.arange(0, max(5, df['3rd'].max()) + 1, 2)
- keys = '1st', '2nd', ('1st', '2nd')
+ keys = '1st', '2nd', ['1st', '2nd']
for k, b in product(keys, bins):
binned.append((df, k, b, n, m))
ids.append("{}-{}-{}".format(k, n, m))
diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py
index 50ee88bd82f409..a9c26ebb903593 100644
--- a/pandas/tests/indexes/datetimes/test_partial_slicing.py
+++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py
@@ -2,9 +2,10 @@
import pytest
-from datetime import datetime
+from datetime import datetime, date
import numpy as np
import pandas as pd
+import operator as op
from pandas import (DatetimeIndex, Series, DataFrame,
date_range, Index, Timedelta, Timestamp)
@@ -330,3 +331,21 @@ def test_loc_datetime_length_one(self):
result = df.loc['2016-10-01T00:00:00':]
tm.assert_frame_equal(result, df)
+
+ @pytest.mark.parametrize('datetimelike', [
+ Timestamp('20130101'), datetime(2013, 1, 1),
+ date(2013, 1, 1), np.datetime64('2013-01-01T00:00', 'ns')])
+ @pytest.mark.parametrize('op,expected', [
+ (op.lt, [True, False, False, False]),
+ (op.le, [True, True, False, False]),
+ (op.eq, [False, True, False, False]),
+ (op.gt, [False, False, False, True])])
+ def test_selection_by_datetimelike(self, datetimelike, op, expected):
+ # GH issue #17965, test for ability to compare datetime64[ns] columns
+ # to datetimelike
+ df = DataFrame({'A': [pd.Timestamp('20120101'),
+ pd.Timestamp('20130101'),
+ np.nan, pd.Timestamp('20130103')]})
+ result = op(df.A, datetimelike)
+ expected = Series(expected, name='A')
+ tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py
index 8205b4fde217b0..a1287c3102b779 100644
--- a/pandas/tests/indexes/datetimes/test_tools.py
+++ b/pandas/tests/indexes/datetimes/test_tools.py
@@ -12,6 +12,7 @@
from distutils.version import LooseVersion
import pandas as pd
+from pandas.conftest import is_dateutil_le_261, is_dateutil_gt_261
from pandas._libs import tslib
from pandas._libs.tslibs import parsing
from pandas.core.tools import datetimes as tools
@@ -28,7 +29,8 @@
class TestTimeConversionFormats(object):
- def test_to_datetime_format(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_format(self, cache):
values = ['1/1/2000', '1/2/2000', '1/3/2000']
results1 = [Timestamp('20000101'), Timestamp('20000201'),
@@ -43,7 +45,7 @@ def test_to_datetime_format(self):
(values[2], (results1[2], results2[2]))]:
for i, fmt in enumerate(['%d/%m/%Y', '%m/%d/%Y']):
- result = to_datetime(vals, format=fmt)
+ result = to_datetime(vals, format=fmt, cache=cache)
expected = expecteds[i]
if isinstance(expected, Series):
@@ -53,14 +55,15 @@ def test_to_datetime_format(self):
else:
tm.assert_index_equal(result, expected)
- def test_to_datetime_format_YYYYMMDD(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_format_YYYYMMDD(self, cache):
s = Series([19801222, 19801222] + [19810105] * 5)
expected = Series([Timestamp(x) for x in s.apply(str)])
- result = to_datetime(s, format='%Y%m%d')
+ result = to_datetime(s, format='%Y%m%d', cache=cache)
assert_series_equal(result, expected)
- result = to_datetime(s.apply(str), format='%Y%m%d')
+ result = to_datetime(s.apply(str), format='%Y%m%d', cache=cache)
assert_series_equal(result, expected)
# with NaT
@@ -69,44 +72,48 @@ def test_to_datetime_format_YYYYMMDD(self):
expected[2] = np.nan
s[2] = np.nan
- result = to_datetime(s, format='%Y%m%d')
+ result = to_datetime(s, format='%Y%m%d', cache=cache)
assert_series_equal(result, expected)
# string with NaT
s = s.apply(str)
s[2] = 'nat'
- result = to_datetime(s, format='%Y%m%d')
+ result = to_datetime(s, format='%Y%m%d', cache=cache)
assert_series_equal(result, expected)
# coercion
# GH 7930
s = Series([20121231, 20141231, 99991231])
- result = pd.to_datetime(s, format='%Y%m%d', errors='ignore')
+ result = pd.to_datetime(s, format='%Y%m%d', errors='ignore',
+ cache=cache)
expected = Series([datetime(2012, 12, 31),
datetime(2014, 12, 31), datetime(9999, 12, 31)],
dtype=object)
tm.assert_series_equal(result, expected)
- result = pd.to_datetime(s, format='%Y%m%d', errors='coerce')
+ result = pd.to_datetime(s, format='%Y%m%d', errors='coerce',
+ cache=cache)
expected = Series(['20121231', '20141231', 'NaT'], dtype='M8[ns]')
assert_series_equal(result, expected)
- # GH 10178
- def test_to_datetime_format_integer(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_format_integer(self, cache):
+ # GH 10178
s = Series([2000, 2001, 2002])
expected = Series([Timestamp(x) for x in s.apply(str)])
- result = to_datetime(s, format='%Y')
+ result = to_datetime(s, format='%Y', cache=cache)
assert_series_equal(result, expected)
s = Series([200001, 200105, 200206])
expected = Series([Timestamp(x[:4] + '-' + x[4:]) for x in s.apply(str)
])
- result = to_datetime(s, format='%Y%m')
+ result = to_datetime(s, format='%Y%m', cache=cache)
assert_series_equal(result, expected)
- def test_to_datetime_format_microsecond(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_format_microsecond(self, cache):
# these are locale dependent
lang, _ = locale.getlocale()
@@ -114,11 +121,12 @@ def test_to_datetime_format_microsecond(self):
val = '01-{}-2011 00:00:01.978'.format(month_abbr)
format = '%d-%b-%Y %H:%M:%S.%f'
- result = to_datetime(val, format=format)
+ result = to_datetime(val, format=format, cache=cache)
exp = datetime.strptime(val, format)
assert result == exp
- def test_to_datetime_format_time(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_format_time(self, cache):
data = [
['01/10/2010 15:20', '%m/%d/%Y %H:%M',
Timestamp('2010-01-10 15:20')],
@@ -134,9 +142,10 @@ def test_to_datetime_format_time(self):
# Timestamp('2010-01-10 09:12:56')]
]
for s, format, dt in data:
- assert to_datetime(s, format=format) == dt
+ assert to_datetime(s, format=format, cache=cache) == dt
- def test_to_datetime_with_non_exact(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_with_non_exact(self, cache):
# GH 10834
tm._skip_if_has_locale()
@@ -147,12 +156,13 @@ def test_to_datetime_with_non_exact(self):
s = Series(['19MAY11', 'foobar19MAY11', '19MAY11:00:00:00',
'19MAY11 00:00:00Z'])
- result = to_datetime(s, format='%d%b%y', exact=False)
+ result = to_datetime(s, format='%d%b%y', exact=False, cache=cache)
expected = to_datetime(s.str.extract(r'(\d+\w+\d+)', expand=False),
- format='%d%b%y')
+ format='%d%b%y', cache=cache)
assert_series_equal(result, expected)
- def test_parse_nanoseconds_with_formula(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_parse_nanoseconds_with_formula(self, cache):
# GH8989
        # truncating the nanoseconds when a format was provided
@@ -161,44 +171,48 @@ def test_parse_nanoseconds_with_formula(self):
"2012-01-01 09:00:00.001",
"2012-01-01 09:00:00.001000",
"2012-01-01 09:00:00.001000000", ]:
- expected = pd.to_datetime(v)
- result = pd.to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f")
+ expected = pd.to_datetime(v, cache=cache)
+ result = pd.to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f",
+ cache=cache)
assert result == expected
- def test_to_datetime_format_weeks(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_format_weeks(self, cache):
data = [
['2009324', '%Y%W%w', Timestamp('2009-08-13')],
['2013020', '%Y%U%w', Timestamp('2013-01-13')]
]
for s, format, dt in data:
- assert to_datetime(s, format=format) == dt
+ assert to_datetime(s, format=format, cache=cache) == dt
class TestToDatetime(object):
- def test_to_datetime_dt64s(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_dt64s(self, cache):
in_bound_dts = [
np.datetime64('2000-01-01'),
np.datetime64('2000-01-02'),
]
for dt in in_bound_dts:
- assert pd.to_datetime(dt) == Timestamp(dt)
+ assert pd.to_datetime(dt, cache=cache) == Timestamp(dt)
oob_dts = [np.datetime64('1000-01-01'), np.datetime64('5000-01-02'), ]
for dt in oob_dts:
pytest.raises(ValueError, pd.to_datetime, dt, errors='raise')
pytest.raises(ValueError, Timestamp, dt)
- assert pd.to_datetime(dt, errors='coerce') is NaT
+ assert pd.to_datetime(dt, errors='coerce', cache=cache) is NaT
- def test_to_datetime_array_of_dt64s(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_array_of_dt64s(self, cache):
dts = [np.datetime64('2000-01-01'), np.datetime64('2000-01-02'), ]
# Assuming all datetimes are in bounds, to_datetime() returns
# an array that is equal to Timestamp() parsing
tm.assert_numpy_array_equal(
- pd.to_datetime(dts, box=False),
+ pd.to_datetime(dts, box=False, cache=cache),
np.array([Timestamp(x).asm8 for x in dts])
)
@@ -209,7 +223,8 @@ def test_to_datetime_array_of_dt64s(self):
errors='raise')
tm.assert_numpy_array_equal(
- pd.to_datetime(dts_with_oob, box=False, errors='coerce'),
+ pd.to_datetime(dts_with_oob, box=False, errors='coerce',
+ cache=cache),
np.array(
[
Timestamp(dts_with_oob[0]).asm8,
@@ -224,20 +239,22 @@ def test_to_datetime_array_of_dt64s(self):
# are converted to their .item(), which depending on the version of
# numpy is either a python datetime.datetime or datetime.date
tm.assert_numpy_array_equal(
- pd.to_datetime(dts_with_oob, box=False, errors='ignore'),
+ pd.to_datetime(dts_with_oob, box=False, errors='ignore',
+ cache=cache),
np.array(
[dt.item() for dt in dts_with_oob],
dtype='O'
)
)
- def test_to_datetime_tz(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_tz(self, cache):
# xref 8260
# uniform returns a DatetimeIndex
arr = [pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),
pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')]
- result = pd.to_datetime(arr)
+ result = pd.to_datetime(arr, cache=cache)
expected = DatetimeIndex(
['2013-01-01 13:00:00', '2013-01-02 14:00:00'], tz='US/Pacific')
tm.assert_index_equal(result, expected)
@@ -245,9 +262,10 @@ def test_to_datetime_tz(self):
# mixed tzs will raise
arr = [pd.Timestamp('2013-01-01 13:00:00', tz='US/Pacific'),
pd.Timestamp('2013-01-02 14:00:00', tz='US/Eastern')]
- pytest.raises(ValueError, lambda: pd.to_datetime(arr))
+ pytest.raises(ValueError, lambda: pd.to_datetime(arr, cache=cache))
- def test_to_datetime_tz_pytz(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_tz_pytz(self, cache):
# see gh-8260
us_eastern = pytz.timezone('US/Eastern')
arr = np.array([us_eastern.localize(datetime(year=2000, month=1, day=1,
@@ -255,18 +273,20 @@ def test_to_datetime_tz_pytz(self):
us_eastern.localize(datetime(year=2000, month=6, day=1,
hour=3, minute=0))],
dtype=object)
- result = pd.to_datetime(arr, utc=True)
+ result = pd.to_datetime(arr, utc=True, cache=cache)
expected = DatetimeIndex(['2000-01-01 08:00:00+00:00',
'2000-06-01 07:00:00+00:00'],
dtype='datetime64[ns, UTC]', freq=None)
tm.assert_index_equal(result, expected)
+ @pytest.mark.parametrize('cache', [True, False])
@pytest.mark.parametrize("init_constructor, end_constructor, test_method",
[(Index, DatetimeIndex, tm.assert_index_equal),
(list, DatetimeIndex, tm.assert_index_equal),
(np.array, DatetimeIndex, tm.assert_index_equal),
(Series, Series, tm.assert_series_equal)])
def test_to_datetime_utc_true(self,
+ cache,
init_constructor,
end_constructor,
test_method):
@@ -277,39 +297,47 @@ def test_to_datetime_utc_true(self,
result = pd.to_datetime(init_constructor(data),
format='%Y%m%d %H%M%S',
- utc=True)
+ utc=True,
+ cache=cache)
expected = end_constructor(expected_data)
test_method(result, expected)
# Test scalar case as well
for scalar, expected in zip(data, expected_data):
- result = pd.to_datetime(scalar, format='%Y%m%d %H%M%S', utc=True)
+ result = pd.to_datetime(scalar, format='%Y%m%d %H%M%S', utc=True,
+ cache=cache)
assert result == expected
- def test_to_datetime_utc_true_with_series_single_value(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_utc_true_with_series_single_value(self, cache):
# GH 15760 UTC=True with Series
ts = 1.5e18
- result = pd.to_datetime(pd.Series([ts]), utc=True)
+ result = pd.to_datetime(pd.Series([ts]), utc=True, cache=cache)
expected = pd.Series([pd.Timestamp(ts, tz='utc')])
tm.assert_series_equal(result, expected)
- def test_to_datetime_utc_true_with_series_tzaware_string(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_utc_true_with_series_tzaware_string(self, cache):
ts = '2013-01-01 00:00:00-01:00'
expected_ts = '2013-01-01 01:00:00'
data = pd.Series([ts] * 3)
- result = pd.to_datetime(data, utc=True)
+ result = pd.to_datetime(data, utc=True, cache=cache)
expected = pd.Series([pd.Timestamp(expected_ts, tz='utc')] * 3)
tm.assert_series_equal(result, expected)
+ @pytest.mark.parametrize('cache', [True, False])
@pytest.mark.parametrize('date, dtype',
[('2013-01-01 01:00:00', 'datetime64[ns]'),
('2013-01-01 01:00:00', 'datetime64[ns, UTC]')])
- def test_to_datetime_utc_true_with_series_datetime_ns(self, date, dtype):
+ def test_to_datetime_utc_true_with_series_datetime_ns(self, cache, date,
+ dtype):
expected = pd.Series([pd.Timestamp('2013-01-01 01:00:00', tz='UTC')])
- result = pd.to_datetime(pd.Series([date], dtype=dtype), utc=True)
+ result = pd.to_datetime(pd.Series([date], dtype=dtype), utc=True,
+ cache=cache)
tm.assert_series_equal(result, expected)
- def test_to_datetime_tz_psycopg2(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_tz_psycopg2(self, cache):
# xref 8260
try:
@@ -324,7 +352,7 @@ def test_to_datetime_tz_psycopg2(self):
datetime(2000, 6, 1, 3, 0, tzinfo=tz2)],
dtype=object)
- result = pd.to_datetime(arr, errors='coerce', utc=True)
+ result = pd.to_datetime(arr, errors='coerce', utc=True, cache=cache)
expected = DatetimeIndex(['2000-01-01 08:00:00+00:00',
'2000-06-01 07:00:00+00:00'],
dtype='datetime64[ns, UTC]', freq=None)
@@ -337,32 +365,39 @@ def test_to_datetime_tz_psycopg2(self):
assert is_datetime64_ns_dtype(i)
        # tz coercion
- result = pd.to_datetime(i, errors='coerce')
+ result = pd.to_datetime(i, errors='coerce', cache=cache)
tm.assert_index_equal(result, i)
- result = pd.to_datetime(i, errors='coerce', utc=True)
+ result = pd.to_datetime(i, errors='coerce', utc=True, cache=cache)
expected = pd.DatetimeIndex(['2000-01-01 13:00:00'],
dtype='datetime64[ns, UTC]')
tm.assert_index_equal(result, expected)
- def test_datetime_bool(self):
+ @pytest.mark.parametrize(
+ 'cache',
+ [pytest.param(True,
+ marks=pytest.mark.skipif(True, reason="GH 18111")),
+ False])
+ def test_datetime_bool(self, cache):
# GH13176
with pytest.raises(TypeError):
to_datetime(False)
- assert to_datetime(False, errors="coerce") is NaT
- assert to_datetime(False, errors="ignore") is False
+ assert to_datetime(False, errors="coerce", cache=cache) is NaT
+ assert to_datetime(False, errors="ignore", cache=cache) is False
with pytest.raises(TypeError):
to_datetime(True)
- assert to_datetime(True, errors="coerce") is NaT
- assert to_datetime(True, errors="ignore") is True
+ assert to_datetime(True, errors="coerce", cache=cache) is NaT
+ assert to_datetime(True, errors="ignore", cache=cache) is True
with pytest.raises(TypeError):
- to_datetime([False, datetime.today()])
+ to_datetime([False, datetime.today()], cache=cache)
with pytest.raises(TypeError):
- to_datetime(['20130101', True])
+ to_datetime(['20130101', True], cache=cache)
tm.assert_index_equal(to_datetime([0, False, NaT, 0.0],
- errors="coerce"),
- DatetimeIndex([to_datetime(0), NaT,
- NaT, to_datetime(0)]))
+ errors="coerce", cache=cache),
+ DatetimeIndex([to_datetime(0, cache=cache),
+ NaT,
+ NaT,
+ to_datetime(0, cache=cache)]))
def test_datetime_invalid_datatype(self):
# GH13176
@@ -372,6 +407,39 @@ def test_datetime_invalid_datatype(self):
with pytest.raises(TypeError):
pd.to_datetime(pd.to_datetime)
+ @pytest.mark.parametrize("utc", [True, None])
+ @pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None])
+ @pytest.mark.parametrize("box", [True, False])
+ @pytest.mark.parametrize("constructor", [list, tuple, np.array, pd.Index])
+ def test_to_datetime_cache(self, utc, format, box, constructor):
+ date = '20130101 00:00:00'
+ test_dates = [date] * 10**5
+ data = constructor(test_dates)
+ result = pd.to_datetime(data, utc=utc, format=format, box=box,
+ cache=True)
+ expected = pd.to_datetime(data, utc=utc, format=format, box=box,
+ cache=False)
+ if box:
+ tm.assert_index_equal(result, expected)
+ else:
+ tm.assert_numpy_array_equal(result, expected)
+
+ @pytest.mark.parametrize("utc", [True, None])
+ @pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None])
+ def test_to_datetime_cache_series(self, utc, format):
+ date = '20130101 00:00:00'
+ test_dates = [date] * 10**5
+ data = pd.Series(test_dates)
+ result = pd.to_datetime(data, utc=utc, format=format, cache=True)
+ expected = pd.to_datetime(data, utc=utc, format=format, cache=False)
+ tm.assert_series_equal(result, expected)
+
+ def test_to_datetime_cache_scalar(self):
+ date = '20130101 00:00:00'
+ result = pd.to_datetime(date, cache=True)
+ expected = pd.Timestamp('20130101 00:00:00')
+ assert result == expected
+
@pytest.mark.parametrize('date, format',
[('2017-20', '%Y-%W'),
('20 Sunday', '%W %A'),
@@ -388,72 +456,77 @@ def test_week_without_day_and_calendar_year(self, date, format):
class TestToDatetimeUnit(object):
-
- def test_unit(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_unit(self, cache):
# GH 11758
        # test proper behavior with errors
with pytest.raises(ValueError):
- to_datetime([1], unit='D', format='%Y%m%d')
+ to_datetime([1], unit='D', format='%Y%m%d', cache=cache)
values = [11111111, 1, 1.0, tslib.iNaT, NaT, np.nan,
'NaT', '']
- result = to_datetime(values, unit='D', errors='ignore')
+ result = to_datetime(values, unit='D', errors='ignore', cache=cache)
expected = Index([11111111, Timestamp('1970-01-02'),
Timestamp('1970-01-02'), NaT,
NaT, NaT, NaT, NaT],
dtype=object)
tm.assert_index_equal(result, expected)
- result = to_datetime(values, unit='D', errors='coerce')
+ result = to_datetime(values, unit='D', errors='coerce', cache=cache)
expected = DatetimeIndex(['NaT', '1970-01-02', '1970-01-02',
'NaT', 'NaT', 'NaT', 'NaT', 'NaT'])
tm.assert_index_equal(result, expected)
with pytest.raises(tslib.OutOfBoundsDatetime):
- to_datetime(values, unit='D', errors='raise')
+ to_datetime(values, unit='D', errors='raise', cache=cache)
values = [1420043460000, tslib.iNaT, NaT, np.nan, 'NaT']
- result = to_datetime(values, errors='ignore', unit='s')
+ result = to_datetime(values, errors='ignore', unit='s', cache=cache)
expected = Index([1420043460000, NaT, NaT,
NaT, NaT], dtype=object)
tm.assert_index_equal(result, expected)
- result = to_datetime(values, errors='coerce', unit='s')
+ result = to_datetime(values, errors='coerce', unit='s', cache=cache)
expected = DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT'])
tm.assert_index_equal(result, expected)
with pytest.raises(tslib.OutOfBoundsDatetime):
- to_datetime(values, errors='raise', unit='s')
+ to_datetime(values, errors='raise', unit='s', cache=cache)
# if we have a string, then we raise a ValueError
# and NOT an OutOfBoundsDatetime
for val in ['foo', Timestamp('20130101')]:
try:
- to_datetime(val, errors='raise', unit='s')
+ to_datetime(val, errors='raise', unit='s', cache=cache)
except tslib.OutOfBoundsDatetime:
raise AssertionError("incorrect exception raised")
except ValueError:
pass
- def test_unit_consistency(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_unit_consistency(self, cache):
# consistency of conversions
expected = Timestamp('1970-05-09 14:25:11')
- result = pd.to_datetime(11111111, unit='s', errors='raise')
+ result = pd.to_datetime(11111111, unit='s', errors='raise',
+ cache=cache)
assert result == expected
assert isinstance(result, Timestamp)
- result = pd.to_datetime(11111111, unit='s', errors='coerce')
+ result = pd.to_datetime(11111111, unit='s', errors='coerce',
+ cache=cache)
assert result == expected
assert isinstance(result, Timestamp)
- result = pd.to_datetime(11111111, unit='s', errors='ignore')
+ result = pd.to_datetime(11111111, unit='s', errors='ignore',
+ cache=cache)
assert result == expected
assert isinstance(result, Timestamp)
- def test_unit_with_numeric(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_unit_with_numeric(self, cache):
# GH 13180
# coercions from floats/ints are ok
@@ -462,10 +535,10 @@ def test_unit_with_numeric(self):
arr1 = [1.434692e+18, 1.432766e+18]
arr2 = np.array(arr1).astype('int64')
for errors in ['ignore', 'raise', 'coerce']:
- result = pd.to_datetime(arr1, errors=errors)
+ result = pd.to_datetime(arr1, errors=errors, cache=cache)
tm.assert_index_equal(result, expected)
- result = pd.to_datetime(arr2, errors=errors)
+ result = pd.to_datetime(arr2, errors=errors, cache=cache)
tm.assert_index_equal(result, expected)
# but we want to make sure that we are coercing
@@ -474,7 +547,7 @@ def test_unit_with_numeric(self):
'2015-06-19 05:33:20',
'2015-05-27 22:33:20'])
arr = ['foo', 1.434692e+18, 1.432766e+18]
- result = pd.to_datetime(arr, errors='coerce')
+ result = pd.to_datetime(arr, errors='coerce', cache=cache)
tm.assert_index_equal(result, expected)
expected = DatetimeIndex(['2015-06-19 05:33:20',
@@ -482,31 +555,33 @@ def test_unit_with_numeric(self):
'NaT',
'NaT'])
arr = [1.434692e+18, 1.432766e+18, 'foo', 'NaT']
- result = pd.to_datetime(arr, errors='coerce')
+ result = pd.to_datetime(arr, errors='coerce', cache=cache)
tm.assert_index_equal(result, expected)
- def test_unit_mixed(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_unit_mixed(self, cache):
# mixed integers/datetimes
expected = DatetimeIndex(['2013-01-01', 'NaT', 'NaT'])
arr = [pd.Timestamp('20130101'), 1.434692e+18, 1.432766e+18]
- result = pd.to_datetime(arr, errors='coerce')
+ result = pd.to_datetime(arr, errors='coerce', cache=cache)
tm.assert_index_equal(result, expected)
with pytest.raises(ValueError):
- pd.to_datetime(arr, errors='raise')
+ pd.to_datetime(arr, errors='raise', cache=cache)
expected = DatetimeIndex(['NaT',
'NaT',
'2013-01-01'])
arr = [1.434692e+18, 1.432766e+18, pd.Timestamp('20130101')]
- result = pd.to_datetime(arr, errors='coerce')
+ result = pd.to_datetime(arr, errors='coerce', cache=cache)
tm.assert_index_equal(result, expected)
with pytest.raises(ValueError):
- pd.to_datetime(arr, errors='raise')
+ pd.to_datetime(arr, errors='raise', cache=cache)
- def test_dataframe(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_dataframe(self, cache):
df = DataFrame({'year': [2015, 2016],
'month': [2, 3],
@@ -520,19 +595,20 @@ def test_dataframe(self):
result = to_datetime({'year': df['year'],
'month': df['month'],
- 'day': df['day']})
+ 'day': df['day']}, cache=cache)
expected = Series([Timestamp('20150204 00:00:00'),
Timestamp('20160305 00:0:00')])
assert_series_equal(result, expected)
# dict-like
- result = to_datetime(df[['year', 'month', 'day']].to_dict())
+ result = to_datetime(df[['year', 'month', 'day']].to_dict(),
+ cache=cache)
assert_series_equal(result, expected)
# dict but with constructable
df2 = df[['year', 'month', 'day']].to_dict()
df2['month'] = 2
- result = to_datetime(df2)
+ result = to_datetime(df2, cache=cache)
expected2 = Series([Timestamp('20150204 00:00:00'),
Timestamp('20160205 00:0:00')])
assert_series_equal(result, expected2)
@@ -553,7 +629,8 @@ def test_dataframe(self):
]
for d in units:
- result = to_datetime(df[list(d.keys())].rename(columns=d))
+ result = to_datetime(df[list(d.keys())].rename(columns=d),
+ cache=cache)
expected = Series([Timestamp('20150204 06:58:10'),
Timestamp('20160305 07:59:11')])
assert_series_equal(result, expected)
@@ -568,13 +645,13 @@ def test_dataframe(self):
'us': 'us',
'ns': 'ns'}
- result = to_datetime(df.rename(columns=d))
+ result = to_datetime(df.rename(columns=d), cache=cache)
expected = Series([Timestamp('20150204 06:58:10.001002003'),
Timestamp('20160305 07:59:11.001002003')])
assert_series_equal(result, expected)
# coerce back to int
- result = to_datetime(df.astype(str))
+ result = to_datetime(df.astype(str), cache=cache)
assert_series_equal(result, expected)
# passing coerce
@@ -585,8 +662,8 @@ def test_dataframe(self):
msg = ("cannot assemble the datetimes: time data .+ does not "
"match format '%Y%m%d' \(match\)")
with tm.assert_raises_regex(ValueError, msg):
- to_datetime(df2)
- result = to_datetime(df2, errors='coerce')
+ to_datetime(df2, cache=cache)
+ result = to_datetime(df2, errors='coerce', cache=cache)
expected = Series([Timestamp('20150204 00:00:00'),
NaT])
assert_series_equal(result, expected)
@@ -597,7 +674,7 @@ def test_dataframe(self):
with tm.assert_raises_regex(ValueError, msg):
df2 = df.copy()
df2['foo'] = 1
- to_datetime(df2)
+ to_datetime(df2, cache=cache)
# not enough
msg = ('to assemble mappings requires at least that \[year, month, '
@@ -608,7 +685,7 @@ def test_dataframe(self):
['month', 'day'],
['year', 'day', 'second']]:
with tm.assert_raises_regex(ValueError, msg):
- to_datetime(df[c])
+ to_datetime(df[c], cache=cache)
# duplicates
msg = 'cannot assemble with duplicate keys'
@@ -617,7 +694,7 @@ def test_dataframe(self):
'day': [4, 5]})
df2.columns = ['year', 'year', 'day']
with tm.assert_raises_regex(ValueError, msg):
- to_datetime(df2)
+ to_datetime(df2, cache=cache)
df2 = DataFrame({'year': [2015, 2016],
'month': [2, 20],
@@ -625,16 +702,17 @@ def test_dataframe(self):
'hour': [4, 5]})
df2.columns = ['year', 'month', 'day', 'day']
with tm.assert_raises_regex(ValueError, msg):
- to_datetime(df2)
+ to_datetime(df2, cache=cache)
- def test_dataframe_dtypes(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_dataframe_dtypes(self, cache):
# #13451
df = DataFrame({'year': [2015, 2016],
'month': [2, 3],
'day': [4, 5]})
# int16
- result = to_datetime(df.astype('int16'))
+ result = to_datetime(df.astype('int16'), cache=cache)
expected = Series([Timestamp('20150204 00:00:00'),
Timestamp('20160305 00:00:00')])
assert_series_equal(result, expected)
@@ -642,7 +720,7 @@ def test_dataframe_dtypes(self):
# mixed dtypes
df['month'] = df['month'].astype('int8')
df['day'] = df['day'].astype('int8')
- result = to_datetime(df)
+ result = to_datetime(df, cache=cache)
expected = Series([Timestamp('20150204 00:00:00'),
Timestamp('20160305 00:00:00')])
assert_series_equal(result, expected)
@@ -652,18 +730,19 @@ def test_dataframe_dtypes(self):
'month': [1.5, 1],
'day': [1, 1]})
with pytest.raises(ValueError):
- to_datetime(df)
+ to_datetime(df, cache=cache)
class TestToDatetimeMisc(object):
- def test_index_to_datetime(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_index_to_datetime(self, cache):
idx = Index(['1/1/2000', '1/2/2000', '1/3/2000'])
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = idx.to_datetime()
- expected = DatetimeIndex(pd.to_datetime(idx.values))
+ expected = DatetimeIndex(pd.to_datetime(idx.values, cache=cache))
tm.assert_index_equal(result, expected)
with tm.assert_produces_warning(FutureWarning,
@@ -674,17 +753,19 @@ def test_index_to_datetime(self):
expected = DatetimeIndex([today])
tm.assert_index_equal(result, expected)
- def test_to_datetime_iso8601(self):
- result = to_datetime(["2012-01-01 00:00:00"])
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_iso8601(self, cache):
+ result = to_datetime(["2012-01-01 00:00:00"], cache=cache)
exp = Timestamp("2012-01-01 00:00:00")
assert result[0] == exp
- result = to_datetime(['20121001']) # bad iso 8601
+ result = to_datetime(['20121001'], cache=cache) # bad iso 8601
exp = Timestamp('2012-10-01')
assert result[0] == exp
- def test_to_datetime_default(self):
- rs = to_datetime('2001')
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_default(self, cache):
+ rs = to_datetime('2001', cache=cache)
xp = datetime(2001, 1, 1)
assert rs == xp
@@ -694,71 +775,80 @@ def test_to_datetime_default(self):
# pytest.raises(ValueError, to_datetime('01-13-2012',
# dayfirst=True))
- def test_to_datetime_on_datetime64_series(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_on_datetime64_series(self, cache):
# #2699
s = Series(date_range('1/1/2000', periods=10))
- result = to_datetime(s)
+ result = to_datetime(s, cache=cache)
assert result[0] == s[0]
- def test_to_datetime_with_space_in_series(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_with_space_in_series(self, cache):
# GH 6428
s = Series(['10/18/2006', '10/18/2008', ' '])
- pytest.raises(ValueError, lambda: to_datetime(s, errors='raise'))
- result_coerce = to_datetime(s, errors='coerce')
+ pytest.raises(ValueError, lambda: to_datetime(s,
+ errors='raise',
+ cache=cache))
+ result_coerce = to_datetime(s, errors='coerce', cache=cache)
expected_coerce = Series([datetime(2006, 10, 18),
datetime(2008, 10, 18),
NaT])
tm.assert_series_equal(result_coerce, expected_coerce)
- result_ignore = to_datetime(s, errors='ignore')
+ result_ignore = to_datetime(s, errors='ignore', cache=cache)
tm.assert_series_equal(result_ignore, s)
- def test_to_datetime_with_apply(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_with_apply(self, cache):
# this is only locale tested with US/None locales
tm._skip_if_has_locale()
# GH 5195
# with a format and coerce a single item to_datetime fails
td = Series(['May 04', 'Jun 02', 'Dec 11'], index=[1, 2, 3])
- expected = pd.to_datetime(td, format='%b %y')
- result = td.apply(pd.to_datetime, format='%b %y')
+ expected = pd.to_datetime(td, format='%b %y', cache=cache)
+ result = td.apply(pd.to_datetime, format='%b %y', cache=cache)
assert_series_equal(result, expected)
td = pd.Series(['May 04', 'Jun 02', ''], index=[1, 2, 3])
pytest.raises(ValueError,
lambda: pd.to_datetime(td, format='%b %y',
- errors='raise'))
+ errors='raise',
+ cache=cache))
pytest.raises(ValueError,
lambda: td.apply(pd.to_datetime, format='%b %y',
- errors='raise'))
- expected = pd.to_datetime(td, format='%b %y', errors='coerce')
+ errors='raise', cache=cache))
+ expected = pd.to_datetime(td, format='%b %y', errors='coerce',
+ cache=cache)
result = td.apply(
- lambda x: pd.to_datetime(x, format='%b %y', errors='coerce'))
+ lambda x: pd.to_datetime(x, format='%b %y', errors='coerce',
+ cache=cache))
assert_series_equal(result, expected)
- def test_to_datetime_types(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_types(self, cache):
# empty string
- result = to_datetime('')
+ result = to_datetime('', cache=cache)
assert result is NaT
- result = to_datetime(['', ''])
+ result = to_datetime(['', ''], cache=cache)
assert isna(result).all()
# ints
result = Timestamp(0)
- expected = to_datetime(0)
+ expected = to_datetime(0, cache=cache)
assert result == expected
# GH 3888 (strings)
- expected = to_datetime(['2012'])[0]
- result = to_datetime('2012')
+ expected = to_datetime(['2012'], cache=cache)[0]
+ result = to_datetime('2012', cache=cache)
assert result == expected
# array = ['2012','20120101','20120101 12:01:01']
array = ['20120101', '20120101 12:01:01']
- expected = list(to_datetime(array))
+ expected = list(to_datetime(array, cache=cache))
result = lmap(Timestamp, array)
tm.assert_almost_equal(result, expected)
@@ -767,13 +857,15 @@ def test_to_datetime_types(self):
# expected = to_datetime('2012')
# assert result == expected
- def test_to_datetime_unprocessable_input(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_unprocessable_input(self, cache):
# GH 4928
tm.assert_numpy_array_equal(
- to_datetime([1, '1'], errors='ignore'),
+ to_datetime([1, '1'], errors='ignore', cache=cache),
np.array([1, '1'], dtype='O')
)
- pytest.raises(TypeError, to_datetime, [1, '1'], errors='raise')
+ pytest.raises(TypeError, to_datetime, [1, '1'], errors='raise',
+ cache=cache)
def test_to_datetime_other_datetime64_units(self):
# 5/25/2012
@@ -809,7 +901,8 @@ def test_to_datetime_overflow(self):
with pytest.raises(OverflowError):
date_range(start='1/1/1700', freq='B', periods=100000)
- def test_string_na_nat_conversion(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_string_na_nat_conversion(self, cache):
# GH #999, #858
from pandas.compat import parse_date
@@ -827,7 +920,7 @@ def test_string_na_nat_conversion(self):
result = tslib.array_to_datetime(strings)
tm.assert_almost_equal(result, expected)
- result2 = to_datetime(strings)
+ result2 = to_datetime(strings, cache=cache)
assert isinstance(result2, DatetimeIndex)
tm.assert_numpy_array_equal(result, result2.values)
@@ -835,22 +928,25 @@ def test_string_na_nat_conversion(self):
# GH 10636, default is now 'raise'
pytest.raises(ValueError,
- lambda: to_datetime(malformed, errors='raise'))
+ lambda: to_datetime(malformed, errors='raise',
+ cache=cache))
- result = to_datetime(malformed, errors='ignore')
+ result = to_datetime(malformed, errors='ignore', cache=cache)
tm.assert_numpy_array_equal(result, malformed)
- pytest.raises(ValueError, to_datetime, malformed, errors='raise')
+ pytest.raises(ValueError, to_datetime, malformed, errors='raise',
+ cache=cache)
idx = ['a', 'b', 'c', 'd', 'e']
series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan,
'1/5/2000'], index=idx, name='foo')
- dseries = Series([to_datetime('1/1/2000'), np.nan,
- to_datetime('1/3/2000'), np.nan,
- to_datetime('1/5/2000')], index=idx, name='foo')
+ dseries = Series([to_datetime('1/1/2000', cache=cache), np.nan,
+ to_datetime('1/3/2000', cache=cache), np.nan,
+ to_datetime('1/5/2000', cache=cache)],
+ index=idx, name='foo')
- result = to_datetime(series)
- dresult = to_datetime(dseries)
+ result = to_datetime(series, cache=cache)
+ dresult = to_datetime(dseries, cache=cache)
expected = Series(np.empty(5, dtype='M8[ns]'), index=idx)
for i in range(5):
@@ -858,7 +954,7 @@ def test_string_na_nat_conversion(self):
if isna(x):
expected[i] = tslib.iNaT
else:
- expected[i] = to_datetime(x)
+ expected[i] = to_datetime(x, cache=cache)
assert_series_equal(result, expected, check_names=False)
assert result.name == 'foo'
@@ -866,26 +962,29 @@ def test_string_na_nat_conversion(self):
assert_series_equal(dresult, expected, check_names=False)
assert dresult.name == 'foo'
- def test_dti_constructor_numpy_timeunits(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_dti_constructor_numpy_timeunits(self, cache):
# GH 9114
- base = pd.to_datetime(['2000-01-01T00:00', '2000-01-02T00:00', 'NaT'])
+ base = pd.to_datetime(['2000-01-01T00:00', '2000-01-02T00:00', 'NaT'],
+ cache=cache)
for dtype in ['datetime64[h]', 'datetime64[m]', 'datetime64[s]',
'datetime64[ms]', 'datetime64[us]', 'datetime64[ns]']:
values = base.values.astype(dtype)
tm.assert_index_equal(DatetimeIndex(values), base)
- tm.assert_index_equal(to_datetime(values), base)
+ tm.assert_index_equal(to_datetime(values, cache=cache), base)
- def test_dayfirst(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_dayfirst(self, cache):
# GH 5917
arr = ['10/02/2014', '11/02/2014', '12/02/2014']
expected = DatetimeIndex([datetime(2014, 2, 10), datetime(2014, 2, 11),
datetime(2014, 2, 12)])
idx1 = DatetimeIndex(arr, dayfirst=True)
idx2 = DatetimeIndex(np.array(arr), dayfirst=True)
- idx3 = to_datetime(arr, dayfirst=True)
- idx4 = to_datetime(np.array(arr), dayfirst=True)
+ idx3 = to_datetime(arr, dayfirst=True, cache=cache)
+ idx4 = to_datetime(np.array(arr), dayfirst=True, cache=cache)
idx5 = DatetimeIndex(Index(arr), dayfirst=True)
idx6 = DatetimeIndex(Series(arr), dayfirst=True)
tm.assert_index_equal(expected, idx1)
@@ -897,6 +996,8 @@ def test_dayfirst(self):
class TestGuessDatetimeFormat(object):
+
+ @is_dateutil_le_261
def test_guess_datetime_format_for_array(self):
tm._skip_if_not_us_locale()
expected_format = '%Y-%m-%d %H:%M:%S.%f'
@@ -917,10 +1018,32 @@ def test_guess_datetime_format_for_array(self):
[np.nan, np.nan, np.nan], dtype='O'))
assert format_for_string_of_nans is None
+ @is_dateutil_gt_261
+ def test_guess_datetime_format_for_array_gt_261(self):
+ tm._skip_if_not_us_locale()
+ expected_format = '%Y-%m-%d %H:%M:%S.%f'
+ dt_string = datetime(2011, 12, 30, 0, 0, 0).strftime(expected_format)
+
+ test_arrays = [
+ np.array([dt_string, dt_string, dt_string], dtype='O'),
+ np.array([np.nan, np.nan, dt_string], dtype='O'),
+ np.array([dt_string, 'random_string'], dtype='O'),
+ ]
+
+ for test_array in test_arrays:
+ assert tools._guess_datetime_format_for_array(
+ test_array) is None
+
+ format_for_string_of_nans = tools._guess_datetime_format_for_array(
+ np.array(
+ [np.nan, np.nan, np.nan], dtype='O'))
+ assert format_for_string_of_nans is None
+
class TestToDatetimeInferFormat(object):
- def test_to_datetime_infer_datetime_format_consistent_format(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_infer_datetime_format_consistent_format(self, cache):
s = pd.Series(pd.date_range('20000101', periods=50, freq='H'))
test_formats = ['%m-%d-%Y', '%m/%d/%Y %H:%M:%S.%f',
@@ -929,90 +1052,113 @@ def test_to_datetime_infer_datetime_format_consistent_format(self):
for test_format in test_formats:
s_as_dt_strings = s.apply(lambda x: x.strftime(test_format))
- with_format = pd.to_datetime(s_as_dt_strings, format=test_format)
+ with_format = pd.to_datetime(s_as_dt_strings, format=test_format,
+ cache=cache)
no_infer = pd.to_datetime(s_as_dt_strings,
- infer_datetime_format=False)
+ infer_datetime_format=False,
+ cache=cache)
yes_infer = pd.to_datetime(s_as_dt_strings,
- infer_datetime_format=True)
+ infer_datetime_format=True,
+ cache=cache)
# Whether the format is explicitly passed, it is inferred, or
# it is not inferred, the results should all be the same
tm.assert_series_equal(with_format, no_infer)
tm.assert_series_equal(no_infer, yes_infer)
- def test_to_datetime_infer_datetime_format_inconsistent_format(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_infer_datetime_format_inconsistent_format(self,
+ cache):
s = pd.Series(np.array(['01/01/2011 00:00:00',
'01-02-2011 00:00:00',
'2011-01-03T00:00:00']))
# When the format is inconsistent, infer_datetime_format should just
# fallback to the default parsing
- tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False),
- pd.to_datetime(s, infer_datetime_format=True))
+ tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False,
+ cache=cache),
+ pd.to_datetime(s, infer_datetime_format=True,
+ cache=cache))
s = pd.Series(np.array(['Jan/01/2011', 'Feb/01/2011', 'Mar/01/2011']))
- tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False),
- pd.to_datetime(s, infer_datetime_format=True))
+ tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False,
+ cache=cache),
+ pd.to_datetime(s, infer_datetime_format=True,
+ cache=cache))
- def test_to_datetime_infer_datetime_format_series_with_nans(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_infer_datetime_format_series_with_nans(self, cache):
s = pd.Series(np.array(['01/01/2011 00:00:00', np.nan,
'01/03/2011 00:00:00', np.nan]))
- tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False),
- pd.to_datetime(s, infer_datetime_format=True))
-
- def test_to_datetime_infer_datetime_format_series_starting_with_nans(self):
+ tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False,
+ cache=cache),
+ pd.to_datetime(s, infer_datetime_format=True,
+ cache=cache))
+
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_infer_datetime_format_series_start_with_nans(self,
+ cache):
s = pd.Series(np.array([np.nan, np.nan, '01/01/2011 00:00:00',
'01/02/2011 00:00:00', '01/03/2011 00:00:00']))
- tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False),
- pd.to_datetime(s, infer_datetime_format=True))
+ tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False,
+ cache=cache),
+ pd.to_datetime(s, infer_datetime_format=True,
+ cache=cache))
- def test_to_datetime_iso8601_noleading_0s(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_to_datetime_iso8601_noleading_0s(self, cache):
# GH 11871
s = pd.Series(['2014-1-1', '2014-2-2', '2015-3-3'])
expected = pd.Series([pd.Timestamp('2014-01-01'),
pd.Timestamp('2014-02-02'),
pd.Timestamp('2015-03-03')])
- tm.assert_series_equal(pd.to_datetime(s), expected)
- tm.assert_series_equal(pd.to_datetime(s, format='%Y-%m-%d'), expected)
+ tm.assert_series_equal(pd.to_datetime(s, cache=cache), expected)
+ tm.assert_series_equal(pd.to_datetime(s, format='%Y-%m-%d',
+ cache=cache), expected)
class TestDaysInMonth(object):
# tests for issue #10154
- def test_day_not_in_month_coerce(self):
- assert isna(to_datetime('2015-02-29', errors='coerce'))
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_day_not_in_month_coerce(self, cache):
+ assert isna(to_datetime('2015-02-29', errors='coerce', cache=cache))
assert isna(to_datetime('2015-02-29', format="%Y-%m-%d",
- errors='coerce'))
+ errors='coerce', cache=cache))
assert isna(to_datetime('2015-02-32', format="%Y-%m-%d",
- errors='coerce'))
+ errors='coerce', cache=cache))
assert isna(to_datetime('2015-04-31', format="%Y-%m-%d",
- errors='coerce'))
+ errors='coerce', cache=cache))
- def test_day_not_in_month_raise(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_day_not_in_month_raise(self, cache):
pytest.raises(ValueError, to_datetime, '2015-02-29',
- errors='raise')
+ errors='raise', cache=cache)
pytest.raises(ValueError, to_datetime, '2015-02-29',
- errors='raise', format="%Y-%m-%d")
+ errors='raise', format="%Y-%m-%d", cache=cache)
pytest.raises(ValueError, to_datetime, '2015-02-32',
- errors='raise', format="%Y-%m-%d")
+ errors='raise', format="%Y-%m-%d", cache=cache)
pytest.raises(ValueError, to_datetime, '2015-04-31',
- errors='raise', format="%Y-%m-%d")
+ errors='raise', format="%Y-%m-%d", cache=cache)
- def test_day_not_in_month_ignore(self):
- assert to_datetime('2015-02-29', errors='ignore') == '2015-02-29'
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_day_not_in_month_ignore(self, cache):
+ assert to_datetime('2015-02-29', errors='ignore',
+ cache=cache) == '2015-02-29'
assert to_datetime('2015-02-29', errors='ignore',
- format="%Y-%m-%d") == '2015-02-29'
+ format="%Y-%m-%d", cache=cache) == '2015-02-29'
assert to_datetime('2015-02-32', errors='ignore',
- format="%Y-%m-%d") == '2015-02-32'
+ format="%Y-%m-%d", cache=cache) == '2015-02-32'
assert to_datetime('2015-04-31', errors='ignore',
- format="%Y-%m-%d") == '2015-04-31'
+ format="%Y-%m-%d", cache=cache) == '2015-04-31'
class TestDatetimeParsingWrappers(object):
- def test_parsers(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_parsers(self, cache):
# https://github.com/dateutil/dateutil/issues/217
import dateutil
@@ -1076,7 +1222,7 @@ def test_parsers(self):
result3 = to_datetime([date_str], yearfirst=yearfirst)
# result5 is used below
result4 = to_datetime(np.array([date_str], dtype=object),
- yearfirst=yearfirst)
+ yearfirst=yearfirst, cache=cache)
result6 = DatetimeIndex([date_str], yearfirst=yearfirst)
# result7 is used below
result8 = DatetimeIndex(Index([date_str]), yearfirst=yearfirst)
@@ -1106,7 +1252,8 @@ def test_parsers(self):
assert result3 is tslib.NaT
assert result4 is tslib.NaT
- def test_parsers_dayfirst_yearfirst(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_parsers_dayfirst_yearfirst(self, cache):
# OK
# 2.5.1 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00
# 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2012-10-11 00:00:00
@@ -1190,7 +1337,7 @@ def test_parsers_dayfirst_yearfirst(self):
assert result2 == expected
result3 = to_datetime(date_str, dayfirst=dayfirst,
- yearfirst=yearfirst)
+ yearfirst=yearfirst, cache=cache)
result4 = DatetimeIndex([date_str], dayfirst=dayfirst,
yearfirst=yearfirst)[0]
@@ -1199,7 +1346,8 @@ def test_parsers_dayfirst_yearfirst(self):
assert result3 == expected
assert result4 == expected
- def test_parsers_timestring(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_parsers_timestring(self, cache):
# must be the same as dateutil result
cases = {'10:15': (parse('10:15'), datetime(1, 1, 1, 10, 15)),
'9:05': (parse('9:05'), datetime(1, 1, 1, 9, 5))}
@@ -1254,9 +1402,10 @@ def test_parsers_time(self):
assert isinstance(res, list)
assert res == expected_arr
- def test_parsers_timezone_minute_offsets_roundtrip(self):
+ @pytest.mark.parametrize('cache', [True, False])
+ def test_parsers_timezone_minute_offsets_roundtrip(self, cache):
# GH11708
- base = to_datetime("2013-01-01 00:00:00")
+ base = to_datetime("2013-01-01 00:00:00", cache=cache)
dt_strings = [
('2013-01-01 05:45+0545',
"Asia/Katmandu",
@@ -1267,7 +1416,7 @@ def test_parsers_timezone_minute_offsets_roundtrip(self):
]
for dt_string, tz, dt_string_repr in dt_strings:
- dt_time = to_datetime(dt_string)
+ dt_time = to_datetime(dt_string, cache=cache)
assert base == dt_time
converted_time = dt_time.tz_localize('UTC').tz_convert(tz)
assert dt_string_repr == repr(converted_time)
diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
index ded5de9253eafd..dbd18de16cebde 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -158,6 +158,24 @@ def test_set_name_methods(self):
assert res is None
assert ind.names == new_names2
+ def test_set_levels_labels_directly(self):
+ # setting levels/labels directly raises AttributeError
+
+ levels = self.index.levels
+ new_levels = [[lev + 'a' for lev in level] for level in levels]
+
+ labels = self.index.labels
+ major_labels, minor_labels = labels
+ major_labels = [(x + 1) % 3 for x in major_labels]
+ minor_labels = [(x + 1) % 1 for x in minor_labels]
+ new_labels = [major_labels, minor_labels]
+
+ with pytest.raises(AttributeError):
+ self.index.levels = new_levels
+
+ with pytest.raises(AttributeError):
+ self.index.labels = new_labels
+
def test_set_levels(self):
# side note - you probably wouldn't want to use levels and labels
# directly like this - but it is possible.
@@ -578,16 +596,6 @@ def test_constructor_mismatched_label_levels(self):
with tm.assert_raises_regex(ValueError, label_error):
self.index.copy().set_labels([[0, 0, 0, 0], [0, 0]])
- # deprecated properties
- with warnings.catch_warnings():
- warnings.simplefilter('ignore')
-
- with tm.assert_raises_regex(ValueError, length_error):
- self.index.copy().levels = [['a'], ['b']]
-
- with tm.assert_raises_regex(ValueError, label_error):
- self.index.copy().labels = [[0, 0, 0, 0], [0, 0]]
-
def assert_multiindex_copied(self, copy, original):
# Levels should be (at least, shallow copied)
tm.assert_copy(copy.levels, original.levels)
@@ -2981,3 +2989,13 @@ def test_nan_stays_float(self):
assert pd.isna(df0.index.get_level_values(1)).all()
# the following failed in 0.14.1
assert pd.isna(dfm.index.get_level_values(1)[:-1]).all()
+
+ def test_million_record_attribute_error(self):
+ # GH 18165
+ r = list(range(1000000))
+ df = pd.DataFrame({'a': r, 'b': r},
+ index=pd.MultiIndex.from_tuples([(x, x) for x in r]))
+
+ with tm.assert_raises_regex(AttributeError,
+ "'Series' object has no attribute 'foo'"):
+ df['a'].foo()
diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py
index 9fe10885186de0..7d88b547746f64 100644
--- a/pandas/tests/indexes/test_range.py
+++ b/pandas/tests/indexes/test_range.py
@@ -971,8 +971,8 @@ def test_append(self):
([RI(1, 5, 2), RI(5, 6)], RI(1, 6, 2)),
([RI(1, 3, 2), RI(4, 7, 3)], RI(1, 7, 3)),
([RI(-4, 3, 2), RI(4, 7, 2)], RI(-4, 7, 2)),
- ([RI(-4, -8), RI(-8, -12)], RI(-8, -12)),
- ([RI(-4, -8), RI(3, -4)], RI(3, -8)),
+ ([RI(-4, -8), RI(-8, -12)], RI(0, 0)),
+ ([RI(-4, -8), RI(3, -4)], RI(0, 0)),
([RI(-4, -8), RI(3, 5)], RI(3, 5)),
([RI(-4, -2), RI(3, 5)], I64([-4, -3, 3, 4])),
([RI(-2,), RI(3, 5)], RI(3, 5)),
diff --git a/pandas/tests/io/sas/data/zero_variables.sas7bdat b/pandas/tests/io/sas/data/zero_variables.sas7bdat
new file mode 100644
index 00000000000000..85fec09447ec50
Binary files /dev/null and b/pandas/tests/io/sas/data/zero_variables.sas7bdat differ
diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py
index c3fb85811ca2ac..a5546b1198fc67 100644
--- a/pandas/tests/io/sas/test_sas7bdat.py
+++ b/pandas/tests/io/sas/test_sas7bdat.py
@@ -1,9 +1,11 @@
import pandas as pd
from pandas.compat import PY2
import pandas.util.testing as tm
+from pandas.errors import EmptyDataError
import os
import io
import numpy as np
+import pytest
class TestSAS7BDAT(object):
@@ -174,3 +176,11 @@ def test_date_time():
df0 = pd.read_csv(fname, parse_dates=['Date1', 'Date2', 'DateTime',
'DateTimeHi', 'Taiw'])
tm.assert_frame_equal(df, df0)
+
+
+def test_zero_variables():
+ # Check if the SAS file has zero variables (PR #18184)
+ dirpath = tm.get_data_path()
+ fname = os.path.join(dirpath, "zero_variables.sas7bdat")
+ with pytest.raises(EmptyDataError):
+ pd.read_sas(fname)
diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py
index 940a331a9de847..b5d1435c29cb7b 100644
--- a/pandas/tests/io/test_clipboard.py
+++ b/pandas/tests/io/test_clipboard.py
@@ -18,7 +18,7 @@
try:
DataFrame({'A': [1, 2]}).to_clipboard()
_DEPS_INSTALLED = 1
-except PyperclipException:
+except (PyperclipException, RuntimeError):
_DEPS_INSTALLED = 0
diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
index 956f3c68eeb414..0b268dcca90e81 100644
--- a/pandas/tests/io/test_html.py
+++ b/pandas/tests/io/test_html.py
@@ -973,6 +973,7 @@ def test_importcheck_thread_safety():
def test_parse_failure_unseekable():
# Issue #17975
_skip_if_no('lxml')
+ _skip_if_no('bs4')
class UnseekableStringIO(StringIO):
def seekable(self):
@@ -996,6 +997,7 @@ def seekable(self):
def test_parse_failure_rewinds():
# Issue #17975
_skip_if_no('lxml')
+ _skip_if_no('bs4')
class MockFile(object):
def __init__(self, data):
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 9a4edf38e2ef4a..e7bcff22371b7e 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -105,7 +105,7 @@ def test_options_py(df_compat, pa):
with pd.option_context('io.parquet.engine', 'pyarrow'):
df.to_parquet(path)
- result = read_parquet(path, compression=None)
+ result = read_parquet(path)
tm.assert_frame_equal(result, df)
@@ -118,7 +118,7 @@ def test_options_fp(df_compat, fp):
with pd.option_context('io.parquet.engine', 'fastparquet'):
df.to_parquet(path, compression=None)
- result = read_parquet(path, compression=None)
+ result = read_parquet(path)
tm.assert_frame_equal(result, df)
@@ -130,7 +130,7 @@ def test_options_auto(df_compat, fp, pa):
with pd.option_context('io.parquet.engine', 'auto'):
df.to_parquet(path)
- result = read_parquet(path, compression=None)
+ result = read_parquet(path)
tm.assert_frame_equal(result, df)
@@ -162,7 +162,7 @@ def test_cross_engine_pa_fp(df_cross_compat, pa, fp):
with tm.ensure_clean() as path:
df.to_parquet(path, engine=pa, compression=None)
- result = read_parquet(path, engine=fp, compression=None)
+ result = read_parquet(path, engine=fp)
tm.assert_frame_equal(result, df)
@@ -174,7 +174,7 @@ def test_cross_engine_fp_pa(df_cross_compat, pa, fp):
with tm.ensure_clean() as path:
df.to_parquet(path, engine=fp, compression=None)
- result = read_parquet(path, engine=pa, compression=None)
+ result = read_parquet(path, engine=pa)
tm.assert_frame_equal(result, df)
@@ -188,19 +188,23 @@ def check_error_on_write(self, df, engine, exc):
with tm.ensure_clean() as path:
to_parquet(df, path, engine, compression=None)
- def check_round_trip(self, df, engine, expected=None, **kwargs):
-
+ def check_round_trip(self, df, engine, expected=None,
+ write_kwargs=None, read_kwargs=None):
+ if write_kwargs is None:
+ write_kwargs = {}
+ if read_kwargs is None:
+ read_kwargs = {}
with tm.ensure_clean() as path:
- df.to_parquet(path, engine, **kwargs)
- result = read_parquet(path, engine, **kwargs)
+ df.to_parquet(path, engine, **write_kwargs)
+ result = read_parquet(path, engine, **read_kwargs)
if expected is None:
expected = df
tm.assert_frame_equal(result, expected)
# repeat
- to_parquet(df, path, engine, **kwargs)
- result = pd.read_parquet(path, engine, **kwargs)
+ to_parquet(df, path, engine, **write_kwargs)
+ result = pd.read_parquet(path, engine, **read_kwargs)
if expected is None:
expected = df
@@ -222,7 +226,7 @@ def test_columns_dtypes(self, engine):
# unicode
df.columns = [u'foo', u'bar']
- self.check_round_trip(df, engine, compression=None)
+ self.check_round_trip(df, engine, write_kwargs={'compression': None})
def test_columns_dtypes_invalid(self, engine):
@@ -246,7 +250,7 @@ def test_columns_dtypes_invalid(self, engine):
def test_write_with_index(self, engine):
df = pd.DataFrame({'A': [1, 2, 3]})
- self.check_round_trip(df, engine, compression=None)
+ self.check_round_trip(df, engine, write_kwargs={'compression': None})
# non-default index
for index in [[2, 3, 4],
@@ -280,7 +284,8 @@ def test_compression(self, engine, compression):
pytest.importorskip('brotli')
df = pd.DataFrame({'A': [1, 2, 3]})
- self.check_round_trip(df, engine, compression=compression)
+ self.check_round_trip(df, engine,
+ write_kwargs={'compression': compression})
def test_read_columns(self, engine):
# GH18154
@@ -289,7 +294,8 @@ def test_read_columns(self, engine):
expected = pd.DataFrame({'string': list('abc')})
self.check_round_trip(df, engine, expected=expected,
- compression=None, columns=["string"])
+ write_kwargs={'compression': None},
+ read_kwargs={'columns': ['string']})
class TestParquetPyArrow(Base):
@@ -377,7 +383,7 @@ def test_basic(self, fp):
'timedelta': pd.timedelta_range('1 day', periods=3),
})
- self.check_round_trip(df, fp, compression=None)
+ self.check_round_trip(df, fp, write_kwargs={'compression': None})
@pytest.mark.skip(reason="not supported")
def test_duplicate_columns(self, fp):
@@ -390,7 +396,8 @@ def test_duplicate_columns(self, fp):
def test_bool_with_none(self, fp):
df = pd.DataFrame({'a': [True, None, False]})
expected = pd.DataFrame({'a': [1.0, np.nan, 0.0]}, dtype='float16')
- self.check_round_trip(df, fp, expected=expected, compression=None)
+ self.check_round_trip(df, fp, expected=expected,
+ write_kwargs={'compression': None})
def test_unsupported(self, fp):
@@ -406,7 +413,7 @@ def test_categorical(self, fp):
if LooseVersion(fastparquet.__version__) < LooseVersion("0.1.3"):
pytest.skip("CategoricalDtype not supported for older fp")
df = pd.DataFrame({'a': pd.Categorical(list('abc'))})
- self.check_round_trip(df, fp, compression=None)
+ self.check_round_trip(df, fp, write_kwargs={'compression': None})
def test_datetime_tz(self, fp):
# doesn't preserve tz
@@ -416,4 +423,13 @@ def test_datetime_tz(self, fp):
# warns on the coercion
with catch_warnings(record=True):
self.check_round_trip(df, fp, df.astype('datetime64[ns]'),
- compression=None)
+ write_kwargs={'compression': None})
+
+ def test_filter_row_groups(self, fp):
+ d = {'a': list(range(0, 3))}
+ df = pd.DataFrame(d)
+ with tm.ensure_clean() as path:
+ df.to_parquet(path, fp, compression=None,
+ row_group_offsets=1)
+ result = read_parquet(path, fp, filters=[('a', '==', 0)])
+ assert len(result) == 1
diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py
index c9c294e70e7b14..fd5b4611e58d6b 100644
--- a/pandas/tests/reshape/test_concat.py
+++ b/pandas/tests/reshape/test_concat.py
@@ -1983,3 +1983,21 @@ def test_concat_will_upcast(dt, pdt):
pdt(np.array([5], dtype=dt, ndmin=dims))]
x = pd.concat(dfs)
assert x.values.dtype == 'float64'
+
+
+def test_concat_empty_and_non_empty_frame_regression():
+ # GH 18178 regression test
+ df1 = pd.DataFrame({'foo': [1]})
+ df2 = pd.DataFrame({'foo': []})
+ expected = pd.DataFrame({'foo': [1.0]})
+ result = pd.concat([df1, df2])
+ assert_frame_equal(result, expected)
+
+
+def test_concat_empty_and_non_empty_series_regression():
+ # GH 18187 regression test
+ s1 = pd.Series([1])
+ s2 = pd.Series([])
+ expected = s1
+ result = pd.concat([s1, s2])
+ tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py
index fc9f89934b4ea0..2722c3e92d85a1 100644
--- a/pandas/tests/reshape/test_reshape.py
+++ b/pandas/tests/reshape/test_reshape.py
@@ -11,8 +11,8 @@
from pandas.util.testing import assert_frame_equal
-from pandas.core.reshape.reshape import (
- melt, lreshape, get_dummies, wide_to_long)
+from pandas.core.reshape.reshape import get_dummies
+from pandas.core.reshape.melt import melt, lreshape, wide_to_long
import pandas.util.testing as tm
from pandas.compat import range, u
diff --git a/pandas/tests/scalar/test_parsing.py b/pandas/tests/scalar/test_parsing.py
index 6908fecbd4e058..70961755ceec98 100644
--- a/pandas/tests/scalar/test_parsing.py
+++ b/pandas/tests/scalar/test_parsing.py
@@ -3,14 +3,12 @@
Tests for Timestamp parsing, aimed at pandas/_libs/tslibs/parsing.pyx
"""
from datetime import datetime
-
import numpy as np
import pytest
from dateutil.parser import parse
-
+from pandas.conftest import is_dateutil_le_261, is_dateutil_gt_261
from pandas import compat
from pandas.util import testing as tm
-
from pandas._libs.tslibs import parsing
@@ -67,37 +65,90 @@ def test_parsers_monthfreq(self):
class TestGuessDatetimeFormat(object):
- def test_guess_datetime_format_with_parseable_formats(self):
+
+ @is_dateutil_le_261
+ @pytest.mark.parametrize(
+ "string, format",
+ [
+ ('20111230', '%Y%m%d'),
+ ('2011-12-30', '%Y-%m-%d'),
+ ('30-12-2011', '%d-%m-%Y'),
+ ('2011-12-30 00:00:00', '%Y-%m-%d %H:%M:%S'),
+ ('2011-12-30T00:00:00', '%Y-%m-%dT%H:%M:%S'),
+ ('2011-12-30 00:00:00.000000',
+ '%Y-%m-%d %H:%M:%S.%f')])
+ def test_guess_datetime_format_with_parseable_formats(
+ self, string, format):
+ tm._skip_if_not_us_locale()
+
+ result = parsing._guess_datetime_format(string)
+ assert result == format
+
+ @is_dateutil_gt_261
+ @pytest.mark.parametrize(
+ "string",
+ ['20111230', '2011-12-30', '30-12-2011',
+ '2011-12-30 00:00:00', '2011-12-30T00:00:00',
+ '2011-12-30 00:00:00.000000'])
+ def test_guess_datetime_format_with_parseable_formats_gt_261(
+ self, string):
tm._skip_if_not_us_locale()
- dt_string_to_format = (('20111230', '%Y%m%d'),
- ('2011-12-30', '%Y-%m-%d'),
- ('30-12-2011', '%d-%m-%Y'),
- ('2011-12-30 00:00:00', '%Y-%m-%d %H:%M:%S'),
- ('2011-12-30T00:00:00', '%Y-%m-%dT%H:%M:%S'),
- ('2011-12-30 00:00:00.000000',
- '%Y-%m-%d %H:%M:%S.%f'), )
-
- for dt_string, dt_format in dt_string_to_format:
- assert parsing._guess_datetime_format(dt_string) == dt_format
-
- def test_guess_datetime_format_with_dayfirst(self):
- ambiguous_string = '01/01/2011'
- assert parsing._guess_datetime_format(
- ambiguous_string, dayfirst=True) == '%d/%m/%Y'
- assert parsing._guess_datetime_format(
- ambiguous_string, dayfirst=False) == '%m/%d/%Y'
- def test_guess_datetime_format_with_locale_specific_formats(self):
+ result = parsing._guess_datetime_format(string)
+ assert result is None
+
+ @is_dateutil_le_261
+ @pytest.mark.parametrize(
+ "dayfirst, expected",
+ [
+ (True, "%d/%m/%Y"),
+ (False, "%m/%d/%Y")])
+ def test_guess_datetime_format_with_dayfirst(self, dayfirst, expected):
+ ambiguous_string = '01/01/2011'
+ result = parsing._guess_datetime_format(
+ ambiguous_string, dayfirst=dayfirst)
+ assert result == expected
+
+ @is_dateutil_gt_261
+ @pytest.mark.parametrize(
+ "dayfirst", [True, False])
+ def test_guess_datetime_format_with_dayfirst_gt_261(self, dayfirst):
+ ambiguous_string = '01/01/2011'
+ result = parsing._guess_datetime_format(
+ ambiguous_string, dayfirst=dayfirst)
+ assert result is None
+
+ @is_dateutil_le_261
+ @pytest.mark.parametrize(
+ "string, format",
+ [
+ ('30/Dec/2011', '%d/%b/%Y'),
+ ('30/December/2011', '%d/%B/%Y'),
+ ('30/Dec/2011 00:00:00', '%d/%b/%Y %H:%M:%S')])
+ def test_guess_datetime_format_with_locale_specific_formats(
+ self, string, format):
 # The month names will vary depending on the locale, in which
 # case these won't be parsed properly (dateutil can't parse them)
tm._skip_if_has_locale()
- dt_string_to_format = (('30/Dec/2011', '%d/%b/%Y'),
- ('30/December/2011', '%d/%B/%Y'),
- ('30/Dec/2011 00:00:00', '%d/%b/%Y %H:%M:%S'), )
+ result = parsing._guess_datetime_format(string)
+ assert result == format
+
+ @is_dateutil_gt_261
+ @pytest.mark.parametrize(
+ "string",
+ [
+ '30/Dec/2011',
+ '30/December/2011',
+ '30/Dec/2011 00:00:00'])
+ def test_guess_datetime_format_with_locale_specific_formats_gt_261(
+ self, string):
+ # The month names will vary depending on the locale, in which
+ # case these won't be parsed properly (dateutil can't parse them)
+ tm._skip_if_has_locale()
- for dt_string, dt_format in dt_string_to_format:
- assert parsing._guess_datetime_format(dt_string) == dt_format
+ result = parsing._guess_datetime_format(string)
+ assert result is None
def test_guess_datetime_format_invalid_inputs(self):
# A datetime string must include a year, month and a day for it
@@ -117,17 +168,35 @@ def test_guess_datetime_format_invalid_inputs(self):
for invalid_dt in invalid_dts:
assert parsing._guess_datetime_format(invalid_dt) is None
- def test_guess_datetime_format_nopadding(self):
+ @is_dateutil_le_261
+ @pytest.mark.parametrize(
+ "string, format",
+ [
+ ('2011-1-1', '%Y-%m-%d'),
+ ('30-1-2011', '%d-%m-%Y'),
+ ('1/1/2011', '%m/%d/%Y'),
+ ('2011-1-1 00:00:00', '%Y-%m-%d %H:%M:%S'),
+ ('2011-1-1 0:0:0', '%Y-%m-%d %H:%M:%S'),
+ ('2011-1-3T00:00:0', '%Y-%m-%dT%H:%M:%S')])
+ def test_guess_datetime_format_nopadding(self, string, format):
+ # GH 11142
+ result = parsing._guess_datetime_format(string)
+ assert result == format
+
+ @is_dateutil_gt_261
+ @pytest.mark.parametrize(
+ "string",
+ [
+ '2011-1-1',
+ '30-1-2011',
+ '1/1/2011',
+ '2011-1-1 00:00:00',
+ '2011-1-1 0:0:0',
+ '2011-1-3T00:00:0'])
+ def test_guess_datetime_format_nopadding_gt_261(self, string):
# GH 11142
- dt_string_to_format = (('2011-1-1', '%Y-%m-%d'),
- ('30-1-2011', '%d-%m-%Y'),
- ('1/1/2011', '%m/%d/%Y'),
- ('2011-1-1 00:00:00', '%Y-%m-%d %H:%M:%S'),
- ('2011-1-1 0:0:0', '%Y-%m-%d %H:%M:%S'),
- ('2011-1-3T00:00:0', '%Y-%m-%dT%H:%M:%S'))
-
- for dt_string, dt_format in dt_string_to_format:
- assert parsing._guess_datetime_format(dt_string) == dt_format
+ result = parsing._guess_datetime_format(string)
+ assert result is None
class TestArrayToDatetime(object):
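
The `_le_261` / `_gt_261` split reflects a behaviour change in dateutil after 2.6.1: on older
releases `_guess_datetime_format` returns a format string for these inputs, while on newer
releases it returns None. The decorators are imported from `pandas.conftest`; a rough sketch of
how such version gates can be written is below (the bodies are an assumption for illustration,
not the actual pandas implementation):

    # hypothetical version gates, mirroring the names imported above
    from distutils.version import LooseVersion

    import dateutil
    import pytest

    is_dateutil_le_261 = pytest.mark.skipif(
        LooseVersion(dateutil.__version__) > LooseVersion('2.6.1'),
        reason='only valid for dateutil <= 2.6.1')
    is_dateutil_gt_261 = pytest.mark.skipif(
        LooseVersion(dateutil.__version__) <= LooseVersion('2.6.1'),
        reason='only valid for dateutil > 2.6.1')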
diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py
index 28d85c52604d94..8cfdf7a461879e 100644
--- a/pandas/tests/scalar/test_period.py
+++ b/pandas/tests/scalar/test_period.py
@@ -13,7 +13,7 @@
from pandas._libs import tslib, period as libperiod
from pandas._libs.tslibs.parsing import DateParseError
from pandas import Period, Timestamp, offsets
-from pandas.tseries.frequencies import DAYS, MONTHS
+from pandas._libs.tslibs.resolution import DAYS, _MONTHS as MONTHS
class TestPeriodProperties(object):
diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py
index 4cd9a2fadeb326..a79fb554f94548 100644
--- a/pandas/tests/scalar/test_timestamp.py
+++ b/pandas/tests/scalar/test_timestamp.py
@@ -16,8 +16,9 @@
import pandas.util.testing as tm
from pandas.tseries import offsets, frequencies
-from pandas._libs import tslib, period
+from pandas._libs import period
from pandas._libs.tslibs.timezones import get_timezone
+from pandas._libs.tslibs import conversion
from pandas.compat import lrange, long, PY3
from pandas.util.testing import assert_series_equal
@@ -77,12 +78,12 @@ def test_constructor(self):
for result in [Timestamp(date_str), Timestamp(date)]:
# only with timestring
assert result.value == expected
- assert tslib.pydt_to_i8(result) == expected
+ assert conversion.pydt_to_i8(result) == expected
 # re-creation shouldn't affect the internal value
result = Timestamp(result)
assert result.value == expected
- assert tslib.pydt_to_i8(result) == expected
+ assert conversion.pydt_to_i8(result) == expected
# with timezone
for tz, offset in timezones:
@@ -90,18 +91,18 @@ def test_constructor(self):
tz=tz)]:
expected_tz = expected - offset * 3600 * 1000000000
assert result.value == expected_tz
- assert tslib.pydt_to_i8(result) == expected_tz
+ assert conversion.pydt_to_i8(result) == expected_tz
# should preserve tz
result = Timestamp(result)
assert result.value == expected_tz
- assert tslib.pydt_to_i8(result) == expected_tz
+ assert conversion.pydt_to_i8(result) == expected_tz
# should convert to UTC
result = Timestamp(result, tz='UTC')
expected_utc = expected - offset * 3600 * 1000000000
assert result.value == expected_utc
- assert tslib.pydt_to_i8(result) == expected_utc
+ assert conversion.pydt_to_i8(result) == expected_utc
def test_constructor_with_stringoffset(self):
# GH 7833
@@ -129,30 +130,30 @@ def test_constructor_with_stringoffset(self):
for result in [Timestamp(date_str)]:
# only with timestring
assert result.value == expected
- assert tslib.pydt_to_i8(result) == expected
+ assert conversion.pydt_to_i8(result) == expected
 # re-creation shouldn't affect the internal value
result = Timestamp(result)
assert result.value == expected
- assert tslib.pydt_to_i8(result) == expected
+ assert conversion.pydt_to_i8(result) == expected
# with timezone
for tz, offset in timezones:
result = Timestamp(date_str, tz=tz)
expected_tz = expected
assert result.value == expected_tz
- assert tslib.pydt_to_i8(result) == expected_tz
+ assert conversion.pydt_to_i8(result) == expected_tz
# should preserve tz
result = Timestamp(result)
assert result.value == expected_tz
- assert tslib.pydt_to_i8(result) == expected_tz
+ assert conversion.pydt_to_i8(result) == expected_tz
# should convert to UTC
result = Timestamp(result, tz='UTC')
expected_utc = expected
assert result.value == expected_utc
- assert tslib.pydt_to_i8(result) == expected_utc
+ assert conversion.pydt_to_i8(result) == expected_utc
# This should be 2013-11-01 05:00 in UTC
# converted to Chicago tz
@@ -1101,13 +1102,18 @@ def test_timestamp(self):
tsc = Timestamp('2014-10-11 11:00:01.12345678', tz='US/Central')
utsc = tsc.tz_convert('UTC')
+
# utsc is a different representation of the same time
assert tsc.timestamp() == utsc.timestamp()
if PY3:
- # should agree with datetime.timestamp method
- dt = ts.to_pydatetime()
- assert dt.timestamp() == ts.timestamp()
+
+ # datetime.timestamp() converts in the local timezone
+ with tm.set_timezone('UTC'):
+
+ # should agree with datetime.timestamp method
+ dt = ts.to_pydatetime()
+ assert dt.timestamp() == ts.timestamp()
class TestTimestampNsOperations(object):
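
The change above wraps the PY3-only comparison in `tm.set_timezone('UTC')` because a naive
`datetime.timestamp()` interprets the value in the process-local timezone, so it only agrees with
`Timestamp.timestamp()` when that zone is UTC. A small illustration of the underlying stdlib
behaviour (values are examples only):

    from datetime import datetime, timezone

    naive = datetime(2014, 10, 11, 11, 0, 1)
    aware = naive.replace(tzinfo=timezone.utc)

    # .timestamp() on the naive value depends on the local timezone,
    # so the two results only match when the local zone is UTC
    print(naive.timestamp(), aware.timestamp())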
diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py
index 6b950be15ca465..c1e41892839285 100644
--- a/pandas/tests/series/test_api.py
+++ b/pandas/tests/series/test_api.py
@@ -334,6 +334,10 @@ def test_axis_alias(self):
assert s._get_axis_number('rows') == 0
assert s._get_axis_name('rows') == 'index'
+ def test_class_axis(self):
+ # https://github.com/pandas-dev/pandas/issues/18147
+ Series.index # no exception!
+
def test_numpy_unique(self):
# it works!
np.unique(self.ts)
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index a8782b32d12f6d..f593ba85aec5f4 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -306,20 +306,18 @@ def f():
assert len(cat.codes) == 1
assert cat.codes[0] == 0
- # Catch old style constructor useage: two arrays, codes + categories
- # We can only catch two cases:
+ # two arrays
# - when the first is an integer dtype and the second is not
# - when the resulting codes are all -1/NaN
- with tm.assert_produces_warning(RuntimeWarning):
+ with tm.assert_produces_warning(None):
c_old = Categorical([0, 1, 2, 0, 1, 2],
categories=["a", "b", "c"]) # noqa
- with tm.assert_produces_warning(RuntimeWarning):
+ with tm.assert_produces_warning(None):
c_old = Categorical([0, 1, 2, 0, 1, 2], # noqa
categories=[3, 4, 5])
- # the next one are from the old docs, but unfortunately these don't
- # trigger :-(
+ # the next ones are from the old docs
with tm.assert_produces_warning(None):
c_old2 = Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3]) # noqa
cat = Categorical([1, 2], categories=[1, 2, 3])
diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py
index ba1a2ad1f42e2c..61b2b689bffd6c 100644
--- a/pandas/tests/test_resample.py
+++ b/pandas/tests/test_resample.py
@@ -21,7 +21,8 @@
from pandas.core.base import SpecificationError, AbstractMethodError
from pandas.errors import UnsupportedFunctionCall
from pandas.core.groupby import DataError
-from pandas.tseries.frequencies import MONTHS, DAYS
+from pandas._libs.tslibs.resolution import DAYS
+from pandas.tseries.frequencies import MONTHS
from pandas.tseries.frequencies import to_offset
from pandas.core.indexes.datetimes import date_range
from pandas.tseries.offsets import Minute, BDay
diff --git a/pandas/tests/tseries/conftest.py b/pandas/tests/tseries/conftest.py
index 25446c24b28c09..fc1ecf21c54465 100644
--- a/pandas/tests/tseries/conftest.py
+++ b/pandas/tests/tseries/conftest.py
@@ -1,10 +1,4 @@
import pytest
-import pandas.tseries.offsets as offsets
-
-
-@pytest.fixture(params=[getattr(offsets, o) for o in offsets.__all__])
-def offset_types(request):
- return request.param
@pytest.fixture(params=[None, 'UTC', 'Asia/Tokyo', 'US/Eastern',
diff --git a/pandas/tests/tseries/offsets/__init__.py b/pandas/tests/tseries/offsets/__init__.py
new file mode 100644
index 00000000000000..40a96afc6ff09d
--- /dev/null
+++ b/pandas/tests/tseries/offsets/__init__.py
@@ -0,0 +1 @@
+# -*- coding: utf-8 -*-
diff --git a/pandas/tests/tseries/offsets/common.py b/pandas/tests/tseries/offsets/common.py
new file mode 100644
index 00000000000000..2e8eb224bca7fa
--- /dev/null
+++ b/pandas/tests/tseries/offsets/common.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+"""
+Assertion helpers for offsets tests
+"""
+
+
+def assert_offset_equal(offset, base, expected):
+ actual = offset + base
+ actual_swapped = base + offset
+ actual_apply = offset.apply(base)
+ try:
+ assert actual == expected
+ assert actual_swapped == expected
+ assert actual_apply == expected
+ except AssertionError:
+ raise AssertionError("\nExpected: %s\nActual: %s\nFor Offset: %s"
+ "\nAt Date: %s" %
+ (expected, actual, offset, base))
+
+
+def assert_onOffset(offset, date, expected):
+ actual = offset.onOffset(date)
+ assert actual == expected, ("\nExpected: %s\nActual: %s\nFor Offset: %s"
+ "\nAt Date: %s" %
+ (expected, actual, offset, date))
diff --git a/pandas/tests/tseries/offsets/conftest.py b/pandas/tests/tseries/offsets/conftest.py
new file mode 100644
index 00000000000000..25446c24b28c09
--- /dev/null
+++ b/pandas/tests/tseries/offsets/conftest.py
@@ -0,0 +1,13 @@
+import pytest
+import pandas.tseries.offsets as offsets
+
+
+@pytest.fixture(params=[getattr(offsets, o) for o in offsets.__all__])
+def offset_types(request):
+ return request.param
+
+
+@pytest.fixture(params=[None, 'UTC', 'Asia/Tokyo', 'US/Eastern',
+ 'dateutil/Asia/Tokyo', 'dateutil/US/Pacific'])
+def tz(request):
+ return request.param
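
The new conftest moves the `offset_types` fixture next to the offsets tests and keeps the `tz`
fixture they use. Any test in this directory can request the fixtures by name; a minimal
illustrative consumer (the test itself is an example, not part of this change):

    # offset_types yields each class listed in pandas.tseries.offsets.__all__
    def test_offset_types_are_classes(offset_types):
        assert isinstance(offset_types, type)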
diff --git a/pandas/tests/tseries/data/cday-0.14.1.pickle b/pandas/tests/tseries/offsets/data/cday-0.14.1.pickle
similarity index 100%
rename from pandas/tests/tseries/data/cday-0.14.1.pickle
rename to pandas/tests/tseries/offsets/data/cday-0.14.1.pickle
diff --git a/pandas/tests/tseries/data/dateoffset_0_15_2.pickle b/pandas/tests/tseries/offsets/data/dateoffset_0_15_2.pickle
similarity index 100%
rename from pandas/tests/tseries/data/dateoffset_0_15_2.pickle
rename to pandas/tests/tseries/offsets/data/dateoffset_0_15_2.pickle
diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py
similarity index 79%
rename from pandas/tests/tseries/test_offsets.py
rename to pandas/tests/tseries/offsets/test_offsets.py
index 4fd3bba01602fc..b123fa127e29c4 100644
--- a/pandas/tests/tseries/test_offsets.py
+++ b/pandas/tests/tseries/offsets/test_offsets.py
@@ -4,7 +4,7 @@
from dateutil.relativedelta import relativedelta
import pytest
-from pandas.compat import range, iteritems
+from pandas.compat import range
from pandas import compat
import numpy as np
@@ -25,9 +25,9 @@
MonthBegin, SemiMonthBegin, SemiMonthEnd,
BYearBegin, QuarterBegin, BQuarterBegin,
BMonthBegin, DateOffset, Week, YearBegin,
- YearEnd, Hour, Minute, Second, Day, Micro,
+ YearEnd, Day,
QuarterEnd, BusinessMonthEnd, FY5253,
- Milli, Nano, Easter, FY5253Quarter,
+ Nano, Easter, FY5253Quarter,
LastWeekOfMonth)
from pandas.core.tools.datetimes import (
format, ole2datetime, parse_time_string,
@@ -35,11 +35,13 @@
import pandas.tseries.offsets as offsets
from pandas.io.pickle import read_pickle
from pandas._libs.tslibs import timezones
-from pandas._libs.tslib import normalize_date, NaT, Timestamp, Timedelta
+from pandas._libs.tslib import normalize_date, NaT, Timestamp
import pandas._libs.tslib as tslib
import pandas.util.testing as tm
from pandas.tseries.holiday import USFederalHolidayCalendar
+from .common import assert_offset_equal, assert_onOffset
+
def test_monthrange():
import calendar
@@ -162,51 +164,44 @@ def test_apply_out_of_range(self, tz):
class TestCommon(Base):
-
- def setup_method(self, method):
- # exected value created by Base._get_offset
- # are applied to 2011/01/01 09:00 (Saturday)
- # used for .apply and .rollforward
- self.expecteds = {'Day': Timestamp('2011-01-02 09:00:00'),
- 'DateOffset': Timestamp('2011-01-02 09:00:00'),
- 'BusinessDay': Timestamp('2011-01-03 09:00:00'),
- 'CustomBusinessDay':
- Timestamp('2011-01-03 09:00:00'),
- 'CustomBusinessMonthEnd':
- Timestamp('2011-01-31 09:00:00'),
- 'CustomBusinessMonthBegin':
- Timestamp('2011-01-03 09:00:00'),
- 'MonthBegin': Timestamp('2011-02-01 09:00:00'),
- 'BusinessMonthBegin':
- Timestamp('2011-01-03 09:00:00'),
- 'MonthEnd': Timestamp('2011-01-31 09:00:00'),
- 'SemiMonthEnd': Timestamp('2011-01-15 09:00:00'),
- 'SemiMonthBegin': Timestamp('2011-01-15 09:00:00'),
- 'BusinessMonthEnd': Timestamp('2011-01-31 09:00:00'),
- 'YearBegin': Timestamp('2012-01-01 09:00:00'),
- 'BYearBegin': Timestamp('2011-01-03 09:00:00'),
- 'YearEnd': Timestamp('2011-12-31 09:00:00'),
- 'BYearEnd': Timestamp('2011-12-30 09:00:00'),
- 'QuarterBegin': Timestamp('2011-03-01 09:00:00'),
- 'BQuarterBegin': Timestamp('2011-03-01 09:00:00'),
- 'QuarterEnd': Timestamp('2011-03-31 09:00:00'),
- 'BQuarterEnd': Timestamp('2011-03-31 09:00:00'),
- 'BusinessHour': Timestamp('2011-01-03 10:00:00'),
- 'CustomBusinessHour':
- Timestamp('2011-01-03 10:00:00'),
- 'WeekOfMonth': Timestamp('2011-01-08 09:00:00'),
- 'LastWeekOfMonth': Timestamp('2011-01-29 09:00:00'),
- 'FY5253Quarter': Timestamp('2011-01-25 09:00:00'),
- 'FY5253': Timestamp('2011-01-25 09:00:00'),
- 'Week': Timestamp('2011-01-08 09:00:00'),
- 'Easter': Timestamp('2011-04-24 09:00:00'),
- 'Hour': Timestamp('2011-01-01 10:00:00'),
- 'Minute': Timestamp('2011-01-01 09:01:00'),
- 'Second': Timestamp('2011-01-01 09:00:01'),
- 'Milli': Timestamp('2011-01-01 09:00:00.001000'),
- 'Micro': Timestamp('2011-01-01 09:00:00.000001'),
- 'Nano': Timestamp(np_datetime64_compat(
- '2011-01-01T09:00:00.000000001Z'))}
+ # expected values created by Base._get_offset
+ # are applied to 2011/01/01 09:00 (Saturday)
+ # used for .apply and .rollforward
+ expecteds = {'Day': Timestamp('2011-01-02 09:00:00'),
+ 'DateOffset': Timestamp('2011-01-02 09:00:00'),
+ 'BusinessDay': Timestamp('2011-01-03 09:00:00'),
+ 'CustomBusinessDay': Timestamp('2011-01-03 09:00:00'),
+ 'CustomBusinessMonthEnd': Timestamp('2011-01-31 09:00:00'),
+ 'CustomBusinessMonthBegin': Timestamp('2011-01-03 09:00:00'),
+ 'MonthBegin': Timestamp('2011-02-01 09:00:00'),
+ 'BusinessMonthBegin': Timestamp('2011-01-03 09:00:00'),
+ 'MonthEnd': Timestamp('2011-01-31 09:00:00'),
+ 'SemiMonthEnd': Timestamp('2011-01-15 09:00:00'),
+ 'SemiMonthBegin': Timestamp('2011-01-15 09:00:00'),
+ 'BusinessMonthEnd': Timestamp('2011-01-31 09:00:00'),
+ 'YearBegin': Timestamp('2012-01-01 09:00:00'),
+ 'BYearBegin': Timestamp('2011-01-03 09:00:00'),
+ 'YearEnd': Timestamp('2011-12-31 09:00:00'),
+ 'BYearEnd': Timestamp('2011-12-30 09:00:00'),
+ 'QuarterBegin': Timestamp('2011-03-01 09:00:00'),
+ 'BQuarterBegin': Timestamp('2011-03-01 09:00:00'),
+ 'QuarterEnd': Timestamp('2011-03-31 09:00:00'),
+ 'BQuarterEnd': Timestamp('2011-03-31 09:00:00'),
+ 'BusinessHour': Timestamp('2011-01-03 10:00:00'),
+ 'CustomBusinessHour': Timestamp('2011-01-03 10:00:00'),
+ 'WeekOfMonth': Timestamp('2011-01-08 09:00:00'),
+ 'LastWeekOfMonth': Timestamp('2011-01-29 09:00:00'),
+ 'FY5253Quarter': Timestamp('2011-01-25 09:00:00'),
+ 'FY5253': Timestamp('2011-01-25 09:00:00'),
+ 'Week': Timestamp('2011-01-08 09:00:00'),
+ 'Easter': Timestamp('2011-04-24 09:00:00'),
+ 'Hour': Timestamp('2011-01-01 10:00:00'),
+ 'Minute': Timestamp('2011-01-01 09:01:00'),
+ 'Second': Timestamp('2011-01-01 09:00:01'),
+ 'Milli': Timestamp('2011-01-01 09:00:00.001000'),
+ 'Micro': Timestamp('2011-01-01 09:00:00.000001'),
+ 'Nano': Timestamp(np_datetime64_compat(
+ '2011-01-01T09:00:00.000000001Z'))}
def test_return_type(self, offset_types):
offset = self._get_offset(offset_types)
@@ -623,7 +618,7 @@ def test_onOffset(self):
(BDay(), datetime(2008, 1, 5), False)]
for offset, d, expected in tests:
- assertOnOffset(offset, d, expected)
+ assert_onOffset(offset, d, expected)
def test_apply(self):
tests = []
@@ -668,7 +663,7 @@ def test_apply(self):
for offset, cases in tests:
for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
+ assert_offset_equal(offset, base, expected)
def test_apply_large_n(self):
dt = datetime(2012, 10, 23)
@@ -1272,7 +1267,7 @@ def test_apply(self):
for offset, cases in tests:
for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
+ assert_offset_equal(offset, base, expected)
def test_apply_large_n(self):
tests = []
@@ -1331,7 +1326,7 @@ def test_apply_large_n(self):
for offset, cases in tests:
for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
+ assert_offset_equal(offset, base, expected)
def test_apply_nanoseconds(self):
tests = []
@@ -1354,7 +1349,7 @@ def test_apply_nanoseconds(self):
for offset, cases in tests:
for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
+ assert_offset_equal(offset, base, expected)
def test_offsets_compare_equal(self):
# root cause of #456
@@ -1628,7 +1623,7 @@ def test_apply(self):
for offset, cases in tests:
for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
+ assert_offset_equal(offset, base, expected)
def test_apply_nanoseconds(self):
tests = []
@@ -1651,7 +1646,7 @@ def test_apply_nanoseconds(self):
for offset, cases in tests:
for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
+ assert_offset_equal(offset, base, expected)
class TestCustomBusinessDay(Base):
@@ -1752,7 +1747,7 @@ def test_onOffset(self):
(CDay(), datetime(2008, 1, 5), False)]
for offset, d, expected in tests:
- assertOnOffset(offset, d, expected)
+ assert_onOffset(offset, d, expected)
def test_apply(self):
tests = []
@@ -1798,7 +1793,7 @@ def test_apply(self):
for offset, cases in tests:
for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
+ assert_offset_equal(offset, base, expected)
def test_apply_large_n(self):
dt = datetime(2012, 10, 23)
@@ -1870,7 +1865,7 @@ def test_weekmask_and_holidays(self):
def test_calendar(self):
calendar = USFederalHolidayCalendar()
dt = datetime(2014, 1, 17)
- assertEq(CDay(calendar=calendar), dt, datetime(2014, 1, 21))
+ assert_offset_equal(CDay(calendar=calendar), dt, datetime(2014, 1, 21))
def test_roundtrip_pickle(self):
def _check_roundtrip(obj):
@@ -1997,7 +1992,7 @@ def test_onOffset(self):
(CBMonthEnd(), datetime(2008, 1, 1), False)]
for offset, d, expected in tests:
- assertOnOffset(offset, d, expected)
+ assert_onOffset(offset, d, expected)
def test_apply(self):
cbm = CBMonthEnd()
@@ -2022,7 +2017,7 @@ def test_apply(self):
for offset, cases in tests:
for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
+ assert_offset_equal(offset, base, expected)
def test_apply_large_n(self):
dt = datetime(2012, 10, 23)
@@ -2111,7 +2106,7 @@ def test_onOffset(self):
(CBMonthBegin(), datetime(2008, 1, 31), False)]
for offset, dt, expected in tests:
- assertOnOffset(offset, dt, expected)
+ assert_onOffset(offset, dt, expected)
def test_apply(self):
cbm = CBMonthBegin()
@@ -2135,7 +2130,7 @@ def test_apply(self):
for offset, cases in tests:
for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
+ assert_offset_equal(offset, base, expected)
def test_apply_large_n(self):
dt = datetime(2012, 10, 23)
@@ -2174,13 +2169,6 @@ def test_datetimeindex(self):
freq=cbmb).tolist()[0] == datetime(2012, 1, 3))
-def assertOnOffset(offset, date, expected):
- actual = offset.onOffset(date)
- assert actual == expected, ("\nExpected: %s\nActual: %s\nFor Offset: %s)"
- "\nAt Date: %s" %
- (expected, actual, offset, date))
-
-
class TestWeek(Base):
_offset = Week
@@ -2231,7 +2219,7 @@ def test_offset(self):
for offset, cases in tests:
for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
+ assert_offset_equal(offset, base, expected)
def test_onOffset(self):
for weekday in range(7):
@@ -2244,7 +2232,7 @@ def test_onOffset(self):
expected = True
else:
expected = False
- assertOnOffset(offset, date, expected)
+ assert_onOffset(offset, date, expected)
def test_offsets_compare_equal(self):
# root cause of #456
@@ -2316,7 +2304,7 @@ def test_offset(self):
for n, week, weekday, dt, expected in test_cases:
offset = WeekOfMonth(n, week=week, weekday=weekday)
- assertEq(offset, dt, expected)
+ assert_offset_equal(offset, dt, expected)
# try subtracting
result = datetime(2011, 2, 1) - WeekOfMonth(week=1, weekday=2)
@@ -2457,7 +2445,7 @@ def test_offset(self):
for offset, cases in tests:
for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
+ assert_offset_equal(offset, base, expected)
def test_onOffset(self):
@@ -2467,7 +2455,7 @@ def test_onOffset(self):
(BMonthBegin(), datetime(2008, 3, 3), True)]
for offset, dt, expected in tests:
- assertOnOffset(offset, dt, expected)
+ assert_onOffset(offset, dt, expected)
def test_offsets_compare_equal(self):
# root cause of #456
@@ -2515,7 +2503,7 @@ def test_offset(self):
for offset, cases in tests:
for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
+ assert_offset_equal(offset, base, expected)
def test_normalize(self):
dt = datetime(2007, 1, 1, 3)
@@ -2530,7 +2518,7 @@ def test_onOffset(self):
(BMonthEnd(), datetime(2008, 1, 1), False)]
for offset, dt, expected in tests:
- assertOnOffset(offset, dt, expected)
+ assert_onOffset(offset, dt, expected)
def test_offsets_compare_equal(self):
# root cause of #456
@@ -2577,7 +2565,7 @@ def test_offset(self):
for offset, cases in tests:
for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
+ assert_offset_equal(offset, base, expected)
class TestMonthEnd(Base):
@@ -2619,7 +2607,7 @@ def test_offset(self):
for offset, cases in tests:
for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
+ assert_offset_equal(offset, base, expected)
def test_day_of_month(self):
dt = datetime(2007, 1, 1)
@@ -2644,7 +2632,7 @@ def test_onOffset(self):
(MonthEnd(), datetime(2008, 1, 1), False)]
for offset, dt, expected in tests:
- assertOnOffset(offset, dt, expected)
+ assert_onOffset(offset, dt, expected)
class TestSemiMonthEnd(Base):
@@ -2759,7 +2747,7 @@ def test_offset_whole_year(self):
datetime(2008, 12, 31))
for base, exp_date in zip(dates[:-1], dates[1:]):
- assertEq(SemiMonthEnd(), base, exp_date)
+ assert_offset_equal(SemiMonthEnd(), base, exp_date)
# ensure .apply_index works as expected
s = DatetimeIndex(dates[:-1])
@@ -2775,7 +2763,7 @@ def test_offset_whole_year(self):
def test_offset(self):
for offset, cases in self._get_tests():
for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
+ assert_offset_equal(offset, base, expected)
def test_apply_index(self):
for offset, cases in self._get_tests():
@@ -2793,30 +2781,30 @@ def test_onOffset(self):
(datetime(2008, 2, 29), True)]
for dt, expected in tests:
- assertOnOffset(SemiMonthEnd(), dt, expected)
-
- def test_vectorized_offset_addition(self):
- for klass, assert_func in zip([Series, DatetimeIndex],
- [tm.assert_series_equal,
- tm.assert_index_equal]):
- s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'),
- Timestamp('2000-02-15', tz='US/Central')], name='a')
-
- result = s + SemiMonthEnd()
- result2 = SemiMonthEnd() + s
- exp = klass([Timestamp('2000-01-31 00:15:00', tz='US/Central'),
- Timestamp('2000-02-29', tz='US/Central')], name='a')
- assert_func(result, exp)
- assert_func(result2, exp)
-
- s = klass([Timestamp('2000-01-01 00:15:00', tz='US/Central'),
- Timestamp('2000-02-01', tz='US/Central')], name='a')
- result = s + SemiMonthEnd()
- result2 = SemiMonthEnd() + s
- exp = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'),
- Timestamp('2000-02-15', tz='US/Central')], name='a')
- assert_func(result, exp)
- assert_func(result2, exp)
+ assert_onOffset(SemiMonthEnd(), dt, expected)
+
+ @pytest.mark.parametrize('klass,assert_func',
+ [(Series, tm.assert_series_equal),
+ (DatetimeIndex, tm.assert_index_equal)])
+ def test_vectorized_offset_addition(self, klass, assert_func):
+ s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'),
+ Timestamp('2000-02-15', tz='US/Central')], name='a')
+
+ result = s + SemiMonthEnd()
+ result2 = SemiMonthEnd() + s
+ exp = klass([Timestamp('2000-01-31 00:15:00', tz='US/Central'),
+ Timestamp('2000-02-29', tz='US/Central')], name='a')
+ assert_func(result, exp)
+ assert_func(result2, exp)
+
+ s = klass([Timestamp('2000-01-01 00:15:00', tz='US/Central'),
+ Timestamp('2000-02-01', tz='US/Central')], name='a')
+ result = s + SemiMonthEnd()
+ result2 = SemiMonthEnd() + s
+ exp = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'),
+ Timestamp('2000-02-15', tz='US/Central')], name='a')
+ assert_func(result, exp)
+ assert_func(result2, exp)
class TestSemiMonthBegin(Base):
@@ -2935,7 +2923,7 @@ def test_offset_whole_year(self):
datetime(2008, 12, 15))
for base, exp_date in zip(dates[:-1], dates[1:]):
- assertEq(SemiMonthBegin(), base, exp_date)
+ assert_offset_equal(SemiMonthBegin(), base, exp_date)
# ensure .apply_index works as expected
s = DatetimeIndex(dates[:-1])
@@ -2951,7 +2939,7 @@ def test_offset_whole_year(self):
def test_offset(self):
for offset, cases in self._get_tests():
for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
+ assert_offset_equal(offset, base, expected)
def test_apply_index(self):
for offset, cases in self._get_tests():
@@ -2968,30 +2956,29 @@ def test_onOffset(self):
(datetime(2008, 2, 15), True)]
for dt, expected in tests:
- assertOnOffset(SemiMonthBegin(), dt, expected)
-
- def test_vectorized_offset_addition(self):
- for klass, assert_func in zip([Series, DatetimeIndex],
- [tm.assert_series_equal,
- tm.assert_index_equal]):
-
- s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'),
- Timestamp('2000-02-15', tz='US/Central')], name='a')
- result = s + SemiMonthBegin()
- result2 = SemiMonthBegin() + s
- exp = klass([Timestamp('2000-02-01 00:15:00', tz='US/Central'),
- Timestamp('2000-03-01', tz='US/Central')], name='a')
- assert_func(result, exp)
- assert_func(result2, exp)
-
- s = klass([Timestamp('2000-01-01 00:15:00', tz='US/Central'),
- Timestamp('2000-02-01', tz='US/Central')], name='a')
- result = s + SemiMonthBegin()
- result2 = SemiMonthBegin() + s
- exp = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'),
- Timestamp('2000-02-15', tz='US/Central')], name='a')
- assert_func(result, exp)
- assert_func(result2, exp)
+ assert_onOffset(SemiMonthBegin(), dt, expected)
+
+ @pytest.mark.parametrize('klass,assert_func',
+ [(Series, tm.assert_series_equal),
+ (DatetimeIndex, tm.assert_index_equal)])
+ def test_vectorized_offset_addition(self, klass, assert_func):
+ s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'),
+ Timestamp('2000-02-15', tz='US/Central')], name='a')
+ result = s + SemiMonthBegin()
+ result2 = SemiMonthBegin() + s
+ exp = klass([Timestamp('2000-02-01 00:15:00', tz='US/Central'),
+ Timestamp('2000-03-01', tz='US/Central')], name='a')
+ assert_func(result, exp)
+ assert_func(result2, exp)
+
+ s = klass([Timestamp('2000-01-01 00:15:00', tz='US/Central'),
+ Timestamp('2000-02-01', tz='US/Central')], name='a')
+ result = s + SemiMonthBegin()
+ result2 = SemiMonthBegin() + s
+ exp = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'),
+ Timestamp('2000-02-15', tz='US/Central')], name='a')
+ assert_func(result, exp)
+ assert_func(result2, exp)
class TestBQuarterBegin(Base):
@@ -3081,7 +3068,7 @@ def test_offset(self):
for offset, cases in tests:
for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
+ assert_offset_equal(offset, base, expected)
# corner
offset = BQuarterBegin(n=-1, startingMonth=1)
@@ -3104,100 +3091,100 @@ def test_isAnchored(self):
assert BQuarterEnd().isAnchored()
assert not BQuarterEnd(2, startingMonth=1).isAnchored()
- def test_offset(self):
- tests = []
-
- tests.append((BQuarterEnd(startingMonth=1),
- {datetime(2008, 1, 1): datetime(2008, 1, 31),
- datetime(2008, 1, 31): datetime(2008, 4, 30),
- datetime(2008, 2, 15): datetime(2008, 4, 30),
- datetime(2008, 2, 29): datetime(2008, 4, 30),
- datetime(2008, 3, 15): datetime(2008, 4, 30),
- datetime(2008, 3, 31): datetime(2008, 4, 30),
- datetime(2008, 4, 15): datetime(2008, 4, 30),
- datetime(2008, 4, 30): datetime(2008, 7, 31), }))
-
- tests.append((BQuarterEnd(startingMonth=2),
- {datetime(2008, 1, 1): datetime(2008, 2, 29),
- datetime(2008, 1, 31): datetime(2008, 2, 29),
- datetime(2008, 2, 15): datetime(2008, 2, 29),
- datetime(2008, 2, 29): datetime(2008, 5, 30),
- datetime(2008, 3, 15): datetime(2008, 5, 30),
- datetime(2008, 3, 31): datetime(2008, 5, 30),
- datetime(2008, 4, 15): datetime(2008, 5, 30),
- datetime(2008, 4, 30): datetime(2008, 5, 30), }))
-
- tests.append((BQuarterEnd(startingMonth=1, n=0),
- {datetime(2008, 1, 1): datetime(2008, 1, 31),
- datetime(2008, 1, 31): datetime(2008, 1, 31),
- datetime(2008, 2, 15): datetime(2008, 4, 30),
- datetime(2008, 2, 29): datetime(2008, 4, 30),
- datetime(2008, 3, 15): datetime(2008, 4, 30),
- datetime(2008, 3, 31): datetime(2008, 4, 30),
- datetime(2008, 4, 15): datetime(2008, 4, 30),
- datetime(2008, 4, 30): datetime(2008, 4, 30), }))
-
- tests.append((BQuarterEnd(startingMonth=1, n=-1),
- {datetime(2008, 1, 1): datetime(2007, 10, 31),
- datetime(2008, 1, 31): datetime(2007, 10, 31),
- datetime(2008, 2, 15): datetime(2008, 1, 31),
- datetime(2008, 2, 29): datetime(2008, 1, 31),
- datetime(2008, 3, 15): datetime(2008, 1, 31),
- datetime(2008, 3, 31): datetime(2008, 1, 31),
- datetime(2008, 4, 15): datetime(2008, 1, 31),
- datetime(2008, 4, 30): datetime(2008, 1, 31), }))
-
- tests.append((BQuarterEnd(startingMonth=1, n=2),
- {datetime(2008, 1, 31): datetime(2008, 7, 31),
- datetime(2008, 2, 15): datetime(2008, 7, 31),
- datetime(2008, 2, 29): datetime(2008, 7, 31),
- datetime(2008, 3, 15): datetime(2008, 7, 31),
- datetime(2008, 3, 31): datetime(2008, 7, 31),
- datetime(2008, 4, 15): datetime(2008, 7, 31),
- datetime(2008, 4, 30): datetime(2008, 10, 31), }))
-
- for offset, cases in tests:
- for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
-
+ offset_cases = []
+ offset_cases.append((BQuarterEnd(startingMonth=1),
+ {datetime(2008, 1, 1): datetime(2008, 1, 31),
+ datetime(2008, 1, 31): datetime(2008, 4, 30),
+ datetime(2008, 2, 15): datetime(2008, 4, 30),
+ datetime(2008, 2, 29): datetime(2008, 4, 30),
+ datetime(2008, 3, 15): datetime(2008, 4, 30),
+ datetime(2008, 3, 31): datetime(2008, 4, 30),
+ datetime(2008, 4, 15): datetime(2008, 4, 30),
+ datetime(2008, 4, 30): datetime(2008, 7, 31), }))
+
+ offset_cases.append((BQuarterEnd(startingMonth=2),
+ {datetime(2008, 1, 1): datetime(2008, 2, 29),
+ datetime(2008, 1, 31): datetime(2008, 2, 29),
+ datetime(2008, 2, 15): datetime(2008, 2, 29),
+ datetime(2008, 2, 29): datetime(2008, 5, 30),
+ datetime(2008, 3, 15): datetime(2008, 5, 30),
+ datetime(2008, 3, 31): datetime(2008, 5, 30),
+ datetime(2008, 4, 15): datetime(2008, 5, 30),
+ datetime(2008, 4, 30): datetime(2008, 5, 30), }))
+
+ offset_cases.append((BQuarterEnd(startingMonth=1, n=0),
+ {datetime(2008, 1, 1): datetime(2008, 1, 31),
+ datetime(2008, 1, 31): datetime(2008, 1, 31),
+ datetime(2008, 2, 15): datetime(2008, 4, 30),
+ datetime(2008, 2, 29): datetime(2008, 4, 30),
+ datetime(2008, 3, 15): datetime(2008, 4, 30),
+ datetime(2008, 3, 31): datetime(2008, 4, 30),
+ datetime(2008, 4, 15): datetime(2008, 4, 30),
+ datetime(2008, 4, 30): datetime(2008, 4, 30), }))
+
+ offset_cases.append((BQuarterEnd(startingMonth=1, n=-1),
+ {datetime(2008, 1, 1): datetime(2007, 10, 31),
+ datetime(2008, 1, 31): datetime(2007, 10, 31),
+ datetime(2008, 2, 15): datetime(2008, 1, 31),
+ datetime(2008, 2, 29): datetime(2008, 1, 31),
+ datetime(2008, 3, 15): datetime(2008, 1, 31),
+ datetime(2008, 3, 31): datetime(2008, 1, 31),
+ datetime(2008, 4, 15): datetime(2008, 1, 31),
+ datetime(2008, 4, 30): datetime(2008, 1, 31), }))
+
+ offset_cases.append((BQuarterEnd(startingMonth=1, n=2),
+ {datetime(2008, 1, 31): datetime(2008, 7, 31),
+ datetime(2008, 2, 15): datetime(2008, 7, 31),
+ datetime(2008, 2, 29): datetime(2008, 7, 31),
+ datetime(2008, 3, 15): datetime(2008, 7, 31),
+ datetime(2008, 3, 31): datetime(2008, 7, 31),
+ datetime(2008, 4, 15): datetime(2008, 7, 31),
+ datetime(2008, 4, 30): datetime(2008, 10, 31), }))
+
+ @pytest.mark.parametrize('case', offset_cases)
+ def test_offset(self, case):
+ offset, cases = case
+ for base, expected in compat.iteritems(cases):
+ assert_offset_equal(offset, base, expected)
+
+ def test_offset_corner_case(self):
# corner
offset = BQuarterEnd(n=-1, startingMonth=1)
assert datetime(2010, 1, 31) + offset == datetime(2010, 1, 29)
- def test_onOffset(self):
-
- tests = [
- (BQuarterEnd(1, startingMonth=1), datetime(2008, 1, 31), True),
- (BQuarterEnd(1, startingMonth=1), datetime(2007, 12, 31), False),
- (BQuarterEnd(1, startingMonth=1), datetime(2008, 2, 29), False),
- (BQuarterEnd(1, startingMonth=1), datetime(2007, 3, 30), False),
- (BQuarterEnd(1, startingMonth=1), datetime(2007, 3, 31), False),
- (BQuarterEnd(1, startingMonth=1), datetime(2008, 4, 30), True),
- (BQuarterEnd(1, startingMonth=1), datetime(2008, 5, 30), False),
- (BQuarterEnd(1, startingMonth=1), datetime(2007, 6, 29), False),
- (BQuarterEnd(1, startingMonth=1), datetime(2007, 6, 30), False),
- (BQuarterEnd(1, startingMonth=2), datetime(2008, 1, 31), False),
- (BQuarterEnd(1, startingMonth=2), datetime(2007, 12, 31), False),
- (BQuarterEnd(1, startingMonth=2), datetime(2008, 2, 29), True),
- (BQuarterEnd(1, startingMonth=2), datetime(2007, 3, 30), False),
- (BQuarterEnd(1, startingMonth=2), datetime(2007, 3, 31), False),
- (BQuarterEnd(1, startingMonth=2), datetime(2008, 4, 30), False),
- (BQuarterEnd(1, startingMonth=2), datetime(2008, 5, 30), True),
- (BQuarterEnd(1, startingMonth=2), datetime(2007, 6, 29), False),
- (BQuarterEnd(1, startingMonth=2), datetime(2007, 6, 30), False),
- (BQuarterEnd(1, startingMonth=3), datetime(2008, 1, 31), False),
- (BQuarterEnd(1, startingMonth=3), datetime(2007, 12, 31), True),
- (BQuarterEnd(1, startingMonth=3), datetime(2008, 2, 29), False),
- (BQuarterEnd(1, startingMonth=3), datetime(2007, 3, 30), True),
- (BQuarterEnd(1, startingMonth=3), datetime(2007, 3, 31), False),
- (BQuarterEnd(1, startingMonth=3), datetime(2008, 4, 30), False),
- (BQuarterEnd(1, startingMonth=3), datetime(2008, 5, 30), False),
- (BQuarterEnd(1, startingMonth=3), datetime(2007, 6, 29), True),
- (BQuarterEnd(1, startingMonth=3), datetime(2007, 6, 30), False),
- ]
-
- for offset, dt, expected in tests:
- assertOnOffset(offset, dt, expected)
+ on_offset_cases = [
+ (BQuarterEnd(1, startingMonth=1), datetime(2008, 1, 31), True),
+ (BQuarterEnd(1, startingMonth=1), datetime(2007, 12, 31), False),
+ (BQuarterEnd(1, startingMonth=1), datetime(2008, 2, 29), False),
+ (BQuarterEnd(1, startingMonth=1), datetime(2007, 3, 30), False),
+ (BQuarterEnd(1, startingMonth=1), datetime(2007, 3, 31), False),
+ (BQuarterEnd(1, startingMonth=1), datetime(2008, 4, 30), True),
+ (BQuarterEnd(1, startingMonth=1), datetime(2008, 5, 30), False),
+ (BQuarterEnd(1, startingMonth=1), datetime(2007, 6, 29), False),
+ (BQuarterEnd(1, startingMonth=1), datetime(2007, 6, 30), False),
+ (BQuarterEnd(1, startingMonth=2), datetime(2008, 1, 31), False),
+ (BQuarterEnd(1, startingMonth=2), datetime(2007, 12, 31), False),
+ (BQuarterEnd(1, startingMonth=2), datetime(2008, 2, 29), True),
+ (BQuarterEnd(1, startingMonth=2), datetime(2007, 3, 30), False),
+ (BQuarterEnd(1, startingMonth=2), datetime(2007, 3, 31), False),
+ (BQuarterEnd(1, startingMonth=2), datetime(2008, 4, 30), False),
+ (BQuarterEnd(1, startingMonth=2), datetime(2008, 5, 30), True),
+ (BQuarterEnd(1, startingMonth=2), datetime(2007, 6, 29), False),
+ (BQuarterEnd(1, startingMonth=2), datetime(2007, 6, 30), False),
+ (BQuarterEnd(1, startingMonth=3), datetime(2008, 1, 31), False),
+ (BQuarterEnd(1, startingMonth=3), datetime(2007, 12, 31), True),
+ (BQuarterEnd(1, startingMonth=3), datetime(2008, 2, 29), False),
+ (BQuarterEnd(1, startingMonth=3), datetime(2007, 3, 30), True),
+ (BQuarterEnd(1, startingMonth=3), datetime(2007, 3, 31), False),
+ (BQuarterEnd(1, startingMonth=3), datetime(2008, 4, 30), False),
+ (BQuarterEnd(1, startingMonth=3), datetime(2008, 5, 30), False),
+ (BQuarterEnd(1, startingMonth=3), datetime(2007, 6, 29), True),
+ (BQuarterEnd(1, startingMonth=3), datetime(2007, 6, 30), False)]
+
+ @pytest.mark.parametrize('case', on_offset_cases)
+ def test_onOffset(self, case):
+ offset, dt, expected = case
+ assert_onOffset(offset, dt, expected)
def makeFY5253LastOfMonthQuarter(*args, **kwds):
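
The refactor above converts the per-method loops into class-level case lists consumed by
`@pytest.mark.parametrize('case', ...)`, so each (offset, cases) pair is reported and fails as its
own test. A generic illustration of the pattern (not pandas code):

    import pytest

    class TestPattern(object):
        cases = [(1, 2), (2, 3), (3, 4)]

        @pytest.mark.parametrize('case', cases)
        def test_increment(self, case):
            value, expected = case
            assert value + 1 == expected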
@@ -3268,7 +3255,7 @@ def test_onOffset(self):
]
for offset, dt, expected in tests:
- assertOnOffset(offset, dt, expected)
+ assert_onOffset(offset, dt, expected)
def test_apply(self):
offset_lom_aug_sat = makeFY5253LastOfMonth(startingMonth=8,
@@ -3410,7 +3397,7 @@ def test_onOffset(self):
]
for offset, dt, expected in tests:
- assertOnOffset(offset, dt, expected)
+ assert_onOffset(offset, dt, expected)
def test_apply(self):
date_seq_nem_8_sat = [datetime(2006, 9, 2), datetime(2007, 9, 1),
@@ -3515,27 +3502,28 @@ def test_offset(self):
datetime(2012, 9, 29), datetime(2012, 12, 29),
datetime(2013, 3, 30), datetime(2013, 6, 29)]
- assertEq(offset, base=GMCR[0], expected=GMCR[1])
- assertEq(offset, base=GMCR[0] + relativedelta(days=-1),
- expected=GMCR[0])
- assertEq(offset, base=GMCR[1], expected=GMCR[2])
+ assert_offset_equal(offset, base=GMCR[0], expected=GMCR[1])
+ assert_offset_equal(offset, base=GMCR[0] + relativedelta(days=-1),
+ expected=GMCR[0])
+ assert_offset_equal(offset, base=GMCR[1], expected=GMCR[2])
- assertEq(offset2, base=GMCR[0], expected=GMCR[2])
- assertEq(offset4, base=GMCR[0], expected=GMCR[4])
+ assert_offset_equal(offset2, base=GMCR[0], expected=GMCR[2])
+ assert_offset_equal(offset4, base=GMCR[0], expected=GMCR[4])
- assertEq(offset_neg1, base=GMCR[-1], expected=GMCR[-2])
- assertEq(offset_neg1, base=GMCR[-1] + relativedelta(days=+1),
- expected=GMCR[-1])
- assertEq(offset_neg2, base=GMCR[-1], expected=GMCR[-3])
+ assert_offset_equal(offset_neg1, base=GMCR[-1], expected=GMCR[-2])
+ assert_offset_equal(offset_neg1,
+ base=GMCR[-1] + relativedelta(days=+1),
+ expected=GMCR[-1])
+ assert_offset_equal(offset_neg2, base=GMCR[-1], expected=GMCR[-3])
date = GMCR[0] + relativedelta(days=-1)
for expected in GMCR:
- assertEq(offset, date, expected)
+ assert_offset_equal(offset, date, expected)
date = date + offset
date = GMCR[-1] + relativedelta(days=+1)
for expected in reversed(GMCR):
- assertEq(offset_neg1, date, expected)
+ assert_offset_equal(offset_neg1, date, expected)
date = date + offset_neg1
def test_onOffset(self):
@@ -3609,7 +3597,7 @@ def test_onOffset(self):
]
for offset, dt, expected in tests:
- assertOnOffset(offset, dt, expected)
+ assert_onOffset(offset, dt, expected)
def test_year_has_extra_week(self):
# End of long Q1
@@ -3722,29 +3710,35 @@ def test_onOffset(self):
]
for offset, dt, expected in tests:
- assertOnOffset(offset, dt, expected)
+ assert_onOffset(offset, dt, expected)
def test_offset(self):
offset = makeFY5253NearestEndMonthQuarter(1, startingMonth=8,
weekday=WeekDay.THU,
qtr_with_extra_week=4)
- MU = [datetime(2012, 5, 31), datetime(2012, 8, 30), datetime(2012, 11,
- 29),
+ MU = [datetime(2012, 5, 31),
+ datetime(2012, 8, 30), datetime(2012, 11, 29),
datetime(2013, 2, 28), datetime(2013, 5, 30)]
date = MU[0] + relativedelta(days=-1)
for expected in MU:
- assertEq(offset, date, expected)
+ assert_offset_equal(offset, date, expected)
date = date + offset
- assertEq(offset, datetime(2012, 5, 31), datetime(2012, 8, 30))
- assertEq(offset, datetime(2012, 5, 30), datetime(2012, 5, 31))
+ assert_offset_equal(offset,
+ datetime(2012, 5, 31),
+ datetime(2012, 8, 30))
+ assert_offset_equal(offset,
+ datetime(2012, 5, 30),
+ datetime(2012, 5, 31))
offset2 = FY5253Quarter(weekday=5, startingMonth=12, variation="last",
qtr_with_extra_week=4)
- assertEq(offset2, datetime(2013, 1, 15), datetime(2013, 3, 30))
+ assert_offset_equal(offset2,
+ datetime(2013, 1, 15),
+ datetime(2013, 3, 30))
class TestQuarterBegin(Base):
@@ -3762,64 +3756,65 @@ def test_isAnchored(self):
assert QuarterBegin().isAnchored()
assert not QuarterBegin(2, startingMonth=1).isAnchored()
- def test_offset(self):
- tests = []
-
- tests.append((QuarterBegin(startingMonth=1),
- {datetime(2007, 12, 1): datetime(2008, 1, 1),
- datetime(2008, 1, 1): datetime(2008, 4, 1),
- datetime(2008, 2, 15): datetime(2008, 4, 1),
- datetime(2008, 2, 29): datetime(2008, 4, 1),
- datetime(2008, 3, 15): datetime(2008, 4, 1),
- datetime(2008, 3, 31): datetime(2008, 4, 1),
- datetime(2008, 4, 15): datetime(2008, 7, 1),
- datetime(2008, 4, 1): datetime(2008, 7, 1), }))
-
- tests.append((QuarterBegin(startingMonth=2),
- {datetime(2008, 1, 1): datetime(2008, 2, 1),
- datetime(2008, 1, 31): datetime(2008, 2, 1),
- datetime(2008, 1, 15): datetime(2008, 2, 1),
- datetime(2008, 2, 29): datetime(2008, 5, 1),
- datetime(2008, 3, 15): datetime(2008, 5, 1),
- datetime(2008, 3, 31): datetime(2008, 5, 1),
- datetime(2008, 4, 15): datetime(2008, 5, 1),
- datetime(2008, 4, 30): datetime(2008, 5, 1), }))
-
- tests.append((QuarterBegin(startingMonth=1, n=0),
- {datetime(2008, 1, 1): datetime(2008, 1, 1),
- datetime(2008, 12, 1): datetime(2009, 1, 1),
- datetime(2008, 1, 1): datetime(2008, 1, 1),
- datetime(2008, 2, 15): datetime(2008, 4, 1),
- datetime(2008, 2, 29): datetime(2008, 4, 1),
- datetime(2008, 3, 15): datetime(2008, 4, 1),
- datetime(2008, 3, 31): datetime(2008, 4, 1),
- datetime(2008, 4, 15): datetime(2008, 7, 1),
- datetime(2008, 4, 30): datetime(2008, 7, 1), }))
-
- tests.append((QuarterBegin(startingMonth=1, n=-1),
- {datetime(2008, 1, 1): datetime(2007, 10, 1),
- datetime(2008, 1, 31): datetime(2008, 1, 1),
- datetime(2008, 2, 15): datetime(2008, 1, 1),
- datetime(2008, 2, 29): datetime(2008, 1, 1),
- datetime(2008, 3, 15): datetime(2008, 1, 1),
- datetime(2008, 3, 31): datetime(2008, 1, 1),
- datetime(2008, 4, 15): datetime(2008, 4, 1),
- datetime(2008, 4, 30): datetime(2008, 4, 1),
- datetime(2008, 7, 1): datetime(2008, 4, 1)}))
-
- tests.append((QuarterBegin(startingMonth=1, n=2),
- {datetime(2008, 1, 1): datetime(2008, 7, 1),
- datetime(2008, 2, 15): datetime(2008, 7, 1),
- datetime(2008, 2, 29): datetime(2008, 7, 1),
- datetime(2008, 3, 15): datetime(2008, 7, 1),
- datetime(2008, 3, 31): datetime(2008, 7, 1),
- datetime(2008, 4, 15): datetime(2008, 10, 1),
- datetime(2008, 4, 1): datetime(2008, 10, 1), }))
-
- for offset, cases in tests:
- for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
-
+ offset_cases = []
+ offset_cases.append((QuarterBegin(startingMonth=1),
+ {datetime(2007, 12, 1): datetime(2008, 1, 1),
+ datetime(2008, 1, 1): datetime(2008, 4, 1),
+ datetime(2008, 2, 15): datetime(2008, 4, 1),
+ datetime(2008, 2, 29): datetime(2008, 4, 1),
+ datetime(2008, 3, 15): datetime(2008, 4, 1),
+ datetime(2008, 3, 31): datetime(2008, 4, 1),
+ datetime(2008, 4, 15): datetime(2008, 7, 1),
+ datetime(2008, 4, 1): datetime(2008, 7, 1), }))
+
+ offset_cases.append((QuarterBegin(startingMonth=2),
+ {datetime(2008, 1, 1): datetime(2008, 2, 1),
+ datetime(2008, 1, 31): datetime(2008, 2, 1),
+ datetime(2008, 1, 15): datetime(2008, 2, 1),
+ datetime(2008, 2, 29): datetime(2008, 5, 1),
+ datetime(2008, 3, 15): datetime(2008, 5, 1),
+ datetime(2008, 3, 31): datetime(2008, 5, 1),
+ datetime(2008, 4, 15): datetime(2008, 5, 1),
+ datetime(2008, 4, 30): datetime(2008, 5, 1), }))
+
+ offset_cases.append((QuarterBegin(startingMonth=1, n=0),
+ {datetime(2008, 1, 1): datetime(2008, 1, 1),
+ datetime(2008, 12, 1): datetime(2009, 1, 1),
+ datetime(2008, 1, 1): datetime(2008, 1, 1),
+ datetime(2008, 2, 15): datetime(2008, 4, 1),
+ datetime(2008, 2, 29): datetime(2008, 4, 1),
+ datetime(2008, 3, 15): datetime(2008, 4, 1),
+ datetime(2008, 3, 31): datetime(2008, 4, 1),
+ datetime(2008, 4, 15): datetime(2008, 7, 1),
+ datetime(2008, 4, 30): datetime(2008, 7, 1), }))
+
+ offset_cases.append((QuarterBegin(startingMonth=1, n=-1),
+ {datetime(2008, 1, 1): datetime(2007, 10, 1),
+ datetime(2008, 1, 31): datetime(2008, 1, 1),
+ datetime(2008, 2, 15): datetime(2008, 1, 1),
+ datetime(2008, 2, 29): datetime(2008, 1, 1),
+ datetime(2008, 3, 15): datetime(2008, 1, 1),
+ datetime(2008, 3, 31): datetime(2008, 1, 1),
+ datetime(2008, 4, 15): datetime(2008, 4, 1),
+ datetime(2008, 4, 30): datetime(2008, 4, 1),
+ datetime(2008, 7, 1): datetime(2008, 4, 1)}))
+
+ offset_cases.append((QuarterBegin(startingMonth=1, n=2),
+ {datetime(2008, 1, 1): datetime(2008, 7, 1),
+ datetime(2008, 2, 15): datetime(2008, 7, 1),
+ datetime(2008, 2, 29): datetime(2008, 7, 1),
+ datetime(2008, 3, 15): datetime(2008, 7, 1),
+ datetime(2008, 3, 31): datetime(2008, 7, 1),
+ datetime(2008, 4, 15): datetime(2008, 10, 1),
+ datetime(2008, 4, 1): datetime(2008, 10, 1), }))
+
+ @pytest.mark.parametrize('case', offset_cases)
+ def test_offset(self, case):
+ offset, cases = case
+ for base, expected in compat.iteritems(cases):
+ assert_offset_equal(offset, base, expected)
+
+ def test_offset_corner_case(self):
# corner
offset = QuarterBegin(n=-1, startingMonth=1)
assert datetime(2010, 2, 1) + offset == datetime(2010, 1, 1)
@@ -3841,127 +3836,104 @@ def test_isAnchored(self):
assert QuarterEnd().isAnchored()
assert not QuarterEnd(2, startingMonth=1).isAnchored()
- def test_offset(self):
- tests = []
-
- tests.append((QuarterEnd(startingMonth=1),
- {datetime(2008, 1, 1): datetime(2008, 1, 31),
- datetime(2008, 1, 31): datetime(2008, 4, 30),
- datetime(2008, 2, 15): datetime(2008, 4, 30),
- datetime(2008, 2, 29): datetime(2008, 4, 30),
- datetime(2008, 3, 15): datetime(2008, 4, 30),
- datetime(2008, 3, 31): datetime(2008, 4, 30),
- datetime(2008, 4, 15): datetime(2008, 4, 30),
- datetime(2008, 4, 30): datetime(2008, 7, 31), }))
-
- tests.append((QuarterEnd(startingMonth=2),
- {datetime(2008, 1, 1): datetime(2008, 2, 29),
- datetime(2008, 1, 31): datetime(2008, 2, 29),
- datetime(2008, 2, 15): datetime(2008, 2, 29),
- datetime(2008, 2, 29): datetime(2008, 5, 31),
- datetime(2008, 3, 15): datetime(2008, 5, 31),
- datetime(2008, 3, 31): datetime(2008, 5, 31),
- datetime(2008, 4, 15): datetime(2008, 5, 31),
- datetime(2008, 4, 30): datetime(2008, 5, 31), }))
-
- tests.append((QuarterEnd(startingMonth=1, n=0),
- {datetime(2008, 1, 1): datetime(2008, 1, 31),
- datetime(2008, 1, 31): datetime(2008, 1, 31),
- datetime(2008, 2, 15): datetime(2008, 4, 30),
- datetime(2008, 2, 29): datetime(2008, 4, 30),
- datetime(2008, 3, 15): datetime(2008, 4, 30),
- datetime(2008, 3, 31): datetime(2008, 4, 30),
- datetime(2008, 4, 15): datetime(2008, 4, 30),
- datetime(2008, 4, 30): datetime(2008, 4, 30), }))
-
- tests.append((QuarterEnd(startingMonth=1, n=-1),
- {datetime(2008, 1, 1): datetime(2007, 10, 31),
- datetime(2008, 1, 31): datetime(2007, 10, 31),
- datetime(2008, 2, 15): datetime(2008, 1, 31),
- datetime(2008, 2, 29): datetime(2008, 1, 31),
- datetime(2008, 3, 15): datetime(2008, 1, 31),
- datetime(2008, 3, 31): datetime(2008, 1, 31),
- datetime(2008, 4, 15): datetime(2008, 1, 31),
- datetime(2008, 4, 30): datetime(2008, 1, 31),
- datetime(2008, 7, 1): datetime(2008, 4, 30)}))
-
- tests.append((QuarterEnd(startingMonth=1, n=2),
- {datetime(2008, 1, 31): datetime(2008, 7, 31),
- datetime(2008, 2, 15): datetime(2008, 7, 31),
- datetime(2008, 2, 29): datetime(2008, 7, 31),
- datetime(2008, 3, 15): datetime(2008, 7, 31),
- datetime(2008, 3, 31): datetime(2008, 7, 31),
- datetime(2008, 4, 15): datetime(2008, 7, 31),
- datetime(2008, 4, 30): datetime(2008, 10, 31), }))
-
- for offset, cases in tests:
- for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
-
+ offset_cases = []
+ offset_cases.append((QuarterEnd(startingMonth=1),
+ {datetime(2008, 1, 1): datetime(2008, 1, 31),
+ datetime(2008, 1, 31): datetime(2008, 4, 30),
+ datetime(2008, 2, 15): datetime(2008, 4, 30),
+ datetime(2008, 2, 29): datetime(2008, 4, 30),
+ datetime(2008, 3, 15): datetime(2008, 4, 30),
+ datetime(2008, 3, 31): datetime(2008, 4, 30),
+ datetime(2008, 4, 15): datetime(2008, 4, 30),
+ datetime(2008, 4, 30): datetime(2008, 7, 31), }))
+
+ offset_cases.append((QuarterEnd(startingMonth=2),
+ {datetime(2008, 1, 1): datetime(2008, 2, 29),
+ datetime(2008, 1, 31): datetime(2008, 2, 29),
+ datetime(2008, 2, 15): datetime(2008, 2, 29),
+ datetime(2008, 2, 29): datetime(2008, 5, 31),
+ datetime(2008, 3, 15): datetime(2008, 5, 31),
+ datetime(2008, 3, 31): datetime(2008, 5, 31),
+ datetime(2008, 4, 15): datetime(2008, 5, 31),
+ datetime(2008, 4, 30): datetime(2008, 5, 31), }))
+
+ offset_cases.append((QuarterEnd(startingMonth=1, n=0),
+ {datetime(2008, 1, 1): datetime(2008, 1, 31),
+ datetime(2008, 1, 31): datetime(2008, 1, 31),
+ datetime(2008, 2, 15): datetime(2008, 4, 30),
+ datetime(2008, 2, 29): datetime(2008, 4, 30),
+ datetime(2008, 3, 15): datetime(2008, 4, 30),
+ datetime(2008, 3, 31): datetime(2008, 4, 30),
+ datetime(2008, 4, 15): datetime(2008, 4, 30),
+ datetime(2008, 4, 30): datetime(2008, 4, 30), }))
+
+ offset_cases.append((QuarterEnd(startingMonth=1, n=-1),
+ {datetime(2008, 1, 1): datetime(2007, 10, 31),
+ datetime(2008, 1, 31): datetime(2007, 10, 31),
+ datetime(2008, 2, 15): datetime(2008, 1, 31),
+ datetime(2008, 2, 29): datetime(2008, 1, 31),
+ datetime(2008, 3, 15): datetime(2008, 1, 31),
+ datetime(2008, 3, 31): datetime(2008, 1, 31),
+ datetime(2008, 4, 15): datetime(2008, 1, 31),
+ datetime(2008, 4, 30): datetime(2008, 1, 31),
+ datetime(2008, 7, 1): datetime(2008, 4, 30)}))
+
+ offset_cases.append((QuarterEnd(startingMonth=1, n=2),
+ {datetime(2008, 1, 31): datetime(2008, 7, 31),
+ datetime(2008, 2, 15): datetime(2008, 7, 31),
+ datetime(2008, 2, 29): datetime(2008, 7, 31),
+ datetime(2008, 3, 15): datetime(2008, 7, 31),
+ datetime(2008, 3, 31): datetime(2008, 7, 31),
+ datetime(2008, 4, 15): datetime(2008, 7, 31),
+ datetime(2008, 4, 30): datetime(2008, 10, 31), }))
+
+ @pytest.mark.parametrize('case', offset_cases)
+ def test_offset(self, case):
+ offset, cases = case
+ for base, expected in compat.iteritems(cases):
+ assert_offset_equal(offset, base, expected)
+
+ def test_offset_corner_case(self):
# corner
offset = QuarterEnd(n=-1, startingMonth=1)
assert datetime(2010, 2, 1) + offset == datetime(2010, 1, 31)
- def test_onOffset(self):
-
- tests = [(QuarterEnd(1, startingMonth=1), datetime(2008, 1, 31), True),
- (QuarterEnd(1, startingMonth=1), datetime(2007, 12, 31),
- False),
- (QuarterEnd(1, startingMonth=1), datetime(2008, 2, 29),
- False),
- (QuarterEnd(1, startingMonth=1), datetime(2007, 3, 30),
- False),
- (QuarterEnd(1, startingMonth=1), datetime(2007, 3, 31),
- False),
- (QuarterEnd(1, startingMonth=1), datetime(2008, 4, 30), True),
- (QuarterEnd(1, startingMonth=1), datetime(2008, 5, 30),
- False),
- (QuarterEnd(1, startingMonth=1), datetime(2008, 5, 31),
- False),
- (QuarterEnd(1, startingMonth=1), datetime(2007, 6, 29),
- False),
- (QuarterEnd(1, startingMonth=1), datetime(2007, 6, 30),
- False),
- (QuarterEnd(1, startingMonth=2), datetime(2008, 1, 31),
- False),
- (QuarterEnd(1, startingMonth=2), datetime(2007, 12, 31),
- False),
- (QuarterEnd(1, startingMonth=2), datetime(2008, 2, 29), True),
- (QuarterEnd(1, startingMonth=2), datetime(2007, 3, 30),
- False),
- (QuarterEnd(1, startingMonth=2), datetime(2007, 3, 31),
- False),
- (QuarterEnd(1, startingMonth=2), datetime(2008, 4, 30),
- False),
- (QuarterEnd(1, startingMonth=2), datetime(2008, 5, 30),
- False),
- (QuarterEnd(1, startingMonth=2), datetime(2008, 5, 31), True),
- (QuarterEnd(1, startingMonth=2), datetime(2007, 6, 29),
- False),
- (QuarterEnd(1, startingMonth=2), datetime(2007, 6, 30),
- False),
- (QuarterEnd(1, startingMonth=3), datetime(2008, 1, 31),
- False),
- (QuarterEnd(1, startingMonth=3), datetime(2007, 12, 31),
- True),
- (QuarterEnd(1, startingMonth=3), datetime(2008, 2, 29),
- False),
- (QuarterEnd(1, startingMonth=3), datetime(2007, 3, 30),
- False),
- (QuarterEnd(1, startingMonth=3), datetime(2007, 3, 31), True),
- (QuarterEnd(1, startingMonth=3), datetime(2008, 4, 30),
- False),
- (QuarterEnd(1, startingMonth=3), datetime(2008, 5, 30),
- False),
- (QuarterEnd(1, startingMonth=3), datetime(2008, 5, 31),
- False),
- (QuarterEnd(1, startingMonth=3), datetime(2007, 6, 29),
- False),
- (QuarterEnd(1, startingMonth=3), datetime(2007, 6, 30),
- True), ]
-
- for offset, dt, expected in tests:
- assertOnOffset(offset, dt, expected)
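+ # Each case is an (offset, datetime, expected onOffset result) triple.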
+ on_offset_cases = [
+ (QuarterEnd(1, startingMonth=1), datetime(2008, 1, 31), True),
+ (QuarterEnd(1, startingMonth=1), datetime(2007, 12, 31), False),
+ (QuarterEnd(1, startingMonth=1), datetime(2008, 2, 29), False),
+ (QuarterEnd(1, startingMonth=1), datetime(2007, 3, 30), False),
+ (QuarterEnd(1, startingMonth=1), datetime(2007, 3, 31), False),
+ (QuarterEnd(1, startingMonth=1), datetime(2008, 4, 30), True),
+ (QuarterEnd(1, startingMonth=1), datetime(2008, 5, 30), False),
+ (QuarterEnd(1, startingMonth=1), datetime(2008, 5, 31), False),
+ (QuarterEnd(1, startingMonth=1), datetime(2007, 6, 29), False),
+ (QuarterEnd(1, startingMonth=1), datetime(2007, 6, 30), False),
+ (QuarterEnd(1, startingMonth=2), datetime(2008, 1, 31), False),
+ (QuarterEnd(1, startingMonth=2), datetime(2007, 12, 31), False),
+ (QuarterEnd(1, startingMonth=2), datetime(2008, 2, 29), True),
+ (QuarterEnd(1, startingMonth=2), datetime(2007, 3, 30), False),
+ (QuarterEnd(1, startingMonth=2), datetime(2007, 3, 31), False),
+ (QuarterEnd(1, startingMonth=2), datetime(2008, 4, 30), False),
+ (QuarterEnd(1, startingMonth=2), datetime(2008, 5, 30), False),
+ (QuarterEnd(1, startingMonth=2), datetime(2008, 5, 31), True),
+ (QuarterEnd(1, startingMonth=2), datetime(2007, 6, 29), False),
+ (QuarterEnd(1, startingMonth=2), datetime(2007, 6, 30), False),
+ (QuarterEnd(1, startingMonth=3), datetime(2008, 1, 31), False),
+ (QuarterEnd(1, startingMonth=3), datetime(2007, 12, 31), True),
+ (QuarterEnd(1, startingMonth=3), datetime(2008, 2, 29), False),
+ (QuarterEnd(1, startingMonth=3), datetime(2007, 3, 30), False),
+ (QuarterEnd(1, startingMonth=3), datetime(2007, 3, 31), True),
+ (QuarterEnd(1, startingMonth=3), datetime(2008, 4, 30), False),
+ (QuarterEnd(1, startingMonth=3), datetime(2008, 5, 30), False),
+ (QuarterEnd(1, startingMonth=3), datetime(2008, 5, 31), False),
+ (QuarterEnd(1, startingMonth=3), datetime(2007, 6, 29), False),
+ (QuarterEnd(1, startingMonth=3), datetime(2007, 6, 30), True)]
+
+ @pytest.mark.parametrize('case', on_offset_cases)
+ def test_onOffset(self, case):
+ offset, dt, expected = case
+ assert_onOffset(offset, dt, expected)
class TestBYearBegin(Base):
@@ -3971,43 +3943,43 @@ def test_misspecified(self):
pytest.raises(ValueError, BYearBegin, month=13)
pytest.raises(ValueError, BYearEnd, month=13)
- def test_offset(self):
- tests = []
-
- tests.append((BYearBegin(),
- {datetime(2008, 1, 1): datetime(2009, 1, 1),
- datetime(2008, 6, 30): datetime(2009, 1, 1),
- datetime(2008, 12, 31): datetime(2009, 1, 1),
- datetime(2011, 1, 1): datetime(2011, 1, 3),
- datetime(2011, 1, 3): datetime(2012, 1, 2),
- datetime(2005, 12, 30): datetime(2006, 1, 2),
- datetime(2005, 12, 31): datetime(2006, 1, 2)}))
-
- tests.append((BYearBegin(0),
- {datetime(2008, 1, 1): datetime(2008, 1, 1),
- datetime(2008, 6, 30): datetime(2009, 1, 1),
- datetime(2008, 12, 31): datetime(2009, 1, 1),
- datetime(2005, 12, 30): datetime(2006, 1, 2),
- datetime(2005, 12, 31): datetime(2006, 1, 2), }))
-
- tests.append((BYearBegin(-1),
- {datetime(2007, 1, 1): datetime(2006, 1, 2),
- datetime(2009, 1, 4): datetime(2009, 1, 1),
- datetime(2009, 1, 1): datetime(2008, 1, 1),
- datetime(2008, 6, 30): datetime(2008, 1, 1),
- datetime(2008, 12, 31): datetime(2008, 1, 1),
- datetime(2006, 12, 29): datetime(2006, 1, 2),
- datetime(2006, 12, 30): datetime(2006, 1, 2),
- datetime(2006, 1, 1): datetime(2005, 1, 3), }))
-
- tests.append((BYearBegin(-2),
- {datetime(2007, 1, 1): datetime(2005, 1, 3),
- datetime(2007, 6, 30): datetime(2006, 1, 2),
- datetime(2008, 12, 31): datetime(2007, 1, 1), }))
-
- for offset, cases in tests:
- for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
+ offset_cases = []
+ offset_cases.append((BYearBegin(),
+ {datetime(2008, 1, 1): datetime(2009, 1, 1),
+ datetime(2008, 6, 30): datetime(2009, 1, 1),
+ datetime(2008, 12, 31): datetime(2009, 1, 1),
+ datetime(2011, 1, 1): datetime(2011, 1, 3),
+ datetime(2011, 1, 3): datetime(2012, 1, 2),
+ datetime(2005, 12, 30): datetime(2006, 1, 2),
+ datetime(2005, 12, 31): datetime(2006, 1, 2)}))
+
+ offset_cases.append((BYearBegin(0),
+ {datetime(2008, 1, 1): datetime(2008, 1, 1),
+ datetime(2008, 6, 30): datetime(2009, 1, 1),
+ datetime(2008, 12, 31): datetime(2009, 1, 1),
+ datetime(2005, 12, 30): datetime(2006, 1, 2),
+ datetime(2005, 12, 31): datetime(2006, 1, 2), }))
+
+ offset_cases.append((BYearBegin(-1),
+ {datetime(2007, 1, 1): datetime(2006, 1, 2),
+ datetime(2009, 1, 4): datetime(2009, 1, 1),
+ datetime(2009, 1, 1): datetime(2008, 1, 1),
+ datetime(2008, 6, 30): datetime(2008, 1, 1),
+ datetime(2008, 12, 31): datetime(2008, 1, 1),
+ datetime(2006, 12, 29): datetime(2006, 1, 2),
+ datetime(2006, 12, 30): datetime(2006, 1, 2),
+ datetime(2006, 1, 1): datetime(2005, 1, 3), }))
+
+ offset_cases.append((BYearBegin(-2),
+ {datetime(2007, 1, 1): datetime(2005, 1, 3),
+ datetime(2007, 6, 30): datetime(2006, 1, 2),
+ datetime(2008, 12, 31): datetime(2007, 1, 1), }))
+
+ @pytest.mark.parametrize('case', offset_cases)
+ def test_offset(self, case):
+ offset, cases = case
+ for base, expected in compat.iteritems(cases):
+ assert_offset_equal(offset, base, expected)
class TestYearBegin(Base):
@@ -4016,91 +3988,89 @@ class TestYearBegin(Base):
def test_misspecified(self):
pytest.raises(ValueError, YearBegin, month=13)
- def test_offset(self):
- tests = []
-
- tests.append((YearBegin(),
- {datetime(2008, 1, 1): datetime(2009, 1, 1),
- datetime(2008, 6, 30): datetime(2009, 1, 1),
- datetime(2008, 12, 31): datetime(2009, 1, 1),
- datetime(2005, 12, 30): datetime(2006, 1, 1),
- datetime(2005, 12, 31): datetime(2006, 1, 1), }))
-
- tests.append((YearBegin(0),
- {datetime(2008, 1, 1): datetime(2008, 1, 1),
- datetime(2008, 6, 30): datetime(2009, 1, 1),
- datetime(2008, 12, 31): datetime(2009, 1, 1),
- datetime(2005, 12, 30): datetime(2006, 1, 1),
- datetime(2005, 12, 31): datetime(2006, 1, 1), }))
-
- tests.append((YearBegin(3),
- {datetime(2008, 1, 1): datetime(2011, 1, 1),
- datetime(2008, 6, 30): datetime(2011, 1, 1),
- datetime(2008, 12, 31): datetime(2011, 1, 1),
- datetime(2005, 12, 30): datetime(2008, 1, 1),
- datetime(2005, 12, 31): datetime(2008, 1, 1), }))
-
- tests.append((YearBegin(-1),
- {datetime(2007, 1, 1): datetime(2006, 1, 1),
- datetime(2007, 1, 15): datetime(2007, 1, 1),
- datetime(2008, 6, 30): datetime(2008, 1, 1),
- datetime(2008, 12, 31): datetime(2008, 1, 1),
- datetime(2006, 12, 29): datetime(2006, 1, 1),
- datetime(2006, 12, 30): datetime(2006, 1, 1),
- datetime(2007, 1, 1): datetime(2006, 1, 1), }))
-
- tests.append((YearBegin(-2),
- {datetime(2007, 1, 1): datetime(2005, 1, 1),
- datetime(2008, 6, 30): datetime(2007, 1, 1),
- datetime(2008, 12, 31): datetime(2007, 1, 1), }))
-
- tests.append((YearBegin(month=4),
- {datetime(2007, 4, 1): datetime(2008, 4, 1),
- datetime(2007, 4, 15): datetime(2008, 4, 1),
- datetime(2007, 3, 1): datetime(2007, 4, 1),
- datetime(2007, 12, 15): datetime(2008, 4, 1),
- datetime(2012, 1, 31): datetime(2012, 4, 1), }))
-
- tests.append((YearBegin(0, month=4),
- {datetime(2007, 4, 1): datetime(2007, 4, 1),
- datetime(2007, 3, 1): datetime(2007, 4, 1),
- datetime(2007, 12, 15): datetime(2008, 4, 1),
- datetime(2012, 1, 31): datetime(2012, 4, 1), }))
-
- tests.append((YearBegin(4, month=4),
- {datetime(2007, 4, 1): datetime(2011, 4, 1),
- datetime(2007, 4, 15): datetime(2011, 4, 1),
- datetime(2007, 3, 1): datetime(2010, 4, 1),
- datetime(2007, 12, 15): datetime(2011, 4, 1),
- datetime(2012, 1, 31): datetime(2015, 4, 1), }))
-
- tests.append((YearBegin(-1, month=4),
- {datetime(2007, 4, 1): datetime(2006, 4, 1),
- datetime(2007, 3, 1): datetime(2006, 4, 1),
- datetime(2007, 12, 15): datetime(2007, 4, 1),
- datetime(2012, 1, 31): datetime(2011, 4, 1), }))
-
- tests.append((YearBegin(-3, month=4),
- {datetime(2007, 4, 1): datetime(2004, 4, 1),
- datetime(2007, 3, 1): datetime(2004, 4, 1),
- datetime(2007, 12, 15): datetime(2005, 4, 1),
- datetime(2012, 1, 31): datetime(2009, 4, 1), }))
-
- for offset, cases in tests:
- for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
-
- def test_onOffset(self):
-
- tests = [
- (YearBegin(), datetime(2007, 1, 3), False),
- (YearBegin(), datetime(2008, 1, 1), True),
- (YearBegin(), datetime(2006, 12, 31), False),
- (YearBegin(), datetime(2006, 1, 2), False),
- ]
-
- for offset, dt, expected in tests:
- assertOnOffset(offset, dt, expected)
+ offset_cases = []
+ offset_cases.append((YearBegin(),
+ {datetime(2008, 1, 1): datetime(2009, 1, 1),
+ datetime(2008, 6, 30): datetime(2009, 1, 1),
+ datetime(2008, 12, 31): datetime(2009, 1, 1),
+ datetime(2005, 12, 30): datetime(2006, 1, 1),
+ datetime(2005, 12, 31): datetime(2006, 1, 1), }))
+
+ offset_cases.append((YearBegin(0),
+ {datetime(2008, 1, 1): datetime(2008, 1, 1),
+ datetime(2008, 6, 30): datetime(2009, 1, 1),
+ datetime(2008, 12, 31): datetime(2009, 1, 1),
+ datetime(2005, 12, 30): datetime(2006, 1, 1),
+ datetime(2005, 12, 31): datetime(2006, 1, 1), }))
+
+ offset_cases.append((YearBegin(3),
+ {datetime(2008, 1, 1): datetime(2011, 1, 1),
+ datetime(2008, 6, 30): datetime(2011, 1, 1),
+ datetime(2008, 12, 31): datetime(2011, 1, 1),
+ datetime(2005, 12, 30): datetime(2008, 1, 1),
+ datetime(2005, 12, 31): datetime(2008, 1, 1), }))
+
+ offset_cases.append((YearBegin(-1),
+ {datetime(2007, 1, 1): datetime(2006, 1, 1),
+ datetime(2007, 1, 15): datetime(2007, 1, 1),
+ datetime(2008, 6, 30): datetime(2008, 1, 1),
+ datetime(2008, 12, 31): datetime(2008, 1, 1),
+ datetime(2006, 12, 29): datetime(2006, 1, 1),
+ datetime(2006, 12, 30): datetime(2006, 1, 1),
+ datetime(2007, 1, 1): datetime(2006, 1, 1), }))
+
+ offset_cases.append((YearBegin(-2),
+ {datetime(2007, 1, 1): datetime(2005, 1, 1),
+ datetime(2008, 6, 30): datetime(2007, 1, 1),
+ datetime(2008, 12, 31): datetime(2007, 1, 1), }))
+
+ offset_cases.append((YearBegin(month=4),
+ {datetime(2007, 4, 1): datetime(2008, 4, 1),
+ datetime(2007, 4, 15): datetime(2008, 4, 1),
+ datetime(2007, 3, 1): datetime(2007, 4, 1),
+ datetime(2007, 12, 15): datetime(2008, 4, 1),
+ datetime(2012, 1, 31): datetime(2012, 4, 1), }))
+
+ offset_cases.append((YearBegin(0, month=4),
+ {datetime(2007, 4, 1): datetime(2007, 4, 1),
+ datetime(2007, 3, 1): datetime(2007, 4, 1),
+ datetime(2007, 12, 15): datetime(2008, 4, 1),
+ datetime(2012, 1, 31): datetime(2012, 4, 1), }))
+
+ offset_cases.append((YearBegin(4, month=4),
+ {datetime(2007, 4, 1): datetime(2011, 4, 1),
+ datetime(2007, 4, 15): datetime(2011, 4, 1),
+ datetime(2007, 3, 1): datetime(2010, 4, 1),
+ datetime(2007, 12, 15): datetime(2011, 4, 1),
+ datetime(2012, 1, 31): datetime(2015, 4, 1), }))
+
+ offset_cases.append((YearBegin(-1, month=4),
+ {datetime(2007, 4, 1): datetime(2006, 4, 1),
+ datetime(2007, 3, 1): datetime(2006, 4, 1),
+ datetime(2007, 12, 15): datetime(2007, 4, 1),
+ datetime(2012, 1, 31): datetime(2011, 4, 1), }))
+
+ offset_cases.append((YearBegin(-3, month=4),
+ {datetime(2007, 4, 1): datetime(2004, 4, 1),
+ datetime(2007, 3, 1): datetime(2004, 4, 1),
+ datetime(2007, 12, 15): datetime(2005, 4, 1),
+ datetime(2012, 1, 31): datetime(2009, 4, 1), }))
+
+ @pytest.mark.parametrize('case', offset_cases)
+ def test_offset(self, case):
+ offset, cases = case
+ for base, expected in compat.iteritems(cases):
+ assert_offset_equal(offset, base, expected)
+
+ on_offset_cases = [(YearBegin(), datetime(2007, 1, 3), False),
+ (YearBegin(), datetime(2008, 1, 1), True),
+ (YearBegin(), datetime(2006, 12, 31), False),
+ (YearBegin(), datetime(2006, 1, 2), False)]
+
+ @pytest.mark.parametrize('case', on_offset_cases)
+ def test_onOffset(self, case):
+ offset, dt, expected = case
+ assert_onOffset(offset, dt, expected)
class TestBYearEndLagged(Base):
@@ -4109,20 +4079,20 @@ def test_bad_month_fail(self):
pytest.raises(Exception, BYearEnd, month=13)
pytest.raises(Exception, BYearEnd, month=0)
- def test_offset(self):
- tests = []
+ offset_cases = []
+ offset_cases.append((BYearEnd(month=6),
+ {datetime(2008, 1, 1): datetime(2008, 6, 30),
+ datetime(2007, 6, 30): datetime(2008, 6, 30)}, ))
- tests.append((BYearEnd(month=6),
- {datetime(2008, 1, 1): datetime(2008, 6, 30),
- datetime(2007, 6, 30): datetime(2008, 6, 30)}, ))
+ offset_cases.append((BYearEnd(n=-1, month=6),
+ {datetime(2008, 1, 1): datetime(2007, 6, 29),
+ datetime(2007, 6, 30): datetime(2007, 6, 29)}, ))
- tests.append((BYearEnd(n=-1, month=6),
- {datetime(2008, 1, 1): datetime(2007, 6, 29),
- datetime(2007, 6, 30): datetime(2007, 6, 29)}, ))
-
- for offset, cases in tests:
- for base, expected in compat.iteritems(cases):
- assert base + offset == expected
+ @pytest.mark.parametrize('case', offset_cases)
+ def test_offset(self, case):
+ offset, cases = case
+ for base, expected in compat.iteritems(cases):
+ assert base + offset == expected
def test_roll(self):
offset = BYearEnd(month=6)
@@ -4131,64 +4101,60 @@ def test_roll(self):
assert offset.rollforward(date) == datetime(2010, 6, 30)
assert offset.rollback(date) == datetime(2009, 6, 30)
- def test_onOffset(self):
-
- tests = [
- (BYearEnd(month=2), datetime(2007, 2, 28), True),
- (BYearEnd(month=6), datetime(2007, 6, 30), False),
- ]
+ on_offset_cases = [(BYearEnd(month=2), datetime(2007, 2, 28), True),
+ (BYearEnd(month=6), datetime(2007, 6, 30), False)]
- for offset, dt, expected in tests:
- assertOnOffset(offset, dt, expected)
+ @pytest.mark.parametrize('case', on_offset_cases)
+ def test_onOffset(self, case):
+ offset, dt, expected = case
+ assert_onOffset(offset, dt, expected)
class TestBYearEnd(Base):
_offset = BYearEnd
- def test_offset(self):
- tests = []
-
- tests.append((BYearEnd(),
- {datetime(2008, 1, 1): datetime(2008, 12, 31),
- datetime(2008, 6, 30): datetime(2008, 12, 31),
- datetime(2008, 12, 31): datetime(2009, 12, 31),
- datetime(2005, 12, 30): datetime(2006, 12, 29),
- datetime(2005, 12, 31): datetime(2006, 12, 29), }))
-
- tests.append((BYearEnd(0),
- {datetime(2008, 1, 1): datetime(2008, 12, 31),
- datetime(2008, 6, 30): datetime(2008, 12, 31),
- datetime(2008, 12, 31): datetime(2008, 12, 31),
- datetime(2005, 12, 31): datetime(2006, 12, 29), }))
-
- tests.append((BYearEnd(-1),
- {datetime(2007, 1, 1): datetime(2006, 12, 29),
- datetime(2008, 6, 30): datetime(2007, 12, 31),
- datetime(2008, 12, 31): datetime(2007, 12, 31),
- datetime(2006, 12, 29): datetime(2005, 12, 30),
- datetime(2006, 12, 30): datetime(2006, 12, 29),
- datetime(2007, 1, 1): datetime(2006, 12, 29), }))
-
- tests.append((BYearEnd(-2),
- {datetime(2007, 1, 1): datetime(2005, 12, 30),
- datetime(2008, 6, 30): datetime(2006, 12, 29),
- datetime(2008, 12, 31): datetime(2006, 12, 29), }))
-
- for offset, cases in tests:
- for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
-
- def test_onOffset(self):
-
- tests = [
- (BYearEnd(), datetime(2007, 12, 31), True),
- (BYearEnd(), datetime(2008, 1, 1), False),
- (BYearEnd(), datetime(2006, 12, 31), False),
- (BYearEnd(), datetime(2006, 12, 29), True),
- ]
-
- for offset, dt, expected in tests:
- assertOnOffset(offset, dt, expected)
+ offset_cases = []
+ offset_cases.append((BYearEnd(),
+ {datetime(2008, 1, 1): datetime(2008, 12, 31),
+ datetime(2008, 6, 30): datetime(2008, 12, 31),
+ datetime(2008, 12, 31): datetime(2009, 12, 31),
+ datetime(2005, 12, 30): datetime(2006, 12, 29),
+ datetime(2005, 12, 31): datetime(2006, 12, 29), }))
+
+ offset_cases.append((BYearEnd(0),
+ {datetime(2008, 1, 1): datetime(2008, 12, 31),
+ datetime(2008, 6, 30): datetime(2008, 12, 31),
+ datetime(2008, 12, 31): datetime(2008, 12, 31),
+ datetime(2005, 12, 31): datetime(2006, 12, 29), }))
+
+ offset_cases.append((BYearEnd(-1),
+ {datetime(2007, 1, 1): datetime(2006, 12, 29),
+ datetime(2008, 6, 30): datetime(2007, 12, 31),
+ datetime(2008, 12, 31): datetime(2007, 12, 31),
+ datetime(2006, 12, 29): datetime(2005, 12, 30),
+ datetime(2006, 12, 30): datetime(2006, 12, 29),
+ datetime(2007, 1, 1): datetime(2006, 12, 29), }))
+
+ offset_cases.append((BYearEnd(-2),
+ {datetime(2007, 1, 1): datetime(2005, 12, 30),
+ datetime(2008, 6, 30): datetime(2006, 12, 29),
+ datetime(2008, 12, 31): datetime(2006, 12, 29), }))
+
+ @pytest.mark.parametrize('case', offset_cases)
+ def test_offset(self, case):
+ offset, cases = case
+ for base, expected in compat.iteritems(cases):
+ assert_offset_equal(offset, base, expected)
+
+ on_offset_cases = [(BYearEnd(), datetime(2007, 12, 31), True),
+ (BYearEnd(), datetime(2008, 1, 1), False),
+ (BYearEnd(), datetime(2006, 12, 31), False),
+ (BYearEnd(), datetime(2006, 12, 29), True)]
+
+ @pytest.mark.parametrize('case', on_offset_cases)
+ def test_onOffset(self, case):
+ offset, dt, expected = case
+ assert_onOffset(offset, dt, expected)
class TestYearEnd(Base):
@@ -4197,286 +4163,115 @@ class TestYearEnd(Base):
def test_misspecified(self):
pytest.raises(ValueError, YearEnd, month=13)
- def test_offset(self):
- tests = []
-
- tests.append((YearEnd(),
- {datetime(2008, 1, 1): datetime(2008, 12, 31),
- datetime(2008, 6, 30): datetime(2008, 12, 31),
- datetime(2008, 12, 31): datetime(2009, 12, 31),
- datetime(2005, 12, 30): datetime(2005, 12, 31),
- datetime(2005, 12, 31): datetime(2006, 12, 31), }))
-
- tests.append((YearEnd(0),
- {datetime(2008, 1, 1): datetime(2008, 12, 31),
- datetime(2008, 6, 30): datetime(2008, 12, 31),
- datetime(2008, 12, 31): datetime(2008, 12, 31),
- datetime(2005, 12, 30): datetime(2005, 12, 31), }))
-
- tests.append((YearEnd(-1),
- {datetime(2007, 1, 1): datetime(2006, 12, 31),
- datetime(2008, 6, 30): datetime(2007, 12, 31),
- datetime(2008, 12, 31): datetime(2007, 12, 31),
- datetime(2006, 12, 29): datetime(2005, 12, 31),
- datetime(2006, 12, 30): datetime(2005, 12, 31),
- datetime(2007, 1, 1): datetime(2006, 12, 31), }))
-
- tests.append((YearEnd(-2),
- {datetime(2007, 1, 1): datetime(2005, 12, 31),
- datetime(2008, 6, 30): datetime(2006, 12, 31),
- datetime(2008, 12, 31): datetime(2006, 12, 31), }))
-
- for offset, cases in tests:
- for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
-
- def test_onOffset(self):
-
- tests = [
- (YearEnd(), datetime(2007, 12, 31), True),
- (YearEnd(), datetime(2008, 1, 1), False),
- (YearEnd(), datetime(2006, 12, 31), True),
- (YearEnd(), datetime(2006, 12, 29), False),
- ]
-
- for offset, dt, expected in tests:
- assertOnOffset(offset, dt, expected)
+ offset_cases = []
+ offset_cases.append((YearEnd(),
+ {datetime(2008, 1, 1): datetime(2008, 12, 31),
+ datetime(2008, 6, 30): datetime(2008, 12, 31),
+ datetime(2008, 12, 31): datetime(2009, 12, 31),
+ datetime(2005, 12, 30): datetime(2005, 12, 31),
+ datetime(2005, 12, 31): datetime(2006, 12, 31), }))
+
+ offset_cases.append((YearEnd(0),
+ {datetime(2008, 1, 1): datetime(2008, 12, 31),
+ datetime(2008, 6, 30): datetime(2008, 12, 31),
+ datetime(2008, 12, 31): datetime(2008, 12, 31),
+ datetime(2005, 12, 30): datetime(2005, 12, 31), }))
+
+ offset_cases.append((YearEnd(-1),
+ {datetime(2007, 1, 1): datetime(2006, 12, 31),
+ datetime(2008, 6, 30): datetime(2007, 12, 31),
+ datetime(2008, 12, 31): datetime(2007, 12, 31),
+ datetime(2006, 12, 29): datetime(2005, 12, 31),
+ datetime(2006, 12, 30): datetime(2005, 12, 31),
+ datetime(2007, 1, 1): datetime(2006, 12, 31), }))
+
+ offset_cases.append((YearEnd(-2),
+ {datetime(2007, 1, 1): datetime(2005, 12, 31),
+ datetime(2008, 6, 30): datetime(2006, 12, 31),
+ datetime(2008, 12, 31): datetime(2006, 12, 31), }))
+
+ @pytest.mark.parametrize('case', offset_cases)
+ def test_offset(self, case):
+ offset, cases = case
+ for base, expected in compat.iteritems(cases):
+ assert_offset_equal(offset, base, expected)
+
+ on_offset_cases = [(YearEnd(), datetime(2007, 12, 31), True),
+ (YearEnd(), datetime(2008, 1, 1), False),
+ (YearEnd(), datetime(2006, 12, 31), True),
+ (YearEnd(), datetime(2006, 12, 29), False)]
+
+ @pytest.mark.parametrize('case', on_offset_cases)
+ def test_onOffset(self, case):
+ offset, dt, expected = case
+ assert_onOffset(offset, dt, expected)
class TestYearEndDiffMonth(Base):
- def test_offset(self):
- tests = []
-
- tests.append((YearEnd(month=3),
- {datetime(2008, 1, 1): datetime(2008, 3, 31),
- datetime(2008, 2, 15): datetime(2008, 3, 31),
- datetime(2008, 3, 31): datetime(2009, 3, 31),
- datetime(2008, 3, 30): datetime(2008, 3, 31),
- datetime(2005, 3, 31): datetime(2006, 3, 31),
- datetime(2006, 7, 30): datetime(2007, 3, 31)}))
-
- tests.append((YearEnd(0, month=3),
- {datetime(2008, 1, 1): datetime(2008, 3, 31),
- datetime(2008, 2, 28): datetime(2008, 3, 31),
- datetime(2008, 3, 31): datetime(2008, 3, 31),
- datetime(2005, 3, 30): datetime(2005, 3, 31), }))
-
- tests.append((YearEnd(-1, month=3),
- {datetime(2007, 1, 1): datetime(2006, 3, 31),
- datetime(2008, 2, 28): datetime(2007, 3, 31),
- datetime(2008, 3, 31): datetime(2007, 3, 31),
- datetime(2006, 3, 29): datetime(2005, 3, 31),
- datetime(2006, 3, 30): datetime(2005, 3, 31),
- datetime(2007, 3, 1): datetime(2006, 3, 31), }))
-
- tests.append((YearEnd(-2, month=3),
- {datetime(2007, 1, 1): datetime(2005, 3, 31),
- datetime(2008, 6, 30): datetime(2007, 3, 31),
- datetime(2008, 3, 31): datetime(2006, 3, 31), }))
-
- for offset, cases in tests:
- for base, expected in compat.iteritems(cases):
- assertEq(offset, base, expected)
-
- def test_onOffset(self):
-
- tests = [
- (YearEnd(month=3), datetime(2007, 3, 31), True),
- (YearEnd(month=3), datetime(2008, 1, 1), False),
- (YearEnd(month=3), datetime(2006, 3, 31), True),
- (YearEnd(month=3), datetime(2006, 3, 29), False),
- ]
-
- for offset, dt, expected in tests:
- assertOnOffset(offset, dt, expected)
-
-
-def assertEq(offset, base, expected):
- actual = offset + base
- actual_swapped = base + offset
- actual_apply = offset.apply(base)
- try:
- assert actual == expected
- assert actual_swapped == expected
- assert actual_apply == expected
- except AssertionError:
- raise AssertionError("\nExpected: %s\nActual: %s\nFor Offset: %s)"
- "\nAt Date: %s" %
- (expected, actual, offset, base))
+ offset_cases = []
+ offset_cases.append((YearEnd(month=3),
+ {datetime(2008, 1, 1): datetime(2008, 3, 31),
+ datetime(2008, 2, 15): datetime(2008, 3, 31),
+ datetime(2008, 3, 31): datetime(2009, 3, 31),
+ datetime(2008, 3, 30): datetime(2008, 3, 31),
+ datetime(2005, 3, 31): datetime(2006, 3, 31),
+ datetime(2006, 7, 30): datetime(2007, 3, 31)}))
+
+ offset_cases.append((YearEnd(0, month=3),
+ {datetime(2008, 1, 1): datetime(2008, 3, 31),
+ datetime(2008, 2, 28): datetime(2008, 3, 31),
+ datetime(2008, 3, 31): datetime(2008, 3, 31),
+ datetime(2005, 3, 30): datetime(2005, 3, 31), }))
+
+ offset_cases.append((YearEnd(-1, month=3),
+ {datetime(2007, 1, 1): datetime(2006, 3, 31),
+ datetime(2008, 2, 28): datetime(2007, 3, 31),
+ datetime(2008, 3, 31): datetime(2007, 3, 31),
+ datetime(2006, 3, 29): datetime(2005, 3, 31),
+ datetime(2006, 3, 30): datetime(2005, 3, 31),
+ datetime(2007, 3, 1): datetime(2006, 3, 31), }))
+
+ offset_cases.append((YearEnd(-2, month=3),
+ {datetime(2007, 1, 1): datetime(2005, 3, 31),
+ datetime(2008, 6, 30): datetime(2007, 3, 31),
+ datetime(2008, 3, 31): datetime(2006, 3, 31), }))
+
+ @pytest.mark.parametrize('case', offset_cases)
+ def test_offset(self, case):
+ offset, cases = case
+ for base, expected in compat.iteritems(cases):
+ assert_offset_equal(offset, base, expected)
+
+ on_offset_cases = [(YearEnd(month=3), datetime(2007, 3, 31), True),
+ (YearEnd(month=3), datetime(2008, 1, 1), False),
+ (YearEnd(month=3), datetime(2006, 3, 31), True),
+ (YearEnd(month=3), datetime(2006, 3, 29), False)]
+
+ @pytest.mark.parametrize('case', on_offset_cases)
+ def test_onOffset(self, case):
+ offset, dt, expected = case
+ assert_onOffset(offset, dt, expected)
def test_Easter():
- assertEq(Easter(), datetime(2010, 1, 1), datetime(2010, 4, 4))
- assertEq(Easter(), datetime(2010, 4, 5), datetime(2011, 4, 24))
- assertEq(Easter(2), datetime(2010, 1, 1), datetime(2011, 4, 24))
-
- assertEq(Easter(), datetime(2010, 4, 4), datetime(2011, 4, 24))
- assertEq(Easter(2), datetime(2010, 4, 4), datetime(2012, 4, 8))
+ assert_offset_equal(Easter(), datetime(2010, 1, 1), datetime(2010, 4, 4))
+ assert_offset_equal(Easter(), datetime(2010, 4, 5), datetime(2011, 4, 24))
+ assert_offset_equal(Easter(2), datetime(2010, 1, 1), datetime(2011, 4, 24))
- assertEq(-Easter(), datetime(2011, 1, 1), datetime(2010, 4, 4))
- assertEq(-Easter(), datetime(2010, 4, 5), datetime(2010, 4, 4))
- assertEq(-Easter(2), datetime(2011, 1, 1), datetime(2009, 4, 12))
+ assert_offset_equal(Easter(), datetime(2010, 4, 4), datetime(2011, 4, 24))
+ assert_offset_equal(Easter(2), datetime(2010, 4, 4), datetime(2012, 4, 8))
- assertEq(-Easter(), datetime(2010, 4, 4), datetime(2009, 4, 12))
- assertEq(-Easter(2), datetime(2010, 4, 4), datetime(2008, 3, 23))
+ assert_offset_equal(-Easter(), datetime(2011, 1, 1), datetime(2010, 4, 4))
+ assert_offset_equal(-Easter(), datetime(2010, 4, 5), datetime(2010, 4, 4))
+ assert_offset_equal(-Easter(2),
+ datetime(2011, 1, 1),
+ datetime(2009, 4, 12))
-
-class TestTicks(object):
-
- ticks = [Hour, Minute, Second, Milli, Micro, Nano]
-
- def test_ticks(self):
- offsets = [(Hour, Timedelta(hours=5)),
- (Minute, Timedelta(hours=2, minutes=3)),
- (Second, Timedelta(hours=2, seconds=3)),
- (Milli, Timedelta(hours=2, milliseconds=3)),
- (Micro, Timedelta(hours=2, microseconds=3)),
- (Nano, Timedelta(hours=2, nanoseconds=3))]
-
- for kls, expected in offsets:
- offset = kls(3)
- result = offset + Timedelta(hours=2)
- assert isinstance(result, Timedelta)
- assert result == expected
-
- def test_Hour(self):
- assertEq(Hour(), datetime(2010, 1, 1), datetime(2010, 1, 1, 1))
- assertEq(Hour(-1), datetime(2010, 1, 1, 1), datetime(2010, 1, 1))
- assertEq(2 * Hour(), datetime(2010, 1, 1), datetime(2010, 1, 1, 2))
- assertEq(-1 * Hour(), datetime(2010, 1, 1, 1), datetime(2010, 1, 1))
-
- assert Hour(3) + Hour(2) == Hour(5)
- assert Hour(3) - Hour(2) == Hour()
-
- assert Hour(4) != Hour(1)
-
- def test_Minute(self):
- assertEq(Minute(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 1))
- assertEq(Minute(-1), datetime(2010, 1, 1, 0, 1), datetime(2010, 1, 1))
- assertEq(2 * Minute(), datetime(2010, 1, 1),
- datetime(2010, 1, 1, 0, 2))
- assertEq(-1 * Minute(), datetime(2010, 1, 1, 0, 1),
- datetime(2010, 1, 1))
-
- assert Minute(3) + Minute(2) == Minute(5)
- assert Minute(3) - Minute(2) == Minute()
- assert Minute(5) != Minute()
-
- def test_Second(self):
- assertEq(Second(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 0, 1))
- assertEq(Second(-1), datetime(2010, 1, 1,
- 0, 0, 1), datetime(2010, 1, 1))
- assertEq(2 * Second(), datetime(2010, 1, 1),
- datetime(2010, 1, 1, 0, 0, 2))
- assertEq(-1 * Second(), datetime(2010, 1, 1, 0, 0, 1),
- datetime(2010, 1, 1))
-
- assert Second(3) + Second(2) == Second(5)
- assert Second(3) - Second(2) == Second()
-
- def test_Millisecond(self):
- assertEq(Milli(), datetime(2010, 1, 1),
- datetime(2010, 1, 1, 0, 0, 0, 1000))
- assertEq(Milli(-1), datetime(2010, 1, 1, 0,
- 0, 0, 1000), datetime(2010, 1, 1))
- assertEq(Milli(2), datetime(2010, 1, 1),
- datetime(2010, 1, 1, 0, 0, 0, 2000))
- assertEq(2 * Milli(), datetime(2010, 1, 1),
- datetime(2010, 1, 1, 0, 0, 0, 2000))
- assertEq(-1 * Milli(), datetime(2010, 1, 1, 0, 0, 0, 1000),
- datetime(2010, 1, 1))
-
- assert Milli(3) + Milli(2) == Milli(5)
- assert Milli(3) - Milli(2) == Milli()
-
- def test_MillisecondTimestampArithmetic(self):
- assertEq(Milli(), Timestamp('2010-01-01'),
- Timestamp('2010-01-01 00:00:00.001'))
- assertEq(Milli(-1), Timestamp('2010-01-01 00:00:00.001'),
- Timestamp('2010-01-01'))
-
- def test_Microsecond(self):
- assertEq(Micro(), datetime(2010, 1, 1),
- datetime(2010, 1, 1, 0, 0, 0, 1))
- assertEq(Micro(-1), datetime(2010, 1, 1,
- 0, 0, 0, 1), datetime(2010, 1, 1))
- assertEq(2 * Micro(), datetime(2010, 1, 1),
- datetime(2010, 1, 1, 0, 0, 0, 2))
- assertEq(-1 * Micro(), datetime(2010, 1, 1, 0, 0, 0, 1),
- datetime(2010, 1, 1))
-
- assert Micro(3) + Micro(2) == Micro(5)
- assert Micro(3) - Micro(2) == Micro()
-
- def test_NanosecondGeneric(self):
- timestamp = Timestamp(datetime(2010, 1, 1))
- assert timestamp.nanosecond == 0
-
- result = timestamp + Nano(10)
- assert result.nanosecond == 10
-
- reverse_result = Nano(10) + timestamp
- assert reverse_result.nanosecond == 10
-
- def test_Nanosecond(self):
- timestamp = Timestamp(datetime(2010, 1, 1))
- assertEq(Nano(), timestamp, timestamp + np.timedelta64(1, 'ns'))
- assertEq(Nano(-1), timestamp + np.timedelta64(1, 'ns'), timestamp)
- assertEq(2 * Nano(), timestamp, timestamp + np.timedelta64(2, 'ns'))
- assertEq(-1 * Nano(), timestamp + np.timedelta64(1, 'ns'), timestamp)
-
- assert Nano(3) + Nano(2) == Nano(5)
- assert Nano(3) - Nano(2) == Nano()
-
- # GH9284
- assert Nano(1) + Nano(10) == Nano(11)
- assert Nano(5) + Micro(1) == Nano(1005)
- assert Micro(5) + Nano(1) == Nano(5001)
-
- def test_tick_zero(self):
- for t1 in self.ticks:
- for t2 in self.ticks:
- assert t1(0) == t2(0)
- assert t1(0) + t2(0) == t1(0)
-
- if t1 is not Nano:
- assert t1(2) + t2(0) == t1(2)
- if t1 is Nano:
- assert t1(2) + Nano(0) == t1(2)
-
- def test_tick_equalities(self):
- for t in self.ticks:
- assert t(3) == t(3)
- assert t() == t(1)
-
- # not equals
- assert t(3) != t(2)
- assert t(3) != t(-3)
-
- def test_tick_operators(self):
- for t in self.ticks:
- assert t(3) + t(2) == t(5)
- assert t(3) - t(2) == t(1)
- assert t(800) + t(300) == t(1100)
- assert t(1000) - t(5) == t(995)
-
- def test_tick_offset(self):
- for t in self.ticks:
- assert not t().isAnchored()
-
- def test_compare_ticks(self):
- for kls in self.ticks:
- three = kls(3)
- four = kls(4)
-
- for _ in range(10):
- assert three < kls(4)
- assert kls(3) < four
- assert four > kls(3)
- assert kls(4) > three
- assert kls(3) == kls(3)
- assert kls(3) != kls(4)
+ assert_offset_equal(-Easter(), datetime(2010, 4, 4), datetime(2009, 4, 12))
+ assert_offset_equal(-Easter(2),
+ datetime(2010, 4, 4),
+ datetime(2008, 3, 23))
class TestOffsetNames(object):
@@ -4641,19 +4436,6 @@ def test_rule_code(self):
assert k == _get_freq_str(code)
-def test_apply_ticks():
- result = offsets.Hour(3).apply(offsets.Hour(4))
- exp = offsets.Hour(7)
- assert (result == exp)
-
-
-def test_delta_to_tick():
- delta = timedelta(3)
-
- tick = offsets._delta_to_tick(delta)
- assert (tick == offsets.Day(3))
-
-
def test_dateoffset_misc():
oset = offsets.DateOffset(months=2, days=4)
# it works
@@ -4875,27 +4657,29 @@ def test_springforward_singular(self):
self._test_all_offsets(n=1, tstart=self._make_timestamp(
self.ts_pre_springfwd, hrs_pre, tz), expected_utc_offset=None)
- def test_all_offset_classes(self):
- tests = {MonthBegin: ['11/2/2012', '12/1/2012'],
- MonthEnd: ['11/2/2012', '11/30/2012'],
- BMonthBegin: ['11/2/2012', '12/3/2012'],
- BMonthEnd: ['11/2/2012', '11/30/2012'],
- CBMonthBegin: ['11/2/2012', '12/3/2012'],
- CBMonthEnd: ['11/2/2012', '11/30/2012'],
- SemiMonthBegin: ['11/2/2012', '11/15/2012'],
- SemiMonthEnd: ['11/2/2012', '11/15/2012'],
- Week: ['11/2/2012', '11/9/2012'],
- YearBegin: ['11/2/2012', '1/1/2013'],
- YearEnd: ['11/2/2012', '12/31/2012'],
- BYearBegin: ['11/2/2012', '1/1/2013'],
- BYearEnd: ['11/2/2012', '12/31/2012'],
- QuarterBegin: ['11/2/2012', '12/1/2012'],
- QuarterEnd: ['11/2/2012', '12/31/2012'],
- BQuarterBegin: ['11/2/2012', '12/3/2012'],
- BQuarterEnd: ['11/2/2012', '12/31/2012'],
- Day: ['11/4/2012', '11/4/2012 23:00']}
-
- for offset, test_values in iteritems(tests):
- first = Timestamp(test_values[0], tz='US/Eastern') + offset()
- second = Timestamp(test_values[1], tz='US/Eastern')
- assert first == second
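+ # Maps each offset class to [start, expected] timestamps, presumably chosen
+ # to straddle the 2012-11-04 US/Eastern DST fall-back (hence Day() landing
+ # at 23:00 of the same calendar day).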
+ offset_classes = {MonthBegin: ['11/2/2012', '12/1/2012'],
+ MonthEnd: ['11/2/2012', '11/30/2012'],
+ BMonthBegin: ['11/2/2012', '12/3/2012'],
+ BMonthEnd: ['11/2/2012', '11/30/2012'],
+ CBMonthBegin: ['11/2/2012', '12/3/2012'],
+ CBMonthEnd: ['11/2/2012', '11/30/2012'],
+ SemiMonthBegin: ['11/2/2012', '11/15/2012'],
+ SemiMonthEnd: ['11/2/2012', '11/15/2012'],
+ Week: ['11/2/2012', '11/9/2012'],
+ YearBegin: ['11/2/2012', '1/1/2013'],
+ YearEnd: ['11/2/2012', '12/31/2012'],
+ BYearBegin: ['11/2/2012', '1/1/2013'],
+ BYearEnd: ['11/2/2012', '12/31/2012'],
+ QuarterBegin: ['11/2/2012', '12/1/2012'],
+ QuarterEnd: ['11/2/2012', '12/31/2012'],
+ BQuarterBegin: ['11/2/2012', '12/3/2012'],
+ BQuarterEnd: ['11/2/2012', '12/31/2012'],
+ Day: ['11/4/2012', '11/4/2012 23:00']}.items()
+
+ @pytest.mark.parametrize('tup', offset_classes)
+ def test_all_offset_classes(self, tup):
+ offset, test_values = tup
+
+ first = Timestamp(test_values[0], tz='US/Eastern') + offset()
+ second = Timestamp(test_values[1], tz='US/Eastern')
+ assert first == second
diff --git a/pandas/tests/tseries/offsets/test_ticks.py b/pandas/tests/tseries/offsets/test_ticks.py
new file mode 100644
index 00000000000000..24033d4ff6cbde
--- /dev/null
+++ b/pandas/tests/tseries/offsets/test_ticks.py
@@ -0,0 +1,236 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for offsets.Tick and subclasses
+"""
+from datetime import datetime, timedelta
+
+import pytest
+import numpy as np
+
+from pandas import Timedelta, Timestamp
+from pandas.tseries import offsets
+from pandas.tseries.offsets import Hour, Minute, Second, Milli, Micro, Nano
+
+from .common import assert_offset_equal
+
+# ---------------------------------------------------------------------
+# Test Helpers
+
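+# Tick subclasses exercised by the parametrized tests below.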
+tick_classes = [Hour, Minute, Second, Milli, Micro, Nano]
+
+
+# ---------------------------------------------------------------------
+
+
+def test_apply_ticks():
+ result = offsets.Hour(3).apply(offsets.Hour(4))
+ exp = offsets.Hour(7)
+ assert result == exp
+
+
+def test_delta_to_tick():
+ delta = timedelta(3)
+
+ tick = offsets._delta_to_tick(delta)
+ assert tick == offsets.Day(3)
+
+
+# ---------------------------------------------------------------------
+
+
+def test_Hour():
+ assert_offset_equal(Hour(),
+ datetime(2010, 1, 1), datetime(2010, 1, 1, 1))
+ assert_offset_equal(Hour(-1),
+ datetime(2010, 1, 1, 1), datetime(2010, 1, 1))
+ assert_offset_equal(2 * Hour(),
+ datetime(2010, 1, 1), datetime(2010, 1, 1, 2))
+ assert_offset_equal(-1 * Hour(),
+ datetime(2010, 1, 1, 1), datetime(2010, 1, 1))
+
+ assert Hour(3) + Hour(2) == Hour(5)
+ assert Hour(3) - Hour(2) == Hour()
+
+ assert Hour(4) != Hour(1)
+
+
+def test_Minute():
+ assert_offset_equal(Minute(),
+ datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 1))
+ assert_offset_equal(Minute(-1),
+ datetime(2010, 1, 1, 0, 1), datetime(2010, 1, 1))
+ assert_offset_equal(2 * Minute(),
+ datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 2))
+ assert_offset_equal(-1 * Minute(),
+ datetime(2010, 1, 1, 0, 1), datetime(2010, 1, 1))
+
+ assert Minute(3) + Minute(2) == Minute(5)
+ assert Minute(3) - Minute(2) == Minute()
+ assert Minute(5) != Minute()
+
+
+def test_Second():
+ assert_offset_equal(Second(),
+ datetime(2010, 1, 1),
+ datetime(2010, 1, 1, 0, 0, 1))
+ assert_offset_equal(Second(-1),
+ datetime(2010, 1, 1, 0, 0, 1),
+ datetime(2010, 1, 1))
+ assert_offset_equal(2 * Second(),
+ datetime(2010, 1, 1),
+ datetime(2010, 1, 1, 0, 0, 2))
+ assert_offset_equal(-1 * Second(),
+ datetime(2010, 1, 1, 0, 0, 1),
+ datetime(2010, 1, 1))
+
+ assert Second(3) + Second(2) == Second(5)
+ assert Second(3) - Second(2) == Second()
+
+
+def test_Millisecond():
+ assert_offset_equal(Milli(),
+ datetime(2010, 1, 1),
+ datetime(2010, 1, 1, 0, 0, 0, 1000))
+ assert_offset_equal(Milli(-1),
+ datetime(2010, 1, 1, 0, 0, 0, 1000),
+ datetime(2010, 1, 1))
+ assert_offset_equal(Milli(2),
+ datetime(2010, 1, 1),
+ datetime(2010, 1, 1, 0, 0, 0, 2000))
+ assert_offset_equal(2 * Milli(),
+ datetime(2010, 1, 1),
+ datetime(2010, 1, 1, 0, 0, 0, 2000))
+ assert_offset_equal(-1 * Milli(),
+ datetime(2010, 1, 1, 0, 0, 0, 1000),
+ datetime(2010, 1, 1))
+
+ assert Milli(3) + Milli(2) == Milli(5)
+ assert Milli(3) - Milli(2) == Milli()
+
+
+def test_MillisecondTimestampArithmetic():
+ assert_offset_equal(Milli(),
+ Timestamp('2010-01-01'),
+ Timestamp('2010-01-01 00:00:00.001'))
+ assert_offset_equal(Milli(-1),
+ Timestamp('2010-01-01 00:00:00.001'),
+ Timestamp('2010-01-01'))
+
+
+def test_Microsecond():
+ assert_offset_equal(Micro(),
+ datetime(2010, 1, 1),
+ datetime(2010, 1, 1, 0, 0, 0, 1))
+ assert_offset_equal(Micro(-1),
+ datetime(2010, 1, 1, 0, 0, 0, 1),
+ datetime(2010, 1, 1))
+
+ assert_offset_equal(2 * Micro(),
+ datetime(2010, 1, 1),
+ datetime(2010, 1, 1, 0, 0, 0, 2))
+ assert_offset_equal(-1 * Micro(),
+ datetime(2010, 1, 1, 0, 0, 0, 1),
+ datetime(2010, 1, 1))
+
+ assert Micro(3) + Micro(2) == Micro(5)
+ assert Micro(3) - Micro(2) == Micro()
+
+
+def test_NanosecondGeneric():
+ timestamp = Timestamp(datetime(2010, 1, 1))
+ assert timestamp.nanosecond == 0
+
+ result = timestamp + Nano(10)
+ assert result.nanosecond == 10
+
+ reverse_result = Nano(10) + timestamp
+ assert reverse_result.nanosecond == 10
+
+
+def test_Nanosecond():
+ timestamp = Timestamp(datetime(2010, 1, 1))
+ assert_offset_equal(Nano(),
+ timestamp,
+ timestamp + np.timedelta64(1, 'ns'))
+ assert_offset_equal(Nano(-1),
+ timestamp + np.timedelta64(1, 'ns'),
+ timestamp)
+ assert_offset_equal(2 * Nano(),
+ timestamp,
+ timestamp + np.timedelta64(2, 'ns'))
+ assert_offset_equal(-1 * Nano(),
+ timestamp + np.timedelta64(1, 'ns'),
+ timestamp)
+
+ assert Nano(3) + Nano(2) == Nano(5)
+ assert Nano(3) - Nano(2) == Nano()
+
+ # GH9284
+ assert Nano(1) + Nano(10) == Nano(11)
+ assert Nano(5) + Micro(1) == Nano(1005)
+ assert Micro(5) + Nano(1) == Nano(5001)
+
+
+@pytest.mark.parametrize('kls, expected',
+ [(Hour, Timedelta(hours=5)),
+ (Minute, Timedelta(hours=2, minutes=3)),
+ (Second, Timedelta(hours=2, seconds=3)),
+ (Milli, Timedelta(hours=2, milliseconds=3)),
+ (Micro, Timedelta(hours=2, microseconds=3)),
+ (Nano, Timedelta(hours=2, nanoseconds=3))])
+def test_tick_addition(kls, expected):
+ offset = kls(3)
+ result = offset + Timedelta(hours=2)
+ assert isinstance(result, Timedelta)
+ assert result == expected
+
+
+@pytest.mark.parametrize('cls1', tick_classes)
+@pytest.mark.parametrize('cls2', tick_classes)
+def test_tick_zero(cls1, cls2):
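+ # Zero-count ticks of any unit should compare equal and act as an additive identity.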
+ assert cls1(0) == cls2(0)
+ assert cls1(0) + cls2(0) == cls1(0)
+
+ if cls1 is not Nano:
+ assert cls1(2) + cls2(0) == cls1(2)
+
+ if cls1 is Nano:
+ assert cls1(2) + Nano(0) == cls1(2)
+
+
+@pytest.mark.parametrize('cls', tick_classes)
+def test_tick_equalities(cls):
+ assert cls(3) == cls(3)
+ assert cls() == cls(1)
+
+ # not equals
+ assert cls(3) != cls(2)
+ assert cls(3) != cls(-3)
+
+
+@pytest.mark.parametrize('cls', tick_classes)
+def test_tick_operators(cls):
+ assert cls(3) + cls(2) == cls(5)
+ assert cls(3) - cls(2) == cls(1)
+ assert cls(800) + cls(300) == cls(1100)
+ assert cls(1000) - cls(5) == cls(995)
+
+
+@pytest.mark.parametrize('cls', tick_classes)
+def test_tick_offset(cls):
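+ # Tick offsets are fixed durations, so they should never report being anchored
+ # to a calendar point.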
+ assert not cls().isAnchored()
+
+
+@pytest.mark.parametrize('cls', tick_classes)
+def test_compare_ticks(cls):
+ three = cls(3)
+ four = cls(4)
+
+ # TODO: these comparisons are repeated identically on every pass; determine
+ # whether the range(10) loop is actually needed or can be dropped.
+ for _ in range(10):
+ assert three < cls(4)
+ assert cls(3) < four
+ assert four > cls(3)
+ assert cls(4) > three
+ assert cls(3) == cls(3)
+ assert cls(3) != cls(4)
diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py
index 39a9a87141753e..9666a4c154c635 100644
--- a/pandas/tests/tseries/test_frequencies.py
+++ b/pandas/tests/tseries/test_frequencies.py
@@ -7,6 +7,7 @@
from pandas import (Index, DatetimeIndex, Timestamp, Series,
date_range, period_range)
+from pandas._libs.tslibs import resolution
import pandas.tseries.frequencies as frequencies
from pandas.core.tools.datetimes import to_datetime
@@ -169,6 +170,19 @@ def test_to_offset_leading_zero(self):
result = frequencies.to_offset(freqstr)
assert (result.n == -194)
+ def test_to_offset_leading_plus(self):
+ freqstr = '+1d'
+ result = frequencies.to_offset(freqstr)
+ assert (result.n == 1)
+
+ freqstr = '+2h30min'
+ result = frequencies.to_offset(freqstr)
+ assert (result.n == 150)
+
+ for bad_freq in ['+-1d', '-+1h', '+1', '-7', '+d', '-m']:
+ with tm.assert_raises_regex(ValueError, 'Invalid frequency:'):
+ frequencies.to_offset(bad_freq)
+
def test_to_offset_pd_timedelta(self):
# Tests for #9064
td = Timedelta(days=1, seconds=1)
@@ -370,35 +384,35 @@ def test_freq_code(self):
result = frequencies.get_freq(freqstr)
assert result == code
- result = frequencies.get_freq_group(freqstr)
+ result = resolution.get_freq_group(freqstr)
assert result == code // 1000 * 1000
- result = frequencies.get_freq_group(code)
+ result = resolution.get_freq_group(code)
assert result == code // 1000 * 1000
def test_freq_group(self):
- assert frequencies.get_freq_group('A') == 1000
- assert frequencies.get_freq_group('3A') == 1000
- assert frequencies.get_freq_group('-1A') == 1000
- assert frequencies.get_freq_group('A-JAN') == 1000
- assert frequencies.get_freq_group('A-MAY') == 1000
-
- assert frequencies.get_freq_group('Y') == 1000
- assert frequencies.get_freq_group('3Y') == 1000
- assert frequencies.get_freq_group('-1Y') == 1000
- assert frequencies.get_freq_group('Y-JAN') == 1000
- assert frequencies.get_freq_group('Y-MAY') == 1000
-
- assert frequencies.get_freq_group(offsets.YearEnd()) == 1000
- assert frequencies.get_freq_group(offsets.YearEnd(month=1)) == 1000
- assert frequencies.get_freq_group(offsets.YearEnd(month=5)) == 1000
-
- assert frequencies.get_freq_group('W') == 4000
- assert frequencies.get_freq_group('W-MON') == 4000
- assert frequencies.get_freq_group('W-FRI') == 4000
- assert frequencies.get_freq_group(offsets.Week()) == 4000
- assert frequencies.get_freq_group(offsets.Week(weekday=1)) == 4000
- assert frequencies.get_freq_group(offsets.Week(weekday=5)) == 4000
+ assert resolution.get_freq_group('A') == 1000
+ assert resolution.get_freq_group('3A') == 1000
+ assert resolution.get_freq_group('-1A') == 1000
+ assert resolution.get_freq_group('A-JAN') == 1000
+ assert resolution.get_freq_group('A-MAY') == 1000
+
+ assert resolution.get_freq_group('Y') == 1000
+ assert resolution.get_freq_group('3Y') == 1000
+ assert resolution.get_freq_group('-1Y') == 1000
+ assert resolution.get_freq_group('Y-JAN') == 1000
+ assert resolution.get_freq_group('Y-MAY') == 1000
+
+ assert resolution.get_freq_group(offsets.YearEnd()) == 1000
+ assert resolution.get_freq_group(offsets.YearEnd(month=1)) == 1000
+ assert resolution.get_freq_group(offsets.YearEnd(month=5)) == 1000
+
+ assert resolution.get_freq_group('W') == 4000
+ assert resolution.get_freq_group('W-MON') == 4000
+ assert resolution.get_freq_group('W-FRI') == 4000
+ assert resolution.get_freq_group(offsets.Week()) == 4000
+ assert resolution.get_freq_group(offsets.Week(weekday=1)) == 4000
+ assert resolution.get_freq_group(offsets.Week(weekday=5)) == 4000
def test_get_to_timestamp_base(self):
tsb = frequencies.get_to_timestamp_base
@@ -510,7 +524,7 @@ def test_get_freq_code(self):
(frequencies.get_freq('W-FRI'), -2))
def test_frequency_misc(self):
- assert (frequencies.get_freq_group('T') ==
+ assert (resolution.get_freq_group('T') ==
frequencies.FreqGroup.FR_MIN)
code, stride = frequencies.get_freq_code(offsets.Hour())
diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py
index ddcf1bb7d8b7bd..3dfad2d4af75ed 100644
--- a/pandas/tests/tseries/test_timezones.py
+++ b/pandas/tests/tseries/test_timezones.py
@@ -13,11 +13,11 @@
import pandas.util.testing as tm
import pandas.tseries.offsets as offsets
-from pandas.compat import lrange, zip
+from pandas.compat import lrange, zip, PY3
from pandas.core.indexes.datetimes import bdate_range, date_range
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas._libs import tslib
-from pandas._libs.tslibs import timezones
+from pandas._libs.tslibs import timezones, conversion
from pandas import (Index, Series, DataFrame, isna, Timestamp, NaT,
DatetimeIndex, to_datetime)
from pandas.util.testing import (assert_frame_equal, assert_series_equal,
@@ -1278,16 +1278,22 @@ def test_replace_tzinfo(self):
result_dt = dt.replace(tzinfo=tzinfo)
result_pd = Timestamp(dt).replace(tzinfo=tzinfo)
- if hasattr(result_dt, 'timestamp'): # New method in Py 3.3
- assert result_dt.timestamp() == result_pd.timestamp()
+ if PY3:
+ # datetime.timestamp() converts in the local timezone
+ with tm.set_timezone('UTC'):
+ assert result_dt.timestamp() == result_pd.timestamp()
+
assert result_dt == result_pd
assert result_dt == result_pd.to_pydatetime()
result_dt = dt.replace(tzinfo=tzinfo).replace(tzinfo=None)
result_pd = Timestamp(dt).replace(tzinfo=tzinfo).replace(tzinfo=None)
- if hasattr(result_dt, 'timestamp'): # New method in Py 3.3
- assert result_dt.timestamp() == result_pd.timestamp()
+ if PY3:
+ # datetime.timestamp() converts in the local timezone
+ with tm.set_timezone('UTC'):
+ assert result_dt.timestamp() == result_pd.timestamp()
+
assert result_dt == result_pd
assert result_dt == result_pd.to_pydatetime()
@@ -1732,14 +1738,14 @@ class TestTslib(object):
def test_tslib_tz_convert(self):
def compare_utc_to_local(tz_didx, utc_didx):
- f = lambda x: tslib.tz_convert_single(x, 'UTC', tz_didx.tz)
- result = tslib.tz_convert(tz_didx.asi8, 'UTC', tz_didx.tz)
+ f = lambda x: conversion.tz_convert_single(x, 'UTC', tz_didx.tz)
+ result = conversion.tz_convert(tz_didx.asi8, 'UTC', tz_didx.tz)
result_single = np.vectorize(f)(tz_didx.asi8)
tm.assert_numpy_array_equal(result, result_single)
def compare_local_to_utc(tz_didx, utc_didx):
- f = lambda x: tslib.tz_convert_single(x, tz_didx.tz, 'UTC')
- result = tslib.tz_convert(utc_didx.asi8, tz_didx.tz, 'UTC')
+ f = lambda x: conversion.tz_convert_single(x, tz_didx.tz, 'UTC')
+ result = conversion.tz_convert(utc_didx.asi8, tz_didx.tz, 'UTC')
result_single = np.vectorize(f)(utc_didx.asi8)
tm.assert_numpy_array_equal(result, result_single)
@@ -1764,14 +1770,14 @@ def compare_local_to_utc(tz_didx, utc_didx):
compare_local_to_utc(tz_didx, utc_didx)
# Check empty array
- result = tslib.tz_convert(np.array([], dtype=np.int64),
- timezones.maybe_get_tz('US/Eastern'),
- timezones.maybe_get_tz('Asia/Tokyo'))
+ result = conversion.tz_convert(np.array([], dtype=np.int64),
+ timezones.maybe_get_tz('US/Eastern'),
+ timezones.maybe_get_tz('Asia/Tokyo'))
tm.assert_numpy_array_equal(result, np.array([], dtype=np.int64))
# Check all-NaT array
- result = tslib.tz_convert(np.array([tslib.iNaT], dtype=np.int64),
- timezones.maybe_get_tz('US/Eastern'),
- timezones.maybe_get_tz('Asia/Tokyo'))
+ result = conversion.tz_convert(np.array([tslib.iNaT], dtype=np.int64),
+ timezones.maybe_get_tz('US/Eastern'),
+ timezones.maybe_get_tz('Asia/Tokyo'))
tm.assert_numpy_array_equal(result, np.array(
[tslib.iNaT], dtype=np.int64))
diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py
index be25a439f9075b..fef88587a7282e 100644
--- a/pandas/tseries/frequencies.py
+++ b/pandas/tseries/frequencies.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
from datetime import timedelta
-from pandas.compat import long, zip
+from pandas.compat import zip
from pandas import compat
import re
import warnings
@@ -13,18 +13,21 @@
is_timedelta64_dtype,
is_datetime64_dtype)
-import pandas.core.algorithms as algos
-from pandas.core.algorithms import unique
from pandas.tseries.offsets import DateOffset
-from pandas.util._decorators import cache_readonly, deprecate_kwarg
+from pandas.util._decorators import deprecate_kwarg
import pandas.tseries.offsets as offsets
-from pandas._libs import lib, tslib
+from pandas._libs import tslib
from pandas._libs.tslib import Timedelta
from pandas._libs.tslibs.frequencies import ( # noqa
get_freq_code, _base_and_stride, _period_str_to_code,
_INVALID_FREQ_ERROR, opattern, _lite_rule_alias, _dont_uppercase,
_period_code_map, _reverse_period_code_map)
+from pandas._libs.tslibs.resolution import (Resolution,
+ _FrequencyInferer,
+ _TimedeltaFrequencyInferer)
+from pandas._libs.tslibs.parsing import _get_rule_month
+
from pytz import AmbiguousTimeError
@@ -52,184 +55,6 @@ class FreqGroup(object):
RESO_DAY = 6
-class Resolution(object):
-
- RESO_US = RESO_US
- RESO_MS = RESO_MS
- RESO_SEC = RESO_SEC
- RESO_MIN = RESO_MIN
- RESO_HR = RESO_HR
- RESO_DAY = RESO_DAY
-
- _reso_str_map = {
- RESO_NS: 'nanosecond',
- RESO_US: 'microsecond',
- RESO_MS: 'millisecond',
- RESO_SEC: 'second',
- RESO_MIN: 'minute',
- RESO_HR: 'hour',
- RESO_DAY: 'day'
- }
-
- # factor to multiply a value by to convert it to the next finer grained
- # resolution
- _reso_mult_map = {
- RESO_NS: None,
- RESO_US: 1000,
- RESO_MS: 1000,
- RESO_SEC: 1000,
- RESO_MIN: 60,
- RESO_HR: 60,
- RESO_DAY: 24
- }
-
- _reso_str_bump_map = {
- 'D': 'H',
- 'H': 'T',
- 'T': 'S',
- 'S': 'L',
- 'L': 'U',
- 'U': 'N',
- 'N': None
- }
-
- _str_reso_map = dict([(v, k) for k, v in compat.iteritems(_reso_str_map)])
-
- _reso_freq_map = {
- 'year': 'A',
- 'quarter': 'Q',
- 'month': 'M',
- 'day': 'D',
- 'hour': 'H',
- 'minute': 'T',
- 'second': 'S',
- 'millisecond': 'L',
- 'microsecond': 'U',
- 'nanosecond': 'N'}
-
- _freq_reso_map = dict([(v, k)
- for k, v in compat.iteritems(_reso_freq_map)])
-
- @classmethod
- def get_str(cls, reso):
- """
- Return resolution str against resolution code.
-
- Example
- -------
- >>> Resolution.get_str(Resolution.RESO_SEC)
- 'second'
- """
- return cls._reso_str_map.get(reso, 'day')
-
- @classmethod
- def get_reso(cls, resostr):
- """
- Return resolution str against resolution code.
-
- Example
- -------
- >>> Resolution.get_reso('second')
- 2
-
- >>> Resolution.get_reso('second') == Resolution.RESO_SEC
- True
- """
- return cls._str_reso_map.get(resostr, cls.RESO_DAY)
-
- @classmethod
- def get_freq_group(cls, resostr):
- """
- Return frequency str against resolution str.
-
- Example
- -------
- >>> f.Resolution.get_freq_group('day')
- 4000
- """
- return get_freq_group(cls.get_freq(resostr))
-
- @classmethod
- def get_freq(cls, resostr):
- """
- Return frequency str against resolution str.
-
- Example
- -------
- >>> f.Resolution.get_freq('day')
- 'D'
- """
- return cls._reso_freq_map[resostr]
-
- @classmethod
- def get_str_from_freq(cls, freq):
- """
- Return resolution str against frequency str.
-
- Example
- -------
- >>> Resolution.get_str_from_freq('H')
- 'hour'
- """
- return cls._freq_reso_map.get(freq, 'day')
-
- @classmethod
- def get_reso_from_freq(cls, freq):
- """
- Return resolution code against frequency str.
-
- Example
- -------
- >>> Resolution.get_reso_from_freq('H')
- 4
-
- >>> Resolution.get_reso_from_freq('H') == Resolution.RESO_HR
- True
- """
- return cls.get_reso(cls.get_str_from_freq(freq))
-
- @classmethod
- def get_stride_from_decimal(cls, value, freq):
- """
- Convert freq with decimal stride into a higher freq with integer stride
-
- Parameters
- ----------
- value : integer or float
- freq : string
- Frequency string
-
- Raises
- ------
- ValueError
- If the float cannot be converted to an integer at any resolution.
-
- Example
- -------
- >>> Resolution.get_stride_from_decimal(1.5, 'T')
- (90, 'S')
-
- >>> Resolution.get_stride_from_decimal(1.04, 'H')
- (3744, 'S')
-
- >>> Resolution.get_stride_from_decimal(1, 'D')
- (1, 'D')
- """
-
- if np.isclose(value % 1, 0):
- return int(value), freq
- else:
- start_reso = cls.get_reso_from_freq(freq)
- if start_reso == 0:
- raise ValueError(
- "Could not convert to integer offset at any resolution"
- )
-
- next_value = cls._reso_mult_map[start_reso] * value
- next_name = cls._reso_str_bump_map[freq]
- return cls.get_stride_from_decimal(next_value, next_name)
-
-
def get_to_timestamp_base(base):
"""
Return frequency code group used for base of to_timestamp against
@@ -258,31 +83,6 @@ def get_to_timestamp_base(base):
return base
-def get_freq_group(freq):
- """
- Return frequency code group of given frequency str or offset.
-
- Example
- -------
- >>> get_freq_group('W-MON')
- 4000
-
- >>> get_freq_group('W-FRI')
- 4000
- """
- if isinstance(freq, offsets.DateOffset):
- freq = freq.rule_code
-
- if isinstance(freq, compat.string_types):
- base, mult = get_freq_code(freq)
- freq = base
- elif isinstance(freq, int):
- pass
- else:
- raise ValueError('input must be str, offset or int')
- return (freq // 1000) * 1000
-
-
def get_freq(freq):
"""
Return frequency code of given frequency str.
@@ -562,278 +362,6 @@ def infer_freq(index, warn=True):
return inferer.get_freq()
-_ONE_MICRO = long(1000)
-_ONE_MILLI = _ONE_MICRO * 1000
-_ONE_SECOND = _ONE_MILLI * 1000
-_ONE_MINUTE = 60 * _ONE_SECOND
-_ONE_HOUR = 60 * _ONE_MINUTE
-_ONE_DAY = 24 * _ONE_HOUR
-
-
-class _FrequencyInferer(object):
- """
- Not sure if I can avoid the state machine here
- """
-
- def __init__(self, index, warn=True):
- self.index = index
- self.values = np.asarray(index).view('i8')
-
- # This moves the values, which are implicitly in UTC, to the
- # the timezone so they are in local time
- if hasattr(index, 'tz'):
- if index.tz is not None:
- self.values = tslib.tz_convert(self.values, 'UTC', index.tz)
-
- self.warn = warn
-
- if len(index) < 3:
- raise ValueError('Need at least 3 dates to infer frequency')
-
- self.is_monotonic = (self.index.is_monotonic_increasing or
- self.index.is_monotonic_decreasing)
-
- @cache_readonly
- def deltas(self):
- return tslib.unique_deltas(self.values)
-
- @cache_readonly
- def deltas_asi8(self):
- return tslib.unique_deltas(self.index.asi8)
-
- @cache_readonly
- def is_unique(self):
- return len(self.deltas) == 1
-
- @cache_readonly
- def is_unique_asi8(self):
- return len(self.deltas_asi8) == 1
-
- def get_freq(self):
- if not self.is_monotonic or not self.index.is_unique:
- return None
-
- delta = self.deltas[0]
- if _is_multiple(delta, _ONE_DAY):
- return self._infer_daily_rule()
- else:
- # Business hourly, maybe. 17: one day / 65: one weekend
- if self.hour_deltas in ([1, 17], [1, 65], [1, 17, 65]):
- return 'BH'
- # Possibly intraday frequency. Here we use the
- # original .asi8 values as the modified values
- # will not work around DST transitions. See #8772
- elif not self.is_unique_asi8:
- return None
- delta = self.deltas_asi8[0]
- if _is_multiple(delta, _ONE_HOUR):
- # Hours
- return _maybe_add_count('H', delta / _ONE_HOUR)
- elif _is_multiple(delta, _ONE_MINUTE):
- # Minutes
- return _maybe_add_count('T', delta / _ONE_MINUTE)
- elif _is_multiple(delta, _ONE_SECOND):
- # Seconds
- return _maybe_add_count('S', delta / _ONE_SECOND)
- elif _is_multiple(delta, _ONE_MILLI):
- # Milliseconds
- return _maybe_add_count('L', delta / _ONE_MILLI)
- elif _is_multiple(delta, _ONE_MICRO):
- # Microseconds
- return _maybe_add_count('U', delta / _ONE_MICRO)
- else:
- # Nanoseconds
- return _maybe_add_count('N', delta)
-
- @cache_readonly
- def day_deltas(self):
- return [x / _ONE_DAY for x in self.deltas]
-
- @cache_readonly
- def hour_deltas(self):
- return [x / _ONE_HOUR for x in self.deltas]
-
- @cache_readonly
- def fields(self):
- return tslib.build_field_sarray(self.values)
-
- @cache_readonly
- def rep_stamp(self):
- return lib.Timestamp(self.values[0])
-
- def month_position_check(self):
- # TODO: cythonize this, very slow
- calendar_end = True
- business_end = True
- calendar_start = True
- business_start = True
-
- years = self.fields['Y']
- months = self.fields['M']
- days = self.fields['D']
- weekdays = self.index.dayofweek
-
- from calendar import monthrange
- for y, m, d, wd in zip(years, months, days, weekdays):
-
- if calendar_start:
- calendar_start &= d == 1
- if business_start:
- business_start &= d == 1 or (d <= 3 and wd == 0)
-
- if calendar_end or business_end:
- _, daysinmonth = monthrange(y, m)
- cal = d == daysinmonth
- if calendar_end:
- calendar_end &= cal
- if business_end:
- business_end &= cal or (daysinmonth - d < 3 and wd == 4)
- elif not calendar_start and not business_start:
- break
-
- if calendar_end:
- return 'ce'
- elif business_end:
- return 'be'
- elif calendar_start:
- return 'cs'
- elif business_start:
- return 'bs'
- else:
- return None
-
- @cache_readonly
- def mdiffs(self):
- nmonths = self.fields['Y'] * 12 + self.fields['M']
- return tslib.unique_deltas(nmonths.astype('i8'))
-
- @cache_readonly
- def ydiffs(self):
- return tslib.unique_deltas(self.fields['Y'].astype('i8'))
-
- def _infer_daily_rule(self):
- annual_rule = self._get_annual_rule()
- if annual_rule:
- nyears = self.ydiffs[0]
- month = _month_aliases[self.rep_stamp.month]
- alias = '{prefix}-{month}'.format(prefix=annual_rule, month=month)
- return _maybe_add_count(alias, nyears)
-
- quarterly_rule = self._get_quarterly_rule()
- if quarterly_rule:
- nquarters = self.mdiffs[0] / 3
- mod_dict = {0: 12, 2: 11, 1: 10}
- month = _month_aliases[mod_dict[self.rep_stamp.month % 3]]
- alias = '{prefix}-{month}'.format(prefix=quarterly_rule,
- month=month)
- return _maybe_add_count(alias, nquarters)
-
- monthly_rule = self._get_monthly_rule()
- if monthly_rule:
- return _maybe_add_count(monthly_rule, self.mdiffs[0])
-
- if self.is_unique:
- days = self.deltas[0] / _ONE_DAY
- if days % 7 == 0:
- # Weekly
- day = _weekday_rule_aliases[self.rep_stamp.weekday()]
- return _maybe_add_count('W-{day}'.format(day=day), days / 7)
- else:
- return _maybe_add_count('D', days)
-
- if self._is_business_daily():
- return 'B'
-
- wom_rule = self._get_wom_rule()
- if wom_rule:
- return wom_rule
-
- def _get_annual_rule(self):
- if len(self.ydiffs) > 1:
- return None
-
- if len(algos.unique(self.fields['M'])) > 1:
- return None
-
- pos_check = self.month_position_check()
- return {'cs': 'AS', 'bs': 'BAS',
- 'ce': 'A', 'be': 'BA'}.get(pos_check)
-
- def _get_quarterly_rule(self):
- if len(self.mdiffs) > 1:
- return None
-
- if not self.mdiffs[0] % 3 == 0:
- return None
-
- pos_check = self.month_position_check()
- return {'cs': 'QS', 'bs': 'BQS',
- 'ce': 'Q', 'be': 'BQ'}.get(pos_check)
-
- def _get_monthly_rule(self):
- if len(self.mdiffs) > 1:
- return None
- pos_check = self.month_position_check()
- return {'cs': 'MS', 'bs': 'BMS',
- 'ce': 'M', 'be': 'BM'}.get(pos_check)
-
- def _is_business_daily(self):
- # quick check: cannot be business daily
- if self.day_deltas != [1, 3]:
- return False
-
- # probably business daily, but need to confirm
- first_weekday = self.index[0].weekday()
- shifts = np.diff(self.index.asi8)
- shifts = np.floor_divide(shifts, _ONE_DAY)
- weekdays = np.mod(first_weekday + np.cumsum(shifts), 7)
- return np.all(((weekdays == 0) & (shifts == 3)) |
- ((weekdays > 0) & (weekdays <= 4) & (shifts == 1)))
-
- def _get_wom_rule(self):
- # wdiffs = unique(np.diff(self.index.week))
- # We also need -47, -49, -48 to catch index spanning year boundary
- # if not lib.ismember(wdiffs, set([4, 5, -47, -49, -48])).all():
- # return None
-
- weekdays = unique(self.index.weekday)
- if len(weekdays) > 1:
- return None
-
- week_of_months = unique((self.index.day - 1) // 7)
- # Only attempt to infer up to WOM-4. See #9425
- week_of_months = week_of_months[week_of_months < 4]
- if len(week_of_months) == 0 or len(week_of_months) > 1:
- return None
-
- # get which week
- week = week_of_months[0] + 1
- wd = _weekday_rule_aliases[weekdays[0]]
-
- return 'WOM-{week}{weekday}'.format(week=week, weekday=wd)
-
-
-class _TimedeltaFrequencyInferer(_FrequencyInferer):
-
- def _infer_daily_rule(self):
- if self.is_unique:
- days = self.deltas[0] / _ONE_DAY
- if days % 7 == 0:
- # Weekly
- wd = _weekday_rule_aliases[self.rep_stamp.weekday()]
- alias = 'W-{weekday}'.format(weekday=wd)
- return _maybe_add_count(alias, days / 7)
- else:
- return _maybe_add_count('D', days)
-
-
-def _maybe_add_count(base, count):
- if count != 1:
- return '{count}{base}'.format(count=int(count), base=base)
- else:
- return base
-
-
def _maybe_coerce_freq(code):
""" we might need to coerce a code to a rule_code
and uppercase it
@@ -963,9 +491,6 @@ def is_superperiod(source, target):
return target in ['N']
-_get_rule_month = tslib._get_rule_month
-
-
def _is_annual(rule):
rule = rule.upper()
return rule == 'A' or rule.startswith('A-')
@@ -992,13 +517,5 @@ def _is_weekly(rule):
return rule == 'W' or rule.startswith('W-')
-DAYS = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN']
-
MONTHS = tslib._MONTHS
_month_numbers = tslib._MONTH_NUMBERS
-_month_aliases = tslib._MONTH_ALIASES
-_weekday_rule_aliases = dict((k, v) for k, v in enumerate(DAYS))
-
-
-def _is_multiple(us, mult):
- return us % mult == 0
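The block removed above is the rule machinery behind pandas frequency inference (daily/weekly/WOM aliases, count prefixes, business-daily detection); it presumably moves alongside the new _libs.tslibs.resolution extension registered in setup.py further down. A minimal sketch of the aliases those rules produce, using only the public pd.infer_freq entry point:

    import pandas as pd

    pd.infer_freq(pd.date_range('2017-01-01', periods=10, freq='D'))        # 'D'
    pd.infer_freq(pd.date_range('2017-01-01', periods=10, freq='2H'))       # '2H'  -- _maybe_add_count prepends the multiple
    pd.infer_freq(pd.date_range('2017-01-06', periods=6, freq='WOM-1FRI'))  # 'WOM-1FRI' -- _get_wom_rule, first Friday of each month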
diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py
index 5843aaa23be574..4dc26f4dd69e29 100644
--- a/pandas/tseries/offsets.py
+++ b/pandas/tseries/offsets.py
@@ -22,6 +22,7 @@
_int_to_weekday, _weekday_to_int,
_determine_offset,
apply_index_wraps,
+ shift_month,
BeginMixin, EndMixin,
BaseOffset)
@@ -252,6 +253,8 @@ def apply_index(self, i):
"applied vectorized".format(kwd=kwd))
def isAnchored(self):
+ # TODO: Does this make sense for the general case? It would help
+ # if there were a canonical docstring for what isAnchored means.
return (self.n == 1)
def _params(self):
@@ -280,10 +283,10 @@ def _repr_attrs(self):
if not hasattr(self, key):
kwds_new[key] = self.kwds[key]
if len(kwds_new) > 0:
- attrs.append('kwds=%s' % (kwds_new))
+ attrs.append('kwds={kwds_new}'.format(kwds_new=kwds_new))
elif attr not in exclude:
value = getattr(self, attr)
- attrs.append('%s=%s' % (attr, value))
+ attrs.append('{attr}={value}'.format(attr=attr, value=value))
out = ''
if attrs:
@@ -721,6 +724,7 @@ def apply(self, other):
return result
else:
+ # TODO: Figure out the end of this sentence
raise ApplyTypeError(
'Only know how to combine business hour with ')
@@ -927,10 +931,10 @@ def apply(self, other):
n = self.n
_, days_in_month = tslib.monthrange(other.year, other.month)
if other.day != days_in_month:
- other = other + relativedelta(months=-1, day=31)
+ other = shift_month(other, -1, 'end')
if n <= 0:
n = n + 1
- other = other + relativedelta(months=n, day=31)
+ other = shift_month(other, n, 'end')
return other
@apply_index_wraps
@@ -956,7 +960,7 @@ def apply(self, other):
if other.day > 1 and n <= 0: # then roll forward if n<=0
n += 1
- return other + relativedelta(months=n, day=1)
+ return shift_month(other, n, 'start')
@apply_index_wraps
def apply_index(self, i):
@@ -1002,12 +1006,12 @@ def apply(self, other):
if not self.onOffset(other):
_, days_in_month = tslib.monthrange(other.year, other.month)
if 1 < other.day < self.day_of_month:
- other += relativedelta(day=self.day_of_month)
+ other = other.replace(day=self.day_of_month)
if n > 0:
# rollforward so subtract 1
n -= 1
elif self.day_of_month < other.day < days_in_month:
- other += relativedelta(day=self.day_of_month)
+ other = other.replace(day=self.day_of_month)
if n < 0:
# rollforward in the negative direction so add 1
n += 1
@@ -1084,11 +1088,11 @@ def onOffset(self, dt):
def _apply(self, n, other):
# if other.day is not day_of_month move to day_of_month and update n
if other.day < self.day_of_month:
- other += relativedelta(day=self.day_of_month)
+ other = other.replace(day=self.day_of_month)
if n > 0:
n -= 1
elif other.day > self.day_of_month:
- other += relativedelta(day=self.day_of_month)
+ other = other.replace(day=self.day_of_month)
if n == 0:
n = 1
else:
@@ -1096,7 +1100,7 @@ def _apply(self, n, other):
months = n // 2
day = 31 if n % 2 else self.day_of_month
- return other + relativedelta(months=months, day=day)
+ return shift_month(other, months, day)
def _get_roll(self, i, before_day_of_month, after_day_of_month):
n = self.n
@@ -1141,13 +1145,13 @@ def onOffset(self, dt):
def _apply(self, n, other):
# if other.day is not day_of_month move to day_of_month and update n
if other.day < self.day_of_month:
- other += relativedelta(day=self.day_of_month)
+ other = other.replace(day=self.day_of_month)
if n == 0:
n = -1
else:
n -= 1
elif other.day > self.day_of_month:
- other += relativedelta(day=self.day_of_month)
+ other = other.replace(day=self.day_of_month)
if n == 0:
n = 1
elif n < 0:
@@ -1155,7 +1159,7 @@ def _apply(self, n, other):
months = n // 2 + n % 2
day = 1 if n % 2 else self.day_of_month
- return other + relativedelta(months=months, day=day)
+ return shift_month(other, months, day)
def _get_roll(self, i, before_day_of_month, after_day_of_month):
n = self.n
@@ -1191,7 +1195,7 @@ def apply(self, other):
n = n - 1
elif n <= 0 and other.day > lastBDay:
n = n + 1
- other = other + relativedelta(months=n, day=31)
+ other = shift_month(other, n, 'end')
if other.weekday() > 4:
other = other - BDay()
@@ -1215,7 +1219,7 @@ def apply(self, other):
other = other + timedelta(days=first - other.day)
n -= 1
- other = other + relativedelta(months=n)
+ other = shift_month(other, n, None)
wkday, _ = tslib.monthrange(other.year, other.month)
first = _get_firstbday(wkday)
result = datetime(other.year, other.month, first,
@@ -1520,8 +1524,7 @@ def apply(self, other):
else:
months = self.n + 1
- other = self.getOffsetOfMonth(
- other + relativedelta(months=months, day=1))
+ other = self.getOffsetOfMonth(shift_month(other, months, 'start'))
other = datetime(other.year, other.month, other.day, base.hour,
base.minute, base.second, base.microsecond)
return other
@@ -1612,8 +1615,7 @@ def apply(self, other):
else:
months = self.n + 1
- return self.getOffsetOfMonth(
- other + relativedelta(months=months, day=1))
+ return self.getOffsetOfMonth(shift_month(other, months, 'start'))
def getOffsetOfMonth(self, dt):
m = MonthEnd()
@@ -1716,7 +1718,7 @@ def apply(self, other):
elif n <= 0 and other.day > lastBDay and monthsToGo == 0:
n = n + 1
- other = other + relativedelta(months=monthsToGo + 3 * n, day=31)
+ other = shift_month(other, monthsToGo + 3 * n, 'end')
other = tslib._localize_pydatetime(other, base.tzinfo)
if other.weekday() > 4:
other = other - BDay()
@@ -1761,7 +1763,7 @@ def apply(self, other):
n = n - 1
# get the first bday for result
- other = other + relativedelta(months=3 * n - monthsSince)
+ other = shift_month(other, 3 * n - monthsSince, None)
wkday, _ = tslib.monthrange(other.year, other.month)
first = _get_firstbday(wkday)
result = datetime(other.year, other.month, first,
@@ -1795,7 +1797,7 @@ def apply(self, other):
if n > 0 and not (other.day >= days_in_month and monthsToGo == 0):
n = n - 1
- other = other + relativedelta(months=monthsToGo + 3 * n, day=31)
+ other = shift_month(other, monthsToGo + 3 * n, 'end')
return other
@apply_index_wraps
@@ -1830,7 +1832,7 @@ def apply(self, other):
# after start, so come back an extra period as if rolled forward
n = n + 1
- other = other + relativedelta(months=3 * n - monthsSince, day=1)
+ other = shift_month(other, 3 * n - monthsSince, 'start')
return other
@apply_index_wraps
@@ -1889,7 +1891,7 @@ def apply(self, other):
(other.month == self.month and other.day > lastBDay)):
years += 1
- other = other + relativedelta(years=years)
+ other = shift_month(other, 12 * years, None)
_, days_in_month = tslib.monthrange(other.year, self.month)
result = datetime(other.year, self.month, days_in_month,
@@ -1927,7 +1929,7 @@ def apply(self, other):
years += 1
# set first bday for result
- other = other + relativedelta(years=years)
+ other = shift_month(other, years * 12, None)
wkday, days_in_month = tslib.monthrange(other.year, self.month)
first = _get_firstbday(wkday)
return datetime(other.year, self.month, first, other.hour,
@@ -2145,8 +2147,8 @@ def onOffset(self, dt):
if self.variation == "nearest":
# We have to check the year end of "this" cal year AND the previous
- return year_end == dt or \
- self.get_year_end(dt - relativedelta(months=1)) == dt
+ return (year_end == dt or
+ self.get_year_end(shift_month(dt, -1, None)) == dt)
else:
return year_end == dt
@@ -2226,8 +2228,8 @@ def get_year_end(self, dt):
def get_target_month_end(self, dt):
target_month = datetime(
dt.year, self.startingMonth, 1, tzinfo=dt.tzinfo)
- next_month_first_of = target_month + relativedelta(months=+1)
- return next_month_first_of + relativedelta(days=-1)
+ next_month_first_of = shift_month(target_month, 1, None)
+ return next_month_first_of + timedelta(days=-1)
def _get_year_end_nearest(self, dt):
target_date = self.get_target_month_end(dt)
@@ -2382,7 +2384,7 @@ def apply(self, other):
qtr_lens = self.get_weeks(other + self._offset)
for weeks in qtr_lens:
- start += relativedelta(weeks=weeks)
+ start += timedelta(weeks=weeks)
if start > other:
other = start
n -= 1
@@ -2399,7 +2401,7 @@ def apply(self, other):
qtr_lens = self.get_weeks(other)
for weeks in reversed(qtr_lens):
- end -= relativedelta(weeks=weeks)
+ end -= timedelta(weeks=weeks)
if end < other:
other = end
n -= 1
@@ -2442,7 +2444,7 @@ def onOffset(self, dt):
current = next_year_end
for qtr_len in qtr_lens[0:4]:
- current += relativedelta(weeks=qtr_len)
+ current += timedelta(weeks=qtr_len)
if dt == current:
return True
return False
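Throughout this file, relativedelta arithmetic is replaced by the shift_month helper added to the import block at the top of this diff. Judging from the one-to-one substitutions above, 'start' pins to the first day of the target month, 'end' pins to the last, an integer pins to that day, and None keeps the original day clamped to the month length (matching relativedelta(months=n)). A rough pure-Python sketch of that assumed behaviour, not the actual cythonized helper (shift_month_sketch is a hypothetical stand-in):

    from calendar import monthrange
    from datetime import datetime

    def shift_month_sketch(dt, months, day_opt=None):
        # land on the target year/month first
        month = dt.month - 1 + months
        year = dt.year + month // 12
        month = month % 12 + 1
        days_in_month = monthrange(year, month)[1]
        if day_opt is None:
            day = min(dt.day, days_in_month)      # keep the day, clamped
        elif day_opt == 'start':
            day = 1
        elif day_opt == 'end':
            day = days_in_month
        else:
            day = min(day_opt, days_in_month)     # explicit day-of-month, clamped
        return dt.replace(year=year, month=month, day=day)

    shift_month_sketch(datetime(2017, 1, 31), 1, None)    # datetime(2017, 2, 28)
    shift_month_sketch(datetime(2017, 1, 15), -1, 'end')  # datetime(2016, 12, 31)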
diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py
index 7c9250e52d4825..6be6152b09fc8c 100644
--- a/pandas/util/_decorators.py
+++ b/pandas/util/_decorators.py
@@ -3,7 +3,7 @@
import inspect
import types
import warnings
-from textwrap import dedent
+from textwrap import dedent, wrap
from functools import wraps, update_wrapper
@@ -29,11 +29,16 @@ def deprecate(name, alternative, alt_name=None, klass=None,
alt_name = alt_name or alternative.__name__
klass = klass or FutureWarning
- msg = msg or "{} is deprecated. Use {} instead".format(name, alt_name)
+ msg = msg or "{} is deprecated, use {} instead".format(name, alt_name)
+ @wraps(alternative)
def wrapper(*args, **kwargs):
warnings.warn(msg, klass, stacklevel=stacklevel)
return alternative(*args, **kwargs)
+
+ if getattr(wrapper, '__doc__', None) is not None:
+ wrapper.__doc__ = ('\n'.join(wrap(msg, 70)) + '\n'
+ + dedent(wrapper.__doc__))
return wrapper
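With @wraps(alternative) and the docstring prepend added above, a name wrapped by deprecate now keeps the replacement's metadata, and its docstring opens with the deprecation message. A hedged usage sketch, where new_func/old_func are illustrative and only deprecate and its (name, alternative, ...) signature come from the hunk above:

    import warnings
    from pandas.util._decorators import deprecate

    def new_func(n=5):
        """Return the first n integers (stand-in for a real API)."""
        return list(range(n))

    old_func = deprecate('old_func', new_func)

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        old_func(3)                              # [0, 1, 2], plus a FutureWarning
    print(caught[0].message)                     # old_func is deprecated, use new_func instead
    print(old_func.__name__)                     # 'new_func', via @wraps(alternative)
    print(old_func.__doc__.splitlines()[0])      # deprecation message prepended to the docstring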
diff --git a/setup.py b/setup.py
index dd24c5c14ee69e..c3e0c037625da9 100755
--- a/setup.py
+++ b/setup.py
@@ -350,6 +350,7 @@ class CheckSDist(sdist_class):
'pandas/_libs/tslibs/fields.pyx',
'pandas/_libs/tslibs/offsets.pyx',
'pandas/_libs/tslibs/frequencies.pyx',
+ 'pandas/_libs/tslibs/resolution.pyx',
'pandas/_libs/tslibs/parsing.pyx',
'pandas/io/sas/sas.pyx']
@@ -580,6 +581,13 @@ def pxd(name):
'pyxfile': '_libs/tslibs/parsing',
'pxdfiles': ['_libs/src/util',
'_libs/src/khash']},
+ '_libs.tslibs.resolution': {
+ 'pyxfile': '_libs/tslibs/resolution',
+ 'pxdfiles': ['_libs/src/util',
+ '_libs/src/khash',
+ '_libs/tslibs/frequencies',
+ '_libs/tslibs/timezones'],
+ 'depends': tseries_depends},
'_libs.tslibs.strptime': {
'pyxfile': '_libs/tslibs/strptime',
'pxdfiles': ['_libs/src/util',
@@ -761,6 +769,7 @@ def pxd(name):
'pandas.tests.series',
'pandas.tests.scalar',
'pandas.tests.tseries',
+ 'pandas.tests.tseries.offsets',
'pandas.tests.plotting',
'pandas.tests.tools',
'pandas.tests.util',
@@ -796,7 +805,7 @@ def pxd(name):
'pandas.tests.io.formats': ['data/*.csv'],
'pandas.tests.io.msgpack': ['data/*.mp'],
'pandas.tests.reshape': ['data/*.csv'],
- 'pandas.tests.tseries': ['data/*.pickle'],
+ 'pandas.tests.tseries.offsets': ['data/*.pickle'],
'pandas.io.formats': ['templates/*.tpl']
},
ext_modules=extensions,