diff --git a/.travis.yml b/.travis.yml index fe1a2950dbf081..0f43e4cf54faa1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -52,10 +52,6 @@ matrix: - dist: trusty env: - JOB="3.5" TEST_ARGS="--skip-slow --skip-network" COVERAGE=true - addons: - apt: - packages: - - xsel - dist: trusty env: - JOB="3.6" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true @@ -66,7 +62,11 @@ matrix: # In allow_failures - dist: trusty env: - - JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true + - JOB="3.6_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true + addons: + apt: + packages: + - xsel # In allow_failures - dist: trusty env: @@ -75,17 +75,17 @@ matrix: - dist: trusty env: - JOB="3.6_DOC" DOC=true - addons: - apt: - packages: - - xsel allow_failures: - dist: trusty env: - JOB="2.7_SLOW" SLOW=true - dist: trusty env: - - JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true + - JOB="3.6_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true + addons: + apt: + packages: + - xsel - dist: trusty env: - JOB="3.6_NUMPY_DEV" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" @@ -102,8 +102,6 @@ before_install: - uname -a - git --version - git tag - - ci/before_install_travis.sh - - export DISPLAY=":99.0" install: - echo "install start" @@ -114,6 +112,8 @@ install: before_script: - ci/install_db_travis.sh + - export DISPLAY=":99.0" + - ci/before_script_travis.sh script: - echo "script start" diff --git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py index c112d1ef72eb80..0f8c8458628b15 100644 --- a/asv_bench/benchmarks/timedelta.py +++ b/asv_bench/benchmarks/timedelta.py @@ -40,3 +40,46 @@ def setup(self): def test_add_td_ts(self): self.td + self.ts + + +class TimedeltaProperties(object): + goal_time = 0.2 + + def setup(self): + self.td = Timedelta(days=365, minutes=35, seconds=25, milliseconds=35) + + def time_timedelta_days(self): + self.td.days + + def time_timedelta_seconds(self): + self.td.seconds + + def time_timedelta_microseconds(self): + self.td.microseconds + + def time_timedelta_nanoseconds(self): + self.td.nanoseconds + + +class DatetimeAccessor(object): + goal_time = 0.2 + + def setup(self): + self.N = 100000 + self.series = pd.Series( + pd.timedelta_range('1 days', periods=self.N, freq='h') + ) + def time_dt_accessor(self): + self.series.dt + + def time_timedelta_dt_accessor_days(self): + self.series.dt.days + + def time_timedelta_dt_accessor_seconds(self): + self.series.dt.seconds + + def time_timedelta_dt_accessor_microseconds(self): + self.series.dt.microseconds + + def time_timedelta_dt_accessor_nanoseconds(self): + self.series.dt.nanoseconds diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index 779fc0bd20964a..9614a63332609a 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -346,17 +346,22 @@ class ToDatetime(object): def setup(self): self.rng = date_range(start='1/1/2000', periods=10000, freq='D') - self.stringsD = Series((((self.rng.year * 10000) + (self.rng.month * 100)) + self.rng.day), dtype=np.int64).apply(str) + self.stringsD = Series(self.rng.strftime('%Y%m%d')) self.rng = date_range(start='1/1/2000', periods=20000, freq='H') - self.strings = [x.strftime('%Y-%m-%d %H:%M:%S') for x in self.rng] - self.strings_nosep = [x.strftime('%Y%m%d %H:%M:%S') for x in self.rng] + self.strings = self.rng.strftime('%Y-%m-%d %H:%M:%S').tolist() + self.strings_nosep = self.rng.strftime('%Y%m%d %H:%M:%S').tolist() 
self.strings_tz_space = [x.strftime('%Y-%m-%d %H:%M:%S') + ' -0800' for x in self.rng] self.s = Series((['19MAY11', '19MAY11:00:00:00'] * 100000)) self.s2 = self.s.str.replace(':\\S+$', '') + self.unique_numeric_seconds = range(10000) + self.dup_numeric_seconds = [1000] * 10000 + self.dup_string_dates = ['2000-02-11'] * 10000 + self.dup_string_with_tz = ['2000-02-11 15:00:00-0800'] * 10000 + def time_format_YYYYMMDD(self): to_datetime(self.stringsD, format='%Y%m%d') @@ -381,6 +386,36 @@ def time_format_exact(self): def time_format_no_exact(self): to_datetime(self.s, format='%d%b%y', exact=False) + def time_cache_true_with_unique_seconds_and_unit(self): + to_datetime(self.unique_numeric_seconds, unit='s', cache=True) + + def time_cache_false_with_unique_seconds_and_unit(self): + to_datetime(self.unique_numeric_seconds, unit='s', cache=False) + + def time_cache_true_with_dup_seconds_and_unit(self): + to_datetime(self.dup_numeric_seconds, unit='s', cache=True) + + def time_cache_false_with_dup_seconds_and_unit(self): + to_datetime(self.dup_numeric_seconds, unit='s', cache=False) + + def time_cache_true_with_dup_string_dates(self): + to_datetime(self.dup_string_dates, cache=True) + + def time_cache_false_with_dup_string_dates(self): + to_datetime(self.dup_string_dates, cache=False) + + def time_cache_true_with_dup_string_dates_and_format(self): + to_datetime(self.dup_string_dates, format='%Y-%m-%d', cache=True) + + def time_cache_false_with_dup_string_dates_and_format(self): + to_datetime(self.dup_string_dates, format='%Y-%m-%d', cache=False) + + def time_cache_true_with_dup_string_tzoffset_dates(self): + to_datetime(self.dup_string_with_tz, cache=True) + + def time_cache_false_with_dup_string_tzoffset_dates(self): + to_datetime(self.dup_string_with_tz, cache=False) + class Offsets(object): goal_time = 0.2 diff --git a/ci/before_install_travis.sh b/ci/before_script_travis.sh similarity index 93% rename from ci/before_install_travis.sh rename to ci/before_script_travis.sh index 2d0b4da6120dc3..0b3939b1906a24 100755 --- a/ci/before_install_travis.sh +++ b/ci/before_script_travis.sh @@ -4,6 +4,7 @@ echo "inside $0" if [ "${TRAVIS_OS_NAME}" == "linux" ]; then sh -e /etc/init.d/xvfb start + sleep 3 fi # Never fail because bad things happened here. 
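The new ToDatetime benchmarks above exercise the ``cache`` keyword that this change set adds to ``to_datetime`` (see the v0.22.0 performance notes later in the diff, GH 11665). A minimal usage sketch, assuming the keyword behaves as the benchmarks imply (unique values are parsed once and reused for duplicates); the variable names here are illustrative only:

    import pandas as pd

    # 10,000 identical date strings: the case the cache targets
    dup_dates = ['2000-02-11'] * 10000

    # default: every element is converted independently
    no_cache = pd.to_datetime(dup_dates, format='%Y-%m-%d', cache=False)

    # cache=True: parse the unique values once, then reuse them for duplicates
    cached = pd.to_datetime(dup_dates, format='%Y-%m-%d', cache=True)

    # both calls should produce the same result
    assert (no_cache == cached).all()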
diff --git a/ci/requirements-2.7_BUILD_TEST.build b/ci/requirements-2.7_BUILD_TEST.build deleted file mode 100644 index aadec00cb7ebf2..00000000000000 --- a/ci/requirements-2.7_BUILD_TEST.build +++ /dev/null @@ -1,6 +0,0 @@ -python=2.7* -dateutil -pytz -nomkl -numpy -cython diff --git a/ci/requirements-3.6_BUILD_TEST.build b/ci/requirements-3.6_BUILD_TEST.build new file mode 100644 index 00000000000000..1c4b46aea3865d --- /dev/null +++ b/ci/requirements-3.6_BUILD_TEST.build @@ -0,0 +1,6 @@ +python=3.6* +python-dateutil +pytz +nomkl +numpy +cython diff --git a/ci/requirements-2.7_BUILD_TEST.pip b/ci/requirements-3.6_BUILD_TEST.pip similarity index 100% rename from ci/requirements-2.7_BUILD_TEST.pip rename to ci/requirements-3.6_BUILD_TEST.pip diff --git a/ci/requirements-2.7_BUILD_TEST.sh b/ci/requirements-3.6_BUILD_TEST.sh old mode 100755 new mode 100644 similarity index 75% rename from ci/requirements-2.7_BUILD_TEST.sh rename to ci/requirements-3.6_BUILD_TEST.sh index 78941fd0944e57..84dd27c50d587d --- a/ci/requirements-2.7_BUILD_TEST.sh +++ b/ci/requirements-3.6_BUILD_TEST.sh @@ -2,6 +2,6 @@ source activate pandas -echo "install 27 BUILD_TEST" +echo "install 36 BUILD_TEST" conda install -n pandas -c conda-forge pyarrow dask diff --git a/ci/requirements-3.6_NUMPY_DEV.build.sh b/ci/requirements-3.6_NUMPY_DEV.build.sh index bc92d8fca6b17f..fd79142c5cebbe 100644 --- a/ci/requirements-3.6_NUMPY_DEV.build.sh +++ b/ci/requirements-3.6_NUMPY_DEV.build.sh @@ -12,10 +12,7 @@ PRE_WHEELS="https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf pip install --pre --upgrade --timeout=60 -f $PRE_WHEELS numpy scipy # install dateutil from master - -# TODO(jreback), temp disable dateutil master has changed -# pip install -U git+git://github.com/dateutil/dateutil.git -pip install python-dateutil +pip install -U git+git://github.com/dateutil/dateutil.git # cython via pip pip install cython diff --git a/ci/script_multi.sh b/ci/script_multi.sh index ee9fbcaad5ef5f..863613e14af98e 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -27,6 +27,11 @@ if [ "$BUILD_TEST" ]; then echo "[running]" cd /tmp unset PYTHONPATH + + echo "[build-test: single]" + python -c 'import pandas; pandas.test(["--skip-slow", "--skip-network", "-r xX", "-m single"])' + + echo "[build-test: not single]" python -c 'import pandas; pandas.test(["-n 2", "--skip-slow", "--skip-network", "-r xX", "-m not single"])' elif [ "$DOC" ]; then diff --git a/doc/source/api.rst b/doc/source/api.rst index b5cf593ac0d1f9..ce88aed91823c5 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1870,8 +1870,52 @@ Methods Timedelta.to_timedelta64 Timedelta.total_seconds +.. _api.frequencies: + +Frequencies +----------- + +.. currentmodule:: pandas.tseries.frequencies + + +.. autosummary:: + :toctree: generated/ + + to_offset + +.. _api.offsets: + +Offsets +------- + +.. currentmodule:: pandas.tseries.offsets + +.. autosummary:: + :toctree: generated/ + + DateOffset + Week + Day + Hour + Minute + Second + Milli + Micro + Nano + +.. autosummary:: + :toctree: generated/ + + MonthBegin + MonthEnd + QuarterBegin + QuarterEnd + YearBegin + YearEnd + Window ------ + .. currentmodule:: pandas.core.window Rolling objects are returned by ``.rolling`` calls: :func:`pandas.DataFrame.rolling`, :func:`pandas.Series.rolling`, etc. diff --git a/doc/source/io.rst b/doc/source/io.rst index 36f216601b4911..c94d5bc75d4fcc 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4427,8 +4427,10 @@ Several caveats. 
- This is a newer library, and the format, though stable, is not guaranteed to be backward compatible to the earlier versions. -- The format will NOT write an ``Index``, or ``MultiIndex`` for the ``DataFrame`` and will raise an - error if a non-default one is provided. You can simply ``.reset_index()`` in order to store the index. +- The format will NOT write an ``Index``, or ``MultiIndex`` for the + ``DataFrame`` and will raise an error if a non-default one is provided. You + can ``.reset_index()`` to store the index or ``.reset_index(drop=True)`` to + ignore it. - Duplicate column names and non-string columns names are not supported - Non supported types include ``Period`` and actual python object types. These will raise a helpful error message on an attempt at serialization. @@ -4491,8 +4493,10 @@ dtypes, including extension dtypes such as datetime with tz. Several caveats. -- The format will NOT write an ``Index``, or ``MultiIndex`` for the ``DataFrame`` and will raise an - error if a non-default one is provided. You can simply ``.reset_index(drop=True)`` in order to store the index. +- The format will NOT write an ``Index``, or ``MultiIndex`` for the + ``DataFrame`` and will raise an error if a non-default one is provided. You + can ``.reset_index()`` to store the index or ``.reset_index(drop=True)`` to + ignore it. - Duplicate column names and non-string columns names are not supported - Categorical dtypes can be serialized to parquet, but will de-serialize as ``object`` dtype. - Non supported types include ``Period`` and actual python object types. These will raise a helpful error message @@ -4538,7 +4542,7 @@ Read from a parquet file. result.dtypes -Read only certain columns of a parquet file. +Read only certain columns of a parquet file. .. ipython:: python diff --git a/doc/source/release.rst b/doc/source/release.rst index 6c3e7f847b485a..a3289b11448631 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -52,7 +52,7 @@ Highlights include: - Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` function and :meth:`DataFrame.to_parquet` method, see :ref:`here `. - New user-facing :class:`pandas.api.types.CategoricalDtype` for specifying categoricals independent of the data, see :ref:`here `. -- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck `__ is installed, see :ref:`here `. +- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck `__ is installed, and ``sum`` and ``prod`` on empty Series now return NaN instead of 0, see :ref:`here `. - Compatibility fixes for pypy, see :ref:`here `. - Additions to the ``drop``, ``reindex`` and ``rename`` API to make them more consistent, see :ref:`here `. - Addition of the new methods ``DataFrame.infer_objects`` (see :ref:`here `) and ``GroupBy.pipe`` (see :ref:`here `). diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst index 3385bafc264677..64cbe0b050a619 100644 --- a/doc/source/whatsnew.rst +++ b/doc/source/whatsnew.rst @@ -18,6 +18,10 @@ What's New These are new features and improvements of note in each release. +.. include:: whatsnew/v0.22.0.txt + +.. include:: whatsnew/v0.21.1.txt + .. include:: whatsnew/v0.21.0.txt .. 
include:: whatsnew/v0.20.3.txt diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 4c460eeb85b82a..89e2d3006696c5 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -12,7 +12,7 @@ Highlights include: - Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` function and :meth:`DataFrame.to_parquet` method, see :ref:`here `. - New user-facing :class:`pandas.api.types.CategoricalDtype` for specifying categoricals independent of the data, see :ref:`here `. -- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck `__ is installed, see :ref:`here `. +- The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames is now consistent and no longer depends on whether `bottleneck `__ is installed, and ``sum`` and ``prod`` on empty Series now return NaN instead of 0, see :ref:`here `. - Compatibility fixes for pypy, see :ref:`here `. - Additions to the ``drop``, ``reindex`` and ``rename`` API to make them more consistent, see :ref:`here `. - Addition of the new methods ``DataFrame.infer_objects`` (see :ref:`here `) and ``GroupBy.pipe`` (see :ref:`here `). @@ -369,11 +369,11 @@ Additionally, support has been dropped for Python 3.4 (:issue:`15251`). .. _whatsnew_0210.api_breaking.bottleneck: -Sum/Prod of all-NaN Series/DataFrames is now consistently NaN -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Sum/Prod of all-NaN or empty Series/DataFrames is now consistently NaN +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The behavior of ``sum`` and ``prod`` on all-NaN Series/DataFrames no longer depends on -whether `bottleneck `__ is installed. (:issue:`9422`, :issue:`15507`). +whether `bottleneck `__ is installed, and the return value of ``sum`` and ``prod`` on an empty Series has changed (:issue:`9422`, :issue:`15507`). Calling ``sum`` or ``prod`` on an empty or all-``NaN`` ``Series``, or columns of a ``DataFrame``, will result in ``NaN``. See the :ref:`docs `. @@ -381,35 +381,35 @@ Calling ``sum`` or ``prod`` on an empty or all-``NaN`` ``Series``, or columns of s = Series([np.nan]) -Previously NO ``bottleneck`` +Previously WITHOUT ``bottleneck`` installed: .. code-block:: ipython In [2]: s.sum() Out[2]: np.nan -Previously WITH ``bottleneck`` +Previously WITH ``bottleneck``: .. code-block:: ipython In [2]: s.sum() Out[2]: 0.0 -New Behavior, without regard to the bottleneck installation. +New Behavior, without regard to the bottleneck installation: .. ipython:: python s.sum() -Note that this also changes the sum of an empty ``Series`` - -Previously regardless of ``bottlenck`` +Note that this also changes the sum of an empty ``Series``. Previously this always returned 0 regardless of a ``bottleneck`` installation: .. code-block:: ipython In [1]: pd.Series([]).sum() Out[1]: 0 +but for consistency with the all-NaN case, this was changed to return NaN as well: + .. ipython:: python pd.Series([]).sum() @@ -877,6 +877,28 @@ New Behavior: pd.interval_range(start=0, end=4) +.. _whatsnew_0210.api.mpl_converters: + +No Automatic Matplotlib Converters +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Pandas no longer registers our ``date``, ``time``, ``datetime``, +``datetime64``, and ``Period`` converters with matplotlib when pandas is +imported. Matplotlib plot methods (``plt.plot``, ``ax.plot``, ...) will not +nicely format the x-axis for ``DatetimeIndex`` or ``PeriodIndex`` values. 
You + must explicitly register these converters: + +.. ipython:: python + + from pandas.tseries import converter + converter.register() + + fig, ax = plt.subplots() + plt.plot(pd.date_range('2017', periods=6), range(6)) + +Pandas built-in ``Series.plot`` and ``DataFrame.plot`` *will* register these +converters on first-use (:issue:`17710`). + .. _whatsnew_0210.api: Other API Changes ^^^^^^^^^^^^^^^^^ - Renamed non-functional ``index`` to ``index_col`` in :func:`read_stata` to improve API consistency (:issue:`16342`) - Bug in :func:`DataFrame.drop` caused boolean labels ``False`` and ``True`` to be treated as labels 0 and 1 respectively when dropping indices from a numeric index. This will now raise a ValueError (:issue:`16877`) - Restricted DateOffset keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`). -- Pandas no longer registers matplotlib converters on import. The converters - will be registered and used when the first plot is draw (:issue:`17710`) .. _whatsnew_0210.deprecations: diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index 0f6135ca2f045c..62d83069940535 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -61,7 +61,7 @@ Bug Fixes - Bug in :class:`DatetimeIndex` subtracting datetimelike from DatetimeIndex could fail to overflow (:issue:`18020`) - Bug in ``pd.Series.rolling.skew()`` and ``rolling.kurt()`` with all equal values has floating issue (:issue:`18044`) - Bug in ``pd.DataFrameGroupBy.count()`` when counting over a datetimelike column (:issue:`13393`) -- Bug in ``pd.Categorical.unique()`` returning read-only array when all categories were ``NaN`` (:issue:`18051`) +- Bug in ``pd.concat`` when empty and non-empty DataFrames or Series are concatenated (:issue:`18178`, :issue:`18187`) Conversion ^^^^^^^^^^ @@ -73,7 +73,8 @@ Conversion Indexing ^^^^^^^^ -- +- Bug in a boolean comparison of a ``datetime.datetime`` and a ``datetime64[ns]`` dtype Series (:issue:`17965`) +- Bug where a ``MultiIndex`` with more than a million records was not raising ``AttributeError`` when trying to access a missing attribute (:issue:`18165`) - - @@ -85,6 +86,7 @@ I/O - Bug in :func:`read_csv` for handling null values in index columns when specifying ``na_filter=False`` (:issue:`5239`) - Bug in :meth:`DataFrame.to_csv` when the table had ``MultiIndex`` columns, and a list of strings was passed in for ``header`` (:issue:`5539`) - :func:`read_parquet` now allows to specify the columns to read from a parquet file (:issue:`18154`) +- :func:`read_parquet` now allows specifying kwargs which are passed to the respective engine (:issue:`18216`) Plotting ^^^^^^^^ @@ -128,6 +130,7 @@ Categorical - Error messages in the testing module have been improved when items have different ``CategoricalDtype`` (:issue:`18069`) - ``CategoricalIndex`` can now correctly take a ``pd.api.types.CategoricalDtype`` as its dtype (:issue:`18116`) +- Bug in ``Categorical.unique()`` returning read-only array when all categories were ``NaN`` (:issue:`18051`) Other ^^^^^ diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 943b6bb84fb47b..8afdd1b2e22b37 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -22,8 +22,8 @@ New features Other Enhancements ^^^^^^^^^^^^^^^^^^ -- Better support for ``Dataframe.style.to_excel()`` output with the 
``xlsxwriter`` engine. (:issue:`16149`) -- +- Better support for :func:`DataFrame.style.to_excel` output with the ``xlsxwriter`` engine. (:issue:`16149`) +- :func:`pandas.tseries.frequencies.to_offset` now accepts leading '+' signs, e.g. '+1h'. (:issue:`18171`) - .. _whatsnew_0220.api_breaking: @@ -41,10 +41,11 @@ Other API Changes ^^^^^^^^^^^^^^^^^ - ``NaT`` division with :class:`datetime.timedelta` will now return ``NaN`` instead of raising (:issue:`17876`) -- All-NaN levels in ``MultiIndex`` are now assigned float rather than object dtype, coherently with flat indexes (:issue:`17929`). -- :class:`Timestamp` will no longer silently ignore unused or invalid `tz` or `tzinfo` keyword arguments (:issue:`17690`) -- :class:`Timestamp` will no longer silently ignore invalid `freq` arguments (:issue:`5168`) -- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the `tseries.offsets` module (:issue:`17830`) +- All-NaN levels in a ``MultiIndex`` are now assigned ``float`` rather than ``object`` dtype, promoting consistency with ``Index`` (:issue:`17929`). +- :class:`Timestamp` will no longer silently ignore unused or invalid ``tz`` or ``tzinfo`` keyword arguments (:issue:`17690`) +- :class:`Timestamp` will no longer silently ignore invalid ``freq`` arguments (:issue:`5168`) +- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`) +- ``tseries.frequencies.get_freq_group()`` and ``tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`) .. _whatsnew_0220.deprecations: @@ -60,8 +61,8 @@ Deprecations Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- -- +- Warnings against the obsolete usage ``Categorical(codes, categories)``, which were emitted for instance when the first two arguments to ``Categorical()`` had different dtypes, and recommended the use of ``Categorical.from_codes``, have now been removed (:issue:`8074`) +- The ``levels`` and ``labels`` attributes of a ``MultiIndex`` can no longer be set directly (:issue:`4039`). - .. _whatsnew_0220.performance: @@ -69,8 +70,10 @@ Removal of prior version deprecations/changes Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- Indexers on Series or DataFrame no longer create a reference cycle (:issue:`17956`) -- +- Indexers on ``Series`` or ``DataFrame`` no longer create a reference cycle (:issue:`17956`) +- Added a keyword argument, ``cache``, to :func:`to_datetime` that improves the performance of converting duplicate datetime arguments (:issue:`11665`) +- :class:`DateOffset` arithmetic performance is improved (:issue:`18218`) +- Converting a ``Series`` of ``Timedelta`` objects to days, seconds, etc. has been sped up through vectorization of underlying methods (:issue:`18092`) - .. 
_whatsnew_0220.docs: @@ -87,9 +90,6 @@ Documentation Changes Bug Fixes ~~~~~~~~~ -- Bug in ``pd.read_msgpack()`` with a non existent file is passed in Python 2 (:issue:`15296`) -- Bug in ``DataFrame.groupby`` where key as tuple in a ``MultiIndex`` were interpreted as a list of keys (:issue:`17979`) -- Bug in :func:`pd.read_csv` where a ``MultiIndex`` with duplicate columns was not being mangled appropriately (:issue:`18062`) Conversion ^^^^^^^^^^ @@ -101,7 +101,8 @@ Conversion Indexing ^^^^^^^^ -- Bug in :func:`PeriodIndex.truncate` which raises ``TypeError`` when ``PeriodIndex`` is monotonic (:issue:`17717`) +- Bug in :func:`Series.truncate` which raises ``TypeError`` with a monotonic ``PeriodIndex`` (:issue:`17717`) +- Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`) - - @@ -109,6 +110,9 @@ I/O ^^^ - :func:`read_html` now rewinds seekable IO objects after parse failure, before attempting to parse with a new parser. If a parser errors and the object is non-seekable, an informative error is raised suggesting the use of a different parser (:issue:`17975`) +- Bug in :func:`read_msgpack` when a non-existent file is passed in Python 2 (:issue:`15296`) +- Bug in :func:`read_csv` where a ``MultiIndex`` with duplicate columns was not being mangled appropriately (:issue:`18062`) +- Bug in :func:`read_sas` where a file with 0 variables gave an ``AttributeError`` incorrectly. Now it gives an ``EmptyDataError`` (:issue:`18184`) - - @@ -157,6 +161,6 @@ Categorical Other ^^^^^ -- +- Improved error message when attempting to use a Python keyword as an identifier in a numexpr query (:issue:`18221`) - - diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 2fbbc81c4b5a12..e1312a40971f0d 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # cython: profile=False cimport numpy as cnp diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 78eb7b3ae483e1..f5d8a0da0112b3 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -19,7 +19,7 @@ from hashtable cimport HashTable from pandas._libs import algos, period as periodlib, hashtable as _hash from pandas._libs.tslib import Timestamp, Timedelta -from datetime import datetime, timedelta +from datetime import datetime, timedelta, date from cpython cimport PyTuple_Check, PyList_Check @@ -549,7 +549,7 @@ cpdef convert_scalar(ndarray arr, object value): if arr.descr.type_num == NPY_DATETIME: if isinstance(value, np.ndarray): pass - elif isinstance(value, datetime): + elif isinstance(value, (datetime, np.datetime64, date)): return Timestamp(value).value elif value is None or value != value: return iNaT diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 72523a19b95952..bd21fb97ede206 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +# cython: profile=False from datetime import datetime, date, timedelta import operator @@ -27,14 +28,16 @@ from util cimport is_period_object, is_string_object, INT32_MIN from lib cimport is_null_datetimelike from pandas._libs import tslib -from pandas._libs.tslib import Timestamp, iNaT, NaT +from pandas._libs.tslib import Timestamp, iNaT from tslibs.timezones cimport ( is_utc, is_tzlocal, get_utcoffset, get_dst_info, maybe_get_tz) from tslibs.timedeltas cimport delta_to_nanoseconds -from tslibs.parsing import parse_time_string, NAT_SENTINEL +from tslibs.parsing import 
(parse_time_string, NAT_SENTINEL, + _get_rule_month, _MONTH_NUMBERS) from tslibs.frequencies cimport get_freq_code -from tslibs.nattype import nat_strings +from tslibs.resolution import resolution, Resolution +from tslibs.nattype import nat_strings, NaT from tslibs.nattype cimport _nat_scalar_rules from pandas.tseries import offsets @@ -42,13 +45,6 @@ from pandas.tseries import frequencies cdef int64_t NPY_NAT = util.get_nat() -cdef int RESO_US = frequencies.RESO_US -cdef int RESO_MS = frequencies.RESO_MS -cdef int RESO_SEC = frequencies.RESO_SEC -cdef int RESO_MIN = frequencies.RESO_MIN -cdef int RESO_HR = frequencies.RESO_HR -cdef int RESO_DAY = frequencies.RESO_DAY - cdef extern from "period_helper.h": ctypedef struct date_info: int64_t absdate @@ -487,98 +483,10 @@ def extract_freq(ndarray[object] values): raise ValueError('freq not specified and cannot be inferred') -cpdef resolution(ndarray[int64_t] stamps, tz=None): - cdef: - Py_ssize_t i, n = len(stamps) - pandas_datetimestruct dts - int reso = RESO_DAY, curr_reso - - if tz is not None: - tz = maybe_get_tz(tz) - return _reso_local(stamps, tz) - else: - for i in range(n): - if stamps[i] == NPY_NAT: - continue - dt64_to_dtstruct(stamps[i], &dts) - curr_reso = _reso_stamp(&dts) - if curr_reso < reso: - reso = curr_reso - return reso - - -cdef inline int _reso_stamp(pandas_datetimestruct *dts): - if dts.us != 0: - if dts.us % 1000 == 0: - return RESO_MS - return RESO_US - elif dts.sec != 0: - return RESO_SEC - elif dts.min != 0: - return RESO_MIN - elif dts.hour != 0: - return RESO_HR - return RESO_DAY - -cdef _reso_local(ndarray[int64_t] stamps, object tz): - cdef: - Py_ssize_t n = len(stamps) - int reso = RESO_DAY, curr_reso - ndarray[int64_t] trans, deltas, pos - pandas_datetimestruct dts - - if is_utc(tz): - for i in range(n): - if stamps[i] == NPY_NAT: - continue - dt64_to_dtstruct(stamps[i], &dts) - curr_reso = _reso_stamp(&dts) - if curr_reso < reso: - reso = curr_reso - elif is_tzlocal(tz): - for i in range(n): - if stamps[i] == NPY_NAT: - continue - dt64_to_dtstruct(stamps[i], &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz) - delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 - dt64_to_dtstruct(stamps[i] + delta, &dts) - curr_reso = _reso_stamp(&dts) - if curr_reso < reso: - reso = curr_reso - else: - # Adjust datetime64 timestamp, recompute datetimestruct - trans, deltas, typ = get_dst_info(tz) - - _pos = trans.searchsorted(stamps, side='right') - 1 - if _pos.dtype != np.int64: - _pos = _pos.astype(np.int64) - pos = _pos - - # statictzinfo - if typ not in ['pytz', 'dateutil']: - for i in range(n): - if stamps[i] == NPY_NAT: - continue - dt64_to_dtstruct(stamps[i] + deltas[0], &dts) - curr_reso = _reso_stamp(&dts) - if curr_reso < reso: - reso = curr_reso - else: - for i in range(n): - if stamps[i] == NPY_NAT: - continue - dt64_to_dtstruct(stamps[i] + deltas[pos[i]], &dts) - curr_reso = _reso_stamp(&dts) - if curr_reso < reso: - reso = curr_reso - - return reso - - +# ----------------------------------------------------------------------- # period helpers + cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, int freq, object tz): cdef: @@ -1191,7 +1099,7 @@ class Period(_Period): if freq is None: try: - freq = frequencies.Resolution.get_freq(reso) + freq = Resolution.get_freq(reso) except KeyError: raise ValueError( "Invalid frequency or could not infer: %s" % reso) @@ -1236,7 +1144,7 @@ def _quarter_to_myear(year, quarter, freq): if quarter <= 0 
or quarter > 4: raise ValueError('Quarter must be 1 <= q <= 4') - mnum = tslib._MONTH_NUMBERS[tslib._get_rule_month(freq)] + 1 + mnum = _MONTH_NUMBERS[_get_rule_month(freq)] + 1 month = (mnum + (quarter - 1) * 3) % 12 + 1 if month > mnum: year -= 1 diff --git a/pandas/_libs/properties.pyx b/pandas/_libs/properties.pyx index 374da8067eedd3..4beb24f07c21cc 100644 --- a/pandas/_libs/properties.pyx +++ b/pandas/_libs/properties.pyx @@ -63,7 +63,14 @@ cdef class AxisProperty(object): self.axis = axis def __get__(self, obj, type): - cdef list axes = obj._data.axes + cdef: + list axes + + if obj is None: + # Only instances have _data, not classes + return None + else: + axes = obj._data.axes return axes[self.axis] def __set__(self, obj, value): diff --git a/pandas/_libs/src/datetime/np_datetime.c b/pandas/_libs/src/datetime/np_datetime.c index f8254ed9d84180..7278cbaff86caa 100644 --- a/pandas/_libs/src/datetime/np_datetime.c +++ b/pandas/_libs/src/datetime/np_datetime.c @@ -562,6 +562,17 @@ void pandas_datetime_to_datetimestruct(npy_datetime val, PANDAS_DATETIMEUNIT fr, convert_datetime_to_datetimestruct(&meta, val, result); } +void pandas_timedelta_to_timedeltastruct(npy_timedelta val, + PANDAS_DATETIMEUNIT fr, + pandas_timedeltastruct *result) { + pandas_datetime_metadata meta; + + meta.base = fr; + meta.num = 1; + + convert_timedelta_to_timedeltastruct(&meta, val, result); +} + PANDAS_DATETIMEUNIT get_datetime64_unit(PyObject *obj) { return (PANDAS_DATETIMEUNIT)((PyDatetimeScalarObject *)obj)->obmeta.base; } @@ -980,3 +991,107 @@ int convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta, return 0; } + +/* + * Converts a timedelta value into a timedeltastruct based + * on some metadata. The timedelta is assumed to be valid. + * + * Returns 0 on success, -1 on failure. 
+ */ +int convert_timedelta_to_timedeltastruct(pandas_timedelta_metadata *meta, + npy_timedelta td, + pandas_timedeltastruct *out) { + npy_int64 perday; + npy_int64 frac; + npy_int64 sfrac; + npy_int64 ifrac; + int sign; + npy_int64 DAY_NS = 86400000000000LL; + + /* Initialize the output to all zeros */ + memset(out, 0, sizeof(pandas_timedeltastruct)); + + switch (meta->base) { + case PANDAS_FR_ns: + + // put frac in seconds + if (td < 0 && td % (1000LL * 1000LL * 1000LL) != 0) + frac = td / (1000LL * 1000LL * 1000LL) - 1; + else + frac = td / (1000LL * 1000LL * 1000LL); + + if (frac < 0) { + sign = -1; + + // even fraction + if ((-frac % 86400LL) != 0) { + out->days = -frac / 86400LL + 1; + frac += 86400LL * out->days; + } else { + frac = -frac; + } + } else { + sign = 1; + out->days = 0; + } + + if (frac >= 86400) { + out->days += frac / 86400LL; + frac -= out->days * 86400LL; + } + + if (frac >= 3600) { + out->hrs = frac / 3600LL; + frac -= out->hrs * 3600LL; + } else { + out->hrs = 0; + } + + if (frac >= 60) { + out->min = frac / 60LL; + frac -= out->min * 60LL; + } else { + out->min = 0; + } + + if (frac >= 0) { + out->sec = frac; + frac -= out->sec; + } else { + out->sec = 0; + } + + sfrac = (out->hrs * 3600LL + out->min * 60LL + + out->sec) * (1000LL * 1000LL * 1000LL); + + if (sign < 0) + out->days = -out->days; + + ifrac = td - (out->days * DAY_NS + sfrac); + + if (ifrac != 0) { + out->ms = ifrac / (1000LL * 1000LL); + ifrac -= out->ms * 1000LL * 1000LL; + out->us = ifrac / 1000LL; + ifrac -= out->us * 1000LL; + out->ns = ifrac; + } else { + out->ms = 0; + out->us = 0; + out->ns = 0; + } + + out->seconds = out->hrs * 3600 + out->min * 60 + out->sec; + out->microseconds = out->ms * 1000 + out->us; + out->nanoseconds = out->ns; + break; + + default: + PyErr_SetString(PyExc_RuntimeError, + "NumPy datetime metadata is corrupted with invalid " + "base unit"); + return -1; + } + + return 0; +} diff --git a/pandas/_libs/src/datetime/np_datetime.h b/pandas/_libs/src/datetime/np_datetime.h index af3d2e0f01c1b5..c51a4bddac82f0 100644 --- a/pandas/_libs/src/datetime/np_datetime.h +++ b/pandas/_libs/src/datetime/np_datetime.h @@ -49,11 +49,18 @@ typedef struct { npy_int32 month, day, hour, min, sec, us, ps, as; } pandas_datetimestruct; +typedef struct { + npy_int64 days; + npy_int32 hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds; +} pandas_timedeltastruct; + typedef struct { PANDAS_DATETIMEUNIT base; int num; } pandas_datetime_metadata; +typedef pandas_datetime_metadata pandas_timedelta_metadata; + extern const pandas_datetimestruct _NS_MIN_DTS; extern const pandas_datetimestruct _NS_MAX_DTS; @@ -71,6 +78,10 @@ npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr, void pandas_datetime_to_datetimestruct(npy_datetime val, PANDAS_DATETIMEUNIT fr, pandas_datetimestruct *result); +void pandas_timedelta_to_timedeltastruct(npy_timedelta val, + PANDAS_DATETIMEUNIT fr, + pandas_timedeltastruct *result); + int dayofweek(int y, int m, int d); extern const int days_per_month_table[2][12]; @@ -131,6 +142,11 @@ convert_datetime_to_datetimestruct(pandas_datetime_metadata *meta, npy_datetime dt, pandas_datetimestruct *out); +int +convert_timedelta_to_timedeltastruct(pandas_timedelta_metadata *meta, + npy_timedelta td, + pandas_timedeltastruct *out); + PANDAS_DATETIMEUNIT get_datetime64_unit(PyObject *obj); diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index f2edf48a6b8295..c432c40c8f6b3b 100644 --- a/pandas/_libs/src/inference.pyx +++ 
b/pandas/_libs/src/inference.pyx @@ -613,7 +613,7 @@ cdef class Validator: self.dtype = dtype self.skipna = skipna - cdef bint validate(self, object[:] values) except -1: + cdef bint validate(self, ndarray values) except -1: if not self.n: return False @@ -629,7 +629,7 @@ cdef class Validator: @cython.wraparound(False) @cython.boundscheck(False) - cdef bint _validate(self, object[:] values) except -1: + cdef bint _validate(self, ndarray values) except -1: cdef: Py_ssize_t i Py_ssize_t n = self.n @@ -642,7 +642,7 @@ cdef class Validator: @cython.wraparound(False) @cython.boundscheck(False) - cdef bint _validate_skipna(self, object[:] values) except -1: + cdef bint _validate_skipna(self, ndarray values) except -1: cdef: Py_ssize_t i Py_ssize_t n = self.n @@ -852,7 +852,7 @@ cdef class DatetimeValidator(TemporalValidator): return is_null_datetime64(value) -cpdef bint is_datetime_array(ndarray[object] values): +cpdef bint is_datetime_array(ndarray values): cdef: DatetimeValidator validator = DatetimeValidator( len(values), @@ -876,7 +876,7 @@ cpdef bint is_datetime64_array(ndarray values): return validator.validate(values) -cpdef bint is_datetime_with_singletz_array(ndarray[object] values): +cpdef bint is_datetime_with_singletz_array(ndarray values): """ Check values have the same tzinfo attribute. Doesn't check values are datetime-like types. @@ -959,7 +959,7 @@ cdef class DateValidator(Validator): return is_date(value) -cpdef bint is_date_array(ndarray[object] values, bint skipna=False): +cpdef bint is_date_array(ndarray values, bint skipna=False): cdef DateValidator validator = DateValidator(len(values), skipna=skipna) return validator.validate(values) @@ -970,7 +970,7 @@ cdef class TimeValidator(Validator): return is_time(value) -cpdef bint is_time_array(ndarray[object] values, bint skipna=False): +cpdef bint is_time_array(ndarray values, bint skipna=False): cdef TimeValidator validator = TimeValidator(len(values), skipna=skipna) return validator.validate(values) @@ -984,7 +984,7 @@ cdef class PeriodValidator(TemporalValidator): return is_null_period(value) -cpdef bint is_period_array(ndarray[object] values): +cpdef bint is_period_array(ndarray values): cdef PeriodValidator validator = PeriodValidator(len(values), skipna=True) return validator.validate(values) @@ -995,7 +995,7 @@ cdef class IntervalValidator(Validator): return is_interval(value) -cpdef bint is_interval_array(ndarray[object] values): +cpdef bint is_interval_array(ndarray values): cdef: IntervalValidator validator = IntervalValidator( len(values), diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index bf22a3a528259b..540a081bdda2ec 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -50,6 +50,7 @@ from datetime cimport ( # stdlib datetime imports from datetime import time as datetime_time + from tslibs.np_datetime cimport (check_dts_bounds, reverse_ops, cmp_scalar, @@ -61,12 +62,6 @@ from tslibs.np_datetime cimport (check_dts_bounds, get_timedelta64_value) from tslibs.np_datetime import OutOfBoundsDatetime -from khash cimport ( - khiter_t, - kh_destroy_int64, kh_put_int64, - kh_init_int64, kh_int64_t, - kh_resize_int64, kh_get_int64) - from .tslibs.parsing import parse_datetime_string cimport cython @@ -97,9 +92,8 @@ from tslibs.conversion cimport (tz_convert_single, _TSObject, convert_to_tsobject, convert_datetime_to_tsobject, get_datetime64_nanos) -from tslibs.conversion import ( - tz_localize_to_utc, tz_convert, - tz_convert_single) +from tslibs.conversion import (tz_localize_to_utc, + 
tz_convert_single, date_normalize) from tslibs.nattype import NaT, nat_strings from tslibs.nattype cimport _checknull_with_nat @@ -878,33 +872,6 @@ Timestamp.min = Timestamp(_NS_LOWER_BOUND) Timestamp.max = Timestamp(_NS_UPPER_BOUND) -# ---------------------------------------------------------------------- -# Frequency inference - -def unique_deltas(ndarray[int64_t] arr): - cdef: - Py_ssize_t i, n = len(arr) - int64_t val - khiter_t k - kh_int64_t *table - int ret = 0 - list uniques = [] - - table = kh_init_int64() - kh_resize_int64(table, 10) - for i in range(n - 1): - val = arr[i + 1] - arr[i] - k = kh_get_int64(table, val) - if k == table.n_buckets: - kh_put_int64(table, val, &ret) - uniques.append(val) - kh_destroy_int64(table) - - result = np.array(uniques, dtype=np.int64) - result.sort() - return result - - cdef str _NDIM_STRING = "ndim" # This is PITA. Because we inherit from datetime, which has very specific @@ -1389,27 +1356,6 @@ _MONTH_NUMBERS = {k: i for i, k in enumerate(_MONTHS)} _MONTH_ALIASES = {(k + 1): v for k, v in enumerate(_MONTHS)} -cpdef object _get_rule_month(object source, object default='DEC'): - """ - Return starting month of given freq, default is December. - - Example - ------- - >>> _get_rule_month('D') - 'DEC' - - >>> _get_rule_month('A-JAN') - 'JAN' - """ - if hasattr(source, 'freqstr'): - source = source.freqstr - source = source.upper() - if '-' not in source: - return default - else: - return source.split('-')[1] - - cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): """ convert the ndarray according to the unit @@ -1849,26 +1795,6 @@ cdef inline _to_i8(object val): return val -cpdef pydt_to_i8(object pydt): - """ - Convert to int64 representation compatible with numpy datetime64; converts - to UTC - """ - cdef: - _TSObject ts - - ts = convert_to_tsobject(pydt, None, None, 0, 0) - - return ts.value - - -def i8_to_pydt(int64_t i8, object tzinfo=None): - """ - Inverse of pydt_to_i8 - """ - return Timestamp(i8) - - # ---------------------------------------------------------------------- # Accessors @@ -1892,130 +1818,6 @@ def get_time_micros(ndarray[int64_t] dtindex): return micros -cdef int64_t DAY_NS = 86400000000000LL - - -@cython.wraparound(False) -@cython.boundscheck(False) -def date_normalize(ndarray[int64_t] stamps, tz=None): - cdef: - Py_ssize_t i, n = len(stamps) - pandas_datetimestruct dts - ndarray[int64_t] result = np.empty(n, dtype=np.int64) - - if tz is not None: - tz = maybe_get_tz(tz) - result = _normalize_local(stamps, tz) - else: - with nogil: - for i in range(n): - if stamps[i] == NPY_NAT: - result[i] = NPY_NAT - continue - dt64_to_dtstruct(stamps[i], &dts) - result[i] = _normalized_stamp(&dts) - - return result - - -@cython.wraparound(False) -@cython.boundscheck(False) -cdef _normalize_local(ndarray[int64_t] stamps, object tz): - cdef: - Py_ssize_t n = len(stamps) - ndarray[int64_t] result = np.empty(n, dtype=np.int64) - ndarray[int64_t] trans, deltas, pos - pandas_datetimestruct dts - - if is_utc(tz): - with nogil: - for i in range(n): - if stamps[i] == NPY_NAT: - result[i] = NPY_NAT - continue - dt64_to_dtstruct(stamps[i], &dts) - result[i] = _normalized_stamp(&dts) - elif is_tzlocal(tz): - for i in range(n): - if stamps[i] == NPY_NAT: - result[i] = NPY_NAT - continue - dt64_to_dtstruct(stamps[i], &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, - dts.min, dts.sec, dts.us, tz) - delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 - dt64_to_dtstruct(stamps[i] + delta, &dts) - result[i] = 
_normalized_stamp(&dts) - else: - # Adjust datetime64 timestamp, recompute datetimestruct - trans, deltas, typ = get_dst_info(tz) - - _pos = trans.searchsorted(stamps, side='right') - 1 - if _pos.dtype != np.int64: - _pos = _pos.astype(np.int64) - pos = _pos - - # statictzinfo - if typ not in ['pytz', 'dateutil']: - for i in range(n): - if stamps[i] == NPY_NAT: - result[i] = NPY_NAT - continue - dt64_to_dtstruct(stamps[i] + deltas[0], &dts) - result[i] = _normalized_stamp(&dts) - else: - for i in range(n): - if stamps[i] == NPY_NAT: - result[i] = NPY_NAT - continue - dt64_to_dtstruct(stamps[i] + deltas[pos[i]], &dts) - result[i] = _normalized_stamp(&dts) - - return result - -cdef inline int64_t _normalized_stamp(pandas_datetimestruct *dts) nogil: - dts.hour = 0 - dts.min = 0 - dts.sec = 0 - dts.us = 0 - dts.ps = 0 - return dtstruct_to_dt64(dts) - - -def dates_normalized(ndarray[int64_t] stamps, tz=None): - cdef: - Py_ssize_t i, n = len(stamps) - ndarray[int64_t] trans, deltas - pandas_datetimestruct dts - - if tz is None or is_utc(tz): - for i in range(n): - dt64_to_dtstruct(stamps[i], &dts) - if (dts.hour + dts.min + dts.sec + dts.us) > 0: - return False - elif is_tzlocal(tz): - for i in range(n): - dt64_to_dtstruct(stamps[i], &dts) - dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, - dts.sec, dts.us, tz) - dt = dt + tz.utcoffset(dt) - if (dt.hour + dt.minute + dt.second + dt.microsecond) > 0: - return False - else: - trans, deltas, typ = get_dst_info(tz) - - for i in range(n): - # Adjust datetime64 timestamp, recompute datetimestruct - pos = trans.searchsorted(stamps[i]) - 1 - inf = tz._transition_info[pos] - - dt64_to_dtstruct(stamps[i] + deltas[pos], &dts) - if (dts.hour + dts.min + dts.sec + dts.us) > 0: - return False - - return True - - # ---------------------------------------------------------------------- # Some general helper functions diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd index 843a688a2630c9..ad817ce8852f25 100644 --- a/pandas/_libs/tslibs/conversion.pxd +++ b/pandas/_libs/tslibs/conversion.pxd @@ -26,3 +26,5 @@ cdef void _localize_tso(_TSObject obj, object tz) cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2) cdef int64_t get_datetime64_nanos(object val) except? -1 + +cpdef int64_t pydt_to_i8(object pydt) except? -1 diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 61efc865112a9d..c175769dc725ee 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -53,7 +53,6 @@ UTC = pytz.UTC # ---------------------------------------------------------------------- # Misc Helpers - # TODO: How to declare np.datetime64 as the input type? cdef inline int64_t get_datetime64_nanos(object val) except? -1: """ @@ -90,6 +89,27 @@ cdef class _TSObject: return self.value +cpdef int64_t pydt_to_i8(object pydt) except? 
-1: + """ + Convert to int64 representation compatible with numpy datetime64; converts + to UTC + + Parameters + ---------- + pydt : object + + Returns + ------- + i8value : np.int64 + """ + cdef: + _TSObject ts + + ts = convert_to_tsobject(pydt, None, None, 0, 0) + + return ts.value + + cdef convert_to_tsobject(object ts, object tz, object unit, bint dayfirst, bint yearfirst): """ @@ -334,18 +354,18 @@ cdef inline void _localize_tso(_TSObject obj, object tz): Py_ssize_t delta, posn datetime dt + assert obj.tzinfo is None + if is_utc(tz): - obj.tzinfo = tz + pass + elif obj.value == NPY_NAT: + pass elif is_tzlocal(tz): dt64_to_dtstruct(obj.value, &obj.dts) dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, obj.dts.hour, obj.dts.min, obj.dts.sec, obj.dts.us, tz) delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 - if obj.value != NPY_NAT: - dt64_to_dtstruct(obj.value + delta, &obj.dts) - else: - dt64_to_dtstruct(obj.value, &obj.dts) - obj.tzinfo = tz + dt64_to_dtstruct(obj.value + delta, &obj.dts) else: # Adjust datetime64 timestamp, recompute datetimestruct trans, deltas, typ = get_dst_info(tz) @@ -355,26 +375,17 @@ cdef inline void _localize_tso(_TSObject obj, object tz): # static/pytz/dateutil specific code if is_fixed_offset(tz): # statictzinfo - if len(deltas) > 0 and obj.value != NPY_NAT: - dt64_to_dtstruct(obj.value + deltas[0], &obj.dts) - else: - dt64_to_dtstruct(obj.value, &obj.dts) - obj.tzinfo = tz + assert len(deltas) == 1, len(deltas) + dt64_to_dtstruct(obj.value + deltas[0], &obj.dts) elif treat_tz_as_pytz(tz): - inf = tz._transition_info[pos] - if obj.value != NPY_NAT: - dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) - else: - dt64_to_dtstruct(obj.value, &obj.dts) - obj.tzinfo = tz._tzinfos[inf] + tz = tz._tzinfos[tz._transition_info[pos]] + dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) elif treat_tz_as_dateutil(tz): - if obj.value != NPY_NAT: - dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) - else: - dt64_to_dtstruct(obj.value, &obj.dts) - obj.tzinfo = tz + dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts) else: - obj.tzinfo = tz + pass + + obj.tzinfo = tz cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz): @@ -401,7 +412,7 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): """ Convert the val (in i8) from timezone1 to timezone2 - This is a single timezone versoin of tz_convert + This is a single timezone version of tz_convert Parameters ---------- @@ -422,6 +433,9 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): pandas_datetimestruct dts datetime dt + # See GH#17734 We should always be converting either from UTC or to UTC + assert (is_utc(tz1) or tz1 == 'UTC') or (is_utc(tz2) or tz2 == 'UTC') + if val == NPY_NAT: return val @@ -444,8 +458,8 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): if get_timezone(tz2) == 'UTC': return utc_date - if is_tzlocal(tz2): - dt64_to_dtstruct(val, &dts) + elif is_tzlocal(tz2): + dt64_to_dtstruct(utc_date, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz2) delta = int(get_utcoffset(tz2, dt).total_seconds()) * 1000000000 @@ -782,3 +796,183 @@ cdef inline str _render_tstamp(int64_t val): """ Helper function to render exception messages""" from pandas._libs.tslib import Timestamp return str(Timestamp(val)) + + +# ---------------------------------------------------------------------- +# Normalization + +@cython.wraparound(False) +@cython.boundscheck(False) +def 
date_normalize(ndarray[int64_t] stamps, tz=None): + """ + Normalize each of the (nanosecond) timestamps in the given array by + rounding down to the beginning of the day (i.e. midnight). If `tz` + is not None, then this is midnight for this timezone. + + Parameters + ---------- + stamps : int64 ndarray + tz : tzinfo or None + + Returns + ------- + result : int64 ndarray of converted of normalized nanosecond timestamps + """ + cdef: + Py_ssize_t i, n = len(stamps) + pandas_datetimestruct dts + ndarray[int64_t] result = np.empty(n, dtype=np.int64) + + if tz is not None: + tz = maybe_get_tz(tz) + result = _normalize_local(stamps, tz) + else: + with nogil: + for i in range(n): + if stamps[i] == NPY_NAT: + result[i] = NPY_NAT + continue + dt64_to_dtstruct(stamps[i], &dts) + result[i] = _normalized_stamp(&dts) + + return result + + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef ndarray[int64_t] _normalize_local(ndarray[int64_t] stamps, object tz): + """ + Normalize each of the (nanosecond) timestamps in the given array by + rounding down to the beginning of the day (i.e. midnight) for the + given timezone `tz`. + + Parameters + ---------- + stamps : int64 ndarray + tz : tzinfo or None + + Returns + ------- + result : int64 ndarray of converted of normalized nanosecond timestamps + """ + cdef: + Py_ssize_t n = len(stamps) + ndarray[int64_t] result = np.empty(n, dtype=np.int64) + ndarray[int64_t] trans, deltas, pos + pandas_datetimestruct dts + datetime dt + + if is_utc(tz): + with nogil: + for i in range(n): + if stamps[i] == NPY_NAT: + result[i] = NPY_NAT + continue + dt64_to_dtstruct(stamps[i], &dts) + result[i] = _normalized_stamp(&dts) + elif is_tzlocal(tz): + for i in range(n): + if stamps[i] == NPY_NAT: + result[i] = NPY_NAT + continue + dt64_to_dtstruct(stamps[i], &dts) + dt = datetime(dts.year, dts.month, dts.day, dts.hour, + dts.min, dts.sec, dts.us, tz) + delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 + dt64_to_dtstruct(stamps[i] + delta, &dts) + result[i] = _normalized_stamp(&dts) + else: + # Adjust datetime64 timestamp, recompute datetimestruct + trans, deltas, typ = get_dst_info(tz) + + _pos = trans.searchsorted(stamps, side='right') - 1 + if _pos.dtype != np.int64: + _pos = _pos.astype(np.int64) + pos = _pos + + # statictzinfo + if typ not in ['pytz', 'dateutil']: + for i in range(n): + if stamps[i] == NPY_NAT: + result[i] = NPY_NAT + continue + dt64_to_dtstruct(stamps[i] + deltas[0], &dts) + result[i] = _normalized_stamp(&dts) + else: + for i in range(n): + if stamps[i] == NPY_NAT: + result[i] = NPY_NAT + continue + dt64_to_dtstruct(stamps[i] + deltas[pos[i]], &dts) + result[i] = _normalized_stamp(&dts) + + return result + + +cdef inline int64_t _normalized_stamp(pandas_datetimestruct *dts) nogil: + """ + Normalize the given datetimestruct to midnight, then convert to int64_t. + + Parameters + ---------- + *dts : pointer to pandas_datetimestruct + + Returns + ------- + stamp : int64 + """ + dts.hour = 0 + dts.min = 0 + dts.sec = 0 + dts.us = 0 + dts.ps = 0 + return dtstruct_to_dt64(dts) + + +def is_date_array_normalized(ndarray[int64_t] stamps, tz=None): + """ + Check if all of the given (nanosecond) timestamps are normalized to + midnight, i.e. hour == minute == second == 0. If the optional timezone + `tz` is not None, then this is midnight for this timezone. 
+ + Parameters + ---------- + stamps : int64 ndarray + tz : tzinfo or None + + Returns + ------- + is_normalized : bool True if all stamps are normalized + """ + cdef: + Py_ssize_t i, n = len(stamps) + ndarray[int64_t] trans, deltas + pandas_datetimestruct dts + datetime dt + + if tz is None or is_utc(tz): + for i in range(n): + dt64_to_dtstruct(stamps[i], &dts) + if (dts.hour + dts.min + dts.sec + dts.us) > 0: + return False + elif is_tzlocal(tz): + for i in range(n): + dt64_to_dtstruct(stamps[i], &dts) + dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, + dts.sec, dts.us, tz) + dt = dt + tz.utcoffset(dt) + if (dt.hour + dt.minute + dt.second + dt.microsecond) > 0: + return False + else: + trans, deltas, typ = get_dst_info(tz) + + for i in range(n): + # Adjust datetime64 timestamp, recompute datetimestruct + pos = trans.searchsorted(stamps[i]) - 1 + inf = tz._transition_info[pos] + + dt64_to_dtstruct(stamps[i] + deltas[pos], &dts) + if (dts.hour + dts.min + dts.sec + dts.us) > 0: + return False + + return True diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index b40646295cce57..3ab84853dfc4ac 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -17,7 +17,8 @@ from numpy cimport ndarray, int64_t, int32_t, int8_t np.import_array() -from np_datetime cimport pandas_datetimestruct, dt64_to_dtstruct +from np_datetime cimport (pandas_datetimestruct, pandas_timedeltastruct, + dt64_to_dtstruct, td64_to_tdstruct) from datetime cimport ( days_per_month_table, @@ -545,6 +546,123 @@ def get_date_field(ndarray[int64_t] dtindex, object field): raise ValueError("Field %s not supported" % field) +@cython.wraparound(False) +@cython.boundscheck(False) +def get_timedelta_field(ndarray[int64_t] tdindex, object field): + """ + Given a int64-based timedelta index, extract the days, hrs, sec., + field and return an array of these values. 
+ """ + cdef: + Py_ssize_t i, count = 0 + ndarray[int32_t] out + pandas_timedeltastruct tds + + count = len(tdindex) + out = np.empty(count, dtype='i4') + + if field == 'days': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + td64_to_tdstruct(tdindex[i], &tds) + out[i] = tds.days + return out + + elif field == 'h': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + td64_to_tdstruct(tdindex[i], &tds) + out[i] = tds.hrs + return out + + elif field == 's': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + td64_to_tdstruct(tdindex[i], &tds) + out[i] = tds.sec + return out + + elif field == 'seconds': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + td64_to_tdstruct(tdindex[i], &tds) + out[i] = tds.seconds + return out + + elif field == 'ms': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + td64_to_tdstruct(tdindex[i], &tds) + out[i] = tds.ms + return out + + elif field == 'microseconds': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + td64_to_tdstruct(tdindex[i], &tds) + out[i] = tds.microseconds + return out + + elif field == 'us': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + td64_to_tdstruct(tdindex[i], &tds) + out[i] = tds.us + return out + + elif field == 'ns': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + td64_to_tdstruct(tdindex[i], &tds) + out[i] = tds.ns + return out + + elif field == 'nanoseconds': + with nogil: + for i in range(count): + if tdindex[i] == NPY_NAT: + out[i] = -1 + continue + + td64_to_tdstruct(tdindex[i], &tds) + out[i] = tds.nanoseconds + return out + + raise ValueError("Field %s not supported" % field) + + cdef inline int days_in_month(pandas_datetimestruct dts) nogil: return days_per_month_table[is_leapyear(dts.year)][dts.month - 1] diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index 9d810bfb411afe..2a700d52eaaf3f 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -15,7 +15,7 @@ from util cimport is_integer_object # hack to handle WOM-1MON opattern = re.compile( - r'([\-]?\d*|[\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)' + r'([+\-]?\d*|[+\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)' ) _INVALID_FREQ_ERROR = "Invalid frequency: {0}" diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 1ae0499f90c0dd..3692822ada135f 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -30,6 +30,10 @@ cdef extern from "../src/datetime/np_datetime.h": int64_t year int32_t month, day, hour, min, sec, us, ps, as + ctypedef struct pandas_timedeltastruct: + int64_t days + int32_t hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds + ctypedef enum PANDAS_DATETIMEUNIT: PANDAS_FR_Y PANDAS_FR_M @@ -54,6 +58,7 @@ cdef check_dts_bounds(pandas_datetimestruct *dts) cdef int64_t dtstruct_to_dt64(pandas_datetimestruct* dts) nogil cdef void dt64_to_dtstruct(int64_t dt64, pandas_datetimestruct* out) nogil +cdef void td64_to_tdstruct(int64_t td64, pandas_timedeltastruct* out) nogil cdef int64_t pydatetime_to_dt64(datetime val, pandas_datetimestruct *dts) cdef int64_t pydate_to_dt64(date val, pandas_datetimestruct *dts) diff --git 
a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index abd6c59ea62443..72c028161a9378 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -26,6 +26,11 @@ cdef extern from "../src/datetime/np_datetime.h": PANDAS_DATETIMEUNIT fr, pandas_datetimestruct *result) nogil + void pandas_timedelta_to_timedeltastruct(npy_timedelta val, + PANDAS_DATETIMEUNIT fr, + pandas_timedeltastruct *result + ) nogil + pandas_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS # ---------------------------------------------------------------------- @@ -127,6 +132,13 @@ cdef inline void dt64_to_dtstruct(int64_t dt64, pandas_datetime_to_datetimestruct(dt64, PANDAS_FR_ns, out) return +cdef inline void td64_to_tdstruct(int64_t td64, + pandas_timedeltastruct* out) nogil: + """Convenience function to call pandas_timedelta_to_timedeltastruct + with the by-far-most-common frequency PANDAS_FR_ns""" + pandas_timedelta_to_timedeltastruct(td64, PANDAS_FR_ns, out) + return + cdef inline int64_t pydatetime_to_dt64(datetime val, pandas_datetimestruct *dts): diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 87be9fa9101012..2d8ce4c59fedcc 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -4,7 +4,7 @@ cimport cython import time -from cpython.datetime cimport timedelta, time as dt_time +from cpython.datetime cimport datetime, timedelta, time as dt_time from dateutil.relativedelta import relativedelta @@ -13,12 +13,12 @@ cimport numpy as np np.import_array() -from util cimport is_string_object +from util cimport is_string_object, is_integer_object -from pandas._libs.tslib import pydt_to_i8 +from pandas._libs.tslib import monthrange +from conversion cimport tz_convert_single, pydt_to_i8 from frequencies cimport get_freq_code -from conversion cimport tz_convert_single # --------------------------------------------------------------------- # Constants @@ -375,3 +375,56 @@ class BaseOffset(_BaseOffset): # i.e. isinstance(other, (ABCDatetimeIndex, ABCSeries)) return other - self return -self + other + + +# ---------------------------------------------------------------------- +# RelativeDelta Arithmetic + + +cpdef datetime shift_month(datetime stamp, int months, object day_opt=None): + """ + Given a datetime (or Timestamp) `stamp`, an integer `months` and an + option `day_opt`, return a new datetimelike that many months later, + with day determined by `day_opt` using relativedelta semantics. 
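Before the parameter list below, a small hedged sketch of the intended day_opt semantics; the expected results in the comments are worked out by hand from the logic above, they are not doctests from the patch.

    from datetime import datetime

    stamp = datetime(2017, 1, 31)

    # day_opt=None keeps the day, clipped to the target month's length:
    #   shift_month(stamp, 1)           -> datetime(2017, 2, 28)
    # day_opt='start' pins day=1:
    #   shift_month(stamp, 1, 'start')  -> datetime(2017, 2, 1)
    # day_opt='end' pins the last day of the target month:
    #   shift_month(stamp, -2, 'end')   -> datetime(2016, 11, 30)
    # an integer day_opt is capped at the month length:
    #   shift_month(stamp, 1, 35)       -> datetime(2017, 2, 28)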
+ + Scalar analogue of tslib.shift_months + + Parameters + ---------- + stamp : datetime or Timestamp + months : int + day_opt : None, 'start', 'end', or an integer + None: returned datetimelike has the same day as the input, or the + last day of the month if the new month is too short + 'start': returned datetimelike has day=1 + 'end': returned datetimelike has day on the last day of the month + int: returned datetimelike has day equal to day_opt + + Returns + ------- + shifted : datetime or Timestamp (same as input `stamp`) + """ + cdef: + int year, month, day + int dim, dy + + dy = (stamp.month + months) // 12 + month = (stamp.month + months) % 12 + + if month == 0: + month = 12 + dy -= 1 + year = stamp.year + dy + + dim = monthrange(year, month)[1] + if day_opt is None: + day = min(stamp.day, dim) + elif day_opt == 'start': + day = 1 + elif day_opt == 'end': + day = dim + elif is_integer_object(day_opt): + day = min(day_opt, dim) + else: + raise ValueError(day_opt) + return stamp.replace(year=year, month=month, day=day) diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx new file mode 100644 index 00000000000000..b590121b9021ad --- /dev/null +++ b/pandas/_libs/tslibs/resolution.pyx @@ -0,0 +1,652 @@ +# -*- coding: utf-8 -*- +# cython: profile=False + +from cython cimport Py_ssize_t + +import numpy as np +cimport numpy as np +from numpy cimport ndarray, int64_t +np.import_array() + +from util cimport is_string_object, get_nat + +from khash cimport ( + khiter_t, + kh_destroy_int64, kh_put_int64, + kh_init_int64, kh_int64_t, + kh_resize_int64, kh_get_int64) + +from cpython.datetime cimport datetime + +from np_datetime cimport (pandas_datetimestruct, + dtstruct_to_dt64, dt64_to_dtstruct) +from frequencies cimport get_freq_code +from timezones cimport ( + is_utc, is_tzlocal, + maybe_get_tz, get_dst_info, get_utcoffset) +from fields import build_field_sarray +from conversion import tz_convert + +from pandas._libs.properties import cache_readonly +from pandas._libs.tslib import Timestamp + +from pandas.core.algorithms import unique # TODO: Avoid this non-cython import + +# ---------------------------------------------------------------------- +# Constants + +cdef int64_t NPY_NAT = get_nat() + +cdef int RESO_NS = 0 +cdef int RESO_US = 1 +cdef int RESO_MS = 2 +cdef int RESO_SEC = 3 +cdef int RESO_MIN = 4 +cdef int RESO_HR = 5 +cdef int RESO_DAY = 6 + +_ONE_MICRO = 1000L +_ONE_MILLI = _ONE_MICRO * 1000 +_ONE_SECOND = _ONE_MILLI * 1000 +_ONE_MINUTE = 60 * _ONE_SECOND +_ONE_HOUR = 60 * _ONE_MINUTE +_ONE_DAY = 24 * _ONE_HOUR + +DAYS = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'] +_weekday_rule_aliases = dict((k, v) for k, v in enumerate(DAYS)) + +_MONTHS = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', + 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'] +_MONTH_ALIASES = {(k + 1): v for k, v in enumerate(_MONTHS)} + +# ---------------------------------------------------------------------- + +cpdef resolution(ndarray[int64_t] stamps, tz=None): + cdef: + Py_ssize_t i, n = len(stamps) + pandas_datetimestruct dts + int reso = RESO_DAY, curr_reso + + if tz is not None: + tz = maybe_get_tz(tz) + return _reso_local(stamps, tz) + else: + for i in range(n): + if stamps[i] == NPY_NAT: + continue + dt64_to_dtstruct(stamps[i], &dts) + curr_reso = _reso_stamp(&dts) + if curr_reso < reso: + reso = curr_reso + return reso + + +cdef _reso_local(ndarray[int64_t] stamps, object tz): + cdef: + Py_ssize_t n = len(stamps) + int reso = RESO_DAY, curr_reso + ndarray[int64_t] trans, deltas, pos + 
pandas_datetimestruct dts + + if is_utc(tz): + for i in range(n): + if stamps[i] == NPY_NAT: + continue + dt64_to_dtstruct(stamps[i], &dts) + curr_reso = _reso_stamp(&dts) + if curr_reso < reso: + reso = curr_reso + elif is_tzlocal(tz): + for i in range(n): + if stamps[i] == NPY_NAT: + continue + dt64_to_dtstruct(stamps[i], &dts) + dt = datetime(dts.year, dts.month, dts.day, dts.hour, + dts.min, dts.sec, dts.us, tz) + delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 + dt64_to_dtstruct(stamps[i] + delta, &dts) + curr_reso = _reso_stamp(&dts) + if curr_reso < reso: + reso = curr_reso + else: + # Adjust datetime64 timestamp, recompute datetimestruct + trans, deltas, typ = get_dst_info(tz) + + _pos = trans.searchsorted(stamps, side='right') - 1 + if _pos.dtype != np.int64: + _pos = _pos.astype(np.int64) + pos = _pos + + # statictzinfo + if typ not in ['pytz', 'dateutil']: + for i in range(n): + if stamps[i] == NPY_NAT: + continue + dt64_to_dtstruct(stamps[i] + deltas[0], &dts) + curr_reso = _reso_stamp(&dts) + if curr_reso < reso: + reso = curr_reso + else: + for i in range(n): + if stamps[i] == NPY_NAT: + continue + dt64_to_dtstruct(stamps[i] + deltas[pos[i]], &dts) + curr_reso = _reso_stamp(&dts) + if curr_reso < reso: + reso = curr_reso + + return reso + + +cdef inline int _reso_stamp(pandas_datetimestruct *dts): + if dts.us != 0: + if dts.us % 1000 == 0: + return RESO_MS + return RESO_US + elif dts.sec != 0: + return RESO_SEC + elif dts.min != 0: + return RESO_MIN + elif dts.hour != 0: + return RESO_HR + return RESO_DAY + + +def get_freq_group(freq): + """ + Return frequency code group of given frequency str or offset. + + Example + ------- + >>> get_freq_group('W-MON') + 4000 + + >>> get_freq_group('W-FRI') + 4000 + """ + if getattr(freq, '_typ', None) == 'dateoffset': + freq = freq.rule_code + + if is_string_object(freq): + base, mult = get_freq_code(freq) + freq = base + elif isinstance(freq, int): + pass + else: + raise ValueError('input must be str, offset or int') + return (freq // 1000) * 1000 + + +class Resolution(object): + + # Note: cython won't allow us to reference the cdef versions at the + # module level + RESO_NS = 0 + RESO_US = 1 + RESO_MS = 2 + RESO_SEC = 3 + RESO_MIN = 4 + RESO_HR = 5 + RESO_DAY = 6 + + _reso_str_map = { + RESO_NS: 'nanosecond', + RESO_US: 'microsecond', + RESO_MS: 'millisecond', + RESO_SEC: 'second', + RESO_MIN: 'minute', + RESO_HR: 'hour', + RESO_DAY: 'day'} + + # factor to multiply a value by to convert it to the next finer grained + # resolution + _reso_mult_map = { + RESO_NS: None, + RESO_US: 1000, + RESO_MS: 1000, + RESO_SEC: 1000, + RESO_MIN: 60, + RESO_HR: 60, + RESO_DAY: 24} + + _reso_str_bump_map = { + 'D': 'H', + 'H': 'T', + 'T': 'S', + 'S': 'L', + 'L': 'U', + 'U': 'N', + 'N': None} + + _str_reso_map = dict([(v, k) for k, v in _reso_str_map.items()]) + + _reso_freq_map = { + 'year': 'A', + 'quarter': 'Q', + 'month': 'M', + 'day': 'D', + 'hour': 'H', + 'minute': 'T', + 'second': 'S', + 'millisecond': 'L', + 'microsecond': 'U', + 'nanosecond': 'N'} + + _freq_reso_map = dict([(v, k) + for k, v in _reso_freq_map.items()]) + + @classmethod + def get_str(cls, reso): + """ + Return resolution str against resolution code. + + Example + ------- + >>> Resolution.get_str(Resolution.RESO_SEC) + 'second' + """ + return cls._reso_str_map.get(reso, 'day') + + @classmethod + def get_reso(cls, resostr): + """ + Return resolution str against resolution code. 
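An editor's aside on the module moved into resolution.pyx: the module-level resolution() above keeps the minimum RESO_* code it sees, so lower codes mean finer resolution, and this is what DatetimeIndex.resolution reports at the user level. A short illustrative sketch, with index values chosen only for the example:

    import pandas as pd

    # One stamp has a non-zero minute component, so the finest code wins:
    dti = pd.DatetimeIndex(['2017-01-01', '2017-01-01 12:30'])
    dti.resolution      # 'minute'

    # All-midnight stamps stay at day resolution:
    pd.date_range('2017-01-01', periods=3, freq='D').resolution   # 'day'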
+ + Example + ------- + >>> Resolution.get_reso('second') + 2 + + >>> Resolution.get_reso('second') == Resolution.RESO_SEC + True + """ + return cls._str_reso_map.get(resostr, cls.RESO_DAY) + + @classmethod + def get_freq_group(cls, resostr): + """ + Return frequency str against resolution str. + + Example + ------- + >>> f.Resolution.get_freq_group('day') + 4000 + """ + return get_freq_group(cls.get_freq(resostr)) + + @classmethod + def get_freq(cls, resostr): + """ + Return frequency str against resolution str. + + Example + ------- + >>> f.Resolution.get_freq('day') + 'D' + """ + return cls._reso_freq_map[resostr] + + @classmethod + def get_str_from_freq(cls, freq): + """ + Return resolution str against frequency str. + + Example + ------- + >>> Resolution.get_str_from_freq('H') + 'hour' + """ + return cls._freq_reso_map.get(freq, 'day') + + @classmethod + def get_reso_from_freq(cls, freq): + """ + Return resolution code against frequency str. + + Example + ------- + >>> Resolution.get_reso_from_freq('H') + 4 + + >>> Resolution.get_reso_from_freq('H') == Resolution.RESO_HR + True + """ + return cls.get_reso(cls.get_str_from_freq(freq)) + + @classmethod + def get_stride_from_decimal(cls, value, freq): + """ + Convert freq with decimal stride into a higher freq with integer stride + + Parameters + ---------- + value : integer or float + freq : string + Frequency string + + Raises + ------ + ValueError + If the float cannot be converted to an integer at any resolution. + + Example + ------- + >>> Resolution.get_stride_from_decimal(1.5, 'T') + (90, 'S') + + >>> Resolution.get_stride_from_decimal(1.04, 'H') + (3744, 'S') + + >>> Resolution.get_stride_from_decimal(1, 'D') + (1, 'D') + """ + if np.isclose(value % 1, 0): + return int(value), freq + else: + start_reso = cls.get_reso_from_freq(freq) + if start_reso == 0: + raise ValueError("Could not convert to integer offset " + "at any resolution") + + next_value = cls._reso_mult_map[start_reso] * value + next_name = cls._reso_str_bump_map[freq] + return cls.get_stride_from_decimal(next_value, next_name) + + +# ---------------------------------------------------------------------- +# Frequency Inference + + +# TODO: this is non performiant logic here (and duplicative) and this +# simply should call unique_1d directly +# plus no reason to depend on khash directly +cdef unique_deltas(ndarray[int64_t] arr): + cdef: + Py_ssize_t i, n = len(arr) + int64_t val + khiter_t k + kh_int64_t *table + int ret = 0 + list uniques = [] + + table = kh_init_int64() + kh_resize_int64(table, 10) + for i in range(n - 1): + val = arr[i + 1] - arr[i] + k = kh_get_int64(table, val) + if k == table.n_buckets: + kh_put_int64(table, val, &ret) + uniques.append(val) + kh_destroy_int64(table) + + result = np.array(uniques, dtype=np.int64) + result.sort() + return result + + +def _is_multiple(us, mult): + return us % mult == 0 + + +def _maybe_add_count(base, count): + if count != 1: + return '{count}{base}'.format(count=int(count), base=base) + else: + return base + + +class _FrequencyInferer(object): + """ + Not sure if I can avoid the state machine here + """ + + def __init__(self, index, warn=True): + self.index = index + self.values = np.asarray(index).view('i8') + + # This moves the values, which are implicitly in UTC, to the + # the timezone so they are in local time + if hasattr(index, 'tz'): + if index.tz is not None: + self.values = tz_convert(self.values, 'UTC', index.tz) + + self.warn = warn + + if len(index) < 3: + raise ValueError('Need at least 3 dates to 
infer frequency') + + self.is_monotonic = (self.index.is_monotonic_increasing or + self.index.is_monotonic_decreasing) + + @cache_readonly + def deltas(self): + return unique_deltas(self.values) + + @cache_readonly + def deltas_asi8(self): + return unique_deltas(self.index.asi8) + + @cache_readonly + def is_unique(self): + return len(self.deltas) == 1 + + @cache_readonly + def is_unique_asi8(self): + return len(self.deltas_asi8) == 1 + + def get_freq(self): + if not self.is_monotonic or not self.index.is_unique: + return None + + delta = self.deltas[0] + if _is_multiple(delta, _ONE_DAY): + return self._infer_daily_rule() + else: + # Business hourly, maybe. 17: one day / 65: one weekend + if self.hour_deltas in ([1, 17], [1, 65], [1, 17, 65]): + return 'BH' + # Possibly intraday frequency. Here we use the + # original .asi8 values as the modified values + # will not work around DST transitions. See #8772 + elif not self.is_unique_asi8: + return None + delta = self.deltas_asi8[0] + if _is_multiple(delta, _ONE_HOUR): + # Hours + return _maybe_add_count('H', delta / _ONE_HOUR) + elif _is_multiple(delta, _ONE_MINUTE): + # Minutes + return _maybe_add_count('T', delta / _ONE_MINUTE) + elif _is_multiple(delta, _ONE_SECOND): + # Seconds + return _maybe_add_count('S', delta / _ONE_SECOND) + elif _is_multiple(delta, _ONE_MILLI): + # Milliseconds + return _maybe_add_count('L', delta / _ONE_MILLI) + elif _is_multiple(delta, _ONE_MICRO): + # Microseconds + return _maybe_add_count('U', delta / _ONE_MICRO) + else: + # Nanoseconds + return _maybe_add_count('N', delta) + + @cache_readonly + def day_deltas(self): + return [x / _ONE_DAY for x in self.deltas] + + @cache_readonly + def hour_deltas(self): + return [x / _ONE_HOUR for x in self.deltas] + + @cache_readonly + def fields(self): + return build_field_sarray(self.values) + + @cache_readonly + def rep_stamp(self): + return Timestamp(self.values[0]) + + def month_position_check(self): + # TODO: cythonize this, very slow + calendar_end = True + business_end = True + calendar_start = True + business_start = True + + years = self.fields['Y'] + months = self.fields['M'] + days = self.fields['D'] + weekdays = self.index.dayofweek + + from calendar import monthrange + for y, m, d, wd in zip(years, months, days, weekdays): + + if calendar_start: + calendar_start &= d == 1 + if business_start: + business_start &= d == 1 or (d <= 3 and wd == 0) + + if calendar_end or business_end: + _, daysinmonth = monthrange(y, m) + cal = d == daysinmonth + if calendar_end: + calendar_end &= cal + if business_end: + business_end &= cal or (daysinmonth - d < 3 and wd == 4) + elif not calendar_start and not business_start: + break + + if calendar_end: + return 'ce' + elif business_end: + return 'be' + elif calendar_start: + return 'cs' + elif business_start: + return 'bs' + else: + return None + + @cache_readonly + def mdiffs(self): + nmonths = self.fields['Y'] * 12 + self.fields['M'] + return unique_deltas(nmonths.astype('i8')) + + @cache_readonly + def ydiffs(self): + return unique_deltas(self.fields['Y'].astype('i8')) + + def _infer_daily_rule(self): + annual_rule = self._get_annual_rule() + if annual_rule: + nyears = self.ydiffs[0] + month = _MONTH_ALIASES[self.rep_stamp.month] + alias = '{prefix}-{month}'.format(prefix=annual_rule, month=month) + return _maybe_add_count(alias, nyears) + + quarterly_rule = self._get_quarterly_rule() + if quarterly_rule: + nquarters = self.mdiffs[0] / 3 + mod_dict = {0: 12, 2: 11, 1: 10} + month = _MONTH_ALIASES[mod_dict[self.rep_stamp.month % 
3]] + alias = '{prefix}-{month}'.format(prefix=quarterly_rule, + month=month) + return _maybe_add_count(alias, nquarters) + + monthly_rule = self._get_monthly_rule() + if monthly_rule: + return _maybe_add_count(monthly_rule, self.mdiffs[0]) + + if self.is_unique: + days = self.deltas[0] / _ONE_DAY + if days % 7 == 0: + # Weekly + day = _weekday_rule_aliases[self.rep_stamp.weekday()] + return _maybe_add_count('W-{day}'.format(day=day), days / 7) + else: + return _maybe_add_count('D', days) + + if self._is_business_daily(): + return 'B' + + wom_rule = self._get_wom_rule() + if wom_rule: + return wom_rule + + def _get_annual_rule(self): + if len(self.ydiffs) > 1: + return None + + if len(unique(self.fields['M'])) > 1: + return None + + pos_check = self.month_position_check() + return {'cs': 'AS', 'bs': 'BAS', + 'ce': 'A', 'be': 'BA'}.get(pos_check) + + def _get_quarterly_rule(self): + if len(self.mdiffs) > 1: + return None + + if not self.mdiffs[0] % 3 == 0: + return None + + pos_check = self.month_position_check() + return {'cs': 'QS', 'bs': 'BQS', + 'ce': 'Q', 'be': 'BQ'}.get(pos_check) + + def _get_monthly_rule(self): + if len(self.mdiffs) > 1: + return None + pos_check = self.month_position_check() + return {'cs': 'MS', 'bs': 'BMS', + 'ce': 'M', 'be': 'BM'}.get(pos_check) + + def _is_business_daily(self): + # quick check: cannot be business daily + if self.day_deltas != [1, 3]: + return False + + # probably business daily, but need to confirm + first_weekday = self.index[0].weekday() + shifts = np.diff(self.index.asi8) + shifts = np.floor_divide(shifts, _ONE_DAY) + weekdays = np.mod(first_weekday + np.cumsum(shifts), 7) + return np.all(((weekdays == 0) & (shifts == 3)) | + ((weekdays > 0) & (weekdays <= 4) & (shifts == 1))) + + def _get_wom_rule(self): + # wdiffs = unique(np.diff(self.index.week)) + # We also need -47, -49, -48 to catch index spanning year boundary + # if not lib.ismember(wdiffs, set([4, 5, -47, -49, -48])).all(): + # return None + + weekdays = unique(self.index.weekday) + if len(weekdays) > 1: + return None + + week_of_months = unique((self.index.day - 1) // 7) + # Only attempt to infer up to WOM-4. 
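For context on what this week-of-month branch, and the inferer generally, is expected to produce, a hedged sketch via the public pd.infer_freq entry point; the dates are assumptions picked so the relevant rules apply.

    import pandas as pd

    # Plain weekly data infers a W- rule:
    pd.infer_freq(pd.date_range('2017-01-02', periods=6, freq='W-MON'))
    # -> 'W-MON'

    # Third-Friday-of-the-month data lands in the week-of-month branch here:
    pd.infer_freq(pd.date_range('2017-01-20', periods=4, freq='WOM-3FRI'))
    # -> 'WOM-3FRI'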
See #9425 + week_of_months = week_of_months[week_of_months < 4] + if len(week_of_months) == 0 or len(week_of_months) > 1: + return None + + # get which week + week = week_of_months[0] + 1 + wd = _weekday_rule_aliases[weekdays[0]] + + return 'WOM-{week}{weekday}'.format(week=week, weekday=wd) + + +class _TimedeltaFrequencyInferer(_FrequencyInferer): + + def _infer_daily_rule(self): + if self.is_unique: + days = self.deltas[0] / _ONE_DAY + if days % 7 == 0: + # Weekly + wd = _weekday_rule_aliases[self.rep_stamp.weekday()] + alias = 'W-{weekday}'.format(weekday=wd) + return _maybe_add_count(alias, days / 7) + else: + return _maybe_add_count('D', days) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 869ff5ee77bda9..aba213122ea31c 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -26,7 +26,8 @@ from util cimport (is_timedelta64_object, is_datetime64_object, is_integer_object, is_float_object, is_string_object) -from np_datetime cimport cmp_scalar, reverse_ops +from np_datetime cimport (cmp_scalar, reverse_ops, td64_to_tdstruct, + pandas_timedeltastruct) from nattype import nat_strings, NaT from nattype cimport _checknull_with_nat @@ -584,65 +585,26 @@ cdef class _Timedelta(timedelta): """ compute the components """ - cdef int64_t sfrac, ifrac, frac, ivalue = self.value - if self.is_populated: return - # put frac in seconds - frac = ivalue / (1000 * 1000 * 1000) - if frac < 0: - self._sign = -1 + cdef: + pandas_timedeltastruct tds - # even fraction - if (-frac % 86400) != 0: - self._d = -frac / 86400 + 1 - frac += 86400 * self._d - else: - frac = -frac + td64_to_tdstruct(self.value, &tds) + self._d = tds.days + if self._d < 0: + self._sign = -1 else: self._sign = 1 - self._d = 0 - - if frac >= 86400: - self._d += frac / 86400 - frac -= self._d * 86400 - - if frac >= 3600: - self._h = frac / 3600 - frac -= self._h * 3600 - else: - self._h = 0 - - if frac >= 60: - self._m = frac / 60 - frac -= self._m * 60 - else: - self._m = 0 - - if frac >= 0: - self._s = frac - frac -= self._s - else: - self._s = 0 - - sfrac = (self._h * 3600 + self._m * 60 - + self._s) * (1000 * 1000 * 1000) - if self._sign < 0: - ifrac = ivalue + self._d * DAY_NS - sfrac - else: - ifrac = ivalue - (self._d * DAY_NS + sfrac) - - if ifrac != 0: - self._ms = ifrac / (1000 * 1000) - ifrac -= self._ms * 1000 * 1000 - self._us = ifrac / 1000 - ifrac -= self._us * 1000 - self._ns = ifrac - else: - self._ms = 0 - self._us = 0 - self._ns = 0 + self._h = tds.hrs + self._m = tds.min + self._s = tds.sec + self._ms = tds.ms + self._us = tds.us + self._ns = tds.ns + self._seconds = tds.seconds + self._microseconds = tds.microseconds self.is_populated = 1 @@ -671,10 +633,6 @@ cdef class _Timedelta(timedelta): def components(self): """ Return a Components NamedTuple-like """ self._ensure_components() - if self._sign < 0: - return Components(-self._d, self._h, self._m, self._s, - self._ms, self._us, self._ns) - # return the named tuple return Components(self._d, self._h, self._m, self._s, self._ms, self._us, self._ns) @@ -717,8 +675,6 @@ cdef class _Timedelta(timedelta): .components will return the shown components """ self._ensure_components() - if self._sign < 0: - return -1 * self._d return self._d @property @@ -729,7 +685,7 @@ cdef class _Timedelta(timedelta): .components will return the shown components """ self._ensure_components() - return self._h * 3600 + self._m * 60 + self._s + return self._seconds @property def microseconds(self): @@ -739,7 +695,7 
@@ cdef class _Timedelta(timedelta): .components will return the shown components """ self._ensure_components() - return self._ms * 1000 + self._us + return self._microseconds @property def nanoseconds(self): @@ -778,9 +734,9 @@ cdef class _Timedelta(timedelta): if format == 'all': seconds_pretty = "%02d.%03d%03d%03d" % ( self._s, self._ms, self._us, self._ns) - return "%s%d days%s%02d:%02d:%s" % (sign_pretty, self._d, - sign2_pretty, self._h, - self._m, seconds_pretty) + return "%d days%s%02d:%02d:%s" % (self._d, + sign2_pretty, self._h, + self._m, seconds_pretty) # by default not showing nano if self._ms or self._us or self._ns: @@ -794,7 +750,7 @@ cdef class _Timedelta(timedelta): if format == 'even_day': if not subs: - return "%s%d days" % (sign_pretty, self._d) + return "%d days" % (self._d) elif format == 'sub_day': if not self._d: @@ -806,10 +762,10 @@ cdef class _Timedelta(timedelta): self._h, self._m, seconds_pretty) if subs or format=='long': - return "%s%d days%s%02d:%02d:%s" % (sign_pretty, self._d, - sign2_pretty, self._h, - self._m, seconds_pretty) - return "%s%d days" % (sign_pretty, self._d) + return "%d days%s%02d:%02d:%s" % (self._d, + sign2_pretty, self._h, + self._m, seconds_pretty) + return "%d days" % (self._d) def __repr__(self): return "Timedelta('{0}')".format(self._repr_base(format='long')) diff --git a/pandas/_version.py b/pandas/_version.py index 4695b512feff5f..0fdb0efde1f055 100644 --- a/pandas/_version.py +++ b/pandas/_version.py @@ -75,7 +75,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): if e.errno == errno.ENOENT: continue if verbose: - print("unable to run %s" % dispcmd) + print("unable to run {dispcmd}".format(dispcmd=dispcmd)) print(e) return None else: @@ -87,7 +87,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False): stdout = stdout.decode() if p.returncode != 0: if verbose: - print("unable to run %s (error)" % dispcmd) + print("unable to run {dispcmd} (error)".format(dispcmd=dispcmd)) return None return stdout @@ -98,8 +98,10 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): dirname = os.path.basename(root) if not dirname.startswith(parentdir_prefix): if verbose: - print("guessing rootdir is '%s', but '%s' doesn't start with " - "prefix '%s'" % (root, dirname, parentdir_prefix)) + print("guessing rootdir is '{root}', but '{dirname}' " + "doesn't start with prefix '{parentdir_prefix}'".format( + root=root, dirname=dirname, + parentdir_prefix=parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, @@ -154,15 +156,15 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # "stabilization", as well as "HEAD" and "master". tags = set([r for r in refs if re.search(r'\d', r)]) if verbose: - print("discarding '%s', no digits" % ",".join(refs - tags)) + print("discarding '{}', no digits".format(",".join(refs - tags))) if verbose: - print("likely tags: %s" % ",".join(sorted(tags))) + print("likely tags: {}".format(",".join(sorted(tags)))) for ref in sorted(tags): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] if verbose: - print("picking %s" % r) + print("picking {r}".format(r=r)) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None @@ -184,7 +186,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if not os.path.exists(os.path.join(root, ".git")): if verbose: - print("no .git in %s" % root) + print("no .git in {root}".format(root=root)) raise NotThisMethod("no .git directory") GITS = ["git"] @@ -226,18 +228,21 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) + pieces["error"] = ("unable to parse git-describe output: " + "'{describe_out}'".format( + describe_out=describe_out)) return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: - fmt = "tag '%s' doesn't start with prefix '%s'" - print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) + fmt = "tag '{full_tag}' doesn't start with prefix " \ + "'{tag_prefix}'" + print(fmt.format(full_tag=full_tag, tag_prefix=tag_prefix)) + pieces["error"] = ("tag '{full_tag}' doesn't start with " + "prefix '{tag_prefix}'".format( + full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix):] @@ -275,13 +280,13 @@ def render_pep440(pieces): rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) - rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + rendered += "{:d}.g{}".format(pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered = "0+untagged.{:d}.g{}".format(pieces["distance"], + pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered @@ -315,17 +320,17 @@ def render_pep440_post(pieces): if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: - rendered += ".post%d" % pieces["distance"] + rendered += ".post{:d}".format(pieces["distance"]) if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) - rendered += "g%s" % pieces["short"] + rendered += "g{}".format(pieces["short"]) else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" - rendered += "+g%s" % pieces["short"] + rendered += "+g{}".format(pieces["short"]) return rendered @@ -359,7 +364,7 @@ def render_git_describe(pieces): if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + rendered += "-{:d}-g{}".format(pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] @@ -377,7 +382,7 @@ def render_git_describe_long(pieces): if pieces["closest-tag"]: rendered = pieces["closest-tag"] - rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + rendered += "-{:d}-g{}".format(pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] @@ -409,7 +414,7 @@ def render(pieces, style): elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: - raise ValueError("unknown style '%s'" % style) + raise 
ValueError("unknown style '{style}'".format(style=style)) return {"version": rendered, "full-revisionid": pieces["long"], "dirty": pieces["dirty"], "error": None} diff --git a/pandas/conftest.py b/pandas/conftest.py index 90e5ac864e96f4..b9d0087b503068 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1,8 +1,10 @@ import pytest +from distutils.version import LooseVersion import numpy import pandas import pandas.util.testing as tm +import dateutil def pytest_addoption(parser): @@ -65,3 +67,11 @@ def ip(): pytest.importorskip('IPython', minversion="6.0.0") from IPython.core.interactiveshell import InteractiveShell return InteractiveShell() + + +is_dateutil_le_261 = pytest.mark.skipif( + LooseVersion(dateutil.__version__) > '2.6.1', + reason="dateutil api change version") +is_dateutil_gt_261 = pytest.mark.skipif( + LooseVersion(dateutil.__version__) <= '2.6.1', + reason="dateutil stable version") diff --git a/pandas/core/api.py b/pandas/core/api.py index 2f818a400162b3..1f46aaa40e9eb9 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -24,8 +24,8 @@ from pandas.core.panel import Panel, WidePanel from pandas.core.panel4d import Panel4D from pandas.core.reshape.reshape import ( - pivot_simple as pivot, get_dummies, - lreshape, wide_to_long) + pivot_simple as pivot, get_dummies) +from pandas.core.reshape.melt import lreshape, wide_to_long from pandas.core.indexing import IndexSlice from pandas.core.tools.numeric import to_numeric diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 13ea0eaf649303..645921bb007a16 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -25,7 +25,6 @@ is_timedelta64_dtype, is_categorical, is_categorical_dtype, - is_integer_dtype, is_list_like, is_sequence, is_scalar, is_dict_like) @@ -261,6 +260,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None, # c.) 
infer from values if dtype is not None: + # The dtype argument takes precedence over values.dtype (if any) if isinstance(dtype, compat.string_types): if dtype == 'category': dtype = CategoricalDtype(categories, ordered) @@ -275,9 +275,12 @@ def __init__(self, values, categories=None, ordered=None, dtype=None, ordered = dtype.ordered elif is_categorical(values): + # If no "dtype" was passed, use the one from "values", but honor + # the "ordered" and "categories" arguments dtype = values.dtype._from_categorical_dtype(values.dtype, categories, ordered) else: + # If dtype=None and values is not categorical, create a new dtype dtype = CategoricalDtype(categories, ordered) # At this point, dtype is always a CategoricalDtype @@ -294,28 +297,12 @@ def __init__(self, values, categories=None, ordered=None, dtype=None, # sanitize input if is_categorical_dtype(values): + if dtype.categories is None: + dtype = CategoricalDtype(values.categories, dtype.ordered) - # we are either a Series or a CategoricalIndex - if isinstance(values, (ABCSeries, ABCCategoricalIndex)): - values = values._values - - if ordered is None: - ordered = values.ordered - if categories is None: - categories = values.categories - values = values.get_values() - - elif isinstance(values, (ABCIndexClass, ABCSeries)): - # we'll do inference later - pass - - else: - - # on numpy < 1.6 datetimelike get inferred to all i8 by - # _sanitize_array which is fine, but since factorize does this - # correctly no need here this is an issue because _sanitize_array - # also coerces np.nan to a string under certain versions of numpy - # as well + elif not isinstance(values, (ABCIndexClass, ABCSeries)): + # _sanitize_array coerces np.nan to a string under certain versions + # of numpy values = maybe_infer_to_datetimelike(values, convert_dates=True) if not isinstance(values, np.ndarray): values = _convert_to_list_like(values) @@ -335,7 +322,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None, codes, categories = factorize(values, sort=True) except TypeError: codes, categories = factorize(values, sort=False) - if ordered: + if dtype.ordered: # raise, as we don't have a sortable data structure and so # the user should give us one by specifying categories raise TypeError("'values' is not ordered, please " @@ -347,34 +334,18 @@ def __init__(self, values, categories=None, ordered=None, dtype=None, raise NotImplementedError("> 1 ndim Categorical are not " "supported at this time") - if dtype.categories is None: - # we're inferring from values - dtype = CategoricalDtype(categories, ordered) + # we're inferring from values + dtype = CategoricalDtype(categories, dtype.ordered) - else: - # there were two ways if categories are present - # - the old one, where each value is a int pointer to the levels - # array -> not anymore possible, but code outside of pandas could - # call us like that, so make some checks - # - the new one, where each value is also in the categories array - # (or np.nan) + elif is_categorical_dtype(values): + old_codes = (values.cat.codes if isinstance(values, ABCSeries) + else values.codes) + codes = _recode_for_categories(old_codes, values.dtype.categories, + dtype.categories) + else: codes = _get_codes_for_values(values, dtype.categories) - # TODO: check for old style usage. These warnings should be removes - # after 0.18/ in 2016 - if (is_integer_dtype(values) and - not is_integer_dtype(dtype.categories)): - warn("Values and categories have different dtypes. 
Did you " - "mean to use\n'Categorical.from_codes(codes, " - "categories)'?", RuntimeWarning, stacklevel=2) - - if (len(values) and is_integer_dtype(values) and - (codes == -1).all()): - warn("None of the categories were found in values. Did you " - "mean to use\n'Categorical.from_codes(codes, " - "categories)'?", RuntimeWarning, stacklevel=2) - if null_mask.any(): # Reinsert -1 placeholders for previously removed missing values full_codes = - np.ones(null_mask.shape, dtype=codes.dtype) diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index 196f4b26795768..f44fa347cb053b 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -3,6 +3,7 @@ """Top level ``eval`` module. """ +import warnings import tokenize from pandas.io.formats.printing import pprint_thing from pandas.core.computation.scope import _ensure_scope @@ -303,7 +304,8 @@ def eval(expr, parser='pandas', engine=None, truediv=True, "if there is no assignment") # assign if needed - if env.target is not None and parsed_expr.assigner is not None: + assigner = parsed_expr.assigner + if env.target is not None and assigner is not None: target_modified = True # if returning a copy, copy only on the first assignment @@ -317,22 +319,25 @@ def eval(expr, parser='pandas', engine=None, truediv=True, # TypeError is most commonly raised (e.g. int, list), but you # get IndexError if you try to do this assignment on np.ndarray. + # we will ignore numpy warnings here; e.g. if trying + # to use a non-numeric indexer try: - target[parsed_expr.assigner] = ret + with warnings.catch_warnings(record=True): + target[assigner] = ret except (TypeError, IndexError): raise ValueError("Cannot assign expression output to target") if not resolvers: - resolvers = ({parsed_expr.assigner: ret},) + resolvers = ({assigner: ret},) else: # existing resolver needs updated to handle # case of mutating existing column in copy for resolver in resolvers: - if parsed_expr.assigner in resolver: - resolver[parsed_expr.assigner] = ret + if assigner in resolver: + resolver[assigner] = ret break else: - resolvers += ({parsed_expr.assigner: ret},) + resolvers += ({assigner: ret},) ret = None first_expr = False diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index ae956bce113294..23abfa8b3fca14 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -307,7 +307,14 @@ def __init__(self, env, engine, parser, preparser=_preparse): def visit(self, node, **kwargs): if isinstance(node, string_types): clean = self.preparser(node) - node = ast.fix_missing_locations(ast.parse(clean)) + try: + node = ast.fix_missing_locations(ast.parse(clean)) + except SyntaxError as e: + from keyword import iskeyword + if any(iskeyword(x) for x in clean.split()): + e.msg = ("Python keyword not valid identifier" + " in numexpr query") + raise e method = 'visit_' + node.__class__.__name__ visitor = getattr(self, method) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index f3b11e52cdd7ad..eae283e9bc00da 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -136,7 +136,7 @@ def trans(x): # noqa try: if np.allclose(new_result, result, rtol=0): return new_result - except: + except Exception: # comparison of an object dtype with a number type could # hit here @@ -151,14 +151,14 @@ def trans(x): # noqa elif dtype.kind in ['M', 'm'] and result.dtype.kind in ['i', 'f']: try: result = result.astype(dtype) - except: + except Exception: if dtype.tz: # convert 
to datetime and change timezone from pandas import to_datetime result = to_datetime(result).tz_localize('utc') result = result.tz_convert(dtype.tz) - except: + except Exception: pass return result @@ -210,7 +210,7 @@ def changeit(): new_result[mask] = om_at result[:] = new_result return result, False - except: + except Exception: pass # we are forced to change the dtype of the result as the input @@ -243,7 +243,7 @@ def changeit(): try: np.place(result, mask, other) - except: + except Exception: return changeit() return result, False @@ -274,14 +274,14 @@ def maybe_promote(dtype, fill_value=np.nan): if issubclass(dtype.type, np.datetime64): try: fill_value = tslib.Timestamp(fill_value).value - except: + except Exception: # the proper thing to do here would probably be to upcast # to object (but numpy 1.6.1 doesn't do this properly) fill_value = iNaT elif issubclass(dtype.type, np.timedelta64): try: fill_value = lib.Timedelta(fill_value).value - except: + except Exception: # as for datetimes, cannot upcast to object fill_value = iNaT else: @@ -592,12 +592,12 @@ def maybe_convert_scalar(values): def coerce_indexer_dtype(indexer, categories): """ coerce the indexer input array to the smallest dtype possible """ - l = len(categories) - if l < _int8_max: + length = len(categories) + if length < _int8_max: return _ensure_int8(indexer) - elif l < _int16_max: + elif length < _int16_max: return _ensure_int16(indexer) - elif l < _int32_max: + elif length < _int32_max: return _ensure_int32(indexer) return _ensure_int64(indexer) @@ -629,7 +629,7 @@ def conv(r, dtype): r = float(r) elif dtype.kind == 'i': r = int(r) - except: + except Exception: pass return r @@ -756,7 +756,7 @@ def maybe_convert_objects(values, convert_dates=True, convert_numeric=True, if not isna(new_values).all(): values = new_values - except: + except Exception: pass else: # soft-conversion @@ -817,7 +817,7 @@ def soft_convert_objects(values, datetime=True, numeric=True, timedelta=True, # If all NaNs, then do not-alter values = converted if not isna(converted).all() else values values = values.copy() if copy else values - except: + except Exception: pass return values @@ -888,10 +888,10 @@ def try_datetime(v): try: from pandas import to_datetime return to_datetime(v) - except: + except Exception: pass - except: + except Exception: pass return v.reshape(shape) @@ -903,7 +903,7 @@ def try_timedelta(v): from pandas import to_timedelta try: return to_timedelta(v)._values.reshape(shape) - except: + except Exception: return v.reshape(shape) inferred_type = lib.infer_datetimelike_array(_ensure_object(v)) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 4e15aa50e43194..23884869a4d9f6 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -571,12 +571,14 @@ def _concat_rangeindex_same_dtype(indexes): indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6) indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5]) """ + from pandas import Int64Index, RangeIndex start = step = next = None - for obj in indexes: - if not len(obj): - continue + # Filter the empty indexes + non_empty_indexes = [obj for obj in indexes if len(obj)] + + for obj in non_empty_indexes: if start is None: # This is set by the first non-empty index @@ -586,21 +588,23 @@ def _concat_rangeindex_same_dtype(indexes): elif step is None: # First non-empty index had only one element if obj._start == start: - from pandas import Int64Index return _concat_index_same_dtype(indexes, klass=Int64Index) step = 
obj._start - start non_consecutive = ((step != obj._step and len(obj) > 1) or (next is not None and obj._start != next)) if non_consecutive: - from pandas import Int64Index return _concat_index_same_dtype(indexes, klass=Int64Index) if step is not None: next = obj[-1] + step - if start is None: - start = obj._start - step = obj._step - stop = obj._stop if next is None else next - return indexes[0].__class__(start, stop, step) + if non_empty_indexes: + # Get the stop value from "next" or alternatively + # from the last non-empty index + stop = non_empty_indexes[-1]._stop if next is None else next + return RangeIndex(start, stop, step) + + # Here all "indexes" had 0 length, i.e. were empty. + # In this case return an empty range index. + return RangeIndex(0, 0) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 70f1ff0a5380dc..982b27fd21fb55 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2267,7 +2267,8 @@ def query(self, expr, inplace=False, **kwargs): by default, which allows you to treat both the index and columns of the frame as a column in the frame. The identifier ``index`` is used for the frame index; you can also - use the name of the index to identify it in a query. + use the name of the index to identify it in a query. Please note that + Python keywords may not be used as identifiers. For further details and examples see the ``query`` documentation in :ref:`indexing `. @@ -4028,6 +4029,8 @@ def combine(self, other, func, fill_value=None, overwrite=True): ---------- other : DataFrame func : function + Function that takes two series as inputs and return a Series or a + scalar fill_value : scalar value overwrite : boolean, default True If True then overwrite values for common keys in the calling frame @@ -4035,8 +4038,21 @@ def combine(self, other, func, fill_value=None, overwrite=True): Returns ------- result : DataFrame - """ + Examples + -------- + >>> df1 = DataFrame({'A': [0, 0], 'B': [4, 4]}) + >>> df2 = DataFrame({'A': [1, 1], 'B': [3, 3]}) + >>> df1.combine(df2, lambda s1, s2: s1 if s1.sum() < s2.sum() else s2) + A B + 0 0 3 + 1 0 3 + + See Also + -------- + DataFrame.combine_first : Combine two DataFrame objects and default to + non-null values in frame calling the method + """ other_idxlen = len(other.index) # save for compare this, other = self.align(other, copy=False) @@ -4124,16 +4140,24 @@ def combine_first(self, other): ---------- other : DataFrame + Returns + ------- + combined : DataFrame + Examples -------- - a's values prioritized, use values from b to fill holes: - - >>> a.combine_first(b) + df1's values prioritized, use values from df2 to fill holes: + >>> df1 = pd.DataFrame([[1, np.nan]]) + >>> df2 = pd.DataFrame([[3, 4]]) + >>> df1.combine_first(df2) + 0 1 + 0 1 4.0 - Returns - ------- - combined : DataFrame + See Also + -------- + DataFrame.combine : Perform series-wise operation on two DataFrames + using a given function """ import pandas.core.computation.expressions as expressions @@ -4637,7 +4661,7 @@ def unstack(self, level=-1, fill_value=None): other='melt')) def melt(self, id_vars=None, value_vars=None, var_name=None, value_name='value', col_level=None): - from pandas.core.reshape.reshape import melt + from pandas.core.reshape.melt import melt return melt(self, id_vars=id_vars, value_vars=value_vars, var_name=var_name, value_name=value_name, col_level=col_level) @@ -5781,7 +5805,12 @@ def idxmin(self, axis=0, skipna=True): 0 or 'index' for row-wise, 1 or 'columns' for column-wise skipna : boolean, default True Exclude NA/null 
values. If an entire row/column is NA, the result - will be NA + will be NA. + + Raises + ------ + ValueError + * If the row/column is empty Returns ------- @@ -5812,7 +5841,12 @@ def idxmax(self, axis=0, skipna=True): 0 or 'index' for row-wise, 1 or 'columns' for column-wise skipna : boolean, default True Exclude NA/null values. If an entire row/column is NA, the result - will be first index. + will be NA. + + Raises + ------ + ValueError + * If the row/column is empty Returns ------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f1edfe276dfad8..8b2a15e6d16668 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5092,14 +5092,15 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, Parameters ---------- - by : mapping, function, str, or iterable + by : mapping, function, label, or list of labels Used to determine the groups for the groupby. If ``by`` is a function, it's called on each value of the object's index. If a dict or Series is passed, the Series or dict VALUES will be used to determine the groups (the Series' values are first aligned; see ``.align()`` method). If an ndarray is passed, the - values are used as-is determine the groups. A str or list of strs - may be passed to group by the columns in ``self`` + values are used as-is determine the groups. A label or list of + labels may be passed to group by the columns in ``self``. Notice + that a tuple is interpreted a (single) key. axis : int, default 0 level : int, level name, or sequence of such, default None If the axis is a MultiIndex (hierarchical), group by a particular diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 8db75accc84e52..7a58b7d358fbb4 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1913,7 +1913,10 @@ def size(self): """ ids, _, ngroup = self.group_info ids = _ensure_platform_int(ids) - out = np.bincount(ids[ids != -1], minlength=ngroup or None) + if ngroup: + out = np.bincount(ids[ids != -1], minlength=ngroup) + else: + out = ids return Series(out, index=self.result_index, dtype='int64') @@ -2704,7 +2707,6 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, """ group_axis = obj._get_axis(axis) - is_axis_multiindex = isinstance(obj._info_axis, MultiIndex) # validate that the passed single level is compatible with the passed # axis of the object @@ -2765,9 +2767,8 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, elif isinstance(key, BaseGrouper): return key, [], obj - # when MultiIndex, allow tuple to be a key - if not isinstance(key, (tuple, list)) or \ - (isinstance(key, tuple) and is_axis_multiindex): + # Everything which is not a list is a key (including tuples): + if not isinstance(key, list): keys = [key] match_axis_length = False else: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 57d2d07294a53c..eb96cbad70099a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2032,7 +2032,7 @@ def equals(self, other): try: return array_equivalent(_values_from_object(self), _values_from_object(other)) - except: + except Exception: return False def identical(self, other): @@ -2315,7 +2315,7 @@ def intersection(self, other): try: indexer = Index(other._values).get_indexer(self._values) indexer = indexer.take((indexer != -1).nonzero()[0]) - except: + except Exception: # duplicates indexer = algos.unique1d( Index(other._values).get_indexer_non_unique(self._values)[0]) @@ -3022,13 +3022,13 @@ def _reindex_non_unique(self, target): new_indexer 
= None if len(missing): - l = np.arange(len(indexer)) + length = np.arange(len(indexer)) missing = _ensure_platform_int(missing) missing_labels = target.take(missing) - missing_indexer = _ensure_int64(l[~check]) + missing_indexer = _ensure_int64(length[~check]) cur_labels = self.take(indexer[check]).values - cur_indexer = _ensure_int64(l[check]) + cur_indexer = _ensure_int64(length[check]) new_labels = np.empty(tuple([len(indexer)]), dtype=object) new_labels[cur_indexer] = cur_labels diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 78869de318dce9..2e022cb1510085 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -55,8 +55,7 @@ from pandas._libs import (lib, index as libindex, tslib as libts, algos as libalgos, join as libjoin, Timestamp, period as libperiod) -from pandas._libs.tslibs import timezones - +from pandas._libs.tslibs import timezones, conversion # -------- some conversion wrapper functions @@ -384,8 +383,8 @@ def __new__(cls, data=None, getattr(data, 'tz', None) is None): # Convert tz-naive to UTC ints = subarr.view('i8') - subarr = libts.tz_localize_to_utc(ints, tz, - ambiguous=ambiguous) + subarr = conversion.tz_localize_to_utc(ints, tz, + ambiguous=ambiguous) subarr = subarr.view(_NS_DTYPE) subarr = cls._simple_new(subarr, name=name, freq=freq, tz=tz) @@ -449,7 +448,7 @@ def _generate(cls, start, end, periods, name, offset, try: inferred_tz = timezones.infer_tzinfo(start, end) - except: + except Exception: raise TypeError('Start and end cannot both be tz-aware with ' 'different timezones') @@ -531,8 +530,8 @@ def _generate(cls, start, end, periods, name, offset, index = _generate_regular_range(start, end, periods, offset) if tz is not None and getattr(index, 'tz', None) is None: - index = libts.tz_localize_to_utc(_ensure_int64(index), tz, - ambiguous=ambiguous) + index = conversion.tz_localize_to_utc(_ensure_int64(index), tz, + ambiguous=ambiguous) index = index.view(_NS_DTYPE) # index is localized datetime64 array -> have to convert @@ -561,11 +560,11 @@ def _convert_for_op(self, value): def _local_timestamps(self): if self.is_monotonic: - return libts.tz_convert(self.asi8, utc, self.tz) + return conversion.tz_convert(self.asi8, utc, self.tz) else: values = self.asi8 indexer = values.argsort() - result = libts.tz_convert(values.take(indexer), utc, self.tz) + result = conversion.tz_convert(values.take(indexer), utc, self.tz) n = len(indexer) reverse = np.empty(n, dtype=np.int_) @@ -1176,12 +1175,12 @@ def __iter__(self): # convert in chunks of 10k for efficiency data = self.asi8 - l = len(self) + length = len(self) chunksize = 10000 - chunks = int(l / chunksize) + 1 + chunks = int(length / chunksize) + 1 for i in range(chunks): start_i = i * chunksize - end_i = min((i + 1) * chunksize, l) + end_i = min((i + 1) * chunksize, length) converted = libts.ints_to_pydatetime(data[start_i:end_i], tz=self.tz, freq=self.freq, box=True) @@ -1644,7 +1643,7 @@ def normalize(self): ------- normalized : DatetimeIndex """ - new_values = libts.date_normalize(self.asi8, self.tz) + new_values = conversion.date_normalize(self.asi8, self.tz) return DatetimeIndex(new_values, freq='infer', name=self.name, tz=self.tz) @@ -1683,7 +1682,7 @@ def is_normalized(self): """ Returns True if all of the dates are at midnight ("no time") """ - return libts.dates_normalized(self.asi8, self.tz) + return conversion.is_date_array_normalized(self.asi8, self.tz) @cache_readonly def _resolution(self): @@ -1724,7 +1723,7 @@ def insert(self, 
loc, item): new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)], self[loc:].asi8)) if self.tz is not None: - new_dates = libts.tz_convert(new_dates, 'UTC', self.tz) + new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz) return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz) @@ -1764,7 +1763,7 @@ def delete(self, loc): freq = self.freq if self.tz is not None: - new_dates = libts.tz_convert(new_dates, 'UTC', self.tz) + new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz) return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz) def tz_convert(self, tz): @@ -1844,16 +1843,16 @@ def tz_localize(self, tz, ambiguous='raise', errors='raise'): """ if self.tz is not None: if tz is None: - new_dates = libts.tz_convert(self.asi8, 'UTC', self.tz) + new_dates = conversion.tz_convert(self.asi8, 'UTC', self.tz) else: raise TypeError("Already tz-aware, use tz_convert to convert.") else: tz = timezones.maybe_get_tz(tz) # Convert to UTC - new_dates = libts.tz_localize_to_utc(self.asi8, tz, - ambiguous=ambiguous, - errors=errors) + new_dates = conversion.tz_localize_to_utc(self.asi8, tz, + ambiguous=ambiguous, + errors=errors) new_dates = new_dates.view(_NS_DTYPE) return self._shallow_copy(new_dates, tz=tz) @@ -2194,7 +2193,7 @@ def _to_m8(key, tz=None): # this also converts strings key = Timestamp(key, tz=tz) - return np.int64(libts.pydt_to_i8(key)).view(_NS_DTYPE) + return np.int64(conversion.pydt_to_i8(key)).view(_NS_DTYPE) _CACHE_START = Timestamp(datetime(1950, 1, 1)) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 4cc59f52970589..e6294f7d47aff2 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2,7 +2,6 @@ # pylint: disable=E1101,E1103,W0232 import datetime import warnings -from functools import partial from sys import getsizeof import numpy as np @@ -28,8 +27,7 @@ is_true_slices) import pandas.core.base as base -from pandas.util._decorators import (Appender, cache_readonly, - deprecate, deprecate_kwarg) +from pandas.util._decorators import Appender, cache_readonly, deprecate_kwarg import pandas.core.common as com import pandas.core.missing as missing import pandas.core.algorithms as algos @@ -177,7 +175,8 @@ def _verify_integrity(self, labels=None, levels=None): " inconsistent state" % (i, label.max(), len(level))) - def _get_levels(self): + @property + def levels(self): return self._levels def _set_levels(self, levels, level=None, copy=False, validate=True, @@ -279,14 +278,8 @@ def set_levels(self, levels, level=None, inplace=False, if not inplace: return idx - # remove me in 0.14 and change to read only property - __set_levels = deprecate("setting `levels` directly", - partial(set_levels, inplace=True, - verify_integrity=True), - alt_name="set_levels") - levels = property(fget=_get_levels, fset=__set_levels) - - def _get_labels(self): + @property + def labels(self): return self._labels def _set_labels(self, labels, level=None, copy=False, validate=True, @@ -379,13 +372,6 @@ def set_labels(self, labels, level=None, inplace=False, if not inplace: return idx - # remove me in 0.14 and change to readonly property - __set_labels = deprecate("setting labels directly", - partial(set_labels, inplace=True, - verify_integrity=True), - alt_name="set_labels") - labels = property(fget=_get_labels, fset=__set_labels) - def copy(self, names=None, dtype=None, levels=None, labels=None, deep=False, _set_identity=False, **kwargs): """ @@ -446,6 +432,17 @@ def _shallow_copy_with_infer(self, 
values=None, **kwargs): **kwargs) return self._shallow_copy(values, **kwargs) + @Appender(_index_shared_docs['__contains__'] % _index_doc_kwargs) + def __contains__(self, key): + hash(key) + try: + self.get_loc(key) + return True + except (LookupError, TypeError): + return False + + contains = __contains__ + @Appender(_index_shared_docs['_shallow_copy']) def _shallow_copy(self, values=None, **kwargs): if values is not None: @@ -809,9 +806,10 @@ def duplicated(self, keep='first'): return duplicated_int64(ids, keep) - @Appender(ibase._index_shared_docs['fillna']) def fillna(self, value=None, downcast=None): - # isna is not implemented for MultiIndex + """ + fillna is not implemented for MultiIndex + """ raise NotImplementedError('isna is not defined for MultiIndex') @Appender(_index_shared_docs['dropna']) @@ -1370,17 +1368,6 @@ def nlevels(self): def levshape(self): return tuple(len(x) for x in self.levels) - @Appender(_index_shared_docs['__contains__'] % _index_doc_kwargs) - def __contains__(self, key): - hash(key) - try: - self.get_loc(key) - return True - except LookupError: - return False - - contains = __contains__ - def __reduce__(self): """Necessary for making this object picklable""" d = dict(levels=[lev for lev in self.levels], diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index a6d5690767c10b..df242e657c9d7b 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -36,6 +36,7 @@ get_period_field_arr, _validate_end_alias, _quarter_to_myear) from pandas._libs.tslibs.fields import isleapyear_arr +from pandas._libs.tslibs import resolution from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds from pandas.core.base import _shared_docs @@ -752,8 +753,8 @@ def get_value(self, series, key): except (KeyError, IndexError): try: asdt, parsed, reso = parse_time_string(key, self.freq) - grp = frequencies.Resolution.get_freq_group(reso) - freqn = frequencies.get_freq_group(self.freq) + grp = resolution.Resolution.get_freq_group(reso) + freqn = resolution.get_freq_group(self.freq) vals = self._values @@ -912,8 +913,8 @@ def _get_string_slice(self, key): 'ordered time series') key, parsed, reso = parse_time_string(key, self.freq) - grp = frequencies.Resolution.get_freq_group(reso) - freqn = frequencies.get_freq_group(self.freq) + grp = resolution.Resolution.get_freq_group(reso) + freqn = resolution.get_freq_group(self.freq) if reso in ['day', 'hour', 'minute', 'second'] and not grp < freqn: raise KeyError(key) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index a4a5f7df9aa0f8..e4bc46fb7bdbed 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -35,20 +35,15 @@ from pandas._libs import (lib, index as libindex, tslib as libts, join as libjoin, Timedelta, NaT, iNaT) from pandas._libs.tslibs.timedeltas import array_to_timedelta64 +from pandas._libs.tslibs.fields import get_timedelta_field def _field_accessor(name, alias, docstring=None): def f(self): + values = self.asi8 + result = get_timedelta_field(values, alias) if self.hasnans: - result = np.empty(len(self), dtype='float64') - mask = self._isnan - imask = ~mask - result.flat[imask] = np.array([getattr(Timedelta(val), alias) - for val in self.asi8[imask]]) - result[mask] = np.nan - else: - result = np.array([getattr(Timedelta(val), alias) - for val in self.asi8], dtype='int64') + result = self._maybe_mask_results(result, convert='float64') return Index(result, name=self.name) @@ -841,7 +836,7 @@ def insert(self, 
loc, item): if _is_convertible_to_td(item): try: item = Timedelta(item) - except: + except Exception: pass freq = None diff --git a/pandas/core/reshape/api.py b/pandas/core/reshape/api.py index c75e0341918bb6..99286d807a2053 100644 --- a/pandas/core/reshape/api.py +++ b/pandas/core/reshape/api.py @@ -1,7 +1,7 @@ # flake8: noqa from pandas.core.reshape.concat import concat -from pandas.core.reshape.reshape import melt +from pandas.core.reshape.melt import melt from pandas.core.reshape.merge import ( merge, ordered_merge, merge_ordered, merge_asof) from pandas.core.reshape.pivot import pivot_table, crosstab diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py new file mode 100644 index 00000000000000..846d04221fe7f6 --- /dev/null +++ b/pandas/core/reshape/melt.py @@ -0,0 +1,386 @@ +# pylint: disable=E1101,E1103 +# pylint: disable=W0703,W0622,W0613,W0201 +import numpy as np + +from pandas.core.dtypes.common import is_list_like +from pandas import compat +from pandas.core.categorical import Categorical + +from pandas.core.frame import DataFrame +from pandas.core.index import MultiIndex + +from pandas.core.frame import _shared_docs +from pandas.util._decorators import Appender + +import re +import pandas.core.dtypes.concat as _concat +from pandas.core.dtypes.missing import notna + + +@Appender(_shared_docs['melt'] % + dict(caller='pd.melt(df, ', + versionadded="", + other='DataFrame.melt')) +def melt(frame, id_vars=None, value_vars=None, var_name=None, + value_name='value', col_level=None): + # TODO: what about the existing index? + if id_vars is not None: + if not is_list_like(id_vars): + id_vars = [id_vars] + elif (isinstance(frame.columns, MultiIndex) and + not isinstance(id_vars, list)): + raise ValueError('id_vars must be a list of tuples when columns' + ' are a MultiIndex') + else: + id_vars = list(id_vars) + else: + id_vars = [] + + if value_vars is not None: + if not is_list_like(value_vars): + value_vars = [value_vars] + elif (isinstance(frame.columns, MultiIndex) and + not isinstance(value_vars, list)): + raise ValueError('value_vars must be a list of tuples when' + ' columns are a MultiIndex') + else: + value_vars = list(value_vars) + frame = frame.loc[:, id_vars + value_vars] + else: + frame = frame.copy() + + if col_level is not None: # allow list or other? + # frame is a copy + frame.columns = frame.columns.get_level_values(col_level) + + if var_name is None: + if isinstance(frame.columns, MultiIndex): + if len(frame.columns.names) == len(set(frame.columns.names)): + var_name = frame.columns.names + else: + var_name = ['variable_{i}'.format(i=i) + for i in range(len(frame.columns.names))] + else: + var_name = [frame.columns.name if frame.columns.name is not None + else 'variable'] + if isinstance(var_name, compat.string_types): + var_name = [var_name] + + N, K = frame.shape + K -= len(id_vars) + + mdata = {} + for col in id_vars: + mdata[col] = np.tile(frame.pop(col).values, K) + + mcolumns = id_vars + var_name + [value_name] + + mdata[value_name] = frame.values.ravel('F') + for i, col in enumerate(var_name): + # asanyarray will keep the columns as an Index + mdata[col] = np.asanyarray(frame.columns + ._get_level_values(i)).repeat(N) + + return DataFrame(mdata, columns=mcolumns) + + +def lreshape(data, groups, dropna=True, label=None): + """ + Reshape long-format data to wide. 
Generalized inverse of DataFrame.pivot + + Parameters + ---------- + data : DataFrame + groups : dict + {new_name : list_of_columns} + dropna : boolean, default True + + Examples + -------- + >>> import pandas as pd + >>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526], + ... 'team': ['Red Sox', 'Yankees'], + ... 'year1': [2007, 2007], 'year2': [2008, 2008]}) + >>> data + hr1 hr2 team year1 year2 + 0 514 545 Red Sox 2007 2008 + 1 573 526 Yankees 2007 2008 + + >>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']}) + team year hr + 0 Red Sox 2007 514 + 1 Yankees 2007 573 + 2 Red Sox 2008 545 + 3 Yankees 2008 526 + + Returns + ------- + reshaped : DataFrame + """ + if isinstance(groups, dict): + keys = list(groups.keys()) + values = list(groups.values()) + else: + keys, values = zip(*groups) + + all_cols = list(set.union(*[set(x) for x in values])) + id_cols = list(data.columns.difference(all_cols)) + + K = len(values[0]) + + for seq in values: + if len(seq) != K: + raise ValueError('All column lists must be same length') + + mdata = {} + pivot_cols = [] + + for target, names in zip(keys, values): + to_concat = [data[col].values for col in names] + mdata[target] = _concat._concat_compat(to_concat) + pivot_cols.append(target) + + for col in id_cols: + mdata[col] = np.tile(data[col].values, K) + + if dropna: + mask = np.ones(len(mdata[pivot_cols[0]]), dtype=bool) + for c in pivot_cols: + mask &= notna(mdata[c]) + if not mask.all(): + mdata = dict((k, v[mask]) for k, v in compat.iteritems(mdata)) + + return DataFrame(mdata, columns=id_cols + pivot_cols) + + +def wide_to_long(df, stubnames, i, j, sep="", suffix=r'\d+'): + r""" + Wide panel to long format. Less flexible but more user-friendly than melt. + + With stubnames ['A', 'B'], this function expects to find one or more + group of columns with format Asuffix1, Asuffix2,..., Bsuffix1, Bsuffix2,... + You specify what you want to call this suffix in the resulting long format + with `j` (for example `j='year'`) + + Each row of these wide variables are assumed to be uniquely identified by + `i` (can be a single column name or a list of column names) + + All remaining variables in the data frame are left intact. + + Parameters + ---------- + df : DataFrame + The wide-format DataFrame + stubnames : str or list-like + The stub name(s). The wide format variables are assumed to + start with the stub names. + i : str or list-like + Column(s) to use as id variable(s) + j : str + The name of the subobservation variable. What you wish to name your + suffix in the long format. + sep : str, default "" + A character indicating the separation of the variable names + in the wide format, to be stripped from the names in the long format. + For example, if your column names are A-suffix1, A-suffix2, you + can strip the hypen by specifying `sep='-'` + + .. versionadded:: 0.20.0 + + suffix : str, default '\\d+' + A regular expression capturing the wanted suffixes. '\\d+' captures + numeric suffixes. Suffixes with no numbers could be specified with the + negated character class '\\D+'. You can also further disambiguate + suffixes, for example, if your wide variables are of the form + Aone, Btwo,.., and you have an unrelated column Arating, you can + ignore the last one by specifying `suffix='(!?one|two)'` + + .. 
versionadded:: 0.20.0 + + Returns + ------- + DataFrame + A DataFrame that contains each stub name as a variable, with new index + (i, j) + + Examples + -------- + >>> import pandas as pd + >>> import numpy as np + >>> np.random.seed(123) + >>> df = pd.DataFrame({"A1970" : {0 : "a", 1 : "b", 2 : "c"}, + ... "A1980" : {0 : "d", 1 : "e", 2 : "f"}, + ... "B1970" : {0 : 2.5, 1 : 1.2, 2 : .7}, + ... "B1980" : {0 : 3.2, 1 : 1.3, 2 : .1}, + ... "X" : dict(zip(range(3), np.random.randn(3))) + ... }) + >>> df["id"] = df.index + >>> df + A1970 A1980 B1970 B1980 X id + 0 a d 2.5 3.2 -1.085631 0 + 1 b e 1.2 1.3 0.997345 1 + 2 c f 0.7 0.1 0.282978 2 + >>> pd.wide_to_long(df, ["A", "B"], i="id", j="year") + ... # doctest: +NORMALIZE_WHITESPACE + X A B + id year + 0 1970 -1.085631 a 2.5 + 1 1970 0.997345 b 1.2 + 2 1970 0.282978 c 0.7 + 0 1980 -1.085631 d 3.2 + 1 1980 0.997345 e 1.3 + 2 1980 0.282978 f 0.1 + + With multuple id columns + + >>> df = pd.DataFrame({ + ... 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3], + ... 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3], + ... 'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], + ... 'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] + ... }) + >>> df + birth famid ht1 ht2 + 0 1 1 2.8 3.4 + 1 2 1 2.9 3.8 + 2 3 1 2.2 2.9 + 3 1 2 2.0 3.2 + 4 2 2 1.8 2.8 + 5 3 2 1.9 2.4 + 6 1 3 2.2 3.3 + 7 2 3 2.3 3.4 + 8 3 3 2.1 2.9 + >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age') + >>> l + ... # doctest: +NORMALIZE_WHITESPACE + ht + famid birth age + 1 1 1 2.8 + 2 3.4 + 2 1 2.9 + 2 3.8 + 3 1 2.2 + 2 2.9 + 2 1 1 2.0 + 2 3.2 + 2 1 1.8 + 2 2.8 + 3 1 1.9 + 2 2.4 + 3 1 1 2.2 + 2 3.3 + 2 1 2.3 + 2 3.4 + 3 1 2.1 + 2 2.9 + + Going from long back to wide just takes some creative use of `unstack` + + >>> w = l.reset_index().set_index(['famid', 'birth', 'age']).unstack() + >>> w.columns = pd.Index(w.columns).str.join('') + >>> w.reset_index() + famid birth ht1 ht2 + 0 1 1 2.8 3.4 + 1 1 2 2.9 3.8 + 2 1 3 2.2 2.9 + 3 2 1 2.0 3.2 + 4 2 2 1.8 2.8 + 5 2 3 1.9 2.4 + 6 3 1 2.2 3.3 + 7 3 2 2.3 3.4 + 8 3 3 2.1 2.9 + + Less wieldy column names are also handled + + >>> np.random.seed(0) + >>> df = pd.DataFrame({'A(quarterly)-2010': np.random.rand(3), + ... 'A(quarterly)-2011': np.random.rand(3), + ... 'B(quarterly)-2010': np.random.rand(3), + ... 'B(quarterly)-2011': np.random.rand(3), + ... 'X' : np.random.randint(3, size=3)}) + >>> df['id'] = df.index + >>> df # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS + A(quarterly)-2010 A(quarterly)-2011 B(quarterly)-2010 ... + 0 0.548814 0.544883 0.437587 ... + 1 0.715189 0.423655 0.891773 ... + 2 0.602763 0.645894 0.963663 ... + X id + 0 0 0 + 1 1 1 + 2 1 2 + + >>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'], i='id', + ... j='year', sep='-') + ... # doctest: +NORMALIZE_WHITESPACE + X A(quarterly) B(quarterly) + id year + 0 2010 0 0.548814 0.437587 + 1 2010 1 0.715189 0.891773 + 2 2010 1 0.602763 0.963663 + 0 2011 0 0.544883 0.383442 + 1 2011 1 0.423655 0.791725 + 2 2011 1 0.645894 0.528895 + + If we have many columns, we could also use a regex to find our + stubnames and pass that list on to wide_to_long + + >>> stubnames = sorted( + ... set([match[0] for match in df.columns.str.findall( + ... r'[A-B]\(.*\)').values if match != [] ]) + ... ) + >>> list(stubnames) + ['A(quarterly)', 'B(quarterly)'] + + Notes + ----- + All extra variables are left untouched. This simply uses + `pandas.melt` under the hood, but is hard-coded to "do the right thing" + in a typicaly case. 
+ """ + def get_var_names(df, stub, sep, suffix): + regex = "^{stub}{sep}{suffix}".format( + stub=re.escape(stub), sep=re.escape(sep), suffix=suffix) + return df.filter(regex=regex).columns.tolist() + + def melt_stub(df, stub, i, j, value_vars, sep): + newdf = melt(df, id_vars=i, value_vars=value_vars, + value_name=stub.rstrip(sep), var_name=j) + newdf[j] = Categorical(newdf[j]) + newdf[j] = newdf[j].str.replace(re.escape(stub + sep), "") + + return newdf.set_index(i + [j]) + + if any(map(lambda s: s in df.columns.tolist(), stubnames)): + raise ValueError("stubname can't be identical to a column name") + + if not is_list_like(stubnames): + stubnames = [stubnames] + else: + stubnames = list(stubnames) + + if not is_list_like(i): + i = [i] + else: + i = list(i) + + if df[i].duplicated().any(): + raise ValueError("the id variables need to uniquely identify each row") + + value_vars = list(map(lambda stub: + get_var_names(df, stub, sep, suffix), stubnames)) + + value_vars_flattened = [e for sublist in value_vars for e in sublist] + id_vars = list(set(df.columns.tolist()).difference(value_vars_flattened)) + + melted = [] + for s, v in zip(stubnames, value_vars): + melted.append(melt_stub(df, s, i, j, v, sep)) + melted = melted[0].join(melted[1:], how='outer') + + if len(i) == 1: + new = df[id_vars].set_index(i).join(melted) + return new + + new = df[id_vars].merge(melted.reset_index(), on=i).set_index(i + [j]) + + return new diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 0234a5563326c5..412c00dc95ec00 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -126,7 +126,7 @@ def _groupby_and_merge(by, on, left, right, _merge_pieces, try: if k in merged: merged[k] = key - except: + except KeyError: pass pieces.append(merged) @@ -1268,8 +1268,10 @@ def _get_merge_keys(self): else: lt = left_join_keys[-1] - msg = "incompatible tolerance, must be compat " \ - "with type {lt}".format(lt=type(lt)) + msg = ("incompatible tolerance {tolerance}, must be compat " + "with type {lkdtype}".format( + tolerance=type(self.tolerance), + lkdtype=lt.dtype)) if is_datetime64_dtype(lt) or is_datetime64tz_dtype(lt): if not isinstance(self.tolerance, Timedelta): @@ -1505,12 +1507,12 @@ def _sort_labels(uniques, left, right): # tuplesafe uniques = Index(uniques).values - l = len(left) + llength = len(left) labels = np.concatenate([left, right]) _, new_labels = sorting.safe_sort(uniques, labels, na_sentinel=-1) new_labels = _ensure_int64(new_labels) - new_left, new_right = new_labels[:l], new_labels[l:] + new_left, new_right = new_labels[:llength], new_labels[llength:] return new_left, new_right diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index b8885820f4a49d..96738afbca9e3c 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -4,7 +4,6 @@ from pandas import compat from functools import partial import itertools -import re import numpy as np @@ -14,7 +13,6 @@ needs_i8_conversion, is_sparse) from pandas.core.dtypes.cast import maybe_promote from pandas.core.dtypes.missing import notna -import pandas.core.dtypes.concat as _concat from pandas.core.series import Series from pandas.core.frame import DataFrame @@ -30,8 +28,6 @@ import pandas.core.algorithms as algos from pandas._libs import algos as _algos, reshape as _reshape -from pandas.core.frame import _shared_docs -from pandas.util._decorators import Appender from pandas.core.index import Index, MultiIndex, _get_na_value @@ -700,375 +696,6 @@ def 
_convert_level_number(level_num, columns): return result -@Appender(_shared_docs['melt'] % - dict(caller='pd.melt(df, ', - versionadded="", - other='DataFrame.melt')) -def melt(frame, id_vars=None, value_vars=None, var_name=None, - value_name='value', col_level=None): - # TODO: what about the existing index? - if id_vars is not None: - if not is_list_like(id_vars): - id_vars = [id_vars] - elif (isinstance(frame.columns, MultiIndex) and - not isinstance(id_vars, list)): - raise ValueError('id_vars must be a list of tuples when columns' - ' are a MultiIndex') - else: - id_vars = list(id_vars) - else: - id_vars = [] - - if value_vars is not None: - if not is_list_like(value_vars): - value_vars = [value_vars] - elif (isinstance(frame.columns, MultiIndex) and - not isinstance(value_vars, list)): - raise ValueError('value_vars must be a list of tuples when' - ' columns are a MultiIndex') - else: - value_vars = list(value_vars) - frame = frame.loc[:, id_vars + value_vars] - else: - frame = frame.copy() - - if col_level is not None: # allow list or other? - # frame is a copy - frame.columns = frame.columns.get_level_values(col_level) - - if var_name is None: - if isinstance(frame.columns, MultiIndex): - if len(frame.columns.names) == len(set(frame.columns.names)): - var_name = frame.columns.names - else: - var_name = ['variable_{i}'.format(i=i) - for i in range(len(frame.columns.names))] - else: - var_name = [frame.columns.name if frame.columns.name is not None - else 'variable'] - if isinstance(var_name, compat.string_types): - var_name = [var_name] - - N, K = frame.shape - K -= len(id_vars) - - mdata = {} - for col in id_vars: - mdata[col] = np.tile(frame.pop(col).values, K) - - mcolumns = id_vars + var_name + [value_name] - - mdata[value_name] = frame.values.ravel('F') - for i, col in enumerate(var_name): - # asanyarray will keep the columns as an Index - mdata[col] = np.asanyarray(frame.columns - ._get_level_values(i)).repeat(N) - - return DataFrame(mdata, columns=mcolumns) - - -def lreshape(data, groups, dropna=True, label=None): - """ - Reshape long-format data to wide. Generalized inverse of DataFrame.pivot - - Parameters - ---------- - data : DataFrame - groups : dict - {new_name : list_of_columns} - dropna : boolean, default True - - Examples - -------- - >>> import pandas as pd - >>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526], - ... 'team': ['Red Sox', 'Yankees'], - ... 
'year1': [2007, 2007], 'year2': [2008, 2008]}) - >>> data - hr1 hr2 team year1 year2 - 0 514 545 Red Sox 2007 2008 - 1 573 526 Yankees 2007 2008 - - >>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']}) - team year hr - 0 Red Sox 2007 514 - 1 Yankees 2007 573 - 2 Red Sox 2008 545 - 3 Yankees 2008 526 - - Returns - ------- - reshaped : DataFrame - """ - if isinstance(groups, dict): - keys = list(groups.keys()) - values = list(groups.values()) - else: - keys, values = zip(*groups) - - all_cols = list(set.union(*[set(x) for x in values])) - id_cols = list(data.columns.difference(all_cols)) - - K = len(values[0]) - - for seq in values: - if len(seq) != K: - raise ValueError('All column lists must be same length') - - mdata = {} - pivot_cols = [] - - for target, names in zip(keys, values): - to_concat = [data[col].values for col in names] - mdata[target] = _concat._concat_compat(to_concat) - pivot_cols.append(target) - - for col in id_cols: - mdata[col] = np.tile(data[col].values, K) - - if dropna: - mask = np.ones(len(mdata[pivot_cols[0]]), dtype=bool) - for c in pivot_cols: - mask &= notna(mdata[c]) - if not mask.all(): - mdata = dict((k, v[mask]) for k, v in compat.iteritems(mdata)) - - return DataFrame(mdata, columns=id_cols + pivot_cols) - - -def wide_to_long(df, stubnames, i, j, sep="", suffix=r'\d+'): - r""" - Wide panel to long format. Less flexible but more user-friendly than melt. - - With stubnames ['A', 'B'], this function expects to find one or more - group of columns with format Asuffix1, Asuffix2,..., Bsuffix1, Bsuffix2,... - You specify what you want to call this suffix in the resulting long format - with `j` (for example `j='year'`) - - Each row of these wide variables are assumed to be uniquely identified by - `i` (can be a single column name or a list of column names) - - All remaining variables in the data frame are left intact. - - Parameters - ---------- - df : DataFrame - The wide-format DataFrame - stubnames : str or list-like - The stub name(s). The wide format variables are assumed to - start with the stub names. - i : str or list-like - Column(s) to use as id variable(s) - j : str - The name of the subobservation variable. What you wish to name your - suffix in the long format. - sep : str, default "" - A character indicating the separation of the variable names - in the wide format, to be stripped from the names in the long format. - For example, if your column names are A-suffix1, A-suffix2, you - can strip the hypen by specifying `sep='-'` - - .. versionadded:: 0.20.0 - - suffix : str, default '\\d+' - A regular expression capturing the wanted suffixes. '\\d+' captures - numeric suffixes. Suffixes with no numbers could be specified with the - negated character class '\\D+'. You can also further disambiguate - suffixes, for example, if your wide variables are of the form - Aone, Btwo,.., and you have an unrelated column Arating, you can - ignore the last one by specifying `suffix='(!?one|two)'` - - .. versionadded:: 0.20.0 - - Returns - ------- - DataFrame - A DataFrame that contains each stub name as a variable, with new index - (i, j) - - Examples - -------- - >>> import pandas as pd - >>> import numpy as np - >>> np.random.seed(123) - >>> df = pd.DataFrame({"A1970" : {0 : "a", 1 : "b", 2 : "c"}, - ... "A1980" : {0 : "d", 1 : "e", 2 : "f"}, - ... "B1970" : {0 : 2.5, 1 : 1.2, 2 : .7}, - ... "B1980" : {0 : 3.2, 1 : 1.3, 2 : .1}, - ... "X" : dict(zip(range(3), np.random.randn(3))) - ... 
}) - >>> df["id"] = df.index - >>> df - A1970 A1980 B1970 B1980 X id - 0 a d 2.5 3.2 -1.085631 0 - 1 b e 1.2 1.3 0.997345 1 - 2 c f 0.7 0.1 0.282978 2 - >>> pd.wide_to_long(df, ["A", "B"], i="id", j="year") - ... # doctest: +NORMALIZE_WHITESPACE - X A B - id year - 0 1970 -1.085631 a 2.5 - 1 1970 0.997345 b 1.2 - 2 1970 0.282978 c 0.7 - 0 1980 -1.085631 d 3.2 - 1 1980 0.997345 e 1.3 - 2 1980 0.282978 f 0.1 - - With multuple id columns - - >>> df = pd.DataFrame({ - ... 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3], - ... 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3], - ... 'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], - ... 'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] - ... }) - >>> df - birth famid ht1 ht2 - 0 1 1 2.8 3.4 - 1 2 1 2.9 3.8 - 2 3 1 2.2 2.9 - 3 1 2 2.0 3.2 - 4 2 2 1.8 2.8 - 5 3 2 1.9 2.4 - 6 1 3 2.2 3.3 - 7 2 3 2.3 3.4 - 8 3 3 2.1 2.9 - >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age') - >>> l - ... # doctest: +NORMALIZE_WHITESPACE - ht - famid birth age - 1 1 1 2.8 - 2 3.4 - 2 1 2.9 - 2 3.8 - 3 1 2.2 - 2 2.9 - 2 1 1 2.0 - 2 3.2 - 2 1 1.8 - 2 2.8 - 3 1 1.9 - 2 2.4 - 3 1 1 2.2 - 2 3.3 - 2 1 2.3 - 2 3.4 - 3 1 2.1 - 2 2.9 - - Going from long back to wide just takes some creative use of `unstack` - - >>> w = l.reset_index().set_index(['famid', 'birth', 'age']).unstack() - >>> w.columns = pd.Index(w.columns).str.join('') - >>> w.reset_index() - famid birth ht1 ht2 - 0 1 1 2.8 3.4 - 1 1 2 2.9 3.8 - 2 1 3 2.2 2.9 - 3 2 1 2.0 3.2 - 4 2 2 1.8 2.8 - 5 2 3 1.9 2.4 - 6 3 1 2.2 3.3 - 7 3 2 2.3 3.4 - 8 3 3 2.1 2.9 - - Less wieldy column names are also handled - - >>> np.random.seed(0) - >>> df = pd.DataFrame({'A(quarterly)-2010': np.random.rand(3), - ... 'A(quarterly)-2011': np.random.rand(3), - ... 'B(quarterly)-2010': np.random.rand(3), - ... 'B(quarterly)-2011': np.random.rand(3), - ... 'X' : np.random.randint(3, size=3)}) - >>> df['id'] = df.index - >>> df # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS - A(quarterly)-2010 A(quarterly)-2011 B(quarterly)-2010 ... - 0 0.548814 0.544883 0.437587 ... - 1 0.715189 0.423655 0.891773 ... - 2 0.602763 0.645894 0.963663 ... - X id - 0 0 0 - 1 1 1 - 2 1 2 - - >>> pd.wide_to_long(df, ['A(quarterly)', 'B(quarterly)'], i='id', - ... j='year', sep='-') - ... # doctest: +NORMALIZE_WHITESPACE - X A(quarterly) B(quarterly) - id year - 0 2010 0 0.548814 0.437587 - 1 2010 1 0.715189 0.891773 - 2 2010 1 0.602763 0.963663 - 0 2011 0 0.544883 0.383442 - 1 2011 1 0.423655 0.791725 - 2 2011 1 0.645894 0.528895 - - If we have many columns, we could also use a regex to find our - stubnames and pass that list on to wide_to_long - - >>> stubnames = sorted( - ... set([match[0] for match in df.columns.str.findall( - ... r'[A-B]\(.*\)').values if match != [] ]) - ... ) - >>> list(stubnames) - ['A(quarterly)', 'B(quarterly)'] - - Notes - ----- - All extra variables are left untouched. This simply uses - `pandas.melt` under the hood, but is hard-coded to "do the right thing" - in a typicaly case. 
- """ - def get_var_names(df, stub, sep, suffix): - regex = "^{stub}{sep}{suffix}".format( - stub=re.escape(stub), sep=re.escape(sep), suffix=suffix) - return df.filter(regex=regex).columns.tolist() - - def melt_stub(df, stub, i, j, value_vars, sep): - newdf = melt(df, id_vars=i, value_vars=value_vars, - value_name=stub.rstrip(sep), var_name=j) - newdf[j] = Categorical(newdf[j]) - newdf[j] = newdf[j].str.replace(re.escape(stub + sep), "") - - return newdf.set_index(i + [j]) - - if any(map(lambda s: s in df.columns.tolist(), stubnames)): - raise ValueError("stubname can't be identical to a column name") - - if not is_list_like(stubnames): - stubnames = [stubnames] - else: - stubnames = list(stubnames) - - if not is_list_like(i): - i = [i] - else: - i = list(i) - - if df[i].duplicated().any(): - raise ValueError("the id variables need to uniquely identify each row") - - value_vars = list(map(lambda stub: - get_var_names(df, stub, sep, suffix), stubnames)) - - value_vars_flattened = [e for sublist in value_vars for e in sublist] - id_vars = list(set(df.columns.tolist()).difference(value_vars_flattened)) - - melted = [] - for s, v in zip(stubnames, value_vars): - melted.append(melt_stub(df, s, i, j, v, sep)) - melted = melted[0].join(melted[1:], how='outer') - - if len(i) == 1: - new = df[id_vars].set_index(i).join(melted) - return new - - new = df[id_vars].merge(melted.reset_index(), on=i).set_index(i + [j]) - - return new - - def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, columns=None, sparse=False, drop_first=False): """ diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index fda339aa304612..2adf17a227a598 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -148,7 +148,7 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'): Parameters ---------- - x : ndarray or Series + x : 1d ndarray or Series q : integer or array of quantiles Number of quantiles. 10 for deciles, 4 for quartiles, etc. Alternately array of quantiles, e.g. [0, .25, .5, .75, 1.] for quartiles diff --git a/pandas/core/series.py b/pandas/core/series.py index 1c92c4b8850ee4..dd86e51ee8154c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -597,7 +597,7 @@ def _ixs(self, i, axis=0): return values[i] except IndexError: raise - except: + except Exception: if isinstance(i, slice): indexer = self.index._convert_slice_indexer(i, kind='iloc') return self._get_values(indexer) @@ -675,7 +675,7 @@ def _get_with(self, key): if isinstance(key, tuple): try: return self._get_values_tuple(key) - except: + except Exception: if len(key) == 1: key = key[0] if isinstance(key, slice): @@ -818,7 +818,7 @@ def _set_with(self, key, value): if not isinstance(key, (list, Series, np.ndarray, Series)): try: key = list(key) - except: + except Exception: key = [key] if isinstance(key, Index): @@ -1306,7 +1306,13 @@ def idxmin(self, axis=None, skipna=True, *args, **kwargs): Parameters ---------- skipna : boolean, default True - Exclude NA/null values + Exclude NA/null values. If the entire Series is NA, the result + will be NA. + + Raises + ------ + ValueError + * If the Series is empty Returns ------- @@ -1336,7 +1342,13 @@ def idxmax(self, axis=None, skipna=True, *args, **kwargs): Parameters ---------- skipna : boolean, default True - Exclude NA/null values + Exclude NA/null values. If the entire Series is NA, the result + will be NA. 
+ + Raises + ------ + ValueError + * If the Series is empty Returns ------- @@ -1361,13 +1373,13 @@ def idxmax(self, axis=None, skipna=True, *args, **kwargs): # ndarray compat argmin = deprecate('argmin', idxmin, - msg="'argmin' is deprecated. Use 'idxmin' instead. " + msg="'argmin' is deprecated, use 'idxmin' instead. " "The behavior of 'argmin' will be corrected to " "return the positional minimum in the future. " "Use 'series.values.argmin' to get the position of " "the minimum now.") argmax = deprecate('argmax', idxmax, - msg="'argmax' is deprecated. Use 'idxmax' instead. " + msg="'argmax' is deprecated, use 'idxmax' instead. " "The behavior of 'argmax' will be corrected to " "return the positional maximum in the future. " "Use 'series.values.argmax' to get the position of " @@ -1731,11 +1743,26 @@ def combine(self, other, func, fill_value=np.nan): ---------- other : Series or scalar value func : function + Function that takes two scalars as inputs and return a scalar fill_value : scalar value Returns ------- result : Series + + Examples + -------- + >>> s1 = Series([1, 2]) + >>> s2 = Series([0, 3]) + >>> s1.combine(s2, lambda x1, x2: x1 if x1 < x2 else x2) + 0 0 + 1 2 + dtype: int64 + + See Also + -------- + Series.combine_first : Combine Series values, choosing the calling + Series's values first """ if isinstance(other, Series): new_index = self.index.union(other.index) @@ -1764,7 +1791,21 @@ def combine_first(self, other): Returns ------- - y : Series + combined : Series + + Examples + -------- + >>> s1 = pd.Series([1, np.nan]) + >>> s2 = pd.Series([3, 4]) + >>> s1.combine_first(s2) + 0 1.0 + 1 4.0 + dtype: float64 + + See Also + -------- + Series.combine : Perform elementwise operation on two Series + using a given function """ new_index = self.index.union(other.index) this = self.reindex(new_index, copy=False) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index ae8aa275b2baea..19f7e459d0725f 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -36,9 +36,77 @@ def _guess_datetime_format_for_array(arr, **kwargs): return _guess_datetime_format(arr[non_nan_elements[0]], **kwargs) +def _maybe_cache(arg, format, cache, tz, convert_listlike): + """ + Create a cache of unique dates from an array of dates + + Parameters + ---------- + arg : integer, float, string, datetime, list, tuple, 1-d array, Series + format : string + Strftime format to parse time + cache : boolean + True attempts to create a cache of converted values + tz : string + Timezone of the dates + convert_listlike : function + Conversion function to apply on dates + + Returns + ------- + cache_array : Series + Cache of converted, unique dates. 
Can be empty + """ + from pandas import Series + cache_array = Series() + if cache: + # Perform a quicker unique check + from pandas import Index + if not Index(arg).is_unique: + unique_dates = algorithms.unique(arg) + cache_dates = convert_listlike(unique_dates, True, format, tz=tz) + cache_array = Series(cache_dates, index=unique_dates) + return cache_array + + +def _convert_and_box_cache(arg, cache_array, box, errors, name=None): + """ + Convert array of dates with a cache and box the result + + Parameters + ---------- + arg : integer, float, string, datetime, list, tuple, 1-d array, Series + cache_array : Series + Cache of converted, unique dates + box : boolean + True boxes result as an Index-like, False returns an ndarray + errors : string + 'ignore' plus box=True will convert result to Index + name : string, default None + Name for a DatetimeIndex + + Returns + ------- + result : datetime of converted dates + Returns: + + - Index-like if box=True + - ndarray if box=False + """ + from pandas import Series, DatetimeIndex, Index + result = Series(arg).map(cache_array) + if box: + if errors == 'ignore': + return Index(result) + else: + return DatetimeIndex(result, name=name) + return result.values + + def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, utc=None, box=True, format=None, exact=True, - unit=None, infer_datetime_format=False, origin='unix'): + unit=None, infer_datetime_format=False, origin='unix', + cache=False): """ Convert argument to datetime. @@ -111,7 +179,12 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, origin. .. versionadded: 0.20.0 + cache : boolean, default False + If True, use a cache of unique, converted dates to apply the datetime + conversion. May produce sigificant speed-up when parsing duplicate date + strings, especially ones with timezone offsets. + .. versionadded: 0.22.0 Returns ------- ret : datetime if parsing succeeded. 
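A usage sketch of the new cache keyword; the data is invented, and the benefit comes from converting each unique value once and mapping the result back onto the full input.

import pandas as pd

# 10,000 strings but only one distinct date: a prime candidate for caching.
dup_dates = ['2000-02-11'] * 10000

with_cache = pd.to_datetime(dup_dates, format='%Y-%m-%d', cache=True)
without_cache = pd.to_datetime(dup_dates, format='%Y-%m-%d', cache=False)  # default

# The results are identical; cache=True only changes how they are computed.
assert with_cache.equals(without_cache)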
@@ -369,15 +442,28 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): if isinstance(arg, tslib.Timestamp): result = arg elif isinstance(arg, ABCSeries): - from pandas import Series - values = _convert_listlike(arg._values, True, format) - result = Series(values, index=arg.index, name=arg.name) + cache_array = _maybe_cache(arg, format, cache, tz, _convert_listlike) + if not cache_array.empty: + result = arg.map(cache_array) + else: + from pandas import Series + values = _convert_listlike(arg._values, True, format) + result = Series(values, index=arg.index, name=arg.name) elif isinstance(arg, (ABCDataFrame, MutableMapping)): result = _assemble_from_unit_mappings(arg, errors=errors) elif isinstance(arg, ABCIndexClass): - result = _convert_listlike(arg, box, format, name=arg.name) + cache_array = _maybe_cache(arg, format, cache, tz, _convert_listlike) + if not cache_array.empty: + result = _convert_and_box_cache(arg, cache_array, box, errors, + name=arg.name) + else: + result = _convert_listlike(arg, box, format, name=arg.name) elif is_list_like(arg): - result = _convert_listlike(arg, box, format) + cache_array = _maybe_cache(arg, format, cache, tz, _convert_listlike) + if not cache_array.empty: + result = _convert_and_box_cache(arg, cache_array, box, errors) + else: + result = _convert_listlike(arg, box, format) else: result = _convert_listlike(np.array([arg]), box, format)[0] diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index be39f4baba0fb4..32bab09a0c4acf 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -764,7 +764,7 @@ def _parse_numpy(self): if orient == "columns": args = loads(json, dtype=None, numpy=True, labelled=True, precise_float=self.precise_float) - if args: + if len(args): args = (args[0].T, args[2], args[1]) self.obj = DataFrame(*args) elif orient == "split": diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index ef95e32cc241e8..4a13d2c9db9445 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -76,9 +76,10 @@ def write(self, df, path, compression='snappy', table, path, compression=compression, coerce_timestamps=coerce_timestamps, **kwargs) - def read(self, path, columns=None): + def read(self, path, columns=None, **kwargs): path, _, _ = get_filepath_or_buffer(path) - return self.api.parquet.read_table(path, columns=columns).to_pandas() + return self.api.parquet.read_table(path, columns=columns, + **kwargs).to_pandas() class FastParquetImpl(object): @@ -115,9 +116,9 @@ def write(self, df, path, compression='snappy', **kwargs): self.api.write(path, df, compression=compression, **kwargs) - def read(self, path, columns=None): + def read(self, path, columns=None, **kwargs): path, _, _ = get_filepath_or_buffer(path) - return self.api.ParquetFile(path).to_pandas(columns=columns) + return self.api.ParquetFile(path).to_pandas(columns=columns, **kwargs) def to_parquet(df, path, engine='auto', compression='snappy', **kwargs): @@ -175,7 +176,7 @@ def to_parquet(df, path, engine='auto', compression='snappy', **kwargs): if df.columns.inferred_type not in valid_types: raise ValueError("parquet must have string column names") - return impl.write(df, path, compression=compression) + return impl.write(df, path, compression=compression, **kwargs) def read_parquet(path, engine='auto', columns=None, **kwargs): @@ -205,4 +206,4 @@ def read_parquet(path, engine='auto', columns=None, **kwargs): """ impl = get_engine(engine) - return impl.read(path, columns=columns) + return impl.read(path, columns=columns, **kwargs) diff --git 
a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 2b3a91e2062b15..26e39f0df8b294 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -17,6 +17,7 @@ import pandas as pd from pandas import compat from pandas.io.common import get_filepath_or_buffer, BaseIterator +from pandas.errors import EmptyDataError import numpy as np import struct import pandas.io.sas.sas_constants as const @@ -594,6 +595,10 @@ def read(self, nrows=None): elif nrows is None: nrows = self.row_count + if len(self.column_types) == 0: + self.close() + raise EmptyDataError("No columns to parse from file") + if self._current_row_in_file_index >= self.row_count: return None diff --git a/pandas/plotting/_converter.py b/pandas/plotting/_converter.py index 47d15195315ba2..aadd5a1beb28b6 100644 --- a/pandas/plotting/_converter.py +++ b/pandas/plotting/_converter.py @@ -28,6 +28,7 @@ from pandas.core.indexes.datetimes import date_range import pandas.core.tools.datetimes as tools +from pandas._libs.tslibs import resolution import pandas.tseries.frequencies as frequencies from pandas.tseries.frequencies import FreqGroup from pandas.core.indexes.period import Period, PeriodIndex @@ -64,7 +65,7 @@ def time2num(d): if isinstance(d, compat.string_types): parsed = tools.to_datetime(d) if not isinstance(parsed, datetime): - raise ValueError('Could not parse time %s' % d) + raise ValueError('Could not parse time {d}'.format(d=d)) return _to_ordinalf(parsed.time()) if isinstance(d, pydt.time): return _to_ordinalf(d) @@ -166,7 +167,7 @@ def get_datevalue(date, freq): return date elif date is None: return None - raise ValueError("Unrecognizable date '%s'" % date) + raise ValueError("Unrecognizable date '{date}'".format(date=date)) def _dt_to_float_ordinal(dt): @@ -351,10 +352,12 @@ def __call__(self): estimate = (nmax - nmin) / (self._get_unit() * self._get_interval()) if estimate > self.MAXTICKS * 2: - raise RuntimeError(('MillisecondLocator estimated to generate %d ' - 'ticks from %s to %s: exceeds Locator.MAXTICKS' - '* 2 (%d) ') % - (estimate, dmin, dmax, self.MAXTICKS * 2)) + raise RuntimeError(('MillisecondLocator estimated to generate ' + '{estimate:d} ticks from {dmin} to {dmax}: ' + 'exceeds Locator.MAXTICKS' + '* 2 ({arg:d}) ').format( + estimate=estimate, dmin=dmin, dmax=dmax, + arg=self.MAXTICKS * 2)) freq = '%dL' % self._get_interval() tz = self.tz.tzname(None) @@ -505,7 +508,7 @@ def _daily_finder(vmin, vmax, freq): elif freq == FreqGroup.FR_HR: periodsperday = 24 else: # pragma: no cover - raise ValueError("unexpected frequency: %s" % freq) + raise ValueError("unexpected frequency: {freq}".format(freq=freq)) periodsperyear = 365 * periodsperday periodspermonth = 28 * periodsperday @@ -515,7 +518,7 @@ def _daily_finder(vmin, vmax, freq): elif freq == FreqGroup.FR_DAY: periodsperyear = 365 periodspermonth = 28 - elif frequencies.get_freq_group(freq) == FreqGroup.FR_WK: + elif resolution.get_freq_group(freq) == FreqGroup.FR_WK: periodsperyear = 52 periodspermonth = 3 else: # pragma: no cover @@ -853,7 +856,7 @@ def _annual_finder(vmin, vmax, freq): def get_finder(freq): if isinstance(freq, compat.string_types): freq = frequencies.get_freq(freq) - fgroup = frequencies.get_freq_group(freq) + fgroup = resolution.get_freq_group(freq) if fgroup == FreqGroup.FR_ANN: return _annual_finder @@ -864,7 +867,7 @@ def get_finder(freq): elif ((freq >= FreqGroup.FR_BUS) or fgroup == FreqGroup.FR_WK): return _daily_finder else: # pragma: no cover - errmsg = "Unsupported frequency: %s" % (freq) + errmsg = 
"Unsupported frequency: {freq}".format(freq=freq) raise NotImplementedError(errmsg) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 58f9b7ee6fc02d..62b2899f494134 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -749,7 +749,7 @@ def match_labels(data, e): err = np.tile([err], (self.nseries, len(self.data))) else: - msg = "No valid %s detected" % label + msg = "No valid {label} detected".format(label=label) raise ValueError(msg) return err @@ -1414,7 +1414,7 @@ def _plot(cls, ax, y, style=None, bw_method=None, ind=None, gkde = gaussian_kde(y) if bw_method is not None: msg = ('bw_method was added in Scipy 0.11.0.' + - ' Scipy version in use is %s.' % spv) + ' Scipy version in use is {spv}.'.format(spv=spv)) warnings.warn(msg) y = gkde.evaluate(ind) @@ -2452,7 +2452,7 @@ def _grouped_plot_by_column(plotf, data, columns=None, by=None, result = axes byline = by[0] if len(by) == 1 else by - fig.suptitle('Boxplot grouped by %s' % byline) + fig.suptitle('Boxplot grouped by {byline}'.format(byline=byline)) fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) return result diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 54f87febdc2141..d6048f54993e6d 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -525,7 +525,7 @@ def lag_plot(series, lag=1, ax=None, **kwds): if ax is None: ax = plt.gca() ax.set_xlabel("y(t)") - ax.set_ylabel("y(t + %s)" % lag) + ax.set_ylabel("y(t + {lag})".format(lag=lag)) ax.scatter(y1, y2, **kwds) return ax diff --git a/pandas/plotting/_style.py b/pandas/plotting/_style.py index 4c31ff0177488a..145597e52ae14e 100644 --- a/pandas/plotting/_style.py +++ b/pandas/plotting/_style.py @@ -131,7 +131,8 @@ def __getitem__(self, key): self._warn_if_deprecated() key = self._get_canonical_key(key) if key not in self: - raise ValueError('%s is not a valid pandas plotting option' % key) + raise ValueError( + '{key} is not a valid pandas plotting option'.format(key=key)) return super(_Options, self).__getitem__(key) def __setitem__(self, key, value): @@ -142,7 +143,8 @@ def __setitem__(self, key, value): def __delitem__(self, key): key = self._get_canonical_key(key) if key in self._DEFAULT_KEYS: - raise ValueError('Cannot remove default parameter %s' % key) + raise ValueError( + 'Cannot remove default parameter {key}'.format(key=key)) return super(_Options, self).__delitem__(key) def __contains__(self, key): diff --git a/pandas/plotting/_tools.py b/pandas/plotting/_tools.py index 047a57ead72f87..816586fbb82f52 100644 --- a/pandas/plotting/_tools.py +++ b/pandas/plotting/_tools.py @@ -84,8 +84,9 @@ def _get_layout(nplots, layout=None, layout_type='box'): raise ValueError(msg) if nrows * ncols < nplots: - raise ValueError('Layout of %sx%s must be larger than ' - 'required size %s' % (nrows, ncols, nplots)) + raise ValueError('Layout of {nrows}x{ncols} must be larger ' + 'than required size {nplots}'.format( + nrows=nrows, ncols=ncols, nplots=nplots)) return layout diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py index f6c3a08c6721ac..4e9e1b51e6fdab 100644 --- a/pandas/stats/moments.py +++ b/pandas/stats/moments.py @@ -458,7 +458,7 @@ def _rolling_func(name, desc, how=None, func_kw=None, additional_kw=''): if how is None: how_arg_str = 'None' else: - how_arg_str = "'%s" % how + how_arg_str = "'{how}".format(how=how) @Substitution(desc, _unary_arg, _roll_kw % how_arg_str + additional_kw, _type_of_input_retval, _roll_notes) diff --git 
a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index d2874b1606e729..c2d1eb8ae13725 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -718,6 +718,18 @@ def test_float_truncation(self): expected = df.loc[[1], :] tm.assert_frame_equal(expected, result) + def test_disallow_python_keywords(self): + # GH 18221 + df = pd.DataFrame([[0, 0, 0]], columns=['foo', 'bar', 'class']) + msg = "Python keyword not valid identifier in numexpr query" + with tm.assert_raises_regex(SyntaxError, msg): + df.query('class == 0') + + df = pd.DataFrame() + df.index.name = 'lambda' + with tm.assert_raises_regex(SyntaxError, msg): + df.query('lambda == 0') + class TestEvalNumexprPython(TestEvalNumexprPandas): diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 7195cb43a70dc0..34ed9d3142923e 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -39,7 +39,7 @@ from pandas.util import testing as tm -@pytest.fixture(params=[True, False], ids=lambda val: str(val)) +@pytest.fixture(params=[True, False], ids=str) def coerce(request): return request.param @@ -60,16 +60,20 @@ def __getitem__(self): assert (not is_seq(A())) -def test_is_list_like(): - passes = ([], [1], (1, ), (1, 2), {'a': 1}, set([1, 'a']), Series([1]), - Series([]), Series(['a']).str) - fails = (1, '2', object(), str) +@pytest.mark.parametrize( + "ll", + [ + [], [1], (1, ), (1, 2), {'a': 1}, + set([1, 'a']), Series([1]), + Series([]), Series(['a']).str]) +def test_is_list_like_passes(ll): + assert inference.is_list_like(ll) - for p in passes: - assert inference.is_list_like(p) - for f in fails: - assert not inference.is_list_like(f) +@pytest.mark.parametrize( + "ll", [1, '2', object(), str]) +def test_is_list_like_fails(ll): + assert not inference.is_list_like(ll) @pytest.mark.parametrize('inner', [ @@ -93,15 +97,16 @@ def test_is_nested_list_like_fails(obj): assert not inference.is_nested_list_like(obj) -def test_is_dict_like(): - passes = [{}, {'A': 1}, Series([1])] - fails = ['1', 1, [1, 2], (1, 2), range(2), Index([1])] +@pytest.mark.parametrize( + "ll", [{}, {'A': 1}, Series([1])]) +def test_is_dict_like_passes(ll): + assert inference.is_dict_like(ll) - for p in passes: - assert inference.is_dict_like(p) - for f in fails: - assert not inference.is_dict_like(f) +@pytest.mark.parametrize( + "ll", ['1', 1, [1, 2], (1, 2), range(2), Index([1])]) +def test_is_dict_like_fails(ll): + assert not inference.is_dict_like(ll) def test_is_file_like(): @@ -148,15 +153,16 @@ class MockFile(object): assert not is_file(mock.Mock()) -def test_is_named_tuple(): - passes = (collections.namedtuple('Test', list('abc'))(1, 2, 3), ) - fails = ((1, 2, 3), 'a', Series({'pi': 3.14})) +@pytest.mark.parametrize( + "ll", [collections.namedtuple('Test', list('abc'))(1, 2, 3)]) +def test_is_names_tuple_passes(ll): + assert inference.is_named_tuple(ll) - for p in passes: - assert inference.is_named_tuple(p) - for f in fails: - assert not inference.is_named_tuple(f) +@pytest.mark.parametrize( + "ll", [(1, 2, 3), 'a', Series({'pi': 3.14})]) +def test_is_names_tuple_fails(ll): + assert not inference.is_named_tuple(ll) def test_is_hashable(): @@ -208,27 +214,32 @@ class OldStyleClass(): hash(c) # this will not raise -def test_is_re(): - passes = re.compile('ad'), - fails = 'x', 2, 3, object() +@pytest.mark.parametrize( + "ll", [re.compile('ad')]) +def test_is_re_passes(ll): + assert inference.is_re(ll) - for p in passes: - 
assert inference.is_re(p) - for f in fails: - assert not inference.is_re(f) +@pytest.mark.parametrize( + "ll", ['x', 2, 3, object()]) +def test_is_re_fails(ll): + assert not inference.is_re(ll) -def test_is_recompilable(): - passes = (r'a', u('x'), r'asdf', re.compile('adsf'), u(r'\u2233\s*'), - re.compile(r'')) - fails = 1, [], object() +@pytest.mark.parametrize( + "ll", [r'a', u('x'), + r'asdf', + re.compile('adsf'), + u(r'\u2233\s*'), + re.compile(r'')]) +def test_is_recompilable_passes(ll): + assert inference.is_re_compilable(ll) - for p in passes: - assert inference.is_re_compilable(p) - for f in fails: - assert not inference.is_re_compilable(f) +@pytest.mark.parametrize( + "ll", [1, [], object()]) +def test_is_recompilable_fails(ll): + assert not inference.is_re_compilable(ll) class TestInference(object): @@ -300,15 +311,14 @@ def test_maybe_convert_numeric_infinities(self): np.array(['foo_' + infinity], dtype=object), na_values, maybe_int) - def test_maybe_convert_numeric_post_floatify_nan(self): + def test_maybe_convert_numeric_post_floatify_nan(self, coerce): # see gh-13314 data = np.array(['1.200', '-999.000', '4.500'], dtype=object) expected = np.array([1.2, np.nan, 4.5], dtype=np.float64) nan_values = set([-999, -999.0]) - for coerce_type in (True, False): - out = lib.maybe_convert_numeric(data, nan_values, coerce_type) - tm.assert_numpy_array_equal(out, expected) + out = lib.maybe_convert_numeric(data, nan_values, coerce) + tm.assert_numpy_array_equal(out, expected) def test_convert_infs(self): arr = np.array(['inf', 'inf', 'inf'], dtype='O') @@ -739,6 +749,36 @@ def test_is_datetimelike_array_all_nan_nat_like(self): assert not lib.is_timedelta64_array(arr) assert not lib.is_timedelta_or_timedelta64_array(arr) + assert lib.is_datetime_with_singletz_array( + np.array([pd.Timestamp('20130101', tz='US/Eastern'), + pd.Timestamp('20130102', tz='US/Eastern')], + dtype=object)) + assert not lib.is_datetime_with_singletz_array( + np.array([pd.Timestamp('20130101', tz='US/Eastern'), + pd.Timestamp('20130102', tz='CET')], + dtype=object)) + + @pytest.mark.parametrize( + "func", + [ + 'is_datetime_array', + 'is_datetime64_array', + 'is_bool_array', + 'is_timedelta_array', + 'is_timedelta64_array', + 'is_timedelta_or_timedelta64_array', + 'is_date_array', + 'is_time_array', + 'is_interval_array', + 'is_period_array']) + def test_other_dtypes_for_array(self, func): + func = getattr(lib, func) + arr = np.array(['foo', 'bar']) + assert not func(arr) + + arr = np.array([1, 2]) + assert not func(arr) + def test_date(self): dates = [date(2012, 1, day) for day in range(1, 20)] @@ -752,6 +792,24 @@ def test_date(self): result = lib.infer_dtype(dates, skipna=True) assert result == 'date' + def test_is_numeric_array(self): + + assert lib.is_float_array(np.array([1, 2.0])) + assert lib.is_float_array(np.array([1, 2.0, np.nan])) + assert not lib.is_float_array(np.array([1, 2])) + + assert lib.is_integer_array(np.array([1, 2])) + assert not lib.is_integer_array(np.array([1, 2.0])) + + def test_is_string_array(self): + + assert lib.is_string_array(np.array(['foo', 'bar'])) + assert not lib.is_string_array( + np.array(['foo', 'bar', np.nan], dtype=object), skipna=False) + assert lib.is_string_array( + np.array(['foo', 'bar', np.nan], dtype=object), skipna=True) + assert not lib.is_string_array(np.array([1, 2])) + def test_to_object_array_tuples(self): r = (5, 6) values = [r] diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index be6d81c63ae1ef..c50aa858a15b53 100644 --- 
a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -306,6 +306,11 @@ def test_axis_aliases(self): result = f.sum(axis='columns') assert_series_equal(result, expected) + def test_class_axis(self): + # https://github.com/pandas-dev/pandas/issues/18147 + DataFrame.index # no exception! + DataFrame.columns # no exception! + def test_more_asMatrix(self): values = self.mixed_frame.as_matrix() assert values.shape[1] == len(self.mixed_frame.columns) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 2f750a76219052..03f780957b15ea 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -12,7 +12,7 @@ from pandas.errors import UnsupportedFunctionCall, PerformanceWarning from pandas.util.testing import (assert_frame_equal, assert_index_equal, assert_series_equal, assert_almost_equal) -from pandas.compat import (range, long, lrange, StringIO, lmap, lzip, map, zip, +from pandas.compat import (range, lrange, StringIO, lmap, lzip, map, zip, builtins, OrderedDict) from pandas import compat from collections import defaultdict @@ -264,7 +264,7 @@ def test_len(self): df = pd.DataFrame(dict(a=[np.nan] * 3, b=[1, 2, 3])) assert len(df.groupby(('a'))) == 0 assert len(df.groupby(('b'))) == 3 - assert len(df.groupby(('a', 'b'))) == 3 + assert len(df.groupby(['a', 'b'])) == 3 def test_basic_regression(self): # regression @@ -2051,30 +2051,6 @@ def afunc(data): assert_frame_equal(closure_bad, closure_good) - def test_multiindex_columns_empty_level(self): - l = [['count', 'values'], ['to filter', '']] - midx = MultiIndex.from_tuples(l) - - df = DataFrame([[long(1), 'A']], columns=midx) - - grouped = df.groupby('to filter').groups - assert grouped['A'] == [0] - - grouped = df.groupby([('to filter', '')]).groups - assert grouped['A'] == [0] - - df = DataFrame([[long(1), 'A'], [long(2), 'B']], columns=midx) - - expected = df.groupby('to filter').groups - result = df.groupby([('to filter', '')]).groups - assert result == expected - - df = DataFrame([[long(1), 'A'], [long(2), 'A']], columns=midx) - - expected = df.groupby('to filter').groups - result = df.groupby([('to filter', '')]).groups - tm.assert_dict_equal(result, expected) - def test_cython_median(self): df = DataFrame(np.random.randn(1000)) df.values[::2] = np.nan diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 9e6de8749952f8..cc422f2d1cdeb7 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -9,7 +9,7 @@ Index, MultiIndex, DataFrame, Series) from pandas.util.testing import (assert_panel_equal, assert_frame_equal, assert_series_equal, assert_almost_equal) -from pandas.compat import lrange +from pandas.compat import lrange, long from pandas import compat import numpy as np @@ -356,6 +356,30 @@ def test_multifunc_select_col_integer_cols(self): # it works! 
df.groupby(1, as_index=False)[2].agg({'Q': np.mean}) + def test_multiindex_columns_empty_level(self): + lst = [['count', 'values'], ['to filter', '']] + midx = MultiIndex.from_tuples(lst) + + df = DataFrame([[long(1), 'A']], columns=midx) + + grouped = df.groupby('to filter').groups + assert grouped['A'] == [0] + + grouped = df.groupby([('to filter', '')]).groups + assert grouped['A'] == [0] + + df = DataFrame([[long(1), 'A'], [long(2), 'B']], columns=midx) + + expected = df.groupby('to filter').groups + result = df.groupby([('to filter', '')]).groups + assert result == expected + + df = DataFrame([[long(1), 'A'], [long(2), 'A']], columns=midx) + + expected = df.groupby('to filter').groups + result = df.groupby([('to filter', '')]).groups + tm.assert_dict_equal(result, expected) + def test_groupby_multiindex_tuple(self): # GH 17979 df = pd.DataFrame([[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]], @@ -366,13 +390,18 @@ def test_groupby_multiindex_tuple(self): result = df.groupby(('b', 1)).groups tm.assert_dict_equal(expected, result) - df2 = pd.DataFrame([[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]], + df2 = pd.DataFrame(df.values, columns=pd.MultiIndex.from_arrays( [['a', 'b', 'b', 'c'], ['d', 'd', 'e', 'e']])) - df2.groupby([('b', 'd')]).groups - expected = df.groupby([('b', 'd')]).groups - result = df.groupby(('b', 'd')).groups + expected = df2.groupby([('b', 'd')]).groups + result = df.groupby(('b', 1)).groups + tm.assert_dict_equal(expected, result) + + df3 = pd.DataFrame(df.values, + columns=[('a', 'd'), ('b', 'd'), ('b', 'e'), 'c']) + expected = df3.groupby([('b', 'd')]).groups + result = df.groupby(('b', 1)).groups tm.assert_dict_equal(expected, result) @pytest.mark.parametrize('sort', [True, False]) diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index 501fe63137cf47..2a408b85f0ed16 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -202,7 +202,7 @@ def test_nth(self): freq='B') df = DataFrame(1, index=business_dates, columns=['a', 'b']) # get the first, fourth and last two business days for each month - key = (df.index.year, df.index.month) + key = [df.index.year, df.index.month] result = df.groupby(key, as_index=False).nth([0, 3, -2, -1]) expected_dates = pd.to_datetime( ['2014/4/1', '2014/4/4', '2014/4/29', '2014/4/30', '2014/5/1', diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index 3d7977c63eeb64..1434656115d187 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -43,7 +43,7 @@ def seed_df(seed_nans, n, m): df = seed_df(seed_nans, n, m) bins = None, np.arange(0, max(5, df['3rd'].max()) + 1, 2) - keys = '1st', '2nd', ('1st', '2nd') + keys = '1st', '2nd', ['1st', '2nd'] for k, b in product(keys, bins): binned.append((df, k, b, n, m)) ids.append("{}-{}-{}".format(k, n, m)) diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 50ee88bd82f409..a9c26ebb903593 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -2,9 +2,10 @@ import pytest -from datetime import datetime +from datetime import datetime, date import numpy as np import pandas as pd +import operator as op from pandas import (DatetimeIndex, Series, DataFrame, date_range, Index, Timedelta, Timestamp) @@ -330,3 +331,21 @@ def test_loc_datetime_length_one(self): result = df.loc['2016-10-01T00:00:00':] 
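The groupby test tweaks above follow the keying convention that a list supplies several group keys while a tuple names a single (possibly MultiIndex) column; a small sketch with invented frames:

import pandas as pd

df = pd.DataFrame({'a': ['x', 'x', 'y'], 'b': [1, 1, 2], 'c': [10, 20, 30]})

# Two grouping columns: pass them as a list.
print(df.groupby(['a', 'b'])['c'].sum())

# Under MultiIndex columns a tuple identifies one column, so it acts as a
# single key rather than a pair of keys.
mdf = pd.DataFrame([[1, 'A'], [2, 'B']],
                   columns=pd.MultiIndex.from_tuples([('to filter', ''),
                                                      ('values', '')]))
print(mdf.groupby(('to filter', '')).groups)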
tm.assert_frame_equal(result, df) + + @pytest.mark.parametrize('datetimelike', [ + Timestamp('20130101'), datetime(2013, 1, 1), + date(2013, 1, 1), np.datetime64('2013-01-01T00:00', 'ns')]) + @pytest.mark.parametrize('op,expected', [ + (op.lt, [True, False, False, False]), + (op.le, [True, True, False, False]), + (op.eq, [False, True, False, False]), + (op.gt, [False, False, False, True])]) + def test_selection_by_datetimelike(self, datetimelike, op, expected): + # GH issue #17965, test for ability to compare datetime64[ns] columns + # to datetimelike + df = DataFrame({'A': [pd.Timestamp('20120101'), + pd.Timestamp('20130101'), + np.nan, pd.Timestamp('20130103')]}) + result = op(df.A, datetimelike) + expected = Series(expected, name='A') + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 8205b4fde217b0..a1287c3102b779 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -12,6 +12,7 @@ from distutils.version import LooseVersion import pandas as pd +from pandas.conftest import is_dateutil_le_261, is_dateutil_gt_261 from pandas._libs import tslib from pandas._libs.tslibs import parsing from pandas.core.tools import datetimes as tools @@ -28,7 +29,8 @@ class TestTimeConversionFormats(object): - def test_to_datetime_format(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_format(self, cache): values = ['1/1/2000', '1/2/2000', '1/3/2000'] results1 = [Timestamp('20000101'), Timestamp('20000201'), @@ -43,7 +45,7 @@ def test_to_datetime_format(self): (values[2], (results1[2], results2[2]))]: for i, fmt in enumerate(['%d/%m/%Y', '%m/%d/%Y']): - result = to_datetime(vals, format=fmt) + result = to_datetime(vals, format=fmt, cache=cache) expected = expecteds[i] if isinstance(expected, Series): @@ -53,14 +55,15 @@ def test_to_datetime_format(self): else: tm.assert_index_equal(result, expected) - def test_to_datetime_format_YYYYMMDD(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_format_YYYYMMDD(self, cache): s = Series([19801222, 19801222] + [19810105] * 5) expected = Series([Timestamp(x) for x in s.apply(str)]) - result = to_datetime(s, format='%Y%m%d') + result = to_datetime(s, format='%Y%m%d', cache=cache) assert_series_equal(result, expected) - result = to_datetime(s.apply(str), format='%Y%m%d') + result = to_datetime(s.apply(str), format='%Y%m%d', cache=cache) assert_series_equal(result, expected) # with NaT @@ -69,44 +72,48 @@ def test_to_datetime_format_YYYYMMDD(self): expected[2] = np.nan s[2] = np.nan - result = to_datetime(s, format='%Y%m%d') + result = to_datetime(s, format='%Y%m%d', cache=cache) assert_series_equal(result, expected) # string with NaT s = s.apply(str) s[2] = 'nat' - result = to_datetime(s, format='%Y%m%d') + result = to_datetime(s, format='%Y%m%d', cache=cache) assert_series_equal(result, expected) # coercion # GH 7930 s = Series([20121231, 20141231, 99991231]) - result = pd.to_datetime(s, format='%Y%m%d', errors='ignore') + result = pd.to_datetime(s, format='%Y%m%d', errors='ignore', + cache=cache) expected = Series([datetime(2012, 12, 31), datetime(2014, 12, 31), datetime(9999, 12, 31)], dtype=object) tm.assert_series_equal(result, expected) - result = pd.to_datetime(s, format='%Y%m%d', errors='coerce') + result = pd.to_datetime(s, format='%Y%m%d', errors='coerce', + cache=cache) expected = Series(['20121231', '20141231', 'NaT'], dtype='M8[ns]') 
assert_series_equal(result, expected) - # GH 10178 - def test_to_datetime_format_integer(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_format_integer(self, cache): + # GH 10178 s = Series([2000, 2001, 2002]) expected = Series([Timestamp(x) for x in s.apply(str)]) - result = to_datetime(s, format='%Y') + result = to_datetime(s, format='%Y', cache=cache) assert_series_equal(result, expected) s = Series([200001, 200105, 200206]) expected = Series([Timestamp(x[:4] + '-' + x[4:]) for x in s.apply(str) ]) - result = to_datetime(s, format='%Y%m') + result = to_datetime(s, format='%Y%m', cache=cache) assert_series_equal(result, expected) - def test_to_datetime_format_microsecond(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_format_microsecond(self, cache): # these are locale dependent lang, _ = locale.getlocale() @@ -114,11 +121,12 @@ def test_to_datetime_format_microsecond(self): val = '01-{}-2011 00:00:01.978'.format(month_abbr) format = '%d-%b-%Y %H:%M:%S.%f' - result = to_datetime(val, format=format) + result = to_datetime(val, format=format, cache=cache) exp = datetime.strptime(val, format) assert result == exp - def test_to_datetime_format_time(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_format_time(self, cache): data = [ ['01/10/2010 15:20', '%m/%d/%Y %H:%M', Timestamp('2010-01-10 15:20')], @@ -134,9 +142,10 @@ def test_to_datetime_format_time(self): # Timestamp('2010-01-10 09:12:56')] ] for s, format, dt in data: - assert to_datetime(s, format=format) == dt + assert to_datetime(s, format=format, cache=cache) == dt - def test_to_datetime_with_non_exact(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_with_non_exact(self, cache): # GH 10834 tm._skip_if_has_locale() @@ -147,12 +156,13 @@ def test_to_datetime_with_non_exact(self): s = Series(['19MAY11', 'foobar19MAY11', '19MAY11:00:00:00', '19MAY11 00:00:00Z']) - result = to_datetime(s, format='%d%b%y', exact=False) + result = to_datetime(s, format='%d%b%y', exact=False, cache=cache) expected = to_datetime(s.str.extract(r'(\d+\w+\d+)', expand=False), - format='%d%b%y') + format='%d%b%y', cache=cache) assert_series_equal(result, expected) - def test_parse_nanoseconds_with_formula(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_parse_nanoseconds_with_formula(self, cache): # GH8989 # trunctaing the nanoseconds when a format was provided @@ -161,44 +171,48 @@ def test_parse_nanoseconds_with_formula(self): "2012-01-01 09:00:00.001", "2012-01-01 09:00:00.001000", "2012-01-01 09:00:00.001000000", ]: - expected = pd.to_datetime(v) - result = pd.to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f") + expected = pd.to_datetime(v, cache=cache) + result = pd.to_datetime(v, format="%Y-%m-%d %H:%M:%S.%f", + cache=cache) assert result == expected - def test_to_datetime_format_weeks(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_format_weeks(self, cache): data = [ ['2009324', '%Y%W%w', Timestamp('2009-08-13')], ['2013020', '%Y%U%w', Timestamp('2013-01-13')] ] for s, format, dt in data: - assert to_datetime(s, format=format) == dt + assert to_datetime(s, format=format, cache=cache) == dt class TestToDatetime(object): - def test_to_datetime_dt64s(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_dt64s(self, cache): in_bound_dts = [ np.datetime64('2000-01-01'), np.datetime64('2000-01-02'), ] for dt in in_bound_dts: - assert pd.to_datetime(dt) == 
Timestamp(dt) + assert pd.to_datetime(dt, cache=cache) == Timestamp(dt) oob_dts = [np.datetime64('1000-01-01'), np.datetime64('5000-01-02'), ] for dt in oob_dts: pytest.raises(ValueError, pd.to_datetime, dt, errors='raise') pytest.raises(ValueError, Timestamp, dt) - assert pd.to_datetime(dt, errors='coerce') is NaT + assert pd.to_datetime(dt, errors='coerce', cache=cache) is NaT - def test_to_datetime_array_of_dt64s(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_array_of_dt64s(self, cache): dts = [np.datetime64('2000-01-01'), np.datetime64('2000-01-02'), ] # Assuming all datetimes are in bounds, to_datetime() returns # an array that is equal to Timestamp() parsing tm.assert_numpy_array_equal( - pd.to_datetime(dts, box=False), + pd.to_datetime(dts, box=False, cache=cache), np.array([Timestamp(x).asm8 for x in dts]) ) @@ -209,7 +223,8 @@ def test_to_datetime_array_of_dt64s(self): errors='raise') tm.assert_numpy_array_equal( - pd.to_datetime(dts_with_oob, box=False, errors='coerce'), + pd.to_datetime(dts_with_oob, box=False, errors='coerce', + cache=cache), np.array( [ Timestamp(dts_with_oob[0]).asm8, @@ -224,20 +239,22 @@ def test_to_datetime_array_of_dt64s(self): # are converted to their .item(), which depending on the version of # numpy is either a python datetime.datetime or datetime.date tm.assert_numpy_array_equal( - pd.to_datetime(dts_with_oob, box=False, errors='ignore'), + pd.to_datetime(dts_with_oob, box=False, errors='ignore', + cache=cache), np.array( [dt.item() for dt in dts_with_oob], dtype='O' ) ) - def test_to_datetime_tz(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_tz(self, cache): # xref 8260 # uniform returns a DatetimeIndex arr = [pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'), pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')] - result = pd.to_datetime(arr) + result = pd.to_datetime(arr, cache=cache) expected = DatetimeIndex( ['2013-01-01 13:00:00', '2013-01-02 14:00:00'], tz='US/Pacific') tm.assert_index_equal(result, expected) @@ -245,9 +262,10 @@ def test_to_datetime_tz(self): # mixed tzs will raise arr = [pd.Timestamp('2013-01-01 13:00:00', tz='US/Pacific'), pd.Timestamp('2013-01-02 14:00:00', tz='US/Eastern')] - pytest.raises(ValueError, lambda: pd.to_datetime(arr)) + pytest.raises(ValueError, lambda: pd.to_datetime(arr, cache=cache)) - def test_to_datetime_tz_pytz(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_tz_pytz(self, cache): # see gh-8260 us_eastern = pytz.timezone('US/Eastern') arr = np.array([us_eastern.localize(datetime(year=2000, month=1, day=1, @@ -255,18 +273,20 @@ def test_to_datetime_tz_pytz(self): us_eastern.localize(datetime(year=2000, month=6, day=1, hour=3, minute=0))], dtype=object) - result = pd.to_datetime(arr, utc=True) + result = pd.to_datetime(arr, utc=True, cache=cache) expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', '2000-06-01 07:00:00+00:00'], dtype='datetime64[ns, UTC]', freq=None) tm.assert_index_equal(result, expected) + @pytest.mark.parametrize('cache', [True, False]) @pytest.mark.parametrize("init_constructor, end_constructor, test_method", [(Index, DatetimeIndex, tm.assert_index_equal), (list, DatetimeIndex, tm.assert_index_equal), (np.array, DatetimeIndex, tm.assert_index_equal), (Series, Series, tm.assert_series_equal)]) def test_to_datetime_utc_true(self, + cache, init_constructor, end_constructor, test_method): @@ -277,39 +297,47 @@ def test_to_datetime_utc_true(self, result = 
pd.to_datetime(init_constructor(data), format='%Y%m%d %H%M%S', - utc=True) + utc=True, + cache=cache) expected = end_constructor(expected_data) test_method(result, expected) # Test scalar case as well for scalar, expected in zip(data, expected_data): - result = pd.to_datetime(scalar, format='%Y%m%d %H%M%S', utc=True) + result = pd.to_datetime(scalar, format='%Y%m%d %H%M%S', utc=True, + cache=cache) assert result == expected - def test_to_datetime_utc_true_with_series_single_value(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_utc_true_with_series_single_value(self, cache): # GH 15760 UTC=True with Series ts = 1.5e18 - result = pd.to_datetime(pd.Series([ts]), utc=True) + result = pd.to_datetime(pd.Series([ts]), utc=True, cache=cache) expected = pd.Series([pd.Timestamp(ts, tz='utc')]) tm.assert_series_equal(result, expected) - def test_to_datetime_utc_true_with_series_tzaware_string(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_utc_true_with_series_tzaware_string(self, cache): ts = '2013-01-01 00:00:00-01:00' expected_ts = '2013-01-01 01:00:00' data = pd.Series([ts] * 3) - result = pd.to_datetime(data, utc=True) + result = pd.to_datetime(data, utc=True, cache=cache) expected = pd.Series([pd.Timestamp(expected_ts, tz='utc')] * 3) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize('cache', [True, False]) @pytest.mark.parametrize('date, dtype', [('2013-01-01 01:00:00', 'datetime64[ns]'), ('2013-01-01 01:00:00', 'datetime64[ns, UTC]')]) - def test_to_datetime_utc_true_with_series_datetime_ns(self, date, dtype): + def test_to_datetime_utc_true_with_series_datetime_ns(self, cache, date, + dtype): expected = pd.Series([pd.Timestamp('2013-01-01 01:00:00', tz='UTC')]) - result = pd.to_datetime(pd.Series([date], dtype=dtype), utc=True) + result = pd.to_datetime(pd.Series([date], dtype=dtype), utc=True, + cache=cache) tm.assert_series_equal(result, expected) - def test_to_datetime_tz_psycopg2(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_tz_psycopg2(self, cache): # xref 8260 try: @@ -324,7 +352,7 @@ def test_to_datetime_tz_psycopg2(self): datetime(2000, 6, 1, 3, 0, tzinfo=tz2)], dtype=object) - result = pd.to_datetime(arr, errors='coerce', utc=True) + result = pd.to_datetime(arr, errors='coerce', utc=True, cache=cache) expected = DatetimeIndex(['2000-01-01 08:00:00+00:00', '2000-06-01 07:00:00+00:00'], dtype='datetime64[ns, UTC]', freq=None) @@ -337,32 +365,39 @@ def test_to_datetime_tz_psycopg2(self): assert is_datetime64_ns_dtype(i) # tz coerceion - result = pd.to_datetime(i, errors='coerce') + result = pd.to_datetime(i, errors='coerce', cache=cache) tm.assert_index_equal(result, i) - result = pd.to_datetime(i, errors='coerce', utc=True) + result = pd.to_datetime(i, errors='coerce', utc=True, cache=cache) expected = pd.DatetimeIndex(['2000-01-01 13:00:00'], dtype='datetime64[ns, UTC]') tm.assert_index_equal(result, expected) - def test_datetime_bool(self): + @pytest.mark.parametrize( + 'cache', + [pytest.param(True, + marks=pytest.mark.skipif(True, reason="GH 18111")), + False]) + def test_datetime_bool(self, cache): # GH13176 with pytest.raises(TypeError): to_datetime(False) - assert to_datetime(False, errors="coerce") is NaT - assert to_datetime(False, errors="ignore") is False + assert to_datetime(False, errors="coerce", cache=cache) is NaT + assert to_datetime(False, errors="ignore", cache=cache) is False with pytest.raises(TypeError): to_datetime(True) - assert 
to_datetime(True, errors="coerce") is NaT - assert to_datetime(True, errors="ignore") is True + assert to_datetime(True, errors="coerce", cache=cache) is NaT + assert to_datetime(True, errors="ignore", cache=cache) is True with pytest.raises(TypeError): - to_datetime([False, datetime.today()]) + to_datetime([False, datetime.today()], cache=cache) with pytest.raises(TypeError): - to_datetime(['20130101', True]) + to_datetime(['20130101', True], cache=cache) tm.assert_index_equal(to_datetime([0, False, NaT, 0.0], - errors="coerce"), - DatetimeIndex([to_datetime(0), NaT, - NaT, to_datetime(0)])) + errors="coerce", cache=cache), + DatetimeIndex([to_datetime(0, cache=cache), + NaT, + NaT, + to_datetime(0, cache=cache)])) def test_datetime_invalid_datatype(self): # GH13176 @@ -372,6 +407,39 @@ def test_datetime_invalid_datatype(self): with pytest.raises(TypeError): pd.to_datetime(pd.to_datetime) + @pytest.mark.parametrize("utc", [True, None]) + @pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None]) + @pytest.mark.parametrize("box", [True, False]) + @pytest.mark.parametrize("constructor", [list, tuple, np.array, pd.Index]) + def test_to_datetime_cache(self, utc, format, box, constructor): + date = '20130101 00:00:00' + test_dates = [date] * 10**5 + data = constructor(test_dates) + result = pd.to_datetime(data, utc=utc, format=format, box=box, + cache=True) + expected = pd.to_datetime(data, utc=utc, format=format, box=box, + cache=False) + if box: + tm.assert_index_equal(result, expected) + else: + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize("utc", [True, None]) + @pytest.mark.parametrize("format", ['%Y%m%d %H:%M:%S', None]) + def test_to_datetime_cache_series(self, utc, format): + date = '20130101 00:00:00' + test_dates = [date] * 10**5 + data = pd.Series(test_dates) + result = pd.to_datetime(data, utc=utc, format=format, cache=True) + expected = pd.to_datetime(data, utc=utc, format=format, cache=False) + tm.assert_series_equal(result, expected) + + def test_to_datetime_cache_scalar(self): + date = '20130101 00:00:00' + result = pd.to_datetime(date, cache=True) + expected = pd.Timestamp('20130101 00:00:00') + assert result == expected + @pytest.mark.parametrize('date, format', [('2017-20', '%Y-%W'), ('20 Sunday', '%W %A'), @@ -388,72 +456,77 @@ def test_week_without_day_and_calendar_year(self, date, format): class TestToDatetimeUnit(object): - - def test_unit(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_unit(self, cache): # GH 11758 # test proper behavior with erros with pytest.raises(ValueError): - to_datetime([1], unit='D', format='%Y%m%d') + to_datetime([1], unit='D', format='%Y%m%d', cache=cache) values = [11111111, 1, 1.0, tslib.iNaT, NaT, np.nan, 'NaT', ''] - result = to_datetime(values, unit='D', errors='ignore') + result = to_datetime(values, unit='D', errors='ignore', cache=cache) expected = Index([11111111, Timestamp('1970-01-02'), Timestamp('1970-01-02'), NaT, NaT, NaT, NaT, NaT], dtype=object) tm.assert_index_equal(result, expected) - result = to_datetime(values, unit='D', errors='coerce') + result = to_datetime(values, unit='D', errors='coerce', cache=cache) expected = DatetimeIndex(['NaT', '1970-01-02', '1970-01-02', 'NaT', 'NaT', 'NaT', 'NaT', 'NaT']) tm.assert_index_equal(result, expected) with pytest.raises(tslib.OutOfBoundsDatetime): - to_datetime(values, unit='D', errors='raise') + to_datetime(values, unit='D', errors='raise', cache=cache) values = [1420043460000, tslib.iNaT, NaT, np.nan, 'NaT'] - result = 
to_datetime(values, errors='ignore', unit='s') + result = to_datetime(values, errors='ignore', unit='s', cache=cache) expected = Index([1420043460000, NaT, NaT, NaT, NaT], dtype=object) tm.assert_index_equal(result, expected) - result = to_datetime(values, errors='coerce', unit='s') + result = to_datetime(values, errors='coerce', unit='s', cache=cache) expected = DatetimeIndex(['NaT', 'NaT', 'NaT', 'NaT', 'NaT']) tm.assert_index_equal(result, expected) with pytest.raises(tslib.OutOfBoundsDatetime): - to_datetime(values, errors='raise', unit='s') + to_datetime(values, errors='raise', unit='s', cache=cache) # if we have a string, then we raise a ValueError # and NOT an OutOfBoundsDatetime for val in ['foo', Timestamp('20130101')]: try: - to_datetime(val, errors='raise', unit='s') + to_datetime(val, errors='raise', unit='s', cache=cache) except tslib.OutOfBoundsDatetime: raise AssertionError("incorrect exception raised") except ValueError: pass - def test_unit_consistency(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_unit_consistency(self, cache): # consistency of conversions expected = Timestamp('1970-05-09 14:25:11') - result = pd.to_datetime(11111111, unit='s', errors='raise') + result = pd.to_datetime(11111111, unit='s', errors='raise', + cache=cache) assert result == expected assert isinstance(result, Timestamp) - result = pd.to_datetime(11111111, unit='s', errors='coerce') + result = pd.to_datetime(11111111, unit='s', errors='coerce', + cache=cache) assert result == expected assert isinstance(result, Timestamp) - result = pd.to_datetime(11111111, unit='s', errors='ignore') + result = pd.to_datetime(11111111, unit='s', errors='ignore', + cache=cache) assert result == expected assert isinstance(result, Timestamp) - def test_unit_with_numeric(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_unit_with_numeric(self, cache): # GH 13180 # coercions from floats/ints are ok @@ -462,10 +535,10 @@ def test_unit_with_numeric(self): arr1 = [1.434692e+18, 1.432766e+18] arr2 = np.array(arr1).astype('int64') for errors in ['ignore', 'raise', 'coerce']: - result = pd.to_datetime(arr1, errors=errors) + result = pd.to_datetime(arr1, errors=errors, cache=cache) tm.assert_index_equal(result, expected) - result = pd.to_datetime(arr2, errors=errors) + result = pd.to_datetime(arr2, errors=errors, cache=cache) tm.assert_index_equal(result, expected) # but we want to make sure that we are coercing @@ -474,7 +547,7 @@ def test_unit_with_numeric(self): '2015-06-19 05:33:20', '2015-05-27 22:33:20']) arr = ['foo', 1.434692e+18, 1.432766e+18] - result = pd.to_datetime(arr, errors='coerce') + result = pd.to_datetime(arr, errors='coerce', cache=cache) tm.assert_index_equal(result, expected) expected = DatetimeIndex(['2015-06-19 05:33:20', @@ -482,31 +555,33 @@ def test_unit_with_numeric(self): 'NaT', 'NaT']) arr = [1.434692e+18, 1.432766e+18, 'foo', 'NaT'] - result = pd.to_datetime(arr, errors='coerce') + result = pd.to_datetime(arr, errors='coerce', cache=cache) tm.assert_index_equal(result, expected) - def test_unit_mixed(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_unit_mixed(self, cache): # mixed integers/datetimes expected = DatetimeIndex(['2013-01-01', 'NaT', 'NaT']) arr = [pd.Timestamp('20130101'), 1.434692e+18, 1.432766e+18] - result = pd.to_datetime(arr, errors='coerce') + result = pd.to_datetime(arr, errors='coerce', cache=cache) tm.assert_index_equal(result, expected) with pytest.raises(ValueError): - pd.to_datetime(arr, errors='raise') + 
pd.to_datetime(arr, errors='raise', cache=cache) expected = DatetimeIndex(['NaT', 'NaT', '2013-01-01']) arr = [1.434692e+18, 1.432766e+18, pd.Timestamp('20130101')] - result = pd.to_datetime(arr, errors='coerce') + result = pd.to_datetime(arr, errors='coerce', cache=cache) tm.assert_index_equal(result, expected) with pytest.raises(ValueError): - pd.to_datetime(arr, errors='raise') + pd.to_datetime(arr, errors='raise', cache=cache) - def test_dataframe(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_dataframe(self, cache): df = DataFrame({'year': [2015, 2016], 'month': [2, 3], @@ -520,19 +595,20 @@ def test_dataframe(self): result = to_datetime({'year': df['year'], 'month': df['month'], - 'day': df['day']}) + 'day': df['day']}, cache=cache) expected = Series([Timestamp('20150204 00:00:00'), Timestamp('20160305 00:0:00')]) assert_series_equal(result, expected) # dict-like - result = to_datetime(df[['year', 'month', 'day']].to_dict()) + result = to_datetime(df[['year', 'month', 'day']].to_dict(), + cache=cache) assert_series_equal(result, expected) # dict but with constructable df2 = df[['year', 'month', 'day']].to_dict() df2['month'] = 2 - result = to_datetime(df2) + result = to_datetime(df2, cache=cache) expected2 = Series([Timestamp('20150204 00:00:00'), Timestamp('20160205 00:0:00')]) assert_series_equal(result, expected2) @@ -553,7 +629,8 @@ def test_dataframe(self): ] for d in units: - result = to_datetime(df[list(d.keys())].rename(columns=d)) + result = to_datetime(df[list(d.keys())].rename(columns=d), + cache=cache) expected = Series([Timestamp('20150204 06:58:10'), Timestamp('20160305 07:59:11')]) assert_series_equal(result, expected) @@ -568,13 +645,13 @@ def test_dataframe(self): 'us': 'us', 'ns': 'ns'} - result = to_datetime(df.rename(columns=d)) + result = to_datetime(df.rename(columns=d), cache=cache) expected = Series([Timestamp('20150204 06:58:10.001002003'), Timestamp('20160305 07:59:11.001002003')]) assert_series_equal(result, expected) # coerce back to int - result = to_datetime(df.astype(str)) + result = to_datetime(df.astype(str), cache=cache) assert_series_equal(result, expected) # passing coerce @@ -585,8 +662,8 @@ def test_dataframe(self): msg = ("cannot assemble the datetimes: time data .+ does not " "match format '%Y%m%d' \(match\)") with tm.assert_raises_regex(ValueError, msg): - to_datetime(df2) - result = to_datetime(df2, errors='coerce') + to_datetime(df2, cache=cache) + result = to_datetime(df2, errors='coerce', cache=cache) expected = Series([Timestamp('20150204 00:00:00'), NaT]) assert_series_equal(result, expected) @@ -597,7 +674,7 @@ def test_dataframe(self): with tm.assert_raises_regex(ValueError, msg): df2 = df.copy() df2['foo'] = 1 - to_datetime(df2) + to_datetime(df2, cache=cache) # not enough msg = ('to assemble mappings requires at least that \[year, month, ' @@ -608,7 +685,7 @@ def test_dataframe(self): ['month', 'day'], ['year', 'day', 'second']]: with tm.assert_raises_regex(ValueError, msg): - to_datetime(df[c]) + to_datetime(df[c], cache=cache) # duplicates msg = 'cannot assemble with duplicate keys' @@ -617,7 +694,7 @@ def test_dataframe(self): 'day': [4, 5]}) df2.columns = ['year', 'year', 'day'] with tm.assert_raises_regex(ValueError, msg): - to_datetime(df2) + to_datetime(df2, cache=cache) df2 = DataFrame({'year': [2015, 2016], 'month': [2, 20], @@ -625,16 +702,17 @@ def test_dataframe(self): 'hour': [4, 5]}) df2.columns = ['year', 'month', 'day', 'day'] with tm.assert_raises_regex(ValueError, msg): - to_datetime(df2) + 
to_datetime(df2, cache=cache) - def test_dataframe_dtypes(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_dataframe_dtypes(self, cache): # #13451 df = DataFrame({'year': [2015, 2016], 'month': [2, 3], 'day': [4, 5]}) # int16 - result = to_datetime(df.astype('int16')) + result = to_datetime(df.astype('int16'), cache=cache) expected = Series([Timestamp('20150204 00:00:00'), Timestamp('20160305 00:00:00')]) assert_series_equal(result, expected) @@ -642,7 +720,7 @@ def test_dataframe_dtypes(self): # mixed dtypes df['month'] = df['month'].astype('int8') df['day'] = df['day'].astype('int8') - result = to_datetime(df) + result = to_datetime(df, cache=cache) expected = Series([Timestamp('20150204 00:00:00'), Timestamp('20160305 00:00:00')]) assert_series_equal(result, expected) @@ -652,18 +730,19 @@ def test_dataframe_dtypes(self): 'month': [1.5, 1], 'day': [1, 1]}) with pytest.raises(ValueError): - to_datetime(df) + to_datetime(df, cache=cache) class TestToDatetimeMisc(object): - def test_index_to_datetime(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_index_to_datetime(self, cache): idx = Index(['1/1/2000', '1/2/2000', '1/3/2000']) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = idx.to_datetime() - expected = DatetimeIndex(pd.to_datetime(idx.values)) + expected = DatetimeIndex(pd.to_datetime(idx.values, cache=cache)) tm.assert_index_equal(result, expected) with tm.assert_produces_warning(FutureWarning, @@ -674,17 +753,19 @@ def test_index_to_datetime(self): expected = DatetimeIndex([today]) tm.assert_index_equal(result, expected) - def test_to_datetime_iso8601(self): - result = to_datetime(["2012-01-01 00:00:00"]) + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_iso8601(self, cache): + result = to_datetime(["2012-01-01 00:00:00"], cache=cache) exp = Timestamp("2012-01-01 00:00:00") assert result[0] == exp - result = to_datetime(['20121001']) # bad iso 8601 + result = to_datetime(['20121001'], cache=cache) # bad iso 8601 exp = Timestamp('2012-10-01') assert result[0] == exp - def test_to_datetime_default(self): - rs = to_datetime('2001') + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_default(self, cache): + rs = to_datetime('2001', cache=cache) xp = datetime(2001, 1, 1) assert rs == xp @@ -694,71 +775,80 @@ def test_to_datetime_default(self): # pytest.raises(ValueError, to_datetime('01-13-2012', # dayfirst=True)) - def test_to_datetime_on_datetime64_series(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_on_datetime64_series(self, cache): # #2699 s = Series(date_range('1/1/2000', periods=10)) - result = to_datetime(s) + result = to_datetime(s, cache=cache) assert result[0] == s[0] - def test_to_datetime_with_space_in_series(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_with_space_in_series(self, cache): # GH 6428 s = Series(['10/18/2006', '10/18/2008', ' ']) - pytest.raises(ValueError, lambda: to_datetime(s, errors='raise')) - result_coerce = to_datetime(s, errors='coerce') + pytest.raises(ValueError, lambda: to_datetime(s, + errors='raise', + cache=cache)) + result_coerce = to_datetime(s, errors='coerce', cache=cache) expected_coerce = Series([datetime(2006, 10, 18), datetime(2008, 10, 18), NaT]) tm.assert_series_equal(result_coerce, expected_coerce) - result_ignore = to_datetime(s, errors='ignore') + result_ignore = to_datetime(s, errors='ignore', cache=cache) tm.assert_series_equal(result_ignore, s) - 
def test_to_datetime_with_apply(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_with_apply(self, cache): # this is only locale tested with US/None locales tm._skip_if_has_locale() # GH 5195 # with a format and coerce a single item to_datetime fails td = Series(['May 04', 'Jun 02', 'Dec 11'], index=[1, 2, 3]) - expected = pd.to_datetime(td, format='%b %y') - result = td.apply(pd.to_datetime, format='%b %y') + expected = pd.to_datetime(td, format='%b %y', cache=cache) + result = td.apply(pd.to_datetime, format='%b %y', cache=cache) assert_series_equal(result, expected) td = pd.Series(['May 04', 'Jun 02', ''], index=[1, 2, 3]) pytest.raises(ValueError, lambda: pd.to_datetime(td, format='%b %y', - errors='raise')) + errors='raise', + cache=cache)) pytest.raises(ValueError, lambda: td.apply(pd.to_datetime, format='%b %y', - errors='raise')) - expected = pd.to_datetime(td, format='%b %y', errors='coerce') + errors='raise', cache=cache)) + expected = pd.to_datetime(td, format='%b %y', errors='coerce', + cache=cache) result = td.apply( - lambda x: pd.to_datetime(x, format='%b %y', errors='coerce')) + lambda x: pd.to_datetime(x, format='%b %y', errors='coerce', + cache=cache)) assert_series_equal(result, expected) - def test_to_datetime_types(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_types(self, cache): # empty string - result = to_datetime('') + result = to_datetime('', cache=cache) assert result is NaT - result = to_datetime(['', '']) + result = to_datetime(['', ''], cache=cache) assert isna(result).all() # ints result = Timestamp(0) - expected = to_datetime(0) + expected = to_datetime(0, cache=cache) assert result == expected # GH 3888 (strings) - expected = to_datetime(['2012'])[0] - result = to_datetime('2012') + expected = to_datetime(['2012'], cache=cache)[0] + result = to_datetime('2012', cache=cache) assert result == expected # array = ['2012','20120101','20120101 12:01:01'] array = ['20120101', '20120101 12:01:01'] - expected = list(to_datetime(array)) + expected = list(to_datetime(array, cache=cache)) result = lmap(Timestamp, array) tm.assert_almost_equal(result, expected) @@ -767,13 +857,15 @@ def test_to_datetime_types(self): # expected = to_datetime('2012') # assert result == expected - def test_to_datetime_unprocessable_input(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_unprocessable_input(self, cache): # GH 4928 tm.assert_numpy_array_equal( - to_datetime([1, '1'], errors='ignore'), + to_datetime([1, '1'], errors='ignore', cache=cache), np.array([1, '1'], dtype='O') ) - pytest.raises(TypeError, to_datetime, [1, '1'], errors='raise') + pytest.raises(TypeError, to_datetime, [1, '1'], errors='raise', + cache=cache) def test_to_datetime_other_datetime64_units(self): # 5/25/2012 @@ -809,7 +901,8 @@ def test_to_datetime_overflow(self): with pytest.raises(OverflowError): date_range(start='1/1/1700', freq='B', periods=100000) - def test_string_na_nat_conversion(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_string_na_nat_conversion(self, cache): # GH #999, #858 from pandas.compat import parse_date @@ -827,7 +920,7 @@ def test_string_na_nat_conversion(self): result = tslib.array_to_datetime(strings) tm.assert_almost_equal(result, expected) - result2 = to_datetime(strings) + result2 = to_datetime(strings, cache=cache) assert isinstance(result2, DatetimeIndex) tm.assert_numpy_array_equal(result, result2.values) @@ -835,22 +928,25 @@ def 
test_string_na_nat_conversion(self): # GH 10636, default is now 'raise' pytest.raises(ValueError, - lambda: to_datetime(malformed, errors='raise')) + lambda: to_datetime(malformed, errors='raise', + cache=cache)) - result = to_datetime(malformed, errors='ignore') + result = to_datetime(malformed, errors='ignore', cache=cache) tm.assert_numpy_array_equal(result, malformed) - pytest.raises(ValueError, to_datetime, malformed, errors='raise') + pytest.raises(ValueError, to_datetime, malformed, errors='raise', + cache=cache) idx = ['a', 'b', 'c', 'd', 'e'] series = Series(['1/1/2000', np.nan, '1/3/2000', np.nan, '1/5/2000'], index=idx, name='foo') - dseries = Series([to_datetime('1/1/2000'), np.nan, - to_datetime('1/3/2000'), np.nan, - to_datetime('1/5/2000')], index=idx, name='foo') + dseries = Series([to_datetime('1/1/2000', cache=cache), np.nan, + to_datetime('1/3/2000', cache=cache), np.nan, + to_datetime('1/5/2000', cache=cache)], + index=idx, name='foo') - result = to_datetime(series) - dresult = to_datetime(dseries) + result = to_datetime(series, cache=cache) + dresult = to_datetime(dseries, cache=cache) expected = Series(np.empty(5, dtype='M8[ns]'), index=idx) for i in range(5): @@ -858,7 +954,7 @@ def test_string_na_nat_conversion(self): if isna(x): expected[i] = tslib.iNaT else: - expected[i] = to_datetime(x) + expected[i] = to_datetime(x, cache=cache) assert_series_equal(result, expected, check_names=False) assert result.name == 'foo' @@ -866,26 +962,29 @@ def test_string_na_nat_conversion(self): assert_series_equal(dresult, expected, check_names=False) assert dresult.name == 'foo' - def test_dti_constructor_numpy_timeunits(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_dti_constructor_numpy_timeunits(self, cache): # GH 9114 - base = pd.to_datetime(['2000-01-01T00:00', '2000-01-02T00:00', 'NaT']) + base = pd.to_datetime(['2000-01-01T00:00', '2000-01-02T00:00', 'NaT'], + cache=cache) for dtype in ['datetime64[h]', 'datetime64[m]', 'datetime64[s]', 'datetime64[ms]', 'datetime64[us]', 'datetime64[ns]']: values = base.values.astype(dtype) tm.assert_index_equal(DatetimeIndex(values), base) - tm.assert_index_equal(to_datetime(values), base) + tm.assert_index_equal(to_datetime(values, cache=cache), base) - def test_dayfirst(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_dayfirst(self, cache): # GH 5917 arr = ['10/02/2014', '11/02/2014', '12/02/2014'] expected = DatetimeIndex([datetime(2014, 2, 10), datetime(2014, 2, 11), datetime(2014, 2, 12)]) idx1 = DatetimeIndex(arr, dayfirst=True) idx2 = DatetimeIndex(np.array(arr), dayfirst=True) - idx3 = to_datetime(arr, dayfirst=True) - idx4 = to_datetime(np.array(arr), dayfirst=True) + idx3 = to_datetime(arr, dayfirst=True, cache=cache) + idx4 = to_datetime(np.array(arr), dayfirst=True, cache=cache) idx5 = DatetimeIndex(Index(arr), dayfirst=True) idx6 = DatetimeIndex(Series(arr), dayfirst=True) tm.assert_index_equal(expected, idx1) @@ -897,6 +996,8 @@ def test_dayfirst(self): class TestGuessDatetimeFormat(object): + + @is_dateutil_le_261 def test_guess_datetime_format_for_array(self): tm._skip_if_not_us_locale() expected_format = '%Y-%m-%d %H:%M:%S.%f' @@ -917,10 +1018,32 @@ def test_guess_datetime_format_for_array(self): [np.nan, np.nan, np.nan], dtype='O')) assert format_for_string_of_nans is None + @is_dateutil_gt_261 + def test_guess_datetime_format_for_array_gt_261(self): + tm._skip_if_not_us_locale() + expected_format = '%Y-%m-%d %H:%M:%S.%f' + dt_string = datetime(2011, 12, 30, 0, 0, 
0).strftime(expected_format) + + test_arrays = [ + np.array([dt_string, dt_string, dt_string], dtype='O'), + np.array([np.nan, np.nan, dt_string], dtype='O'), + np.array([dt_string, 'random_string'], dtype='O'), + ] + + for test_array in test_arrays: + assert tools._guess_datetime_format_for_array( + test_array) is None + + format_for_string_of_nans = tools._guess_datetime_format_for_array( + np.array( + [np.nan, np.nan, np.nan], dtype='O')) + assert format_for_string_of_nans is None + class TestToDatetimeInferFormat(object): - def test_to_datetime_infer_datetime_format_consistent_format(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_infer_datetime_format_consistent_format(self, cache): s = pd.Series(pd.date_range('20000101', periods=50, freq='H')) test_formats = ['%m-%d-%Y', '%m/%d/%Y %H:%M:%S.%f', @@ -929,90 +1052,113 @@ def test_to_datetime_infer_datetime_format_consistent_format(self): for test_format in test_formats: s_as_dt_strings = s.apply(lambda x: x.strftime(test_format)) - with_format = pd.to_datetime(s_as_dt_strings, format=test_format) + with_format = pd.to_datetime(s_as_dt_strings, format=test_format, + cache=cache) no_infer = pd.to_datetime(s_as_dt_strings, - infer_datetime_format=False) + infer_datetime_format=False, + cache=cache) yes_infer = pd.to_datetime(s_as_dt_strings, - infer_datetime_format=True) + infer_datetime_format=True, + cache=cache) # Whether the format is explicitly passed, it is inferred, or # it is not inferred, the results should all be the same tm.assert_series_equal(with_format, no_infer) tm.assert_series_equal(no_infer, yes_infer) - def test_to_datetime_infer_datetime_format_inconsistent_format(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_infer_datetime_format_inconsistent_format(self, + cache): s = pd.Series(np.array(['01/01/2011 00:00:00', '01-02-2011 00:00:00', '2011-01-03T00:00:00'])) # When the format is inconsistent, infer_datetime_format should just # fallback to the default parsing - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False, + cache=cache), + pd.to_datetime(s, infer_datetime_format=True, + cache=cache)) s = pd.Series(np.array(['Jan/01/2011', 'Feb/01/2011', 'Mar/01/2011'])) - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False, + cache=cache), + pd.to_datetime(s, infer_datetime_format=True, + cache=cache)) - def test_to_datetime_infer_datetime_format_series_with_nans(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_infer_datetime_format_series_with_nans(self, cache): s = pd.Series(np.array(['01/01/2011 00:00:00', np.nan, '01/03/2011 00:00:00', np.nan])) - tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) - - def test_to_datetime_infer_datetime_format_series_starting_with_nans(self): + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False, + cache=cache), + pd.to_datetime(s, infer_datetime_format=True, + cache=cache)) + + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_infer_datetime_format_series_start_with_nans(self, + cache): s = pd.Series(np.array([np.nan, np.nan, '01/01/2011 00:00:00', '01/02/2011 00:00:00', '01/03/2011 00:00:00'])) - 
tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False), - pd.to_datetime(s, infer_datetime_format=True)) + tm.assert_series_equal(pd.to_datetime(s, infer_datetime_format=False, + cache=cache), + pd.to_datetime(s, infer_datetime_format=True, + cache=cache)) - def test_to_datetime_iso8601_noleading_0s(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_to_datetime_iso8601_noleading_0s(self, cache): # GH 11871 s = pd.Series(['2014-1-1', '2014-2-2', '2015-3-3']) expected = pd.Series([pd.Timestamp('2014-01-01'), pd.Timestamp('2014-02-02'), pd.Timestamp('2015-03-03')]) - tm.assert_series_equal(pd.to_datetime(s), expected) - tm.assert_series_equal(pd.to_datetime(s, format='%Y-%m-%d'), expected) + tm.assert_series_equal(pd.to_datetime(s, cache=cache), expected) + tm.assert_series_equal(pd.to_datetime(s, format='%Y-%m-%d', + cache=cache), expected) class TestDaysInMonth(object): # tests for issue #10154 - def test_day_not_in_month_coerce(self): - assert isna(to_datetime('2015-02-29', errors='coerce')) + @pytest.mark.parametrize('cache', [True, False]) + def test_day_not_in_month_coerce(self, cache): + assert isna(to_datetime('2015-02-29', errors='coerce', cache=cache)) assert isna(to_datetime('2015-02-29', format="%Y-%m-%d", - errors='coerce')) + errors='coerce', cache=cache)) assert isna(to_datetime('2015-02-32', format="%Y-%m-%d", - errors='coerce')) + errors='coerce', cache=cache)) assert isna(to_datetime('2015-04-31', format="%Y-%m-%d", - errors='coerce')) + errors='coerce', cache=cache)) - def test_day_not_in_month_raise(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_day_not_in_month_raise(self, cache): pytest.raises(ValueError, to_datetime, '2015-02-29', - errors='raise') + errors='raise', cache=cache) pytest.raises(ValueError, to_datetime, '2015-02-29', - errors='raise', format="%Y-%m-%d") + errors='raise', format="%Y-%m-%d", cache=cache) pytest.raises(ValueError, to_datetime, '2015-02-32', - errors='raise', format="%Y-%m-%d") + errors='raise', format="%Y-%m-%d", cache=cache) pytest.raises(ValueError, to_datetime, '2015-04-31', - errors='raise', format="%Y-%m-%d") + errors='raise', format="%Y-%m-%d", cache=cache) - def test_day_not_in_month_ignore(self): - assert to_datetime('2015-02-29', errors='ignore') == '2015-02-29' + @pytest.mark.parametrize('cache', [True, False]) + def test_day_not_in_month_ignore(self, cache): + assert to_datetime('2015-02-29', errors='ignore', + cache=cache) == '2015-02-29' assert to_datetime('2015-02-29', errors='ignore', - format="%Y-%m-%d") == '2015-02-29' + format="%Y-%m-%d", cache=cache) == '2015-02-29' assert to_datetime('2015-02-32', errors='ignore', - format="%Y-%m-%d") == '2015-02-32' + format="%Y-%m-%d", cache=cache) == '2015-02-32' assert to_datetime('2015-04-31', errors='ignore', - format="%Y-%m-%d") == '2015-04-31' + format="%Y-%m-%d", cache=cache) == '2015-04-31' class TestDatetimeParsingWrappers(object): - def test_parsers(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_parsers(self, cache): # https://github.com/dateutil/dateutil/issues/217 import dateutil @@ -1076,7 +1222,7 @@ def test_parsers(self): result3 = to_datetime([date_str], yearfirst=yearfirst) # result5 is used below result4 = to_datetime(np.array([date_str], dtype=object), - yearfirst=yearfirst) + yearfirst=yearfirst, cache=cache) result6 = DatetimeIndex([date_str], yearfirst=yearfirst) # result7 is used below result8 = DatetimeIndex(Index([date_str]), yearfirst=yearfirst) @@ -1106,7 +1252,8 @@ def 
test_parsers(self): assert result3 is tslib.NaT assert result4 is tslib.NaT - def test_parsers_dayfirst_yearfirst(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_parsers_dayfirst_yearfirst(self, cache): # OK # 2.5.1 10-11-12 [dayfirst=0, yearfirst=0] -> 2012-10-11 00:00:00 # 2.5.2 10-11-12 [dayfirst=0, yearfirst=1] -> 2012-10-11 00:00:00 @@ -1190,7 +1337,7 @@ def test_parsers_dayfirst_yearfirst(self): assert result2 == expected result3 = to_datetime(date_str, dayfirst=dayfirst, - yearfirst=yearfirst) + yearfirst=yearfirst, cache=cache) result4 = DatetimeIndex([date_str], dayfirst=dayfirst, yearfirst=yearfirst)[0] @@ -1199,7 +1346,8 @@ def test_parsers_dayfirst_yearfirst(self): assert result3 == expected assert result4 == expected - def test_parsers_timestring(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_parsers_timestring(self, cache): # must be the same as dateutil result cases = {'10:15': (parse('10:15'), datetime(1, 1, 1, 10, 15)), '9:05': (parse('9:05'), datetime(1, 1, 1, 9, 5))} @@ -1254,9 +1402,10 @@ def test_parsers_time(self): assert isinstance(res, list) assert res == expected_arr - def test_parsers_timezone_minute_offsets_roundtrip(self): + @pytest.mark.parametrize('cache', [True, False]) + def test_parsers_timezone_minute_offsets_roundtrip(self, cache): # GH11708 - base = to_datetime("2013-01-01 00:00:00") + base = to_datetime("2013-01-01 00:00:00", cache=cache) dt_strings = [ ('2013-01-01 05:45+0545', "Asia/Katmandu", @@ -1267,7 +1416,7 @@ def test_parsers_timezone_minute_offsets_roundtrip(self): ] for dt_string, tz, dt_string_repr in dt_strings: - dt_time = to_datetime(dt_string) + dt_time = to_datetime(dt_string, cache=cache) assert base == dt_time converted_time = dt_time.tz_localize('UTC').tz_convert(tz) assert dt_string_repr == repr(converted_time) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index ded5de9253eafd..dbd18de16cebde 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -158,6 +158,24 @@ def test_set_name_methods(self): assert res is None assert ind.names == new_names2 + def test_set_levels_labels_directly(self): + # setting levels/labels directly raises AttributeError + + levels = self.index.levels + new_levels = [[lev + 'a' for lev in level] for level in levels] + + labels = self.index.labels + major_labels, minor_labels = labels + major_labels = [(x + 1) % 3 for x in major_labels] + minor_labels = [(x + 1) % 1 for x in minor_labels] + new_labels = [major_labels, minor_labels] + + with pytest.raises(AttributeError): + self.index.levels = new_levels + + with pytest.raises(AttributeError): + self.index.labels = new_labels + def test_set_levels(self): # side note - you probably wouldn't want to use levels and labels # directly like this - but it is possible. 
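# A minimal sketch, assuming a pandas build with this change applied, of the
# behaviour the new test_set_levels_labels_directly exercises: assigning to
# MultiIndex.levels or MultiIndex.labels directly raises AttributeError, while
# the set_levels / set_labels methods (used elsewhere in these tests) remain
# the supported way to obtain a modified index. Index values are hypothetical.
import pandas as pd
import pytest

midx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)])

with pytest.raises(AttributeError):
    midx.levels = [['x', 'y'], [1, 2]]       # direct assignment is rejected

new = midx.set_levels([['x', 'y'], [1, 2]])  # returns a new MultiIndex
assert list(new.levels[0]) == ['x', 'y']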
@@ -578,16 +596,6 @@ def test_constructor_mismatched_label_levels(self): with tm.assert_raises_regex(ValueError, label_error): self.index.copy().set_labels([[0, 0, 0, 0], [0, 0]]) - # deprecated properties - with warnings.catch_warnings(): - warnings.simplefilter('ignore') - - with tm.assert_raises_regex(ValueError, length_error): - self.index.copy().levels = [['a'], ['b']] - - with tm.assert_raises_regex(ValueError, label_error): - self.index.copy().labels = [[0, 0, 0, 0], [0, 0]] - def assert_multiindex_copied(self, copy, original): # Levels should be (at least, shallow copied) tm.assert_copy(copy.levels, original.levels) @@ -2981,3 +2989,13 @@ def test_nan_stays_float(self): assert pd.isna(df0.index.get_level_values(1)).all() # the following failed in 0.14.1 assert pd.isna(dfm.index.get_level_values(1)[:-1]).all() + + def test_million_record_attribute_error(self): + # GH 18165 + r = list(range(1000000)) + df = pd.DataFrame({'a': r, 'b': r}, + index=pd.MultiIndex.from_tuples([(x, x) for x in r])) + + with tm.assert_raises_regex(AttributeError, + "'Series' object has no attribute 'foo'"): + df['a'].foo() diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 9fe10885186de0..7d88b547746f64 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -971,8 +971,8 @@ def test_append(self): ([RI(1, 5, 2), RI(5, 6)], RI(1, 6, 2)), ([RI(1, 3, 2), RI(4, 7, 3)], RI(1, 7, 3)), ([RI(-4, 3, 2), RI(4, 7, 2)], RI(-4, 7, 2)), - ([RI(-4, -8), RI(-8, -12)], RI(-8, -12)), - ([RI(-4, -8), RI(3, -4)], RI(3, -8)), + ([RI(-4, -8), RI(-8, -12)], RI(0, 0)), + ([RI(-4, -8), RI(3, -4)], RI(0, 0)), ([RI(-4, -8), RI(3, 5)], RI(3, 5)), ([RI(-4, -2), RI(3, 5)], I64([-4, -3, 3, 4])), ([RI(-2,), RI(3, 5)], RI(3, 5)), diff --git a/pandas/tests/io/sas/data/zero_variables.sas7bdat b/pandas/tests/io/sas/data/zero_variables.sas7bdat new file mode 100644 index 00000000000000..85fec09447ec50 Binary files /dev/null and b/pandas/tests/io/sas/data/zero_variables.sas7bdat differ diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index c3fb85811ca2ac..a5546b1198fc67 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -1,9 +1,11 @@ import pandas as pd from pandas.compat import PY2 import pandas.util.testing as tm +from pandas.errors import EmptyDataError import os import io import numpy as np +import pytest class TestSAS7BDAT(object): @@ -174,3 +176,11 @@ def test_date_time(): df0 = pd.read_csv(fname, parse_dates=['Date1', 'Date2', 'DateTime', 'DateTimeHi', 'Taiw']) tm.assert_frame_equal(df, df0) + + +def test_zero_variables(): + # Check if the SAS file has zero variables (PR #18184) + dirpath = tm.get_data_path() + fname = os.path.join(dirpath, "zero_variables.sas7bdat") + with pytest.raises(EmptyDataError): + pd.read_sas(fname) diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index 940a331a9de847..b5d1435c29cb7b 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -18,7 +18,7 @@ try: DataFrame({'A': [1, 2]}).to_clipboard() _DEPS_INSTALLED = 1 -except PyperclipException: +except (PyperclipException, RuntimeError): _DEPS_INSTALLED = 0 diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 956f3c68eeb414..0b268dcca90e81 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -973,6 +973,7 @@ def test_importcheck_thread_safety(): def test_parse_failure_unseekable(): # 
Issue #17975 _skip_if_no('lxml') + _skip_if_no('bs4') class UnseekableStringIO(StringIO): def seekable(self): @@ -996,6 +997,7 @@ def seekable(self): def test_parse_failure_rewinds(): # Issue #17975 _skip_if_no('lxml') + _skip_if_no('bs4') class MockFile(object): def __init__(self, data): diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 9a4edf38e2ef4a..e7bcff22371b7e 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -105,7 +105,7 @@ def test_options_py(df_compat, pa): with pd.option_context('io.parquet.engine', 'pyarrow'): df.to_parquet(path) - result = read_parquet(path, compression=None) + result = read_parquet(path) tm.assert_frame_equal(result, df) @@ -118,7 +118,7 @@ def test_options_fp(df_compat, fp): with pd.option_context('io.parquet.engine', 'fastparquet'): df.to_parquet(path, compression=None) - result = read_parquet(path, compression=None) + result = read_parquet(path) tm.assert_frame_equal(result, df) @@ -130,7 +130,7 @@ def test_options_auto(df_compat, fp, pa): with pd.option_context('io.parquet.engine', 'auto'): df.to_parquet(path) - result = read_parquet(path, compression=None) + result = read_parquet(path) tm.assert_frame_equal(result, df) @@ -162,7 +162,7 @@ def test_cross_engine_pa_fp(df_cross_compat, pa, fp): with tm.ensure_clean() as path: df.to_parquet(path, engine=pa, compression=None) - result = read_parquet(path, engine=fp, compression=None) + result = read_parquet(path, engine=fp) tm.assert_frame_equal(result, df) @@ -174,7 +174,7 @@ def test_cross_engine_fp_pa(df_cross_compat, pa, fp): with tm.ensure_clean() as path: df.to_parquet(path, engine=fp, compression=None) - result = read_parquet(path, engine=pa, compression=None) + result = read_parquet(path, engine=pa) tm.assert_frame_equal(result, df) @@ -188,19 +188,23 @@ def check_error_on_write(self, df, engine, exc): with tm.ensure_clean() as path: to_parquet(df, path, engine, compression=None) - def check_round_trip(self, df, engine, expected=None, **kwargs): - + def check_round_trip(self, df, engine, expected=None, + write_kwargs=None, read_kwargs=None): + if write_kwargs is None: + write_kwargs = {} + if read_kwargs is None: + read_kwargs = {} with tm.ensure_clean() as path: - df.to_parquet(path, engine, **kwargs) - result = read_parquet(path, engine, **kwargs) + df.to_parquet(path, engine, **write_kwargs) + result = read_parquet(path, engine, **read_kwargs) if expected is None: expected = df tm.assert_frame_equal(result, expected) # repeat - to_parquet(df, path, engine, **kwargs) - result = pd.read_parquet(path, engine, **kwargs) + to_parquet(df, path, engine, **write_kwargs) + result = pd.read_parquet(path, engine, **read_kwargs) if expected is None: expected = df @@ -222,7 +226,7 @@ def test_columns_dtypes(self, engine): # unicode df.columns = [u'foo', u'bar'] - self.check_round_trip(df, engine, compression=None) + self.check_round_trip(df, engine, write_kwargs={'compression': None}) def test_columns_dtypes_invalid(self, engine): @@ -246,7 +250,7 @@ def test_columns_dtypes_invalid(self, engine): def test_write_with_index(self, engine): df = pd.DataFrame({'A': [1, 2, 3]}) - self.check_round_trip(df, engine, compression=None) + self.check_round_trip(df, engine, write_kwargs={'compression': None}) # non-default index for index in [[2, 3, 4], @@ -280,7 +284,8 @@ def test_compression(self, engine, compression): pytest.importorskip('brotli') df = pd.DataFrame({'A': [1, 2, 3]}) - self.check_round_trip(df, engine, compression=compression) + 
self.check_round_trip(df, engine, + write_kwargs={'compression': compression}) def test_read_columns(self, engine): # GH18154 @@ -289,7 +294,8 @@ def test_read_columns(self, engine): expected = pd.DataFrame({'string': list('abc')}) self.check_round_trip(df, engine, expected=expected, - compression=None, columns=["string"]) + write_kwargs={'compression': None}, + read_kwargs={'columns': ['string']}) class TestParquetPyArrow(Base): @@ -377,7 +383,7 @@ def test_basic(self, fp): 'timedelta': pd.timedelta_range('1 day', periods=3), }) - self.check_round_trip(df, fp, compression=None) + self.check_round_trip(df, fp, write_kwargs={'compression': None}) @pytest.mark.skip(reason="not supported") def test_duplicate_columns(self, fp): @@ -390,7 +396,8 @@ def test_duplicate_columns(self, fp): def test_bool_with_none(self, fp): df = pd.DataFrame({'a': [True, None, False]}) expected = pd.DataFrame({'a': [1.0, np.nan, 0.0]}, dtype='float16') - self.check_round_trip(df, fp, expected=expected, compression=None) + self.check_round_trip(df, fp, expected=expected, + write_kwargs={'compression': None}) def test_unsupported(self, fp): @@ -406,7 +413,7 @@ def test_categorical(self, fp): if LooseVersion(fastparquet.__version__) < LooseVersion("0.1.3"): pytest.skip("CategoricalDtype not supported for older fp") df = pd.DataFrame({'a': pd.Categorical(list('abc'))}) - self.check_round_trip(df, fp, compression=None) + self.check_round_trip(df, fp, write_kwargs={'compression': None}) def test_datetime_tz(self, fp): # doesn't preserve tz @@ -416,4 +423,13 @@ def test_datetime_tz(self, fp): # warns on the coercion with catch_warnings(record=True): self.check_round_trip(df, fp, df.astype('datetime64[ns]'), - compression=None) + write_kwargs={'compression': None}) + + def test_filter_row_groups(self, fp): + d = {'a': list(range(0, 3))} + df = pd.DataFrame(d) + with tm.ensure_clean() as path: + df.to_parquet(path, fp, compression=None, + row_group_offsets=1) + result = read_parquet(path, fp, filters=[('a', '==', 0)]) + assert len(result) == 1 diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index c9c294e70e7b14..fd5b4611e58d6b 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1983,3 +1983,21 @@ def test_concat_will_upcast(dt, pdt): pdt(np.array([5], dtype=dt, ndmin=dims))] x = pd.concat(dfs) assert x.values.dtype == 'float64' + + +def test_concat_empty_and_non_empty_frame_regression(): + # GH 18178 regression test + df1 = pd.DataFrame({'foo': [1]}) + df2 = pd.DataFrame({'foo': []}) + expected = pd.DataFrame({'foo': [1.0]}) + result = pd.concat([df1, df2]) + assert_frame_equal(result, expected) + + +def test_concat_empty_and_non_empty_series_regression(): + # GH 18187 regression test + s1 = pd.Series([1]) + s2 = pd.Series([]) + expected = s1 + result = pd.concat([s1, s2]) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index fc9f89934b4ea0..2722c3e92d85a1 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -11,8 +11,8 @@ from pandas.util.testing import assert_frame_equal -from pandas.core.reshape.reshape import ( - melt, lreshape, get_dummies, wide_to_long) +from pandas.core.reshape.reshape import get_dummies +from pandas.core.reshape.melt import melt, lreshape, wide_to_long import pandas.util.testing as tm from pandas.compat import range, u diff --git a/pandas/tests/scalar/test_parsing.py 
b/pandas/tests/scalar/test_parsing.py index 6908fecbd4e058..70961755ceec98 100644 --- a/pandas/tests/scalar/test_parsing.py +++ b/pandas/tests/scalar/test_parsing.py @@ -3,14 +3,12 @@ Tests for Timestamp parsing, aimed at pandas/_libs/tslibs/parsing.pyx """ from datetime import datetime - import numpy as np import pytest from dateutil.parser import parse - +from pandas.conftest import is_dateutil_le_261, is_dateutil_gt_261 from pandas import compat from pandas.util import testing as tm - from pandas._libs.tslibs import parsing @@ -67,37 +65,90 @@ def test_parsers_monthfreq(self): class TestGuessDatetimeFormat(object): - def test_guess_datetime_format_with_parseable_formats(self): + + @is_dateutil_le_261 + @pytest.mark.parametrize( + "string, format", + [ + ('20111230', '%Y%m%d'), + ('2011-12-30', '%Y-%m-%d'), + ('30-12-2011', '%d-%m-%Y'), + ('2011-12-30 00:00:00', '%Y-%m-%d %H:%M:%S'), + ('2011-12-30T00:00:00', '%Y-%m-%dT%H:%M:%S'), + ('2011-12-30 00:00:00.000000', + '%Y-%m-%d %H:%M:%S.%f')]) + def test_guess_datetime_format_with_parseable_formats( + self, string, format): + tm._skip_if_not_us_locale() + + result = parsing._guess_datetime_format(string) + assert result == format + + @is_dateutil_gt_261 + @pytest.mark.parametrize( + "string", + ['20111230', '2011-12-30', '30-12-2011', + '2011-12-30 00:00:00', '2011-12-30T00:00:00', + '2011-12-30 00:00:00.000000']) + def test_guess_datetime_format_with_parseable_formats_gt_261( + self, string): tm._skip_if_not_us_locale() - dt_string_to_format = (('20111230', '%Y%m%d'), - ('2011-12-30', '%Y-%m-%d'), - ('30-12-2011', '%d-%m-%Y'), - ('2011-12-30 00:00:00', '%Y-%m-%d %H:%M:%S'), - ('2011-12-30T00:00:00', '%Y-%m-%dT%H:%M:%S'), - ('2011-12-30 00:00:00.000000', - '%Y-%m-%d %H:%M:%S.%f'), ) - - for dt_string, dt_format in dt_string_to_format: - assert parsing._guess_datetime_format(dt_string) == dt_format - - def test_guess_datetime_format_with_dayfirst(self): - ambiguous_string = '01/01/2011' - assert parsing._guess_datetime_format( - ambiguous_string, dayfirst=True) == '%d/%m/%Y' - assert parsing._guess_datetime_format( - ambiguous_string, dayfirst=False) == '%m/%d/%Y' - def test_guess_datetime_format_with_locale_specific_formats(self): + result = parsing._guess_datetime_format(string) + assert result is None + + @is_dateutil_le_261 + @pytest.mark.parametrize( + "dayfirst, expected", + [ + (True, "%d/%m/%Y"), + (False, "%m/%d/%Y")]) + def test_guess_datetime_format_with_dayfirst(self, dayfirst, expected): + ambiguous_string = '01/01/2011' + result = parsing._guess_datetime_format( + ambiguous_string, dayfirst=dayfirst) + assert result == expected + + @is_dateutil_gt_261 + @pytest.mark.parametrize( + "dayfirst", [True, False]) + def test_guess_datetime_format_with_dayfirst_gt_261(self, dayfirst): + ambiguous_string = '01/01/2011' + result = parsing._guess_datetime_format( + ambiguous_string, dayfirst=dayfirst) + assert result is None + + @is_dateutil_le_261 + @pytest.mark.parametrize( + "string, format", + [ + ('30/Dec/2011', '%d/%b/%Y'), + ('30/December/2011', '%d/%B/%Y'), + ('30/Dec/2011 00:00:00', '%d/%b/%Y %H:%M:%S')]) + def test_guess_datetime_format_with_locale_specific_formats( + self, string, format): # The month names will vary depending on the locale, in which # case these wont be parsed properly (dateutil can't parse them) tm._skip_if_has_locale() - dt_string_to_format = (('30/Dec/2011', '%d/%b/%Y'), - ('30/December/2011', '%d/%B/%Y'), - ('30/Dec/2011 00:00:00', '%d/%b/%Y %H:%M:%S'), ) + result = parsing._guess_datetime_format(string) + 
assert result == format + + @is_dateutil_gt_261 + @pytest.mark.parametrize( + "string", + [ + '30/Dec/2011', + '30/December/2011', + '30/Dec/2011 00:00:00']) + def test_guess_datetime_format_with_locale_specific_formats_gt_261( + self, string): + # The month names will vary depending on the locale, in which + # case these wont be parsed properly (dateutil can't parse them) + tm._skip_if_has_locale() - for dt_string, dt_format in dt_string_to_format: - assert parsing._guess_datetime_format(dt_string) == dt_format + result = parsing._guess_datetime_format(string) + assert result is None def test_guess_datetime_format_invalid_inputs(self): # A datetime string must include a year, month and a day for it @@ -117,17 +168,35 @@ def test_guess_datetime_format_invalid_inputs(self): for invalid_dt in invalid_dts: assert parsing._guess_datetime_format(invalid_dt) is None - def test_guess_datetime_format_nopadding(self): + @is_dateutil_le_261 + @pytest.mark.parametrize( + "string, format", + [ + ('2011-1-1', '%Y-%m-%d'), + ('30-1-2011', '%d-%m-%Y'), + ('1/1/2011', '%m/%d/%Y'), + ('2011-1-1 00:00:00', '%Y-%m-%d %H:%M:%S'), + ('2011-1-1 0:0:0', '%Y-%m-%d %H:%M:%S'), + ('2011-1-3T00:00:0', '%Y-%m-%dT%H:%M:%S')]) + def test_guess_datetime_format_nopadding(self, string, format): + # GH 11142 + result = parsing._guess_datetime_format(string) + assert result == format + + @is_dateutil_gt_261 + @pytest.mark.parametrize( + "string", + [ + '2011-1-1', + '30-1-2011', + '1/1/2011', + '2011-1-1 00:00:00', + '2011-1-1 0:0:0', + '2011-1-3T00:00:0']) + def test_guess_datetime_format_nopadding_gt_261(self, string): # GH 11142 - dt_string_to_format = (('2011-1-1', '%Y-%m-%d'), - ('30-1-2011', '%d-%m-%Y'), - ('1/1/2011', '%m/%d/%Y'), - ('2011-1-1 00:00:00', '%Y-%m-%d %H:%M:%S'), - ('2011-1-1 0:0:0', '%Y-%m-%d %H:%M:%S'), - ('2011-1-3T00:00:0', '%Y-%m-%dT%H:%M:%S')) - - for dt_string, dt_format in dt_string_to_format: - assert parsing._guess_datetime_format(dt_string) == dt_format + result = parsing._guess_datetime_format(string) + assert result is None class TestArrayToDatetime(object): diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index 28d85c52604d94..8cfdf7a461879e 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -13,7 +13,7 @@ from pandas._libs import tslib, period as libperiod from pandas._libs.tslibs.parsing import DateParseError from pandas import Period, Timestamp, offsets -from pandas.tseries.frequencies import DAYS, MONTHS +from pandas._libs.tslibs.resolution import DAYS, _MONTHS as MONTHS class TestPeriodProperties(object): diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 4cd9a2fadeb326..a79fb554f94548 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -16,8 +16,9 @@ import pandas.util.testing as tm from pandas.tseries import offsets, frequencies -from pandas._libs import tslib, period +from pandas._libs import period from pandas._libs.tslibs.timezones import get_timezone +from pandas._libs.tslibs import conversion from pandas.compat import lrange, long, PY3 from pandas.util.testing import assert_series_equal @@ -77,12 +78,12 @@ def test_constructor(self): for result in [Timestamp(date_str), Timestamp(date)]: # only with timestring assert result.value == expected - assert tslib.pydt_to_i8(result) == expected + assert conversion.pydt_to_i8(result) == expected # re-creation shouldn't affect to internal value result = Timestamp(result) 
assert result.value == expected - assert tslib.pydt_to_i8(result) == expected + assert conversion.pydt_to_i8(result) == expected # with timezone for tz, offset in timezones: @@ -90,18 +91,18 @@ def test_constructor(self): tz=tz)]: expected_tz = expected - offset * 3600 * 1000000000 assert result.value == expected_tz - assert tslib.pydt_to_i8(result) == expected_tz + assert conversion.pydt_to_i8(result) == expected_tz # should preserve tz result = Timestamp(result) assert result.value == expected_tz - assert tslib.pydt_to_i8(result) == expected_tz + assert conversion.pydt_to_i8(result) == expected_tz # should convert to UTC result = Timestamp(result, tz='UTC') expected_utc = expected - offset * 3600 * 1000000000 assert result.value == expected_utc - assert tslib.pydt_to_i8(result) == expected_utc + assert conversion.pydt_to_i8(result) == expected_utc def test_constructor_with_stringoffset(self): # GH 7833 @@ -129,30 +130,30 @@ def test_constructor_with_stringoffset(self): for result in [Timestamp(date_str)]: # only with timestring assert result.value == expected - assert tslib.pydt_to_i8(result) == expected + assert conversion.pydt_to_i8(result) == expected # re-creation shouldn't affect to internal value result = Timestamp(result) assert result.value == expected - assert tslib.pydt_to_i8(result) == expected + assert conversion.pydt_to_i8(result) == expected # with timezone for tz, offset in timezones: result = Timestamp(date_str, tz=tz) expected_tz = expected assert result.value == expected_tz - assert tslib.pydt_to_i8(result) == expected_tz + assert conversion.pydt_to_i8(result) == expected_tz # should preserve tz result = Timestamp(result) assert result.value == expected_tz - assert tslib.pydt_to_i8(result) == expected_tz + assert conversion.pydt_to_i8(result) == expected_tz # should convert to UTC result = Timestamp(result, tz='UTC') expected_utc = expected assert result.value == expected_utc - assert tslib.pydt_to_i8(result) == expected_utc + assert conversion.pydt_to_i8(result) == expected_utc # This should be 2013-11-01 05:00 in UTC # converted to Chicago tz @@ -1101,13 +1102,18 @@ def test_timestamp(self): tsc = Timestamp('2014-10-11 11:00:01.12345678', tz='US/Central') utsc = tsc.tz_convert('UTC') + # utsc is a different representation of the same time assert tsc.timestamp() == utsc.timestamp() if PY3: - # should agree with datetime.timestamp method - dt = ts.to_pydatetime() - assert dt.timestamp() == ts.timestamp() + + # datetime.timestamp() converts in the local timezone + with tm.set_timezone('UTC'): + + # should agree with datetime.timestamp method + dt = ts.to_pydatetime() + assert dt.timestamp() == ts.timestamp() class TestTimestampNsOperations(object): diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 6b950be15ca465..c1e41892839285 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -334,6 +334,10 @@ def test_axis_alias(self): assert s._get_axis_number('rows') == 0 assert s._get_axis_name('rows') == 'index' + def test_class_axis(self): + # https://github.com/pandas-dev/pandas/issues/18147 + Series.index # no exception! + def test_numpy_unique(self): # it works! 
np.unique(self.ts) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index a8782b32d12f6d..f593ba85aec5f4 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -306,20 +306,18 @@ def f(): assert len(cat.codes) == 1 assert cat.codes[0] == 0 - # Catch old style constructor useage: two arrays, codes + categories - # We can only catch two cases: + # two arrays # - when the first is an integer dtype and the second is not # - when the resulting codes are all -1/NaN - with tm.assert_produces_warning(RuntimeWarning): + with tm.assert_produces_warning(None): c_old = Categorical([0, 1, 2, 0, 1, 2], categories=["a", "b", "c"]) # noqa - with tm.assert_produces_warning(RuntimeWarning): + with tm.assert_produces_warning(None): c_old = Categorical([0, 1, 2, 0, 1, 2], # noqa categories=[3, 4, 5]) - # the next one are from the old docs, but unfortunately these don't - # trigger :-( + # the next one are from the old docs with tm.assert_produces_warning(None): c_old2 = Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3]) # noqa cat = Categorical([1, 2], categories=[1, 2, 3]) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index ba1a2ad1f42e2c..61b2b689bffd6c 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -21,7 +21,8 @@ from pandas.core.base import SpecificationError, AbstractMethodError from pandas.errors import UnsupportedFunctionCall from pandas.core.groupby import DataError -from pandas.tseries.frequencies import MONTHS, DAYS +from pandas._libs.tslibs.resolution import DAYS +from pandas.tseries.frequencies import MONTHS from pandas.tseries.frequencies import to_offset from pandas.core.indexes.datetimes import date_range from pandas.tseries.offsets import Minute, BDay diff --git a/pandas/tests/tseries/conftest.py b/pandas/tests/tseries/conftest.py index 25446c24b28c09..fc1ecf21c54465 100644 --- a/pandas/tests/tseries/conftest.py +++ b/pandas/tests/tseries/conftest.py @@ -1,10 +1,4 @@ import pytest -import pandas.tseries.offsets as offsets - - -@pytest.fixture(params=[getattr(offsets, o) for o in offsets.__all__]) -def offset_types(request): - return request.param @pytest.fixture(params=[None, 'UTC', 'Asia/Tokyo', 'US/Eastern', diff --git a/pandas/tests/tseries/offsets/__init__.py b/pandas/tests/tseries/offsets/__init__.py new file mode 100644 index 00000000000000..40a96afc6ff09d --- /dev/null +++ b/pandas/tests/tseries/offsets/__init__.py @@ -0,0 +1 @@ +# -*- coding: utf-8 -*- diff --git a/pandas/tests/tseries/offsets/common.py b/pandas/tests/tseries/offsets/common.py new file mode 100644 index 00000000000000..2e8eb224bca7fa --- /dev/null +++ b/pandas/tests/tseries/offsets/common.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +""" +Assertion helpers for offsets tests +""" + + +def assert_offset_equal(offset, base, expected): + actual = offset + base + actual_swapped = base + offset + actual_apply = offset.apply(base) + try: + assert actual == expected + assert actual_swapped == expected + assert actual_apply == expected + except AssertionError: + raise AssertionError("\nExpected: %s\nActual: %s\nFor Offset: %s)" + "\nAt Date: %s" % + (expected, actual, offset, base)) + + +def assert_onOffset(offset, date, expected): + actual = offset.onOffset(date) + assert actual == expected, ("\nExpected: %s\nActual: %s\nFor Offset: %s)" + "\nAt Date: %s" % + (expected, actual, offset, date)) diff --git a/pandas/tests/tseries/offsets/conftest.py b/pandas/tests/tseries/offsets/conftest.py new file mode 100644 
index 00000000000000..25446c24b28c09 --- /dev/null +++ b/pandas/tests/tseries/offsets/conftest.py @@ -0,0 +1,13 @@ +import pytest +import pandas.tseries.offsets as offsets + + +@pytest.fixture(params=[getattr(offsets, o) for o in offsets.__all__]) +def offset_types(request): + return request.param + + +@pytest.fixture(params=[None, 'UTC', 'Asia/Tokyo', 'US/Eastern', + 'dateutil/Asia/Tokyo', 'dateutil/US/Pacific']) +def tz(request): + return request.param diff --git a/pandas/tests/tseries/data/cday-0.14.1.pickle b/pandas/tests/tseries/offsets/data/cday-0.14.1.pickle similarity index 100% rename from pandas/tests/tseries/data/cday-0.14.1.pickle rename to pandas/tests/tseries/offsets/data/cday-0.14.1.pickle diff --git a/pandas/tests/tseries/data/dateoffset_0_15_2.pickle b/pandas/tests/tseries/offsets/data/dateoffset_0_15_2.pickle similarity index 100% rename from pandas/tests/tseries/data/dateoffset_0_15_2.pickle rename to pandas/tests/tseries/offsets/data/dateoffset_0_15_2.pickle diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py similarity index 79% rename from pandas/tests/tseries/test_offsets.py rename to pandas/tests/tseries/offsets/test_offsets.py index 4fd3bba01602fc..b123fa127e29c4 100644 --- a/pandas/tests/tseries/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -4,7 +4,7 @@ from dateutil.relativedelta import relativedelta import pytest -from pandas.compat import range, iteritems +from pandas.compat import range from pandas import compat import numpy as np @@ -25,9 +25,9 @@ MonthBegin, SemiMonthBegin, SemiMonthEnd, BYearBegin, QuarterBegin, BQuarterBegin, BMonthBegin, DateOffset, Week, YearBegin, - YearEnd, Hour, Minute, Second, Day, Micro, + YearEnd, Day, QuarterEnd, BusinessMonthEnd, FY5253, - Milli, Nano, Easter, FY5253Quarter, + Nano, Easter, FY5253Quarter, LastWeekOfMonth) from pandas.core.tools.datetimes import ( format, ole2datetime, parse_time_string, @@ -35,11 +35,13 @@ import pandas.tseries.offsets as offsets from pandas.io.pickle import read_pickle from pandas._libs.tslibs import timezones -from pandas._libs.tslib import normalize_date, NaT, Timestamp, Timedelta +from pandas._libs.tslib import normalize_date, NaT, Timestamp import pandas._libs.tslib as tslib import pandas.util.testing as tm from pandas.tseries.holiday import USFederalHolidayCalendar +from .common import assert_offset_equal, assert_onOffset + def test_monthrange(): import calendar @@ -162,51 +164,44 @@ def test_apply_out_of_range(self, tz): class TestCommon(Base): - - def setup_method(self, method): - # exected value created by Base._get_offset - # are applied to 2011/01/01 09:00 (Saturday) - # used for .apply and .rollforward - self.expecteds = {'Day': Timestamp('2011-01-02 09:00:00'), - 'DateOffset': Timestamp('2011-01-02 09:00:00'), - 'BusinessDay': Timestamp('2011-01-03 09:00:00'), - 'CustomBusinessDay': - Timestamp('2011-01-03 09:00:00'), - 'CustomBusinessMonthEnd': - Timestamp('2011-01-31 09:00:00'), - 'CustomBusinessMonthBegin': - Timestamp('2011-01-03 09:00:00'), - 'MonthBegin': Timestamp('2011-02-01 09:00:00'), - 'BusinessMonthBegin': - Timestamp('2011-01-03 09:00:00'), - 'MonthEnd': Timestamp('2011-01-31 09:00:00'), - 'SemiMonthEnd': Timestamp('2011-01-15 09:00:00'), - 'SemiMonthBegin': Timestamp('2011-01-15 09:00:00'), - 'BusinessMonthEnd': Timestamp('2011-01-31 09:00:00'), - 'YearBegin': Timestamp('2012-01-01 09:00:00'), - 'BYearBegin': Timestamp('2011-01-03 09:00:00'), - 'YearEnd': Timestamp('2011-12-31 09:00:00'), - 'BYearEnd': 
Timestamp('2011-12-30 09:00:00'), - 'QuarterBegin': Timestamp('2011-03-01 09:00:00'), - 'BQuarterBegin': Timestamp('2011-03-01 09:00:00'), - 'QuarterEnd': Timestamp('2011-03-31 09:00:00'), - 'BQuarterEnd': Timestamp('2011-03-31 09:00:00'), - 'BusinessHour': Timestamp('2011-01-03 10:00:00'), - 'CustomBusinessHour': - Timestamp('2011-01-03 10:00:00'), - 'WeekOfMonth': Timestamp('2011-01-08 09:00:00'), - 'LastWeekOfMonth': Timestamp('2011-01-29 09:00:00'), - 'FY5253Quarter': Timestamp('2011-01-25 09:00:00'), - 'FY5253': Timestamp('2011-01-25 09:00:00'), - 'Week': Timestamp('2011-01-08 09:00:00'), - 'Easter': Timestamp('2011-04-24 09:00:00'), - 'Hour': Timestamp('2011-01-01 10:00:00'), - 'Minute': Timestamp('2011-01-01 09:01:00'), - 'Second': Timestamp('2011-01-01 09:00:01'), - 'Milli': Timestamp('2011-01-01 09:00:00.001000'), - 'Micro': Timestamp('2011-01-01 09:00:00.000001'), - 'Nano': Timestamp(np_datetime64_compat( - '2011-01-01T09:00:00.000000001Z'))} + # expected values created by Base._get_offset + # are applied to 2011/01/01 09:00 (Saturday) + # used for .apply and .rollforward + expecteds = {'Day': Timestamp('2011-01-02 09:00:00'), + 'DateOffset': Timestamp('2011-01-02 09:00:00'), + 'BusinessDay': Timestamp('2011-01-03 09:00:00'), + 'CustomBusinessDay': Timestamp('2011-01-03 09:00:00'), + 'CustomBusinessMonthEnd': Timestamp('2011-01-31 09:00:00'), + 'CustomBusinessMonthBegin': Timestamp('2011-01-03 09:00:00'), + 'MonthBegin': Timestamp('2011-02-01 09:00:00'), + 'BusinessMonthBegin': Timestamp('2011-01-03 09:00:00'), + 'MonthEnd': Timestamp('2011-01-31 09:00:00'), + 'SemiMonthEnd': Timestamp('2011-01-15 09:00:00'), + 'SemiMonthBegin': Timestamp('2011-01-15 09:00:00'), + 'BusinessMonthEnd': Timestamp('2011-01-31 09:00:00'), + 'YearBegin': Timestamp('2012-01-01 09:00:00'), + 'BYearBegin': Timestamp('2011-01-03 09:00:00'), + 'YearEnd': Timestamp('2011-12-31 09:00:00'), + 'BYearEnd': Timestamp('2011-12-30 09:00:00'), + 'QuarterBegin': Timestamp('2011-03-01 09:00:00'), + 'BQuarterBegin': Timestamp('2011-03-01 09:00:00'), + 'QuarterEnd': Timestamp('2011-03-31 09:00:00'), + 'BQuarterEnd': Timestamp('2011-03-31 09:00:00'), + 'BusinessHour': Timestamp('2011-01-03 10:00:00'), + 'CustomBusinessHour': Timestamp('2011-01-03 10:00:00'), + 'WeekOfMonth': Timestamp('2011-01-08 09:00:00'), + 'LastWeekOfMonth': Timestamp('2011-01-29 09:00:00'), + 'FY5253Quarter': Timestamp('2011-01-25 09:00:00'), + 'FY5253': Timestamp('2011-01-25 09:00:00'), + 'Week': Timestamp('2011-01-08 09:00:00'), + 'Easter': Timestamp('2011-04-24 09:00:00'), + 'Hour': Timestamp('2011-01-01 10:00:00'), + 'Minute': Timestamp('2011-01-01 09:01:00'), + 'Second': Timestamp('2011-01-01 09:00:01'), + 'Milli': Timestamp('2011-01-01 09:00:00.001000'), + 'Micro': Timestamp('2011-01-01 09:00:00.000001'), + 'Nano': Timestamp(np_datetime64_compat( + '2011-01-01T09:00:00.000000001Z'))} def test_return_type(self, offset_types): offset = self._get_offset(offset_types) @@ -623,7 +618,7 @@ def test_onOffset(self): (BDay(), datetime(2008, 1, 5), False)] for offset, d, expected in tests: - assertOnOffset(offset, d, expected) + assert_onOffset(offset, d, expected) def test_apply(self): tests = [] @@ -668,7 +663,7 @@ def test_apply(self): for offset, cases in tests: for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) + assert_offset_equal(offset, base, expected) def test_apply_large_n(self): dt = datetime(2012, 10, 23) @@ -1272,7 +1267,7 @@ def test_apply(self): for offset, cases in tests: for base, expected in 
compat.iteritems(cases): - assertEq(offset, base, expected) + assert_offset_equal(offset, base, expected) def test_apply_large_n(self): tests = [] @@ -1331,7 +1326,7 @@ def test_apply_large_n(self): for offset, cases in tests: for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) + assert_offset_equal(offset, base, expected) def test_apply_nanoseconds(self): tests = [] @@ -1354,7 +1349,7 @@ def test_apply_nanoseconds(self): for offset, cases in tests: for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) + assert_offset_equal(offset, base, expected) def test_offsets_compare_equal(self): # root cause of #456 @@ -1628,7 +1623,7 @@ def test_apply(self): for offset, cases in tests: for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) + assert_offset_equal(offset, base, expected) def test_apply_nanoseconds(self): tests = [] @@ -1651,7 +1646,7 @@ def test_apply_nanoseconds(self): for offset, cases in tests: for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) + assert_offset_equal(offset, base, expected) class TestCustomBusinessDay(Base): @@ -1752,7 +1747,7 @@ def test_onOffset(self): (CDay(), datetime(2008, 1, 5), False)] for offset, d, expected in tests: - assertOnOffset(offset, d, expected) + assert_onOffset(offset, d, expected) def test_apply(self): tests = [] @@ -1798,7 +1793,7 @@ def test_apply(self): for offset, cases in tests: for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) + assert_offset_equal(offset, base, expected) def test_apply_large_n(self): dt = datetime(2012, 10, 23) @@ -1870,7 +1865,7 @@ def test_weekmask_and_holidays(self): def test_calendar(self): calendar = USFederalHolidayCalendar() dt = datetime(2014, 1, 17) - assertEq(CDay(calendar=calendar), dt, datetime(2014, 1, 21)) + assert_offset_equal(CDay(calendar=calendar), dt, datetime(2014, 1, 21)) def test_roundtrip_pickle(self): def _check_roundtrip(obj): @@ -1997,7 +1992,7 @@ def test_onOffset(self): (CBMonthEnd(), datetime(2008, 1, 1), False)] for offset, d, expected in tests: - assertOnOffset(offset, d, expected) + assert_onOffset(offset, d, expected) def test_apply(self): cbm = CBMonthEnd() @@ -2022,7 +2017,7 @@ def test_apply(self): for offset, cases in tests: for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) + assert_offset_equal(offset, base, expected) def test_apply_large_n(self): dt = datetime(2012, 10, 23) @@ -2111,7 +2106,7 @@ def test_onOffset(self): (CBMonthBegin(), datetime(2008, 1, 31), False)] for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) + assert_onOffset(offset, dt, expected) def test_apply(self): cbm = CBMonthBegin() @@ -2135,7 +2130,7 @@ def test_apply(self): for offset, cases in tests: for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) + assert_offset_equal(offset, base, expected) def test_apply_large_n(self): dt = datetime(2012, 10, 23) @@ -2174,13 +2169,6 @@ def test_datetimeindex(self): freq=cbmb).tolist()[0] == datetime(2012, 1, 3)) -def assertOnOffset(offset, date, expected): - actual = offset.onOffset(date) - assert actual == expected, ("\nExpected: %s\nActual: %s\nFor Offset: %s)" - "\nAt Date: %s" % - (expected, actual, offset, date)) - - class TestWeek(Base): _offset = Week @@ -2231,7 +2219,7 @@ def test_offset(self): for offset, cases in tests: for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) + assert_offset_equal(offset, base, 
expected) def test_onOffset(self): for weekday in range(7): @@ -2244,7 +2232,7 @@ def test_onOffset(self): expected = True else: expected = False - assertOnOffset(offset, date, expected) + assert_onOffset(offset, date, expected) def test_offsets_compare_equal(self): # root cause of #456 @@ -2316,7 +2304,7 @@ def test_offset(self): for n, week, weekday, dt, expected in test_cases: offset = WeekOfMonth(n, week=week, weekday=weekday) - assertEq(offset, dt, expected) + assert_offset_equal(offset, dt, expected) # try subtracting result = datetime(2011, 2, 1) - WeekOfMonth(week=1, weekday=2) @@ -2457,7 +2445,7 @@ def test_offset(self): for offset, cases in tests: for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) + assert_offset_equal(offset, base, expected) def test_onOffset(self): @@ -2467,7 +2455,7 @@ def test_onOffset(self): (BMonthBegin(), datetime(2008, 3, 3), True)] for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) + assert_onOffset(offset, dt, expected) def test_offsets_compare_equal(self): # root cause of #456 @@ -2515,7 +2503,7 @@ def test_offset(self): for offset, cases in tests: for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) + assert_offset_equal(offset, base, expected) def test_normalize(self): dt = datetime(2007, 1, 1, 3) @@ -2530,7 +2518,7 @@ def test_onOffset(self): (BMonthEnd(), datetime(2008, 1, 1), False)] for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) + assert_onOffset(offset, dt, expected) def test_offsets_compare_equal(self): # root cause of #456 @@ -2577,7 +2565,7 @@ def test_offset(self): for offset, cases in tests: for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) + assert_offset_equal(offset, base, expected) class TestMonthEnd(Base): @@ -2619,7 +2607,7 @@ def test_offset(self): for offset, cases in tests: for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) + assert_offset_equal(offset, base, expected) def test_day_of_month(self): dt = datetime(2007, 1, 1) @@ -2644,7 +2632,7 @@ def test_onOffset(self): (MonthEnd(), datetime(2008, 1, 1), False)] for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) + assert_onOffset(offset, dt, expected) class TestSemiMonthEnd(Base): @@ -2759,7 +2747,7 @@ def test_offset_whole_year(self): datetime(2008, 12, 31)) for base, exp_date in zip(dates[:-1], dates[1:]): - assertEq(SemiMonthEnd(), base, exp_date) + assert_offset_equal(SemiMonthEnd(), base, exp_date) # ensure .apply_index works as expected s = DatetimeIndex(dates[:-1]) @@ -2775,7 +2763,7 @@ def test_offset_whole_year(self): def test_offset(self): for offset, cases in self._get_tests(): for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) + assert_offset_equal(offset, base, expected) def test_apply_index(self): for offset, cases in self._get_tests(): @@ -2793,30 +2781,30 @@ def test_onOffset(self): (datetime(2008, 2, 29), True)] for dt, expected in tests: - assertOnOffset(SemiMonthEnd(), dt, expected) - - def test_vectorized_offset_addition(self): - for klass, assert_func in zip([Series, DatetimeIndex], - [tm.assert_series_equal, - tm.assert_index_equal]): - s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), - Timestamp('2000-02-15', tz='US/Central')], name='a') - - result = s + SemiMonthEnd() - result2 = SemiMonthEnd() + s - exp = klass([Timestamp('2000-01-31 00:15:00', tz='US/Central'), - Timestamp('2000-02-29', tz='US/Central')], name='a') - 
assert_func(result, exp) - assert_func(result2, exp) - - s = klass([Timestamp('2000-01-01 00:15:00', tz='US/Central'), - Timestamp('2000-02-01', tz='US/Central')], name='a') - result = s + SemiMonthEnd() - result2 = SemiMonthEnd() + s - exp = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), - Timestamp('2000-02-15', tz='US/Central')], name='a') - assert_func(result, exp) - assert_func(result2, exp) + assert_onOffset(SemiMonthEnd(), dt, expected) + + @pytest.mark.parametrize('klass,assert_func', + [(Series, tm.assert_series_equal), + (DatetimeIndex, tm.assert_index_equal)]) + def test_vectorized_offset_addition(self, klass, assert_func): + s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), + Timestamp('2000-02-15', tz='US/Central')], name='a') + + result = s + SemiMonthEnd() + result2 = SemiMonthEnd() + s + exp = klass([Timestamp('2000-01-31 00:15:00', tz='US/Central'), + Timestamp('2000-02-29', tz='US/Central')], name='a') + assert_func(result, exp) + assert_func(result2, exp) + + s = klass([Timestamp('2000-01-01 00:15:00', tz='US/Central'), + Timestamp('2000-02-01', tz='US/Central')], name='a') + result = s + SemiMonthEnd() + result2 = SemiMonthEnd() + s + exp = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), + Timestamp('2000-02-15', tz='US/Central')], name='a') + assert_func(result, exp) + assert_func(result2, exp) class TestSemiMonthBegin(Base): @@ -2935,7 +2923,7 @@ def test_offset_whole_year(self): datetime(2008, 12, 15)) for base, exp_date in zip(dates[:-1], dates[1:]): - assertEq(SemiMonthBegin(), base, exp_date) + assert_offset_equal(SemiMonthBegin(), base, exp_date) # ensure .apply_index works as expected s = DatetimeIndex(dates[:-1]) @@ -2951,7 +2939,7 @@ def test_offset_whole_year(self): def test_offset(self): for offset, cases in self._get_tests(): for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) + assert_offset_equal(offset, base, expected) def test_apply_index(self): for offset, cases in self._get_tests(): @@ -2968,30 +2956,29 @@ def test_onOffset(self): (datetime(2008, 2, 15), True)] for dt, expected in tests: - assertOnOffset(SemiMonthBegin(), dt, expected) - - def test_vectorized_offset_addition(self): - for klass, assert_func in zip([Series, DatetimeIndex], - [tm.assert_series_equal, - tm.assert_index_equal]): - - s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), - Timestamp('2000-02-15', tz='US/Central')], name='a') - result = s + SemiMonthBegin() - result2 = SemiMonthBegin() + s - exp = klass([Timestamp('2000-02-01 00:15:00', tz='US/Central'), - Timestamp('2000-03-01', tz='US/Central')], name='a') - assert_func(result, exp) - assert_func(result2, exp) - - s = klass([Timestamp('2000-01-01 00:15:00', tz='US/Central'), - Timestamp('2000-02-01', tz='US/Central')], name='a') - result = s + SemiMonthBegin() - result2 = SemiMonthBegin() + s - exp = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), - Timestamp('2000-02-15', tz='US/Central')], name='a') - assert_func(result, exp) - assert_func(result2, exp) + assert_onOffset(SemiMonthBegin(), dt, expected) + + @pytest.mark.parametrize('klass,assert_func', + [(Series, tm.assert_series_equal), + (DatetimeIndex, tm.assert_index_equal)]) + def test_vectorized_offset_addition(self, klass, assert_func): + s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), + Timestamp('2000-02-15', tz='US/Central')], name='a') + result = s + SemiMonthBegin() + result2 = SemiMonthBegin() + s + exp = klass([Timestamp('2000-02-01 00:15:00', tz='US/Central'), + 
Timestamp('2000-03-01', tz='US/Central')], name='a') + assert_func(result, exp) + assert_func(result2, exp) + + s = klass([Timestamp('2000-01-01 00:15:00', tz='US/Central'), + Timestamp('2000-02-01', tz='US/Central')], name='a') + result = s + SemiMonthBegin() + result2 = SemiMonthBegin() + s + exp = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), + Timestamp('2000-02-15', tz='US/Central')], name='a') + assert_func(result, exp) + assert_func(result2, exp) class TestBQuarterBegin(Base): @@ -3081,7 +3068,7 @@ def test_offset(self): for offset, cases in tests: for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) + assert_offset_equal(offset, base, expected) # corner offset = BQuarterBegin(n=-1, startingMonth=1) @@ -3104,100 +3091,100 @@ def test_isAnchored(self): assert BQuarterEnd().isAnchored() assert not BQuarterEnd(2, startingMonth=1).isAnchored() - def test_offset(self): - tests = [] - - tests.append((BQuarterEnd(startingMonth=1), - {datetime(2008, 1, 1): datetime(2008, 1, 31), - datetime(2008, 1, 31): datetime(2008, 4, 30), - datetime(2008, 2, 15): datetime(2008, 4, 30), - datetime(2008, 2, 29): datetime(2008, 4, 30), - datetime(2008, 3, 15): datetime(2008, 4, 30), - datetime(2008, 3, 31): datetime(2008, 4, 30), - datetime(2008, 4, 15): datetime(2008, 4, 30), - datetime(2008, 4, 30): datetime(2008, 7, 31), })) - - tests.append((BQuarterEnd(startingMonth=2), - {datetime(2008, 1, 1): datetime(2008, 2, 29), - datetime(2008, 1, 31): datetime(2008, 2, 29), - datetime(2008, 2, 15): datetime(2008, 2, 29), - datetime(2008, 2, 29): datetime(2008, 5, 30), - datetime(2008, 3, 15): datetime(2008, 5, 30), - datetime(2008, 3, 31): datetime(2008, 5, 30), - datetime(2008, 4, 15): datetime(2008, 5, 30), - datetime(2008, 4, 30): datetime(2008, 5, 30), })) - - tests.append((BQuarterEnd(startingMonth=1, n=0), - {datetime(2008, 1, 1): datetime(2008, 1, 31), - datetime(2008, 1, 31): datetime(2008, 1, 31), - datetime(2008, 2, 15): datetime(2008, 4, 30), - datetime(2008, 2, 29): datetime(2008, 4, 30), - datetime(2008, 3, 15): datetime(2008, 4, 30), - datetime(2008, 3, 31): datetime(2008, 4, 30), - datetime(2008, 4, 15): datetime(2008, 4, 30), - datetime(2008, 4, 30): datetime(2008, 4, 30), })) - - tests.append((BQuarterEnd(startingMonth=1, n=-1), - {datetime(2008, 1, 1): datetime(2007, 10, 31), - datetime(2008, 1, 31): datetime(2007, 10, 31), - datetime(2008, 2, 15): datetime(2008, 1, 31), - datetime(2008, 2, 29): datetime(2008, 1, 31), - datetime(2008, 3, 15): datetime(2008, 1, 31), - datetime(2008, 3, 31): datetime(2008, 1, 31), - datetime(2008, 4, 15): datetime(2008, 1, 31), - datetime(2008, 4, 30): datetime(2008, 1, 31), })) - - tests.append((BQuarterEnd(startingMonth=1, n=2), - {datetime(2008, 1, 31): datetime(2008, 7, 31), - datetime(2008, 2, 15): datetime(2008, 7, 31), - datetime(2008, 2, 29): datetime(2008, 7, 31), - datetime(2008, 3, 15): datetime(2008, 7, 31), - datetime(2008, 3, 31): datetime(2008, 7, 31), - datetime(2008, 4, 15): datetime(2008, 7, 31), - datetime(2008, 4, 30): datetime(2008, 10, 31), })) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - + offset_cases = [] + offset_cases.append((BQuarterEnd(startingMonth=1), + {datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 4, 30), + datetime(2008, 2, 15): datetime(2008, 4, 30), + datetime(2008, 2, 29): datetime(2008, 4, 30), + datetime(2008, 3, 15): datetime(2008, 4, 30), + datetime(2008, 3, 31): 
datetime(2008, 4, 30), + datetime(2008, 4, 15): datetime(2008, 4, 30), + datetime(2008, 4, 30): datetime(2008, 7, 31), })) + + offset_cases.append((BQuarterEnd(startingMonth=2), + {datetime(2008, 1, 1): datetime(2008, 2, 29), + datetime(2008, 1, 31): datetime(2008, 2, 29), + datetime(2008, 2, 15): datetime(2008, 2, 29), + datetime(2008, 2, 29): datetime(2008, 5, 30), + datetime(2008, 3, 15): datetime(2008, 5, 30), + datetime(2008, 3, 31): datetime(2008, 5, 30), + datetime(2008, 4, 15): datetime(2008, 5, 30), + datetime(2008, 4, 30): datetime(2008, 5, 30), })) + + offset_cases.append((BQuarterEnd(startingMonth=1, n=0), + {datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 1, 31), + datetime(2008, 2, 15): datetime(2008, 4, 30), + datetime(2008, 2, 29): datetime(2008, 4, 30), + datetime(2008, 3, 15): datetime(2008, 4, 30), + datetime(2008, 3, 31): datetime(2008, 4, 30), + datetime(2008, 4, 15): datetime(2008, 4, 30), + datetime(2008, 4, 30): datetime(2008, 4, 30), })) + + offset_cases.append((BQuarterEnd(startingMonth=1, n=-1), + {datetime(2008, 1, 1): datetime(2007, 10, 31), + datetime(2008, 1, 31): datetime(2007, 10, 31), + datetime(2008, 2, 15): datetime(2008, 1, 31), + datetime(2008, 2, 29): datetime(2008, 1, 31), + datetime(2008, 3, 15): datetime(2008, 1, 31), + datetime(2008, 3, 31): datetime(2008, 1, 31), + datetime(2008, 4, 15): datetime(2008, 1, 31), + datetime(2008, 4, 30): datetime(2008, 1, 31), })) + + offset_cases.append((BQuarterEnd(startingMonth=1, n=2), + {datetime(2008, 1, 31): datetime(2008, 7, 31), + datetime(2008, 2, 15): datetime(2008, 7, 31), + datetime(2008, 2, 29): datetime(2008, 7, 31), + datetime(2008, 3, 15): datetime(2008, 7, 31), + datetime(2008, 3, 31): datetime(2008, 7, 31), + datetime(2008, 4, 15): datetime(2008, 7, 31), + datetime(2008, 4, 30): datetime(2008, 10, 31), })) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + def test_offset_corner_case(self): # corner offset = BQuarterEnd(n=-1, startingMonth=1) assert datetime(2010, 1, 31) + offset == datetime(2010, 1, 29) - def test_onOffset(self): - - tests = [ - (BQuarterEnd(1, startingMonth=1), datetime(2008, 1, 31), True), - (BQuarterEnd(1, startingMonth=1), datetime(2007, 12, 31), False), - (BQuarterEnd(1, startingMonth=1), datetime(2008, 2, 29), False), - (BQuarterEnd(1, startingMonth=1), datetime(2007, 3, 30), False), - (BQuarterEnd(1, startingMonth=1), datetime(2007, 3, 31), False), - (BQuarterEnd(1, startingMonth=1), datetime(2008, 4, 30), True), - (BQuarterEnd(1, startingMonth=1), datetime(2008, 5, 30), False), - (BQuarterEnd(1, startingMonth=1), datetime(2007, 6, 29), False), - (BQuarterEnd(1, startingMonth=1), datetime(2007, 6, 30), False), - (BQuarterEnd(1, startingMonth=2), datetime(2008, 1, 31), False), - (BQuarterEnd(1, startingMonth=2), datetime(2007, 12, 31), False), - (BQuarterEnd(1, startingMonth=2), datetime(2008, 2, 29), True), - (BQuarterEnd(1, startingMonth=2), datetime(2007, 3, 30), False), - (BQuarterEnd(1, startingMonth=2), datetime(2007, 3, 31), False), - (BQuarterEnd(1, startingMonth=2), datetime(2008, 4, 30), False), - (BQuarterEnd(1, startingMonth=2), datetime(2008, 5, 30), True), - (BQuarterEnd(1, startingMonth=2), datetime(2007, 6, 29), False), - (BQuarterEnd(1, startingMonth=2), datetime(2007, 6, 30), False), - (BQuarterEnd(1, startingMonth=3), datetime(2008, 1, 31), False), - (BQuarterEnd(1, 
startingMonth=3), datetime(2007, 12, 31), True), - (BQuarterEnd(1, startingMonth=3), datetime(2008, 2, 29), False), - (BQuarterEnd(1, startingMonth=3), datetime(2007, 3, 30), True), - (BQuarterEnd(1, startingMonth=3), datetime(2007, 3, 31), False), - (BQuarterEnd(1, startingMonth=3), datetime(2008, 4, 30), False), - (BQuarterEnd(1, startingMonth=3), datetime(2008, 5, 30), False), - (BQuarterEnd(1, startingMonth=3), datetime(2007, 6, 29), True), - (BQuarterEnd(1, startingMonth=3), datetime(2007, 6, 30), False), - ] - - for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) + on_offset_cases = [ + (BQuarterEnd(1, startingMonth=1), datetime(2008, 1, 31), True), + (BQuarterEnd(1, startingMonth=1), datetime(2007, 12, 31), False), + (BQuarterEnd(1, startingMonth=1), datetime(2008, 2, 29), False), + (BQuarterEnd(1, startingMonth=1), datetime(2007, 3, 30), False), + (BQuarterEnd(1, startingMonth=1), datetime(2007, 3, 31), False), + (BQuarterEnd(1, startingMonth=1), datetime(2008, 4, 30), True), + (BQuarterEnd(1, startingMonth=1), datetime(2008, 5, 30), False), + (BQuarterEnd(1, startingMonth=1), datetime(2007, 6, 29), False), + (BQuarterEnd(1, startingMonth=1), datetime(2007, 6, 30), False), + (BQuarterEnd(1, startingMonth=2), datetime(2008, 1, 31), False), + (BQuarterEnd(1, startingMonth=2), datetime(2007, 12, 31), False), + (BQuarterEnd(1, startingMonth=2), datetime(2008, 2, 29), True), + (BQuarterEnd(1, startingMonth=2), datetime(2007, 3, 30), False), + (BQuarterEnd(1, startingMonth=2), datetime(2007, 3, 31), False), + (BQuarterEnd(1, startingMonth=2), datetime(2008, 4, 30), False), + (BQuarterEnd(1, startingMonth=2), datetime(2008, 5, 30), True), + (BQuarterEnd(1, startingMonth=2), datetime(2007, 6, 29), False), + (BQuarterEnd(1, startingMonth=2), datetime(2007, 6, 30), False), + (BQuarterEnd(1, startingMonth=3), datetime(2008, 1, 31), False), + (BQuarterEnd(1, startingMonth=3), datetime(2007, 12, 31), True), + (BQuarterEnd(1, startingMonth=3), datetime(2008, 2, 29), False), + (BQuarterEnd(1, startingMonth=3), datetime(2007, 3, 30), True), + (BQuarterEnd(1, startingMonth=3), datetime(2007, 3, 31), False), + (BQuarterEnd(1, startingMonth=3), datetime(2008, 4, 30), False), + (BQuarterEnd(1, startingMonth=3), datetime(2008, 5, 30), False), + (BQuarterEnd(1, startingMonth=3), datetime(2007, 6, 29), True), + (BQuarterEnd(1, startingMonth=3), datetime(2007, 6, 30), False)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + offset, dt, expected = case + assert_onOffset(offset, dt, expected) def makeFY5253LastOfMonthQuarter(*args, **kwds): @@ -3268,7 +3255,7 @@ def test_onOffset(self): ] for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) + assert_onOffset(offset, dt, expected) def test_apply(self): offset_lom_aug_sat = makeFY5253LastOfMonth(startingMonth=8, @@ -3410,7 +3397,7 @@ def test_onOffset(self): ] for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) + assert_onOffset(offset, dt, expected) def test_apply(self): date_seq_nem_8_sat = [datetime(2006, 9, 2), datetime(2007, 9, 1), @@ -3515,27 +3502,28 @@ def test_offset(self): datetime(2012, 9, 29), datetime(2012, 12, 29), datetime(2013, 3, 30), datetime(2013, 6, 29)] - assertEq(offset, base=GMCR[0], expected=GMCR[1]) - assertEq(offset, base=GMCR[0] + relativedelta(days=-1), - expected=GMCR[0]) - assertEq(offset, base=GMCR[1], expected=GMCR[2]) + assert_offset_equal(offset, base=GMCR[0], expected=GMCR[1]) + assert_offset_equal(offset, base=GMCR[0] + 
relativedelta(days=-1), + expected=GMCR[0]) + assert_offset_equal(offset, base=GMCR[1], expected=GMCR[2]) - assertEq(offset2, base=GMCR[0], expected=GMCR[2]) - assertEq(offset4, base=GMCR[0], expected=GMCR[4]) + assert_offset_equal(offset2, base=GMCR[0], expected=GMCR[2]) + assert_offset_equal(offset4, base=GMCR[0], expected=GMCR[4]) - assertEq(offset_neg1, base=GMCR[-1], expected=GMCR[-2]) - assertEq(offset_neg1, base=GMCR[-1] + relativedelta(days=+1), - expected=GMCR[-1]) - assertEq(offset_neg2, base=GMCR[-1], expected=GMCR[-3]) + assert_offset_equal(offset_neg1, base=GMCR[-1], expected=GMCR[-2]) + assert_offset_equal(offset_neg1, + base=GMCR[-1] + relativedelta(days=+1), + expected=GMCR[-1]) + assert_offset_equal(offset_neg2, base=GMCR[-1], expected=GMCR[-3]) date = GMCR[0] + relativedelta(days=-1) for expected in GMCR: - assertEq(offset, date, expected) + assert_offset_equal(offset, date, expected) date = date + offset date = GMCR[-1] + relativedelta(days=+1) for expected in reversed(GMCR): - assertEq(offset_neg1, date, expected) + assert_offset_equal(offset_neg1, date, expected) date = date + offset_neg1 def test_onOffset(self): @@ -3609,7 +3597,7 @@ def test_onOffset(self): ] for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) + assert_onOffset(offset, dt, expected) def test_year_has_extra_week(self): # End of long Q1 @@ -3722,29 +3710,35 @@ def test_onOffset(self): ] for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) + assert_onOffset(offset, dt, expected) def test_offset(self): offset = makeFY5253NearestEndMonthQuarter(1, startingMonth=8, weekday=WeekDay.THU, qtr_with_extra_week=4) - MU = [datetime(2012, 5, 31), datetime(2012, 8, 30), datetime(2012, 11, - 29), + MU = [datetime(2012, 5, 31), + datetime(2012, 8, 30), datetime(2012, 11, 29), datetime(2013, 2, 28), datetime(2013, 5, 30)] date = MU[0] + relativedelta(days=-1) for expected in MU: - assertEq(offset, date, expected) + assert_offset_equal(offset, date, expected) date = date + offset - assertEq(offset, datetime(2012, 5, 31), datetime(2012, 8, 30)) - assertEq(offset, datetime(2012, 5, 30), datetime(2012, 5, 31)) + assert_offset_equal(offset, + datetime(2012, 5, 31), + datetime(2012, 8, 30)) + assert_offset_equal(offset, + datetime(2012, 5, 30), + datetime(2012, 5, 31)) offset2 = FY5253Quarter(weekday=5, startingMonth=12, variation="last", qtr_with_extra_week=4) - assertEq(offset2, datetime(2013, 1, 15), datetime(2013, 3, 30)) + assert_offset_equal(offset2, + datetime(2013, 1, 15), + datetime(2013, 3, 30)) class TestQuarterBegin(Base): @@ -3762,64 +3756,65 @@ def test_isAnchored(self): assert QuarterBegin().isAnchored() assert not QuarterBegin(2, startingMonth=1).isAnchored() - def test_offset(self): - tests = [] - - tests.append((QuarterBegin(startingMonth=1), - {datetime(2007, 12, 1): datetime(2008, 1, 1), - datetime(2008, 1, 1): datetime(2008, 4, 1), - datetime(2008, 2, 15): datetime(2008, 4, 1), - datetime(2008, 2, 29): datetime(2008, 4, 1), - datetime(2008, 3, 15): datetime(2008, 4, 1), - datetime(2008, 3, 31): datetime(2008, 4, 1), - datetime(2008, 4, 15): datetime(2008, 7, 1), - datetime(2008, 4, 1): datetime(2008, 7, 1), })) - - tests.append((QuarterBegin(startingMonth=2), - {datetime(2008, 1, 1): datetime(2008, 2, 1), - datetime(2008, 1, 31): datetime(2008, 2, 1), - datetime(2008, 1, 15): datetime(2008, 2, 1), - datetime(2008, 2, 29): datetime(2008, 5, 1), - datetime(2008, 3, 15): datetime(2008, 5, 1), - datetime(2008, 3, 31): datetime(2008, 5, 1), - datetime(2008, 4, 15): 
datetime(2008, 5, 1), - datetime(2008, 4, 30): datetime(2008, 5, 1), })) - - tests.append((QuarterBegin(startingMonth=1, n=0), - {datetime(2008, 1, 1): datetime(2008, 1, 1), - datetime(2008, 12, 1): datetime(2009, 1, 1), - datetime(2008, 1, 1): datetime(2008, 1, 1), - datetime(2008, 2, 15): datetime(2008, 4, 1), - datetime(2008, 2, 29): datetime(2008, 4, 1), - datetime(2008, 3, 15): datetime(2008, 4, 1), - datetime(2008, 3, 31): datetime(2008, 4, 1), - datetime(2008, 4, 15): datetime(2008, 7, 1), - datetime(2008, 4, 30): datetime(2008, 7, 1), })) - - tests.append((QuarterBegin(startingMonth=1, n=-1), - {datetime(2008, 1, 1): datetime(2007, 10, 1), - datetime(2008, 1, 31): datetime(2008, 1, 1), - datetime(2008, 2, 15): datetime(2008, 1, 1), - datetime(2008, 2, 29): datetime(2008, 1, 1), - datetime(2008, 3, 15): datetime(2008, 1, 1), - datetime(2008, 3, 31): datetime(2008, 1, 1), - datetime(2008, 4, 15): datetime(2008, 4, 1), - datetime(2008, 4, 30): datetime(2008, 4, 1), - datetime(2008, 7, 1): datetime(2008, 4, 1)})) - - tests.append((QuarterBegin(startingMonth=1, n=2), - {datetime(2008, 1, 1): datetime(2008, 7, 1), - datetime(2008, 2, 15): datetime(2008, 7, 1), - datetime(2008, 2, 29): datetime(2008, 7, 1), - datetime(2008, 3, 15): datetime(2008, 7, 1), - datetime(2008, 3, 31): datetime(2008, 7, 1), - datetime(2008, 4, 15): datetime(2008, 10, 1), - datetime(2008, 4, 1): datetime(2008, 10, 1), })) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - + offset_cases = [] + offset_cases.append((QuarterBegin(startingMonth=1), + {datetime(2007, 12, 1): datetime(2008, 1, 1), + datetime(2008, 1, 1): datetime(2008, 4, 1), + datetime(2008, 2, 15): datetime(2008, 4, 1), + datetime(2008, 2, 29): datetime(2008, 4, 1), + datetime(2008, 3, 15): datetime(2008, 4, 1), + datetime(2008, 3, 31): datetime(2008, 4, 1), + datetime(2008, 4, 15): datetime(2008, 7, 1), + datetime(2008, 4, 1): datetime(2008, 7, 1), })) + + offset_cases.append((QuarterBegin(startingMonth=2), + {datetime(2008, 1, 1): datetime(2008, 2, 1), + datetime(2008, 1, 31): datetime(2008, 2, 1), + datetime(2008, 1, 15): datetime(2008, 2, 1), + datetime(2008, 2, 29): datetime(2008, 5, 1), + datetime(2008, 3, 15): datetime(2008, 5, 1), + datetime(2008, 3, 31): datetime(2008, 5, 1), + datetime(2008, 4, 15): datetime(2008, 5, 1), + datetime(2008, 4, 30): datetime(2008, 5, 1), })) + + offset_cases.append((QuarterBegin(startingMonth=1, n=0), + {datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 12, 1): datetime(2009, 1, 1), + datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 2, 15): datetime(2008, 4, 1), + datetime(2008, 2, 29): datetime(2008, 4, 1), + datetime(2008, 3, 15): datetime(2008, 4, 1), + datetime(2008, 3, 31): datetime(2008, 4, 1), + datetime(2008, 4, 15): datetime(2008, 7, 1), + datetime(2008, 4, 30): datetime(2008, 7, 1), })) + + offset_cases.append((QuarterBegin(startingMonth=1, n=-1), + {datetime(2008, 1, 1): datetime(2007, 10, 1), + datetime(2008, 1, 31): datetime(2008, 1, 1), + datetime(2008, 2, 15): datetime(2008, 1, 1), + datetime(2008, 2, 29): datetime(2008, 1, 1), + datetime(2008, 3, 15): datetime(2008, 1, 1), + datetime(2008, 3, 31): datetime(2008, 1, 1), + datetime(2008, 4, 15): datetime(2008, 4, 1), + datetime(2008, 4, 30): datetime(2008, 4, 1), + datetime(2008, 7, 1): datetime(2008, 4, 1)})) + + offset_cases.append((QuarterBegin(startingMonth=1, n=2), + {datetime(2008, 1, 1): datetime(2008, 7, 1), + datetime(2008, 2, 15): datetime(2008, 7, 
1), + datetime(2008, 2, 29): datetime(2008, 7, 1), + datetime(2008, 3, 15): datetime(2008, 7, 1), + datetime(2008, 3, 31): datetime(2008, 7, 1), + datetime(2008, 4, 15): datetime(2008, 10, 1), + datetime(2008, 4, 1): datetime(2008, 10, 1), })) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + def test_offset_corner_case(self): # corner offset = QuarterBegin(n=-1, startingMonth=1) assert datetime(2010, 2, 1) + offset == datetime(2010, 1, 1) @@ -3841,127 +3836,104 @@ def test_isAnchored(self): assert QuarterEnd().isAnchored() assert not QuarterEnd(2, startingMonth=1).isAnchored() - def test_offset(self): - tests = [] - - tests.append((QuarterEnd(startingMonth=1), - {datetime(2008, 1, 1): datetime(2008, 1, 31), - datetime(2008, 1, 31): datetime(2008, 4, 30), - datetime(2008, 2, 15): datetime(2008, 4, 30), - datetime(2008, 2, 29): datetime(2008, 4, 30), - datetime(2008, 3, 15): datetime(2008, 4, 30), - datetime(2008, 3, 31): datetime(2008, 4, 30), - datetime(2008, 4, 15): datetime(2008, 4, 30), - datetime(2008, 4, 30): datetime(2008, 7, 31), })) - - tests.append((QuarterEnd(startingMonth=2), - {datetime(2008, 1, 1): datetime(2008, 2, 29), - datetime(2008, 1, 31): datetime(2008, 2, 29), - datetime(2008, 2, 15): datetime(2008, 2, 29), - datetime(2008, 2, 29): datetime(2008, 5, 31), - datetime(2008, 3, 15): datetime(2008, 5, 31), - datetime(2008, 3, 31): datetime(2008, 5, 31), - datetime(2008, 4, 15): datetime(2008, 5, 31), - datetime(2008, 4, 30): datetime(2008, 5, 31), })) - - tests.append((QuarterEnd(startingMonth=1, n=0), - {datetime(2008, 1, 1): datetime(2008, 1, 31), - datetime(2008, 1, 31): datetime(2008, 1, 31), - datetime(2008, 2, 15): datetime(2008, 4, 30), - datetime(2008, 2, 29): datetime(2008, 4, 30), - datetime(2008, 3, 15): datetime(2008, 4, 30), - datetime(2008, 3, 31): datetime(2008, 4, 30), - datetime(2008, 4, 15): datetime(2008, 4, 30), - datetime(2008, 4, 30): datetime(2008, 4, 30), })) - - tests.append((QuarterEnd(startingMonth=1, n=-1), - {datetime(2008, 1, 1): datetime(2007, 10, 31), - datetime(2008, 1, 31): datetime(2007, 10, 31), - datetime(2008, 2, 15): datetime(2008, 1, 31), - datetime(2008, 2, 29): datetime(2008, 1, 31), - datetime(2008, 3, 15): datetime(2008, 1, 31), - datetime(2008, 3, 31): datetime(2008, 1, 31), - datetime(2008, 4, 15): datetime(2008, 1, 31), - datetime(2008, 4, 30): datetime(2008, 1, 31), - datetime(2008, 7, 1): datetime(2008, 4, 30)})) - - tests.append((QuarterEnd(startingMonth=1, n=2), - {datetime(2008, 1, 31): datetime(2008, 7, 31), - datetime(2008, 2, 15): datetime(2008, 7, 31), - datetime(2008, 2, 29): datetime(2008, 7, 31), - datetime(2008, 3, 15): datetime(2008, 7, 31), - datetime(2008, 3, 31): datetime(2008, 7, 31), - datetime(2008, 4, 15): datetime(2008, 7, 31), - datetime(2008, 4, 30): datetime(2008, 10, 31), })) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - + offset_cases = [] + offset_cases.append((QuarterEnd(startingMonth=1), + {datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 4, 30), + datetime(2008, 2, 15): datetime(2008, 4, 30), + datetime(2008, 2, 29): datetime(2008, 4, 30), + datetime(2008, 3, 15): datetime(2008, 4, 30), + datetime(2008, 3, 31): datetime(2008, 4, 30), + datetime(2008, 4, 15): datetime(2008, 4, 30), + datetime(2008, 4, 30): datetime(2008, 7, 31), })) + + 
offset_cases.append((QuarterEnd(startingMonth=2), + {datetime(2008, 1, 1): datetime(2008, 2, 29), + datetime(2008, 1, 31): datetime(2008, 2, 29), + datetime(2008, 2, 15): datetime(2008, 2, 29), + datetime(2008, 2, 29): datetime(2008, 5, 31), + datetime(2008, 3, 15): datetime(2008, 5, 31), + datetime(2008, 3, 31): datetime(2008, 5, 31), + datetime(2008, 4, 15): datetime(2008, 5, 31), + datetime(2008, 4, 30): datetime(2008, 5, 31), })) + + offset_cases.append((QuarterEnd(startingMonth=1, n=0), + {datetime(2008, 1, 1): datetime(2008, 1, 31), + datetime(2008, 1, 31): datetime(2008, 1, 31), + datetime(2008, 2, 15): datetime(2008, 4, 30), + datetime(2008, 2, 29): datetime(2008, 4, 30), + datetime(2008, 3, 15): datetime(2008, 4, 30), + datetime(2008, 3, 31): datetime(2008, 4, 30), + datetime(2008, 4, 15): datetime(2008, 4, 30), + datetime(2008, 4, 30): datetime(2008, 4, 30), })) + + offset_cases.append((QuarterEnd(startingMonth=1, n=-1), + {datetime(2008, 1, 1): datetime(2007, 10, 31), + datetime(2008, 1, 31): datetime(2007, 10, 31), + datetime(2008, 2, 15): datetime(2008, 1, 31), + datetime(2008, 2, 29): datetime(2008, 1, 31), + datetime(2008, 3, 15): datetime(2008, 1, 31), + datetime(2008, 3, 31): datetime(2008, 1, 31), + datetime(2008, 4, 15): datetime(2008, 1, 31), + datetime(2008, 4, 30): datetime(2008, 1, 31), + datetime(2008, 7, 1): datetime(2008, 4, 30)})) + + offset_cases.append((QuarterEnd(startingMonth=1, n=2), + {datetime(2008, 1, 31): datetime(2008, 7, 31), + datetime(2008, 2, 15): datetime(2008, 7, 31), + datetime(2008, 2, 29): datetime(2008, 7, 31), + datetime(2008, 3, 15): datetime(2008, 7, 31), + datetime(2008, 3, 31): datetime(2008, 7, 31), + datetime(2008, 4, 15): datetime(2008, 7, 31), + datetime(2008, 4, 30): datetime(2008, 10, 31), })) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + def test_offset_corner_case(self): # corner offset = QuarterEnd(n=-1, startingMonth=1) assert datetime(2010, 2, 1) + offset == datetime(2010, 1, 31) - def test_onOffset(self): - - tests = [(QuarterEnd(1, startingMonth=1), datetime(2008, 1, 31), True), - (QuarterEnd(1, startingMonth=1), datetime(2007, 12, 31), - False), - (QuarterEnd(1, startingMonth=1), datetime(2008, 2, 29), - False), - (QuarterEnd(1, startingMonth=1), datetime(2007, 3, 30), - False), - (QuarterEnd(1, startingMonth=1), datetime(2007, 3, 31), - False), - (QuarterEnd(1, startingMonth=1), datetime(2008, 4, 30), True), - (QuarterEnd(1, startingMonth=1), datetime(2008, 5, 30), - False), - (QuarterEnd(1, startingMonth=1), datetime(2008, 5, 31), - False), - (QuarterEnd(1, startingMonth=1), datetime(2007, 6, 29), - False), - (QuarterEnd(1, startingMonth=1), datetime(2007, 6, 30), - False), - (QuarterEnd(1, startingMonth=2), datetime(2008, 1, 31), - False), - (QuarterEnd(1, startingMonth=2), datetime(2007, 12, 31), - False), - (QuarterEnd(1, startingMonth=2), datetime(2008, 2, 29), True), - (QuarterEnd(1, startingMonth=2), datetime(2007, 3, 30), - False), - (QuarterEnd(1, startingMonth=2), datetime(2007, 3, 31), - False), - (QuarterEnd(1, startingMonth=2), datetime(2008, 4, 30), - False), - (QuarterEnd(1, startingMonth=2), datetime(2008, 5, 30), - False), - (QuarterEnd(1, startingMonth=2), datetime(2008, 5, 31), True), - (QuarterEnd(1, startingMonth=2), datetime(2007, 6, 29), - False), - (QuarterEnd(1, startingMonth=2), datetime(2007, 6, 30), - False), - (QuarterEnd(1, 
startingMonth=3), datetime(2008, 1, 31), - False), - (QuarterEnd(1, startingMonth=3), datetime(2007, 12, 31), - True), - (QuarterEnd(1, startingMonth=3), datetime(2008, 2, 29), - False), - (QuarterEnd(1, startingMonth=3), datetime(2007, 3, 30), - False), - (QuarterEnd(1, startingMonth=3), datetime(2007, 3, 31), True), - (QuarterEnd(1, startingMonth=3), datetime(2008, 4, 30), - False), - (QuarterEnd(1, startingMonth=3), datetime(2008, 5, 30), - False), - (QuarterEnd(1, startingMonth=3), datetime(2008, 5, 31), - False), - (QuarterEnd(1, startingMonth=3), datetime(2007, 6, 29), - False), - (QuarterEnd(1, startingMonth=3), datetime(2007, 6, 30), - True), ] - - for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) + on_offset_cases = [ + (QuarterEnd(1, startingMonth=1), datetime(2008, 1, 31), True), + (QuarterEnd(1, startingMonth=1), datetime(2007, 12, 31), False), + (QuarterEnd(1, startingMonth=1), datetime(2008, 2, 29), False), + (QuarterEnd(1, startingMonth=1), datetime(2007, 3, 30), False), + (QuarterEnd(1, startingMonth=1), datetime(2007, 3, 31), False), + (QuarterEnd(1, startingMonth=1), datetime(2008, 4, 30), True), + (QuarterEnd(1, startingMonth=1), datetime(2008, 5, 30), False), + (QuarterEnd(1, startingMonth=1), datetime(2008, 5, 31), False), + (QuarterEnd(1, startingMonth=1), datetime(2007, 6, 29), False), + (QuarterEnd(1, startingMonth=1), datetime(2007, 6, 30), False), + (QuarterEnd(1, startingMonth=2), datetime(2008, 1, 31), False), + (QuarterEnd(1, startingMonth=2), datetime(2007, 12, 31), False), + (QuarterEnd(1, startingMonth=2), datetime(2008, 2, 29), True), + (QuarterEnd(1, startingMonth=2), datetime(2007, 3, 30), False), + (QuarterEnd(1, startingMonth=2), datetime(2007, 3, 31), False), + (QuarterEnd(1, startingMonth=2), datetime(2008, 4, 30), False), + (QuarterEnd(1, startingMonth=2), datetime(2008, 5, 30), False), + (QuarterEnd(1, startingMonth=2), datetime(2008, 5, 31), True), + (QuarterEnd(1, startingMonth=2), datetime(2007, 6, 29), False), + (QuarterEnd(1, startingMonth=2), datetime(2007, 6, 30), False), + (QuarterEnd(1, startingMonth=3), datetime(2008, 1, 31), False), + (QuarterEnd(1, startingMonth=3), datetime(2007, 12, 31), True), + (QuarterEnd(1, startingMonth=3), datetime(2008, 2, 29), False), + (QuarterEnd(1, startingMonth=3), datetime(2007, 3, 30), False), + (QuarterEnd(1, startingMonth=3), datetime(2007, 3, 31), True), + (QuarterEnd(1, startingMonth=3), datetime(2008, 4, 30), False), + (QuarterEnd(1, startingMonth=3), datetime(2008, 5, 30), False), + (QuarterEnd(1, startingMonth=3), datetime(2008, 5, 31), False), + (QuarterEnd(1, startingMonth=3), datetime(2007, 6, 29), False), + (QuarterEnd(1, startingMonth=3), datetime(2007, 6, 30), True)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + offset, dt, expected = case + assert_onOffset(offset, dt, expected) class TestBYearBegin(Base): @@ -3971,43 +3943,43 @@ def test_misspecified(self): pytest.raises(ValueError, BYearBegin, month=13) pytest.raises(ValueError, BYearEnd, month=13) - def test_offset(self): - tests = [] - - tests.append((BYearBegin(), - {datetime(2008, 1, 1): datetime(2009, 1, 1), - datetime(2008, 6, 30): datetime(2009, 1, 1), - datetime(2008, 12, 31): datetime(2009, 1, 1), - datetime(2011, 1, 1): datetime(2011, 1, 3), - datetime(2011, 1, 3): datetime(2012, 1, 2), - datetime(2005, 12, 30): datetime(2006, 1, 2), - datetime(2005, 12, 31): datetime(2006, 1, 2)})) - - tests.append((BYearBegin(0), - {datetime(2008, 1, 1): datetime(2008, 1, 1), - 
datetime(2008, 6, 30): datetime(2009, 1, 1), - datetime(2008, 12, 31): datetime(2009, 1, 1), - datetime(2005, 12, 30): datetime(2006, 1, 2), - datetime(2005, 12, 31): datetime(2006, 1, 2), })) - - tests.append((BYearBegin(-1), - {datetime(2007, 1, 1): datetime(2006, 1, 2), - datetime(2009, 1, 4): datetime(2009, 1, 1), - datetime(2009, 1, 1): datetime(2008, 1, 1), - datetime(2008, 6, 30): datetime(2008, 1, 1), - datetime(2008, 12, 31): datetime(2008, 1, 1), - datetime(2006, 12, 29): datetime(2006, 1, 2), - datetime(2006, 12, 30): datetime(2006, 1, 2), - datetime(2006, 1, 1): datetime(2005, 1, 3), })) - - tests.append((BYearBegin(-2), - {datetime(2007, 1, 1): datetime(2005, 1, 3), - datetime(2007, 6, 30): datetime(2006, 1, 2), - datetime(2008, 12, 31): datetime(2007, 1, 1), })) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) + offset_cases = [] + offset_cases.append((BYearBegin(), + {datetime(2008, 1, 1): datetime(2009, 1, 1), + datetime(2008, 6, 30): datetime(2009, 1, 1), + datetime(2008, 12, 31): datetime(2009, 1, 1), + datetime(2011, 1, 1): datetime(2011, 1, 3), + datetime(2011, 1, 3): datetime(2012, 1, 2), + datetime(2005, 12, 30): datetime(2006, 1, 2), + datetime(2005, 12, 31): datetime(2006, 1, 2)})) + + offset_cases.append((BYearBegin(0), + {datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 6, 30): datetime(2009, 1, 1), + datetime(2008, 12, 31): datetime(2009, 1, 1), + datetime(2005, 12, 30): datetime(2006, 1, 2), + datetime(2005, 12, 31): datetime(2006, 1, 2), })) + + offset_cases.append((BYearBegin(-1), + {datetime(2007, 1, 1): datetime(2006, 1, 2), + datetime(2009, 1, 4): datetime(2009, 1, 1), + datetime(2009, 1, 1): datetime(2008, 1, 1), + datetime(2008, 6, 30): datetime(2008, 1, 1), + datetime(2008, 12, 31): datetime(2008, 1, 1), + datetime(2006, 12, 29): datetime(2006, 1, 2), + datetime(2006, 12, 30): datetime(2006, 1, 2), + datetime(2006, 1, 1): datetime(2005, 1, 3), })) + + offset_cases.append((BYearBegin(-2), + {datetime(2007, 1, 1): datetime(2005, 1, 3), + datetime(2007, 6, 30): datetime(2006, 1, 2), + datetime(2008, 12, 31): datetime(2007, 1, 1), })) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) class TestYearBegin(Base): @@ -4016,91 +3988,89 @@ class TestYearBegin(Base): def test_misspecified(self): pytest.raises(ValueError, YearBegin, month=13) - def test_offset(self): - tests = [] - - tests.append((YearBegin(), - {datetime(2008, 1, 1): datetime(2009, 1, 1), - datetime(2008, 6, 30): datetime(2009, 1, 1), - datetime(2008, 12, 31): datetime(2009, 1, 1), - datetime(2005, 12, 30): datetime(2006, 1, 1), - datetime(2005, 12, 31): datetime(2006, 1, 1), })) - - tests.append((YearBegin(0), - {datetime(2008, 1, 1): datetime(2008, 1, 1), - datetime(2008, 6, 30): datetime(2009, 1, 1), - datetime(2008, 12, 31): datetime(2009, 1, 1), - datetime(2005, 12, 30): datetime(2006, 1, 1), - datetime(2005, 12, 31): datetime(2006, 1, 1), })) - - tests.append((YearBegin(3), - {datetime(2008, 1, 1): datetime(2011, 1, 1), - datetime(2008, 6, 30): datetime(2011, 1, 1), - datetime(2008, 12, 31): datetime(2011, 1, 1), - datetime(2005, 12, 30): datetime(2008, 1, 1), - datetime(2005, 12, 31): datetime(2008, 1, 1), })) - - tests.append((YearBegin(-1), - {datetime(2007, 1, 1): datetime(2006, 1, 1), - datetime(2007, 1, 15): datetime(2007, 1, 1), - datetime(2008, 6, 30): 
datetime(2008, 1, 1), - datetime(2008, 12, 31): datetime(2008, 1, 1), - datetime(2006, 12, 29): datetime(2006, 1, 1), - datetime(2006, 12, 30): datetime(2006, 1, 1), - datetime(2007, 1, 1): datetime(2006, 1, 1), })) - - tests.append((YearBegin(-2), - {datetime(2007, 1, 1): datetime(2005, 1, 1), - datetime(2008, 6, 30): datetime(2007, 1, 1), - datetime(2008, 12, 31): datetime(2007, 1, 1), })) - - tests.append((YearBegin(month=4), - {datetime(2007, 4, 1): datetime(2008, 4, 1), - datetime(2007, 4, 15): datetime(2008, 4, 1), - datetime(2007, 3, 1): datetime(2007, 4, 1), - datetime(2007, 12, 15): datetime(2008, 4, 1), - datetime(2012, 1, 31): datetime(2012, 4, 1), })) - - tests.append((YearBegin(0, month=4), - {datetime(2007, 4, 1): datetime(2007, 4, 1), - datetime(2007, 3, 1): datetime(2007, 4, 1), - datetime(2007, 12, 15): datetime(2008, 4, 1), - datetime(2012, 1, 31): datetime(2012, 4, 1), })) - - tests.append((YearBegin(4, month=4), - {datetime(2007, 4, 1): datetime(2011, 4, 1), - datetime(2007, 4, 15): datetime(2011, 4, 1), - datetime(2007, 3, 1): datetime(2010, 4, 1), - datetime(2007, 12, 15): datetime(2011, 4, 1), - datetime(2012, 1, 31): datetime(2015, 4, 1), })) - - tests.append((YearBegin(-1, month=4), - {datetime(2007, 4, 1): datetime(2006, 4, 1), - datetime(2007, 3, 1): datetime(2006, 4, 1), - datetime(2007, 12, 15): datetime(2007, 4, 1), - datetime(2012, 1, 31): datetime(2011, 4, 1), })) - - tests.append((YearBegin(-3, month=4), - {datetime(2007, 4, 1): datetime(2004, 4, 1), - datetime(2007, 3, 1): datetime(2004, 4, 1), - datetime(2007, 12, 15): datetime(2005, 4, 1), - datetime(2012, 1, 31): datetime(2009, 4, 1), })) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - def test_onOffset(self): - - tests = [ - (YearBegin(), datetime(2007, 1, 3), False), - (YearBegin(), datetime(2008, 1, 1), True), - (YearBegin(), datetime(2006, 12, 31), False), - (YearBegin(), datetime(2006, 1, 2), False), - ] - - for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) + offset_cases = [] + offset_cases.append((YearBegin(), + {datetime(2008, 1, 1): datetime(2009, 1, 1), + datetime(2008, 6, 30): datetime(2009, 1, 1), + datetime(2008, 12, 31): datetime(2009, 1, 1), + datetime(2005, 12, 30): datetime(2006, 1, 1), + datetime(2005, 12, 31): datetime(2006, 1, 1), })) + + offset_cases.append((YearBegin(0), + {datetime(2008, 1, 1): datetime(2008, 1, 1), + datetime(2008, 6, 30): datetime(2009, 1, 1), + datetime(2008, 12, 31): datetime(2009, 1, 1), + datetime(2005, 12, 30): datetime(2006, 1, 1), + datetime(2005, 12, 31): datetime(2006, 1, 1), })) + + offset_cases.append((YearBegin(3), + {datetime(2008, 1, 1): datetime(2011, 1, 1), + datetime(2008, 6, 30): datetime(2011, 1, 1), + datetime(2008, 12, 31): datetime(2011, 1, 1), + datetime(2005, 12, 30): datetime(2008, 1, 1), + datetime(2005, 12, 31): datetime(2008, 1, 1), })) + + offset_cases.append((YearBegin(-1), + {datetime(2007, 1, 1): datetime(2006, 1, 1), + datetime(2007, 1, 15): datetime(2007, 1, 1), + datetime(2008, 6, 30): datetime(2008, 1, 1), + datetime(2008, 12, 31): datetime(2008, 1, 1), + datetime(2006, 12, 29): datetime(2006, 1, 1), + datetime(2006, 12, 30): datetime(2006, 1, 1), + datetime(2007, 1, 1): datetime(2006, 1, 1), })) + + offset_cases.append((YearBegin(-2), + {datetime(2007, 1, 1): datetime(2005, 1, 1), + datetime(2008, 6, 30): datetime(2007, 1, 1), + datetime(2008, 12, 31): datetime(2007, 1, 1), })) + + offset_cases.append((YearBegin(month=4), + 
{datetime(2007, 4, 1): datetime(2008, 4, 1), + datetime(2007, 4, 15): datetime(2008, 4, 1), + datetime(2007, 3, 1): datetime(2007, 4, 1), + datetime(2007, 12, 15): datetime(2008, 4, 1), + datetime(2012, 1, 31): datetime(2012, 4, 1), })) + + offset_cases.append((YearBegin(0, month=4), + {datetime(2007, 4, 1): datetime(2007, 4, 1), + datetime(2007, 3, 1): datetime(2007, 4, 1), + datetime(2007, 12, 15): datetime(2008, 4, 1), + datetime(2012, 1, 31): datetime(2012, 4, 1), })) + + offset_cases.append((YearBegin(4, month=4), + {datetime(2007, 4, 1): datetime(2011, 4, 1), + datetime(2007, 4, 15): datetime(2011, 4, 1), + datetime(2007, 3, 1): datetime(2010, 4, 1), + datetime(2007, 12, 15): datetime(2011, 4, 1), + datetime(2012, 1, 31): datetime(2015, 4, 1), })) + + offset_cases.append((YearBegin(-1, month=4), + {datetime(2007, 4, 1): datetime(2006, 4, 1), + datetime(2007, 3, 1): datetime(2006, 4, 1), + datetime(2007, 12, 15): datetime(2007, 4, 1), + datetime(2012, 1, 31): datetime(2011, 4, 1), })) + + offset_cases.append((YearBegin(-3, month=4), + {datetime(2007, 4, 1): datetime(2004, 4, 1), + datetime(2007, 3, 1): datetime(2004, 4, 1), + datetime(2007, 12, 15): datetime(2005, 4, 1), + datetime(2012, 1, 31): datetime(2009, 4, 1), })) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [(YearBegin(), datetime(2007, 1, 3), False), + (YearBegin(), datetime(2008, 1, 1), True), + (YearBegin(), datetime(2006, 12, 31), False), + (YearBegin(), datetime(2006, 1, 2), False)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + offset, dt, expected = case + assert_onOffset(offset, dt, expected) class TestBYearEndLagged(Base): @@ -4109,20 +4079,20 @@ def test_bad_month_fail(self): pytest.raises(Exception, BYearEnd, month=13) pytest.raises(Exception, BYearEnd, month=0) - def test_offset(self): - tests = [] + offset_cases = [] + offset_cases.append((BYearEnd(month=6), + {datetime(2008, 1, 1): datetime(2008, 6, 30), + datetime(2007, 6, 30): datetime(2008, 6, 30)}, )) - tests.append((BYearEnd(month=6), - {datetime(2008, 1, 1): datetime(2008, 6, 30), - datetime(2007, 6, 30): datetime(2008, 6, 30)}, )) + offset_cases.append((BYearEnd(n=-1, month=6), + {datetime(2008, 1, 1): datetime(2007, 6, 29), + datetime(2007, 6, 30): datetime(2007, 6, 29)}, )) - tests.append((BYearEnd(n=-1, month=6), - {datetime(2008, 1, 1): datetime(2007, 6, 29), - datetime(2007, 6, 30): datetime(2007, 6, 29)}, )) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assert base + offset == expected + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert base + offset == expected def test_roll(self): offset = BYearEnd(month=6) @@ -4131,64 +4101,60 @@ def test_roll(self): assert offset.rollforward(date) == datetime(2010, 6, 30) assert offset.rollback(date) == datetime(2009, 6, 30) - def test_onOffset(self): - - tests = [ - (BYearEnd(month=2), datetime(2007, 2, 28), True), - (BYearEnd(month=6), datetime(2007, 6, 30), False), - ] + on_offset_cases = [(BYearEnd(month=2), datetime(2007, 2, 28), True), + (BYearEnd(month=6), datetime(2007, 6, 30), False)] - for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + 
offset, dt, expected = case + assert_onOffset(offset, dt, expected) class TestBYearEnd(Base): _offset = BYearEnd - def test_offset(self): - tests = [] - - tests.append((BYearEnd(), - {datetime(2008, 1, 1): datetime(2008, 12, 31), - datetime(2008, 6, 30): datetime(2008, 12, 31), - datetime(2008, 12, 31): datetime(2009, 12, 31), - datetime(2005, 12, 30): datetime(2006, 12, 29), - datetime(2005, 12, 31): datetime(2006, 12, 29), })) - - tests.append((BYearEnd(0), - {datetime(2008, 1, 1): datetime(2008, 12, 31), - datetime(2008, 6, 30): datetime(2008, 12, 31), - datetime(2008, 12, 31): datetime(2008, 12, 31), - datetime(2005, 12, 31): datetime(2006, 12, 29), })) - - tests.append((BYearEnd(-1), - {datetime(2007, 1, 1): datetime(2006, 12, 29), - datetime(2008, 6, 30): datetime(2007, 12, 31), - datetime(2008, 12, 31): datetime(2007, 12, 31), - datetime(2006, 12, 29): datetime(2005, 12, 30), - datetime(2006, 12, 30): datetime(2006, 12, 29), - datetime(2007, 1, 1): datetime(2006, 12, 29), })) - - tests.append((BYearEnd(-2), - {datetime(2007, 1, 1): datetime(2005, 12, 30), - datetime(2008, 6, 30): datetime(2006, 12, 29), - datetime(2008, 12, 31): datetime(2006, 12, 29), })) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - def test_onOffset(self): - - tests = [ - (BYearEnd(), datetime(2007, 12, 31), True), - (BYearEnd(), datetime(2008, 1, 1), False), - (BYearEnd(), datetime(2006, 12, 31), False), - (BYearEnd(), datetime(2006, 12, 29), True), - ] - - for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) + offset_cases = [] + offset_cases.append((BYearEnd(), + {datetime(2008, 1, 1): datetime(2008, 12, 31), + datetime(2008, 6, 30): datetime(2008, 12, 31), + datetime(2008, 12, 31): datetime(2009, 12, 31), + datetime(2005, 12, 30): datetime(2006, 12, 29), + datetime(2005, 12, 31): datetime(2006, 12, 29), })) + + offset_cases.append((BYearEnd(0), + {datetime(2008, 1, 1): datetime(2008, 12, 31), + datetime(2008, 6, 30): datetime(2008, 12, 31), + datetime(2008, 12, 31): datetime(2008, 12, 31), + datetime(2005, 12, 31): datetime(2006, 12, 29), })) + + offset_cases.append((BYearEnd(-1), + {datetime(2007, 1, 1): datetime(2006, 12, 29), + datetime(2008, 6, 30): datetime(2007, 12, 31), + datetime(2008, 12, 31): datetime(2007, 12, 31), + datetime(2006, 12, 29): datetime(2005, 12, 30), + datetime(2006, 12, 30): datetime(2006, 12, 29), + datetime(2007, 1, 1): datetime(2006, 12, 29), })) + + offset_cases.append((BYearEnd(-2), + {datetime(2007, 1, 1): datetime(2005, 12, 30), + datetime(2008, 6, 30): datetime(2006, 12, 29), + datetime(2008, 12, 31): datetime(2006, 12, 29), })) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [(BYearEnd(), datetime(2007, 12, 31), True), + (BYearEnd(), datetime(2008, 1, 1), False), + (BYearEnd(), datetime(2006, 12, 31), False), + (BYearEnd(), datetime(2006, 12, 29), True)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + offset, dt, expected = case + assert_onOffset(offset, dt, expected) class TestYearEnd(Base): @@ -4197,286 +4163,115 @@ class TestYearEnd(Base): def test_misspecified(self): pytest.raises(ValueError, YearEnd, month=13) - def test_offset(self): - tests = [] - - tests.append((YearEnd(), - {datetime(2008, 1, 1): datetime(2008, 12, 31), - datetime(2008, 6, 30): datetime(2008, 
12, 31), - datetime(2008, 12, 31): datetime(2009, 12, 31), - datetime(2005, 12, 30): datetime(2005, 12, 31), - datetime(2005, 12, 31): datetime(2006, 12, 31), })) - - tests.append((YearEnd(0), - {datetime(2008, 1, 1): datetime(2008, 12, 31), - datetime(2008, 6, 30): datetime(2008, 12, 31), - datetime(2008, 12, 31): datetime(2008, 12, 31), - datetime(2005, 12, 30): datetime(2005, 12, 31), })) - - tests.append((YearEnd(-1), - {datetime(2007, 1, 1): datetime(2006, 12, 31), - datetime(2008, 6, 30): datetime(2007, 12, 31), - datetime(2008, 12, 31): datetime(2007, 12, 31), - datetime(2006, 12, 29): datetime(2005, 12, 31), - datetime(2006, 12, 30): datetime(2005, 12, 31), - datetime(2007, 1, 1): datetime(2006, 12, 31), })) - - tests.append((YearEnd(-2), - {datetime(2007, 1, 1): datetime(2005, 12, 31), - datetime(2008, 6, 30): datetime(2006, 12, 31), - datetime(2008, 12, 31): datetime(2006, 12, 31), })) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - def test_onOffset(self): - - tests = [ - (YearEnd(), datetime(2007, 12, 31), True), - (YearEnd(), datetime(2008, 1, 1), False), - (YearEnd(), datetime(2006, 12, 31), True), - (YearEnd(), datetime(2006, 12, 29), False), - ] - - for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) + offset_cases = [] + offset_cases.append((YearEnd(), + {datetime(2008, 1, 1): datetime(2008, 12, 31), + datetime(2008, 6, 30): datetime(2008, 12, 31), + datetime(2008, 12, 31): datetime(2009, 12, 31), + datetime(2005, 12, 30): datetime(2005, 12, 31), + datetime(2005, 12, 31): datetime(2006, 12, 31), })) + + offset_cases.append((YearEnd(0), + {datetime(2008, 1, 1): datetime(2008, 12, 31), + datetime(2008, 6, 30): datetime(2008, 12, 31), + datetime(2008, 12, 31): datetime(2008, 12, 31), + datetime(2005, 12, 30): datetime(2005, 12, 31), })) + + offset_cases.append((YearEnd(-1), + {datetime(2007, 1, 1): datetime(2006, 12, 31), + datetime(2008, 6, 30): datetime(2007, 12, 31), + datetime(2008, 12, 31): datetime(2007, 12, 31), + datetime(2006, 12, 29): datetime(2005, 12, 31), + datetime(2006, 12, 30): datetime(2005, 12, 31), + datetime(2007, 1, 1): datetime(2006, 12, 31), })) + + offset_cases.append((YearEnd(-2), + {datetime(2007, 1, 1): datetime(2005, 12, 31), + datetime(2008, 6, 30): datetime(2006, 12, 31), + datetime(2008, 12, 31): datetime(2006, 12, 31), })) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [(YearEnd(), datetime(2007, 12, 31), True), + (YearEnd(), datetime(2008, 1, 1), False), + (YearEnd(), datetime(2006, 12, 31), True), + (YearEnd(), datetime(2006, 12, 29), False)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + offset, dt, expected = case + assert_onOffset(offset, dt, expected) class TestYearEndDiffMonth(Base): - def test_offset(self): - tests = [] - - tests.append((YearEnd(month=3), - {datetime(2008, 1, 1): datetime(2008, 3, 31), - datetime(2008, 2, 15): datetime(2008, 3, 31), - datetime(2008, 3, 31): datetime(2009, 3, 31), - datetime(2008, 3, 30): datetime(2008, 3, 31), - datetime(2005, 3, 31): datetime(2006, 3, 31), - datetime(2006, 7, 30): datetime(2007, 3, 31)})) - - tests.append((YearEnd(0, month=3), - {datetime(2008, 1, 1): datetime(2008, 3, 31), - datetime(2008, 2, 28): datetime(2008, 3, 31), - datetime(2008, 3, 31): datetime(2008, 3, 31), - 
datetime(2005, 3, 30): datetime(2005, 3, 31), })) - - tests.append((YearEnd(-1, month=3), - {datetime(2007, 1, 1): datetime(2006, 3, 31), - datetime(2008, 2, 28): datetime(2007, 3, 31), - datetime(2008, 3, 31): datetime(2007, 3, 31), - datetime(2006, 3, 29): datetime(2005, 3, 31), - datetime(2006, 3, 30): datetime(2005, 3, 31), - datetime(2007, 3, 1): datetime(2006, 3, 31), })) - - tests.append((YearEnd(-2, month=3), - {datetime(2007, 1, 1): datetime(2005, 3, 31), - datetime(2008, 6, 30): datetime(2007, 3, 31), - datetime(2008, 3, 31): datetime(2006, 3, 31), })) - - for offset, cases in tests: - for base, expected in compat.iteritems(cases): - assertEq(offset, base, expected) - - def test_onOffset(self): - - tests = [ - (YearEnd(month=3), datetime(2007, 3, 31), True), - (YearEnd(month=3), datetime(2008, 1, 1), False), - (YearEnd(month=3), datetime(2006, 3, 31), True), - (YearEnd(month=3), datetime(2006, 3, 29), False), - ] - - for offset, dt, expected in tests: - assertOnOffset(offset, dt, expected) - - -def assertEq(offset, base, expected): - actual = offset + base - actual_swapped = base + offset - actual_apply = offset.apply(base) - try: - assert actual == expected - assert actual_swapped == expected - assert actual_apply == expected - except AssertionError: - raise AssertionError("\nExpected: %s\nActual: %s\nFor Offset: %s)" - "\nAt Date: %s" % - (expected, actual, offset, base)) + offset_cases = [] + offset_cases.append((YearEnd(month=3), + {datetime(2008, 1, 1): datetime(2008, 3, 31), + datetime(2008, 2, 15): datetime(2008, 3, 31), + datetime(2008, 3, 31): datetime(2009, 3, 31), + datetime(2008, 3, 30): datetime(2008, 3, 31), + datetime(2005, 3, 31): datetime(2006, 3, 31), + datetime(2006, 7, 30): datetime(2007, 3, 31)})) + + offset_cases.append((YearEnd(0, month=3), + {datetime(2008, 1, 1): datetime(2008, 3, 31), + datetime(2008, 2, 28): datetime(2008, 3, 31), + datetime(2008, 3, 31): datetime(2008, 3, 31), + datetime(2005, 3, 30): datetime(2005, 3, 31), })) + + offset_cases.append((YearEnd(-1, month=3), + {datetime(2007, 1, 1): datetime(2006, 3, 31), + datetime(2008, 2, 28): datetime(2007, 3, 31), + datetime(2008, 3, 31): datetime(2007, 3, 31), + datetime(2006, 3, 29): datetime(2005, 3, 31), + datetime(2006, 3, 30): datetime(2005, 3, 31), + datetime(2007, 3, 1): datetime(2006, 3, 31), })) + + offset_cases.append((YearEnd(-2, month=3), + {datetime(2007, 1, 1): datetime(2005, 3, 31), + datetime(2008, 6, 30): datetime(2007, 3, 31), + datetime(2008, 3, 31): datetime(2006, 3, 31), })) + + @pytest.mark.parametrize('case', offset_cases) + def test_offset(self, case): + offset, cases = case + for base, expected in compat.iteritems(cases): + assert_offset_equal(offset, base, expected) + + on_offset_cases = [(YearEnd(month=3), datetime(2007, 3, 31), True), + (YearEnd(month=3), datetime(2008, 1, 1), False), + (YearEnd(month=3), datetime(2006, 3, 31), True), + (YearEnd(month=3), datetime(2006, 3, 29), False)] + + @pytest.mark.parametrize('case', on_offset_cases) + def test_onOffset(self, case): + offset, dt, expected = case + assert_onOffset(offset, dt, expected) def test_Easter(): - assertEq(Easter(), datetime(2010, 1, 1), datetime(2010, 4, 4)) - assertEq(Easter(), datetime(2010, 4, 5), datetime(2011, 4, 24)) - assertEq(Easter(2), datetime(2010, 1, 1), datetime(2011, 4, 24)) - - assertEq(Easter(), datetime(2010, 4, 4), datetime(2011, 4, 24)) - assertEq(Easter(2), datetime(2010, 4, 4), datetime(2012, 4, 8)) + assert_offset_equal(Easter(), datetime(2010, 1, 1), datetime(2010, 4, 4)) + 
assert_offset_equal(Easter(), datetime(2010, 4, 5), datetime(2011, 4, 24)) + assert_offset_equal(Easter(2), datetime(2010, 1, 1), datetime(2011, 4, 24)) - assertEq(-Easter(), datetime(2011, 1, 1), datetime(2010, 4, 4)) - assertEq(-Easter(), datetime(2010, 4, 5), datetime(2010, 4, 4)) - assertEq(-Easter(2), datetime(2011, 1, 1), datetime(2009, 4, 12)) + assert_offset_equal(Easter(), datetime(2010, 4, 4), datetime(2011, 4, 24)) + assert_offset_equal(Easter(2), datetime(2010, 4, 4), datetime(2012, 4, 8)) - assertEq(-Easter(), datetime(2010, 4, 4), datetime(2009, 4, 12)) - assertEq(-Easter(2), datetime(2010, 4, 4), datetime(2008, 3, 23)) + assert_offset_equal(-Easter(), datetime(2011, 1, 1), datetime(2010, 4, 4)) + assert_offset_equal(-Easter(), datetime(2010, 4, 5), datetime(2010, 4, 4)) + assert_offset_equal(-Easter(2), + datetime(2011, 1, 1), + datetime(2009, 4, 12)) - -class TestTicks(object): - - ticks = [Hour, Minute, Second, Milli, Micro, Nano] - - def test_ticks(self): - offsets = [(Hour, Timedelta(hours=5)), - (Minute, Timedelta(hours=2, minutes=3)), - (Second, Timedelta(hours=2, seconds=3)), - (Milli, Timedelta(hours=2, milliseconds=3)), - (Micro, Timedelta(hours=2, microseconds=3)), - (Nano, Timedelta(hours=2, nanoseconds=3))] - - for kls, expected in offsets: - offset = kls(3) - result = offset + Timedelta(hours=2) - assert isinstance(result, Timedelta) - assert result == expected - - def test_Hour(self): - assertEq(Hour(), datetime(2010, 1, 1), datetime(2010, 1, 1, 1)) - assertEq(Hour(-1), datetime(2010, 1, 1, 1), datetime(2010, 1, 1)) - assertEq(2 * Hour(), datetime(2010, 1, 1), datetime(2010, 1, 1, 2)) - assertEq(-1 * Hour(), datetime(2010, 1, 1, 1), datetime(2010, 1, 1)) - - assert Hour(3) + Hour(2) == Hour(5) - assert Hour(3) - Hour(2) == Hour() - - assert Hour(4) != Hour(1) - - def test_Minute(self): - assertEq(Minute(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 1)) - assertEq(Minute(-1), datetime(2010, 1, 1, 0, 1), datetime(2010, 1, 1)) - assertEq(2 * Minute(), datetime(2010, 1, 1), - datetime(2010, 1, 1, 0, 2)) - assertEq(-1 * Minute(), datetime(2010, 1, 1, 0, 1), - datetime(2010, 1, 1)) - - assert Minute(3) + Minute(2) == Minute(5) - assert Minute(3) - Minute(2) == Minute() - assert Minute(5) != Minute() - - def test_Second(self): - assertEq(Second(), datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 0, 1)) - assertEq(Second(-1), datetime(2010, 1, 1, - 0, 0, 1), datetime(2010, 1, 1)) - assertEq(2 * Second(), datetime(2010, 1, 1), - datetime(2010, 1, 1, 0, 0, 2)) - assertEq(-1 * Second(), datetime(2010, 1, 1, 0, 0, 1), - datetime(2010, 1, 1)) - - assert Second(3) + Second(2) == Second(5) - assert Second(3) - Second(2) == Second() - - def test_Millisecond(self): - assertEq(Milli(), datetime(2010, 1, 1), - datetime(2010, 1, 1, 0, 0, 0, 1000)) - assertEq(Milli(-1), datetime(2010, 1, 1, 0, - 0, 0, 1000), datetime(2010, 1, 1)) - assertEq(Milli(2), datetime(2010, 1, 1), - datetime(2010, 1, 1, 0, 0, 0, 2000)) - assertEq(2 * Milli(), datetime(2010, 1, 1), - datetime(2010, 1, 1, 0, 0, 0, 2000)) - assertEq(-1 * Milli(), datetime(2010, 1, 1, 0, 0, 0, 1000), - datetime(2010, 1, 1)) - - assert Milli(3) + Milli(2) == Milli(5) - assert Milli(3) - Milli(2) == Milli() - - def test_MillisecondTimestampArithmetic(self): - assertEq(Milli(), Timestamp('2010-01-01'), - Timestamp('2010-01-01 00:00:00.001')) - assertEq(Milli(-1), Timestamp('2010-01-01 00:00:00.001'), - Timestamp('2010-01-01')) - - def test_Microsecond(self): - assertEq(Micro(), datetime(2010, 1, 1), - datetime(2010, 1, 1, 0, 0, 0, 
1)) - assertEq(Micro(-1), datetime(2010, 1, 1, - 0, 0, 0, 1), datetime(2010, 1, 1)) - assertEq(2 * Micro(), datetime(2010, 1, 1), - datetime(2010, 1, 1, 0, 0, 0, 2)) - assertEq(-1 * Micro(), datetime(2010, 1, 1, 0, 0, 0, 1), - datetime(2010, 1, 1)) - - assert Micro(3) + Micro(2) == Micro(5) - assert Micro(3) - Micro(2) == Micro() - - def test_NanosecondGeneric(self): - timestamp = Timestamp(datetime(2010, 1, 1)) - assert timestamp.nanosecond == 0 - - result = timestamp + Nano(10) - assert result.nanosecond == 10 - - reverse_result = Nano(10) + timestamp - assert reverse_result.nanosecond == 10 - - def test_Nanosecond(self): - timestamp = Timestamp(datetime(2010, 1, 1)) - assertEq(Nano(), timestamp, timestamp + np.timedelta64(1, 'ns')) - assertEq(Nano(-1), timestamp + np.timedelta64(1, 'ns'), timestamp) - assertEq(2 * Nano(), timestamp, timestamp + np.timedelta64(2, 'ns')) - assertEq(-1 * Nano(), timestamp + np.timedelta64(1, 'ns'), timestamp) - - assert Nano(3) + Nano(2) == Nano(5) - assert Nano(3) - Nano(2) == Nano() - - # GH9284 - assert Nano(1) + Nano(10) == Nano(11) - assert Nano(5) + Micro(1) == Nano(1005) - assert Micro(5) + Nano(1) == Nano(5001) - - def test_tick_zero(self): - for t1 in self.ticks: - for t2 in self.ticks: - assert t1(0) == t2(0) - assert t1(0) + t2(0) == t1(0) - - if t1 is not Nano: - assert t1(2) + t2(0) == t1(2) - if t1 is Nano: - assert t1(2) + Nano(0) == t1(2) - - def test_tick_equalities(self): - for t in self.ticks: - assert t(3) == t(3) - assert t() == t(1) - - # not equals - assert t(3) != t(2) - assert t(3) != t(-3) - - def test_tick_operators(self): - for t in self.ticks: - assert t(3) + t(2) == t(5) - assert t(3) - t(2) == t(1) - assert t(800) + t(300) == t(1100) - assert t(1000) - t(5) == t(995) - - def test_tick_offset(self): - for t in self.ticks: - assert not t().isAnchored() - - def test_compare_ticks(self): - for kls in self.ticks: - three = kls(3) - four = kls(4) - - for _ in range(10): - assert three < kls(4) - assert kls(3) < four - assert four > kls(3) - assert kls(4) > three - assert kls(3) == kls(3) - assert kls(3) != kls(4) + assert_offset_equal(-Easter(), datetime(2010, 4, 4), datetime(2009, 4, 12)) + assert_offset_equal(-Easter(2), + datetime(2010, 4, 4), + datetime(2008, 3, 23)) class TestOffsetNames(object): @@ -4641,19 +4436,6 @@ def test_rule_code(self): assert k == _get_freq_str(code) -def test_apply_ticks(): - result = offsets.Hour(3).apply(offsets.Hour(4)) - exp = offsets.Hour(7) - assert (result == exp) - - -def test_delta_to_tick(): - delta = timedelta(3) - - tick = offsets._delta_to_tick(delta) - assert (tick == offsets.Day(3)) - - def test_dateoffset_misc(): oset = offsets.DateOffset(months=2, days=4) # it works @@ -4875,27 +4657,29 @@ def test_springforward_singular(self): self._test_all_offsets(n=1, tstart=self._make_timestamp( self.ts_pre_springfwd, hrs_pre, tz), expected_utc_offset=None) - def test_all_offset_classes(self): - tests = {MonthBegin: ['11/2/2012', '12/1/2012'], - MonthEnd: ['11/2/2012', '11/30/2012'], - BMonthBegin: ['11/2/2012', '12/3/2012'], - BMonthEnd: ['11/2/2012', '11/30/2012'], - CBMonthBegin: ['11/2/2012', '12/3/2012'], - CBMonthEnd: ['11/2/2012', '11/30/2012'], - SemiMonthBegin: ['11/2/2012', '11/15/2012'], - SemiMonthEnd: ['11/2/2012', '11/15/2012'], - Week: ['11/2/2012', '11/9/2012'], - YearBegin: ['11/2/2012', '1/1/2013'], - YearEnd: ['11/2/2012', '12/31/2012'], - BYearBegin: ['11/2/2012', '1/1/2013'], - BYearEnd: ['11/2/2012', '12/31/2012'], - QuarterBegin: ['11/2/2012', '12/1/2012'], - QuarterEnd: 
['11/2/2012', '12/31/2012'], - BQuarterBegin: ['11/2/2012', '12/3/2012'], - BQuarterEnd: ['11/2/2012', '12/31/2012'], - Day: ['11/4/2012', '11/4/2012 23:00']} - - for offset, test_values in iteritems(tests): - first = Timestamp(test_values[0], tz='US/Eastern') + offset() - second = Timestamp(test_values[1], tz='US/Eastern') - assert first == second + offset_classes = {MonthBegin: ['11/2/2012', '12/1/2012'], + MonthEnd: ['11/2/2012', '11/30/2012'], + BMonthBegin: ['11/2/2012', '12/3/2012'], + BMonthEnd: ['11/2/2012', '11/30/2012'], + CBMonthBegin: ['11/2/2012', '12/3/2012'], + CBMonthEnd: ['11/2/2012', '11/30/2012'], + SemiMonthBegin: ['11/2/2012', '11/15/2012'], + SemiMonthEnd: ['11/2/2012', '11/15/2012'], + Week: ['11/2/2012', '11/9/2012'], + YearBegin: ['11/2/2012', '1/1/2013'], + YearEnd: ['11/2/2012', '12/31/2012'], + BYearBegin: ['11/2/2012', '1/1/2013'], + BYearEnd: ['11/2/2012', '12/31/2012'], + QuarterBegin: ['11/2/2012', '12/1/2012'], + QuarterEnd: ['11/2/2012', '12/31/2012'], + BQuarterBegin: ['11/2/2012', '12/3/2012'], + BQuarterEnd: ['11/2/2012', '12/31/2012'], + Day: ['11/4/2012', '11/4/2012 23:00']}.items() + + @pytest.mark.parametrize('tup', offset_classes) + def test_all_offset_classes(self, tup): + offset, test_values = tup + + first = Timestamp(test_values[0], tz='US/Eastern') + offset() + second = Timestamp(test_values[1], tz='US/Eastern') + assert first == second diff --git a/pandas/tests/tseries/offsets/test_ticks.py b/pandas/tests/tseries/offsets/test_ticks.py new file mode 100644 index 00000000000000..24033d4ff6cbde --- /dev/null +++ b/pandas/tests/tseries/offsets/test_ticks.py @@ -0,0 +1,236 @@ +# -*- coding: utf-8 -*- +""" +Tests for offsets.Tick and subclasses +""" +from datetime import datetime, timedelta + +import pytest +import numpy as np + +from pandas import Timedelta, Timestamp +from pandas.tseries import offsets +from pandas.tseries.offsets import Hour, Minute, Second, Milli, Micro, Nano + +from .common import assert_offset_equal + +# --------------------------------------------------------------------- +# Test Helpers + +tick_classes = [Hour, Minute, Second, Milli, Micro, Nano] + + +# --------------------------------------------------------------------- + + +def test_apply_ticks(): + result = offsets.Hour(3).apply(offsets.Hour(4)) + exp = offsets.Hour(7) + assert (result == exp) + + +def test_delta_to_tick(): + delta = timedelta(3) + + tick = offsets._delta_to_tick(delta) + assert (tick == offsets.Day(3)) + + +# --------------------------------------------------------------------- + + +def test_Hour(): + assert_offset_equal(Hour(), + datetime(2010, 1, 1), datetime(2010, 1, 1, 1)) + assert_offset_equal(Hour(-1), + datetime(2010, 1, 1, 1), datetime(2010, 1, 1)) + assert_offset_equal(2 * Hour(), + datetime(2010, 1, 1), datetime(2010, 1, 1, 2)) + assert_offset_equal(-1 * Hour(), + datetime(2010, 1, 1, 1), datetime(2010, 1, 1)) + + assert Hour(3) + Hour(2) == Hour(5) + assert Hour(3) - Hour(2) == Hour() + + assert Hour(4) != Hour(1) + + +def test_Minute(): + assert_offset_equal(Minute(), + datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 1)) + assert_offset_equal(Minute(-1), + datetime(2010, 1, 1, 0, 1), datetime(2010, 1, 1)) + assert_offset_equal(2 * Minute(), + datetime(2010, 1, 1), datetime(2010, 1, 1, 0, 2)) + assert_offset_equal(-1 * Minute(), + datetime(2010, 1, 1, 0, 1), datetime(2010, 1, 1)) + + assert Minute(3) + Minute(2) == Minute(5) + assert Minute(3) - Minute(2) == Minute() + assert Minute(5) != Minute() + + +def test_Second(): + 
assert_offset_equal(Second(), + datetime(2010, 1, 1), + datetime(2010, 1, 1, 0, 0, 1)) + assert_offset_equal(Second(-1), + datetime(2010, 1, 1, 0, 0, 1), + datetime(2010, 1, 1)) + assert_offset_equal(2 * Second(), + datetime(2010, 1, 1), + datetime(2010, 1, 1, 0, 0, 2)) + assert_offset_equal(-1 * Second(), + datetime(2010, 1, 1, 0, 0, 1), + datetime(2010, 1, 1)) + + assert Second(3) + Second(2) == Second(5) + assert Second(3) - Second(2) == Second() + + +def test_Millisecond(): + assert_offset_equal(Milli(), + datetime(2010, 1, 1), + datetime(2010, 1, 1, 0, 0, 0, 1000)) + assert_offset_equal(Milli(-1), + datetime(2010, 1, 1, 0, 0, 0, 1000), + datetime(2010, 1, 1)) + assert_offset_equal(Milli(2), + datetime(2010, 1, 1), + datetime(2010, 1, 1, 0, 0, 0, 2000)) + assert_offset_equal(2 * Milli(), + datetime(2010, 1, 1), + datetime(2010, 1, 1, 0, 0, 0, 2000)) + assert_offset_equal(-1 * Milli(), + datetime(2010, 1, 1, 0, 0, 0, 1000), + datetime(2010, 1, 1)) + + assert Milli(3) + Milli(2) == Milli(5) + assert Milli(3) - Milli(2) == Milli() + + +def test_MillisecondTimestampArithmetic(): + assert_offset_equal(Milli(), + Timestamp('2010-01-01'), + Timestamp('2010-01-01 00:00:00.001')) + assert_offset_equal(Milli(-1), + Timestamp('2010-01-01 00:00:00.001'), + Timestamp('2010-01-01')) + + +def test_Microsecond(): + assert_offset_equal(Micro(), + datetime(2010, 1, 1), + datetime(2010, 1, 1, 0, 0, 0, 1)) + assert_offset_equal(Micro(-1), + datetime(2010, 1, 1, 0, 0, 0, 1), + datetime(2010, 1, 1)) + + assert_offset_equal(2 * Micro(), + datetime(2010, 1, 1), + datetime(2010, 1, 1, 0, 0, 0, 2)) + assert_offset_equal(-1 * Micro(), + datetime(2010, 1, 1, 0, 0, 0, 1), + datetime(2010, 1, 1)) + + assert Micro(3) + Micro(2) == Micro(5) + assert Micro(3) - Micro(2) == Micro() + + +def test_NanosecondGeneric(): + timestamp = Timestamp(datetime(2010, 1, 1)) + assert timestamp.nanosecond == 0 + + result = timestamp + Nano(10) + assert result.nanosecond == 10 + + reverse_result = Nano(10) + timestamp + assert reverse_result.nanosecond == 10 + + +def test_Nanosecond(): + timestamp = Timestamp(datetime(2010, 1, 1)) + assert_offset_equal(Nano(), + timestamp, + timestamp + np.timedelta64(1, 'ns')) + assert_offset_equal(Nano(-1), + timestamp + np.timedelta64(1, 'ns'), + timestamp) + assert_offset_equal(2 * Nano(), + timestamp, + timestamp + np.timedelta64(2, 'ns')) + assert_offset_equal(-1 * Nano(), + timestamp + np.timedelta64(1, 'ns'), + timestamp) + + assert Nano(3) + Nano(2) == Nano(5) + assert Nano(3) - Nano(2) == Nano() + + # GH9284 + assert Nano(1) + Nano(10) == Nano(11) + assert Nano(5) + Micro(1) == Nano(1005) + assert Micro(5) + Nano(1) == Nano(5001) + + +@pytest.mark.parametrize('kls, expected', + [(Hour, Timedelta(hours=5)), + (Minute, Timedelta(hours=2, minutes=3)), + (Second, Timedelta(hours=2, seconds=3)), + (Milli, Timedelta(hours=2, milliseconds=3)), + (Micro, Timedelta(hours=2, microseconds=3)), + (Nano, Timedelta(hours=2, nanoseconds=3))]) +def test_tick_addition(kls, expected): + offset = kls(3) + result = offset + Timedelta(hours=2) + assert isinstance(result, Timedelta) + assert result == expected + + +@pytest.mark.parametrize('cls1', tick_classes) +@pytest.mark.parametrize('cls2', tick_classes) +def test_tick_zero(cls1, cls2): + assert cls1(0) == cls2(0) + assert cls1(0) + cls2(0) == cls1(0) + + if cls1 is not Nano: + assert cls1(2) + cls2(0) == cls1(2) + + if cls1 is Nano: + assert cls1(2) + Nano(0) == cls1(2) + + +@pytest.mark.parametrize('cls', tick_classes) +def test_tick_equalities(cls): + 
assert cls(3) == cls(3) + assert cls() == cls(1) + + # not equals + assert cls(3) != cls(2) + assert cls(3) != cls(-3) + + +@pytest.mark.parametrize('cls', tick_classes) +def test_tick_operators(cls): + assert cls(3) + cls(2) == cls(5) + assert cls(3) - cls(2) == cls(1) + assert cls(800) + cls(300) == cls(1100) + assert cls(1000) - cls(5) == cls(995) + + +@pytest.mark.parametrize('cls', tick_classes) +def test_tick_offset(cls): + assert not cls().isAnchored() + + +@pytest.mark.parametrize('cls', tick_classes) +def test_compare_ticks(cls): + three = cls(3) + four = cls(4) + + # TODO: WTF? What is this range(10) supposed to do? + for _ in range(10): + assert three < cls(4) + assert cls(3) < four + assert four > cls(3) + assert cls(4) > three + assert cls(3) == cls(3) + assert cls(3) != cls(4) diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py index 39a9a87141753e..9666a4c154c635 100644 --- a/pandas/tests/tseries/test_frequencies.py +++ b/pandas/tests/tseries/test_frequencies.py @@ -7,6 +7,7 @@ from pandas import (Index, DatetimeIndex, Timestamp, Series, date_range, period_range) +from pandas._libs.tslibs import resolution import pandas.tseries.frequencies as frequencies from pandas.core.tools.datetimes import to_datetime @@ -169,6 +170,19 @@ def test_to_offset_leading_zero(self): result = frequencies.to_offset(freqstr) assert (result.n == -194) + def test_to_offset_leading_plus(self): + freqstr = '+1d' + result = frequencies.to_offset(freqstr) + assert (result.n == 1) + + freqstr = '+2h30min' + result = frequencies.to_offset(freqstr) + assert (result.n == 150) + + for bad_freq in ['+-1d', '-+1h', '+1', '-7', '+d', '-m']: + with tm.assert_raises_regex(ValueError, 'Invalid frequency:'): + frequencies.to_offset(bad_freq) + def test_to_offset_pd_timedelta(self): # Tests for #9064 td = Timedelta(days=1, seconds=1) @@ -370,35 +384,35 @@ def test_freq_code(self): result = frequencies.get_freq(freqstr) assert result == code - result = frequencies.get_freq_group(freqstr) + result = resolution.get_freq_group(freqstr) assert result == code // 1000 * 1000 - result = frequencies.get_freq_group(code) + result = resolution.get_freq_group(code) assert result == code // 1000 * 1000 def test_freq_group(self): - assert frequencies.get_freq_group('A') == 1000 - assert frequencies.get_freq_group('3A') == 1000 - assert frequencies.get_freq_group('-1A') == 1000 - assert frequencies.get_freq_group('A-JAN') == 1000 - assert frequencies.get_freq_group('A-MAY') == 1000 - - assert frequencies.get_freq_group('Y') == 1000 - assert frequencies.get_freq_group('3Y') == 1000 - assert frequencies.get_freq_group('-1Y') == 1000 - assert frequencies.get_freq_group('Y-JAN') == 1000 - assert frequencies.get_freq_group('Y-MAY') == 1000 - - assert frequencies.get_freq_group(offsets.YearEnd()) == 1000 - assert frequencies.get_freq_group(offsets.YearEnd(month=1)) == 1000 - assert frequencies.get_freq_group(offsets.YearEnd(month=5)) == 1000 - - assert frequencies.get_freq_group('W') == 4000 - assert frequencies.get_freq_group('W-MON') == 4000 - assert frequencies.get_freq_group('W-FRI') == 4000 - assert frequencies.get_freq_group(offsets.Week()) == 4000 - assert frequencies.get_freq_group(offsets.Week(weekday=1)) == 4000 - assert frequencies.get_freq_group(offsets.Week(weekday=5)) == 4000 + assert resolution.get_freq_group('A') == 1000 + assert resolution.get_freq_group('3A') == 1000 + assert resolution.get_freq_group('-1A') == 1000 + assert resolution.get_freq_group('A-JAN') == 1000 + 
assert resolution.get_freq_group('A-MAY') == 1000 + + assert resolution.get_freq_group('Y') == 1000 + assert resolution.get_freq_group('3Y') == 1000 + assert resolution.get_freq_group('-1Y') == 1000 + assert resolution.get_freq_group('Y-JAN') == 1000 + assert resolution.get_freq_group('Y-MAY') == 1000 + + assert resolution.get_freq_group(offsets.YearEnd()) == 1000 + assert resolution.get_freq_group(offsets.YearEnd(month=1)) == 1000 + assert resolution.get_freq_group(offsets.YearEnd(month=5)) == 1000 + + assert resolution.get_freq_group('W') == 4000 + assert resolution.get_freq_group('W-MON') == 4000 + assert resolution.get_freq_group('W-FRI') == 4000 + assert resolution.get_freq_group(offsets.Week()) == 4000 + assert resolution.get_freq_group(offsets.Week(weekday=1)) == 4000 + assert resolution.get_freq_group(offsets.Week(weekday=5)) == 4000 def test_get_to_timestamp_base(self): tsb = frequencies.get_to_timestamp_base @@ -510,7 +524,7 @@ def test_get_freq_code(self): (frequencies.get_freq('W-FRI'), -2)) def test_frequency_misc(self): - assert (frequencies.get_freq_group('T') == + assert (resolution.get_freq_group('T') == frequencies.FreqGroup.FR_MIN) code, stride = frequencies.get_freq_code(offsets.Hour()) diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index ddcf1bb7d8b7bd..3dfad2d4af75ed 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -13,11 +13,11 @@ import pandas.util.testing as tm import pandas.tseries.offsets as offsets -from pandas.compat import lrange, zip +from pandas.compat import lrange, zip, PY3 from pandas.core.indexes.datetimes import bdate_range, date_range from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas._libs import tslib -from pandas._libs.tslibs import timezones +from pandas._libs.tslibs import timezones, conversion from pandas import (Index, Series, DataFrame, isna, Timestamp, NaT, DatetimeIndex, to_datetime) from pandas.util.testing import (assert_frame_equal, assert_series_equal, @@ -1278,16 +1278,22 @@ def test_replace_tzinfo(self): result_dt = dt.replace(tzinfo=tzinfo) result_pd = Timestamp(dt).replace(tzinfo=tzinfo) - if hasattr(result_dt, 'timestamp'): # New method in Py 3.3 - assert result_dt.timestamp() == result_pd.timestamp() + if PY3: + # datetime.timestamp() converts in the local timezone + with tm.set_timezone('UTC'): + assert result_dt.timestamp() == result_pd.timestamp() + assert result_dt == result_pd assert result_dt == result_pd.to_pydatetime() result_dt = dt.replace(tzinfo=tzinfo).replace(tzinfo=None) result_pd = Timestamp(dt).replace(tzinfo=tzinfo).replace(tzinfo=None) - if hasattr(result_dt, 'timestamp'): # New method in Py 3.3 - assert result_dt.timestamp() == result_pd.timestamp() + if PY3: + # datetime.timestamp() converts in the local timezone + with tm.set_timezone('UTC'): + assert result_dt.timestamp() == result_pd.timestamp() + assert result_dt == result_pd assert result_dt == result_pd.to_pydatetime() @@ -1732,14 +1738,14 @@ class TestTslib(object): def test_tslib_tz_convert(self): def compare_utc_to_local(tz_didx, utc_didx): - f = lambda x: tslib.tz_convert_single(x, 'UTC', tz_didx.tz) - result = tslib.tz_convert(tz_didx.asi8, 'UTC', tz_didx.tz) + f = lambda x: conversion.tz_convert_single(x, 'UTC', tz_didx.tz) + result = conversion.tz_convert(tz_didx.asi8, 'UTC', tz_didx.tz) result_single = np.vectorize(f)(tz_didx.asi8) tm.assert_numpy_array_equal(result, result_single) def compare_local_to_utc(tz_didx, utc_didx): - f = lambda 
x: tslib.tz_convert_single(x, tz_didx.tz, 'UTC') - result = tslib.tz_convert(utc_didx.asi8, tz_didx.tz, 'UTC') + f = lambda x: conversion.tz_convert_single(x, tz_didx.tz, 'UTC') + result = conversion.tz_convert(utc_didx.asi8, tz_didx.tz, 'UTC') result_single = np.vectorize(f)(utc_didx.asi8) tm.assert_numpy_array_equal(result, result_single) @@ -1764,14 +1770,14 @@ def compare_local_to_utc(tz_didx, utc_didx): compare_local_to_utc(tz_didx, utc_didx) # Check empty array - result = tslib.tz_convert(np.array([], dtype=np.int64), - timezones.maybe_get_tz('US/Eastern'), - timezones.maybe_get_tz('Asia/Tokyo')) + result = conversion.tz_convert(np.array([], dtype=np.int64), + timezones.maybe_get_tz('US/Eastern'), + timezones.maybe_get_tz('Asia/Tokyo')) tm.assert_numpy_array_equal(result, np.array([], dtype=np.int64)) # Check all-NaT array - result = tslib.tz_convert(np.array([tslib.iNaT], dtype=np.int64), - timezones.maybe_get_tz('US/Eastern'), - timezones.maybe_get_tz('Asia/Tokyo')) + result = conversion.tz_convert(np.array([tslib.iNaT], dtype=np.int64), + timezones.maybe_get_tz('US/Eastern'), + timezones.maybe_get_tz('Asia/Tokyo')) tm.assert_numpy_array_equal(result, np.array( [tslib.iNaT], dtype=np.int64)) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index be25a439f9075b..fef88587a7282e 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- from datetime import timedelta -from pandas.compat import long, zip +from pandas.compat import zip from pandas import compat import re import warnings @@ -13,18 +13,21 @@ is_timedelta64_dtype, is_datetime64_dtype) -import pandas.core.algorithms as algos -from pandas.core.algorithms import unique from pandas.tseries.offsets import DateOffset -from pandas.util._decorators import cache_readonly, deprecate_kwarg +from pandas.util._decorators import deprecate_kwarg import pandas.tseries.offsets as offsets -from pandas._libs import lib, tslib +from pandas._libs import tslib from pandas._libs.tslib import Timedelta from pandas._libs.tslibs.frequencies import ( # noqa get_freq_code, _base_and_stride, _period_str_to_code, _INVALID_FREQ_ERROR, opattern, _lite_rule_alias, _dont_uppercase, _period_code_map, _reverse_period_code_map) +from pandas._libs.tslibs.resolution import (Resolution, + _FrequencyInferer, + _TimedeltaFrequencyInferer) +from pandas._libs.tslibs.parsing import _get_rule_month + from pytz import AmbiguousTimeError @@ -52,184 +55,6 @@ class FreqGroup(object): RESO_DAY = 6 -class Resolution(object): - - RESO_US = RESO_US - RESO_MS = RESO_MS - RESO_SEC = RESO_SEC - RESO_MIN = RESO_MIN - RESO_HR = RESO_HR - RESO_DAY = RESO_DAY - - _reso_str_map = { - RESO_NS: 'nanosecond', - RESO_US: 'microsecond', - RESO_MS: 'millisecond', - RESO_SEC: 'second', - RESO_MIN: 'minute', - RESO_HR: 'hour', - RESO_DAY: 'day' - } - - # factor to multiply a value by to convert it to the next finer grained - # resolution - _reso_mult_map = { - RESO_NS: None, - RESO_US: 1000, - RESO_MS: 1000, - RESO_SEC: 1000, - RESO_MIN: 60, - RESO_HR: 60, - RESO_DAY: 24 - } - - _reso_str_bump_map = { - 'D': 'H', - 'H': 'T', - 'T': 'S', - 'S': 'L', - 'L': 'U', - 'U': 'N', - 'N': None - } - - _str_reso_map = dict([(v, k) for k, v in compat.iteritems(_reso_str_map)]) - - _reso_freq_map = { - 'year': 'A', - 'quarter': 'Q', - 'month': 'M', - 'day': 'D', - 'hour': 'H', - 'minute': 'T', - 'second': 'S', - 'millisecond': 'L', - 'microsecond': 'U', - 'nanosecond': 'N'} - - _freq_reso_map = dict([(v, k) - for k, v 
in compat.iteritems(_reso_freq_map)]) - - @classmethod - def get_str(cls, reso): - """ - Return resolution str against resolution code. - - Example - ------- - >>> Resolution.get_str(Resolution.RESO_SEC) - 'second' - """ - return cls._reso_str_map.get(reso, 'day') - - @classmethod - def get_reso(cls, resostr): - """ - Return resolution str against resolution code. - - Example - ------- - >>> Resolution.get_reso('second') - 2 - - >>> Resolution.get_reso('second') == Resolution.RESO_SEC - True - """ - return cls._str_reso_map.get(resostr, cls.RESO_DAY) - - @classmethod - def get_freq_group(cls, resostr): - """ - Return frequency str against resolution str. - - Example - ------- - >>> f.Resolution.get_freq_group('day') - 4000 - """ - return get_freq_group(cls.get_freq(resostr)) - - @classmethod - def get_freq(cls, resostr): - """ - Return frequency str against resolution str. - - Example - ------- - >>> f.Resolution.get_freq('day') - 'D' - """ - return cls._reso_freq_map[resostr] - - @classmethod - def get_str_from_freq(cls, freq): - """ - Return resolution str against frequency str. - - Example - ------- - >>> Resolution.get_str_from_freq('H') - 'hour' - """ - return cls._freq_reso_map.get(freq, 'day') - - @classmethod - def get_reso_from_freq(cls, freq): - """ - Return resolution code against frequency str. - - Example - ------- - >>> Resolution.get_reso_from_freq('H') - 4 - - >>> Resolution.get_reso_from_freq('H') == Resolution.RESO_HR - True - """ - return cls.get_reso(cls.get_str_from_freq(freq)) - - @classmethod - def get_stride_from_decimal(cls, value, freq): - """ - Convert freq with decimal stride into a higher freq with integer stride - - Parameters - ---------- - value : integer or float - freq : string - Frequency string - - Raises - ------ - ValueError - If the float cannot be converted to an integer at any resolution. - - Example - ------- - >>> Resolution.get_stride_from_decimal(1.5, 'T') - (90, 'S') - - >>> Resolution.get_stride_from_decimal(1.04, 'H') - (3744, 'S') - - >>> Resolution.get_stride_from_decimal(1, 'D') - (1, 'D') - """ - - if np.isclose(value % 1, 0): - return int(value), freq - else: - start_reso = cls.get_reso_from_freq(freq) - if start_reso == 0: - raise ValueError( - "Could not convert to integer offset at any resolution" - ) - - next_value = cls._reso_mult_map[start_reso] * value - next_name = cls._reso_str_bump_map[freq] - return cls.get_stride_from_decimal(next_value, next_name) - - def get_to_timestamp_base(base): """ Return frequency code group used for base of to_timestamp against @@ -258,31 +83,6 @@ def get_to_timestamp_base(base): return base -def get_freq_group(freq): - """ - Return frequency code group of given frequency str or offset. - - Example - ------- - >>> get_freq_group('W-MON') - 4000 - - >>> get_freq_group('W-FRI') - 4000 - """ - if isinstance(freq, offsets.DateOffset): - freq = freq.rule_code - - if isinstance(freq, compat.string_types): - base, mult = get_freq_code(freq) - freq = base - elif isinstance(freq, int): - pass - else: - raise ValueError('input must be str, offset or int') - return (freq // 1000) * 1000 - - def get_freq(freq): """ Return frequency code of given frequency str. 
@@ -562,278 +362,6 @@ def infer_freq(index, warn=True): return inferer.get_freq() -_ONE_MICRO = long(1000) -_ONE_MILLI = _ONE_MICRO * 1000 -_ONE_SECOND = _ONE_MILLI * 1000 -_ONE_MINUTE = 60 * _ONE_SECOND -_ONE_HOUR = 60 * _ONE_MINUTE -_ONE_DAY = 24 * _ONE_HOUR - - -class _FrequencyInferer(object): - """ - Not sure if I can avoid the state machine here - """ - - def __init__(self, index, warn=True): - self.index = index - self.values = np.asarray(index).view('i8') - - # This moves the values, which are implicitly in UTC, to the - # the timezone so they are in local time - if hasattr(index, 'tz'): - if index.tz is not None: - self.values = tslib.tz_convert(self.values, 'UTC', index.tz) - - self.warn = warn - - if len(index) < 3: - raise ValueError('Need at least 3 dates to infer frequency') - - self.is_monotonic = (self.index.is_monotonic_increasing or - self.index.is_monotonic_decreasing) - - @cache_readonly - def deltas(self): - return tslib.unique_deltas(self.values) - - @cache_readonly - def deltas_asi8(self): - return tslib.unique_deltas(self.index.asi8) - - @cache_readonly - def is_unique(self): - return len(self.deltas) == 1 - - @cache_readonly - def is_unique_asi8(self): - return len(self.deltas_asi8) == 1 - - def get_freq(self): - if not self.is_monotonic or not self.index.is_unique: - return None - - delta = self.deltas[0] - if _is_multiple(delta, _ONE_DAY): - return self._infer_daily_rule() - else: - # Business hourly, maybe. 17: one day / 65: one weekend - if self.hour_deltas in ([1, 17], [1, 65], [1, 17, 65]): - return 'BH' - # Possibly intraday frequency. Here we use the - # original .asi8 values as the modified values - # will not work around DST transitions. See #8772 - elif not self.is_unique_asi8: - return None - delta = self.deltas_asi8[0] - if _is_multiple(delta, _ONE_HOUR): - # Hours - return _maybe_add_count('H', delta / _ONE_HOUR) - elif _is_multiple(delta, _ONE_MINUTE): - # Minutes - return _maybe_add_count('T', delta / _ONE_MINUTE) - elif _is_multiple(delta, _ONE_SECOND): - # Seconds - return _maybe_add_count('S', delta / _ONE_SECOND) - elif _is_multiple(delta, _ONE_MILLI): - # Milliseconds - return _maybe_add_count('L', delta / _ONE_MILLI) - elif _is_multiple(delta, _ONE_MICRO): - # Microseconds - return _maybe_add_count('U', delta / _ONE_MICRO) - else: - # Nanoseconds - return _maybe_add_count('N', delta) - - @cache_readonly - def day_deltas(self): - return [x / _ONE_DAY for x in self.deltas] - - @cache_readonly - def hour_deltas(self): - return [x / _ONE_HOUR for x in self.deltas] - - @cache_readonly - def fields(self): - return tslib.build_field_sarray(self.values) - - @cache_readonly - def rep_stamp(self): - return lib.Timestamp(self.values[0]) - - def month_position_check(self): - # TODO: cythonize this, very slow - calendar_end = True - business_end = True - calendar_start = True - business_start = True - - years = self.fields['Y'] - months = self.fields['M'] - days = self.fields['D'] - weekdays = self.index.dayofweek - - from calendar import monthrange - for y, m, d, wd in zip(years, months, days, weekdays): - - if calendar_start: - calendar_start &= d == 1 - if business_start: - business_start &= d == 1 or (d <= 3 and wd == 0) - - if calendar_end or business_end: - _, daysinmonth = monthrange(y, m) - cal = d == daysinmonth - if calendar_end: - calendar_end &= cal - if business_end: - business_end &= cal or (daysinmonth - d < 3 and wd == 4) - elif not calendar_start and not business_start: - break - - if calendar_end: - return 'ce' - elif business_end: - 
return 'be' - elif calendar_start: - return 'cs' - elif business_start: - return 'bs' - else: - return None - - @cache_readonly - def mdiffs(self): - nmonths = self.fields['Y'] * 12 + self.fields['M'] - return tslib.unique_deltas(nmonths.astype('i8')) - - @cache_readonly - def ydiffs(self): - return tslib.unique_deltas(self.fields['Y'].astype('i8')) - - def _infer_daily_rule(self): - annual_rule = self._get_annual_rule() - if annual_rule: - nyears = self.ydiffs[0] - month = _month_aliases[self.rep_stamp.month] - alias = '{prefix}-{month}'.format(prefix=annual_rule, month=month) - return _maybe_add_count(alias, nyears) - - quarterly_rule = self._get_quarterly_rule() - if quarterly_rule: - nquarters = self.mdiffs[0] / 3 - mod_dict = {0: 12, 2: 11, 1: 10} - month = _month_aliases[mod_dict[self.rep_stamp.month % 3]] - alias = '{prefix}-{month}'.format(prefix=quarterly_rule, - month=month) - return _maybe_add_count(alias, nquarters) - - monthly_rule = self._get_monthly_rule() - if monthly_rule: - return _maybe_add_count(monthly_rule, self.mdiffs[0]) - - if self.is_unique: - days = self.deltas[0] / _ONE_DAY - if days % 7 == 0: - # Weekly - day = _weekday_rule_aliases[self.rep_stamp.weekday()] - return _maybe_add_count('W-{day}'.format(day=day), days / 7) - else: - return _maybe_add_count('D', days) - - if self._is_business_daily(): - return 'B' - - wom_rule = self._get_wom_rule() - if wom_rule: - return wom_rule - - def _get_annual_rule(self): - if len(self.ydiffs) > 1: - return None - - if len(algos.unique(self.fields['M'])) > 1: - return None - - pos_check = self.month_position_check() - return {'cs': 'AS', 'bs': 'BAS', - 'ce': 'A', 'be': 'BA'}.get(pos_check) - - def _get_quarterly_rule(self): - if len(self.mdiffs) > 1: - return None - - if not self.mdiffs[0] % 3 == 0: - return None - - pos_check = self.month_position_check() - return {'cs': 'QS', 'bs': 'BQS', - 'ce': 'Q', 'be': 'BQ'}.get(pos_check) - - def _get_monthly_rule(self): - if len(self.mdiffs) > 1: - return None - pos_check = self.month_position_check() - return {'cs': 'MS', 'bs': 'BMS', - 'ce': 'M', 'be': 'BM'}.get(pos_check) - - def _is_business_daily(self): - # quick check: cannot be business daily - if self.day_deltas != [1, 3]: - return False - - # probably business daily, but need to confirm - first_weekday = self.index[0].weekday() - shifts = np.diff(self.index.asi8) - shifts = np.floor_divide(shifts, _ONE_DAY) - weekdays = np.mod(first_weekday + np.cumsum(shifts), 7) - return np.all(((weekdays == 0) & (shifts == 3)) | - ((weekdays > 0) & (weekdays <= 4) & (shifts == 1))) - - def _get_wom_rule(self): - # wdiffs = unique(np.diff(self.index.week)) - # We also need -47, -49, -48 to catch index spanning year boundary - # if not lib.ismember(wdiffs, set([4, 5, -47, -49, -48])).all(): - # return None - - weekdays = unique(self.index.weekday) - if len(weekdays) > 1: - return None - - week_of_months = unique((self.index.day - 1) // 7) - # Only attempt to infer up to WOM-4. 
See #9425 - week_of_months = week_of_months[week_of_months < 4] - if len(week_of_months) == 0 or len(week_of_months) > 1: - return None - - # get which week - week = week_of_months[0] + 1 - wd = _weekday_rule_aliases[weekdays[0]] - - return 'WOM-{week}{weekday}'.format(week=week, weekday=wd) - - -class _TimedeltaFrequencyInferer(_FrequencyInferer): - - def _infer_daily_rule(self): - if self.is_unique: - days = self.deltas[0] / _ONE_DAY - if days % 7 == 0: - # Weekly - wd = _weekday_rule_aliases[self.rep_stamp.weekday()] - alias = 'W-{weekday}'.format(weekday=wd) - return _maybe_add_count(alias, days / 7) - else: - return _maybe_add_count('D', days) - - -def _maybe_add_count(base, count): - if count != 1: - return '{count}{base}'.format(count=int(count), base=base) - else: - return base - - def _maybe_coerce_freq(code): """ we might need to coerce a code to a rule_code and uppercase it @@ -963,9 +491,6 @@ def is_superperiod(source, target): return target in ['N'] -_get_rule_month = tslib._get_rule_month - - def _is_annual(rule): rule = rule.upper() return rule == 'A' or rule.startswith('A-') @@ -992,13 +517,5 @@ def _is_weekly(rule): return rule == 'W' or rule.startswith('W-') -DAYS = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'] - MONTHS = tslib._MONTHS _month_numbers = tslib._MONTH_NUMBERS -_month_aliases = tslib._MONTH_ALIASES -_weekday_rule_aliases = dict((k, v) for k, v in enumerate(DAYS)) - - -def _is_multiple(us, mult): - return us % mult == 0 diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 5843aaa23be574..4dc26f4dd69e29 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -22,6 +22,7 @@ _int_to_weekday, _weekday_to_int, _determine_offset, apply_index_wraps, + shift_month, BeginMixin, EndMixin, BaseOffset) @@ -252,6 +253,8 @@ def apply_index(self, i): "applied vectorized".format(kwd=kwd)) def isAnchored(self): + # TODO: Does this make sense for the general case? It would help + # if there were a canonical docstring for what isAnchored means. 
return (self.n == 1) def _params(self): @@ -280,10 +283,10 @@ def _repr_attrs(self): if not hasattr(self, key): kwds_new[key] = self.kwds[key] if len(kwds_new) > 0: - attrs.append('kwds=%s' % (kwds_new)) + attrs.append('kwds={kwds_new}'.format(kwds_new=kwds_new)) elif attr not in exclude: value = getattr(self, attr) - attrs.append('%s=%s' % (attr, value)) + attrs.append('{attr}={value}'.format(attr=attr, value=value)) out = '' if attrs: @@ -721,6 +724,7 @@ def apply(self, other): return result else: + # TODO: Figure out the end of this sente raise ApplyTypeError( 'Only know how to combine business hour with ') @@ -927,10 +931,10 @@ def apply(self, other): n = self.n _, days_in_month = tslib.monthrange(other.year, other.month) if other.day != days_in_month: - other = other + relativedelta(months=-1, day=31) + other = shift_month(other, -1, 'end') if n <= 0: n = n + 1 - other = other + relativedelta(months=n, day=31) + other = shift_month(other, n, 'end') return other @apply_index_wraps @@ -956,7 +960,7 @@ def apply(self, other): if other.day > 1 and n <= 0: # then roll forward if n<=0 n += 1 - return other + relativedelta(months=n, day=1) + return shift_month(other, n, 'start') @apply_index_wraps def apply_index(self, i): @@ -1002,12 +1006,12 @@ def apply(self, other): if not self.onOffset(other): _, days_in_month = tslib.monthrange(other.year, other.month) if 1 < other.day < self.day_of_month: - other += relativedelta(day=self.day_of_month) + other = other.replace(day=self.day_of_month) if n > 0: # rollforward so subtract 1 n -= 1 elif self.day_of_month < other.day < days_in_month: - other += relativedelta(day=self.day_of_month) + other = other.replace(day=self.day_of_month) if n < 0: # rollforward in the negative direction so add 1 n += 1 @@ -1084,11 +1088,11 @@ def onOffset(self, dt): def _apply(self, n, other): # if other.day is not day_of_month move to day_of_month and update n if other.day < self.day_of_month: - other += relativedelta(day=self.day_of_month) + other = other.replace(day=self.day_of_month) if n > 0: n -= 1 elif other.day > self.day_of_month: - other += relativedelta(day=self.day_of_month) + other = other.replace(day=self.day_of_month) if n == 0: n = 1 else: @@ -1096,7 +1100,7 @@ def _apply(self, n, other): months = n // 2 day = 31 if n % 2 else self.day_of_month - return other + relativedelta(months=months, day=day) + return shift_month(other, months, day) def _get_roll(self, i, before_day_of_month, after_day_of_month): n = self.n @@ -1141,13 +1145,13 @@ def onOffset(self, dt): def _apply(self, n, other): # if other.day is not day_of_month move to day_of_month and update n if other.day < self.day_of_month: - other += relativedelta(day=self.day_of_month) + other = other.replace(day=self.day_of_month) if n == 0: n = -1 else: n -= 1 elif other.day > self.day_of_month: - other += relativedelta(day=self.day_of_month) + other = other.replace(day=self.day_of_month) if n == 0: n = 1 elif n < 0: @@ -1155,7 +1159,7 @@ def _apply(self, n, other): months = n // 2 + n % 2 day = 1 if n % 2 else self.day_of_month - return other + relativedelta(months=months, day=day) + return shift_month(other, months, day) def _get_roll(self, i, before_day_of_month, after_day_of_month): n = self.n @@ -1191,7 +1195,7 @@ def apply(self, other): n = n - 1 elif n <= 0 and other.day > lastBDay: n = n + 1 - other = other + relativedelta(months=n, day=31) + other = shift_month(other, n, 'end') if other.weekday() > 4: other = other - BDay() @@ -1215,7 +1219,7 @@ def apply(self, other): other = other + 
timedelta(days=first - other.day) n -= 1 - other = other + relativedelta(months=n) + other = shift_month(other, n, None) wkday, _ = tslib.monthrange(other.year, other.month) first = _get_firstbday(wkday) result = datetime(other.year, other.month, first, @@ -1520,8 +1524,7 @@ def apply(self, other): else: months = self.n + 1 - other = self.getOffsetOfMonth( - other + relativedelta(months=months, day=1)) + other = self.getOffsetOfMonth(shift_month(other, months, 'start')) other = datetime(other.year, other.month, other.day, base.hour, base.minute, base.second, base.microsecond) return other @@ -1612,8 +1615,7 @@ def apply(self, other): else: months = self.n + 1 - return self.getOffsetOfMonth( - other + relativedelta(months=months, day=1)) + return self.getOffsetOfMonth(shift_month(other, months, 'start')) def getOffsetOfMonth(self, dt): m = MonthEnd() @@ -1716,7 +1718,7 @@ def apply(self, other): elif n <= 0 and other.day > lastBDay and monthsToGo == 0: n = n + 1 - other = other + relativedelta(months=monthsToGo + 3 * n, day=31) + other = shift_month(other, monthsToGo + 3 * n, 'end') other = tslib._localize_pydatetime(other, base.tzinfo) if other.weekday() > 4: other = other - BDay() @@ -1761,7 +1763,7 @@ def apply(self, other): n = n - 1 # get the first bday for result - other = other + relativedelta(months=3 * n - monthsSince) + other = shift_month(other, 3 * n - monthsSince, None) wkday, _ = tslib.monthrange(other.year, other.month) first = _get_firstbday(wkday) result = datetime(other.year, other.month, first, @@ -1795,7 +1797,7 @@ def apply(self, other): if n > 0 and not (other.day >= days_in_month and monthsToGo == 0): n = n - 1 - other = other + relativedelta(months=monthsToGo + 3 * n, day=31) + other = shift_month(other, monthsToGo + 3 * n, 'end') return other @apply_index_wraps @@ -1830,7 +1832,7 @@ def apply(self, other): # after start, so come back an extra period as if rolled forward n = n + 1 - other = other + relativedelta(months=3 * n - monthsSince, day=1) + other = shift_month(other, 3 * n - monthsSince, 'start') return other @apply_index_wraps @@ -1889,7 +1891,7 @@ def apply(self, other): (other.month == self.month and other.day > lastBDay)): years += 1 - other = other + relativedelta(years=years) + other = shift_month(other, 12 * years, None) _, days_in_month = tslib.monthrange(other.year, self.month) result = datetime(other.year, self.month, days_in_month, @@ -1927,7 +1929,7 @@ def apply(self, other): years += 1 # set first bday for result - other = other + relativedelta(years=years) + other = shift_month(other, years * 12, None) wkday, days_in_month = tslib.monthrange(other.year, self.month) first = _get_firstbday(wkday) return datetime(other.year, self.month, first, other.hour, @@ -2145,8 +2147,8 @@ def onOffset(self, dt): if self.variation == "nearest": # We have to check the year end of "this" cal year AND the previous - return year_end == dt or \ - self.get_year_end(dt - relativedelta(months=1)) == dt + return (year_end == dt or + self.get_year_end(shift_month(dt, -1, None)) == dt) else: return year_end == dt @@ -2226,8 +2228,8 @@ def get_year_end(self, dt): def get_target_month_end(self, dt): target_month = datetime( dt.year, self.startingMonth, 1, tzinfo=dt.tzinfo) - next_month_first_of = target_month + relativedelta(months=+1) - return next_month_first_of + relativedelta(days=-1) + next_month_first_of = shift_month(target_month, 1, None) + return next_month_first_of + timedelta(days=-1) def _get_year_end_nearest(self, dt): target_date = 
self.get_target_month_end(dt) @@ -2382,7 +2384,7 @@ def apply(self, other): qtr_lens = self.get_weeks(other + self._offset) for weeks in qtr_lens: - start += relativedelta(weeks=weeks) + start += timedelta(weeks=weeks) if start > other: other = start n -= 1 @@ -2399,7 +2401,7 @@ def apply(self, other): qtr_lens = self.get_weeks(other) for weeks in reversed(qtr_lens): - end -= relativedelta(weeks=weeks) + end -= timedelta(weeks=weeks) if end < other: other = end n -= 1 @@ -2442,7 +2444,7 @@ def onOffset(self, dt): current = next_year_end for qtr_len in qtr_lens[0:4]: - current += relativedelta(weeks=qtr_len) + current += timedelta(weeks=qtr_len) if dt == current: return True return False diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index 7c9250e52d4825..6be6152b09fc8c 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -3,7 +3,7 @@ import inspect import types import warnings -from textwrap import dedent +from textwrap import dedent, wrap from functools import wraps, update_wrapper @@ -29,11 +29,16 @@ def deprecate(name, alternative, alt_name=None, klass=None, alt_name = alt_name or alternative.__name__ klass = klass or FutureWarning - msg = msg or "{} is deprecated. Use {} instead".format(name, alt_name) + msg = msg or "{} is deprecated, use {} instead".format(name, alt_name) + @wraps(alternative) def wrapper(*args, **kwargs): warnings.warn(msg, klass, stacklevel=stacklevel) return alternative(*args, **kwargs) + + if getattr(wrapper, '__doc__', None) is not None: + wrapper.__doc__ = ('\n'.join(wrap(msg, 70)) + '\n' + + dedent(wrapper.__doc__)) return wrapper diff --git a/setup.py b/setup.py index dd24c5c14ee69e..c3e0c037625da9 100755 --- a/setup.py +++ b/setup.py @@ -350,6 +350,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/tslibs/fields.pyx', 'pandas/_libs/tslibs/offsets.pyx', 'pandas/_libs/tslibs/frequencies.pyx', + 'pandas/_libs/tslibs/resolution.pyx', 'pandas/_libs/tslibs/parsing.pyx', 'pandas/io/sas/sas.pyx'] @@ -580,6 +581,13 @@ def pxd(name): 'pyxfile': '_libs/tslibs/parsing', 'pxdfiles': ['_libs/src/util', '_libs/src/khash']}, + '_libs.tslibs.resolution': { + 'pyxfile': '_libs/tslibs/resolution', + 'pxdfiles': ['_libs/src/util', + '_libs/src/khash', + '_libs/tslibs/frequencies', + '_libs/tslibs/timezones'], + 'depends': tseries_depends}, '_libs.tslibs.strptime': { 'pyxfile': '_libs/tslibs/strptime', 'pxdfiles': ['_libs/src/util', @@ -761,6 +769,7 @@ def pxd(name): 'pandas.tests.series', 'pandas.tests.scalar', 'pandas.tests.tseries', + 'pandas.tests.tseries.offsets', 'pandas.tests.plotting', 'pandas.tests.tools', 'pandas.tests.util', @@ -796,7 +805,7 @@ def pxd(name): 'pandas.tests.io.formats': ['data/*.csv'], 'pandas.tests.io.msgpack': ['data/*.mp'], 'pandas.tests.reshape': ['data/*.csv'], - 'pandas.tests.tseries': ['data/*.pickle'], + 'pandas.tests.tseries.offsets': ['data/*.pickle'], 'pandas.io.formats': ['templates/*.tpl'] }, ext_modules=extensions,
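
The dominant change in the test hunks above is mechanical: loop-based test methods that iterated over a local `tests` list are rewritten as class-level `offset_cases` / `on_offset_cases` lists consumed by `@pytest.mark.parametrize`, so each case reports and fails independently. Below is a minimal, self-contained sketch of that pattern for readers reviewing the diff; the toy class, the day-count "offset", and the inline assertion are hypothetical stand-ins for the pandas offsets and the `assert_offset_equal` helper, not part of the patch.

# Illustrative sketch only (not part of the patch): the loop-to-parametrize
# refactor applied throughout the offsets tests above.
from datetime import datetime, timedelta

import pytest


class TestToyOffset(object):
    # Each entry pairs an "offset" (here just a day count) with a mapping
    # of input datetimes to expected results, mirroring offset_cases above.
    offset_cases = [
        (1, {datetime(2008, 1, 1): datetime(2008, 1, 2),
             datetime(2008, 1, 31): datetime(2008, 2, 1)}),
        (-1, {datetime(2008, 1, 2): datetime(2008, 1, 1)}),
    ]

    @pytest.mark.parametrize('case', offset_cases)
    def test_offset(self, case):
        # pytest runs this once per list entry, so one bad case no longer
        # short-circuits the remaining assertions.
        n_days, cases = case
        for base, expected in cases.items():
            assert base + timedelta(days=n_days) == expected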
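
In the `pandas/tseries/offsets.py` hunks, month arithmetic previously done with dateutil's `relativedelta(months=..., day=...)` is routed through a `shift_month` helper imported from the cython offsets module. The call sites above use `shift_month(other, n, 'start')`, `shift_month(other, n, 'end')`, `shift_month(other, n, None)`, and an explicit integer day, which suggests the semantics sketched below; this is a rough pure-Python approximation for reading the diff, not the actual implementation.

# Rough approximation of the behaviour implied by the shift_month call
# sites above: shift by n months, then pin the day to the month start,
# the month end, an explicit day, or clamp the original day. The real
# helper lives in pandas._libs.tslibs.offsets and may differ in detail.
import calendar
from datetime import datetime


def shift_month_sketch(stamp, months, day_opt=None):
    year, month = divmod(stamp.month - 1 + months, 12)
    year += stamp.year
    month += 1
    days_in_month = calendar.monthrange(year, month)[1]

    if day_opt is None:
        day = min(stamp.day, days_in_month)   # clamp, e.g. Jan 31 -> Feb 28
    elif day_opt == 'start':
        day = 1
    elif day_opt == 'end':
        day = days_in_month
    else:
        day = min(day_opt, days_in_month)     # explicit day of month

    return stamp.replace(year=year, month=month, day=day)


# e.g. the MonthEnd-style call shift_month(other, -1, 'end') would map
# datetime(2011, 2, 15) to datetime(2011, 1, 31) under this sketch.
print(shift_month_sketch(datetime(2011, 2, 15), -1, 'end'))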
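
Finally, the `pandas/util/_decorators.py` hunk changes `deprecate` so the returned alias keeps the target function's metadata via `functools.wraps` and carries the line-wrapped deprecation message at the top of its docstring. A minimal sketch of that decorator pattern follows, assuming a generic `FutureWarning`-style deprecation; the function name here is hypothetical and not part of the pandas API.

# Minimal sketch of the decorator pattern from the _decorators.py hunk:
# wrap the alternative, emit a warning on call, and surface the message
# in the alias's __doc__ so it shows up in help().
import warnings
from functools import wraps
from textwrap import dedent, wrap


def make_deprecated_alias(name, alternative, msg=None, stacklevel=2):
    msg = msg or "{} is deprecated, use {} instead".format(
        name, alternative.__name__)

    @wraps(alternative)
    def wrapper(*args, **kwargs):
        warnings.warn(msg, FutureWarning, stacklevel=stacklevel)
        return alternative(*args, **kwargs)

    if wrapper.__doc__ is not None:
        wrapper.__doc__ = ('\n'.join(wrap(msg, 70)) + '\n'
                           + dedent(wrapper.__doc__))
    return wrapper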