From 883379c73bf05698a7264dd2159ead83d0fb6e4a Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Thu, 26 Mar 2020 18:59:44 -0400 Subject: [PATCH] Fix to _get_nearest_indexer for pydata/xarray#3751 (#32905) --- doc/source/whatsnew/v1.1.0.rst | 1 + environment.yml | 1 + pandas/core/indexes/base.py | 14 +++++++++----- pandas/tests/test_downstream.py | 15 +++++++++++++++ requirements-dev.txt | 1 + 5 files changed, 27 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 38e99cb649834..4b9a54675ae5b 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -336,6 +336,7 @@ Indexing - Bug in :meth:`DataFrame.iloc.__setitem__` on a :class:`DataFrame` with duplicate columns incorrectly setting values for all matching columns (:issue:`15686`, :issue:`22036`) - Bug in :meth:`DataFrame.loc:` and :meth:`Series.loc` with a :class:`DatetimeIndex`, :class:`TimedeltaIndex`, or :class:`PeriodIndex` incorrectly allowing lookups of non-matching datetime-like dtypes (:issue:`32650`) - Bug in :meth:`Series.__getitem__` indexing with non-standard scalars, e.g. ``np.dtype`` (:issue:`32684`) +- Fix to preserve the ability to index with the "nearest" method with xarray's CFTimeIndex, an :class:`Index` subclass (`pydata/xarray#3751 <https://github.com/pydata/xarray/issues/3751>`_, :issue:`32905`). - Bug in :class:`Index` constructor where an unhelpful error message was raised for ``numpy`` scalars (:issue:`33017`) - Bug in :meth:`DataFrame.lookup` incorrectly raising an ``AttributeError`` when ``frame.index`` or ``frame.columns`` is not unique; this will now raise a ``ValueError`` with a helpful error message (:issue:`33041`) diff --git a/environment.yml b/environment.yml index 532c36038fcaf..cf579738f6fe9 100644 --- a/environment.yml +++ b/environment.yml @@ -101,6 +101,7 @@ dependencies: - s3fs # pandas.read_csv... when using 's3://...' 
path - sqlalchemy # pandas.read_sql, DataFrame.to_sql - xarray # DataFrame.to_xarray + - cftime # Needed for downstream xarray.CFTimeIndex test - pyreadstat # pandas.read_spss - tabulate>=0.8.3 # DataFrame.to_markdown - pip: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5b439a851a709..8ee20084e0298 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3049,8 +3049,9 @@ def _get_nearest_indexer(self, target: "Index", limit, tolerance) -> np.ndarray: left_indexer = self.get_indexer(target, "pad", limit=limit) right_indexer = self.get_indexer(target, "backfill", limit=limit) - left_distances = np.abs(self[left_indexer] - target) - right_distances = np.abs(self[right_indexer] - target) + target_values = target._values + left_distances = np.abs(self._values[left_indexer] - target_values) + right_distances = np.abs(self._values[right_indexer] - target_values) op = operator.lt if self.is_monotonic_increasing else operator.le indexer = np.where( @@ -3059,13 +3060,16 @@ def _get_nearest_indexer(self, target: "Index", limit, tolerance) -> np.ndarray: right_indexer, ) if tolerance is not None: - indexer = self._filter_indexer_tolerance(target, indexer, tolerance) + indexer = self._filter_indexer_tolerance(target_values, indexer, tolerance) return indexer def _filter_indexer_tolerance( - self, target: "Index", indexer: np.ndarray, tolerance + self, + target: Union["Index", np.ndarray, ExtensionArray], + indexer: np.ndarray, + tolerance, ) -> np.ndarray: - distance = abs(self.values[indexer] - target) + distance = abs(self._values[indexer] - target) indexer = np.where(distance <= tolerance, indexer, -1) return indexer diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index 122ef1f47968e..57542aa3bc7f6 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -8,6 +8,8 @@ import numpy as np # noqa import pytest +import pandas.util._test_decorators as td + from pandas 
import DataFrame import pandas._testing as tm @@ -47,6 +49,19 @@ def test_xarray(df): assert df.to_xarray() is not None +@td.skip_if_no("cftime") +@td.skip_if_no("xarray", "0.10.4") +def test_xarray_cftimeindex_nearest(): + # https://github.com/pydata/xarray/issues/3751 + import cftime + import xarray + + times = xarray.cftime_range("0001", periods=2) + result = times.get_loc(cftime.DatetimeGregorian(2000, 1, 1), method="nearest") + expected = 1 + assert result == expected + + def test_oo_optimizable(): # GH 21071 subprocess.check_call([sys.executable, "-OO", "-c", "import pandas"]) diff --git a/requirements-dev.txt b/requirements-dev.txt index 9ee67c56ab8ca..6a2cc7b53615e 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -68,6 +68,7 @@ tables>=3.4.2 s3fs sqlalchemy xarray +cftime pyreadstat tabulate>=0.8.3 git+https://github.com/pandas-dev/pydata-sphinx-theme.git@master