pandas-dev · MichaelTiemannOSC · Jul 2, 2023 · Jul 2, 2023 · Jul 2, 2023 · Jul 2, 2023
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -141,6 +141,7 @@ Other enhancements
 - :meth:`MultiIndex.sortlevel` and :meth:`Index.sortlevel` gained a new keyword ``na_position`` (:issue:`51612`)
 - :meth:`arrays.DatetimeArray.map`, :meth:`arrays.TimedeltaArray.map` and :meth:`arrays.PeriodArray.map` can now take a ``na_action`` argument (:issue:`51644`)
 - :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`).
+- :meth:`arrays.BaseMaskedArray.take` now handles a non-NA float as the fill value (triggered by a ``ufloat`` NaN from the ``uncertainties`` package) (see `PR <https://github.com/pandas-dev/pandas/pull/53970>`_)
 - :meth:`pandas.read_html` now supports the ``storage_options`` keyword when used with a URL, allowing users to add headers the outbound HTTP request (:issue:`49944`)
 - Add :meth:`diff()` and :meth:`round()` for :class:`Index` (:issue:`19708`)
 - Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`)
@@ -160,6 +161,7 @@ Other enhancements
 - :meth:`DataFrame.stack` gained the ``sort`` keyword to dictate whether the resulting :class:`MultiIndex` levels are sorted (:issue:`15105`)
 - :meth:`DataFrame.unstack` gained the ``sort`` keyword to dictate whether the resulting :class:`MultiIndex` levels are sorted (:issue:`15105`)
 - :meth:`DataFrameGroupby.agg` and :meth:`DataFrameGroupby.transform` now support grouping by multiple keys when the index is not a :class:`MultiIndex` for ``engine="numba"`` (:issue:`53486`)
+- :meth:`GroupBy.first` now returns the first NA value (which might be NaN) when the series contains only NA values, and returns ``np.nan`` only when the series is empty (see `PR <https://github.com/pandas-dev/pandas/pull/53970>`_)
 - :meth:`Series.explode` now supports pyarrow-backed list types (:issue:`53602`)
 - :meth:`Series.str.join` now supports ``ArrowDtype(pa.string())`` (:issue:`53646`)
 - :meth:`SeriesGroupby.agg` and :meth:`DataFrameGroupby.agg` now support passing in multiple functions for ``engine="numba"`` (:issue:`53486`)
@@ -447,6 +449,7 @@ Datetimelike
 - Bug in constructing a :class:`Timestamp` with ``ts_input=pd.NA`` raising ``TypeError`` (:issue:`45481`)
 - Bug in parsing datetime strings with weekday but no day e.g. "2023 Sept Thu" incorrectly raising ``AttributeError`` instead of ``ValueError`` (:issue:`52659`)
 
+
 Timedelta
 ^^^^^^^^^
 - :meth:`TimedeltaIndex.map` with ``na_action="ignore"`` now works as expected (:issue:`51644`)
@@ -487,6 +490,7 @@ Conversion
 - Bug in :meth:`DataFrame.info` raising  ``ValueError`` when ``use_numba`` is set (:issue:`51922`)
 - Bug in :meth:`DataFrame.insert` raising ``TypeError`` if ``loc`` is ``np.int64`` (:issue:`53193`)
 - Bug in :meth:`HDFStore.select` loses precision of large int when stored and retrieved (:issue:`54186`)
+-
 
 Strings
 ^^^^^^^
@@ -497,6 +501,7 @@ Interval
 ^^^^^^^^
 - :meth:`pd.IntervalIndex.get_indexer` and :meth:`pd.IntervalIndex.get_indexer_nonunique` raising if ``target`` is read-only array (:issue:`53703`)
 - Bug in :class:`IntervalDtype` where the object could be kept alive when deleted (:issue:`54184`)
+-
 
 Indexing
 ^^^^^^^^
@@ -612,6 +617,7 @@ ExtensionArray
 - Bug in :meth:`~arrays.ArrowExtensionArray.__iter__` and :meth:`~arrays.ArrowExtensionArray.__getitem__` returning python datetime and timedelta objects for non-nano dtypes (:issue:`53326`)
 - Bug where the :class:`DataFrame` repr would not work when a column would have an :class:`ArrowDtype` with an ``pyarrow.ExtensionDtype`` (:issue:`54063`)
 - Bug where the ``__from_arrow__`` method of masked ExtensionDtypes(e.g. :class:`Float64Dtype`, :class:`BooleanDtype`) would not accept pyarrow arrays of type ``pyarrow.null()`` (:issue:`52223`)
+-
 
 Styler
 ^^^^^^
@@ -640,7 +646,7 @@ Other
 - Bug in :meth:`Series.memory_usage` when ``deep=True`` throw an error with Series of objects and the returned value is incorrect, as it does not take into account GC corrections (:issue:`51858`)
 - Bug in :meth:`period_range` the default behavior when freq was not passed as an argument was incorrect(:issue:`53687`)
 - Fixed incorrect ``__name__`` attribute of ``pandas._libs.json`` (:issue:`52898`)
--
+- Changed ``tests/extension/test_boolean.py`` to use ``pd.NA`` instead of ``np.nan`` (following similar patterns in ``tests/extension/test_integer.py`` and ``tests/extension/test_float.py``), updating ``make_data``, ``data_missing``, ``data_missing_for_sorting``, ``data_for_grouping`` and ``_check_op`` (see `PR <https://github.com/pandas-dev/pandas/pull/53970>`_)
 
 .. ***DO NOT USE THIS SECTION***
 

diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
@@ -876,6 +876,8 @@ def take(
         # we only fill where the indexer is null
         # not existing missing values
         # TODO(jreback) what if we have a non-na float as a fill value?
+        # NOTE: a NaN from the ``uncertainties`` package is a scalar that does not
+        # register as NA via `isna`/`notna`; it can be detected because NaN != NaN
         if allow_fill and notna(fill_value):
             fill_mask = np.asarray(indexer) == -1
             result[fill_mask] = fill_value

@@ -3272,7 +3272,10 @@ def first(x: Series):
                 """Helper function for first item that isn't NA."""
                 arr = x.array[notna(x.array)]
                 if not len(arr):
-                    return np.nan
+                    nan_arr = x.array[isna(x.array)]
+                    if not len(nan_arr):
+                        return np.nan
+                    return nan_arr[0]
                 return arr[0]
 
             if isinstance(obj, DataFrame):

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
@@ -590,7 +590,7 @@ def nkeys(self) -> int:
 
     def get_iterator(
         self, data: NDFrameT, axis: AxisInt = 0
-    ) -> Iterator[tuple[Hashable, NDFrameT]]:
+    ) -> Iterator[tuple[Hashable, NDFrameT]]:  # Doesn't work with non-hashable EA types
         """
         Groupby iterator
 

diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
@@ -356,10 +356,6 @@ def test_groupby_extension_no_sort(self):
         """
         super().test_groupby_extension_no_sort()
 
-    @pytest.mark.xfail(reason="GH#39098: Converts agg result to object")
-    def test_groupby_agg_extension(self, data_for_grouping):
-        super().test_groupby_agg_extension(data_for_grouping)
-
 
 class TestArithmeticOps(BaseJSON, base.BaseArithmeticOpsTests):
     def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request):

diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py
@@ -28,7 +28,7 @@
 
 
 def make_data():
-    return [True, False] * 4 + [np.nan] + [True, False] * 44 + [np.nan] + [True, False]
+    return [True, False] * 4 + [pd.NA] + [True, False] * 44 + [pd.NA] + [True, False]
 
 
 @pytest.fixture
@@ -48,7 +48,7 @@ def data_for_twos(dtype):
 
 @pytest.fixture
 def data_missing(dtype):
-    return pd.array([np.nan, True], dtype=dtype)
+    return pd.array([pd.NA, True], dtype=dtype)
 
 
 @pytest.fixture
@@ -58,7 +58,7 @@ def data_for_sorting(dtype):
 
 @pytest.fixture
 def data_missing_for_sorting(dtype):
-    return pd.array([True, np.nan, False], dtype=dtype)
+    return pd.array([True, pd.NA, False], dtype=dtype)
 
 
 @pytest.fixture
@@ -76,7 +76,7 @@ def na_value():
 def data_for_grouping(dtype):
     b = True
     a = False
-    na = np.nan
+    na = pd.NA
     return pd.array([b, b, na, na, a, a, b], dtype=dtype)
 
 
@@ -147,7 +147,7 @@ def _check_op(self, obj, op, other, op_name, exc=NotImplementedError):
                 expected = expected.astype("Float64")
             if op_name == "__rpow__":
                 # for rpow, combine does not propagate NaN
-                expected[result.isna()] = np.nan
+                expected[result.isna()] = pd.NA
             self.assert_equal(result, expected)
         else:
             with pytest.raises(exc):