diff --git a/doc/whatsnew/index.rst b/doc/whatsnew/index.rst index 5926f1b9e6..406f902cdf 100644 --- a/doc/whatsnew/index.rst +++ b/doc/whatsnew/index.rst @@ -8,6 +8,7 @@ v0.13 .. toctree:: :maxdepth: 2 + v0.13.2 v0.13.1 v0.13.0 diff --git a/doc/whatsnew/v0.13.2.rst b/doc/whatsnew/v0.13.2.rst new file mode 100644 index 0000000000..8cbdb0959a --- /dev/null +++ b/doc/whatsnew/v0.13.2.rst @@ -0,0 +1,4 @@ +v0.13.2 (January 2024) +---------------------- + +This is a minor release containing only internal changes that adapt to upcoming deprecations in pandas. All users are encouraged to update. diff --git a/pyproject.toml b/pyproject.toml index 0f8e7cfd01..0a4e497d0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,4 +66,6 @@ exclude = ["doc/_static/*.svg"] [tool.pytest.ini_options] filterwarnings = [ "ignore:The --rsyncdir command line argument and rsyncdirs config variable are deprecated.:DeprecationWarning", + "ignore:\\s*Pyarrow will become a required dependency of pandas:DeprecationWarning", + "ignore:datetime.datetime.utcfromtimestamp\\(\\) is deprecated:DeprecationWarning", ] diff --git a/seaborn/_base.py b/seaborn/_base.py index e96954c07b..0b43523193 100644 --- a/seaborn/_base.py +++ b/seaborn/_base.py @@ -942,9 +942,9 @@ def iter_data( for key in iter_keys: - # Pandas fails with singleton tuple inputs - pd_key = key[0] if len(key) == 1 else key - + pd_key = ( + key[0] if len(key) == 1 and _version_predates(pd, "2.2.0") else key + ) try: data_subset = grouped_data.get_group(pd_key) except KeyError: diff --git a/seaborn/_compat.py b/seaborn/_compat.py index 5427b7c843..bd2f0c12d3 100644 --- a/seaborn/_compat.py +++ b/seaborn/_compat.py @@ -2,6 +2,7 @@ from typing import Literal import numpy as np +import pandas as pd import matplotlib as mpl from matplotlib.figure import Figure from seaborn.utils import _version_predates @@ -114,3 +115,9 @@ def get_legend_handles(legend): return legend.legendHandles else: return legend.legend_handles + + +def 
groupby_apply_include_groups(val): + if _version_predates(pd, "2.2.0"): + return {} + return {"include_groups": val} diff --git a/seaborn/_core/plot.py b/seaborn/_core/plot.py index c432056baf..14348e357f 100644 --- a/seaborn/_core/plot.py +++ b/seaborn/_core/plot.py @@ -41,6 +41,7 @@ from seaborn._core.exceptions import PlotSpecError from seaborn._core.rules import categorical_order from seaborn._compat import get_layout_engine, set_layout_engine +from seaborn.utils import _version_predates from seaborn.rcmod import axes_style, plotting_context from seaborn.palettes import color_palette @@ -1637,9 +1638,10 @@ def split_generator(keep_na=False) -> Generator: for key in itertools.product(*grouping_keys): - # Pandas fails with singleton tuple inputs - pd_key = key[0] if len(key) == 1 else key - + pd_key = ( + key[0] if len(key) == 1 and _version_predates(pd, "2.2.0") + else key + ) try: df_subset = grouped_df.get_group(pd_key) except KeyError: diff --git a/seaborn/categorical.py b/seaborn/categorical.py index 99a813b2a7..a43c085ba2 100644 --- a/seaborn/categorical.py +++ b/seaborn/categorical.py @@ -28,6 +28,7 @@ _scatter_legend_artist, _version_predates, ) +from seaborn._compat import groupby_apply_include_groups from seaborn._statistics import ( EstimateAggregator, LetterValues, @@ -634,10 +635,10 @@ def get_props(element, artist=mpl.lines.Line2D): ax = self._get_axes(sub_vars) grouped = sub_data.groupby(self.orient)[value_var] + positions = sorted(sub_data[self.orient].unique().astype(float)) value_data = [x.to_numpy() for _, x in grouped] stats = pd.DataFrame(mpl.cbook.boxplot_stats(value_data, whis=whis, bootstrap=bootstrap)) - positions = grouped.grouper.result_index.to_numpy(dtype=float) orig_width = width * self._native_width data = pd.DataFrame({self.orient: positions, "width": orig_width}) @@ -1207,7 +1208,7 @@ def plot_points( agg_data = sub_data if sub_data.empty else ( sub_data .groupby(self.orient) - .apply(aggregator, agg_var) + .apply(aggregator, 
agg_var, **groupby_apply_include_groups(False)) .reindex(pd.Index(positions, name=self.orient)) .reset_index() ) @@ -1278,7 +1279,7 @@ def plot_bars( agg_data = sub_data if sub_data.empty else ( sub_data .groupby(self.orient) - .apply(aggregator, agg_var) + .apply(aggregator, agg_var, **groupby_apply_include_groups(False)) .reset_index() ) @@ -2281,7 +2282,7 @@ def swarmplot( {order_vars} dodge : bool When a `hue` variable is assigned, setting this to `True` will - separate the swaarms for different hue levels along the categorical - axis and narrow the amount of space allotedto each strip. Otherwise, + separate the swarms for different hue levels along the categorical + axis and narrow the amount of space allotted to each strip. Otherwise, the points for each level will be plotted in the same swarm. {orient} diff --git a/seaborn/relational.py b/seaborn/relational.py index bd5ecfdfd1..ff0701c793 100644 --- a/seaborn/relational.py +++ b/seaborn/relational.py @@ -17,6 +17,7 @@ _get_transform_functions, _scatter_legend_artist, ) +from ._compat import groupby_apply_include_groups from ._statistics import EstimateAggregator, WeightedAggregator from .axisgrid import FacetGrid, _facet_docs from ._docstrings import DocstringComponents, _core_docs @@ -290,7 +291,11 @@ def plot(self, ax, kws): grouped = sub_data.groupby(orient, sort=self.sort) # Could pass as_index=False instead of reset_index, # but that fails on a corner case with older pandas.
- sub_data = grouped.apply(agg, other).reset_index() + sub_data = ( + grouped + .apply(agg, other, **groupby_apply_include_groups(False)) + .reset_index() + ) else: sub_data[f"{other}min"] = np.nan sub_data[f"{other}max"] = np.nan diff --git a/tests/_stats/test_density.py b/tests/_stats/test_density.py index d02a144244..2c0c48adff 100644 --- a/tests/_stats/test_density.py +++ b/tests/_stats/test_density.py @@ -6,6 +6,7 @@ from seaborn._core.groupby import GroupBy from seaborn._stats.density import KDE, _no_scipy +from seaborn._compat import groupby_apply_include_groups class TestKDE: @@ -93,7 +94,10 @@ def test_common_norm(self, df, common_norm): areas = ( res.groupby("alpha") - .apply(lambda x: self.integrate(x["density"], x[ori])) + .apply( + lambda x: self.integrate(x["density"], x[ori]), + **groupby_apply_include_groups(False), + ) ) if common_norm: @@ -111,11 +115,18 @@ def test_common_norm_variables(self, df): def integrate_by_color_and_sum(x): return ( x.groupby("color") - .apply(lambda y: self.integrate(y["density"], y[ori])) + .apply( + lambda y: self.integrate(y["density"], y[ori]), + **groupby_apply_include_groups(False) + ) .sum() ) - areas = res.groupby("alpha").apply(integrate_by_color_and_sum) + areas = ( + res + .groupby("alpha") + .apply(integrate_by_color_and_sum, **groupby_apply_include_groups(False)) + ) assert_array_almost_equal(areas, [1, 1], decimal=3) @pytest.mark.parametrize("param", ["norm", "grid"]) diff --git a/tests/test_categorical.py b/tests/test_categorical.py index eaca2e78de..3df7824787 100644 --- a/tests/test_categorical.py +++ b/tests/test_categorical.py @@ -2078,7 +2078,7 @@ def test_xy_native_scale_log_transform(self): def test_datetime_native_scale_axis(self): - x = pd.date_range("2010-01-01", periods=20, freq="m") + x = pd.date_range("2010-01-01", periods=20, freq="MS") y = np.arange(20) ax = barplot(x=x, y=y, native_scale=True) assert "Date" in ax.xaxis.get_major_locator().__class__.__name__