class PlotAccessor:
    __doc__ = vendordt.PlotAccessor.__doc__

    def __init__(self, data) -> None:
        # The object (a Series; DataFrames are rejected in `hist`) to plot.
        self._parent = data

    def hist(self, by: Sequence[str] | None = None, bins: int = 10, **kwargs):
        """Draw a histogram of the parent Series using matplotlib.

        Args:
            by: Grouping columns. Not supported yet; must be None.
            bins: Number of histogram bins.
            **kwargs: Plot decoration options (``ax``, ``figure``, ``figsize``,
                ``legend``, ``grid``, ``xticks``, ``yticks``); anything else
                is forwarded to ``matplotlib.axes.Axes.hist``.

        Returns:
            The matplotlib axes the histogram was drawn on.

        Raises:
            NotImplementedError: If ``by`` is given, if a backend other than
                matplotlib is requested, or if the parent is a DataFrame.
        """
        if by is not None:
            raise NotImplementedError(
                f"Non-none `by` argument is not yet supported. {constants.FEEDBACK_LINK}"
            )
        # Explicitly asking for the one backend we do support is fine.
        backend = kwargs.pop("backend", None)
        if backend is not None and backend != "matplotlib":
            raise NotImplementedError(
                f"Only support matplotlib backend for now. {constants.FEEDBACK_LINK}"
            )
        # Imported inside the method rather than at module level --
        # presumably to avoid a circular import with bigframes.dataframe.
        import bigframes.dataframe as dataframe

        if isinstance(self._parent, dataframe.DataFrame):
            raise NotImplementedError(
                f"`Dataframe.plot.hist` is not implemented yet. {constants.FEEDBACK_LINK}"
            )

        return self._hist_series(
            by=by,
            bins=bins,
            **kwargs,
        )

    def _hist_series(
        self,
        by: Sequence[str] | None = None,
        bins: int = 10,
        **kwargs,
    ):
        """Bin the parent Series with BigQuery and draw the bars locally.

        Args:
            by: Unused; accepted so the signature mirrors `hist`.
            bins: Number of histogram bins passed to ``bigframes.pandas.cut``.
            **kwargs: Decoration options consumed here (``ax``, ``figure``,
                ``figsize``, ``legend``, ``grid``, ``xticks``, ``yticks``,
                ``layout``); the remainder goes to ``Axes.hist``.

        Returns:
            The matplotlib axes the histogram was drawn on.

        Raises:
            ValueError: If a non-None ``layout`` is passed.
            AssertionError: If both ``ax`` and ``figure`` are passed but the
                axes does not belong to that figure.
        """
        # Only a subset of the pandas decoration arguments is supported.
        # Each one is popped exactly once so it never leaks into `ax.hist`.
        ax = kwargs.pop("ax", None)
        fig = kwargs.pop("figure", None)
        figsize = kwargs.pop("figsize", None)
        legend = kwargs.pop("legend", False)
        grid = kwargs.pop("grid", None)
        xticks = kwargs.pop("xticks", None)
        yticks = kwargs.pop("yticks", None)

        # Validate before running the query so bad arguments fail fast.
        # `layout` is popped (not just read) so `layout=None` is not forwarded
        # to matplotlib, which does not know that keyword.
        if kwargs.pop("layout", None) is not None:
            raise ValueError("The 'layout' keyword is not supported when 'by' is None")

        # Calculates the bins' values and weights through BigQuery.
        import bigframes.pandas as bpd

        series = self._parent.copy()
        binned = bpd.cut(series, bins=bins, labels=None)
        binned_data = (
            binned.struct.explode()
            .value_counts()
            .to_pandas()
            .sort_index(level="left_exclusive")
        )
        weights = binned_data.values
        left_bins = binned_data.index.get_level_values("left_exclusive")
        right_bins = binned_data.index.get_level_values("right_inclusive")
        bin_edges = left_bins.union(right_bins, sort=True)

        # Resolve the target figure/axes, adapted from pandas' hist_series.
        # The fallback figure is only created when it is really needed.
        if ax is None:
            if fig is None:
                fig = plt.gcf() if plt.get_fignums() else plt.figure(figsize=figsize)
            ax = fig.gca()
        elif fig is None:
            # Draw on the figure that owns the axes the caller handed us.
            fig = ax.get_figure()
        elif ax.get_figure() != fig:
            raise AssertionError("passed axis not bound to passed figure")

        if figsize is not None and tuple(figsize) != tuple(fig.get_size_inches()):
            fig.set_size_inches(*figsize, forward=True)

        if legend:
            kwargs["label"] = series.name
        # One pseudo-sample per bin (its left edge), weighted by the bin's
        # count, reproduces the pre-aggregated histogram.
        ax.hist(x=left_bins, bins=bin_edges, weights=weights, **kwargs)
        if legend:
            ax.legend()
        if grid is not None:
            ax.grid(grid)
        if xticks is not None:
            ax.set_xticks(xticks)
        if yticks is not None:
            ax.set_yticks(yticks)

        return ax
def _assert_tick_labels_match(labels, pd_labels):
    """Assert two tick-label sequences agree in length, font size and rotation."""
    assert len(labels) == len(pd_labels)
    for label, pd_label in zip(labels, pd_labels):
        tm.assert_almost_equal(label.get_fontsize(), pd_label.get_fontsize())
        tm.assert_almost_equal(label.get_rotation(), pd_label.get_rotation())


def test_series_hist_bins(scalars_dfs):
    """The bigframes histogram has the same bars as the pandas one."""
    scalars_df, scalars_pandas_df = scalars_dfs
    ax = scalars_df["int64_col"].plot.hist(bins=5)
    # Use pandas' `.plot.hist`, not `Series.hist`: the latter draws on the
    # current figure's axes (the one bigframes just used), which would make
    # this test compare an axes with itself.
    pd_ax = scalars_pandas_df["int64_col"].plot.hist(bins=5)

    # Check each bar has the same position and height as the pandas one.
    assert len(ax.patches) == len(pd_ax.patches)
    for patch, pd_patch in zip(ax.patches, pd_ax.patches):
        assert patch.xy == pd_patch.xy
        assert patch.get_height() == pd_patch.get_height()


def test_series_hist_ticks_props(scalars_dfs):
    """Custom x/y ticks produce tick labels matching pandas on both axes."""
    scalars_df, scalars_pandas_df = scalars_dfs

    xticks = [20, 18]
    yticks = [30, 40]

    ax = scalars_df["float64_col"].plot.hist(xticks=xticks, yticks=yticks)
    pd_ax = scalars_pandas_df["float64_col"].plot.hist(xticks=xticks, yticks=yticks)

    _assert_tick_labels_match(ax.get_xticklabels(), pd_ax.get_xticklabels())
    # The y axis is checked against its own labels (length and iteration).
    _assert_tick_labels_match(ax.get_yticklabels(), pd_ax.get_yticklabels())
class PlotAccessor:
    def hist(self, by: Sequence[str] | None = None, bins: int = 10, **kwargs):
        """
        Draw histogram of the input series using matplotlib.

        Parameters
        ----------
        by : str or sequence, optional
            If passed, then used to form histograms for separate groups.
            Currently not supported.
        bins : int, default 10
            Number of histogram bins to be used.
        ax : matplotlib axes object, default None
            An axes of the current figure.
        grid : bool, default None (matlab style default)
            Axis grid lines.
        xticks : sequence
            Values to use for the xticks.
        yticks : sequence
            Values to use for the yticks.
        figsize : a tuple (width, height) in inches
            Size of a figure object.
        backend : str, default None
            Backend to use instead of the backend specified in the option
            ``plotting.backend``. Currently, only `matplotlib` is supported.
        legend : bool, default False
            Place legend on axis subplots.
        **kwargs
            Options to pass to matplotlib plotting method.

        Returns
        -------
        :class:`matplotlib.axes.Axes`
            A histogram plot.

        Examples
        --------
        For Series:

        .. plot::
            :context: close-figs

            >>> import bigframes.pandas as bpd
            >>> lst = ['a', 'a', 'a', 'b', 'b', 'b']
            >>> ser = bpd.Series([1, 2, 2, 4, 6, 6], index=lst)
            >>> hist = ser.plot.hist()
        """
        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)