Skip to content

Commit

Permalink
feat: (Series|DataFrame).plot
Browse files Browse the repository at this point in the history
  • Loading branch information
chelsea-lin committed Mar 13, 2024
1 parent 276f228 commit 5ff0641
Show file tree
Hide file tree
Showing 3 changed files with 214 additions and 29 deletions.
57 changes: 28 additions & 29 deletions bigframes/operations/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,31 +23,45 @@
class PlotAccessor:
__doc__ = vendordt.PlotAccessor.__doc__

# Kinds that __call__ accepts without any Series/DataFrame restriction.
_common_kinds = ("line", "area", "hist")
# Kinds that __call__ rejects with ValueError when the parent is a Series.
_dataframe_kinds = ("scatter",)
# Every kind __call__ accepts; any other value raises NotImplementedError.
_all_kinds = _common_kinds + _dataframe_kinds

def __call__(self, **kwargs):
    """Plot the parent object with the requested plot kind.

    Parameters
    ----------
    **kwargs
        Plot options. ``kind`` (default ``"line"``) selects the plot type
        and ``backend`` must be absent or ``None``. Both are removed from
        ``kwargs``; everything else is forwarded to ``bfplt.plot``.

    Returns
    -------
    The result of ``bfplt.plot`` for a copy of the parent object.

    Raises
    ------
    NotImplementedError
        If a non-``None`` ``backend`` is given, or ``kind`` is not listed
        in ``_all_kinds``.
    ValueError
        If ``kind`` is DataFrame-only and the parent is a Series.
    """
    # Kept as a function-level import, as in the original code
    # (presumably to avoid a circular import -- TODO confirm).
    import bigframes.series as series

    if kwargs.pop("backend", None) is not None:
        raise NotImplementedError(
            f"Only support matplotlib backend for now. {constants.FEEDBACK_LINK}"
        )

    kind = kwargs.pop("kind", "line")
    if kind not in self._all_kinds:
        raise NotImplementedError(
            f"{kind} is not a valid plot kind supported for now. {constants.FEEDBACK_LINK}"
        )

    # Check the parent itself so that no copy is made (and then discarded)
    # when the request is rejected.
    if kind in self._dataframe_kinds and isinstance(self._parent, series.Series):
        raise ValueError(f"plot kind {kind} can only be used for data frames")

    # Plot a copy, as before, so the plotting code never receives the
    # caller's own object.
    return bfplt.plot(self._parent.copy(), kind=kind, **kwargs)

# Store the object to be plotted; the plotting methods read it back as
# self._parent.
def __init__(self, data) -> None:
self._parent = data

def hist(
    self, by: typing.Optional[typing.Sequence[str]] = None, bins: int = 10, **kwargs
):
    """Draw a histogram of the parent object.

    Parameters
    ----------
    by : sequence of str, optional
        Forwarded unchanged as the ``by`` plot option.
    bins : int, default 10
        Forwarded unchanged as the ``bins`` plot option.
    **kwargs
        Additional plot options, forwarded unchanged.

    Returns
    -------
    Whatever calling the accessor with ``kind="hist"`` returns.
    """
    # Single exit point: backend validation, copying of the parent and the
    # actual plotting all live in __call__, so they are not repeated here.
    return self(kind="hist", by=by, bins=bins, **kwargs)

def line(
    self,
    x: typing.Optional[typing.Hashable] = None,
    y: typing.Optional[typing.Hashable] = None,
    **kwargs,
):
    """Draw a line plot of the parent object.

    Parameters
    ----------
    x : hashable, optional
        Forwarded unchanged as the ``x`` plot option.
    y : hashable, optional
        Forwarded unchanged as the ``y`` plot option.
    **kwargs
        Additional plot options, forwarded unchanged.

    Returns
    -------
    Whatever calling the accessor with ``kind="line"`` returns.
    """
    # Go through __call__ like the other kind-specific helpers so that the
    # backend check and the defensive copy of the parent apply here too.
    return self(kind="line", x=x, y=y, **kwargs)

def area(
self,
Expand All @@ -56,14 +70,7 @@ def area(
stacked: bool = True,
**kwargs,
):
return bfplt.plot(
self._parent.copy(),
kind="area",
x=x,
y=y,
stacked=stacked,
**kwargs,
)
return self(kind="area", x=x, y=y, stacked=stacked, **kwargs)

def scatter(
self,
Expand All @@ -73,12 +80,4 @@ def scatter(
c: typing.Union[typing.Hashable, typing.Sequence[typing.Hashable]] = None,
**kwargs,
):
return bfplt.plot(
self._parent.copy(),
kind="scatter",
x=x,
y=y,
s=s,
c=c,
**kwargs,
)
return self(kind="scatter", x=x, y=y, s=s, c=c, **kwargs)
28 changes: 28 additions & 0 deletions tests/system/small/operations/test_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,3 +233,31 @@ def test_sampling_plot_args_random_state():
msg = "numpy array are different"
with pytest.raises(AssertionError, match=msg):
tm.assert_almost_equal(ax_0.lines[0].get_data()[1], ax_2.lines[0].get_data()[1])


@pytest.mark.parametrize(
    ("kind", "col_names", "kwargs"),
    [
        pytest.param("hist", ["int64_col", "int64_too"], {}),
        pytest.param("line", ["int64_col", "int64_too"], {}),
        pytest.param("area", ["int64_col", "int64_too"], {"stacked": False}),
        pytest.param(
            "scatter", ["int64_col", "int64_too"], {"x": "int64_col", "y": "int64_too"}
        ),
    ],
)
def test_plot_call(scalars_dfs, kind, col_names, kwargs):
    """Smoke test: ``.plot(kind=...)`` runs for each supported kind."""
    scalars_df, _ = scalars_dfs
    scalars_df[col_names].plot(kind=kind, **kwargs)


@pytest.mark.parametrize(
    ("kind", "col_names", "error"),
    [
        # NOTE(review): selecting with a list presumably still yields a
        # DataFrame, so this ValueError may come from the missing x/y
        # arguments rather than from the Series-only guard -- confirm.
        pytest.param("scatter", ["int64_col"], ValueError),
        pytest.param("uknown", ["int64_col", "int64_too"], NotImplementedError),
    ],
)
def test_plot_call_raises(scalars_dfs, kind, col_names, error):
    """Invalid requests must raise; a call that succeeds fails the test."""
    scalars_df, _ = scalars_dfs
    # pytest.raises fails the test when nothing is raised, whereas a
    # non-strict xfail(raises=...) would only report an XPASS.
    with pytest.raises(error):
        scalars_df[col_names].plot(kind=kind)
158 changes: 158 additions & 0 deletions third_party/bigframes_vendored/pandas/plotting/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,166 @@
class PlotAccessor:
"""
Make plots of Series or DataFrame with the `matplotlib` backend.
Parameters
----------
data : Series or DataFrame
The object for which the method is called.
kind : str
The kind of plot to produce:
- 'line' : line plot (default)
- 'hist' : histogram
- 'area' : area plot
- 'scatter' : scatter plot (DataFrame only)
ax : matplotlib axes object, default None
An axes of the current figure.
subplots : bool or sequence of iterables, default False
Whether to group columns into subplots:
- ``False`` : No subplots will be used
- ``True`` : Make separate subplots for each column.
- sequence of iterables of column labels: Create a subplot for each
group of columns. For example `[('a', 'c'), ('b', 'd')]` will
create 2 subplots: one with columns 'a' and 'c', and one
with columns 'b' and 'd'. Remaining columns that aren't specified
will be plotted in additional subplots (one per column).
.. versionadded:: 1.5.0
sharex : bool, default True if ax is None else False
In case ``subplots=True``, share x axis and set some x axis labels
to invisible; defaults to True if ax is None, otherwise False if
an ax is passed in. Be aware that passing in both an ax and
``sharex=True`` will alter all x axis labels for all axes in a figure.
sharey : bool, default False
In case ``subplots=True``, share y axis and set some y axis labels to invisible.
layout : tuple, optional
(rows, columns) for the layout of subplots.
figsize : a tuple (width, height) in inches
Size of a figure object.
use_index : bool, default True
Use index as ticks for x axis.
title : str or list
Title to use for the plot. If a string is passed, print the string
at the top of the figure. If a list is passed and `subplots` is
True, print each item in the list above the corresponding subplot.
grid : bool, default None (matlab style default)
Axis grid lines.
legend : bool or {'reverse'}
Place legend on axis subplots.
style : list or dict
The matplotlib line style per column.
logx : bool or 'sym', default False
Use log scaling or symlog scaling on x axis.
logy : bool or 'sym' default False
Use log scaling or symlog scaling on y axis.
loglog : bool or 'sym', default False
Use log scaling or symlog scaling on both x and y axes.
xticks : sequence
Values to use for the xticks.
yticks : sequence
Values to use for the yticks.
xlim : 2-tuple/list
Set the x limits of the current axes.
ylim : 2-tuple/list
Set the y limits of the current axes.
xlabel : label, optional
Name to use for the xlabel on x-axis. Default uses index name as xlabel, or the
x-column name for planar plots.
.. versionchanged:: 1.2.0
Now applicable to planar plots (`scatter`, `hexbin`).
.. versionchanged:: 2.0.0
Now applicable to histograms.
ylabel : label, optional
Name to use for the ylabel on y-axis. Default will show no ylabel, or the
y-column name for planar plots.
.. versionchanged:: 1.2.0
Now applicable to planar plots (`scatter`, `hexbin`).
.. versionchanged:: 2.0.0
Now applicable to histograms.
rot : float, default None
Rotation for ticks (xticks for vertical, yticks for horizontal
plots).
fontsize : float, default None
Font size for xticks and yticks.
colormap : str or matplotlib colormap object, default None
Colormap to select colors from. If string, load colormap with that
name from matplotlib.
colorbar : bool, optional
If True, plot colorbar (only relevant for 'scatter' and 'hexbin'
plots).
position : float
Specify relative alignments for bar plot layout.
From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5
(center).
table : bool, Series or DataFrame, default False
If True, draw a table using the data in the DataFrame and the data
will be transposed to meet matplotlib's default layout.
If a Series or DataFrame is passed, use passed data to draw a
table.
yerr : DataFrame, Series, array-like, dict and str
See :ref:`Plotting with Error Bars <visualization.errorbars>` for
detail.
xerr : DataFrame, Series, array-like, dict and str
Equivalent to yerr.
stacked : bool, default False in line and bar plots, and True in area plot
If True, create stacked plot.
secondary_y : bool or sequence, default False
Whether to plot on the secondary y-axis if a list/tuple, which
columns to plot on secondary y-axis.
mark_right : bool, default True
When using a secondary_y axis, automatically mark the column
labels with "(right)" in the legend.
include_bool : bool, default is False
If True, boolean values can be plotted.
**kwargs
Options to pass to matplotlib plotting method.
Returns
-------
:class:`matplotlib.axes.Axes` or numpy.ndarray of them
Notes
-----
- See matplotlib documentation online for more on this subject
Examples
--------
For Series:
.. plot::
:context: close-figs
>>> import bigframes.pandas as bpd
>>> ser = bpd.Series([1, 2, 3, 3])
>>> plot = ser.plot(kind='hist', title="My plot")
For DataFrame:
.. plot::
:context: close-figs
>>> df = bpd.DataFrame({'length': [1.5, 0.5, 1.2, 0.9, 3],
... 'width': [0.7, 0.2, 0.15, 0.2, 1.1]},
... index=['pig', 'rabbit', 'duck', 'chicken', 'horse'])
>>> plot = df.plot(title="DataFrame Plot")
"""


def hist(
self, by: typing.Optional[typing.Sequence[str]] = None, bins: int = 10, **kwargs
):
Expand Down

0 comments on commit 5ff0641

Please sign in to comment.