-
Notifications
You must be signed in to change notification settings - Fork 43
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
dd3643d
commit 1cb6c5e
Showing
7 changed files
with
330 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
# Copyright 2023 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
from typing import Sequence | ||
|
||
import matplotlib.pyplot as plt | ||
|
||
import bigframes.constants as constants | ||
import third_party.bigframes_vendored.pandas.plotting._core as vendordt | ||
|
||
|
||
class PlotAccessor:
    __doc__ = vendordt.PlotAccessor.__doc__

    def __init__(self, data) -> None:
        # Object whose values get plotted; `hist` rejects bigframes DataFrames.
        self._parent = data

    def hist(self, by: Sequence[str] | None = None, bins: int = 10, **kwargs):
        """Draw a histogram of the parent object on a matplotlib axes.

        Args:
            by: Grouping columns. Only ``None`` is accepted for now.
            bins: Number of bins, forwarded to ``bigframes.pandas.cut``.
            **kwargs: ``backend`` must be absent or ``None``. ``ax``,
                ``figure``, ``figsize``, ``legend``, ``grid``, ``xticks`` and
                ``yticks`` are consumed by this accessor; every other keyword
                is forwarded to ``Axes.hist``.

        Returns:
            The axes the histogram was drawn on.

        Raises:
            NotImplementedError: If ``by`` or ``backend`` is given, or if the
                parent object is a bigframes ``DataFrame``.
        """
        if by is not None:
            raise NotImplementedError(
                f"Non-none `by` argument is not yet supported. {constants.FEEDBACK_LINK}"
            )
        if kwargs.pop("backend", None) is not None:
            raise NotImplementedError(
                f"Only support matplotlib backend for now. {constants.FEEDBACK_LINK}"
            )
        # Imported lazily, as in the original code (presumably to avoid a
        # circular import with bigframes.dataframe -- TODO confirm).
        import bigframes.dataframe as dataframe

        if isinstance(self._parent, dataframe.DataFrame):
            raise NotImplementedError(
                f"`Dataframe.plot.hist` is not implemented yet. {constants.FEEDBACK_LINK}"
            )

        return self._hist_series(
            by=by,
            bins=bins,
            **kwargs,
        )

    def _hist_series(
        self,
        by: Sequence[str] | None = None,
        bins: int = 10,
        **kwargs,
    ):
        """Bin the parent series through bigframes and draw the histogram.

        Args:
            by: Accepted for signature parity with ``hist``; not used here.
            bins: Number of bins passed to ``bigframes.pandas.cut``.
            **kwargs: See ``hist``.

        Returns:
            The axes the histogram was drawn on.

        Raises:
            ValueError: If a non-``None`` ``layout`` keyword is passed.
            AssertionError: If the passed ``ax`` does not belong to the figure
                in use (the passed ``figure`` or, by default, the current one).
        """
        # Only a handful of plot-decoration arguments are supported; they are
        # removed from kwargs so they are not forwarded to `Axes.hist`.
        # `ax` is popped exactly once: a second pop would always yield None
        # and silently discard the caller's axes.
        ax = kwargs.pop("ax", None)
        figsize = kwargs.pop("figsize", None)
        legend = kwargs.pop("legend", False)
        grid = kwargs.pop("grid", None)
        xticks = kwargs.pop("xticks", None)
        yticks = kwargs.pop("yticks", None)

        # Reject unsupported arguments before running any query.
        if kwargs.get("layout", None) is not None:
            raise ValueError("The 'layout' keyword is not supported when 'by' is None")

        # Calculates the bins' values and weights through BigQuery.
        import bigframes.pandas as bpd

        series = self._parent.copy()
        binned = bpd.cut(series, bins=bins, labels=None)
        # After exploding the struct, the (left_exclusive, right_inclusive)
        # pairs form the index of the counts.
        binned_data = (
            binned.struct.explode()
            .value_counts()
            .to_pandas()
            .sort_index(level="left_exclusive")
        )
        weights = binned_data.values
        left_bins = binned_data.index.get_level_values("left_exclusive")
        right_bins = binned_data.index.get_level_values("right_inclusive")
        bin_edges = left_bins.union(right_bins, sort=True)

        # This code takes the hist_series function from pandas and tweaks it a
        # bit. The fallback figure is resolved lazily so that no stray figure
        # is created when the caller supplies one.
        if "figure" in kwargs:
            fig = kwargs.pop("figure")
        elif plt.get_fignums():
            fig = plt.gcf()
        else:
            fig = plt.figure(figsize=figsize)
        if figsize is not None and tuple(figsize) != tuple(fig.get_size_inches()):
            fig.set_size_inches(*figsize, forward=True)

        if ax is None:
            ax = fig.gca()
        elif ax.get_figure() != fig:
            raise AssertionError("passed axis not bound to passed figure")

        if legend:
            kwargs["label"] = series.name
        # Each bin contributes its left edge as one sample weighted by the
        # bin's count, so matplotlib reproduces the pre-aggregated histogram.
        ax.hist(x=left_bins, bins=bin_edges, weights=weights, **kwargs)
        if legend:
            ax.legend()
        if grid is not None:
            ax.grid(grid)
        if xticks is not None:
            ax.set_xticks(xticks)
        if yticks is not None:
            ax.set_yticks(yticks)

        return ax
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
# Copyright 2023 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
from typing import Iterable | ||
|
||
from matplotlib.axes import Axes | ||
import matplotlib.pyplot as plt | ||
import numpy as np | ||
|
||
|
||
def hist_series(
    series,
    by=None,
    ax=None,
    grid: bool = True,
    xlabelsize: int | None = None,
    xrot: float | None = None,
    ylabelsize: int | None = None,
    yrot: float | None = None,
    figsize=None,
    bins: int = 10,
    legend: bool = False,
    **kwargs,
):
    """Plot a histogram of ``series`` from counts computed by bigframes.

    The series is binned with ``bigframes.pandas.cut`` and only the per-bin
    counts are brought into pandas; matplotlib then draws the pre-aggregated
    histogram. ``by`` is accepted but not used by this function.

    Returns the axes drawn on. Raises ``ValueError`` for a non-``None``
    ``layout`` keyword and ``AssertionError`` when ``ax`` is not bound to the
    figure in use.
    """
    import bigframes.pandas as bpd

    # Calculates the bins' values and weights through BigQuery.
    intervals = bpd.cut(series, bins=bins, labels=None)
    exploded = intervals.struct.explode()
    counts = exploded.value_counts().to_pandas()
    counts = counts.sort_index(level="left_exclusive")

    heights = counts.values
    lower_edges = counts.index.get_level_values("left_exclusive")
    upper_edges = counts.index.get_level_values("right_inclusive")
    edges = lower_edges.union(upper_edges, sort=True)

    # This code takes the hist_series function from pandas and tweaks it a bit.
    if kwargs.get("layout") is not None:
        raise ValueError("The 'layout' keyword is not supported when 'by' is None")

    # hack until the plotting interface is a bit more unified
    # The fallback figure is resolved before looking at kwargs, exactly as a
    # default argument to `pop` would be.
    if plt.get_fignums():
        fallback_fig = plt.gcf()
    else:
        fallback_fig = plt.figure(figsize=figsize)
    fig = kwargs.pop("figure", fallback_fig)

    if figsize is not None and tuple(figsize) != tuple(fig.get_size_inches()):
        fig.set_size_inches(*figsize, forward=True)

    if ax is None:
        ax = fig.gca()
    elif ax.get_figure() != fig:
        raise AssertionError("passed axis not bound to passed figure")

    if legend:
        kwargs["label"] = series.name
    # Left edges act as samples, weighted by the count of their bin.
    ax.hist(x=lower_edges, bins=edges, weights=heights, **kwargs)
    if legend:
        ax.legend()
    ax.grid(grid)

    axes = np.array([ax])
    _set_ticks_props(
        axes,
        xlabelsize=xlabelsize,
        xrot=xrot,
        ylabelsize=ylabelsize,
        yrot=yrot,
    )

    # Unwrap a one-element, one-dimensional array into the bare axes.
    if hasattr(axes, "ndim") and axes.ndim == 1 and len(axes) == 1:
        return axes[0]
    return axes
|
||
|
||
def _set_ticks_props(
    axes: Iterable[Axes],
    xlabelsize: int | None = None,
    xrot=None,
    ylabelsize: int | None = None,
    yrot=None,
):
    """Apply font size and rotation to the tick labels of every axes.

    A property is only touched when its argument is not ``None``. The
    ``axes`` argument is returned unchanged.
    """
    # (tick-label getter, text property, requested value), in application order.
    requested = (
        ("get_xticklabels", "fontsize", xlabelsize),
        ("get_xticklabels", "rotation", xrot),
        ("get_yticklabels", "fontsize", ylabelsize),
        ("get_yticklabels", "rotation", yrot),
    )
    for current in axes:
        for getter_name, prop, value in requested:
            if value is None:
                continue
            plt.setp(getattr(current, getter_name)(), **{prop: value})
    return axes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# Copyright 2023 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import pandas._testing as tm | ||
|
||
|
||
def test_series_hist_bins(scalars_dfs):
    """The bigframes histogram has the same bars as the pandas histogram."""
    scalars_df, scalars_pandas_df = scalars_dfs
    bins = 5

    ax = scalars_df["int64_col"].plot.hist(bins=bins)
    # Use `Series.plot.hist` on the pandas side too: `Series.hist` draws on the
    # current figure's axes, i.e. the very axes returned above, which would
    # make every comparison below compare an object with itself.
    pd_ax = scalars_pandas_df["int64_col"].plot.hist(bins=bins)
    assert ax is not pd_ax

    # Check hist has same position and height compared to the pandas one.
    assert len(ax.patches) == len(pd_ax.patches)
    for patch, pd_patch in zip(ax.patches, pd_ax.patches):
        assert patch.xy == pd_patch.xy
        assert patch.get_height() == pd_patch.get_height()
|
||
|
||
def test_series_hist_ticks_props(scalars_dfs):
    """Tick labels get the same font size and rotation as with pandas."""
    scalars_df, scalars_pandas_df = scalars_dfs

    xticks = [20, 18]
    yticks = [30, 40]

    ax = scalars_df["float64_col"].plot.hist(xticks=xticks, yticks=yticks)
    pd_ax = scalars_pandas_df["float64_col"].plot.hist(xticks=xticks, yticks=yticks)

    xlabels = ax.get_xticklabels()
    pd_xlabels = pd_ax.get_xticklabels()
    assert len(xlabels) == len(pd_xlabels)
    for label, pd_label in zip(xlabels, pd_xlabels):
        tm.assert_almost_equal(label.get_fontsize(), pd_label.get_fontsize())
        tm.assert_almost_equal(label.get_rotation(), pd_label.get_rotation())

    # The y axis is checked against its own labels, not the x-axis ones.
    ylabels = ax.get_yticklabels()
    pd_ylabels = pd_ax.get_yticklabels()
    assert len(ylabels) == len(pd_ylabels)
    for label, pd_label in zip(ylabels, pd_ylabels):
        tm.assert_almost_equal(label.get_fontsize(), pd_label.get_fontsize())
        tm.assert_almost_equal(label.get_rotation(), pd_label.get_rotation())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
from typing import Sequence | ||
|
||
from bigframes import constants | ||
|
||
|
||
class PlotAccessor: | ||
def hist(self, by: Sequence[str] | None = None, bins: int = 10, **kwargs): | ||
""" | ||
Draw histogram of the input series using matplotlib. | ||
Parameters | ||
---------- | ||
by : str or sequence, optional | ||
If passed, then used to form histograms for separate groups. | ||
Currently, it is not supported yet. | ||
bins : int, default 10 | ||
Number of histogram bins to be used. | ||
ax : matplotlib axes object, default None | ||
An axes of the current figure. | ||
grid : bool, default None (matlab style default) | ||
Axis grid lines. | ||
xticks : sequence | ||
Values to use for the xticks. | ||
yticks : sequence | ||
Values to use for the yticks. | ||
figsize : a tuple (width, height) in inches | ||
Size of a figure object. | ||
backend : str, default None | ||
Backend to use instead of the backend specified in the option | ||
``plotting.backend``. Currently, only `matplotlib` is not supported yet. | ||
legend : bool, default False | ||
Place legend on axis subplots. | ||
**kwargs | ||
Options to pass to matplotlib plotting method. | ||
Returns | ||
------- | ||
class:`matplotlib.Axes` | ||
A histogram plot. | ||
Examples | ||
-------- | ||
For Series: | ||
.. plot:: | ||
:context: close-figs | ||
>>> import bigframes.pandas as bpd | ||
>>> lst = ['a', 'a', 'a', 'b', 'b', 'b'] | ||
>>> ser = bpd.Series([1, 2, 2, 4, 6, 6], index=lst) | ||
>>> hist = ser.plot.hist() | ||
""" | ||
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) |