Skip to content

Commit

Permalink
Add plot.barh in Series (#673)
Browse files Browse the repository at this point in the history
This PR adds `plot.barh` to Series.

It can be tested as follows:

```python
import databricks.koalas as ks

kdf = ks.range(10)
kdf.to_pandas()['id'].plot.barh(colormap='Paired').figure.savefig("image1.png")
kdf['id'].plot.barh(colormap='Paired').figure.savefig("image2.png")
```

![image1](https://user-images.githubusercontent.com/6477701/63411569-c68b4100-c430-11e9-9d88-9f1113663def.png)

For this plot, only the first 1,000 rows are drawn; when the Series has more rows than that, the figure is annotated with "showing top 1,000 elements only".

```python
import databricks.koalas as ks

ks.range(1001)['id'].plot.barh(colormap='Paired').figure.savefig("image3.png")
```

![image3](https://user-images.githubusercontent.com/6477701/63411580-cc812200-c430-11e9-945b-0f74305c2507.png)


Partially addresses #665
  • Loading branch information
HyukjinKwon authored Aug 22, 2019
1 parent d9147dc commit 0969855
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 5 deletions.
70 changes: 66 additions & 4 deletions databricks/koalas/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,11 @@ def _get_standard_kind(kind):

# The plot classes are imported from different pandas modules depending on the
# installed pandas version; both branches bind the same set of names.
if LooseVersion(pd.__version__) < LooseVersion('0.25'):
    from pandas.plotting._core import (
        _all_kinds, BarPlot, BoxPlot, HistPlot, MPLPlot, PiePlot, AreaPlot, LinePlot, BarhPlot)
else:
    from pandas.plotting._core import PlotAccessor
    from pandas.plotting._matplotlib import (
        BarPlot, BoxPlot, HistPlot, PiePlot, AreaPlot, LinePlot, BarhPlot)
    from pandas.plotting._matplotlib.core import MPLPlot
    # In this branch ``_all_kinds`` is read from the accessor class instead of imported.
    _all_kinds = PlotAccessor._all_kinds

Expand Down Expand Up @@ -490,8 +491,38 @@ def _make_plot(self):
super(KoalasLinePlot, self)._make_plot()


class KoalasBarhPlot(BarhPlot):
    """Horizontal bar plot that draws at most ``max_rows`` leading rows of its input."""

    # Maximum number of rows that are converted and passed on to ``BarhPlot``.
    max_rows = 1000

    def __init__(self, data, **kwargs):
        """
        Convert the leading rows of ``data`` to a pandas frame and initialise the plot.

        Parameters
        ----------
        data
            Object providing ``head(n)`` whose result supports
            ``.to_pandas().to_frame()`` (presumably a Koalas Series - confirm
            against the caller).
        **kwargs
            Passed through unchanged to ``BarhPlot.__init__``.
        """
        limit = KoalasBarhPlot.max_rows
        # Ask for one row more than the limit so that truncation can be detected.
        # For categorical variables, it is likely called from df.x.value_counts().plot.barh()
        local_frame = data.head(limit + 1).to_pandas().to_frame()
        # ``partial`` records whether rows were dropped; ``_make_plot`` reads it.
        self.partial = len(local_frame) > limit
        if self.partial:
            local_frame = local_frame.iloc[:limit]
        super(KoalasBarhPlot, self).__init__(local_frame, **kwargs)

    def _make_plot(self):
        """Add a truncation note to the first axes when needed, then draw the bars."""
        if self.partial:
            note_ax = self._get_ax(0)
            note_style = dict(
                size=6, ha='right', va='bottom', transform=self._get_ax(0).transAxes)
            note_ax.text(1, 1, 'showing top 1,000 elements only', **note_style)
            self.data = self.data.iloc[:KoalasBarhPlot.max_rows]

        super(KoalasBarhPlot, self)._make_plot()


# Koalas plot classes; ``_plot_klass`` maps each class's ``_kind`` attribute to the class.
_klasses = [
    KoalasHistPlot,
    KoalasBarPlot,
    KoalasBoxPlot,
    KoalasPiePlot,
    KoalasAreaPlot,
    KoalasLinePlot,
    KoalasBarhPlot,
]
_plot_klass = {klass._kind: klass for klass in _klasses}


Expand Down Expand Up @@ -715,7 +746,38 @@ def bar(self, **kwds):
return self(kind='bar', **kwds)

def barh(self, **kwds):
    """
    Make a horizontal bar plot.

    A horizontal bar plot is a plot that presents quantitative data with
    rectangular bars with lengths proportional to the values that they
    represent. A bar plot shows comparisons among discrete categories. One
    axis of the plot shows the specific categories being compared, and the
    other axis represents a measured value.

    Parameters
    ----------
    x : label or position, default DataFrame.index
        Column to be used for categories.
    y : label or position, default All numeric columns in dataframe
        Columns to be plotted from the DataFrame.
    **kwds
        Keyword arguments to pass on to :meth:`databricks.koalas.DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.axes.Axes` or numpy.ndarray of them

    See Also
    --------
    matplotlib.axes.Axes.bar : Plot a vertical bar plot using matplotlib.

    Examples
    --------
    >>> df = ks.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
    >>> ax = df.val.plot.barh()
    """
    # Delegate to the accessor's own call with the plot kind fixed to 'barh'.
    return self(kind='barh', **kwds)

def box(self, **kwds):
"""
Expand Down
23 changes: 22 additions & 1 deletion databricks/koalas/tests/test_series_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,27 @@ def test_line_plot(self):
ax2 = kdf['a'].plot("line", colormap='Paired')
self.compare_plots(ax1, ax2)

def test_barh_plot(self):
    """Plot column 'a' of ``pdf1`` and ``kdf1`` as barh and compare the two axes."""
    reference_frame = self.pdf1
    koalas_frame = self.kdf1

    expected_ax = reference_frame['a'].plot("barh", colormap='Paired')
    actual_ax = koalas_frame['a'].plot("barh", colormap='Paired')
    self.compare_plots(expected_ax, actual_ax)

def test_barh_plot_limited(self):
    """
    Compare a barh plot of ``kdf2['id']`` with one of the first 1000 rows of
    ``pdf2['id']`` that carries the same truncation note added by hand.
    """
    reference_frame = self.pdf2
    koalas_frame = self.kdf2

    # The values returned by subplots() are not used; each plot call returns its own axes.
    plt.subplots(1, 1)
    expected_ax = reference_frame['id'][:1000].plot.barh(colormap='Paired')
    expected_ax.text(
        1, 1, 'showing top 1,000 elements only',
        size=6, ha='right', va='bottom', transform=expected_ax.transAxes)

    plt.subplots(1, 1)
    actual_ax = koalas_frame['id'].plot.barh(colormap='Paired')

    self.compare_plots(expected_ax, actual_ax)

def test_hist_plot(self):
pdf = self.pdf1
kdf = self.kdf1
Expand Down Expand Up @@ -210,7 +231,7 @@ def test_box_summary(self):
def test_missing(self):
ks = self.kdf1['a']

unsupported_functions = ['kde', 'barh']
unsupported_functions = ['kde']
for name in unsupported_functions:
with self.assertRaisesRegex(PandasNotImplementedError,
"method.*Series.*{}.*not implemented".format(name)):
Expand Down
1 change: 1 addition & 0 deletions docs/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,7 @@ specific plotting methods of the form ``Series.plot.<kind>``.
Series.plot
Series.plot.area
Series.plot.bar
Series.plot.barh
Series.plot.box
Series.plot.hist
Series.plot.line
Expand Down

0 comments on commit 0969855

Please sign in to comment.