Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add plot.area in Series #670

Merged
merged 1 commit into from
Aug 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 60 additions & 4 deletions databricks/koalas/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,11 @@ def _get_standard_kind(kind):


if LooseVersion(pd.__version__) < LooseVersion('0.25'):
from pandas.plotting._core import _all_kinds, BarPlot, BoxPlot, HistPlot, MPLPlot, PiePlot
from pandas.plotting._core import _all_kinds, BarPlot, BoxPlot, HistPlot, MPLPlot, PiePlot, \
AreaPlot
else:
from pandas.plotting._core import PlotAccessor
from pandas.plotting._matplotlib import BarPlot, BoxPlot, HistPlot, PiePlot
from pandas.plotting._matplotlib import BarPlot, BoxPlot, HistPlot, PiePlot, AreaPlot
from pandas.plotting._matplotlib.core import MPLPlot
_all_kinds = PlotAccessor._all_kinds

Expand Down Expand Up @@ -447,7 +448,28 @@ def _make_plot(self):
super(KoalasPiePlot, self)._make_plot()


_klasses = [KoalasHistPlot, KoalasBarPlot, KoalasBoxPlot, KoalasPiePlot]
class KoalasAreaPlot(AreaPlot):
    """Area plot that draws from a down-sampled pandas copy of Koalas data.

    The input is sampled on the Spark side so that roughly
    ``_max_sampled_rows`` records are converted to pandas and handed to the
    parent ``AreaPlot``. When the data was actually reduced, the axes are
    annotated with the sampling fraction that was used.
    """

    # Approximate number of records to keep after sampling.
    _max_sampled_rows = 1000

    def __init__(self, data, **kwargs):
        """Sample ``data``, convert it to pandas and initialise the plot.

        Parameters
        ----------
        data : object supporting ``len()`` and exposing ``_kdf``
            ``data._kdf._sdf`` is sampled and ``data._kdf._internal`` is
            copied around the sampled frame (presumably a Koalas Series --
            confirm against the caller).
        **kwargs
            Forwarded unchanged to the parent plot class.
        """
        from databricks.koalas import DataFrame

        row_count = len(data)
        if row_count == 0:
            # Empty input: there is nothing to down-sample, and computing the
            # ratio below would divide by zero.
            self.fraction = 1
        else:
            # make sure the records are roughly `_max_sampled_rows`.
            self.fraction = 1 / (row_count / self._max_sampled_rows)
            if self.fraction > 1:
                self.fraction = 1
        sampled = data._kdf._sdf.sample(fraction=float(self.fraction))
        data = DataFrame(data._kdf._internal.copy(sdf=sampled)).to_pandas()
        super(KoalasAreaPlot, self).__init__(data, **kwargs)

    def _make_plot(self):
        """Annotate the axes when the data was sampled, then draw the plot."""
        if self.fraction < 1:
            ax = self._get_ax(0)
            # Anchor the note at the top-right corner in axes coordinates.
            ax.text(
                1, 1, 'showing the sampled result by fraction %s' % self.fraction,
                size=6, ha='right', va='bottom',
                transform=ax.transAxes)

        super(KoalasAreaPlot, self)._make_plot()


# Koalas plot classes registered in the kind lookup table below.
_klasses = [KoalasHistPlot, KoalasBarPlot, KoalasBoxPlot, KoalasPiePlot, KoalasAreaPlot]
# Maps each class's `_kind` attribute to the class itself.
_plot_klass = {getattr(klass, '_kind'): klass for klass in _klasses}


Expand Down Expand Up @@ -696,7 +718,41 @@ def kde(self, bw_method=None, ind=None, **kwds):
density = kde

def area(self, **kwds):
    """
    Draw a stacked area plot.

    An area plot displays quantitative data visually.
    This function wraps the matplotlib area function.

    Parameters
    ----------
    x : label or position, optional
        Coordinates for the X axis. By default uses the index.
    y : label or position, optional
        Column to plot. By default uses all columns.
    stacked : bool, default True
        Area plots are stacked by default. Set to False to create an
        unstacked plot.
    **kwds : optional
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or numpy.ndarray
        Area plot, or array of area plots if subplots is True.

    Examples
    --------
    >>> df = ks.DataFrame({
    ...     'sales': [3, 2, 3, 9, 10, 6],
    ...     'signups': [5, 5, 6, 12, 14, 13],
    ...     'visits': [20, 42, 28, 62, 81, 50],
    ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01',
    ...                        freq='M'))
    >>> ax = df.sales.plot.area()
    """
    # NOTE(review): the PR author reports that the existing test approach in
    # test_series_plot.py could not be used for this method -- it passed
    # locally but failed on Travis under certain conditions with lower pandas
    # versions (the failure came from the pandas library itself).
    return self(kind='area', **kwds)

def pie(self, **kwds):
"""
Expand Down
6 changes: 4 additions & 2 deletions databricks/koalas/tests/test_series_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from io import BytesIO

import matplotlib
matplotlib.use('agg')
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
Expand All @@ -29,6 +28,9 @@
from databricks.koalas.plot import KoalasHistPlotSummary, KoalasBoxPlotSummary


matplotlib.use('agg')


class SeriesPlotTest(ReusedSQLTestCase, TestUtils):

@property
Expand Down Expand Up @@ -201,7 +203,7 @@ def test_box_summary(self):
def test_missing(self):
ks = self.kdf1['a']

unsupported_functions = ['area', 'kde', 'barh', 'line']
unsupported_functions = ['kde', 'barh', 'line']
for name in unsupported_functions:
with self.assertRaisesRegex(PandasNotImplementedError,
"method.*Series.*{}.*not implemented".format(name)):
Expand Down
1 change: 1 addition & 0 deletions docs/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ specific plotting methods of the form ``Series.plot.<kind>``.
:toctree: api/

Series.plot
Series.plot.area
Series.plot.bar
Series.plot.box
Series.plot.hist
Expand Down