Skip to content

Commit

Permalink
Merge pull request #7351 from sinhrks/boxcln
Browse files Browse the repository at this point in the history
CLN: Simplify boxplot and tests
  • Loading branch information
jreback committed Jun 17, 2014
2 parents f1c5386 + de69d62 commit 7a2db77
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 139 deletions.
145 changes: 70 additions & 75 deletions pandas/tests/test_graphics.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,54 @@ def _check_has_errorbars(self, axes, xerr=0, yerr=0):
self.assertEqual(xerr, xerr_count)
self.assertEqual(yerr, yerr_count)

def _check_box_return_type(self, returned, return_type, expected_keys=None):
    """
    Check box returned type is correct

    Parameters
    ----------
    returned : object to be tested, returned from boxplot
    return_type : str
        return_type passed to boxplot
    expected_keys : list-like, optional
        group labels in subplot case. If not passed,
        the function checks assuming boxplot uses single ax

    Raises
    ------
    AssertionError
        If ``returned`` does not match what ``return_type`` implies, or if
        ``return_type`` is neither None nor one of 'dict', 'axes', 'both'.
    """
    from matplotlib.axes import Axes
    types = {'dict': dict, 'axes': Axes, 'both': tuple}

    # Reject unknown values up front: otherwise the ``types[...]`` lookups
    # below raise KeyError before any explicit AssertionError can be reached.
    if return_type is not None and return_type not in types:
        raise AssertionError("unexpected return_type: %r" % (return_type,))

    if expected_keys is None:
        # should be fixed when the returning default is changed
        if return_type is None:
            return_type = 'dict'

        self.assertIsInstance(returned, types[return_type])
        if return_type == 'both':
            self.assertIsInstance(returned.ax, Axes)
            self.assertIsInstance(returned.lines, dict)
        return

    # should be fixed when the returning default is changed
    if return_type is None:
        for r in self._flatten_visible(returned):
            self.assertIsInstance(r, Axes)
        return

    self.assertIsInstance(returned, OrderedDict)
    self.assertEqual(sorted(returned.keys()), sorted(expected_keys))
    for key, value in iteritems(returned):
        self.assertIsInstance(value, types[return_type])
        # check returned dict has correct mapping
        if return_type == 'axes':
            self.assertEqual(value.get_title(), key)
        elif return_type == 'both':
            self.assertEqual(value.ax.get_title(), key)
            self.assertIsInstance(value.ax, Axes)
            self.assertIsInstance(value.lines, dict)
        else:
            # only 'dict' remains after the validation at the top
            line = value['medians'][0]
            self.assertEqual(line.get_axes().get_title(), key)


@tm.mplskip
class TestSeriesPlots(TestPlotBase):
Expand Down Expand Up @@ -1421,65 +1469,20 @@ def test_boxplot_return_type(self):

with tm.assert_produces_warning(FutureWarning):
result = df.boxplot()
self.assertIsInstance(result, dict) # change to Axes in future
# change to Axes in future
self._check_box_return_type(result, 'dict')

with tm.assert_produces_warning(False):
result = df.boxplot(return_type='dict')
self.assertIsInstance(result, dict)
self._check_box_return_type(result, 'dict')

with tm.assert_produces_warning(False):
result = df.boxplot(return_type='axes')
self.assertIsInstance(result, mpl.axes.Axes)
self._check_box_return_type(result, 'axes')

with tm.assert_produces_warning(False):
result = df.boxplot(return_type='both')
self.assertIsInstance(result, tuple)

@slow
def test_boxplot_return_type_by(self):
    """Check the container and element types returned by grouped boxplots.

    Exercises ``DataFrame.boxplot(by=...)`` and
    ``DataFrame.groupby(...).boxplot()`` with return_type left unset and
    set to 'dict', 'axes' and 'both'.
    """
    import matplotlib as mpl

    frame = DataFrame(np.random.randn(10, 2))
    frame['g'] = ['a'] * 5 + ['b'] * 5

    def check_both(pair):
        # a 'both' element is a tuple whose items 0 and 1 are checked
        # against Axes and dict respectively
        self.assertIsInstance(pair, tuple)
        self.assertIsInstance(pair[0], mpl.axes.Axes)
        self.assertIsInstance(pair[1], dict)

    # old style: return_type=None
    plain = frame.boxplot(by='g')
    self.assertIsInstance(plain, np.ndarray)
    self.assertIsInstance(plain[0], mpl.axes.Axes)

    for kind, klass in [('dict', dict), ('axes', mpl.axes.Axes)]:
        by_result = frame.boxplot(by='g', return_type=kind)
        self.assertIsInstance(by_result, dict)
        self.assertIsInstance(by_result[0], klass)

    by_both = frame.boxplot(by='g', return_type='both')
    self.assertIsInstance(by_both, dict)
    check_both(by_both[0])

    # now for groupby
    with tm.assert_produces_warning(FutureWarning):
        grouped_default = frame.groupby('g').boxplot()
    self.assertIsInstance(grouped_default, dict)
    self.assertIsInstance(grouped_default['a'], dict)

    for kind, klass in [('dict', dict), ('axes', mpl.axes.Axes)]:
        grouped_result = frame.groupby('g').boxplot(return_type=kind)
        self.assertIsInstance(grouped_result, dict)
        self.assertIsInstance(grouped_result['a'], klass)

    grouped_both = frame.groupby('g').boxplot(return_type='both')
    self.assertIsInstance(grouped_both, dict)
    check_both(grouped_both['a'])
self._check_box_return_type(result, 'both')

@slow
def test_kde(self):
Expand Down Expand Up @@ -2278,47 +2281,39 @@ def test_grouped_hist(self):
with tm.assertRaises(AttributeError):
plotting.grouped_hist(df.A, by=df.C, foo='bar')

def _check_box_dict(self, returned, return_type,
                    expected_klass, expected_keys):
    """
    Check the mapping returned by a grouped boxplot

    Parameters
    ----------
    returned : object to be tested; must be an OrderedDict
    return_type : str
        one of 'axes', 'both' or 'dict'; selects how the axes title is
        looked up from each value
    expected_klass : type
        every value in ``returned`` must be an instance of this
    expected_keys : list-like
        keys ``returned`` must have (compared after sorting)

    Raises
    ------
    AssertionError
        If any check fails, or if an item is visited while ``return_type``
        is not one of the three recognised values.
    """
    self.assertTrue(isinstance(returned, OrderedDict))
    self.assertEqual(sorted(returned.keys()), sorted(expected_keys))
    for label, item in iteritems(returned):
        self.assertTrue(isinstance(item, expected_klass))
        # look up the title of the axes tied to this item ...
        if return_type == 'axes':
            title = item.get_title()
        elif return_type == 'both':
            title = item.ax.get_title()
        elif return_type == 'dict':
            median_line = item['medians'][0]
            title = median_line.get_axes().get_title()
        else:
            raise AssertionError
        # ... and check it maps back to the key it is stored under
        self.assertEqual(title, label)

@slow
def test_grouped_box_return_type(self):
import matplotlib.axes

df = self.hist_df

# old style: return_type=None
result = df.boxplot(by='gender')
self.assertIsInstance(result, np.ndarray)
self._check_box_return_type(result, None,
expected_keys=['height', 'weight', 'category'])

# now for groupby
with tm.assert_produces_warning(FutureWarning):
result = df.groupby('gender').boxplot()
self._check_box_return_type(result, 'dict', expected_keys=['Male', 'Female'])

columns2 = 'X B C D A G Y N Q O'.split()
df2 = DataFrame(random.randn(50, 10), columns=columns2)
categories2 = 'A B C D E F G H I J'.split()
df2['category'] = categories2 * 5

types = {'dict': dict, 'axes': matplotlib.axes.Axes, 'both': tuple}
for t, klass in iteritems(types):
for t in ['dict', 'axes', 'both']:
returned = df.groupby('classroom').boxplot(return_type=t)
self._check_box_dict(returned, t, klass, ['A', 'B', 'C'])
self._check_box_return_type(returned, t, expected_keys=['A', 'B', 'C'])

returned = df.boxplot(by='classroom', return_type=t)
self._check_box_dict(returned, t, klass, ['height', 'weight', 'category'])
self._check_box_return_type(returned, t,
expected_keys=['height', 'weight', 'category'])

returned = df2.groupby('category').boxplot(return_type=t)
self._check_box_dict(returned, t, klass, categories2)
self._check_box_return_type(returned, t, expected_keys=categories2)

returned = df2.boxplot(by='category', return_type=t)
self._check_box_dict(returned, t, klass, columns2)
self._check_box_return_type(returned, t, expected_keys=columns2)

@slow
def test_grouped_box_layout(self):
Expand Down
103 changes: 39 additions & 64 deletions pandas/tools/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -2323,13 +2323,11 @@ def boxplot(data, column=None, by=None, ax=None, fontsize=None,
if return_type not in valid_types:
raise ValueError("return_type")


from pandas import Series, DataFrame
if isinstance(data, Series):
data = DataFrame({'x': data})
column = 'x'


def _get_colors():
return _get_standard_colors(color=kwds.get('color'), num_colors=1)

Expand All @@ -2340,8 +2338,9 @@ def maybe_color_bp(bp):
setp(bp['whiskers'],color=colors[0],alpha=1)
setp(bp['medians'],color=colors[2],alpha=1)

def plot_group(grouped, ax):
keys, values = zip(*grouped)
BP = namedtuple("Boxplot", ['ax', 'lines']) # namedtuple to hold results

def plot_group(keys, values, ax):
keys = [com.pprint_thing(x) for x in keys]
values = [remove_na(v) for v in values]
bp = ax.boxplot(values, **kwds)
Expand All @@ -2350,7 +2349,14 @@ def plot_group(grouped, ax):
else:
ax.set_yticklabels(keys, rotation=rot, fontsize=fontsize)
maybe_color_bp(bp)
return bp

# Return axes in multiplot case, maybe revisit later # 985
if return_type == 'dict':
return bp
elif return_type == 'both':
return BP(ax=ax, lines=bp)
else:
return ax

colors = _get_colors()
if column is None:
Expand All @@ -2361,56 +2367,14 @@ def plot_group(grouped, ax):
else:
columns = [column]

BP = namedtuple("Boxplot", ['ax', 'lines']) # namedtuple to hold results

if by is not None:
fig, axes, d = _grouped_plot_by_column(plot_group, data, columns=columns,
by=by, grid=grid, figsize=figsize,
ax=ax, layout=layout)

# Return axes in multiplot case, maybe revisit later # 985
if return_type is None:
ret = axes
if return_type == 'axes':
ret = compat.OrderedDict()
axes = _flatten(axes)[:len(d)]
for k, ax in zip(d.keys(), axes):
ret[k] = ax
elif return_type == 'dict':
ret = d
elif return_type == 'both':
ret = compat.OrderedDict()
axes = _flatten(axes)[:len(d)]
for (k, line), ax in zip(d.items(), axes):
ret[k] = BP(ax=ax, lines=line)
result = _grouped_plot_by_column(plot_group, data, columns=columns,
by=by, grid=grid, figsize=figsize,
ax=ax, layout=layout, return_type=return_type)
else:
if layout is not None:
raise ValueError("The 'layout' keyword is not supported when "
"'by' is None")
if ax is None:
ax = _gca()
fig = ax.get_figure()
data = data._get_numeric_data()
if columns:
cols = columns
else:
cols = data.columns
keys = [com.pprint_thing(x) for x in cols]

# Return boxplot dict in single plot case

clean_values = [remove_na(x) for x in data[cols].values.T]

bp = ax.boxplot(clean_values, **kwds)
maybe_color_bp(bp)

if kwds.get('vert', 1):
ax.set_xticklabels(keys, rotation=rot, fontsize=fontsize)
else:
ax.set_yticklabels(keys, rotation=rot, fontsize=fontsize)
ax.grid(grid)

ret = ax

if return_type is None:
msg = ("\nThe default value for 'return_type' will change to "
Expand All @@ -2420,13 +2384,18 @@ def plot_group(grouped, ax):
"return_type='dict'.")
warnings.warn(msg, FutureWarning)
return_type = 'dict'
if return_type == 'dict':
ret = bp
elif return_type == 'both':
ret = BP(ax=ret, lines=bp)
if ax is None:
ax = _gca()
data = data._get_numeric_data()
if columns is None:
columns = data.columns
else:
data = data[columns]

fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2)
return ret
result = plot_group(columns, data.values.T, ax)
ax.grid(grid)

return result


def format_date_labels(ax, rot):
Expand Down Expand Up @@ -2734,7 +2703,7 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None,
if subplots is True:
naxes = len(grouped)
nrows, ncols = _get_layout(naxes, layout=layout)
_, axes = _subplots(nrows=nrows, ncols=ncols, naxes=naxes, squeeze=False,
fig, axes = _subplots(nrows=nrows, ncols=ncols, naxes=naxes, squeeze=False,
sharex=False, sharey=True)
axes = _flatten(axes)

Expand All @@ -2744,6 +2713,7 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None,
rot=rot, grid=grid, **kwds)
ax.set_title(com.pprint_thing(key))
ret[key] = d
fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2)
else:
from pandas.tools.merge import concat
keys, frames = zip(*grouped)
Expand Down Expand Up @@ -2795,9 +2765,8 @@ def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True,

def _grouped_plot_by_column(plotf, data, columns=None, by=None,
numeric_only=True, grid=False,
figsize=None, ax=None, layout=None, **kwargs):
from pandas.core.frame import DataFrame

figsize=None, ax=None, layout=None, return_type=None,
**kwargs):
grouped = data.groupby(by)
if columns is None:
if not isinstance(by, (list, tuple)):
Expand All @@ -2818,20 +2787,26 @@ def _grouped_plot_by_column(plotf, data, columns=None, by=None,

ravel_axes = _flatten(axes)

out_dict = compat.OrderedDict()
result = compat.OrderedDict()
for i, col in enumerate(columns):
ax = ravel_axes[i]
gp_col = grouped[col]
re_plotf = plotf(gp_col, ax, **kwargs)
keys, values = zip(*gp_col)
re_plotf = plotf(keys, values, ax, **kwargs)
ax.set_title(col)
ax.set_xlabel(com.pprint_thing(by))
result[col] = re_plotf
ax.grid(grid)
out_dict[col] = re_plotf

# Return axes in multiplot case, maybe revisit later # 985
if return_type is None:
result = axes

byline = by[0] if len(by) == 1 else by
fig.suptitle('Boxplot grouped by %s' % byline)
fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2)

return fig, axes, out_dict
return result


def table(ax, data, rowLabels=None, colLabels=None,
Expand Down

0 comments on commit 7a2db77

Please sign in to comment.