From de69d62d7d7664a18e2942af8e3b3ceeded1132d Mon Sep 17 00:00:00 2001 From: sinhrks Date: Thu, 29 May 2014 22:42:40 +0900 Subject: [PATCH] CLN: Simplify boxplot and modify its test --- pandas/tests/test_graphics.py | 145 ++++++++++++++++------------------ pandas/tools/plotting.py | 103 +++++++++--------------- 2 files changed, 109 insertions(+), 139 deletions(-) diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index e0bb179132b34..c49607eef1b42 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -356,6 +356,54 @@ def _check_has_errorbars(self, axes, xerr=0, yerr=0): self.assertEqual(xerr, xerr_count) self.assertEqual(yerr, yerr_count) + def _check_box_return_type(self, returned, return_type, expected_keys=None): + """ + Check box returned type is correct + + Parameters + ---------- + returned : object to be tested, returned from boxplot + return_type : str + return_type passed to boxplot + expected_keys : list-like, optional + group labels in subplot case. If not passed, + the function checks assuming boxplot uses single ax + """ + from matplotlib.axes import Axes + types = {'dict': dict, 'axes': Axes, 'both': tuple} + if expected_keys is None: + # should be fixed when the returning default is changed + if return_type is None: + return_type = 'dict' + + self.assertTrue(isinstance(returned, types[return_type])) + if return_type == 'both': + self.assertIsInstance(returned.ax, Axes) + self.assertIsInstance(returned.lines, dict) + else: + # should be fixed when the returning default is changed + if return_type is None: + for r in self._flatten_visible(returned): + self.assertIsInstance(r, Axes) + return + + self.assertTrue(isinstance(returned, OrderedDict)) + self.assertEqual(sorted(returned.keys()), sorted(expected_keys)) + for key, value in iteritems(returned): + self.assertTrue(isinstance(value, types[return_type])) + # check returned dict has correct mapping + if return_type == 'axes': + self.assertEqual(value.get_title(), key) + elif return_type == 'both': + self.assertEqual(value.ax.get_title(), key) + self.assertIsInstance(value.ax, Axes) + self.assertIsInstance(value.lines, dict) + elif return_type == 'dict': + line = value['medians'][0] + self.assertEqual(line.get_axes().get_title(), key) + else: + raise AssertionError + @tm.mplskip class TestSeriesPlots(TestPlotBase): @@ -1421,65 +1469,20 @@ def test_boxplot_return_type(self): with tm.assert_produces_warning(FutureWarning): result = df.boxplot() - self.assertIsInstance(result, dict) # change to Axes in future + # change to Axes in future + self._check_box_return_type(result, 'dict') with tm.assert_produces_warning(False): result = df.boxplot(return_type='dict') - self.assertIsInstance(result, dict) + self._check_box_return_type(result, 'dict') with tm.assert_produces_warning(False): result = df.boxplot(return_type='axes') - self.assertIsInstance(result, mpl.axes.Axes) + self._check_box_return_type(result, 'axes') with tm.assert_produces_warning(False): result = df.boxplot(return_type='both') - self.assertIsInstance(result, tuple) - - @slow - def test_boxplot_return_type_by(self): - import matplotlib as mpl - - df = DataFrame(np.random.randn(10, 2)) - df['g'] = ['a'] * 5 + ['b'] * 5 - - # old style: return_type=None - result = df.boxplot(by='g') - self.assertIsInstance(result, np.ndarray) - self.assertIsInstance(result[0], mpl.axes.Axes) - - result = df.boxplot(by='g', return_type='dict') - self.assertIsInstance(result, dict) - self.assertIsInstance(result[0], dict) - - result = df.boxplot(by='g', return_type='axes') - self.assertIsInstance(result, dict) - self.assertIsInstance(result[0], mpl.axes.Axes) - - result = df.boxplot(by='g', return_type='both') - self.assertIsInstance(result, dict) - self.assertIsInstance(result[0], tuple) - self.assertIsInstance(result[0][0], mpl.axes.Axes) - self.assertIsInstance(result[0][1], dict) - - # now for groupby - with tm.assert_produces_warning(FutureWarning): - result = df.groupby('g').boxplot() - self.assertIsInstance(result, dict) - self.assertIsInstance(result['a'], dict) - - result = df.groupby('g').boxplot(return_type='dict') - self.assertIsInstance(result, dict) - self.assertIsInstance(result['a'], dict) - - result = df.groupby('g').boxplot(return_type='axes') - self.assertIsInstance(result, dict) - self.assertIsInstance(result['a'], mpl.axes.Axes) - - result = df.groupby('g').boxplot(return_type='both') - self.assertIsInstance(result, dict) - self.assertIsInstance(result['a'], tuple) - self.assertIsInstance(result['a'][0], mpl.axes.Axes) - self.assertIsInstance(result['a'][1], dict) + self._check_box_return_type(result, 'both') @slow def test_kde(self): @@ -2278,47 +2281,39 @@ def test_grouped_hist(self): with tm.assertRaises(AttributeError): plotting.grouped_hist(df.A, by=df.C, foo='bar') - def _check_box_dict(self, returned, return_type, - expected_klass, expected_keys): - self.assertTrue(isinstance(returned, OrderedDict)) - self.assertEqual(sorted(returned.keys()), sorted(expected_keys)) - for key, value in iteritems(returned): - self.assertTrue(isinstance(value, expected_klass)) - # check returned dict has correct mapping - if return_type == 'axes': - self.assertEqual(value.get_title(), key) - elif return_type == 'both': - self.assertEqual(value.ax.get_title(), key) - elif return_type == 'dict': - line = value['medians'][0] - self.assertEqual(line.get_axes().get_title(), key) - else: - raise AssertionError - @slow def test_grouped_box_return_type(self): - import matplotlib.axes - df = self.hist_df + # old style: return_type=None + result = df.boxplot(by='gender') + self.assertIsInstance(result, np.ndarray) + self._check_box_return_type(result, None, + expected_keys=['height', 'weight', 'category']) + + # now for groupby + with tm.assert_produces_warning(FutureWarning): + result = df.groupby('gender').boxplot() + self._check_box_return_type(result, 'dict', expected_keys=['Male', 'Female']) + columns2 = 'X B C D A G Y N Q O'.split() df2 = DataFrame(random.randn(50, 10), columns=columns2) categories2 = 'A B C D E F G H I J'.split() df2['category'] = categories2 * 5 - types = {'dict': dict, 'axes': matplotlib.axes.Axes, 'both': tuple} - for t, klass in iteritems(types): + for t in ['dict', 'axes', 'both']: returned = df.groupby('classroom').boxplot(return_type=t) - self._check_box_dict(returned, t, klass, ['A', 'B', 'C']) + self._check_box_return_type(returned, t, expected_keys=['A', 'B', 'C']) returned = df.boxplot(by='classroom', return_type=t) - self._check_box_dict(returned, t, klass, ['height', 'weight', 'category']) + self._check_box_return_type(returned, t, + expected_keys=['height', 'weight', 'category']) returned = df2.groupby('category').boxplot(return_type=t) - self._check_box_dict(returned, t, klass, categories2) + self._check_box_return_type(returned, t, expected_keys=categories2) returned = df2.boxplot(by='category', return_type=t) - self._check_box_dict(returned, t, klass, columns2) + self._check_box_return_type(returned, t, expected_keys=columns2) @slow def test_grouped_box_layout(self): diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 814c1f60cea50..37a982acc0bbd 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -2323,13 +2323,11 @@ def boxplot(data, column=None, by=None, ax=None, fontsize=None, if return_type not in valid_types: raise ValueError("return_type") - from pandas import Series, DataFrame if isinstance(data, Series): data = DataFrame({'x': data}) column = 'x' - def _get_colors(): return _get_standard_colors(color=kwds.get('color'), num_colors=1) @@ -2340,8 +2338,9 @@ def maybe_color_bp(bp): setp(bp['whiskers'],color=colors[0],alpha=1) setp(bp['medians'],color=colors[2],alpha=1) - def plot_group(grouped, ax): - keys, values = zip(*grouped) + BP = namedtuple("Boxplot", ['ax', 'lines']) # namedtuple to hold results + + def plot_group(keys, values, ax): keys = [com.pprint_thing(x) for x in keys] values = [remove_na(v) for v in values] bp = ax.boxplot(values, **kwds) @@ -2350,7 +2349,14 @@ def plot_group(grouped, ax): else: ax.set_yticklabels(keys, rotation=rot, fontsize=fontsize) maybe_color_bp(bp) - return bp + + # Return axes in multiplot case, maybe revisit later # 985 + if return_type == 'dict': + return bp + elif return_type == 'both': + return BP(ax=ax, lines=bp) + else: + return ax colors = _get_colors() if column is None: @@ -2361,56 +2367,14 @@ def plot_group(grouped, ax): else: columns = [column] - BP = namedtuple("Boxplot", ['ax', 'lines']) # namedtuple to hold results - if by is not None: - fig, axes, d = _grouped_plot_by_column(plot_group, data, columns=columns, - by=by, grid=grid, figsize=figsize, - ax=ax, layout=layout) - - # Return axes in multiplot case, maybe revisit later # 985 - if return_type is None: - ret = axes - if return_type == 'axes': - ret = compat.OrderedDict() - axes = _flatten(axes)[:len(d)] - for k, ax in zip(d.keys(), axes): - ret[k] = ax - elif return_type == 'dict': - ret = d - elif return_type == 'both': - ret = compat.OrderedDict() - axes = _flatten(axes)[:len(d)] - for (k, line), ax in zip(d.items(), axes): - ret[k] = BP(ax=ax, lines=line) + result = _grouped_plot_by_column(plot_group, data, columns=columns, + by=by, grid=grid, figsize=figsize, + ax=ax, layout=layout, return_type=return_type) else: if layout is not None: raise ValueError("The 'layout' keyword is not supported when " "'by' is None") - if ax is None: - ax = _gca() - fig = ax.get_figure() - data = data._get_numeric_data() - if columns: - cols = columns - else: - cols = data.columns - keys = [com.pprint_thing(x) for x in cols] - - # Return boxplot dict in single plot case - - clean_values = [remove_na(x) for x in data[cols].values.T] - - bp = ax.boxplot(clean_values, **kwds) - maybe_color_bp(bp) - - if kwds.get('vert', 1): - ax.set_xticklabels(keys, rotation=rot, fontsize=fontsize) - else: - ax.set_yticklabels(keys, rotation=rot, fontsize=fontsize) - ax.grid(grid) - - ret = ax if return_type is None: msg = ("\nThe default value for 'return_type' will change to " @@ -2420,13 +2384,18 @@ def plot_group(grouped, ax): "return_type='dict'.") warnings.warn(msg, FutureWarning) return_type = 'dict' - if return_type == 'dict': - ret = bp - elif return_type == 'both': - ret = BP(ax=ret, lines=bp) + if ax is None: + ax = _gca() + data = data._get_numeric_data() + if columns is None: + columns = data.columns + else: + data = data[columns] - fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) - return ret + result = plot_group(columns, data.values.T, ax) + ax.grid(grid) + + return result def format_date_labels(ax, rot): @@ -2734,7 +2703,7 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, if subplots is True: naxes = len(grouped) nrows, ncols = _get_layout(naxes, layout=layout) - _, axes = _subplots(nrows=nrows, ncols=ncols, naxes=naxes, squeeze=False, + fig, axes = _subplots(nrows=nrows, ncols=ncols, naxes=naxes, squeeze=False, sharex=False, sharey=True) axes = _flatten(axes) @@ -2744,6 +2713,7 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, rot=rot, grid=grid, **kwds) ax.set_title(com.pprint_thing(key)) ret[key] = d + fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) else: from pandas.tools.merge import concat keys, frames = zip(*grouped) @@ -2795,9 +2765,8 @@ def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True, def _grouped_plot_by_column(plotf, data, columns=None, by=None, numeric_only=True, grid=False, - figsize=None, ax=None, layout=None, **kwargs): - from pandas.core.frame import DataFrame - + figsize=None, ax=None, layout=None, return_type=None, + **kwargs): grouped = data.groupby(by) if columns is None: if not isinstance(by, (list, tuple)): @@ -2818,20 +2787,26 @@ def _grouped_plot_by_column(plotf, data, columns=None, by=None, ravel_axes = _flatten(axes) - out_dict = compat.OrderedDict() + result = compat.OrderedDict() for i, col in enumerate(columns): ax = ravel_axes[i] gp_col = grouped[col] - re_plotf = plotf(gp_col, ax, **kwargs) + keys, values = zip(*gp_col) + re_plotf = plotf(keys, values, ax, **kwargs) ax.set_title(col) ax.set_xlabel(com.pprint_thing(by)) + result[col] = re_plotf ax.grid(grid) - out_dict[col] = re_plotf + + # Return axes in multiplot case, maybe revisit later # 985 + if return_type is None: + result = axes byline = by[0] if len(by) == 1 else by fig.suptitle('Boxplot grouped by %s' % byline) + fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) - return fig, axes, out_dict + return result def table(ax, data, rowLabels=None, colLabels=None,