From de69d62d7d7664a18e2942af8e3b3ceeded1132d Mon Sep 17 00:00:00 2001
From: sinhrks <sinhrks@gmail.com>
Date: Thu, 29 May 2014 22:42:40 +0900
Subject: [PATCH] CLN: Simplify boxplot and modify its test

---
 pandas/tests/test_graphics.py | 145 ++++++++++++++++------------------
 pandas/tools/plotting.py      | 103 +++++++++---------------
 2 files changed, 109 insertions(+), 139 deletions(-)

diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py
index e0bb179132b34..c49607eef1b42 100644
--- a/pandas/tests/test_graphics.py
+++ b/pandas/tests/test_graphics.py
@@ -356,6 +356,54 @@ def _check_has_errorbars(self, axes, xerr=0, yerr=0):
             self.assertEqual(xerr, xerr_count)
             self.assertEqual(yerr, yerr_count)
 
+    def _check_box_return_type(self, returned, return_type, expected_keys=None):
+        """
+        Check that the object returned by boxplot has the expected type
+
+        Parameters
+        ----------
+        returned : object to be tested, returned from boxplot
+        return_type : str
+            return_type passed to boxplot
+        expected_keys : list-like, optional
+            Expected keys of the returned mapping in the subplot case.
+            If not passed, the check assumes boxplot used a single ax.
+        """
+        from matplotlib.axes import Axes
+        types = {'dict': dict, 'axes': Axes, 'both': tuple}
+        if expected_keys is None:
+            # TODO: revisit when the default return_type is changed
+            if return_type is None:
+                return_type = 'dict'
+
+            self.assertTrue(isinstance(returned, types[return_type]))
+            if return_type == 'both':
+                self.assertIsInstance(returned.ax, Axes)
+                self.assertIsInstance(returned.lines, dict)
+        else:
+            # TODO: revisit when the default return_type is changed
+            if return_type is None:
+                for r in self._flatten_visible(returned):
+                    self.assertIsInstance(r, Axes)
+                return
+
+            self.assertTrue(isinstance(returned, OrderedDict))
+            self.assertEqual(sorted(returned.keys()), sorted(expected_keys))
+            for key, value in iteritems(returned):
+                self.assertTrue(isinstance(value, types[return_type]))
+                # check returned dict has correct mapping
+                if return_type == 'axes':
+                    self.assertEqual(value.get_title(), key)
+                elif return_type == 'both':
+                    self.assertEqual(value.ax.get_title(), key)
+                    self.assertIsInstance(value.ax, Axes)
+                    self.assertIsInstance(value.lines, dict)
+                elif return_type == 'dict':
+                    line = value['medians'][0]
+                    self.assertEqual(line.get_axes().get_title(), key)
+                else:
+                    raise AssertionError
+
 
 @tm.mplskip
 class TestSeriesPlots(TestPlotBase):
@@ -1421,65 +1469,20 @@ def test_boxplot_return_type(self):
 
         with tm.assert_produces_warning(FutureWarning):
             result = df.boxplot()
-        self.assertIsInstance(result, dict)  # change to Axes in future
+        # change to Axes in future
+        self._check_box_return_type(result, 'dict')
 
         with tm.assert_produces_warning(False):
             result = df.boxplot(return_type='dict')
-        self.assertIsInstance(result, dict)
+        self._check_box_return_type(result, 'dict')
 
         with tm.assert_produces_warning(False):
             result = df.boxplot(return_type='axes')
-        self.assertIsInstance(result, mpl.axes.Axes)
+        self._check_box_return_type(result, 'axes')
 
         with tm.assert_produces_warning(False):
             result = df.boxplot(return_type='both')
-        self.assertIsInstance(result, tuple)
-
-    @slow
-    def test_boxplot_return_type_by(self):
-        import matplotlib as mpl
-
-        df = DataFrame(np.random.randn(10, 2))
-        df['g'] = ['a'] * 5 + ['b'] * 5
-
-        # old style: return_type=None
-        result = df.boxplot(by='g')
-        self.assertIsInstance(result, np.ndarray)
-        self.assertIsInstance(result[0], mpl.axes.Axes)
-
-        result = df.boxplot(by='g', return_type='dict')
-        self.assertIsInstance(result, dict)
-        self.assertIsInstance(result[0], dict)
-
-        result = df.boxplot(by='g', return_type='axes')
-        self.assertIsInstance(result, dict)
-        self.assertIsInstance(result[0], mpl.axes.Axes)
-
-        result = df.boxplot(by='g', return_type='both')
-        self.assertIsInstance(result, dict)
-        self.assertIsInstance(result[0], tuple)
-        self.assertIsInstance(result[0][0], mpl.axes.Axes)
-        self.assertIsInstance(result[0][1], dict)
-
-        # now for groupby
-        with tm.assert_produces_warning(FutureWarning):
-            result = df.groupby('g').boxplot()
-        self.assertIsInstance(result, dict)
-        self.assertIsInstance(result['a'], dict)
-
-        result = df.groupby('g').boxplot(return_type='dict')
-        self.assertIsInstance(result, dict)
-        self.assertIsInstance(result['a'], dict)
-
-        result = df.groupby('g').boxplot(return_type='axes')
-        self.assertIsInstance(result, dict)
-        self.assertIsInstance(result['a'], mpl.axes.Axes)
-
-        result = df.groupby('g').boxplot(return_type='both')
-        self.assertIsInstance(result, dict)
-        self.assertIsInstance(result['a'], tuple)
-        self.assertIsInstance(result['a'][0], mpl.axes.Axes)
-        self.assertIsInstance(result['a'][1], dict)
+        self._check_box_return_type(result, 'both')
 
     @slow
     def test_kde(self):
@@ -2278,47 +2281,39 @@ def test_grouped_hist(self):
         with tm.assertRaises(AttributeError):
             plotting.grouped_hist(df.A, by=df.C, foo='bar')
 
-    def _check_box_dict(self, returned, return_type,
-                        expected_klass, expected_keys):
-        self.assertTrue(isinstance(returned, OrderedDict))
-        self.assertEqual(sorted(returned.keys()), sorted(expected_keys))
-        for key, value in iteritems(returned):
-            self.assertTrue(isinstance(value, expected_klass))
-            # check returned dict has correct mapping
-            if return_type == 'axes':
-                self.assertEqual(value.get_title(), key)
-            elif return_type == 'both':
-                self.assertEqual(value.ax.get_title(), key)
-            elif return_type == 'dict':
-                line = value['medians'][0]
-                self.assertEqual(line.get_axes().get_title(), key)
-            else:
-                raise AssertionError
-
     @slow
     def test_grouped_box_return_type(self):
-        import matplotlib.axes
-
         df = self.hist_df
 
+        # old style: return_type=None
+        result = df.boxplot(by='gender')
+        self.assertIsInstance(result, np.ndarray)
+        self._check_box_return_type(result, None,
+                                    expected_keys=['height', 'weight', 'category'])
+
+        # now for groupby
+        with tm.assert_produces_warning(FutureWarning):
+            result = df.groupby('gender').boxplot()
+        self._check_box_return_type(result, 'dict', expected_keys=['Male', 'Female'])
+
         columns2 = 'X B C D A G Y N Q O'.split()
         df2 = DataFrame(random.randn(50, 10), columns=columns2)
         categories2 = 'A B C D E F G H I J'.split()
         df2['category'] = categories2 * 5
 
-        types = {'dict': dict, 'axes': matplotlib.axes.Axes, 'both': tuple}
-        for t, klass in iteritems(types):
+        for t in ['dict', 'axes', 'both']:
             returned = df.groupby('classroom').boxplot(return_type=t)
-            self._check_box_dict(returned, t, klass, ['A', 'B', 'C'])
+            self._check_box_return_type(returned, t, expected_keys=['A', 'B', 'C'])
 
             returned = df.boxplot(by='classroom', return_type=t)
-            self._check_box_dict(returned, t, klass, ['height', 'weight', 'category'])
+            self._check_box_return_type(returned, t,
+                                        expected_keys=['height', 'weight', 'category'])
 
             returned = df2.groupby('category').boxplot(return_type=t)
-            self._check_box_dict(returned, t, klass, categories2)
+            self._check_box_return_type(returned, t, expected_keys=categories2)
 
             returned = df2.boxplot(by='category', return_type=t)
-            self._check_box_dict(returned, t, klass, columns2)
+            self._check_box_return_type(returned, t, expected_keys=columns2)
 
     @slow
     def test_grouped_box_layout(self):
diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py
index 814c1f60cea50..37a982acc0bbd 100644
--- a/pandas/tools/plotting.py
+++ b/pandas/tools/plotting.py
@@ -2323,13 +2323,11 @@ def boxplot(data, column=None, by=None, ax=None, fontsize=None,
     if return_type not in valid_types:
         raise ValueError("return_type")
 
-
     from pandas import Series, DataFrame
     if isinstance(data, Series):
         data = DataFrame({'x': data})
         column = 'x'
 
-
     def _get_colors():
         return _get_standard_colors(color=kwds.get('color'), num_colors=1)
 
@@ -2340,8 +2338,9 @@ def maybe_color_bp(bp):
             setp(bp['whiskers'],color=colors[0],alpha=1)
             setp(bp['medians'],color=colors[2],alpha=1)
 
-    def plot_group(grouped, ax):
-        keys, values = zip(*grouped)
+    BP = namedtuple("Boxplot", ['ax', 'lines'])  # namedtuple to hold results
+
+    def plot_group(keys, values, ax):
         keys = [com.pprint_thing(x) for x in keys]
         values = [remove_na(v) for v in values]
         bp = ax.boxplot(values, **kwds)
@@ -2350,7 +2349,14 @@ def plot_group(grouped, ax):
         else:
             ax.set_yticklabels(keys, rotation=rot, fontsize=fontsize)
         maybe_color_bp(bp)
-        return bp
+
+        # Return axes in multiplot case, maybe revisit later # 985
+        if return_type == 'dict':
+            return bp
+        elif return_type == 'both':
+            return BP(ax=ax, lines=bp)
+        else:
+            return ax
 
     colors = _get_colors()
     if column is None:
@@ -2361,56 +2367,14 @@ def plot_group(grouped, ax):
         else:
             columns = [column]
 
-    BP = namedtuple("Boxplot", ['ax', 'lines'])  # namedtuple to hold results
-
     if by is not None:
-        fig, axes, d = _grouped_plot_by_column(plot_group, data, columns=columns,
-                                               by=by, grid=grid, figsize=figsize,
-                                               ax=ax, layout=layout)
-
-        # Return axes in multiplot case, maybe revisit later # 985
-        if return_type is None:
-            ret = axes
-        if return_type == 'axes':
-            ret = compat.OrderedDict()
-            axes = _flatten(axes)[:len(d)]
-            for k, ax in zip(d.keys(), axes):
-                ret[k] = ax
-        elif return_type == 'dict':
-            ret = d
-        elif return_type == 'both':
-            ret = compat.OrderedDict()
-            axes = _flatten(axes)[:len(d)]
-            for (k, line), ax in zip(d.items(), axes):
-                ret[k] = BP(ax=ax, lines=line)
+        result = _grouped_plot_by_column(plot_group, data, columns=columns,
+                                         by=by, grid=grid, figsize=figsize,
+                                         ax=ax, layout=layout, return_type=return_type)
     else:
         if layout is not None:
             raise ValueError("The 'layout' keyword is not supported when "
                              "'by' is None")
-        if ax is None:
-            ax = _gca()
-        fig = ax.get_figure()
-        data = data._get_numeric_data()
-        if columns:
-            cols = columns
-        else:
-            cols = data.columns
-        keys = [com.pprint_thing(x) for x in cols]
-
-        # Return boxplot dict in single plot case
-
-        clean_values = [remove_na(x) for x in data[cols].values.T]
-
-        bp = ax.boxplot(clean_values, **kwds)
-        maybe_color_bp(bp)
-
-        if kwds.get('vert', 1):
-            ax.set_xticklabels(keys, rotation=rot, fontsize=fontsize)
-        else:
-            ax.set_yticklabels(keys, rotation=rot, fontsize=fontsize)
-        ax.grid(grid)
-
-        ret = ax
 
         if return_type is None:
             msg = ("\nThe default value for 'return_type' will change to "
@@ -2420,13 +2384,18 @@ def plot_group(grouped, ax):
                    "return_type='dict'.")
             warnings.warn(msg, FutureWarning)
             return_type = 'dict'
-        if return_type == 'dict':
-            ret = bp
-        elif return_type == 'both':
-            ret = BP(ax=ret, lines=bp)
+        if ax is None:
+            ax = _gca()
+        data = data._get_numeric_data()
+        if columns is None:
+            columns = data.columns
+        else:
+            data = data[columns]
 
-    fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2)
-    return ret
+        result = plot_group(columns, data.values.T, ax)
+        ax.grid(grid)
+
+    return result
 
 
 def format_date_labels(ax, rot):
@@ -2734,7 +2703,7 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None,
     if subplots is True:
         naxes = len(grouped)
         nrows, ncols = _get_layout(naxes, layout=layout)
-        _, axes = _subplots(nrows=nrows, ncols=ncols, naxes=naxes, squeeze=False,
+        fig, axes = _subplots(nrows=nrows, ncols=ncols, naxes=naxes, squeeze=False,
                             sharex=False, sharey=True)
         axes = _flatten(axes)
 
@@ -2744,6 +2713,7 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None,
                               rot=rot, grid=grid, **kwds)
             ax.set_title(com.pprint_thing(key))
             ret[key] = d
+        fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2)
     else:
         from pandas.tools.merge import concat
         keys, frames = zip(*grouped)
@@ -2795,9 +2765,8 @@ def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True,
 
 def _grouped_plot_by_column(plotf, data, columns=None, by=None,
                             numeric_only=True, grid=False,
-                            figsize=None, ax=None, layout=None, **kwargs):
-    from pandas.core.frame import DataFrame
-
+                            figsize=None, ax=None, layout=None, return_type=None,
+                            **kwargs):
     grouped = data.groupby(by)
     if columns is None:
         if not isinstance(by, (list, tuple)):
@@ -2818,20 +2787,26 @@ def _grouped_plot_by_column(plotf, data, columns=None, by=None,
 
     ravel_axes = _flatten(axes)
 
-    out_dict = compat.OrderedDict()
+    result = compat.OrderedDict()
     for i, col in enumerate(columns):
         ax = ravel_axes[i]
         gp_col = grouped[col]
-        re_plotf = plotf(gp_col, ax, **kwargs)
+        keys, values = zip(*gp_col)
+        re_plotf = plotf(keys, values, ax, **kwargs)
         ax.set_title(col)
         ax.set_xlabel(com.pprint_thing(by))
+        result[col] = re_plotf
         ax.grid(grid)
-        out_dict[col] = re_plotf
+
+    # Return axes in multiplot case, maybe revisit later # 985
+    if return_type is None:
+        result = axes
 
     byline = by[0] if len(by) == 1 else by
     fig.suptitle('Boxplot grouped by %s' % byline)
+    fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2)
 
-    return fig, axes, out_dict
+    return result
 
 
 def table(ax, data, rowLabels=None, colLabels=None,