From 29af36fd6a1a3de3f735fcaa860520e2ba178531 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Mon, 2 Apr 2018 21:48:14 -0700 Subject: [PATCH] [bugfix] convert metrics to numeric in dataframe (#4726) * [bugfix] convert metrics to numeric in dataframe It appears that sometimes the dbapi driver and pandas' read_sql fail to return the proper numeric types for metrics, so they show up as `object` in the dataframe. This results in "No numeric types to aggregate" errors when trying to perform aggregations or pivoting in pandas. This PR looks for metrics in dataframes that are typed as "object" and uses pandas' to_numeric to convert them. * Fix tests * Remove all iteritems --- superset/models/core.py | 2 +- superset/viz.py | 15 ++++++++++++--- tests/viz_tests.py | 2 ++ 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/superset/models/core.py b/superset/models/core.py index cd7cc44b9eb45..bd50751961f3a 100644 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -702,7 +702,7 @@ def needs_conversion(df_series): return True return False - for k, v in df.dtypes.iteritems(): + for k, v in df.dtypes.items(): if v.type == numpy.object_ and needs_conversion(df[k]): df[k] = df[k].apply(utils.json_dumps_w_dates) return df diff --git a/superset/viz.py b/superset/viz.py index bfc08f1942015..2aca6657c26b8 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -173,11 +173,21 @@ def get_df(self, query_obj=None): if self.datasource.offset: df[DTTM_ALIAS] += timedelta(hours=self.datasource.offset) df[DTTM_ALIAS] += self.time_shift + + self.df_metrics_to_num(df, query_obj.get('metrics') or []) + df.replace([np.inf, -np.inf], np.nan) fillna = self.get_fillna_for_columns(df.columns) df = df.fillna(fillna) return df + @staticmethod + def df_metrics_to_num(df, metrics): + """Converting metrics to numeric when pandas.read_sql cannot""" + for col, dtype in df.dtypes.items(): + if dtype.type == np.object_ and col in metrics: + df[col] = pd.to_numeric(df[col]) + def
query_obj(self): """Building a query object""" form_data = self.form_data @@ -1069,7 +1079,6 @@ def process_data(self, df, aggregate=False): df = df.fillna(0) if fd.get('granularity') == 'all': raise Exception(_('Pick a time granularity for your time series')) - if not aggregate: df = df.pivot_table( index=DTTM_ALIAS, @@ -1393,7 +1402,7 @@ def get_data(self, df): pt = (pt / pt.sum()).T pt = pt.reindex(row.index) chart_data = [] - for name, ys in pt.iteritems(): + for name, ys in pt.items(): if pt[name].dtype.kind not in 'biufc' or name in self.groupby: continue if isinstance(name, string_types): @@ -1404,7 +1413,7 @@ def get_data(self, df): l = [str(s) for s in name[1:]] # noqa: E741 series_title = ', '.join(l) values = [] - for i, v in ys.iteritems(): + for i, v in ys.items(): x = i if isinstance(x, (tuple, list)): x = ', '.join([text_type(s) for s in x]) diff --git a/tests/viz_tests.py b/tests/viz_tests.py index 6822837e28312..a5adfc1f2540f 100644 --- a/tests/viz_tests.py +++ b/tests/viz_tests.py @@ -77,6 +77,8 @@ def test_get_df_handles_dttm_col(self): results.df.empty = False datasource.query = Mock(return_value=results) test_viz = viz.BaseViz(datasource, form_data) + + test_viz.df_metrics_to_num = Mock() test_viz.get_fillna_for_columns = Mock(return_value=0) test_viz.get_df(query_obj) mock_call = df.__setitem__.mock_calls[0]