Skip to content

Commit

Permalink
[bugfix] convert metrics to numeric in dataframe (apache#4726)
Browse files Browse the repository at this point in the history
* [bugfix] convert metrics to numeric in dataframe

Sometimes the DB-API driver and pandas' read_sql fail to return the
proper numeric types for metrics, which then show up as `object`
columns in the dataframe. This results in "No numeric types to
aggregate" errors when trying to perform aggregations or pivoting in
pandas.

This PR looks for metric columns in the dataframe that are typed as
`object` and converts them with pandas' to_numeric.

* Fix tests

* Remove all iteritems
  • Loading branch information
mistercrunch authored Apr 3, 2018
1 parent 22ca561 commit f8fba74
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 4 deletions.
2 changes: 1 addition & 1 deletion superset/models/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,7 +702,7 @@ def needs_conversion(df_series):
return True
return False

for k, v in df.dtypes.iteritems():
for k, v in df.dtypes.items():
if v.type == numpy.object_ and needs_conversion(df[k]):
df[k] = df[k].apply(utils.json_dumps_w_dates)
return df
Expand Down
15 changes: 12 additions & 3 deletions superset/viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,11 +170,21 @@ def get_df(self, query_obj=None):
if self.datasource.offset:
df[DTTM_ALIAS] += timedelta(hours=self.datasource.offset)
df[DTTM_ALIAS] += self.time_shift

self.df_metrics_to_num(df, query_obj.get('metrics') or [])

df.replace([np.inf, -np.inf], np.nan)
fillna = self.get_fillna_for_columns(df.columns)
df = df.fillna(fillna)
return df

@staticmethod
def df_metrics_to_num(df, metrics):
"""Converting metrics to numeric when pandas.read_sql cannot"""
for col, dtype in df.dtypes.items():
if dtype.type == np.object_ and col in metrics:
df[col] = pd.to_numeric(df[col])

def query_obj(self):
"""Building a query object"""
form_data = self.form_data
Expand Down Expand Up @@ -1060,7 +1070,6 @@ def process_data(self, df, aggregate=False):
df = df.fillna(0)
if fd.get('granularity') == 'all':
raise Exception(_('Pick a time granularity for your time series'))

if not aggregate:
df = df.pivot_table(
index=DTTM_ALIAS,
Expand Down Expand Up @@ -1384,7 +1393,7 @@ def get_data(self, df):
pt = (pt / pt.sum()).T
pt = pt.reindex(row.index)
chart_data = []
for name, ys in pt.iteritems():
for name, ys in pt.items():
if pt[name].dtype.kind not in 'biufc' or name in self.groupby:
continue
if isinstance(name, string_types):
Expand All @@ -1395,7 +1404,7 @@ def get_data(self, df):
l = [str(s) for s in name[1:]] # noqa: E741
series_title = ', '.join(l)
values = []
for i, v in ys.iteritems():
for i, v in ys.items():
x = i
if isinstance(x, (tuple, list)):
x = ', '.join([text_type(s) for s in x])
Expand Down
2 changes: 2 additions & 0 deletions tests/viz_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ def test_get_df_handles_dttm_col(self):
results.df.empty = False
datasource.query = Mock(return_value=results)
test_viz = viz.BaseViz(datasource, form_data)

test_viz.df_metrics_to_num = Mock()
test_viz.get_fillna_for_columns = Mock(return_value=0)
test_viz.get_df(query_obj)
mock_call = df.__setitem__.mock_calls[0]
Expand Down

0 comments on commit f8fba74

Please sign in to comment.