Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: appropriately handle non-string column/field names: #899

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions altair/utils/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def sanitize_dataframe(df):
"""Sanitize a DataFrame to prepare it for serialization.

* Make a copy
* Convert all column names to strings
* Raise ValueError if it has a hierarchical index.
* Convert categoricals to strings.
* Convert np.bool_ dtypes to Python bool objects
Expand All @@ -106,6 +107,9 @@ def to_list_if_array(val):
else:
return val

# All column names must be strings
df.columns = map(str, df.columns)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we want to convert column names to strings, we should probably put this line in a try/expect that raises an informative error telling the user how to fix the issue.


for col_name, dtype in df.dtypes.iteritems():
if str(dtype) == 'category':
# XXXX: work around bug in to_json for categorical types
Expand Down Expand Up @@ -209,7 +213,7 @@ def parse_shorthand(shorthand, data=None, parse_aggregates=True,
>>> parse_shorthand('count()', data) == {'aggregate': 'count', 'type': 'quantitative'}
True
"""
if not shorthand:
if shorthand == '' or shorthand is None:
return {}

valid_typecodes = list(TYPECODE_MAP) + list(INV_TYPECODE_MAP)
Expand Down Expand Up @@ -239,9 +243,12 @@ def parse_shorthand(shorthand, data=None, parse_aggregates=True,
# find matches depending on valid fields passed
if isinstance(shorthand, dict):
attrs = shorthand
else:
elif isinstance(shorthand, six.string_types):
attrs = next(exp.match(shorthand).groupdict() for exp in regexps
if exp.match(shorthand))
else:
raise ValueError("column names must be strings, but {0} is of type {1}"
"".format(shorthand, type(shorthand).__name__))

# Handle short form of the type expression
if 'type' in attrs:
Expand Down
11 changes: 11 additions & 0 deletions altair/utils/tests/test_core.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import pandas as pd

import pytest

import altair as alt
from .. import parse_shorthand, update_nested

Expand All @@ -8,8 +10,17 @@ def test_parse_shorthand():
def check(s, **kwargs):
assert parse_shorthand(s) == kwargs

# empty shorthand leads to empty dict
check('')

# dict shorthand is processed
check({'type': 'Q', 'field': 'foo'},
type='quantitative', field='foo')

# invalid type leads to a value error
with pytest.raises(ValueError):
check(0)

# Fields alone
check('foobar', field='foobar')
check('blah:(fd ', field='blah:(fd ')
Expand Down
8 changes: 8 additions & 0 deletions altair/utils/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import warnings
import json

import six

import numpy as np
import pandas as pd

Expand Down Expand Up @@ -77,3 +79,9 @@ def test_sanitize_dataframe_infs():
df_clean = sanitize_dataframe(df)
assert list(df_clean.dtypes) == [object]
assert list(df_clean['x']) == [0, 1, 2, None, None, None]


def test_sanitize_dataframe_colnames():
df = pd.DataFrame(list(zip([1,2,3], [4,5, 6])))
df_clean = sanitize_dataframe(df)
assert all(isinstance(col, six.string_types) for col in df_clean.columns)