Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: preserve format and name keywords when consolidating InlineData #1092

Merged
merged 1 commit into from
Aug 15, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 59 additions & 37 deletions altair/vegalite/v2/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,50 @@

# ------------------------------------------------------------------------
# Data Utilities
def _dataset_name(data):
"""Generate a unique hash of the data"""
def hash_(dct):
dct_str = json.dumps(dct, sort_keys=True)
return hashlib.md5(dct_str.encode()).hexdigest()
def _dataset_name(values):
    """Build a deterministic dataset name from the content of ``values``.

    Parameters
    ----------
    values : list, dict, or core.InlineDataset
        The data values to name. A ``core.InlineDataset`` instance is first
        converted with its ``to_dict()`` method.

    Returns
    -------
    name : string
        ``'data-'`` followed by the MD5 hex digest of the values serialized
        to JSON with sorted keys, so equal values always yield equal names.
    """
    if isinstance(values, core.InlineDataset):
        payload = values.to_dict()
    else:
        payload = values
    # sort_keys makes the serialization independent of dict ordering
    serialized = json.dumps(payload, sort_keys=True)
    digest = hashlib.md5(serialized.encode()).hexdigest()
    return 'data-' + digest


def _consolidate_data(data, context):
"""If data is specified inline, then move it to context['datasets']

This function will modify context in-place, and return a new version of data
"""
values = Undefined
kwds = {}

if isinstance(data, core.InlineData):
return 'data-' + hash_(data.values)
elif isinstance(data, dict) and 'values' in data:
return 'data-' + hash_(data['values'])
else:
raise ValueError("Cannot generate name for data {0}".format(data))
if data.name is Undefined and data.values is not Undefined:
values = data.values
kwds = {'format': data.format}

elif isinstance(data, dict):
if 'name' not in data and 'values' in data:
values = data['values']
kwds = {k:v for k,v in data.items() if k != 'values'}

if values is not Undefined:
name = _dataset_name(values)
data = core.NamedData(name=name, **kwds)
context.setdefault('datasets', {})[name] = values

return data


def _prepare_data(data, context):
Expand All @@ -46,35 +78,25 @@ def _prepare_data(data, context):
"""
if data is Undefined:
return data
if isinstance(data, core.InlineData):
if data_transformers.consolidate_datasets:
name = _dataset_name(data)
context.setdefault('datasets', {})[name] = data.values
return core.NamedData(name=name)
else:
return data
elif isinstance(data, dict) and 'values' in data:
if data_transformers.consolidate_datasets:
name = _dataset_name(data)
context.setdefault('datasets', {})[name] = data['values']
return core.NamedData(name=name)
else:
return data
elif isinstance(data, pd.DataFrame):

# convert dataframes to dict
if isinstance(data, pd.DataFrame):
data = pipe(data, data_transformers.get())
if data_transformers.consolidate_datasets and isinstance(data, dict) and 'values' in data:
name = _dataset_name(data)
context.setdefault('datasets', {})[name] = data['values']
return core.NamedData(name=name)
else:
return data
elif isinstance(data, (dict, core.Data, core.UrlData, core.NamedData)):
return data
elif isinstance(data, six.string_types):
return core.UrlData(data)
else:

# convert string input to a URLData
if isinstance(data, six.string_types):
data = core.UrlData(data)

# consolidate inline data to top-level datasets
if data_transformers.consolidate_datasets:
data = _consolidate_data(data, context)

# if data is still not a recognized type, then return
if not isinstance(data, (dict, core.Data, core.UrlData,
core.InlineData, core.NamedData)):
warnings.warn("data of type {0} not recognized".format(type(data)))
return data

return data


# ------------------------------------------------------------------------
Expand Down
32 changes: 32 additions & 0 deletions altair/vegalite/v2/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,3 +460,35 @@ def test_consolidate_datasets(basic_chart):

for spec in dct_consolidated['hconcat']:
assert spec['data'] == {'name': name}


def test_consolidate_InlineData():
    """Check InlineData handling with dataset consolidation off and on.

    Inline values with a ``format`` must keep that format in the chart's
    ``data`` entry in both modes; inline data that carries a ``name`` must
    be emitted unchanged in both modes.
    """
    def render(chart_obj, consolidate):
        # serialize the chart under the requested consolidation setting
        with alt.data_transformers.enable(consolidate_datasets=consolidate):
            return chart_obj.to_dict()

    # --- inline values plus an explicit format ---
    data = alt.InlineData(
        values=[{'a': 1, 'b': 1}, {'a': 2, 'b': 2}],
        format={'type': 'csv'}
    )
    chart = alt.Chart(data).mark_point()

    plain = render(chart, False)
    assert plain['data']['format'] == data.format
    assert plain['data']['values'] == data.values

    consolidated = render(chart, True)
    assert consolidated['data']['format'] == data.format
    # the values are moved to the first (only) top-level dataset
    first_dataset = list(consolidated['datasets'].values())[0]
    assert first_dataset == data.values

    # --- named inline data ---
    data = alt.InlineData(
        values=[],
        name='runtime_data'
    )
    chart = alt.Chart(data).mark_point()

    for consolidate in (False, True):
        dct = render(chart, consolidate)
        assert dct['data'] == data.to_dict()