Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Detailed report for testing.assert_equal and testing.assert_identical #1507

Merged
merged 11 commits into from
Jan 18, 2019
5 changes: 5 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ Enhancements
- Upsampling an array via interpolation with resample is now dask-compatible,
as long as the array is not chunked along the resampling dimension.
By `Spencer Clark <https://github.com/spencerkclark>`_.
- :py:func:`xarray.testing.assert_equal` and
:py:func:`xarray.testing.assert_identical` now provide a more detailed
report showing what exactly differs between the two objects (dimensions /
coordinates / variables / attributes) (:issue:`1507`).
By `Benoit Bovy <https://github.com/benbovy>`_.

Bug fixes
~~~~~~~~~
Expand Down
143 changes: 137 additions & 6 deletions xarray/core/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import numpy as np
import pandas as pd

from .duck_array_ops import array_equiv
from .options import OPTIONS
from .pycompat import (
PY2, bytes_type, dask_array_type, unicode_type, zip_longest)
Expand Down Expand Up @@ -411,6 +412,15 @@ def short_dask_repr(array, show_dtype=True):
return 'dask.array<shape=%s, chunksize=%s>' % (array.shape, chunksize)


def short_data_repr(array):
    """Return a short text representation of the data held by ``array``.

    Three cases are tried in order:

    * the ``_data`` of ``array.variable`` (or of ``array`` itself when it
      has no ``variable`` attribute) is an instance of ``dask_array_type``:
      the result of ``short_dask_repr(array)``;
    * ``array._in_memory`` is true or ``array.size`` is below 1e5: the
      result of ``short_array_repr(array.values)``;
    * otherwise: a one-line placeholder giving the size and dtype.
    """
    # Use ``array.variable`` when present, else ``array`` itself.
    holder = getattr(array, 'variable', array)
    if isinstance(holder._data, dask_array_type):
        return short_dask_repr(array)

    if array._in_memory or array.size < 1e5:
        return short_array_repr(array.values)

    # Neither of the above: do not touch ``array.values`` at all.
    return u'[%s values with dtype=%s]' % (array.size, array.dtype)


def array_repr(arr):
# used for DataArray, Variable and IndexVariable
if hasattr(arr, 'name') and arr.name is not None:
Expand All @@ -421,12 +431,7 @@ def array_repr(arr):
summary = [u'<xarray.%s %s(%s)>'
% (type(arr).__name__, name_str, dim_summary(arr))]

if isinstance(getattr(arr, 'variable', arr)._data, dask_array_type):
summary.append(short_dask_repr(arr))
elif arr._in_memory or arr.size < 1e5:
summary.append(short_array_repr(arr.values))
else:
summary.append(u'[%s values with dtype=%s]' % (arr.size, arr.dtype))
summary.append(short_data_repr(arr))

if hasattr(arr, 'coords'):
if arr.coords:
Expand Down Expand Up @@ -463,3 +468,129 @@ def dataset_repr(ds):
summary.append(attrs_repr(ds.attrs))

return u'\n'.join(summary)


def diff_dim_summary(a, b):
    """Describe how the dimensions of ``a`` and ``b`` differ.

    Returns an empty string when ``a.dims != b.dims`` is false; otherwise
    returns a two-line message built from ``dim_summary`` of each object.
    """
    dims_differ = a.dims != b.dims
    if not dims_differ:
        return ""

    # NOTE(review): the whitespace after "\n" is reproduced as seen in this
    # view of the file -- confirm it against the upstream source.
    template = "Differing dimensions:\n ({}) != ({})"
    return template.format(dim_summary(a), dim_summary(b))


def _diff_mapping_repr(a_mapping, b_mapping, compat,
title, summarizer, col_width=None):

def extra_items_repr(extra_keys, mapping, ab_side):
extra_repr = [summarizer(k, mapping[k], col_width) for k in extra_keys]
if extra_repr:
return ["{} contains more {}:".format(ab_side, title)] + extra_repr
else:
return []

a_keys = set(a_mapping)
b_keys = set(b_mapping)

summary = []

diff_items = []

for k in a_keys & b_keys:
try:
# compare xarray variable
compatible = getattr(a_mapping[k], compat)(b_mapping[k])
is_variable = True
except AttributeError:
# compare attribute value
compatible = a_mapping[k] == b_mapping[k]
is_variable = False

if not compatible:
temp = [summarizer(k, vars[k], col_width)
for vars in (a_mapping, b_mapping)]

if compat == 'identical' and is_variable:
attrs_summary = []

for m in (a_mapping, b_mapping):
attr_s = "\n".join([summarize_attr(ak, av)
for ak, av in m[k].attrs.items()])
attrs_summary.append(attr_s)

temp = ["\n".join([var_s, attr_s]) if attr_s else var_s
for var_s, attr_s in zip(temp, attrs_summary)]

diff_items += [ab_side + s[1:]
for ab_side, s in zip(('L', 'R'), temp)]

if diff_items:
summary += ["Differing {}:".format(title)] + diff_items

summary += extra_items_repr(a_keys - b_keys, a_mapping, "Left")
summary += extra_items_repr(b_keys - a_keys, b_mapping, "Right")

return "\n".join(summary)
benbovy marked this conversation as resolved.
Show resolved Hide resolved


# Specializations of ``_diff_mapping_repr`` with the section title and the
# per-item summarizer already bound; callers supply the two mappings, the
# ``compat`` name and, optionally, ``col_width``.
diff_coords_repr = functools.partial(
    _diff_mapping_repr, title="coordinates", summarizer=summarize_coord)


diff_data_vars_repr = functools.partial(
    _diff_mapping_repr, title="data variables", summarizer=summarize_datavar)


diff_attrs_repr = functools.partial(
    _diff_mapping_repr, title="attributes", summarizer=summarize_attr)


def _compat_to_str(compat):
if compat == "equals":
return "equal"
else:
return compat


def diff_array_repr(a, b, compat):
    """Build a report of the differences between two array objects.

    Parameters
    ----------
    a, b : object
        Left and right objects; both must expose ``data`` and, when
        ``compat`` is ``'identical'``, ``attrs``. Coordinates are compared
        only when ``a`` has a ``coords`` attribute.
    compat : str
        Comparison name forwarded to the coordinate and attribute diffs;
        ``'identical'`` additionally enables the attribute diff.

    Returns
    -------
    str
        Header line followed by the non-empty sub-reports, one per line
        group. Sub-reports with nothing to show are left out so the result
        contains no blank lines.
    """
    # used for DataArray, Variable and IndexVariable
    summary = ["Left and right {} objects are not {}"
               .format(type(a).__name__, _compat_to_str(compat))]

    if not array_equiv(a.data, b.data):
        # NOTE(review): the ``start`` padding is reproduced as seen in this
        # view of the file -- confirm it against the upstream source.
        temp = [wrap_indent(short_array_repr(obj), start=' ')
                for obj in (a, b)]
        diff_data_repr = [ab_side + "\n" + ab_data_repr
                          for ab_side, ab_data_repr in zip(('L', 'R'), temp)]
        summary += ["Differing values:"] + diff_data_repr

    if hasattr(a, 'coords'):
        col_width = _calculate_col_width(set(a.coords) | set(b.coords))
        summary.append(diff_coords_repr(a.coords, b.coords, compat,
                                        col_width=col_width))

    if compat == 'identical':
        summary.append(diff_attrs_repr(a.attrs, b.attrs, compat))

    # The mapping diffs return "" when there is nothing to report; joining
    # those would leave stray blank lines in the message.
    return "\n".join(section for section in summary if section)


def diff_dataset_repr(a, b, compat):
    """Build a report of the differences between two dataset objects.

    Parameters
    ----------
    a, b : object
        Left and right objects; both must expose ``variables``, ``dims``,
        ``coords``, ``data_vars`` and, when ``compat`` is ``'identical'``,
        ``attrs``.
    compat : str
        Comparison name forwarded to the coordinate, data-variable and
        attribute diffs; ``'identical'`` additionally enables the
        attribute diff.

    Returns
    -------
    str
        Header line followed by the non-empty sub-reports (dimensions,
        coordinates, data variables, attributes). Sub-reports with nothing
        to show are left out so the result contains no blank lines.
    """
    summary = ["Left and right {} objects are not {}"
               .format(type(a).__name__, _compat_to_str(compat))]

    # One shared column width so both sides of every section line up.
    col_width = _calculate_col_width(
        set(_get_col_items(a.variables) + _get_col_items(b.variables)))

    summary.append(diff_dim_summary(a, b))
    summary.append(diff_coords_repr(a.coords, b.coords, compat,
                                    col_width=col_width))
    summary.append(diff_data_vars_repr(a.data_vars, b.data_vars, compat,
                                       col_width=col_width))

    if compat == 'identical':
        summary.append(diff_attrs_repr(a.attrs, b.attrs, compat))

    # Each helper returns "" when there is nothing to report; joining those
    # would leave stray blank lines in the message.
    return "\n".join(section for section in summary if section)
17 changes: 12 additions & 5 deletions xarray/testing.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
"""Testing functions exposed to the user API"""
from __future__ import absolute_import, division, print_function

from io import StringIO

import numpy as np

from xarray.core import duck_array_ops
from xarray.core import formatting


def _decode_string_data(data):
Expand Down Expand Up @@ -49,8 +52,10 @@ def assert_equal(a, b):
import xarray as xr
__tracebackhide__ = True # noqa: F841
assert type(a) == type(b) # noqa
if isinstance(a, (xr.Variable, xr.DataArray, xr.Dataset)):
assert a.equals(b), '{}\n{}'.format(a, b)
if isinstance(a, (xr.Variable, xr.DataArray)):
assert a.equals(b), formatting.diff_array_repr(a, b, 'equals')
elif isinstance(a, xr.Dataset):
assert a.equals(b), formatting.diff_dataset_repr(a, b, 'equals')
else:
raise TypeError('{} not supported by assertion comparison'
.format(type(a)))
Expand All @@ -76,11 +81,13 @@ def assert_identical(a, b):
import xarray as xr
__tracebackhide__ = True # noqa: F841
assert type(a) == type(b) # noqa
if isinstance(a, xr.DataArray):
if isinstance(a, xr.Variable):
assert a.identical(b), formatting.diff_array_repr(a, b, 'identical')
elif isinstance(a, xr.DataArray):
assert a.name == b.name
assert_identical(a._to_temp_dataset(), b._to_temp_dataset())
assert a.identical(b), formatting.diff_array_repr(a, b, 'identical')
elif isinstance(a, (xr.Dataset, xr.Variable)):
assert a.identical(b), '{}\n{}'.format(a, b)
assert a.identical(b), formatting.diff_dataset_repr(a, b, 'identical')
else:
raise TypeError('{} not supported by assertion comparison'
.format(type(a)))
Expand Down
94 changes: 94 additions & 0 deletions xarray/tests/test_formatting.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function

from textwrap import dedent

import numpy as np
import pandas as pd

import xarray as xr
from xarray.core import formatting
from xarray.core.pycompat import PY3

Expand Down Expand Up @@ -190,6 +193,97 @@ def test_attribute_repr(self):
assert u'\n' not in newlines
assert u'\t' not in tabs

def test_diff_array_repr(self):
# Checks the exact text returned by formatting.diff_array_repr in two cases:
# two DataArrays compared with compat 'identical', then two Variables
# compared with compat 'equals'.
# NOTE(review): the expected literals are compared with ``==``, so their
# internal whitespace is significant -- confirm alignment against upstream.
da_a = xr.DataArray([[1, 2, 3], [4, 5, 7]],
dims=('x', 'y'),
coords={'x': ['a', 'b'], 'y': [1, 2, 3]},
attrs={'units': 'm', 'description': 'desc'})

da_b = xr.DataArray([1, 2],
dims='x',
coords={'x': ['a', 'c'], 'label': ('x', [1, 2])},
attrs={'units': 'kg'})

expected = dedent("""\
Left and right DataArray objects are not identical
Differing values:
L
array([[1, 2, 3],
[4, 5, 7]])
R
array([1, 2])
Differing coordinates:
L * x (x) <U1 'a' 'b'
R * x (x) <U1 'a' 'c'
Left contains more coordinates:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Left contains more coordinates" sounds a little funny to me.

Maybe "Coordinates on the left DataArray but not the right" or "Coordinates only on the left object"?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed! I followed pytest's reports too blindly here.

* y (y) int64 1 2 3
Right contains more coordinates:
label (x) int64 1 2
Differing attributes:
L units: m
R units: kg
Left contains more attributes:
description: desc""")

actual = formatting.diff_array_repr(da_a, da_b, 'identical')
assert actual == expected

# Second case: Variables have no coords and compat is 'equals', so the
# expected report holds only the header and the differing values.
va = xr.Variable('x', [1, 2, 3], {'title': 'test Variable'})
vb = xr.Variable(('x', 'y'), [[1, 2, 3], [4, 5, 6]])

expected = dedent("""\
Left and right Variable objects are not equal
Differing values:
L
array([1, 2, 3])
R
array([[1, 2, 3],
[4, 5, 6]])""")

actual = formatting.diff_array_repr(va, vb, 'equals')
assert actual == expected

def test_diff_dataset_repr(self):
# Checks the exact text returned by formatting.diff_dataset_repr for two
# Datasets compared with compat 'identical'; the inputs differ in
# dimensions, coordinates, data variables and attributes.
# NOTE(review): the expected literal is compared with ``==``, so its
# internal whitespace is significant -- confirm alignment against upstream.
ds_a = xr.Dataset(
data_vars={'var1': (('x', 'y'), [[1, 2, 3], [4, 5, 7]]),
'var2': ('x', [3, 4])},
coords={'x': ['a', 'b'], 'y': [1, 2, 3]},
attrs={'units': 'm', 'description': 'desc'}
)

ds_b = xr.Dataset(
data_vars={'var1': ('x', [1, 2])},
coords={'x': ('x', ['a', 'c'], {'source': 0}),
'label': ('x', [1, 2])},
attrs={'units': 'kg'}
)

expected = dedent("""\
Left and right Dataset objects are not identical
Differing dimensions:
(x: 2, y: 3) != (x: 2)
Differing coordinates:
L * x (x) <U1 'a' 'b'
R * x (x) <U1 'a' 'c'
source: 0
Left contains more coordinates:
* y (y) int64 1 2 3
Right contains more coordinates:
label (x) int64 1 2
Differing data variables:
L var1 (x, y) int64 1 2 3 4 5 7
R var1 (x) int64 1 2
Left contains more data variables:
var2 (x) int64 3 4
Differing attributes:
L units: m
R units: kg
Left contains more attributes:
description: desc""")

actual = formatting.diff_dataset_repr(ds_a, ds_b, 'identical')
assert actual == expected


def test_set_numpy_options():
original_options = np.get_printoptions()
Expand Down