import os

import numpy as np
import pandas as pd
import pandas.util.testing as pdt
import pytest
from astropy import units as u
from astropy.tests.helper import assert_quantity_allclose
from numpy.testing import assert_almost_equal, assert_array_almost_equal

from tardis.io.util import HDFWriter


# Kinds of attribute values exercised by this module:
#
#   * DataFrame
#   * None
#   * Numpy arrays
#   * Strings
#   * Numeric values
#   * Pandas Series objects
#   * MultiIndex objects
#   * Quantity objects wrapping numeric values, Numpy arrays, DataFrames,
#     Pandas Series and None objects


class MockHDF(HDFWriter, object):
    """HDFWriter subclass that stores the single attribute `property`."""
    hdf_properties = ['property']
    class_properties = {}

    def __init__(self, property):
        self.property = property


def _write_to_hdf(tmpdir, writer):
    """Write `writer` below the 'test' path of a new HDF file.

    Returns the name of the file that was written.
    """
    hdf_file = str(tmpdir.mkdir('data').join('test.hdf'))
    writer.to_hdf(hdf_file, path='test')
    return hdf_file


simple_objects = [1.5, 'random_string', 4.2e7]


@pytest.mark.parametrize("attr", simple_objects)
def test_simple_write(tmpdir, attr):
    """Scalar values are read back from the 'scalars' entry."""
    writer = MockHDF(attr)
    hdf_file = _write_to_hdf(tmpdir, writer)
    scalars = pd.read_hdf(hdf_file, key='/test/mock_hdf/scalars')
    assert writer.property == scalars['property']


mock_df = pd.DataFrame({
    'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']),
    'two': pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd']),
})
complex_objects = [
    np.array([4.0e14, 2, 2e14, 27.5]),
    pd.Series([1., 2., 3.]),
    mock_df,
]


@pytest.mark.parametrize("attr", complex_objects)
def test_complex_obj_write(tmpdir, attr):
    """Arrays, Series and DataFrames are read back from their own key."""
    writer = MockHDF(attr)
    hdf_file = _write_to_hdf(tmpdir, writer)
    stored = pd.read_hdf(hdf_file, key='/test/mock_hdf/property')
    assert_array_almost_equal(writer.property, stored.values)


arr = np.array([['L1', 'L1', 'L2', 'L2', 'L3', 'L3', 'L4', 'L4'],
                ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']])
mock_multiIndex = pd.MultiIndex.from_arrays(arr.transpose())


def test_MultiIndex_write(tmpdir):
    """A MultiIndex is rebuilt from the stored object and compared."""
    writer = MockHDF(mock_multiIndex)
    hdf_file = _write_to_hdf(tmpdir, writer)
    stored = pd.read_hdf(hdf_file, key='/test/mock_hdf/property')
    rebuilt = pd.MultiIndex.from_tuples(stored.unstack().values)
    pdt.assert_almost_equal(writer.property, rebuilt)


# Quantity objects

quantity_objects = [np.array([4.0e14, 2, 2e14, 27.5]), mock_df]


@pytest.mark.parametrize("attr", quantity_objects)
def test_quantity_objects_write(tmpdir, attr):
    """Non-scalar quantities are compared against their CGS values."""
    writer = MockHDF(u.Quantity(attr, 'g/cm**3'))
    hdf_file = _write_to_hdf(tmpdir, writer)
    stored = pd.read_hdf(hdf_file, key='/test/mock_hdf/property')
    assert_array_almost_equal(writer.property.cgs.value, stored)


scalar_quantity_objects = [1.5, 4.2e7]


@pytest.mark.parametrize("attr", scalar_quantity_objects)
def test_scalar_quantity_objects_write(tmpdir, attr):
    """Scalar quantities are read back from the 'scalars' entry."""
    writer = MockHDF(u.Quantity(attr, 'g/cm**3'))
    hdf_file = _write_to_hdf(tmpdir, writer)
    scalars = pd.read_hdf(hdf_file, key='/test/mock_hdf/scalars/')
    assert_array_almost_equal(writer.property.cgs.value, scalars['property'])


def test_none_write(tmpdir):
    """A stored 'none' string is mapped back to None before comparing."""
    writer = MockHDF(None)
    hdf_file = _write_to_hdf(tmpdir, writer)
    stored = pd.read_hdf(hdf_file, key='/test/mock_hdf/scalars/')['property']
    expected = None if stored == 'none' else stored
    assert writer.property == expected


# Test class_properties parameter (like homologous_density is a class
# instance/object inside Model class)

class MockClass(HDFWriter, object):
    """HDFWriter subclass holding a value and a nested writer object."""
    hdf_properties = ['property', 'nested_object']
    class_properties = {'nested_object': MockHDF}

    def __init__(self, property, nested_object):
        self.property = property
        self.nested_object = nested_object


@pytest.mark.parametrize("attr", quantity_objects)
def test_objects_write(tmpdir, attr):
    """Both the outer property and the nested object's property are read back."""
    inner = MockHDF(np.array([4.0e14, 2, 2e14, 27.5]))
    outer = MockClass(u.Quantity(attr, 'g/cm**3'), inner)
    hdf_file = _write_to_hdf(tmpdir, outer)

    stored_property = pd.read_hdf(hdf_file, key='/test/mock_class/property')
    assert_array_almost_equal(outer.property.cgs.value, stored_property)

    stored_nested = pd.read_hdf(
        hdf_file, key='/test/mock_class/nested_object/property')
    assert_array_almost_equal(outer.nested_object.property, stored_nested)


def test_snake_case():
    """Class-style names are converted to their snake_case form."""
    cases = [
        ("HomologousDensity", "homologous_density"),
        ("TARDISSpectrum", "tardis_spectrum"),
        ("BasePlasma", "base_plasma"),
        ("LTEPlasma", "lte_plasma"),
        ("MonteCarloRunner", "monte_carlo_runner"),
        ("homologous_density", "homologous_density"),
    ]
    for given, wanted in cases:
        assert MockHDF.convert_to_snake_case(given) == wanted
nested_object + +@pytest.mark.parametrize("attr", quantity_objects) +def test_objects_write(tmpdir, attr): + fname = str(tmpdir.mkdir('data').join('test.hdf')) + nested_object = MockHDF(np.array([4.0e14, 2, 2e14, 27.5])) + attr_quantity = u.Quantity(attr, 'g/cm**3') + actual = MockClass(attr_quantity, nested_object) + actual.to_hdf(fname, path='test') + expected_property = pd.read_hdf(fname, key='/test/mock_class/property') + assert_array_almost_equal(actual.property.cgs.value, expected_property) + nested_property = pd.read_hdf( + fname, key='/test/mock_class/nested_object/property') + assert_array_almost_equal( + actual.nested_object.property, nested_property) + + +def test_snake_case(): + assert MockHDF.convert_to_snake_case( + "HomologousDensity") == "homologous_density" + assert MockHDF.convert_to_snake_case("TARDISSpectrum") == "tardis_spectrum" + assert MockHDF.convert_to_snake_case("BasePlasma") == "base_plasma" + assert MockHDF.convert_to_snake_case("LTEPlasma") == "lte_plasma" + assert MockHDF.convert_to_snake_case( + "MonteCarloRunner") == "monte_carlo_runner" + assert MockHDF.convert_to_snake_case( + "homologous_density") == "homologous_density" diff --git a/tardis/io/util.py b/tardis/io/util.py index 9582b12ce32..fb3ce0ffce2 100644 --- a/tardis/io/util.py +++ b/tardis/io/util.py @@ -1,6 +1,7 @@ #Utility functions for the IO part of TARDIS import os +import re import pandas as pd import numpy as np import collections @@ -166,6 +167,106 @@ def check_equality(item1, item2): return True +class HDFWriter(object): + + @staticmethod + def to_hdf_util(path_or_buf, path, elements, complevel=9, complib='blosc'): + """ + A function to uniformly store TARDIS data + to an HDF file. 
+ + Scalars will be stored in a Series under path/scalars + 1D arrays will be stored under path/property_name as distinct Series + 2D arrays will be stored under path/property_name as distinct DataFrames + + Units will be stored as their CGS value + + Parameters + ---------- + path_or_buf: + Path or buffer to the HDF store + path: str + Path inside the HDF store to store the `elements` + elements: dict + A dict of property names and their values to be + stored. + + Returns + ------- + + """ + scalars = {} + for key, value in elements.iteritems(): + if value is None: + value = 'none' + if hasattr(value, 'cgs'): + value = value.cgs.value + if np.isscalar(value): + scalars[key] = value + elif hasattr(value, 'shape'): + if value.ndim == 1: + # This try,except block is only for model.plasma.levels + try: + pd.Series(value).to_hdf(path_or_buf, + os.path.join(path, key)) + except NotImplementedError: + pd.DataFrame(value).to_hdf(path_or_buf, + os.path.join(path, key)) + else: + pd.DataFrame(value).to_hdf( + path_or_buf, os.path.join(path, key)) + else: + try: + value.to_hdf(path_or_buf, path, name=key) + except AttributeError: + data = pd.DataFrame([value]) + data.to_hdf(path_or_buf, os.path.join(path, key)) + + if scalars: + scalars_series = pd.Series(scalars) + + # Unfortunately, with to_hdf we cannot append, so merge beforehand + scalars_path = os.path.join(path, 'scalars') + with pd.HDFStore(path_or_buf, complevel=complevel, complib=complib) as store: + if scalars_path in store: + scalars_series = store[scalars_path].append(scalars_series) + scalars_series.to_hdf(path_or_buf, os.path.join(path, 'scalars')) + + def get_properties(self): + data = {name: getattr(self, name) for name in self.hdf_properties} + return data + + @staticmethod + def convert_to_snake_case(s): + s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', s) + return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower() + + def to_hdf(self, file_path, path='', name=None): + """ + Parameters + ---------- + file_path: 
str + Path or buffer to the HDF store + path: str + Path inside the HDF store to store the `elements` + name: str + Group inside the HDF store to which the `elements` need to be saved + + Returns + ------- + + """ + if name is None: + try: + name = self.hdf_name + except AttributeError: + name = self.convert_to_snake_case(self.__class__.__name__) + + data = self.get_properties() + buff_path = os.path.join(path, name) + self.to_hdf_util(file_path, buff_path, data) + +#Deprecated def to_hdf(path_or_buf, path, elements, complevel=9, complib='blosc'): """ A function to uniformly store TARDIS data