From a4d414ab07849fb8718204b2998c5d8a5ff57ac5 Mon Sep 17 00:00:00 2001 From: Wei Ji <23487320+weiji14@users.noreply.github.com> Date: Mon, 21 Sep 2020 13:02:15 +1200 Subject: [PATCH] Refactor info to allow datetime inputs from xarray.Dataset and pandas.DataFrame tables (#619) Changes the backend mechanism of `info` from using lib.virtualfile_from_matrix() (which only supports single non-datetime dtypes) to using lib.virtualfile_from_vectors() (which supports datetime inputs as of #464). * Refactor info to use virtualfile_from_vectors to support datetime inputs * Test that xarray.Dataset inputs into pygmt.info works too * Expect failures on test_info_*_time_column on GMT 6.1.1 * Document xarray.Datasets with 1D data_vars as allowed inputs to info Co-authored-by: Dongdong Tian --- pygmt/modules.py | 18 ++++++++------ pygmt/tests/test_info.py | 53 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 60 insertions(+), 11 deletions(-) diff --git a/pygmt/modules.py b/pygmt/modules.py index 1cacf67d6df..477bc99221b 100644 --- a/pygmt/modules.py +++ b/pygmt/modules.py @@ -78,9 +78,10 @@ def info(table, **kwargs): Parameters ---------- - table : pandas.DataFrame or np.ndarray or str - Either a pandas dataframe, a 1D/2D numpy.ndarray or a file name to an - ASCII data table. + table : str or np.ndarray or pandas.DataFrame or xarray.Dataset + Pass in either a file name to an ASCII data table, a 1D/2D numpy array, + a pandas dataframe, or an xarray dataset made up of 1D xarray.DataArray + data variables. per_column : bool Report the min/max values per column in separate columns. 
spacing : str @@ -107,10 +108,13 @@ def info(table, **kwargs): if kind == "file": file_context = dummy_context(table) elif kind == "matrix": - _table = np.asanyarray(table) - if table.ndim == 1: # 1D arrays need to be 2D and transposed - _table = np.transpose(np.atleast_2d(_table)) - file_context = lib.virtualfile_from_matrix(_table) + try: + # pandas.DataFrame and xarray.Dataset types + arrays = [array for _, array in table.items()] + except AttributeError: + # Python lists, tuples, and numpy ndarray types + arrays = np.atleast_2d(np.asanyarray(table).T) + file_context = lib.virtualfile_from_vectors(*arrays) else: raise GMTInvalidInput(f"Unrecognized data type: {type(table)}") diff --git a/pygmt/tests/test_info.py b/pygmt/tests/test_info.py index 142b56dce8e..92e7616adc6 100644 --- a/pygmt/tests/test_info.py +++ b/pygmt/tests/test_info.py @@ -8,13 +8,17 @@ import pandas as pd import pytest import xarray as xr +from packaging.version import Version -from .. import info +from .. import clib, info from ..exceptions import GMTInvalidInput TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data") POINTS_DATA = os.path.join(TEST_DATA_DIR, "points.txt") +with clib.Session() as _lib: + gmt_version = Version(_lib.info["version"]) + def test_info(): "Make sure info works on file name inputs" @@ -33,7 +37,48 @@ def test_info_dataframe(): table = pd.read_csv(POINTS_DATA, sep=" ", header=None) output = info(table=table) expected_output = ( - "<matrix memory>: N = 20 <11.5309/61.7074> <-2.9289/7.8648> <0.1412/0.9338>\n" + "<vector memory>: N = 20 <11.5309/61.7074> <-2.9289/7.8648> <0.1412/0.9338>\n" + ) + assert output == expected_output + + +@pytest.mark.xfail( + condition=gmt_version <= Version("6.1.1"), + reason="UNIX timestamps returned instead of ISO datetime, should work on GMT 6.2.0 " + "after https://github.com/GenericMappingTools/gmt/issues/4241 is resolved", +) +def test_info_pandas_dataframe_time_column(): + "Make sure info works on pandas.DataFrame inputs with a time column" + table = 
pd.DataFrame( + data={ + "z": [10, 13, 12, 15, 14], + "time": pd.date_range(start="2020-01-01", periods=5), + } + ) + output = info(table=table) + expected_output = ( + "<vector memory>: N = 5 <10/15> <2020-01-01T00:00:00/2020-01-05T00:00:00>\n" + ) + assert output == expected_output + + +@pytest.mark.xfail( + condition=gmt_version <= Version("6.1.1"), + reason="UNIX timestamp returned instead of ISO datetime, should work on GMT 6.2.0 " + "after https://github.com/GenericMappingTools/gmt/issues/4241 is resolved", +) +def test_info_xarray_dataset_time_column(): + "Make sure info works on xarray.Dataset 1D inputs with a time column" + table = xr.Dataset( + coords={"index": [0, 1, 2, 3, 4]}, + data_vars={ + "z": ("index", [10, 13, 12, 15, 14]), + "time": ("index", pd.date_range(start="2020-01-01", periods=5)), + }, + ) + output = info(table=table) + expected_output = ( + "<vector memory>: N = 5 <10/15> <2020-01-01T00:00:00/2020-01-05T00:00:00>\n" ) assert output == expected_output @@ -43,7 +88,7 @@ def test_info_2d_array(): table = np.loadtxt(POINTS_DATA) output = info(table=table) expected_output = ( - "<matrix memory>: N = 20 <11.5309/61.7074> <-2.9289/7.8648> <0.1412/0.9338>\n" + "<vector memory>: N = 20 <11.5309/61.7074> <-2.9289/7.8648> <0.1412/0.9338>\n" ) assert output == expected_output @@ -51,7 +96,7 @@ def test_info_2d_array(): def test_info_1d_array(): "Make sure info works on 1D numpy.ndarray inputs" output = info(table=np.arange(20)) - expected_output = "<matrix memory>: N = 20 <0/19>\n" + expected_output = "<vector memory>: N = 20 <0/19>\n" assert output == expected_output