Skip to content

Commit

Permalink
Refactor info to allow datetime inputs from xarray.Dataset and pandas…
Browse files Browse the repository at this point in the history
….DataFrame tables (#619)

Changes the backend mechanism of `info`
from using lib.virtualfile_from_matrix()
(which only supports single non-datetime dtypes)
to using lib.virtualfile_from_vectors()
(which supports datetime inputs as of #464).

* Refactor info to use virtualfile_from_vectors to support datetime inputs
* Test that xarray.Dataset inputs into pygmt.info works too
* Expect failures on test_info_*_time_column on GMT 6.1.1
* Document xarray.Datasets with 1D data_vars as allowed inputs to info

Co-authored-by: Dongdong Tian <[email protected]>
  • Loading branch information
weiji14 and seisman authored Sep 21, 2020
1 parent 9e0a868 commit a4d414a
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 11 deletions.
18 changes: 11 additions & 7 deletions pygmt/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,10 @@ def info(table, **kwargs):
Parameters
----------
table : pandas.DataFrame or np.ndarray or str
Either a pandas dataframe, a 1D/2D numpy.ndarray or a file name to an
ASCII data table.
table : str or np.ndarray or pandas.DataFrame or xarray.Dataset
Pass in either a file name to an ASCII data table, a 1D/2D numpy array,
a pandas dataframe, or an xarray dataset made up of 1D xarray.DataArray
data variables.
per_column : bool
Report the min/max values per column in separate columns.
spacing : str
Expand All @@ -107,10 +108,13 @@ def info(table, **kwargs):
if kind == "file":
file_context = dummy_context(table)
elif kind == "matrix":
_table = np.asanyarray(table)
if table.ndim == 1: # 1D arrays need to be 2D and transposed
_table = np.transpose(np.atleast_2d(_table))
file_context = lib.virtualfile_from_matrix(_table)
try:
# pandas.DataFrame and xarray.Dataset types
arrays = [array for _, array in table.items()]
except AttributeError:
# Python lists, tuples, and numpy ndarray types
arrays = np.atleast_2d(np.asanyarray(table).T)
file_context = lib.virtualfile_from_vectors(*arrays)
else:
raise GMTInvalidInput(f"Unrecognized data type: {type(table)}")

Expand Down
53 changes: 49 additions & 4 deletions pygmt/tests/test_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,17 @@
import pandas as pd
import pytest
import xarray as xr
from packaging.version import Version

from .. import info
from .. import clib, info
from ..exceptions import GMTInvalidInput

TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
POINTS_DATA = os.path.join(TEST_DATA_DIR, "points.txt")

with clib.Session() as _lib:
gmt_version = Version(_lib.info["version"])


def test_info():
"Make sure info works on file name inputs"
Expand All @@ -33,7 +37,48 @@ def test_info_dataframe():
table = pd.read_csv(POINTS_DATA, sep=" ", header=None)
output = info(table=table)
expected_output = (
"<matrix memory>: N = 20 <11.5309/61.7074> <-2.9289/7.8648> <0.1412/0.9338>\n"
"<vector memory>: N = 20 <11.5309/61.7074> <-2.9289/7.8648> <0.1412/0.9338>\n"
)
assert output == expected_output


@pytest.mark.xfail(
    condition=gmt_version <= Version("6.1.1"),
    reason="UNIX timestamps returned instead of ISO datetime, should work on GMT 6.2.0 "
    "after https://github.com/GenericMappingTools/gmt/issues/4241 is resolved",
)
def test_info_pandas_dataframe_time_column():
    """
    Check that info handles a pandas.DataFrame that has a datetime column.

    The table has one integer column ("z") and one column of five
    consecutive daily timestamps ("time"); the expected report lists the
    min/max of each column, with the time column in ISO format.
    """
    # Five daily timestamps starting on 2020-01-01
    timestamps = pd.date_range(start="2020-01-01", periods=5)
    frame = pd.DataFrame(data={"z": [10, 13, 12, 15, 14], "time": timestamps})

    assert info(table=frame) == (
        "<vector memory>: N = 5 <10/15> <2020-01-01T00:00:00/2020-01-05T00:00:00>\n"
    )


@pytest.mark.xfail(
    condition=gmt_version <= Version("6.1.1"),
    reason="UNIX timestamp returned instead of ISO datetime, should work on GMT 6.2.0 "
    "after https://github.com/GenericMappingTools/gmt/issues/4241 is resolved",
)
def test_info_xarray_dataset_time_column():
    """
    Check that info handles a 1D xarray.Dataset that has a datetime variable.

    The dataset has two data variables along a shared "index" dimension:
    integers ("z") and five consecutive daily timestamps ("time"). The
    expected report lists the min/max of each, with time in ISO format.
    """
    # Both variables are 1D and share the "index" dimension
    variables = {
        "z": ("index", [10, 13, 12, 15, 14]),
        "time": ("index", pd.date_range(start="2020-01-01", periods=5)),
    }
    dataset = xr.Dataset(coords={"index": [0, 1, 2, 3, 4]}, data_vars=variables)

    assert info(table=dataset) == (
        "<vector memory>: N = 5 <10/15> <2020-01-01T00:00:00/2020-01-05T00:00:00>\n"
    )

Expand All @@ -43,15 +88,15 @@ def test_info_2d_array():
table = np.loadtxt(POINTS_DATA)
output = info(table=table)
expected_output = (
"<matrix memory>: N = 20 <11.5309/61.7074> <-2.9289/7.8648> <0.1412/0.9338>\n"
"<vector memory>: N = 20 <11.5309/61.7074> <-2.9289/7.8648> <0.1412/0.9338>\n"
)
assert output == expected_output


def test_info_1d_array():
"Make sure info works on 1D numpy.ndarray inputs"
output = info(table=np.arange(20))
expected_output = "<matrix memory>: N = 20 <0/19>\n"
expected_output = "<vector memory>: N = 20 <0/19>\n"
assert output == expected_output


Expand Down

0 comments on commit a4d414a

Please sign in to comment.