Skip to content

Commit

Permalink
**Breaking**: data_kind: Now 'matrix' represents a 2-D numpy array an…
Browse files Browse the repository at this point in the history
…d unrecognized data types fall back to 'vectors' (#3351)

Co-authored-by: Wei Ji <[email protected]>
  • Loading branch information
seisman and weiji14 authored Oct 16, 2024
1 parent 8a30c8e commit a5c0aa2
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 36 deletions.
37 changes: 19 additions & 18 deletions pygmt/clib/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -1787,10 +1787,7 @@ def virtualfile_in( # noqa: PLR0912
"grid": self.virtualfile_from_grid,
"image": tempfile_from_image,
"stringio": self.virtualfile_from_stringio,
# Note: virtualfile_from_matrix is not used because a matrix can be
# converted to vectors instead, and using vectors allows for better
# handling of string type inputs (e.g. for datetime data types)
"matrix": self.virtualfile_from_vectors,
"matrix": self.virtualfile_from_matrix,
"vectors": self.virtualfile_from_vectors,
}[kind]

Expand All @@ -1807,29 +1804,33 @@ def virtualfile_in( # noqa: PLR0912
warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2)
_data = (data,) if not isinstance(data, pathlib.PurePath) else (str(data),)
elif kind == "vectors":
_data = [x, y]
if z is not None:
_data.append(z)
if extra_arrays:
_data.extend(extra_arrays)
elif kind == "matrix": # turn 2-D arrays into list of vectors
if hasattr(data, "items") and not hasattr(data, "to_frame"):
if data is None:
# data is None, so data must be given via x/y/z.
_data = [x, y]
if z is not None:
_data.append(z)
if extra_arrays:
_data.extend(extra_arrays)
elif hasattr(data, "items") and not hasattr(data, "to_frame"):
# pandas.DataFrame or xarray.Dataset types.
# pandas.Series will be handled below like a 1-D numpy.ndarray.
_data = [array for _, array in data.items()]
elif hasattr(data, "ndim") and data.ndim == 2 and data.dtype.kind in "iuf":
# Just use virtualfile_from_matrix for 2-D numpy.ndarray
# which are signed integer (i), unsigned integer (u) or
# floating point (f) types
_virtualfile_from = self.virtualfile_from_matrix
_data = (data,)
else:
# Python list, tuple, numpy.ndarray, and pandas.Series types
_data = np.atleast_2d(np.asanyarray(data).T)
elif kind == "matrix":
# GMT can only accept a 2-D matrix which are signed integer (i), unsigned
# integer (u) or floating point (f) types. For other data types, we need to
# use virtualfile_from_vectors instead, which turns the matrix into a list
# of vectors and allows for better handling of non-integer/float type inputs
# (e.g. for string or datetime data types).
_data = (data,)
if data.dtype.kind not in "iuf":
_virtualfile_from = self.virtualfile_from_vectors
_data = data.T

# Finally create the virtualfile from the data, to be passed into GMT
file_context = _virtualfile_from(*_data)

return file_context

def virtualfile_from_data(
Expand Down
37 changes: 22 additions & 15 deletions pygmt/helpers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,12 @@ def data_kind(
- ``"grid"``: a :class:`xarray.DataArray` object that is not 3-D
- ``"image"``: a 3-D :class:`xarray.DataArray` object
- ``"stringio"``: a :class:`io.StringIO` object
- ``"matrix"``: anything else that is not ``None``
- ``"vectors"``: ``data`` is ``None`` and ``required=True``
- ``"matrix"``: a 2-D array-like object that implements ``__array_interface__``
(e.g., :class:`numpy.ndarray`)
- ``"vectors"``: ``data`` is ``None`` and ``required=True``, or any unrecognized
data. Common data types include, a :class:`pandas.DataFrame` object, a dictionary
with array-like values, a 1-D/3-D :class:`numpy.ndarray` object, or array-like
objects.
Parameters
----------
Expand Down Expand Up @@ -268,27 +272,27 @@ def data_kind(
The "matrix"`` kind:
>>> data_kind(data=np.arange(10)) # 1-D numpy.ndarray
'matrix'
>>> data_kind(data=np.arange(10).reshape((5, 2))) # 2-D numpy.ndarray
'matrix'
The "vectors" kind:
>>> data_kind(data=np.arange(10)) # 1-D numpy.ndarray
'vectors'
>>> data_kind(data=np.arange(60).reshape((3, 4, 5))) # 3-D numpy.ndarray
'matrix'
'vectors'
>>> data_kind(xr.DataArray(np.arange(12), name="x").to_dataset()) # xarray.Dataset
'matrix'
'vectors'
>>> data_kind(data=[1, 2, 3]) # 1-D sequence
'matrix'
'vectors'
>>> data_kind(data=[[1, 2, 3], [4, 5, 6]]) # sequence of sequences
'matrix'
'vectors'
>>> data_kind(data={"x": [1, 2, 3], "y": [4, 5, 6]}) # dictionary
'matrix'
'vectors'
>>> data_kind(data=pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})) # pd.DataFrame
'matrix'
'vectors'
>>> data_kind(data=pd.Series([1, 2, 3], name="x")) # pd.Series
'matrix'
The "vectors" kind:
'vectors'
>>> data_kind(data=None)
'vectors'
"""
Expand All @@ -312,7 +316,10 @@ def data_kind(
# geopandas.GeoDataFrame or shapely.geometry).
# Reference: https://gist.github.com/sgillies/2217756
kind = "geojson"
case x if x is not None: # Any not-None is considered as a matrix.
case x if hasattr(x, "__array_interface__") and data.ndim == 2:
# 2-D Array-like objects that implements ``__array_interface__`` (e.g.,
# numpy.ndarray).
# Reference: https://numpy.org/doc/stable/reference/arrays.interface.html
kind = "matrix"
case _: # Fall back to "vectors" if data is None and required=True.
kind = "vectors"
Expand Down
2 changes: 1 addition & 1 deletion pygmt/src/legend.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def legend(
kwargs["F"] = box

kind = data_kind(spec)
if kind not in {"vectors", "file", "stringio"}: # kind="vectors" means spec is None
if spec is not None and kind not in {"file", "stringio"}:
raise GMTInvalidInput(f"Unrecognized data type: {type(spec)}")
if kind == "file" and is_nonstr_iter(spec):
raise GMTInvalidInput("Only one legend specification file is allowed.")
Expand Down
2 changes: 1 addition & 1 deletion pygmt/src/x2sys_cross.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def x2sys_cross(
match data_kind(track):
case "file":
file_contexts.append(contextlib.nullcontext(track))
case "matrix":
case "vectors":
# find suffix (-E) of trackfiles used (e.g. xyz, csv, etc) from
# $X2SYS_HOME/TAGNAME/TAGNAME.tag file
tagfile = Path(
Expand Down
28 changes: 27 additions & 1 deletion pygmt/tests/test_clib_virtualfile_in.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@
import pandas as pd
import pytest
import xarray as xr
from packaging.version import Version
from pygmt import clib
from pygmt.clib import __gmt_version__
from pygmt.exceptions import GMTInvalidInput
from pygmt.helpers import GMTTempFile
from pygmt.helpers import GMTTempFile, data_kind

POINTS_DATA = Path(__file__).parent / "data" / "points.txt"

Expand Down Expand Up @@ -101,3 +103,27 @@ def test_virtualfile_in_fail_non_valid_data(data):
z=data[:, 2],
data=data,
)


@pytest.mark.xfail(
condition=Version(__gmt_version__) <= Version("6.5.0"),
reason="Upstream bug fixed in https://github.com/GenericMappingTools/gmt/pull/8600",
)
def test_virtualfile_in_matrix_string_dtype():
"""
Pass a string dtype matrix should work and the matrix should be passed via a series
of vectors.
"""
data = np.array([["11:30W", "30:30S"], ["12:30W", "30:00S"]])
assert data_kind(data) == "matrix" # data is recognized as "matrix" kind
assert data.dtype.type == np.str_
assert data.dtype.kind not in "iuf" # dtype is not in numeric dtypes

with clib.Session() as lib:
with lib.virtualfile_in(data=data) as vintbl:
with GMTTempFile() as outfile:
lib.call_module("info", [vintbl, "-C", f"->{outfile.name}"])
output = outfile.read(keep_tabs=False)
assert output == "347.5 348.5 -30.5 -30\n"
# Should check that lib.virtualfile_from_vectors is called once,
# not lib.virtualfile_from_matrix, but it's technically complicated.

0 comments on commit a5c0aa2

Please sign in to comment.