From 70fc9e42f1b76469173573f1430ff3acf7118a8c Mon Sep 17 00:00:00 2001 From: Dongdong Tian Date: Fri, 13 Oct 2023 22:08:43 +0800 Subject: [PATCH] Refactor the data_kind and the virtualfile_to_data functions --- pygmt/clib/session.py | 35 ++++---- pygmt/helpers/__init__.py | 1 + pygmt/helpers/utils.py | 166 +++++++++++++++++++------------------- pygmt/src/contour.py | 2 +- 4 files changed, 99 insertions(+), 105 deletions(-) diff --git a/pygmt/clib/session.py b/pygmt/clib/session.py index 8db686812c1..4e3ae6fe99b 100644 --- a/pygmt/clib/session.py +++ b/pygmt/clib/session.py @@ -32,6 +32,7 @@ fmt_docstring, tempfile_from_geojson, tempfile_from_image, + validate_data_input, ) FAMILIES = [ @@ -1474,11 +1475,8 @@ def virtualfile_from_data( self, check_kind=None, data=None, - x=None, - y=None, - z=None, - extra_arrays=None, - required_z=False, + vectors=None, + ncols=1, required_data=True, ): """ @@ -1497,13 +1495,11 @@ def virtualfile_from_data( Any raster or vector data format. This could be a file name or path, a raster grid, a vector matrix/arrays, or other supported data input. - x/y/z : 1-D arrays or None - x, y, and z columns as numpy arrays. - extra_arrays : list of 1-D arrays - Optional. A list of numpy arrays in addition to x, y, and z. - All of these arrays must be of the same size as the x/y/z arrays. - required_z : bool - State whether the 'z' column is required. + vectors : list of 1-D arrays or None + A list of 1-D arrays. Each array will be a column in the table. + All of these arrays must be of the same size. + ncols : int + The minimum number of columns required for the data. required_data : bool Set to True when 'data' is required, or False when dealing with optional virtual files. [Default is True]. @@ -1537,8 +1533,13 @@ def virtualfile_from_data( ... 
: N = 3 <7/9> <4/6> <1/3> """ - kind = data_kind( - data, x, y, z, required_z=required_z, required_data=required_data + kind = data_kind(data, required=required_data) + validate_data_input( + data=data, + vectors=vectors, + ncols=ncols, + required_data=required_data, + kind=kind, ) if check_kind: @@ -1579,11 +1580,7 @@ def virtualfile_from_data( warnings.warn(message=msg, category=RuntimeWarning, stacklevel=2) _data = (data,) if not isinstance(data, pathlib.PurePath) else (str(data),) elif kind == "vectors": - _data = [np.atleast_1d(x), np.atleast_1d(y)] - if z is not None: - _data.append(np.atleast_1d(z)) - if extra_arrays: - _data.extend(extra_arrays) + _data = [np.atleast_1d(v) for v in vectors] elif kind == "matrix": # turn 2-D arrays into list of vectors try: # pandas.Series will be handled below like a 1-D numpy.ndarray diff --git a/pygmt/helpers/__init__.py b/pygmt/helpers/__init__.py index 5eb265e8002..71bc0582252 100644 --- a/pygmt/helpers/__init__.py +++ b/pygmt/helpers/__init__.py @@ -20,4 +20,5 @@ is_nonstr_iter, launch_external_viewer, non_ascii_to_octal, + validate_data_input, ) diff --git a/pygmt/helpers/utils.py b/pygmt/helpers/utils.py index 31629a6ea52..d6b7ab8ce5a 100644 --- a/pygmt/helpers/utils.py +++ b/pygmt/helpers/utils.py @@ -15,113 +15,125 @@ from pygmt.exceptions import GMTInvalidInput -def _validate_data_input( - data=None, x=None, y=None, z=None, required_z=False, required_data=True, kind=None +def validate_data_input( + data=None, vectors=None, ncols=2, required_data=True, kind=None ): """ - Check if the combination of data/x/y/z is valid. + Check if the data input is valid. 
Examples -------- - >>> _validate_data_input(data="infile") - >>> _validate_data_input(x=[1, 2, 3], y=[4, 5, 6]) - >>> _validate_data_input(x=[1, 2, 3], y=[4, 5, 6], z=[7, 8, 9]) - >>> _validate_data_input(data=None, required_data=False) - >>> _validate_data_input() + >>> validate_data_input(data="infile") + >>> validate_data_input(vectors=[[1, 2, 3], [4, 5, 6]], ncols=2) + >>> validate_data_input(vectors=[[1, 2, 3], [4, 5, 6], [7, 8, 9]], ncols=3) + >>> validate_data_input(data=None, required_data=False) + >>> validate_data_input() Traceback (most recent call last): ... pygmt.exceptions.GMTInvalidInput: No input data provided. - >>> _validate_data_input(x=[1, 2, 3]) + >>> validate_data_input(vectors=[[1, 2, 3], None], ncols=2) Traceback (most recent call last): ... - pygmt.exceptions.GMTInvalidInput: Must provide both x and y. - >>> _validate_data_input(y=[4, 5, 6]) + pygmt.exceptions.GMTInvalidInput: The 'y' column can't be None. + >>> validate_data_input(vectors=[None, [4, 5, 6]], ncols=2) Traceback (most recent call last): ... - pygmt.exceptions.GMTInvalidInput: Must provide both x and y. - >>> _validate_data_input(x=[1, 2, 3], y=[4, 5, 6], required_z=True) + pygmt.exceptions.GMTInvalidInput: The 'x' column can't be None. + >>> validate_data_input(vectors=[[1, 2, 3], [4, 5, 6], None], ncols=3) Traceback (most recent call last): ... - pygmt.exceptions.GMTInvalidInput: Must provide x, y, and z. + pygmt.exceptions.GMTInvalidInput: The 'z' column can't be None. >>> import numpy as np >>> import pandas as pd >>> import xarray as xr >>> data = np.arange(8).reshape((4, 2)) - >>> _validate_data_input(data=data, required_z=True, kind="matrix") + >>> validate_data_input(data=data, ncols=3, kind="matrix") Traceback (most recent call last): ... - pygmt.exceptions.GMTInvalidInput: data must provide x, y, and z columns. - >>> _validate_data_input( + pygmt.exceptions.GMTInvalidInput: data must have at least 3 columns. + >>> validate_data_input( ... 
data=pd.DataFrame(data, columns=["x", "y"]), - ... required_z=True, + ... ncols=3, ... kind="matrix", ... ) Traceback (most recent call last): ... - pygmt.exceptions.GMTInvalidInput: data must provide x, y, and z columns. - >>> _validate_data_input( + pygmt.exceptions.GMTInvalidInput: data must have at least 3 columns. + >>> validate_data_input( ... data=xr.Dataset(pd.DataFrame(data, columns=["x", "y"])), - ... required_z=True, + ... ncols=3, ... kind="matrix", ... ) Traceback (most recent call last): ... - pygmt.exceptions.GMTInvalidInput: data must provide x, y, and z columns. - >>> _validate_data_input(data="infile", x=[1, 2, 3]) + pygmt.exceptions.GMTInvalidInput: data must have at least 3 columns. + >>> validate_data_input(data="infile", vectors=[[1, 2, 3], None]) Traceback (most recent call last): ... - pygmt.exceptions.GMTInvalidInput: Too much data. Use either data or x/y/z. - >>> _validate_data_input(data="infile", y=[4, 5, 6]) + pygmt.exceptions.GMTInvalidInput: Too much data. Pass in either 'data' or 1-D arrays. # noqa: W505 + >>> validate_data_input(data="infile", vectors=[None, [4, 5, 6]]) Traceback (most recent call last): ... - pygmt.exceptions.GMTInvalidInput: Too much data. Use either data or x/y/z. - >>> _validate_data_input(data="infile", z=[7, 8, 9]) + pygmt.exceptions.GMTInvalidInput: Too much data. Pass in either 'data' or 1-D arrays. # noqa: W505 + >>> validate_data_input(data="infile", vectors=[None, None, [7, 8, 9]]) Traceback (most recent call last): ... - pygmt.exceptions.GMTInvalidInput: Too much data. Use either data or x/y/z. + pygmt.exceptions.GMTInvalidInput: Too much data. Pass in either 'data' or 1-D arrays. # noqa: W505 Raises ------ GMTInvalidInput If the data input is not valid. 
""" - if data is None: # data is None - if x is None and y is None: # both x and y are None - if required_data: # data is not optional - raise GMTInvalidInput("No input data provided.") - elif x is None or y is None: # either x or y is None - raise GMTInvalidInput("Must provide both x and y.") - if required_z and z is None: # both x and y are not None, now check z - raise GMTInvalidInput("Must provide x, y, and z.") - else: # data is not None - if x is not None or y is not None or z is not None: - raise GMTInvalidInput("Too much data. Use either data or x/y/z.") - # For 'matrix' kind, check if data has the required z column - if kind == "matrix" and required_z: + if kind is None: + kind = data_kind(data=data, required=required_data) + + if kind == "vectors": # From data_kind, we know that data is None + if vectors is None: + raise GMTInvalidInput("No input data provided.") + if len(vectors) < ncols: + raise GMTInvalidInput( + f"Requires {ncols} 1-D arrays but got {len(vectors)}." + ) + for i, v in enumerate(vectors[:ncols]): + if v is None: + if i < 3: + msg = f"The '{'xyz'[i]}' column can't be None." + else: + msg = f"Column {i} can't be None." + raise GMTInvalidInput(msg) + else: + if vectors is not None and any(v is not None for v in vectors): + raise GMTInvalidInput("Too much data. 
Pass in either 'data' or 1-D arrays.") + if kind == "matrix": # check number of columns for matrix-like data if hasattr(data, "shape"): # np.ndarray or pd.DataFrame - if len(data.shape) == 1 and data.shape[0] < 3: - raise GMTInvalidInput("data must provide x, y, and z columns.") - if len(data.shape) > 1 and data.shape[1] < 3: - raise GMTInvalidInput("data must provide x, y, and z columns.") - if hasattr(data, "data_vars") and len(data.data_vars) < 3: # xr.Dataset - raise GMTInvalidInput("data must provide x, y, and z columns.") + if len(data.shape) == 1 and data.shape[0] < ncols: + raise GMTInvalidInput(f"data must have at least {ncols} columns.") + if len(data.shape) > 1 and data.shape[1] < ncols: + raise GMTInvalidInput(f"data must have at least {ncols} columns.") + if hasattr(data, "data_vars") and len(data.data_vars) < ncols: # xr.Dataset + raise GMTInvalidInput(f"data must have at least {ncols} columns.") -def data_kind(data=None, x=None, y=None, z=None, required_z=False, required_data=True): +def data_kind(data=None, required=True): """ - Check what kind of data is provided to a module. + Determine the kind of data that will be passed to a module. - Possible types: + It checks the type of the ``data`` argument and determines the kind of + data. Falls back to ``"vectors"`` if ``data`` is None but required. - * a file name provided as 'data' - * a pathlib.PurePath object provided as 'data' - * an xarray.DataArray object provided as 'data' - * a 2-D matrix provided as 'data' - * 1-D arrays x and y (and z, optionally) - * an optional argument (None, bool, int or float) provided as 'data' + Possible data kinds: - Arguments should be ``None`` if not used. If doesn't fit any of these - categories (or fits more than one), will raise an exception. 
+ - ``'file'``: a file name or a pathlib.PurePath object provided as 'data' + - ``'arg'``: an optional argument (None, bool, int or float) provided + as 'data' + - ``'grid'``: an xarray.DataArray with 2 dimensions provided as 'data' + - ``'image'``: an xarray.DataArray with 3 dimensions provided as 'data' + - ``'geojson'``: a geo-like Python object that implements + ``__geo_interface__`` (geopandas.GeoDataFrame or shapely.geometry) + provided as 'data' + - ``'matrix'``: a 2-D array provided as 'data' + - ``'vectors'``: a list of 1-D arrays provided as 'vectors' Parameters ---------- @@ -129,13 +141,7 @@ def data_kind(data=None, x=None, y=None, z=None, required_z=False, required_data Pass in either a file name or :class:`pathlib.Path` to an ASCII data table, an :class:`xarray.DataArray`, a 1-D/2-D {table-classes} or an option argument. - x/y : 1-D arrays or None - x and y columns as numpy arrays. - z : 1-D array or None - z column as numpy array. To be used optionally when x and y are given. - required_z : bool - State whether the 'z' column is required. - required_data : bool + required : bool Set to True when 'data' is required, or False when dealing with optional virtual files. [Default is True]. 
@@ -151,29 +157,28 @@ def data_kind(data=None, x=None, y=None, z=None, required_z=False, required_data >>> import numpy as np >>> import xarray as xr >>> import pathlib - >>> data_kind(data=None, x=np.array([1, 2, 3]), y=np.array([4, 5, 6])) + >>> data_kind(data=None) 'vectors' - >>> data_kind(data=np.arange(10).reshape((5, 2)), x=None, y=None) + >>> data_kind(data=np.arange(10).reshape((5, 2))) 'matrix' - >>> data_kind(data="my-data-file.txt", x=None, y=None) + >>> data_kind(data="my-data-file.txt") 'file' - >>> data_kind(data=pathlib.Path("my-data-file.txt"), x=None, y=None) + >>> data_kind(data=pathlib.Path("my-data-file.txt")) 'file' - >>> data_kind(data=None, x=None, y=None, required_data=False) + >>> data_kind(data=None, required=False) 'arg' - >>> data_kind(data=2.0, x=None, y=None, required_data=False) + >>> data_kind(data=2.0, required=False) 'arg' - >>> data_kind(data=True, x=None, y=None, required_data=False) + >>> data_kind(data=True, required=False) 'arg' >>> data_kind(data=xr.DataArray(np.random.rand(4, 3))) 'grid' >>> data_kind(data=xr.DataArray(np.random.rand(3, 4, 5))) 'image' """ - # determine the data kind if isinstance(data, (str, pathlib.PurePath)): kind = "file" - elif isinstance(data, (bool, int, float)) or (data is None and not required_data): + elif isinstance(data, (bool, int, float)) or (data is None and not required): kind = "arg" elif isinstance(data, xr.DataArray): kind = "image" if len(data.dims) == 3 else "grid" @@ -181,19 +186,10 @@ def data_kind(data=None, x=None, y=None, z=None, required_z=False, required_data # geo-like Python object that implements ``__geo_interface__`` # (geopandas.GeoDataFrame or shapely.geometry) kind = "geojson" - elif data is not None: + elif data is not None: # anything but None is taken as a matrix kind = "matrix" - else: + else: # fallback to vectors if data is None but required kind = "vectors" - _validate_data_input( - data=data, - x=x, - y=y, - z=z, - required_z=required_z, - 
required_data=required_data, - kind=kind, - ) return kind diff --git a/pygmt/src/contour.py b/pygmt/src/contour.py index 6aaf22b7cd6..ac34dcb5d95 100644 --- a/pygmt/src/contour.py +++ b/pygmt/src/contour.py @@ -116,7 +116,7 @@ def contour(self, data=None, x=None, y=None, z=None, **kwargs): with Session() as lib: file_context = lib.virtualfile_from_data( - check_kind="vector", data=data, x=x, y=y, z=z, required_z=True + check_kind="vector", data=data, vectors=[x, y, z], ncols=3 ) with file_context as fname: lib.call_module(