GenericMappingTools · seisman · Dec 2, 2024 · Dec 5, 2024 · Dec 5, 2024 · Dec 5, 2024
diff --git a/doc/api/index.rst b/doc/api/index.rst
@@ -172,6 +172,7 @@ Input/output
     :toctree: generated
 
     load_dataarray
+    read
 
 GMT Defaults
 ------------

diff --git a/pygmt/__init__.py b/pygmt/__init__.py
@@ -54,6 +54,7 @@
     makecpt,
     nearneighbor,
     project,
+    read,
     select,
     sph2grd,
     sphdistance,

diff --git a/pygmt/datasets/samples.py b/pygmt/datasets/samples.py
@@ -8,8 +8,7 @@
 import pandas as pd
 import xarray as xr
 from pygmt.exceptions import GMTInvalidInput
-from pygmt.io import load_dataarray
-from pygmt.src import which
+from pygmt.src import read, which
 
 
 def _load_japan_quakes() -> pd.DataFrame:
@@ -203,8 +202,7 @@ def _load_earth_relief_holes() -> xr.DataArray:
         The Earth relief grid. Coordinates are latitude and longitude in degrees. Relief
         is in meters.
     """
-    fname = which("@earth_relief_20m_holes.grd", download="c")
-    return load_dataarray(fname, engine="netcdf4")
+    return read("@earth_relief_20m_holes.grd", kind="grid")  # type: ignore[return-value]
 
 
 class GMTSampleData(NamedTuple):

diff --git a/pygmt/helpers/testing.py b/pygmt/helpers/testing.py
@@ -7,9 +7,9 @@
 import string
 from pathlib import Path
 
+import xarray as xr
 from pygmt.exceptions import GMTImageComparisonFailure
-from pygmt.io import load_dataarray
-from pygmt.src import which
+from pygmt.src import read
 
 
 def check_figures_equal(*, extensions=("png",), tol=0.0, result_dir="result_images"):
@@ -144,17 +144,16 @@ def wrapper(*args, ext="png", request=None, **kwargs):
     return decorator
 
 
-def load_static_earth_relief():
+def load_static_earth_relief() -> xr.DataArray:
     """
-    Load the static_earth_relief file for internal testing.
+    Load the static_earth_relief.nc file for internal testing.
 
     Returns
     -------
-    data : xarray.DataArray
+    data
         A grid of Earth relief for internal tests.
     """
-    fname = which("@static_earth_relief.nc", download="c")
-    return load_dataarray(fname)
+    return read("@static_earth_relief.nc", kind="grid")  # type: ignore[return-value]
 
 
 def skip_if_no(package):

diff --git a/pygmt/src/__init__.py b/pygmt/src/__init__.py
@@ -41,6 +41,7 @@
 from pygmt.src.plot3d import plot3d
 from pygmt.src.project import project
 from pygmt.src.psconvert import psconvert
+from pygmt.src.read import read
 from pygmt.src.rose import rose
 from pygmt.src.select import select
 from pygmt.src.shift_origin import shift_origin

diff --git a/pygmt/src/read.py b/pygmt/src/read.py
@@ -0,0 +1,118 @@
+"""
+Read a file into an appropriate object.
+"""
+
+from collections.abc import Mapping, Sequence
+from pathlib import PurePath
+from typing import Any, Literal
+
+import pandas as pd
+import xarray as xr
+from pygmt.clib import Session
+from pygmt.helpers import build_arg_list, is_nonstr_iter
+from pygmt.src.which import which
+
+
+def read(
+    file: str | PurePath,
+    kind: Literal["dataset", "grid", "image"],
+    region: Sequence[float] | str | None = None,
+    header: int | None = None,
+    column_names: pd.Index | None = None,
+    dtype: type | Mapping[Any, type] | None = None,
+    index_col: str | int | None = None,
+) -> pd.DataFrame | xr.DataArray:
+    """
+    Read a dataset, grid, or image from a file and return the appropriate object.
+
+    The returned object is a :class:`pandas.DataFrame` for datasets, and
+    :class:`xarray.DataArray` for grids and images.
+
+    For datasets, keyword arguments ``column_names``, ``header``, ``dtype``, and
+    ``index_col`` are supported.
+
+    Parameters
+    ----------
+    file
+        The file name to read.
+    kind
+        The kind of data to read. Valid values are ``"dataset"``, ``"grid"``, and
+        ``"image"``.
+    region
+        The region of interest. Only data within this region will be read.
+    column_names
+        A list of column names.
+    header
+        Row number containing column names. ``header=None`` means not to parse the
+        column names from the table header. Ignored if the row number is larger than the
+        number of header lines in the table.
+    dtype
+        Data type. Can be a single type for all columns or a dictionary mapping
+        column names to types.
+    index_col
+        Column to set as index.
+
+    Returns
+    -------
+    data
+        Return type depends on the ``kind`` argument:
+
+        - ``"dataset"``: :class:`pandas.DataFrame`
+        - ``"grid"`` or ``"image"``: :class:`xarray.DataArray`
+
+    Raises
+    ------
+    ValueError
+        If ``kind`` is not one of the valid values, or if any of ``column_names``,
+        ``header``, ``dtype``, or ``index_col`` is given for a non-dataset kind.
+
+    Examples
+    --------
+    Read a dataset into a :class:`pandas.DataFrame` object:
+
+    >>> from pygmt import read
+    >>> df = read("@hotspots.txt", kind="dataset")
+    >>> type(df)
+    <class 'pandas.core.frame.DataFrame'>
+
+    Read a grid into an :class:`xarray.DataArray` object:
+
+    >>> dataarray = read("@earth_relief_01d", kind="grid")
+    >>> type(dataarray)
+    <class 'xarray.core.dataarray.DataArray'>
+    """
+    if kind not in {"dataset", "grid", "image"}:
+        msg = f"Invalid kind {kind}: must be one of 'dataset', 'grid', or 'image'."
+        raise ValueError(msg)
+
+    if kind != "dataset" and any(
+        v is not None for v in [column_names, header, dtype, index_col]
+    ):
+        msg = (
+            "Only the 'dataset' kind supports the 'column_names', 'header', "
+            "'dtype', and 'index_col' arguments."
+        )
+        raise ValueError(msg)
+
+    kwdict = {
+        "R": "/".join(f"{v}" for v in region) if is_nonstr_iter(region) else region,  # type: ignore[union-attr]
+        "T": {"dataset": "d", "grid": "g", "image": "i"}[kind],
+    }
+
+    with Session() as lib:
+        with lib.virtualfile_out(kind=kind) as voutfile:
+            # ``file`` may be a PurePath, so pass it to the module as a plain string.
+            lib.call_module(
+                "read", args=[str(file), voutfile, *build_arg_list(kwdict)]
+            )
+
+            # Convert the output while the virtual file is still open: the GMT API
+            # expects a virtual file to be read before it is closed, so this must
+            # stay inside the ``virtualfile_out`` context.
+            match kind:
+                case "dataset":
+                    return lib.virtualfile_to_dataset(
+                        vfname=voutfile,
+                        column_names=column_names,
+                        header=header,
+                        dtype=dtype,
+                        index_col=index_col,
+                    )
+                case "grid" | "image":
+                    raster = lib.virtualfile_to_raster(vfname=voutfile, kind=kind)
+                    # Add "source" encoding
+                    source = which(fname=file)
+                    raster.encoding["source"] = (
+                        source[0] if isinstance(source, list) else source
+                    )
+                    _ = raster.gmt  # Load GMTDataArray accessor information
+                    return raster
diff --git a/pygmt/tests/test_datatypes_dataset.py b/pygmt/tests/test_datatypes_dataset.py
@@ -6,8 +6,7 @@
 
 import pandas as pd
 import pytest
-from pygmt import which
-from pygmt.clib import Session
+from pygmt import read, which
 from pygmt.helpers import GMTTempFile
 
 
@@ -44,11 +43,7 @@ def dataframe_from_gmt(fname, **kwargs):
     """
     Read tabular data as pandas.DataFrame using GMT virtual file.
     """
-    with Session() as lib:
-        with lib.virtualfile_out(kind="dataset") as vouttbl:
-            lib.call_module("read", [fname, vouttbl, "-Td"])
-            df = lib.virtualfile_to_dataset(vfname=vouttbl, **kwargs)
-            return df
+    return read(fname, kind="dataset", **kwargs)
 
 
 @pytest.mark.benchmark

diff --git a/pygmt/tests/test_read.py b/pygmt/tests/test_read.py
@@ -0,0 +1,28 @@
+"""
+Test the read function.
+"""
+
+import pytest
+from pygmt import read
+
+
+def test_read_invalid_kind():
+    """
+    Ensure ``read`` raises a ValueError when ``kind`` is not a supported value.
+    """
+    unsupported_kind = "cpt"
+    with pytest.raises(ValueError, match="Invalid kind"):
+        read("file.cpt", kind=unsupported_kind)
+
+
+def test_read_invalid_arguments():
+    """
+    Test that invalid arguments raise a ValueError for non-'dataset' kind.
+
+    Each of the dataset-only arguments named in the error message
+    (``column_names``, ``header``, ``dtype``, and ``index_col``) is passed on its
+    own with ``kind="grid"`` and must be rejected.
+    """
+    dataset_only_arguments = {
+        "column_names": "foo",
+        "header": 1,
+        "dtype": "float",
+        "index_col": 0,  # 0 is a valid column index and is not None
+    }
+    for name, value in dataset_only_arguments.items():
+        with pytest.raises(ValueError, match="Only the 'dataset' kind supports"):
+            read("file.nc", kind="grid", **{name: value})
-Original file line number
+Diff line change
@@ Expand Up / @@ -172,6 +172,7 @@ Input/output @@
         :toctree: generated
         load_dataarray
+        read
     GMT Defaults
     ------------
@@ Expand Down @@