diff --git a/.github/workflows/ci_tests.yaml b/.github/workflows/ci_tests.yaml index 9d30c785369..05ac0d3008d 100644 --- a/.github/workflows/ci_tests.yaml +++ b/.github/workflows/ci_tests.yaml @@ -71,7 +71,7 @@ jobs: optional-packages: '' - python-version: '3.12' numpy-version: '1.26' - optional-packages: ' contextily geopandas ipython rioxarray sphinx-gallery' + optional-packages: ' contextily geopandas ipython pyarrow rioxarray sphinx-gallery' timeout-minutes: 30 defaults: diff --git a/.github/workflows/ci_tests_dev.yaml b/.github/workflows/ci_tests_dev.yaml index 4606092f9f4..1123dd83367 100644 --- a/.github/workflows/ci_tests_dev.yaml +++ b/.github/workflows/ci_tests_dev.yaml @@ -153,7 +153,7 @@ jobs: python -m pip install --pre --prefer-binary \ --extra-index https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \ numpy pandas xarray netCDF4 packaging \ - build contextily dvc geopandas ipython rioxarray \ + build contextily dvc geopandas ipython pyarrow rioxarray \ 'pytest>=6.0' pytest-cov pytest-doctestplus pytest-mpl \ sphinx-gallery diff --git a/.github/workflows/ci_tests_legacy.yaml b/.github/workflows/ci_tests_legacy.yaml index 73f242f4f92..f7a18f548dc 100644 --- a/.github/workflows/ci_tests_legacy.yaml +++ b/.github/workflows/ci_tests_legacy.yaml @@ -72,6 +72,7 @@ jobs: contextily geopandas ipython + pyarrow rioxarray sphinx-gallery build diff --git a/doc/install.rst b/doc/install.rst index 500128f9775..f3594e52521 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -107,6 +107,15 @@ The following are optional dependencies: * `GeoPandas `__: For using and plotting GeoDataFrame objects. * `RioXarray `__: For saving multi-band rasters to GeoTIFFs. +.. note:: + + If you have `PyArrow `__ + installed, PyGMT does have some initial support for ``pandas.Series`` and + ``pandas.DataFrame`` objects with Apache Arrow-backed arrays. Specifically, + only uint/int/float dtypes are supported for now. 
Support for datetime and + string Arrow dtypes is still a work in progress. For more details, see + `issue #2800 `__. + Installing GMT and other dependencies ------------------------------------- diff --git a/pygmt/tests/test_clib_virtualfiles.py b/pygmt/tests/test_clib_virtualfiles.py index 984ec187e87..133f154d081 100644 --- a/pygmt/tests/test_clib_virtualfiles.py +++ b/pygmt/tests/test_clib_virtualfiles.py @@ -2,6 +2,7 @@ Test the C API functions related to virtual files. """ import os +from importlib.util import find_spec from itertools import product import numpy as np @@ -321,16 +322,21 @@ def test_virtualfile_from_matrix_slice(dtypes): def test_virtualfile_from_vectors_pandas(dtypes): """ - Pass vectors to a dataset using pandas Series. + Pass vectors to a dataset using pandas.Series, checking both numpy and + pyarrow dtypes. """ size = 13 + if find_spec("pyarrow") is not None: + dtypes.extend([f"{dtype}[pyarrow]" for dtype in dtypes]) + for dtype in dtypes: data = pd.DataFrame( data={ - "x": np.arange(size, dtype=dtype), - "y": np.arange(size, size * 2, 1, dtype=dtype), - "z": np.arange(size * 2, size * 3, 1, dtype=dtype), - } + "x": np.arange(size), + "y": np.arange(size, size * 2, 1), + "z": np.arange(size * 2, size * 3, 1), + }, + dtype=dtype, ) with clib.Session() as lib: with lib.virtualfile_from_vectors(data.x, data.y, data.z) as vfile: diff --git a/pygmt/tests/test_geopandas.py b/pygmt/tests/test_geopandas.py index 58e59b374d4..74d13cbd7fc 100644 --- a/pygmt/tests/test_geopandas.py +++ b/pygmt/tests/test_geopandas.py @@ -5,6 +5,7 @@ import pandas as pd import pytest from pygmt import Figure, info, makecpt, which +from pygmt.helpers.testing import skip_if_no gpd = pytest.importorskip("geopandas") shapely = pytest.importorskip("shapely") @@ -161,6 +162,24 @@ def test_geopandas_plot3d_non_default_circle(): "int64", pd.Int32Dtype(), pd.Int64Dtype(), + pytest.param( + "int32[pyarrow]", + marks=[ + skip_if_no(package="pyarrow"), + pytest.mark.xfail( + 
reason="geopandas doesn't support writing columns with pyarrow dtypes to OGR_GMT yet." + ), + ], + ), + pytest.param( + "int64[pyarrow]", + marks=[ + skip_if_no(package="pyarrow"), + pytest.mark.xfail( + reason="geopandas doesn't support writing columns with pyarrow dtypes to OGR_GMT yet." + ), + ], + ), ], ) @pytest.mark.mpl_image_compare(filename="test_geopandas_plot_int_dtypes.png") diff --git a/pygmt/tests/test_info.py b/pygmt/tests/test_info.py index 999965417a3..b1c247e5bd3 100644 --- a/pygmt/tests/test_info.py +++ b/pygmt/tests/test_info.py @@ -12,6 +12,7 @@ import xarray as xr from pygmt import info from pygmt.exceptions import GMTInvalidInput +from pygmt.helpers.testing import skip_if_no TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "data") POINTS_DATA = os.path.join(TEST_DATA_DIR, "points.txt") @@ -74,16 +75,27 @@ def test_info_2d_list(): assert output == expected_output -def test_info_series(): +@pytest.mark.parametrize( + "dtype", + ["int64", pytest.param("int64[pyarrow]", marks=skip_if_no(package="pyarrow"))], +) +def test_info_series(dtype): """ Make sure info works on a pandas.Series input. """ - output = info(pd.Series(data=[0, 4, 2, 8, 6])) + output = info(pd.Series(data=[0, 4, 2, 8, 6], dtype=dtype)) expected_output = ": N = 5 <0/8>\n" assert output == expected_output -def test_info_dataframe(): +@pytest.mark.parametrize( + "dtype", + [ + "float64", + pytest.param("float64[pyarrow]", marks=skip_if_no(package="pyarrow")), + ], +) +def test_info_dataframe(dtype): """ Make sure info works on pandas.DataFrame inputs. """