Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pyarrow: Support date32[day] and date64[ms] dtypes in pandas objects #2845

Merged
merged 9 commits into from
Dec 17, 2023
1 change: 1 addition & 0 deletions .github/workflows/ci_doctests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ jobs:
contextily
geopandas
ipython
pyarrow
rioxarray
build
make
Expand Down
4 changes: 2 additions & 2 deletions doc/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ The following are optional dependencies:
If you have `PyArrow <https://arrow.apache.org/docs/python/index.html>`__
installed, PyGMT does have some initial support for ``pandas.Series`` and
``pandas.DataFrame`` objects with Apache Arrow-backed arrays. Specifically,
only uint/int/float dtypes are supported for now. Support for datetime and
string Arrow dtypes is still a work in progress. For more details, see
only uint/int/float and date32/date64 dtypes are supported for now. Support
for string Arrow dtypes is still a work in progress. For more details, see
`issue #2800 <https://github.com/GenericMappingTools/pygmt/issues/2800>`__.

Installing GMT and other dependencies
Expand Down
34 changes: 33 additions & 1 deletion pygmt/clib/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,11 +162,43 @@
True
>>> all(isinstance(i, np.ndarray) for i in arrays)
True

>>> data = [[1, 2], (3, 4), range(5, 7)]
>>> all(isinstance(i, np.ndarray) for i in vectors_to_arrays(data))
True

>>> import datetime
>>> import pytest
>>> pa = pytest.importorskip("pyarrow")
>>> vectors = [
... pd.Series(
... data=[datetime.date(2020, 1, 1), datetime.date(2021, 12, 31)],
... dtype="date32[day][pyarrow]",
... ),
... pd.Series(
... data=[datetime.date(2022, 1, 1), datetime.date(2023, 12, 31)],
... dtype="date64[ms][pyarrow]",
... ),
... ]
>>> arrays = vectors_to_arrays(vectors)
>>> all(a.flags.c_contiguous for a in arrays)
True
>>> all(isinstance(a, np.ndarray) for a in arrays)
True
>>> all(isinstance(a.dtype, np.dtypes.DateTime64DType) for a in arrays)
True
"""
arrays = [as_c_contiguous(np.asarray(i)) for i in vectors]
arrays = []
seisman marked this conversation as resolved.
Show resolved Hide resolved
for vector in vectors:
vec_dtype = str(getattr(vector, "dtype", ""))
if "[pyarrow]" in vec_dtype: # handle pyarrow date32/date64 dtypes
array = vector.to_numpy(

Check warning on line 195 in pygmt/clib/conversion.py

View check run for this annotation

Codecov / codecov/patch

pygmt/clib/conversion.py#L195

Added line #L195 was not covered by tests
dtype=np.datetime64 if vec_dtype.startswith("date") else None
)
else:
array = np.asarray(vector)
arrays.append(as_c_contiguous(array))

return arrays


Expand Down
14 changes: 11 additions & 3 deletions pygmt/tests/test_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,14 +119,22 @@ def test_info_numpy_array_time_column():
assert output == expected_output


def test_info_pandas_dataframe_time_column():
@pytest.mark.parametrize(
"dtype",
[
"datetime64[ns]",
pytest.param("date32[day][pyarrow]", marks=skip_if_no(package="pyarrow")),
pytest.param("date64[ms][pyarrow]", marks=skip_if_no(package="pyarrow")),
],
)
def test_info_pandas_dataframe_date_column(dtype):
"""
Make sure info works on pandas.DataFrame inputs with a time column.
Make sure info works on pandas.DataFrame inputs with a date column.
"""
table = pd.DataFrame(
data={
"z": [10, 13, 12, 15, 14],
"time": pd.date_range(start="2020-01-01", periods=5),
"date": pd.date_range(start="2020-01-01", periods=5).astype(dtype=dtype),
}
)
output = info(data=table)
Expand Down