Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

clib.conversion._to_numpy: Add tests for pyarrow.array with pyarrow string types #3608

Merged
merged 7 commits into from
Nov 15, 2024
9 changes: 7 additions & 2 deletions pygmt/clib/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,13 @@ def _to_numpy(data: Any) -> np.ndarray:
array
The C contiguous NumPy array.
"""
# Mapping of unsupported dtypes to the expected NumPy dtype.
# Mapping of unsupported dtypes to expected NumPy dtypes.
dtypes: dict[str, type] = {
# For string dtypes.
"large_string": np.str_, # pa.large_string and pa.large_utf8
"string": np.str_, # pa.string and pa.utf8
"string_view": np.str_, # pa.string_view
# For datetime dtypes.
"date32[day][pyarrow]": np.datetime64,
"date64[ms][pyarrow]": np.datetime64,
}
Expand All @@ -173,7 +178,7 @@ def _to_numpy(data: Any) -> np.ndarray:
# we can remove the workaround in PyGMT v0.17.0.
array = np.ascontiguousarray(data.astype(float))
else:
vec_dtype = str(getattr(data, "dtype", ""))
vec_dtype = str(getattr(data, "dtype", getattr(data, "type", "")))
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A PyArrow array only has a `.type` attribute, not `.dtype`, so the lookup falls back to `.type` when `.dtype` is missing.

array = np.ascontiguousarray(data, dtype=dtypes.get(vec_dtype))
return array

Expand Down
23 changes: 23 additions & 0 deletions pygmt/tests/test_clib_to_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ def test_to_numpy_pandas_series_numpy_dtypes_numeric(dtype, expected_dtype):
# - int8, int16, int32, int64
# - uint8, uint16, uint32, uint64
# - float16, float32, float64
# - String dtypes: string/utf8, large_string/large_utf8, string_view
seisman marked this conversation as resolved.
Show resolved Hide resolved
#
# In PyArrow, array types can be specified in two ways:
#
Expand Down Expand Up @@ -250,3 +251,25 @@ def test_to_numpy_pyarrow_array_pyarrow_dtypes_numeric_with_na(dtype, expected_d
result = _to_numpy(array)
_check_result(result, expected_dtype)
npt.assert_array_equal(result, array)


@pytest.mark.skipif(not _HAS_PYARROW, reason="pyarrow is not installed")
@pytest.mark.parametrize(
    "dtype",
    [
        None,
        "string",
        "utf8",  # same type as "string"
        "large_string",
        "large_utf8",  # same type as "large_string"
        "string_view",
    ],
)
def test_to_numpy_pyarrow_array_pyarrow_dtypes_string(dtype):
    """
    Check that _to_numpy converts PyArrow string arrays to NumPy string arrays.

    Each parametrized ``dtype`` is passed as the ``type`` argument of
    ``pa.array``; ``None`` leaves the type unspecified. The converted result
    must pass ``_check_result`` with ``np.str_`` and compare equal,
    element by element, to the source array.
    """
    strings = pa.array(["abc", "defg", "12345"], type=dtype)
    converted = _to_numpy(strings)
    _check_result(converted, np.str_)
    npt.assert_array_equal(converted, strings)
Loading