From adcb30fbb1a7f16724afc332dfa673f251a5a5d7 Mon Sep 17 00:00:00 2001 From: Jean-Francois Zinque Date: Mon, 27 Sep 2021 23:28:59 +0200 Subject: [PATCH 1/4] add test for all pandas-compatible numpy dtypes --- tests/core/test_dtypes.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/core/test_dtypes.py b/tests/core/test_dtypes.py index dcb013b6b..32c1b2bc5 100644 --- a/tests/core/test_dtypes.py +++ b/tests/core/test_dtypes.py @@ -397,6 +397,28 @@ def test_default_numeric_dtypes(): ) +@pytest.mark.parametrize( + "alias, np_dtype", + [ + (alias, np_dtype) + for alias, np_dtype in np.sctypeDict.items() + # int, uint have different bitwidth under pandas and numpy. + if np_dtype != np.void and alias not in ("int", "uint") + ], +) +def test_numpy_dtypes(alias, np_dtype): + """Test that all pandas-compatible numpy dtypes are understood.""" + try: + np.dtype(alias) + except TypeError: + # not a valid alias + assert pandas_engine.Engine.dtype(np_dtype) + else: + assert pandas_engine.Engine.dtype(alias) == pandas_engine.Engine.dtype( + np_dtype + ) + + @pytest.mark.parametrize( "examples", [ From ca7375f4669650d0e309a97c3358b46fac2491bb Mon Sep 17 00:00:00 2001 From: Jean-Francois Zinque Date: Mon, 27 Sep 2021 23:30:33 +0200 Subject: [PATCH 2/4] add support for np.bytes_ --- pandera/engines/numpy_engine.py | 11 +++++++++++ pandera/engines/pandas_engine.py | 3 +++ 2 files changed, 14 insertions(+) diff --git a/pandera/engines/numpy_engine.py b/pandera/engines/numpy_engine.py index c66bb5fc4..fa8c0b232 100644 --- a/pandera/engines/numpy_engine.py +++ b/pandera/engines/numpy_engine.py @@ -313,6 +313,17 @@ class Complex64(Complex128): bit_width: int = 64 +############################################################################### +# bytes +############################################################################### + + +@Engine.register_dtype(equivalents=["bytes", bytes, np.bytes_]) +@immutable +class Bytes(DataType): + type = np.dtype("bytes") + + ############################################################################### # string ############################################################################### diff --git a/pandera/engines/pandas_engine.py b/pandera/engines/pandas_engine.py index 9b0e9ca15..b3753e5dc 100644 --- a/pandera/engines/pandas_engine.py +++ b/pandera/engines/pandas_engine.py @@ -459,8 +459,11 @@ def check(self, pandera_dtype: dtypes.DataType) -> bool: "decimal", "mixed-integer", "mixed", + "bytes", + bytes, object, np.object_, + np.bytes_, ], ) From 59c0f8c5c85835888c1cab99f9be51ed720f6286 Mon Sep 17 00:00:00 2001 From: Jean-Francois Zinque Date: Mon, 27 Sep 2021 23:31:15 +0200 Subject: [PATCH 3/4] add support for rare object aliases --- pandera/engines/pandas_engine.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandera/engines/pandas_engine.py b/pandera/engines/pandas_engine.py index b3753e5dc..febf77560 100644 --- a/pandera/engines/pandas_engine.py +++ b/pandera/engines/pandas_engine.py @@ -454,6 +454,8 @@ def check(self, pandera_dtype: dtypes.DataType) -> bool: numpy_engine.Object, equivalents=[ "object", + "object_", + "object0", "O", "bytes", "decimal", From cc4f82c0171fe97dd9039bef470bfd84ae51832f Mon Sep 17 00:00:00 2001 From: Jean-Francois Zinque Date: Mon, 27 Sep 2021 23:31:52 +0200 Subject: [PATCH 4/4] add support for platform-specific numpy dtypes --- pandera/engines/pandas_engine.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandera/engines/pandas_engine.py b/pandera/engines/pandas_engine.py index febf77560..44b11dcc4 100644 --- a/pandera/engines/pandas_engine.py +++ b/pandera/engines/pandas_engine.py @@ -137,7 +137,10 @@ def dtype(cls, data_type: Any) -> "DataType": # into a numpy or pandas dtype. np_or_pd_dtype = pd.api.types.pandas_dtype(data_type) if isinstance(np_or_pd_dtype, np.dtype): - np_or_pd_dtype = np_or_pd_dtype.type + # cast alias to platform-agnostic dtype + # e.g.: np.intc -> np.int32 + common_np_dtype = np.dtype(np_or_pd_dtype.name) + np_or_pd_dtype = common_np_dtype.type return engine.Engine.dtype(cls, np_or_pd_dtype)