Skip to content

Commit

Permalink
fix(pandas): support non-string categorical columns
Browse files: browse the repository at this point in the history
  • Loading branch information
jcrist authored and cpcloud committed Jan 25, 2024
1 parent 78434a0 commit 5de08c7
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 1 deletion.
4 changes: 3 additions & 1 deletion ibis/formats/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ def to_ibis(cls, typ, nullable=True):
elif pdt.is_datetime64_dtype(typ):
return dt.Timestamp(nullable=nullable)
elif isinstance(typ, pdt.CategoricalDtype):
return dt.String(nullable=nullable)
if typ.categories is None or pdt.is_string_dtype(typ.categories):
return dt.String(nullable=nullable)
return cls.to_ibis(typ.categories.dtype, nullable=nullable)
elif pdt.is_extension_array_dtype(typ):
if _has_arrow_dtype and isinstance(typ, pd.ArrowDtype):
return PyArrowType.to_ibis(typ.pyarrow_dtype, nullable=nullable)
Expand Down
2 changes: 2 additions & 0 deletions ibis/formats/tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ def test_dtype_from_pandas_arrow_list_dtype():
dt.Timestamp("US/Eastern"),
),
(pd.CategoricalDtype(), dt.String()),
(pd.CategoricalDtype(["a", "b", "c"]), dt.String()),
(pd.CategoricalDtype(np.array([1, 2, 3], dtype="int64")), dt.int64),
(pd.Series([], dtype="string").dtype, dt.String()),
],
ids=str,
Expand Down

0 comments on commit 5de08c7

Please sign in to comment.