Add conversion for pandas categorical arrays

Fixes #87
theislab · Apr 6, 2023 · 928f952 · 928f952
1 parent 079ee1e
commit 928f952
Show file tree

Hide file tree

Showing 3 changed files with 31 additions and 0 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,6 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
 S3method(py_to_r,numpy.ndarray)
+S3method(py_to_r,pandas.core.arrays.categorical.Categorical)
 S3method(py_to_r,pandas.core.arrays.masked.BaseMaskedArray)
 export(.AnnDataVersions)
 export(AnnData2SCE)

diff --git a/R/reticulate.R b/R/reticulate.R
@@ -12,6 +12,8 @@
 #' - `pandas.core.arrays.masked.BaseMaskedArray` - Handle conversion of
 #'   **pandas** arrays (used by `AnnData` objects when there are missing
 #'   values)
+#' - `pandas.core.arrays.categorical.Categorical` - Handle conversion of
+#'   **pandas** categorical arrays
 #'
 #' @author Luke Zappia
 #'
@@ -84,3 +86,29 @@ py_to_r.pandas.core.arrays.masked.BaseMaskedArray <- function(x) {
 
     return(x)
 }
+
+# Convert a pandas `Categorical` array to an R factor.
+#
+# The category levels, the positions of missing values and the `ordered` flag
+# are read from the Python object. The values themselves are converted via a
+# Python list, with missing entries temporarily replaced by a dummy category
+# and restored as `NA` afterwards.
+#
+# x: A Python `pandas.core.arrays.categorical.Categorical` object.
+#
+# Returns a factor whose levels are the pandas categories (in the same order);
+# it is an ordered factor when the pandas array is ordered.
+#' @export
+py_to_r.pandas.core.arrays.categorical.Categorical <- function(x) {
+    # NOTE(review): helper defined elsewhere in the package; presumably stops
+    # automatic Python-to-R conversion of `x` within this call - confirm
+    disable_conversion_scope(x)
+
+    # Get the category levels
+    cats <- reticulate::py_to_r(x$categories$to_list())
+
+    # Record which values should be NA
+    is_na <- reticulate::py_to_r(x$isna())
+
+    # Record whether the categories are ordered so this is not lost
+    is_ordered <- isTRUE(reticulate::py_to_r(x$ordered))
+
+    # Without any categories every value must be missing and there is no
+    # level available to use as a dummy fill value
+    if (length(cats) == 0) {
+        x <- factor(
+            rep(NA_character_, length(is_na)),
+            levels = character(0),
+            ordered = is_ordered
+        )
+
+        return(x)
+    }
+
+    # Fill NA values with a dummy. `[[` is used so that a single scalar is
+    # passed to Python even if the categories were converted to an R list.
+    x <- x$fillna(value = cats[[1]])
+
+    # Convert to list and then to R using default conversion
+    x <- x$tolist()
+    x <- reticulate::py_to_r(x)
+
+    # Restore the NA values
+    x[is_na] <- NA
+
+    # Convert to factor, keeping the pandas category order and ordered flag
+    x <- factor(x, levels = cats, ordered = is_ordered)
+
+    return(x)
+}
diff --git a/man/r-py-conversion.Rd b/man/r-py-conversion.Rd