From 928f952626df47bc866a51d1f095045debcadc76 Mon Sep 17 00:00:00 2001
From: Luke Zappia
Date: Thu, 6 Apr 2023 14:02:30 +0200
Subject: [PATCH] Add conversion for pandas categorical arrays

Fixes #87
---
 NAMESPACE              |  1 +
 R/reticulate.R         | 32 ++++++++++++++++++++++++++++++++
 man/r-py-conversion.Rd |  2 ++
 3 files changed, 35 insertions(+)

diff --git a/NAMESPACE b/NAMESPACE
index 62096a7..c96626e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,6 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
 S3method(py_to_r,numpy.ndarray)
+S3method(py_to_r,pandas.core.arrays.categorical.Categorical)
 S3method(py_to_r,pandas.core.arrays.masked.BaseMaskedArray)
 export(.AnnDataVersions)
 export(AnnData2SCE)
diff --git a/R/reticulate.R b/R/reticulate.R
index c6986e2..737e328 100644
--- a/R/reticulate.R
+++ b/R/reticulate.R
@@ -12,6 +12,8 @@
 #' - `pandas.core.arrays.masked.BaseMaskedArray` - Handle conversion of
 #' **pandas** arrays (used when by `AnnData` objects when there are missing
 #' values)
+#' - `pandas.core.arrays.categorical.Categorical` - Handle conversion of
+#' **pandas** categorical arrays
 #'
 #' @author Luke Zappia
 #'
@@ -84,3 +86,33 @@ py_to_r.pandas.core.arrays.masked.BaseMaskedArray <- function(x) {
 
     return(x)
 }
+
+#' @export
+py_to_r.pandas.core.arrays.categorical.Categorical <- function(x) {
+    # Convert a pandas Categorical to an R factor: categories become the
+    # levels, missing values become NA and ordered categoricals become
+    # ordered factors
+    disable_conversion_scope(x)
+
+    # Get the category levels. unlist() flattens categories that were
+    # converted to an R list rather than an atomic vector
+    cats <- unlist(reticulate::py_to_r(x$categories$to_list()))
+    if (is.null(cats)) {
+        # No categories at all, every value must be missing
+        cats <- character(0)
+    }
+
+    # Record whether the categories are ordered so this is not lost
+    is_ordered <- isTRUE(reticulate::py_to_r(x$ordered))
+
+    # Get the integer codes, these are 0-based indices into the categories
+    # with -1 marking missing values
+    codes <- as.integer(unlist(reticulate::py_to_r(x$codes$tolist())))
+    codes[codes < 0L] <- NA_integer_
+
+    # Build the factor directly from the codes (shifted to 1-based), which
+    # avoids filling missing values with a dummy category
+    x <- factor(cats[codes + 1L], levels = cats, ordered = is_ordered)
+
+    return(x)
+}
diff --git a/man/r-py-conversion.Rd b/man/r-py-conversion.Rd
index 25f8b6a..24dfa59 100644
--- a/man/r-py-conversion.Rd
+++ b/man/r-py-conversion.Rd
@@ -24,6 +24,8 @@ These functions are extensions of the default conversion functions in the
 \item \code{pandas.core.arrays.masked.BaseMaskedArray} - Handle conversion of
 \strong{pandas} arrays (used when by \code{AnnData} objects when there are missing
 values)
+\item \code{pandas.core.arrays.categorical.Categorical} - Handle conversion of
+\strong{pandas} categorical arrays
 }
 }
 \seealso{