From 4c7f57d76b6e35d7150ec10cc5d41f5902de9f05 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 16 Mar 2023 09:54:07 +0100 Subject: [PATCH] (feat): add cache workaround --- .../experimental/read_remote/read_remote.py | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/anndata/experimental/read_remote/read_remote.py b/anndata/experimental/read_remote/read_remote.py index 046ec579e..631a4d44c 100644 --- a/anndata/experimental/read_remote/read_remote.py +++ b/anndata/experimental/read_remote/read_remote.py @@ -24,7 +24,7 @@ class LazyCategoricalArray(ExplicitlyIndexedNDArrayMixin): - __slots__ = ("codes", "categories", "attrs") + __slots__ = ("codes", "attrs", "_categories", "_categories_cache") def __init__(self, group, *args, **kwargs): """Class for lazily reading categorical data from formatted zarr group @@ -33,11 +33,16 @@ def __init__(self, group, *args, **kwargs): group (zarr.Group): group containing "codes" and "categories" key as well as "ordered" attr """ self.codes = group["codes"] - self.categories = group["categories"][ - ... - ] # slots don't mix with cached_property, ExpicitlyIndexedArray uses slots + self._categories = group["categories"] + self._categories_cache = None self.attrs = dict(group.attrs) + @property + def categories(self): # __slots__ and cached_property are incompatible + if self._categories_cache is None: + self._categories_cache = self._categories[...] + return self._categories_cache + @property def dtype(self) -> pd.CategoricalDtype: return pd.CategoricalDtype(self.categories, self.ordered) @@ -50,14 +55,6 @@ def shape(self) -> Tuple[int, ...]: def ordered(self): return bool(self.attrs["ordered"]) - def __array__( - self, *args - ) -> np.ndarray: # may need to override this, copied for now - a = self[...] - if args: - a = a.astype(args[0]) - return a - def __getitem__(self, selection) -> pd.Categorical: codes = self.codes.oindex[selection] if codes.shape == (): # handle 0d case