From f636d093b42cd49a0a891005cfa22d617f501fe0 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 22 Sep 2023 16:58:18 -0400 Subject: [PATCH] [python] Support enumerations in `update_obs`/`update_var` [WIP] --- apis/python/src/tiledbsoma/io/ingest.py | 10 ++++++++++ apis/python/tests/test_update_dataframes.py | 6 ++++++ 2 files changed, 16 insertions(+) diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index 540c7d2fef..e6b85a9cf3 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -1510,12 +1510,22 @@ def _update_dataframe( # schema-creation logic. atype = arrow_schema.field(add_key).type dtype = tiledb_type_from_arrow_type(atype) + + enum_label = None + if pa.types.is_dictionary(arrow_table.schema.field(add_key).type): + enum_label = add_key + ordered = atype.ordered + dt = cast(pd.CategoricalDtype, new_data[add_key].dtype) + values = dt.categories + se.add_enumeration(tiledb.Enumeration(add_key, ordered, list(values))) + filters = tiledb_create_options.attr_filters_tiledb(add_key, ["ZstdFilter"]) se.add_attribute( tiledb.Attr( name=add_key, dtype=dtype, filters=filters, + enum_label=enum_label, ) ) diff --git a/apis/python/tests/test_update_dataframes.py b/apis/python/tests/test_update_dataframes.py index 6e0ac7c28b..861633b229 100644 --- a/apis/python/tests/test_update_dataframes.py +++ b/apis/python/tests/test_update_dataframes.py @@ -58,8 +58,14 @@ def test_add(adata): new_obs = adata.obs new_var = adata.var + # boolean new_obs["is_g1"] = new_obs["groups"] == "g1" + # int new_obs["seq"] = np.arange(new_obs.shape[0], dtype=np.int32) + # categorical of string + new_obs["parity"] = pd.Categorical( + np.asarray([["even", "odd"][e % 2] for e in range(len(new_obs))]) + ) new_var["vst.mean.sq"] = new_var["vst.mean"] ** 2