single-cell-data · johnkerl · Sep 28, 2023 · Sep 22, 2023 · Sep 25, 2023 · Sep 27, 2023
diff --git a/apis/python/src/tiledbsoma/io/_registration/signatures.py b/apis/python/src/tiledbsoma/io/_registration/signatures.py
@@ -72,7 +72,10 @@ def _string_dict_from_pandas_dataframe(
         df.reset_index(inplace=True)
         if default_index_name in df:
             if "index" in df:
-                df.drop(columns=["index"], inplace=True)
+                # Reassign instead of dropping in place; the in-place form,
+                # df.drop(columns=["index"], inplace=True), triggers the warning
+                # "A value is trying to be set on a copy of a slice from a DataFrame".
+                df = df.drop(columns=["index"])
         else:
             df.rename(columns={"index": default_index_name}, inplace=True)
     else:

diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py
@@ -1513,12 +1513,22 @@ def _update_dataframe(
         # schema-creation logic.
         atype = arrow_schema.field(add_key).type
         dtype = tiledb_type_from_arrow_type(atype)
+
+        enum_label = None
+        if pa.types.is_dictionary(arrow_table.schema.field(add_key).type):
+            enum_label = add_key
+            ordered = atype.ordered
+            dt = cast(pd.CategoricalDtype, new_data[add_key].dtype)
+            values = dt.categories
+            se.add_enumeration(tiledb.Enumeration(add_key, ordered, list(values)))
+
         filters = tiledb_create_options.attr_filters_tiledb(add_key, ["ZstdFilter"])
         se.add_attribute(
             tiledb.Attr(
                 name=add_key,
                 dtype=dtype,
                 filters=filters,
+                enum_label=enum_label,
             )
         )
 

diff --git a/apis/python/tests/test_update_dataframes.py b/apis/python/tests/test_update_dataframes.py
@@ -71,8 +71,14 @@ def test_add(adata, readback):
             new_obs = adata.obs
             new_var = adata.var
 
+    # boolean
     new_obs["is_g1"] = new_obs["groups"] == "g1"
+    # int
     new_obs["seq"] = np.arange(new_obs.shape[0], dtype=np.int32)
+    # categorical of string
+    new_obs["parity"] = pd.Categorical(
+        np.asarray([["even", "odd"][e % 2] for e in range(len(new_obs))])
+    )
 
     new_var["vst.mean.sq"] = new_var["vst.mean"] ** 2
 
@@ -83,9 +89,15 @@ def test_add(adata, readback):
     with tiledbsoma.Experiment.open(output_path) as exp:
         o2 = exp.obs.schema
         v2 = exp.ms["RNA"].var.schema
+        obs = exp.obs.read().concat().to_pandas()
 
     assert o2.field("is_g1").type == pa.bool_()
     assert o2.field("seq").type == pa.int32()
+    assert o2.field("parity").type == pa.dictionary(
+        index_type=pa.int8(), value_type=pa.string(), ordered=False
+    )
+    assert obs["parity"][0] == "even"
+    assert obs["parity"][1] == "odd"
     assert v2.field("vst.mean.sq").type == pa.float64()