Skip to content

Commit

Permalink
[python] Support enumerations in update_obs/update_var [WIP]
Browse files: browse the repository at this point in the history
  • Loading branch information
johnkerl committed Sep 22, 2023
1 parent 503ddd1 commit f636d09
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 0 deletions.
10 changes: 10 additions & 0 deletions apis/python/src/tiledbsoma/io/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1510,12 +1510,22 @@ def _update_dataframe(
# schema-creation logic.
atype = arrow_schema.field(add_key).type
dtype = tiledb_type_from_arrow_type(atype)

enum_label = None
if pa.types.is_dictionary(arrow_table.schema.field(add_key).type):
enum_label = add_key
ordered = atype.ordered
dt = cast(pd.CategoricalDtype, new_data[add_key].dtype)
values = dt.categories
se.add_enumeration(tiledb.Enumeration(add_key, ordered, list(values)))

filters = tiledb_create_options.attr_filters_tiledb(add_key, ["ZstdFilter"])
se.add_attribute(
tiledb.Attr(
name=add_key,
dtype=dtype,
filters=filters,
enum_label=enum_label,
)
)

Expand Down
6 changes: 6 additions & 0 deletions apis/python/tests/test_update_dataframes.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,14 @@ def test_add(adata):
new_obs = adata.obs
new_var = adata.var

# boolean
new_obs["is_g1"] = new_obs["groups"] == "g1"
# int
new_obs["seq"] = np.arange(new_obs.shape[0], dtype=np.int32)
# categorical of string
new_obs["parity"] = pd.Categorical(
np.asarray([["even", "odd"][e % 2] for e in range(len(new_obs))])
)

new_var["vst.mean.sq"] = new_var["vst.mean"] ** 2

Expand Down

0 comments on commit f636d09

Please sign in to comment.