Skip to content

Commit

Permalink
[python] Support enumerations in update_obs/update_var (#1707) (#1736)
Browse files Browse the repository at this point in the history

* [python] Support enumerations in `update_obs`/`update_var` [WIP]

* expand unit tests

* silence a warning

* Avoid unnecessary MacOS 3.7 check

* experimenting

* code-review feedback

Co-authored-by: John Kerl <[email protected]>
  • Loading branch information
github-actions[bot] and johnkerl authored Sep 29, 2023
1 parent a8623c8 commit 68d2630
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 8 deletions.
15 changes: 8 additions & 7 deletions .github/workflows/python-ci-single.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,14 @@ jobs:
cache: pip
cache-dependency-path: ./apis/python/setup.py

- name: Cache native libraries
uses: actions/cache@v3
with:
path: |
build
dist
key: libtiledbsoma-build-dist-${{ inputs.os }}-${{ inputs.python_version }}-${{ hashFiles('libtiledbsoma', 'scripts/bld') }}
# Experiment for MacOS CI false negatives ...
# - name: Cache native libraries
# uses: actions/cache@v3
# with:
# path: |
# build
# dist
# key: libtiledbsoma-build-dist-${{ inputs.os }}-${{ inputs.python_version }}-${{ hashFiles('libtiledbsoma', 'scripts/bld') }}

- name: Install testing prereqs
run: python -m pip -v install -U pip pytest-cov 'typeguard<3.0' types-setuptools
Expand Down
6 changes: 5 additions & 1 deletion apis/python/src/tiledbsoma/io/_registration/signatures.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,11 @@ def _string_dict_from_pandas_dataframe(
df.reset_index(inplace=True)
if default_index_name in df:
if "index" in df:
df.drop(columns=["index"], inplace=True)
# Avoid the warning:
# "A value is trying to be set on a copy of a slice from a DataFrame"
# which would occur if we did:
# df.drop(columns=["index"], inplace=True)
df = df.drop(columns=["index"])
else:
df.rename(columns={"index": default_index_name}, inplace=True)
else:
Expand Down
12 changes: 12 additions & 0 deletions apis/python/src/tiledbsoma/io/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1513,12 +1513,24 @@ def _update_dataframe(
# schema-creation logic.
atype = arrow_schema.field(add_key).type
dtype = tiledb_type_from_arrow_type(atype)

enum_label: Optional[str] = None
if pa.types.is_dictionary(arrow_table.schema.field(add_key).type):
enum_label = add_key
dt = cast(pd.CategoricalDtype, new_data[add_key].dtype)
se.add_enumeration(
tiledb.Enumeration(
name=add_key, ordered=atype.ordered, values=list(dt.categories)
)
)

filters = tiledb_create_options.attr_filters_tiledb(add_key, ["ZstdFilter"])
se.add_attribute(
tiledb.Attr(
name=add_key,
dtype=dtype,
filters=filters,
enum_label=enum_label,
)
)

Expand Down
12 changes: 12 additions & 0 deletions apis/python/tests/test_update_dataframes.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,14 @@ def test_add(adata, readback):
new_obs = adata.obs
new_var = adata.var

# boolean
new_obs["is_g1"] = new_obs["groups"] == "g1"
# int
new_obs["seq"] = np.arange(new_obs.shape[0], dtype=np.int32)
# categorical of string
new_obs["parity"] = pd.Categorical(
np.asarray([["even", "odd"][e % 2] for e in range(len(new_obs))])
)

new_var["vst.mean.sq"] = new_var["vst.mean"] ** 2

Expand All @@ -83,9 +89,15 @@ def test_add(adata, readback):
with tiledbsoma.Experiment.open(output_path) as exp:
o2 = exp.obs.schema
v2 = exp.ms["RNA"].var.schema
obs = exp.obs.read().concat().to_pandas()

assert o2.field("is_g1").type == pa.bool_()
assert o2.field("seq").type == pa.int32()
assert o2.field("parity").type == pa.dictionary(
index_type=pa.int8(), value_type=pa.string(), ordered=False
)
assert obs["parity"][0] == "even"
assert obs["parity"][1] == "odd"
assert v2.field("vst.mean.sq").type == pa.float64()


Expand Down

0 comments on commit 68d2630

Please sign in to comment.