diff --git a/.github/workflows/python-ci-single.yml b/.github/workflows/python-ci-single.yml index b0dfffc9cc..5cd9e84ed5 100644 --- a/.github/workflows/python-ci-single.yml +++ b/.github/workflows/python-ci-single.yml @@ -80,13 +80,14 @@ jobs: cache: pip cache-dependency-path: ./apis/python/setup.py - - name: Cache native libraries - uses: actions/cache@v3 - with: - path: | - build - dist - key: libtiledbsoma-build-dist-${{ inputs.os }}-${{ inputs.python_version }}-${{ hashFiles('libtiledbsoma', 'scripts/bld') }} +# Experiment for MacOS CI false negatives ... +# - name: Cache native libraries +# uses: actions/cache@v3 +# with: +# path: | +# build +# dist +# key: libtiledbsoma-build-dist-${{ inputs.os }}-${{ inputs.python_version }}-${{ hashFiles('libtiledbsoma', 'scripts/bld') }} - name: Install testing prereqs run: python -m pip -v install -U pip pytest-cov 'typeguard<3.0' types-setuptools diff --git a/apis/python/src/tiledbsoma/io/_registration/signatures.py b/apis/python/src/tiledbsoma/io/_registration/signatures.py index 7e5ed4d83f..5a4903ff74 100644 --- a/apis/python/src/tiledbsoma/io/_registration/signatures.py +++ b/apis/python/src/tiledbsoma/io/_registration/signatures.py @@ -72,7 +72,11 @@ def _string_dict_from_pandas_dataframe( df.reset_index(inplace=True) if default_index_name in df: if "index" in df: - df.drop(columns=["index"], inplace=True) + # Avoid the warning: + # "A value is trying to be set on a copy of a slice from a DataFrame" + # which would occur if we did: + # df.drop(columns=["index"], inplace=True) + df = df.drop(columns=["index"]) else: df.rename(columns={"index": default_index_name}, inplace=True) else: diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index 37c35b9139..d601b84be7 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -1513,12 +1513,24 @@ def _update_dataframe( # schema-creation logic. atype = arrow_schema.field(add_key).type dtype = tiledb_type_from_arrow_type(atype) + + enum_label: Optional[str] = None + if pa.types.is_dictionary(arrow_table.schema.field(add_key).type): + enum_label = add_key + dt = cast(pd.CategoricalDtype, new_data[add_key].dtype) + se.add_enumeration( + tiledb.Enumeration( + name=add_key, ordered=atype.ordered, values=list(dt.categories) + ) + ) + filters = tiledb_create_options.attr_filters_tiledb(add_key, ["ZstdFilter"]) se.add_attribute( tiledb.Attr( name=add_key, dtype=dtype, filters=filters, + enum_label=enum_label, ) ) diff --git a/apis/python/tests/test_update_dataframes.py b/apis/python/tests/test_update_dataframes.py index e72c574a6a..484f19ab31 100644 --- a/apis/python/tests/test_update_dataframes.py +++ b/apis/python/tests/test_update_dataframes.py @@ -71,8 +71,14 @@ def test_add(adata, readback): new_obs = adata.obs new_var = adata.var + # boolean new_obs["is_g1"] = new_obs["groups"] == "g1" + # int new_obs["seq"] = np.arange(new_obs.shape[0], dtype=np.int32) + # categorical of string + new_obs["parity"] = pd.Categorical( + np.asarray([["even", "odd"][e % 2] for e in range(len(new_obs))]) + ) new_var["vst.mean.sq"] = new_var["vst.mean"] ** 2 @@ -83,9 +89,15 @@ def test_add(adata, readback): with tiledbsoma.Experiment.open(output_path) as exp: o2 = exp.obs.schema v2 = exp.ms["RNA"].var.schema + obs = exp.obs.read().concat().to_pandas() assert o2.field("is_g1").type == pa.bool_() assert o2.field("seq").type == pa.int32() + assert o2.field("parity").type == pa.dictionary( + index_type=pa.int8(), value_type=pa.string(), ordered=False + ) + assert obs["parity"][0] == "even" + assert obs["parity"][1] == "odd" assert v2.field("vst.mean.sq").type == pa.float64()