From d2c8a86dc445c04bcdaf4901bab2cc0875cb03b5 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Wed, 25 Oct 2023 17:19:18 -0400 Subject: [PATCH] [python] Allow `X_layer_name=None` for outgest of X-free experiments (#1830) --- apis/python/src/tiledbsoma/io/ingest.py | 36 ++++++++++++---------- apis/python/tests/test_basic_anndata_io.py | 6 ++++ 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index 34a8c4482c..513fe4b3bd 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -2641,7 +2641,7 @@ def to_h5ad( h5ad_path: Path, measurement_name: str, *, - X_layer_name: str = "data", + X_layer_name: Optional[str] = "data", obs_id_name: str = "obs_id", var_id_name: str = "var_id", obsm_varm_width_hints: Optional[Dict[str, Dict[str, int]]] = None, @@ -2683,7 +2683,7 @@ def to_anndata( experiment: Experiment, measurement_name: str, *, - X_layer_name: str = "data", + X_layer_name: Optional[str] = "data", obs_id_name: str = "obs_id", var_id_name: str = "var_id", obsm_varm_width_hints: Optional[Dict[str, Dict[str, int]]] = None, @@ -2737,22 +2737,26 @@ def to_anndata( nobs = len(obs_df.index) nvar = len(var_df.index) - if X_layer_name not in measurement.X: - raise ValueError( - f"X_layer_name {X_layer_name} not found in data: {measurement.X.keys()}" - ) - X_data = measurement.X[X_layer_name] X_csr = None + X_ndarray = None X_dtype = None # some datasets have no X - if isinstance(X_data, DenseNDArray): - X_ndarray = X_data.read((slice(None), slice(None))).to_numpy() - X_dtype = X_ndarray.dtype - elif isinstance(X_data, SparseNDArray): - X_mat = X_data.read().tables().concat().to_pandas() # TODO: CSR/CSC options ... - X_csr = conversions.csr_from_tiledb_df(X_mat, nobs, nvar) - X_dtype = X_csr.dtype - else: - raise TypeError(f"Unexpected NDArray type {type(X_data)}") + if X_layer_name is not None: + if X_layer_name not in measurement.X: + raise ValueError( + f"X_layer_name {X_layer_name} not found in data: {measurement.X.keys()}" + ) + X_data = measurement.X[X_layer_name] + if isinstance(X_data, DenseNDArray): + X_ndarray = X_data.read((slice(None), slice(None))).to_numpy() + X_dtype = X_ndarray.dtype + elif isinstance(X_data, SparseNDArray): + X_mat = ( + X_data.read().tables().concat().to_pandas() + ) # TODO: CSR/CSC options ... + X_csr = conversions.csr_from_tiledb_df(X_mat, nobs, nvar) + X_dtype = X_csr.dtype + else: + raise TypeError(f"Unexpected NDArray type {type(X_data)}") if obsm_varm_width_hints is None: obsm_varm_width_hints = {} diff --git a/apis/python/tests/test_basic_anndata_io.py b/apis/python/tests/test_basic_anndata_io.py index 713b11431d..db77ce65b7 100644 --- a/apis/python/tests/test_basic_anndata_io.py +++ b/apis/python/tests/test_basic_anndata_io.py @@ -742,6 +742,9 @@ def test_X_empty(h5ad_file_X_empty): assert "data" in exp.ms["RNA"].X assert exp.ms["RNA"].X["data"].nnz == 0 + tiledbsoma.io.to_anndata(exp, measurement_name="RNA") + # TODO: more + def test_X_none(h5ad_file_X_none): tempdir = tempfile.TemporaryDirectory() @@ -755,6 +758,9 @@ def test_X_none(h5ad_file_X_none): assert exp.ms["RNA"].var.count == 1838 assert list(exp.ms["RNA"].X.keys()) == [] + tiledbsoma.io.to_anndata(exp, measurement_name="RNA", X_layer_name=None) + # TODO: more + # There exist in the wild AnnData files with categorical-int columns where the "not in the category" # is indicated by the presence of floating-point math.NaN in cells. Here we test that we can ingest