Skip to content

Commit

Permalink
[python] Allow X_layer_name=None for outgest of X-free experiments (#…
Browse files (browse the repository at this point in the history)
  • Loading branch information
johnkerl authored Oct 25, 2023
1 parent 19dd9cc commit 09f9daf
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 16 deletions.
36 changes: 20 additions & 16 deletions apis/python/src/tiledbsoma/io/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2641,7 +2641,7 @@ def to_h5ad(
h5ad_path: Path,
measurement_name: str,
*,
X_layer_name: str = "data",
X_layer_name: Optional[str] = "data",
obs_id_name: str = "obs_id",
var_id_name: str = "var_id",
obsm_varm_width_hints: Optional[Dict[str, Dict[str, int]]] = None,
Expand Down Expand Up @@ -2683,7 +2683,7 @@ def to_anndata(
experiment: Experiment,
measurement_name: str,
*,
X_layer_name: str = "data",
X_layer_name: Optional[str] = "data",
obs_id_name: str = "obs_id",
var_id_name: str = "var_id",
obsm_varm_width_hints: Optional[Dict[str, Dict[str, int]]] = None,
Expand Down Expand Up @@ -2737,22 +2737,26 @@ def to_anndata(
nobs = len(obs_df.index)
nvar = len(var_df.index)

if X_layer_name not in measurement.X:
raise ValueError(
f"X_layer_name {X_layer_name} not found in data: {measurement.X.keys()}"
)
X_data = measurement.X[X_layer_name]
X_csr = None
X_ndarray = None
X_dtype = None # some datasets have no X
if isinstance(X_data, DenseNDArray):
X_ndarray = X_data.read((slice(None), slice(None))).to_numpy()
X_dtype = X_ndarray.dtype
elif isinstance(X_data, SparseNDArray):
X_mat = X_data.read().tables().concat().to_pandas() # TODO: CSR/CSC options ...
X_csr = conversions.csr_from_tiledb_df(X_mat, nobs, nvar)
X_dtype = X_csr.dtype
else:
raise TypeError(f"Unexpected NDArray type {type(X_data)}")
if X_layer_name is not None:
if X_layer_name not in measurement.X:
raise ValueError(
f"X_layer_name {X_layer_name} not found in data: {measurement.X.keys()}"
)
X_data = measurement.X[X_layer_name]
if isinstance(X_data, DenseNDArray):
X_ndarray = X_data.read((slice(None), slice(None))).to_numpy()
X_dtype = X_ndarray.dtype
elif isinstance(X_data, SparseNDArray):
X_mat = (
X_data.read().tables().concat().to_pandas()
) # TODO: CSR/CSC options ...
X_csr = conversions.csr_from_tiledb_df(X_mat, nobs, nvar)
X_dtype = X_csr.dtype
else:
raise TypeError(f"Unexpected NDArray type {type(X_data)}")

if obsm_varm_width_hints is None:
obsm_varm_width_hints = {}
Expand Down
6 changes: 6 additions & 0 deletions apis/python/tests/test_basic_anndata_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,9 @@ def test_X_empty(h5ad_file_X_empty):
assert "data" in exp.ms["RNA"].X
assert exp.ms["RNA"].X["data"].nnz == 0

tiledbsoma.io.to_anndata(exp, measurement_name="RNA")
# TODO: more


def test_X_none(h5ad_file_X_none):
tempdir = tempfile.TemporaryDirectory()
Expand All @@ -755,6 +758,9 @@ def test_X_none(h5ad_file_X_none):
assert exp.ms["RNA"].var.count == 1838
assert list(exp.ms["RNA"].X.keys()) == []

tiledbsoma.io.to_anndata(exp, measurement_name="RNA", X_layer_name=None)
# TODO: more


# There exist in the wild AnnData files with categorical-int columns where the "not in the category"
# is indicated by the presence of floating-point math.NaN in cells. Here we test that we can ingest
Expand Down

0 comments on commit 09f9daf

Please sign in to comment.