Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport release-1.5] [python] Allow X_layer_name=None for outgest of X-free experiments #1832

Merged
merged 1 commit on Oct 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 20 additions & 16 deletions apis/python/src/tiledbsoma/io/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2641,7 +2641,7 @@ def to_h5ad(
h5ad_path: Path,
measurement_name: str,
*,
X_layer_name: str = "data",
X_layer_name: Optional[str] = "data",
obs_id_name: str = "obs_id",
var_id_name: str = "var_id",
obsm_varm_width_hints: Optional[Dict[str, Dict[str, int]]] = None,
Expand Down Expand Up @@ -2683,7 +2683,7 @@ def to_anndata(
experiment: Experiment,
measurement_name: str,
*,
X_layer_name: str = "data",
X_layer_name: Optional[str] = "data",
obs_id_name: str = "obs_id",
var_id_name: str = "var_id",
obsm_varm_width_hints: Optional[Dict[str, Dict[str, int]]] = None,
Expand Down Expand Up @@ -2737,22 +2737,26 @@ def to_anndata(
nobs = len(obs_df.index)
nvar = len(var_df.index)

if X_layer_name not in measurement.X:
raise ValueError(
f"X_layer_name {X_layer_name} not found in data: {measurement.X.keys()}"
)
X_data = measurement.X[X_layer_name]
X_csr = None
X_ndarray = None
X_dtype = None # some datasets have no X
if isinstance(X_data, DenseNDArray):
X_ndarray = X_data.read((slice(None), slice(None))).to_numpy()
X_dtype = X_ndarray.dtype
elif isinstance(X_data, SparseNDArray):
X_mat = X_data.read().tables().concat().to_pandas() # TODO: CSR/CSC options ...
X_csr = conversions.csr_from_tiledb_df(X_mat, nobs, nvar)
X_dtype = X_csr.dtype
else:
raise TypeError(f"Unexpected NDArray type {type(X_data)}")
if X_layer_name is not None:
if X_layer_name not in measurement.X:
raise ValueError(
f"X_layer_name {X_layer_name} not found in data: {measurement.X.keys()}"
)
X_data = measurement.X[X_layer_name]
if isinstance(X_data, DenseNDArray):
X_ndarray = X_data.read((slice(None), slice(None))).to_numpy()
X_dtype = X_ndarray.dtype
elif isinstance(X_data, SparseNDArray):
X_mat = (
X_data.read().tables().concat().to_pandas()
) # TODO: CSR/CSC options ...
X_csr = conversions.csr_from_tiledb_df(X_mat, nobs, nvar)
X_dtype = X_csr.dtype
else:
raise TypeError(f"Unexpected NDArray type {type(X_data)}")

if obsm_varm_width_hints is None:
obsm_varm_width_hints = {}
Expand Down
6 changes: 6 additions & 0 deletions apis/python/tests/test_basic_anndata_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,9 @@ def test_X_empty(h5ad_file_X_empty):
assert "data" in exp.ms["RNA"].X
assert exp.ms["RNA"].X["data"].nnz == 0

tiledbsoma.io.to_anndata(exp, measurement_name="RNA")
# TODO: more


def test_X_none(h5ad_file_X_none):
tempdir = tempfile.TemporaryDirectory()
Expand All @@ -755,6 +758,9 @@ def test_X_none(h5ad_file_X_none):
assert exp.ms["RNA"].var.count == 1838
assert list(exp.ms["RNA"].X.keys()) == []

tiledbsoma.io.to_anndata(exp, measurement_name="RNA", X_layer_name=None)
# TODO: more


# There exist in the wild AnnData files with categorical-int columns where the "not in the category"
# is indicated by the presence of floating-point math.NaN in cells. Here we test that we can ingest
Expand Down
Loading