Skip to content

Commit

Permalink
Implement SOMA_level dimension-slicing
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed May 19, 2022
1 parent 1408ca2 commit 55409de
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 17 deletions.
14 changes: 13 additions & 1 deletion apis/python/src/tiledbsc/annotation_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,19 @@ def keys(self) -> List[str]:
return self.get_attr_names()

# ----------------------------------------------------------------
def from_dataframe(self, dataframe: pd.DataFrame, extent: int) -> None:
def dim_select(self, ids):  # TODO: re None
    """
    Reads from the array at ``self.uri`` through its ``.df`` indexer.

    :param ids: Selector handed to ``A.df[...]``. ``None`` means no
        restriction, i.e. the read is ``A.df[:]``.
    :return: Whatever ``A.df[...]`` yields for that selector.
    """
    # ``A.df[:]`` is exactly ``A.df[slice(None)]``, so a single indexing
    # expression serves both the "everything" and the "these ids" case.
    selector = slice(None) if ids is None else ids
    with tiledb.open(self.uri) as A:  # TODO: with self.open
        return A.df[selector]

# ----------------------------------------------------------------
def from_dataframe(self, dataframe: pandas.DataFrame, extent: int) -> None:
"""
Populates the obs/ or var/ subgroup for a SOMA object.
Expand Down
44 changes: 28 additions & 16 deletions apis/python/src/tiledbsc/annotation_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,25 +31,16 @@ def __init__(
self.dim_name = dim_name

# ----------------------------------------------------------------
def from_anndata(self, matrix, dim_values):
def dim_select(self, ids): # TODO: re None
"""
Populates an array in the obsm/ or varm/ subgroup for a SOMA object.
:param matrix: anndata.obsm['foo'], anndata.varm['foo'], or anndata.raw.varm['foo'].
:param dim_values: anndata.obs_names, anndata.var_names, or anndata.raw.var_names.
TODO
"""

if self.verbose:
s = util.get_start_stamp()
print(f"{self.indent}START WRITING {self.uri}")

if isinstance(matrix, pd.DataFrame):
self._from_pandas_dataframe(matrix, dim_values)
if ids is None:
with tiledb.open(self.uri) as A: # TODO: with self.open
return A.df[:]
else:
self._numpy_ndarray_or_scipy_sparse_csr_matrix(matrix, dim_values)

if self.verbose:
print(util.format_elapsed(s, f"{self.indent}FINISH WRITING {self.uri}"))
with tiledb.open(self.uri) as A: # TODO: with self.open
return A.df[ids]

# ----------------------------------------------------------------
def shape(self):
Expand All @@ -67,6 +58,27 @@ def shape(self):
num_cols = A.schema.nattr
return (num_rows, num_cols)

# ----------------------------------------------------------------
def from_anndata(self, matrix, dim_values):
    """
    Writes one array of the obsm/ or varm/ subgroup of a SOMA object.

    A pandas DataFrame is routed to ``_from_pandas_dataframe``; any other
    input is routed to ``_numpy_ndarray_or_scipy_sparse_csr_matrix``. When
    ``self.verbose`` is set, start and finish messages are printed.

    :param matrix: anndata.obsm['foo'], anndata.varm['foo'], or anndata.raw.varm['foo'].
    :param dim_values: anndata.obs_names, anndata.var_names, or anndata.raw.var_names.
    """
    if self.verbose:
        start_stamp = util.get_start_stamp()
        print(f"{self.indent}START WRITING {self.uri}")

    # Pick the writer by input type, then make a single call.
    writer = (
        self._from_pandas_dataframe
        if isinstance(matrix, pd.DataFrame)
        else self._numpy_ndarray_or_scipy_sparse_csr_matrix
    )
    writer(matrix, dim_values)

    if self.verbose:
        finish_message = f"{self.indent}FINISH WRITING {self.uri}"
        print(util.format_elapsed(start_stamp, finish_message))

# ----------------------------------------------------------------
def _numpy_ndarray_or_scipy_sparse_csr_matrix(self, matrix, dim_values):
# We do not have column names for anndata-provenance annotation matrices.
Expand Down
12 changes: 12 additions & 0 deletions apis/python/src/tiledbsc/annotation_pairwise_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,18 @@ def shape(self):
# * https://github.com/TileDB-Inc/TileDB-Py/pull/1055
return A.df[:].shape # nnz x 3 -- id_i, id_j, and value

# ----------------------------------------------------------------
def dim_select(self, ids):  # TODO: re None
    """
    Reads from the array at ``self.uri`` through its ``.df`` indexer,
    applying the same selector to both index positions.

    :param ids: Selector used for both positions of ``A.df[..., ...]``.
        ``None`` means no restriction, i.e. the read is ``A.df[:, :]``.
    :return: Whatever ``A.df[..., ...]`` yields for that selector.
    """
    # ``A.df[:, :]`` is ``A.df[slice(None), slice(None)]``; substituting the
    # full slice for a missing ``ids`` lets one expression cover both cases.
    selector = slice(None) if ids is None else ids
    with tiledb.open(self.uri) as A:  # TODO: with self.open
        return A.df[selector, selector]

# ----------------------------------------------------------------
def from_anndata(self, matrix, dim_values):
"""
Expand Down
16 changes: 16 additions & 0 deletions apis/python/src/tiledbsc/assay_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,22 @@ def __init__(
# We don't have a .shape() method since X is sparse. One should
# instead use the row-counts for the soma's obs and var.

# ----------------------------------------------------------------
def dim_select(self, obs_ids, var_ids):  # TODO: maybe one is None
    """
    Reads from the array at ``self.uri`` through its ``.df`` indexer,
    restricted by obs IDs, var IDs, or both.

    :param obs_ids: Selector for the first index position, or ``None`` to
        leave that position unrestricted (``:``).
    :param var_ids: Selector for the second index position, or ``None`` to
        leave that position unrestricted (``:``).
    :return: Whatever ``A.df[..., ...]`` yields for the chosen selectors.
    :raises AssertionError: If both ``obs_ids`` and ``var_ids`` are ``None``.
    """
    # Identity checks, not ``!= None`` / ``== None``: equality against None is
    # evaluated elementwise for numpy arrays and pandas indexes, and the
    # resulting array cannot be used as a truth value.
    assert (
        obs_ids is not None or var_ids is not None
    ), "at least one of obs_ids and var_ids must be provided"  # TODO: elaborate

    with tiledb.open(self.uri) as A:  # TODO: with self.open
        if var_ids is None:
            return A.df[obs_ids, :]
        if obs_ids is None:
            return A.df[:, var_ids]
        return A.df[obs_ids, var_ids]

# ----------------------------------------------------------------
def from_matrix(self, matrix, row_names, col_names) -> None:
"""
Expand Down
44 changes: 44 additions & 0 deletions apis/python/src/tiledbsc/soma.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,50 @@ def __str__(self):
"""
return f"name={self.name},uri={self.uri}"

# ----------------------------------------------------------------
def dim_select(
    self,
    slice_obs_ids,  # TODO: None means all ...
    slice_var_ids,  # TODO: None means all ...
) -> Dict:  # XXX TEMP
    """
    Slices this SOMA by obs IDs and/or var IDs.

    ``self.obs.dim_select`` and ``self.var.dim_select`` are each consulted;
    as soon as one of them comes back with zero rows the method gives up and
    returns ``None`` without querying the other. Otherwise the IDs and both
    results are handed to ``self._assemble_soma_slice``.

    :param slice_obs_ids: obs IDs to select, or ``None`` (passed through to
        ``self.obs.dim_select`` unchanged).
    :param slice_var_ids: var IDs to select, or ``None`` (passed through to
        ``self.var.dim_select`` unchanged).
    :return: The result of ``self._assemble_soma_slice``, or ``None`` when
        either the obs slice or the var slice has no rows.
    :raises AssertionError: If both ``slice_obs_ids`` and ``slice_var_ids``
        are ``None``.
    """
    # Identity checks, not ``!= None``: equality against None is evaluated
    # elementwise for numpy arrays and pandas indexes, and the resulting
    # array cannot be used as a truth value.
    assert (
        slice_obs_ids is not None or slice_var_ids is not None
    ), "at least one of slice_obs_ids and slice_var_ids must be provided"

    if slice_obs_ids is None:
        # Only var is restricted: try the var slice first, so that an empty
        # result spares us from loading the obs.
        slice_var_df = self.var.dim_select(slice_var_ids)
        if slice_var_df.shape[0] == 0:
            return None
        slice_obs_df = self.obs.dim_select(slice_obs_ids)
        if slice_obs_df.shape[0] == 0:
            return None
    else:
        # obs is restricted (var may or may not be): try the obs slice
        # first, so that an empty result spares us from loading the var.
        slice_obs_df = self.obs.dim_select(slice_obs_ids)
        if slice_obs_df.shape[0] == 0:
            return None
        slice_var_df = self.var.dim_select(slice_var_ids)
        if slice_var_df.shape[0] == 0:
            return None

    return self._assemble_soma_slice(
        slice_obs_ids, slice_var_ids, slice_obs_df, slice_var_df
    )

# ----------------------------------------------------------------
def from_h5ad(self, input_path: str):
"""
Expand Down

0 comments on commit 55409de

Please sign in to comment.