Skip to content

Commit

Permalink
soma.foo.schema() -> soma.foo.tiledb_array_schema()
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed May 20, 2022
1 parent aac677c commit 30f8d25
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 20 deletions.
4 changes: 2 additions & 2 deletions apis/python/src/tiledbsc/annotation_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ def ids(self) -> List[str]:
def keys(self) -> List[str]:
"""
Returns the column names for the `obs` or `var` dataframe. For obs and var, `.keys()` is a
keystroke-saver for the more general array-schema accessor `get_attr_names`.
keystroke-saver for the more general array-schema accessor `attr_names`.
"""
return self.get_attr_names()
return self.attr_names()

# ----------------------------------------------------------------
def dim_select(self, ids):
Expand Down
7 changes: 0 additions & 7 deletions apis/python/src/tiledbsc/assay_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,6 @@ def __init__(
# We don't have a .shape() method since X is sparse. One should
# instead use the row-counts for the soma's obs and var.

# ----------------------------------------------------------------
def dim_names(self):
"""
Keystroke-saving accessor for `row_dim_name` and `col_dim_name`.
"""
return (self.row_dim_name, self.col_dim_name)

# ----------------------------------------------------------------
def dim_select(self, obs_ids, var_ids):
"""
Expand Down
30 changes: 25 additions & 5 deletions apis/python/src/tiledbsc/tiledb_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
from .tiledb_object import TileDBObject
from .tiledb_group import TileDBGroup

from typing import Optional, List
from typing import Optional, List, Dict


class TileDBArray(TileDBObject):
"""
Wraps arrays from TileDB-Py by retaining a URI, verbose flag, etc.
Also serves as an abstraction layer to hide TileDB-specific details from the API, unless
requested.
"""

def __init__(
Expand Down Expand Up @@ -37,29 +39,47 @@ def open(self):
A = tiledb.open(self.uri)
return A

def schema(self):
def tiledb_array_schema(self):
"""
Returns the TileDB array schema.
"""
with tiledb.open(self.uri) as A:
return A.schema

def get_dim_names(self) -> List[str]:
def dim_names(self) -> List[str]:
"""
Reads the dimension names from the schema: for example, ['obs_id', 'var_id'].
"""
with tiledb.open(self.uri) as A:
return [A.schema.domain.dim(i).name for i in range(A.schema.domain.ndim)]

def get_attr_names(self) -> List[str]:
def dim_names_to_types(self) -> Dict[str, str]:
"""
Returns a dict mapping from dimension name to dimension type.
"""
with tiledb.open(self.uri) as A:
dom = A.schema.domain
return {dom.dim(i).name: dom.dim(i).dtype for i in range(dom.ndim)}

def attr_names(self) -> List[str]:
"""
Reads the attribute names from the schema: for example, the list of column names in a dataframe.
"""
with tiledb.open(self.uri) as A:
return [A.schema.attr(i).name for i in range(A.schema.nattr)]

def attr_names_to_types(self) -> Dict[str, str]:
"""
Returns a dict mapping from attribute name to attribute type.
"""
with tiledb.open(self.uri) as A:
schema = A.schema
return {
schema.attr(i).name: schema.attr(i).dtype for i in range(schema.nattr)
}

def has_attr_name(self, attr_name: str) -> bool:
"""
Returns true if the array has the specified attribute name, false otherwise.
"""
return attr_name in self.get_attr_names()
return attr_name in self.attr_names()
12 changes: 6 additions & 6 deletions apis/python/tests/test_soma_group_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@ def test_soma_group_indexing(h5ad_file):
["uns", "varm", "X", "raw", "obsp", "varp", "var", "obsm", "obs"]
)
assert set(soma.X.get_member_names()) == set(["data"])
assert soma.X.data.get_dim_names() == ["obs_id", "var_id"]
assert soma.X.data.dim_names() == ["obs_id", "var_id"]

assert soma.obs.get_dim_names() == ["obs_id"]
assert soma.obs.dim_names() == ["obs_id"]
assert soma.obs.dim_name == "obs_id"
assert soma.obs.keys() == [
"orig.ident",
Expand Down Expand Up @@ -159,7 +159,7 @@ def test_soma_group_indexing(h5ad_file):
np.dtype("int32"),
]

assert soma.var.get_dim_names() == ["var_id"]
assert soma.var.dim_names() == ["var_id"]
assert soma.obs.dim_name == "obs_id"
assert soma.var.keys() == [
"vst.mean",
Expand Down Expand Up @@ -206,7 +206,7 @@ def test_soma_group_indexing(h5ad_file):
assert set(soma.obsm.keys()) == set(["X_pca", "X_tsne"])
assert isinstance(soma.obsm["X_pca"], tiledbsc.AnnotationMatrix)
assert soma.obsm["nonesuch"] is None
assert soma.obsm["X_pca"].get_dim_names() == ["obs_id"]
assert soma.obsm["X_pca"].dim_names() == ["obs_id"]
assert soma.obsm["X_pca"].df().shape == (80, 20)
assert list(soma.obsm["X_pca"].df().dtypes) == [
np.dtype("O"),
Expand Down Expand Up @@ -235,12 +235,12 @@ def test_soma_group_indexing(h5ad_file):
assert isinstance(soma.varm["PCs"], tiledbsc.AnnotationMatrix)
assert soma.varm["nonesuch"] is None
assert soma.varm.get_member_names() == ["PCs"]
assert soma.varm["PCs"].get_dim_names() == ["var_id"]
assert soma.varm["PCs"].dim_names() == ["var_id"]

assert set(soma.obsp.get_member_names()) == set(["distances"])
assert isinstance(soma.obsp["distances"], tiledbsc.AnnotationPairwiseMatrix)
assert soma.varp["nonesuch"] is None
assert soma.obsp["distances"].get_dim_names() == ["obs_id_i", "obs_id_j"]
assert soma.obsp["distances"].dim_names() == ["obs_id_i", "obs_id_j"]

assert set(soma.uns.get_member_names()) == set(["neighbors"])
assert isinstance(soma.uns["neighbors"], tiledbsc.UnsGroup)
Expand Down

0 comments on commit 30f8d25

Please sign in to comment.