Skip to content

Commit

Permalink
neaten, and comment
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Jul 15, 2022
1 parent 7971580 commit 82f4843
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 42 deletions.
20 changes: 8 additions & 12 deletions apis/python/src/tiledbsc/raw_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,28 +35,24 @@ def __init__(
super().__init__(uri=uri, name=name, parent=parent)
self.parent_obs = obs

# TODO: COMMENT
member_names = ["X", "var", "varm", "varp"]
child_uris = self._get_child_uris(member_names) # See comments in that function
# See comments in _get_child_uris
child_uris = self._get_child_uris(["X", "var", "varm", "varp"])

X_uri = child_uris["X"] # See comments in that function
var_uri = child_uris["var"]
varm_uri = child_uris["varm"]
varp_uri = child_uris["varp"]

self.var = AnnotationDataFrame(uri=var_uri, name="var", parent=self)
self.var = AnnotationDataFrame(uri=child_uris["var"], name="var", parent=self)
self.X = AssayMatrixGroup(
uri=X_uri,
uri=child_uris["X"],
name="X",
row_dim_name="obs_id",
col_dim_name="var_id",
row_dataframe=self.parent_obs,
col_dataframe=self.var,
parent=self,
)
self.varm = AnnotationMatrixGroup(uri=varm_uri, name="varm", parent=self)
self.varm = AnnotationMatrixGroup(
uri=child_uris["varm"], name="varm", parent=self
)
self.varp = AnnotationPairwiseMatrixGroup(
uri=varp_uri,
uri=child_uris["varp"],
name="varp",
row_dataframe=self.var,
col_dataframe=self.var,
Expand Down
22 changes: 10 additions & 12 deletions apis/python/src/tiledbsc/soma.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,10 @@ def __init__(
ctx=ctx,
)

# TODO: COMMENT
member_names = ["obs", "var", "X", "obsm", "varm", "obsp", "varp", "raw", "uns"]
child_uris = self._get_child_uris(member_names) # See comments in that function
# See comments in _get_child_uris
child_uris = self._get_child_uris(
["obs", "var", "X", "obsm", "varm", "obsp", "varp", "raw", "uns"]
)

obs_uri = child_uris["obs"]
var_uri = child_uris["var"]
Expand Down Expand Up @@ -133,15 +134,12 @@ def __init__(
self.raw = RawGroup(uri=raw_uri, name="raw", obs=self.obs, parent=self)
self.uns = UnsGroup(uri=uns_uri, name="uns", parent=self)

# If URI is "/something/test1" then:
# * obs_uri is "/something/test1/obs"
# * var_uri is "/something/test1/var"
# * data_uri is "/something/test1/X"

# If URI is "tiledb://namespace/s3://bucketname/something/test1" then:
# * obs_uri is "tiledb://namespace/s3://bucketname/something/test1/obs"
# * var_uri is "tiledb://namespace/s3://bucketname/something/test1/var"
# * data_uri is "tiledb://namespace/s3://bucketname/something/test1/X"
# Sample SOMA/member URIs:
#
# SOMA member
# "/foo/test1" "/foo/test1/obs"
# "tiledb://ns/s3://bkt/foo/test1" "tiledb://ns/s3://bkt/foo/test1/obs"
# "tiledb://ns/test1" "tiledb://ns/95cb11b5-2052-461b-9b99-cfa94e51e23f"

# ----------------------------------------------------------------
def __repr__(self) -> str:
Expand Down
7 changes: 6 additions & 1 deletion apis/python/src/tiledbsc/soma_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ class SOMACollection(TileDBGroup):
Implements a collection of `SOMA` objects.
"""

# XXX COMMENT
# This is a cache to avoid the overhead of calling the SOMA constructor repeatedly. That
# constructor isn't particuarly expensive, except that for tiledb-cloud URIs it needs to
# interrogate the server repeatedly for recursive group-member URIs, which has web-request
# latency.
_somas: Dict[str, SOMA]

# ----------------------------------------------------------------
Expand Down Expand Up @@ -127,6 +130,7 @@ def __iter__(self) -> Iterator[SOMA]:
"""
for name, uri in self._get_member_names_to_uris().items():
if name not in self._somas:
# SOMA-constructor cache
self._somas[name] = SOMA(uri=uri, name=name, parent=self, ctx=self._ctx)
yield self._somas[name]

Expand Down Expand Up @@ -160,6 +164,7 @@ def __getitem__(self, name: str) -> Optional[SOMA]:
member exists. Overloads the `[...]` operator.
"""
if name in self._somas:
# SOMA-constructor cache
return self._somas[name]

with self._open("r") as G:
Expand Down
31 changes: 18 additions & 13 deletions apis/python/src/tiledbsc/tiledb_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ class TileDBGroup(TileDBObject):
Wraps groups from TileDB-Py by retaining a URI, options, etc.
"""

# Cache to avoid repeated calls to the REST server for resolving group-member URIs
# in the tiledb-cloud case. We invalidate this on add-member or remove-member.
_cached_member_names_to_uris: Optional[Dict[str, str]]

def __init__(
Expand All @@ -40,9 +42,10 @@ def exists(self) -> bool:
object has not yet been populated, e.g. before calling `from_anndata` -- or, if the
SOMA has been populated but doesn't have this member (e.g. not all SOMAs have a `varp`).
"""
# TODO: NOTE WHAT IF VFS.DELETE AFTER INSTANTIATION
# TODO: NON-CACHEABLE AND WHY
return tiledb.object_type(self.uri, ctx=self._ctx) == "group"
# For tiledb:// URIs this is a REST-server request which we'd like to cache.
# However, remove-and-replace use-cases are possible and common in notebooks
# and it turns out caching the existence-check isn't a robust approach.
return bool(tiledb.object_type(self.uri, ctx=self._ctx) == "group")

def create_unless_exists(self) -> None:
"""
Expand Down Expand Up @@ -90,17 +93,17 @@ def _open(self, mode: str = "r") -> tiledb.Group:
# This works in with-open-as contexts because tiledb.Group has __enter__ and __exit__ methods.
return tiledb.Group(self.uri, mode=mode, ctx=self._ctx)

# XXX COMMENT
def _get_child_uris(self, member_names: Sequence[str]) -> Dict[str, str]:
"""
Computes the URI for a child of the given object. For local disk, S3, and
tiledb://.../s3://... pre-creation URIs, this is simply the parent's URI, a slash, and the
member name. For post-creation TileDB-Cloud URIs, this is computed from the parent's
information. (This is because in TileDB Cloud, members have URIs like
tiledb://namespace/df584345-28b7-45e5-abeb-043d409b1a97.)
Batched version of `get_child_uri`. Since there's REST-server latency for getting
name-to-URI mapping for group-member URIs, in the tiledb://... case, total latency
is reduced when we ask for all group-element name-to-URI mappings in a single
request to the REST server.
"""
if not self.exists():
# TODO: comment
# Group not constructed yet. Here, appending "/" and name is appropriate in all
# cases: even for tiledb://... URIs, pre-construction URIs are of the form
# tiledb://namespace/s3://something/something/soma/membername.
return {
member_name: self.uri + "/" + member_name
for member_name in member_names
Expand All @@ -123,14 +126,16 @@ def _get_child_uris(self, member_names: Sequence[str]) -> Dict[str, str]:

return answer

# XXX COMMENT
def _get_child_uri(self, member_name: str) -> str:
"""
Computes the URI for a child of the given object. For local disk, S3, and
tiledb://.../s3://... pre-creation URIs, this is simply the parent's URI, a slash, and the
member name. For post-creation TileDB-Cloud URIs, this is computed from the parent's
information. (This is because in TileDB Cloud, members have URIs like
tiledb://namespace/df584345-28b7-45e5-abeb-043d409b1a97.)
Please use _get_child_uris whenever possible, to reduce the number of REST-server requests
in the tiledb//... URIs.
"""
if not self.exists():
# TODO: comment
Expand Down Expand Up @@ -180,7 +185,7 @@ def _add_object(self, obj: TileDBObject, relative: Optional[bool] = None) -> Non
relative = not child_uri.startswith("tiledb://")
if relative:
child_uri = obj.name
self._cached_member_names_to_uris = None # invalidate
self._cached_member_names_to_uris = None # invalidate on add-member
with self._open("w") as G:
G.add(uri=child_uri, relative=relative, name=obj.name)
# See _get_child_uri. Key point is that, on TileDB Cloud, URIs change from pre-creation to
Expand All @@ -196,7 +201,7 @@ def _remove_object(self, obj: TileDBObject) -> None:
self._remove_object_by_name(obj.name)

def _remove_object_by_name(self, member_name: str) -> None:
self._cached_member_names_to_uris = None # invalidate
self._cached_member_names_to_uris = None # invalidate on remove-member
if self.uri.startswith("tiledb://"):
mapping = self._get_member_names_to_uris()
if member_name not in mapping:
Expand Down
7 changes: 3 additions & 4 deletions apis/python/src/tiledbsc/uns_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,12 +143,11 @@ def from_anndata_uns(self, uns: Mapping[str, Any]) -> None:
# Must be done first, to create the parent directory
self.create_unless_exists()

child_uris = self._get_child_uris(
list(uns.keys())
) # See comments in that function
# See comments in _get_child_uris
child_uris = self._get_child_uris(list(uns.keys()))

for key in uns.keys():
component_uri = child_uris[key] # See comments in that function
component_uri = child_uris[key]
value = uns[key]

if key == "rank_genes_groups":
Expand Down

0 comments on commit 82f4843

Please sign in to comment.