Skip to content

Commit

Permalink
caching
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Jul 13, 2022
1 parent c71dfca commit b424f59
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 55 deletions.
28 changes: 0 additions & 28 deletions apis/python/src/tiledbsc/soma.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import os
from collections import Counter
from typing import Optional, Sequence
import time

import pandas as pd
import tiledb
Expand Down Expand Up @@ -60,9 +59,6 @@ def __init__(
:param uri: URI of the TileDB group
"""

t1 = time.time()

print(time.time(), "AAA001")
# People can (and should) call by name. However, it's easy to forget. For example,
# if someone does 'tiledbsc.SOMA("myuri", ctx)' instead of 'tiledbsc.SOMA("myuri", ctx=ctx)',
# behavior will not be what they expect, and we should let them know sooner than later.
Expand All @@ -76,7 +72,6 @@ def __init__(
assert isinstance(ctx, tiledb.Ctx)
if parent is not None:
assert isinstance(parent, TileDBGroup)
print(time.time(), "AAA002")

if ctx is None and config is not None:
ctx = tiledb.Ctx(config)
Expand All @@ -86,7 +81,6 @@ def __init__(
name = os.path.basename(uri.rstrip("/"))
if name == "":
name = "soma"
print(time.time(), "AAA003")
super().__init__(
uri=uri,
name=name,
Expand All @@ -95,8 +89,6 @@ def __init__(
ctx=ctx,
)

# t01 = time.time()
# print(time.time(), "AAA004")
# obs_uri = self._get_child_uri("obs") # See comments in that function
# var_uri = self._get_child_uri("var")
# X_uri = self._get_child_uri("X")
Expand All @@ -106,11 +98,6 @@ def __init__(
# varp_uri = self._get_child_uri("varp")
# raw_uri = self._get_child_uri("raw")
# uns_uri = self._get_child_uri("uns")
# t02 = time.time()
# print("BBB001 %.3f" % (t02-t01))

t01 = time.time()
print(time.time(), "AAA104")

member_names = ["obs", "var", "X", "obsm", "varm", "obsp", "varp", "raw", "uns"]
child_uris = self._get_child_uris(member_names) # See comments in that function
Expand All @@ -125,14 +112,8 @@ def __init__(
raw_uri = child_uris["raw"]
uns_uri = child_uris["uns"]

t02 = time.time()
print("BBB001 %.3f" % (t02-t01))

print(time.time(), "AAA005")
self.obs = AnnotationDataFrame(uri=obs_uri, name="obs", parent=self)
print(time.time(), "AAA006")
self.var = AnnotationDataFrame(uri=var_uri, name="var", parent=self)
print(time.time(), "AAA007")
self.X = AssayMatrixGroup(
uri=X_uri,
name="X",
Expand All @@ -142,29 +123,23 @@ def __init__(
col_dataframe=self.var,
parent=self,
)
print(time.time(), "AAA008")
self.obsm = AnnotationMatrixGroup(uri=obsm_uri, name="obsm", parent=self)
print(time.time(), "AAA009")
self.varm = AnnotationMatrixGroup(uri=varm_uri, name="varm", parent=self)
print(time.time(), "AAA010")
self.obsp = AnnotationPairwiseMatrixGroup(
uri=obsp_uri,
name="obsp",
row_dataframe=self.obs,
col_dataframe=self.obs,
parent=self,
)
print(time.time(), "AAA011")
self.varp = AnnotationPairwiseMatrixGroup(
uri=varp_uri,
name="varp",
row_dataframe=self.var,
col_dataframe=self.var,
parent=self,
)
print(time.time(), "AAA012")
self.raw = RawGroup(uri=raw_uri, name="raw", obs=self.obs, parent=self)
print(time.time(), "AAA013")
self.uns = UnsGroup(uri=uns_uri, name="uns", parent=self)

# If URI is "/something/test1" then:
Expand All @@ -177,9 +152,6 @@ def __init__(
# * var_uri is "tiledb://namespace/s3://bucketname/something/test1/var"
# * data_uri is "tiledb://namespace/s3://bucketname/something/test1/X"

t2 = time.time()
print("SOMA CTOR SECONDS %.3f" % (t2-t1))

# ----------------------------------------------------------------
def __repr__(self) -> str:
"""
Expand Down
54 changes: 27 additions & 27 deletions apis/python/src/tiledbsc/tiledb_group.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import Dict, Optional, Sequence
from typing import Dict, Optional, Sequence, List
import time

import tiledb
Expand All @@ -16,6 +16,8 @@ class TileDBGroup(TileDBObject):
Wraps groups from TileDB-Py by retaining a URI, options, etc.
"""

_cached_member_names_to_uris: Dict[str, str]

def __init__(
self,
uri: str,
Expand All @@ -31,6 +33,7 @@ def __init__(
See the TileDBObject constructor.
"""
super().__init__(uri, name, parent=parent, soma_options=soma_options, ctx=ctx)
self._cached_member_names_to_uris = None

def exists(self) -> bool:
"""
Expand Down Expand Up @@ -97,7 +100,10 @@ def _get_child_uris(self, member_names: List[str]) -> Dict[str, str]:
"""
if not self.exists():
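# TODO: document this case more fully. When the group does not exist (yet), there are no
# members to look up, so fall back to URIs of the form <group URI>/<member name>.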
return {member_name : self.uri + "/" + member_name for member_name in member_names}
return {
member_name: self.uri + "/" + member_name
for member_name in member_names
}

answer = {}

Expand Down Expand Up @@ -125,30 +131,20 @@ def _get_child_uri(self, member_name: str) -> str:
information. (This is because in TileDB Cloud, members have URIs like
tiledb://namespace/df584345-28b7-45e5-abeb-043d409b1a97.)
"""
t1 = time.time()
print("--XXXENTER")
if not self.exists():
# TODO: comment
print("--XXXEXIT1 %.3f" % (time.time() - t1))
return self.uri + "/" + member_name
with self._open() as G:
if member_name in G:
return G[member_name].uri
else:
return self.uri + "/" + member_name
# mapping = self._get_member_names_to_uris()
# if member_name in mapping:
# print("--XXXEXIT2 %.3f" % (time.time() - t1))
# return mapping[member_name]
# else:
# # Truly a slash, not os.path.join:
# # * If the client is Linux/Un*x/Mac, it's the same of course
# # * On Windows, os.path.sep is a backslash but backslashes are _not_ accepted for S3 or
# # tiledb-cloud URIs, whereas in Windows versions for years now forward slashes _are_
# # accepted for local-disk paths.
# # This means forward slash is acceptable in all cases.
# print("--XXXEXIT3 %.3f" % (time.time() - t1))
# return self.uri + "/" + member_name
mapping = self._get_member_names_to_uris()
if member_name in mapping:
return mapping[member_name]
else:
# Deliberately a literal forward slash, not os.path.join:
# * On Linux/Unix/Mac the two are the same, of course.
# * On Windows, os.path.sep is a backslash, but backslashes are _not_ accepted in S3 or
# tiledb-cloud URIs, whereas Windows has for years accepted forward slashes in
# local-disk paths.
# So a forward slash is acceptable in all cases.
return self.uri + "/" + member_name

def _add_object(self, obj: TileDBObject, relative: Optional[bool] = None) -> None:
"""
Expand Down Expand Up @@ -183,9 +179,10 @@ def _add_object(self, obj: TileDBObject, relative: Optional[bool] = None) -> Non
relative = not child_uri.startswith("tiledb://")
if relative:
child_uri = obj.name
self._cached_member_names_to_uris = None # invalidate
with self._open("w") as G:
retval = G.add(uri=child_uri, relative=relative, name=obj.name)
print("RETVAL ", retval)
#####print("RETVAL ", retval)
# See _get_child_uri. Key point is that, on TileDB Cloud, URIs change from pre-creation to
# post-creation. Example:
# * Upload to pre-creation URI tiledb://namespace/s3://bucket/something/something/somaname
Expand All @@ -194,12 +191,13 @@ def _add_object(self, obj: TileDBObject, relative: Optional[bool] = None) -> Non
# * Member pre-creation URI tiledb://namespace/s3://bucket/something/something/somaname/obs
# * Member post-creation URI tiledb://somaname/e4de581a-1353-4150-b1f4-6ed12548e497
obj.uri = self._get_child_uri(obj.name)
print("REMAP", child_uri, "TO", obj.uri)
####print("REMAP", child_uri, "TO", obj.uri)

def _remove_object(self, obj: TileDBObject) -> None:
self._remove_object_by_name(obj.name)

def _remove_object_by_name(self, member_name: str) -> None:
self._cached_member_names_to_uris = None # invalidate
if self.uri.startswith("tiledb://"):
mapping = self._get_member_names_to_uris()
if member_name not in mapping:
Expand Down Expand Up @@ -230,8 +228,10 @@ def _get_member_names_to_uris(self) -> Dict[str, str]:
Like `_get_member_names()` and `_get_member_uris`, but returns a dict mapping from
member name to member URI.
"""
with self._open("r") as G:
return {obj.name: obj.uri for obj in G}
if self._cached_member_names_to_uris is None:
with self._open("r") as G:
self._cached_member_names_to_uris = {obj.name: obj.uri for obj in G}
return self._cached_member_names_to_uris

def show_metadata(self, recursively: bool = True, indent: str = "") -> None:
"""
Expand Down

0 comments on commit b424f59

Please sign in to comment.