Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to TileDB-Py 0.15.2 #144

Merged
merged 7 commits into from
Jun 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apis/python/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ zip_safe = False
# The numpy pin is to avoid
# "ImportError: Numba needs NumPy 1.21 or less"
install_requires =
tiledb>=0.15.1
tiledb>=0.15.2
scipy
numpy<1.22
pandas
Expand Down
63 changes: 22 additions & 41 deletions apis/python/src/tiledbsc/annotation_matrix_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,9 @@ def to_dict_of_csr(self) -> Dict[str, scipy.sparse.csr_matrix]:
Member arrays are returned in sparse CSR format.
"""

grp = None
try: # Not all groups have all four of obsm, obsp, varm, and varp.
grp = tiledb.Group(self.uri, mode="r")
except:
pass
if grp == None:
if (
not self.exists()
): # Not all groups have all four of obsm, obsp, varm, and varp.
if self._verbose:
print(f"{self._indent}{self.uri} not found")
return {}
Expand All @@ -99,29 +96,25 @@ def to_dict_of_csr(self) -> Dict[str, scipy.sparse.csr_matrix]:
s = util.get_start_stamp()
print(f"{self._indent}START read {self.uri}")

# TODO: fold this element-enumeration into the TileDB group class. Maybe on the same PR
# where we support somagroup['name'] with overloading of the [] operator.
matrices_in_group = {}
for element in grp:
with tiledb.open(element.uri) as A:
with tiledb.open(element.uri) as A:
if self._verbose:
s2 = util.get_start_stamp()
print(f"{self._indent}START read {element.uri}")
with self._open() as G:
matrices_in_group = {}
for element in G:
if self._verbose:
s2 = util.get_start_stamp()
print(f"{self._indent}START read {element.uri}")

with tiledb.open(element.uri) as A:
df = pd.DataFrame(A[:])
df.set_index(self.dim_name, inplace=True)
matrix_name = os.path.basename(element.uri) # e.g. 'X_pca'
matrices_in_group[matrix_name] = df.to_numpy()

if self._verbose:
print(
util.format_elapsed(
s2, f"{self._indent}FINISH read {element.uri}"
)
if self._verbose:
print(
util.format_elapsed(
s2, f"{self._indent}FINISH read {element.uri}"
)

grp.close()
)

if self._verbose:
print(util.format_elapsed(s, f"{self._indent}FINISH read {self.uri}"))
Expand Down Expand Up @@ -150,37 +143,25 @@ def __getitem__(self, name):
member exists. Overloads the `[...]` operator.
"""

# TODO: If TileDB-Py were to support `name in G` the line-count could reduce here.
with self._open("r") as G:
try:
obj = G[name] # This returns a tiledb.object.Object.
except:
if not name in G:
return None

obj = G[name] # This returns a tiledb.object.Object.
if obj.type == tiledb.tiledb.Group:
raise Exception(
"Internal error: found group element where array element was expected."
)
elif obj.type == tiledb.libtiledb.Array:
return AnnotationMatrix(
uri=obj.uri, name=name, dim_name=self.dim_name, parent=self
)
else:
if obj.type != tiledb.libtiledb.Array:
raise Exception(
f"Internal error: found group element neither subgroup nor array: type is {str(obj.type)}"
)
return AnnotationMatrix(
uri=obj.uri, name=name, dim_name=self.dim_name, parent=self
)

def __contains__(self, name):
"""
Implements the `in` operator, e.g. `"namegoeshere" in soma.obsm/soma.varm`.
"""
# TODO: this will get easier once TileDB.group.Group supports `name` in `__contains__`.
# See SC-18057 and https://github.com/single-cell-data/TileDB-SingleCell/issues/113.
with self._open("r") as G:
answer = False
try:
# This returns a tiledb.object.Object.
G[name]
return True
except:
return False
return name in G
10 changes: 1 addition & 9 deletions apis/python/src/tiledbsc/annotation_pairwise_matrix_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,13 +201,5 @@ def __contains__(self, name):
"""
Implements `"namegoeshere" in soma.obsp/soma.varp`.
"""
# TODO: this will get easier once TileDB.group.Group supports `name` in `__contains__`.
# See SC-18057 and https://github.com/single-cell-data/TileDB-SingleCell/issues/113.
with self._open("r") as G:
answer = False
try:
# This returns a tiledb.object.Object.
G[name]
return True
except:
return False
return name in G
38 changes: 13 additions & 25 deletions apis/python/src/tiledbsc/assay_matrix_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,47 +92,35 @@ def __getitem__(self, name):
member exists. Overloads the `[...]` operator.
"""

# TODO: If TileDB-Py were to support `name in G` the line-count could reduce here.
with self._open("r") as G:
try:
obj = G[name] # This returns a tiledb.object.Object.
except:
if not name in G:
return None

obj = G[name] # This returns a tiledb.object.Object.
if obj.type == tiledb.tiledb.Group:
raise Exception(
"Internal error: found group element where array element was expected."
)
elif obj.type == tiledb.libtiledb.Array:
return AssayMatrix(
uri=obj.uri,
name=name,
row_dim_name=self.row_dim_name,
col_dim_name=self.col_dim_name,
row_dataframe=self.row_dataframe,
col_dataframe=self.col_dataframe,
parent=self,
)

else:
if obj.type != tiledb.libtiledb.Array:
raise Exception(
f"Internal error: found group element neither subgroup nor array: type is {str(obj.type)}"
)
return AssayMatrix(
uri=obj.uri,
name=name,
row_dim_name=self.row_dim_name,
col_dim_name=self.col_dim_name,
row_dataframe=self.row_dataframe,
col_dataframe=self.col_dataframe,
parent=self,
)

def __contains__(self, name):
"""
Implements the `in` operator, e.g. `"data" in soma.X`.
"""
# TODO: this will get easier once TileDB.group.Group supports `name` in `__contains__`.
# See SC-18057 and https://github.com/single-cell-data/TileDB-SingleCell/issues/113.
with self._open("r") as G:
answer = False
try:
# This returns a tiledb.object.Object.
G[name]
return True
except:
return False
return name in G

# ----------------------------------------------------------------
def from_matrix_and_dim_values(self, matrix, row_names, col_names) -> None:
Expand Down
9 changes: 1 addition & 8 deletions apis/python/src/tiledbsc/soma_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,8 @@ def __contains__(self, name: str) -> bool:
"""
Implements `name in soco`
"""
# TODO: this will get easier once TileDB.group.Group supports `name` in `__contains__`.
# See SC-18057 and https://github.com/single-cell-data/TileDB-SingleCell/issues/113.
with self._open("r") as G:
answer = False
try:
G[name] # This returns a tiledb.object.Object.
return True
except:
return False
return name in G

# At the tiledb-py API level, *all* groups are name-indexable. But here at the tiledbsc-py
# level, we implement name-indexing only for some groups:
Expand Down
31 changes: 4 additions & 27 deletions apis/python/src/tiledbsc/tiledb_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,39 +66,16 @@ def _create(self):
tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY
] = self.__class__.__name__

def _open_withlessly(self, mode="r"):
"""
This is just a convenience wrapper around tiledb.open of the tiledb group
associated with this SOMA element.
"""
assert mode in ["w", "r"]
if mode == "w" and not self.exists():
self._create()
if mode == "r" and not self.exists():
raise Exception(f"Does not exist: {self.uri}")
return tiledb.Group(self.uri, mode=mode, ctx=self._ctx)

@contextmanager
def _open(self, mode="r"):
"""
This is just a convenience wrapper around tiledb.open of the tiledb group
associated with this SOMA element, supporting Python with-as syntax.
TODO: Once TileDB-Py's Group objects have `__enter__` and `__exit__`
methods, fold this and _open_withlessly together.
This is just a convenience wrapper around tiledb group-open.
It works as a `with self._open() as G:` as well as `G = self._open(); ...; G.close()`.
"""
assert mode in ("r", "w")

# Do this check here, not just in _open_withlessly -- otherwise we get
# UnboundLocalError: local variable 'G' referenced before assignment
# which is super-confusing for the user.
if mode == "r" and not self.exists():
raise Exception(f"Does not exist: {self.uri}")

try:
G = self._open_withlessly(mode)
yield G
finally:
G.close()
# This works in with-open-as contexts because tiledb.Group has __enter__ and __exit__ methods.
return tiledb.Group(self.uri, mode=mode, ctx=self._ctx)

def _add_object(self, obj: TileDBObject):
"""
Expand Down
13 changes: 1 addition & 12 deletions apis/python/src/tiledbsc/uns_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,6 @@ def to_dict_of_matrices(self) -> Dict:
s = util.get_start_stamp()
print(f"{self._indent}START read {self.uri}")

# TODO: fold this element-enumeration into the TileDB group class. Maybe on the same PR
# where we support somagroup['name'] with overloading of the [] operator.
retval = {}
for element in grp:
name = os.path.basename(element.uri) # TODO: update for tiledb cloud
Expand Down Expand Up @@ -195,14 +193,5 @@ def __contains__(self, name):
"""
Implements '"namegoeshere" in soma.uns'.
"""

# TODO: this will get easier once TileDB.group.Group supports `name` in `__contains__`.
# See SC-18057 and https://github.com/single-cell-data/TileDB-SingleCell/issues/113.
with self._open("r") as G:
answer = False
try:
# This returns a tiledb.object.Object.
G[name]
return True
except:
return False
return name in G
8 changes: 2 additions & 6 deletions apis/python/tests/test_soco_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,8 @@ def test_import_anndata(tmp_path):

soco = tiledbsc.SOMACollection(soco_dir)

# TODO: change this to with-open-as syntax once
# https://github.com/TileDB-Inc/TileDB-Py/pull/1124
# is in a TileDB-Py release which we articulate a dependency on.
G = tiledb.Group(soma1_dir)
assert G.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY] == "SOMA"
G.close()
with tiledb.Group(soma1_dir) as G:
assert G.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY] == "SOMA"

soco._create()
assert len(soco._get_member_names()) == 0
Expand Down
8 changes: 2 additions & 6 deletions apis/python/tests/test_tiledbsc.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,8 @@ def test_import_anndata(adata):
# raw/var
# raw/varm/PCs

# TODO: change this to with-open-as syntax once
# https://github.com/TileDB-Inc/TileDB-Py/pull/1124
# is in a TileDB-Py release which we articulate a dependency on.
G = tiledb.Group(output_path)
assert G.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY] == "SOMA"
G.close()
with tiledb.Group(output_path) as G:
assert G.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY] == "SOMA"

# Check X/data (dense)
with tiledb.open(os.path.join(output_path, "X", "data")) as A:
Expand Down