Skip to content

Commit

Permalink
show_metadata() method, and some minor neatens (#145)
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl authored Jun 3, 2022
1 parent b334fad commit 668b816
Show file tree
Hide file tree
Showing 8 changed files with 120 additions and 56 deletions.
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsc/annotation_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def from_dataframe(self, dataframe: pd.DataFrame, extent: int) -> None:
mode=mode,
)

self.set_soma_object_type_metadata()
self._set_soma_object_type_metadata()

if self._verbose:
print(util.format_elapsed(s, f"{self._indent}FINISH WRITING {self.uri}"))
Expand Down
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsc/annotation_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def from_matrix_and_dim_values(self, matrix, dim_values):
else:
self._numpy_ndarray_or_scipy_sparse_csr_matrix(matrix, dim_values)

self.set_soma_object_type_metadata()
self._set_soma_object_type_metadata()

if self._verbose:
print(util.format_elapsed(s, f"{self._indent}FINISH WRITING {self.uri}"))
Expand Down
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsc/assay_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def from_matrix_and_dim_values(self, matrix, row_names, col_names) -> None:
else:
self._create_empty_array(matrix_dtype=matrix.dtype)

self.set_soma_object_type_metadata()
self._set_soma_object_type_metadata()

self._ingest_data(matrix, row_names, col_names)
if self._verbose:
Expand Down
15 changes: 12 additions & 3 deletions apis/python/src/tiledbsc/tiledb_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import tiledbsc.util_tiledb
from .soma_options import SOMAOptions
from .tiledb_object import TileDBObject
from .tiledb_group import TileDBGroup

from typing import Optional, List, Dict

Expand All @@ -18,7 +17,9 @@ def __init__(
self,
uri: str,
name: str,
parent: Optional[TileDBGroup] = None,
# It's a circular import if we say this, but this is really:
# parent: Optional[TileDBGroup] = None,
parent=None,
):
"""
See the TileDBObject constructor.
Expand Down Expand Up @@ -98,7 +99,7 @@ def has_attr_name(self, attr_name: str) -> bool:
"""
return attr_name in self.attr_names()

def set_soma_object_type_metadata(self) -> None:
def _set_soma_object_type_metadata(self) -> None:
"""
This helps nested-structured traversals (especially those that start at the SOMACollection
level) confidently navigate with a minimum of introspection on group contents.
Expand All @@ -107,3 +108,11 @@ def set_soma_object_type_metadata(self) -> None:
A.meta[
tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY
] = self.__class__.__name__

def show_metadata(self, recursively=True, indent=""):
    """
    Prints the array's name in square brackets, followed by one `- key: value` line for each
    entry of the array's metadata. Every printed line is prefixed with `indent`.

    The `recursively` argument is not used by this method: nothing in the body reads it.
    """
    print(f"{indent}[{self.name}]")
    bullet = f"{indent}- "
    for meta_key, meta_value in self.metadata().items():
        print(f"{bullet}{meta_key}: {meta_value}")
62 changes: 62 additions & 0 deletions apis/python/src/tiledbsc/tiledb_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import tiledbsc.util_tiledb
from .soma_options import SOMAOptions
from .tiledb_object import TileDBObject
from .tiledb_array import TileDBArray

from contextlib import contextmanager

Expand Down Expand Up @@ -61,11 +62,45 @@ def _create(self):
if self._verbose:
print(f"{self._indent}Creating TileDB group {self.uri}")
tiledb.group_create(uri=self.uri, ctx=self._ctx)

self._set_soma_object_type_metadata()

def _set_soma_object_type_metadata(self):
    """
    Records this object's class name in the group's metadata, under the key
    `tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY`.

    This helps nested-structured traversals (especially those that start at the SOMACollection
    level) confidently navigate with a minimum of introspection on group contents.
    """
    with self._open("w") as group_handle:
        metadata_key = tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY
        group_handle.meta[metadata_key] = self.__class__.__name__

def _set_soma_object_type_metadata_recursively(self):
    """
    Writes the SOMA object-type metadata on this group, then on every group and array nested
    beneath it.

    SOMAs/SOCOs written very early on in the development of this project may not have these set.
    Using this method we can after-populate these, without needing to re-ingest entire datasets.
    Any SOMAs/SOCOs ingested from June 2022 onward won't need this -- this metadata will be
    written at ingestion time.

    Raises an Exception if a member is neither a TileDB group nor a TileDB array.
    """
    self._set_soma_object_type_metadata()
    with self._open() as G:
        for member in G:  # Each member is a tiledb.object.Object
            # It might appear simpler to have all this code within TileDBObject class,
            # rather than (with a little duplication) in TileDBGroup and TileDBArray.
            # However, getting it to work with a recursive data structure and finding the
            # required methods, it was simpler to split the logic this way.

            # Probe the member with this group's own context, so that members are looked up
            # with the same configuration the group itself was created with.
            object_type = tiledb.object_type(member.uri, ctx=self._ctx)
            # NOTE(review): members are instantiated as plain TileDBGroup/TileDBArray, so the
            # class name recorded for them is that base class name -- confirm this is intended.
            if object_type == "group":
                group = TileDBGroup(uri=member.uri, name=member.name, parent=self)
                group._set_soma_object_type_metadata_recursively()
            elif object_type == "array":
                array = TileDBArray(uri=member.uri, name=member.name, parent=self)
                array._set_soma_object_type_metadata()
            else:
                raise Exception(
                    f"Unexpected object_type found: {object_type} at {member.uri}"
                )

def _open(self, mode="r"):
"""
This is just a convenience wrapper around tiledb group-open.
Expand Down Expand Up @@ -132,3 +167,30 @@ def _get_member_names_to_uris(self) -> Dict[str, str]:
"""
with self._open("r") as G:
return {O.name: O.uri for O in G}

def show_metadata(self, recursively=True, indent=""):
    """
    Shows metadata for the group, recursively by default.

    Prints the group's name in square brackets, followed by one `- key: value` line per
    metadata entry, each prefixed with `indent`. When `recursively` is true, every member
    group and array is then shown the same way, with `indent` extended by one space.

    Raises an Exception if a member is neither a TileDB group nor a TileDB array.
    """
    print(f"{indent}[{self.name}]")
    for key, value in self.metadata().items():
        print(f"{indent}- {key}: {value}")
    if not recursively:
        return

    child_indent = indent + " "
    with self._open() as G:
        for member in G:  # Each member is a tiledb.object.Object
            # It might appear simpler to have all this code within TileDBObject class,
            # rather than (with a little duplication) in TileDBGroup and TileDBArray.
            # However, getting it to work with a recursive data structure and finding the
            # required methods, it was simpler to split the logic this way.

            # Probe the member with this group's own context, so that members are looked up
            # with the same configuration the group itself was created with.
            object_type = tiledb.object_type(member.uri, ctx=self._ctx)
            if object_type == "group":
                group = TileDBGroup(uri=member.uri, name=member.name, parent=self)
                group.show_metadata(recursively, indent=child_indent)
            elif object_type == "array":
                array = TileDBArray(uri=member.uri, name=member.name, parent=self)
                array.show_metadata(recursively, indent=child_indent)
            else:
                raise Exception(
                    f"Unexpected object_type found: {object_type} at {member.uri}"
                )
36 changes: 15 additions & 21 deletions apis/python/src/tiledbsc/uns_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,12 +114,7 @@ def to_dict_of_matrices(self) -> Dict:
"""
Reads the recursive group/array uns data from TileDB storage and returns them as a recursive dict of matrices.
"""
grp = None
try: # Not all groups have uns
grp = tiledb.Group(self.uri, mode="r")
except:
pass
if grp == None:
if not self.exists():
if self._verbose:
print(f"{self._indent}{self.uri} not found")
return {}
Expand All @@ -128,24 +123,23 @@ def to_dict_of_matrices(self) -> Dict:
s = util.get_start_stamp()
print(f"{self._indent}START read {self.uri}")

retval = {}
for element in grp:
name = os.path.basename(element.uri) # TODO: update for tiledb cloud
with self._open() as G:
retval = {}
for element in G:
name = os.path.basename(element.uri) # TODO: update for tiledb cloud

if element.type == tiledb.tiledb.Group:
child_group = UnsGroup(uri=element.uri, name=name, parent=self)
retval[name] = child_group.to_dict_of_matrices()
if element.type == tiledb.tiledb.Group:
child_group = UnsGroup(uri=element.uri, name=name, parent=self)
retval[name] = child_group.to_dict_of_matrices()

elif element.type == tiledb.libtiledb.Array:
child_array = UnsArray(uri=element.uri, name=name, parent=self)
retval[name] = child_array.to_matrix()
elif element.type == tiledb.libtiledb.Array:
child_array = UnsArray(uri=element.uri, name=name, parent=self)
retval[name] = child_array.to_matrix()

else:
raise Exception(
f"Internal error: found uns group element neither group nor array: type is {str(element.type)}"
)

grp.close()
else:
raise Exception(
f"Internal error: found uns group element neither group nor array: type is {str(element.type)}"
)

if self._verbose:
print(util.format_elapsed(s, f"{self._indent}FINISH read {self.uri}"))
Expand Down
53 changes: 24 additions & 29 deletions apis/python/src/tiledbsc/util_tiledb.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,10 @@ def __show_array_schema(uri: str, ctx: Optional[tiledb.Ctx] = None):

# ----------------------------------------------------------------
def __show_array_schemas_for_group(group_uri: str, ctx: Optional[tiledb.Ctx] = None):
group = None
try:
group = tiledb.Group(group_uri, mode="r", ctx=ctx)
except:
return

for element in group:
if element.type == tiledb.libtiledb.Array:
__show_array_schema(element.uri, ctx)
group.close()
with tiledb.Group(group_uri, mode="r", ctx=ctx) as G:
for element in G:
if element.type == tiledb.libtiledb.Array:
__show_array_schema(element.uri, ctx)


# ================================================================
Expand All @@ -68,22 +62,23 @@ def show_tiledb_group_array_schemas(uri: str, ctx: Optional[tiledb.Ctx] = None):
single-cell matrix-API data, and won't necessarily traverse items in a familiar
application-specific order.
"""
group = tiledb.Group(uri, mode="r", ctx=ctx)
print()
print("================================================================")
print(uri)

for element in group:
# Note: use `element.type` rather than `isinstance(element, tiledb.group.Group)`
# since type(element) is `tiledb.object.Object` in all cases.
if element.type == tiledb.group.Group:
show_tiledb_group_array_schemas(element.uri)
elif element.type == tiledb.libtiledb.Array:
print()
print("----------------------------------------------------------------")
print(element.uri)
with tiledb.open(element.uri, ctx=ctx) as A:
print(A.schema)
else:
print("Skipping element type", element.type)
group.close()
with tiledb.Group(uri, mode="r", ctx=ctx) as G:
print()
print("================================================================")
print(uri)

for element in G:
# Note: use `element.type` rather than `isinstance(element, tiledb.group.Group)`
# since type(element) is `tiledb.object.Object` in all cases.
if element.type == tiledb.group.Group:
show_tiledb_group_array_schemas(element.uri)
elif element.type == tiledb.libtiledb.Array:
print()
print(
"----------------------------------------------------------------"
)
print(element.uri)
with tiledb.open(element.uri, ctx=ctx) as A:
print(A.schema)
else:
print("Skipping element type", element.type)
4 changes: 4 additions & 0 deletions apis/python/tools/desc-soma
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ def main():

for uri in sys.argv[1:]:
tiledbsc.util_tiledb.show_single_cell_group(uri)
print()
print("METADATA:")
soma = tiledbsc.SOMA(uri)
soma.show_metadata(recursively=True)


if __name__ == "__main__":
Expand Down

0 comments on commit 668b816

Please sign in to comment.