Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

show_metadata() method #145

Merged
merged 1 commit into from
Jun 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsc/annotation_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def from_dataframe(self, dataframe: pd.DataFrame, extent: int) -> None:
mode=mode,
)

self.set_soma_object_type_metadata()
self._set_soma_object_type_metadata()

if self._verbose:
print(util.format_elapsed(s, f"{self._indent}FINISH WRITING {self.uri}"))
Expand Down
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsc/annotation_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def from_matrix_and_dim_values(self, matrix, dim_values):
else:
self._numpy_ndarray_or_scipy_sparse_csr_matrix(matrix, dim_values)

self.set_soma_object_type_metadata()
self._set_soma_object_type_metadata()

if self._verbose:
print(util.format_elapsed(s, f"{self._indent}FINISH WRITING {self.uri}"))
Expand Down
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsc/assay_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def from_matrix_and_dim_values(self, matrix, row_names, col_names) -> None:
else:
self._create_empty_array(matrix_dtype=matrix.dtype)

self.set_soma_object_type_metadata()
self._set_soma_object_type_metadata()

self._ingest_data(matrix, row_names, col_names)
if self._verbose:
Expand Down
15 changes: 12 additions & 3 deletions apis/python/src/tiledbsc/tiledb_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import tiledbsc.util_tiledb
from .soma_options import SOMAOptions
from .tiledb_object import TileDBObject
from .tiledb_group import TileDBGroup

from typing import Optional, List, Dict

Expand All @@ -18,7 +17,9 @@ def __init__(
self,
uri: str,
name: str,
parent: Optional[TileDBGroup] = None,
# It's a circular import if we say this, but this is really:
# parent: Optional[TileDBGroup] = None,
parent=None,
):
"""
See the TileDBObject constructor.
Expand Down Expand Up @@ -98,7 +99,7 @@ def has_attr_name(self, attr_name: str) -> bool:
"""
return attr_name in self.attr_names()

def set_soma_object_type_metadata(self) -> None:
def _set_soma_object_type_metadata(self) -> None:
"""
This helps nested-structured traversals (especially those that start at the SOMACollection
level) confidently navigate with a minimum of introspection on group contents.
Expand All @@ -107,3 +108,11 @@ def set_soma_object_type_metadata(self) -> None:
A.meta[
tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY
] = self.__class__.__name__

def show_metadata(self, recursively=True, indent=""):
    """
    Prints the array's name in square brackets, followed by one "- key: value" line for
    each entry of the array's metadata. Every printed line is prefixed with `indent`.

    The `recursively` argument is accepted but not used by this method.
    """
    header = f"{indent}[{self.name}]"
    print(header)
    for meta_key, meta_value in self.metadata().items():
        entry = f"{indent}- {meta_key}: {meta_value}"
        print(entry)
62 changes: 62 additions & 0 deletions apis/python/src/tiledbsc/tiledb_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import tiledbsc.util_tiledb
from .soma_options import SOMAOptions
from .tiledb_object import TileDBObject
from .tiledb_array import TileDBArray

from contextlib import contextmanager

Expand Down Expand Up @@ -61,11 +62,45 @@ def _create(self):
if self._verbose:
print(f"{self._indent}Creating TileDB group {self.uri}")
tiledb.group_create(uri=self.uri, ctx=self._ctx)

self._set_soma_object_type_metadata()

def _set_soma_object_type_metadata(self):
    """
    Stores this object's class name in the group's metadata under the SOMA object-type key.

    With this in place, traversals of nested structures (especially ones starting at the
    SOMACollection level) can navigate confidently with a minimum of introspection on
    group contents.
    """
    with self._open("w") as group_handle:
        class_name = self.__class__.__name__
        metadata_key = tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY
        group_handle.meta[metadata_key] = class_name

def _set_soma_object_type_metadata_recursively(self):
    """
    Writes the SOMA object-type metadata on this group, then on every member beneath it:
    member groups are handled recursively and member arrays are handled directly.

    SOMAs/SOCOs written very early on in the development of this project may not have this
    metadata set. This method back-fills it without needing to re-ingest entire datasets.
    Any SOMAs/SOCOs ingested from June 2022 onward won't need this, since the metadata is
    written at ingestion time.

    Raises an Exception if a member is neither a group nor an array.
    """
    self._set_soma_object_type_metadata()
    with self._open() as group_handle:
        for member in group_handle:  # Each member is a tiledb.object.Object
            # Keeping this traversal in TileDBGroup (with the array-side logic in
            # TileDBArray) costs a little duplication compared to a single implementation in
            # TileDBObject, but it was simpler to get working with the recursive structure.
            kind = tiledb.object_type(member.uri)
            if kind not in ("group", "array"):
                raise Exception(
                    f"Unexpected object_type found: {kind} at {member.uri}"
                )
            if kind == "group":
                child_group = TileDBGroup(
                    uri=member.uri, name=member.name, parent=self
                )
                child_group._set_soma_object_type_metadata_recursively()
            else:
                child_array = TileDBArray(
                    uri=member.uri, name=member.name, parent=self
                )
                child_array._set_soma_object_type_metadata()

def _open(self, mode="r"):
"""
This is just a convenience wrapper around tiledb group-open.
Expand Down Expand Up @@ -132,3 +167,30 @@ def _get_member_names_to_uris(self) -> Dict[str, str]:
"""
with self._open("r") as G:
return {O.name: O.uri for O in G}

def show_metadata(self, recursively=True, indent=""):
    """
    Prints the group's name in square brackets, followed by one "- key: value" line for
    each entry of the group's metadata. Every printed line is prefixed with `indent`.

    When `recursively` is true (the default), the same is then done for every member group
    and member array, with a deeper indent. Raises an Exception if a member is neither a
    group nor an array.
    """
    header = f"{indent}[{self.name}]"
    print(header)
    for meta_key, meta_value in self.metadata().items():
        print(f"{indent}- {meta_key}: {meta_value}")

    if not recursively:
        return

    child_indent = indent + " "
    with self._open() as group_handle:
        for member in group_handle:  # Each member is a tiledb.object.Object
            # Keeping this traversal in TileDBGroup (with the array-side logic in
            # TileDBArray) costs a little duplication compared to a single implementation in
            # TileDBObject, but it was simpler to get working with the recursive structure.
            kind = tiledb.object_type(member.uri)
            if kind not in ("group", "array"):
                raise Exception(
                    f"Unexpected object_type found: {kind} at {member.uri}"
                )
            if kind == "group":
                child = TileDBGroup(uri=member.uri, name=member.name, parent=self)
            else:
                child = TileDBArray(uri=member.uri, name=member.name, parent=self)
            child.show_metadata(recursively, indent=child_indent)
36 changes: 15 additions & 21 deletions apis/python/src/tiledbsc/uns_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,12 +114,7 @@ def to_dict_of_matrices(self) -> Dict:
"""
Reads the recursive group/array uns data from TileDB storage and returns them as a recursive dict of matrices.
"""
grp = None
try: # Not all groups have uns
grp = tiledb.Group(self.uri, mode="r")
except:
pass
if grp == None:
if not self.exists():
if self._verbose:
print(f"{self._indent}{self.uri} not found")
return {}
Expand All @@ -128,24 +123,23 @@ def to_dict_of_matrices(self) -> Dict:
s = util.get_start_stamp()
print(f"{self._indent}START read {self.uri}")

retval = {}
for element in grp:
name = os.path.basename(element.uri) # TODO: update for tiledb cloud
with self._open() as G:
retval = {}
for element in G:
name = os.path.basename(element.uri) # TODO: update for tiledb cloud

if element.type == tiledb.tiledb.Group:
child_group = UnsGroup(uri=element.uri, name=name, parent=self)
retval[name] = child_group.to_dict_of_matrices()
if element.type == tiledb.tiledb.Group:
child_group = UnsGroup(uri=element.uri, name=name, parent=self)
retval[name] = child_group.to_dict_of_matrices()

elif element.type == tiledb.libtiledb.Array:
child_array = UnsArray(uri=element.uri, name=name, parent=self)
retval[name] = child_array.to_matrix()
elif element.type == tiledb.libtiledb.Array:
child_array = UnsArray(uri=element.uri, name=name, parent=self)
retval[name] = child_array.to_matrix()

else:
raise Exception(
f"Internal error: found uns group element neither group nor array: type is {str(element.type)}"
)

grp.close()
else:
raise Exception(
f"Internal error: found uns group element neither group nor array: type is {str(element.type)}"
)

if self._verbose:
print(util.format_elapsed(s, f"{self._indent}FINISH read {self.uri}"))
Expand Down
53 changes: 24 additions & 29 deletions apis/python/src/tiledbsc/util_tiledb.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,10 @@ def __show_array_schema(uri: str, ctx: Optional[tiledb.Ctx] = None):

# ----------------------------------------------------------------
def __show_array_schemas_for_group(group_uri: str, ctx: Optional[tiledb.Ctx] = None):
group = None
try:
group = tiledb.Group(group_uri, mode="r", ctx=ctx)
except:
return

for element in group:
if element.type == tiledb.libtiledb.Array:
__show_array_schema(element.uri, ctx)
group.close()
with tiledb.Group(group_uri, mode="r", ctx=ctx) as G:
for element in G:
if element.type == tiledb.libtiledb.Array:
__show_array_schema(element.uri, ctx)


# ================================================================
Expand All @@ -68,22 +62,23 @@ def show_tiledb_group_array_schemas(uri: str, ctx: Optional[tiledb.Ctx] = None):
single-cell matrix-API data, and won't necessarily traverse items in a familiar
application-specific order.
"""
group = tiledb.Group(uri, mode="r", ctx=ctx)
print()
print("================================================================")
print(uri)

for element in group:
# Note: use `element.type` rather than `isinstance(element, tiledb.group.Group)`
# since type(element) is `tiledb.object.Object` in all cases.
if element.type == tiledb.group.Group:
show_tiledb_group_array_schemas(element.uri)
elif element.type == tiledb.libtiledb.Array:
print()
print("----------------------------------------------------------------")
print(element.uri)
with tiledb.open(element.uri, ctx=ctx) as A:
print(A.schema)
else:
print("Skipping element type", element.type)
group.close()
with tiledb.Group(uri, mode="r", ctx=ctx) as G:
print()
print("================================================================")
print(uri)

for element in G:
# Note: use `element.type` rather than `isinstance(element, tiledb.group.Group)`
# since type(element) is `tiledb.object.Object` in all cases.
if element.type == tiledb.group.Group:
show_tiledb_group_array_schemas(element.uri)
elif element.type == tiledb.libtiledb.Array:
print()
print(
"----------------------------------------------------------------"
)
print(element.uri)
with tiledb.open(element.uri, ctx=ctx) as A:
print(A.schema)
else:
print("Skipping element type", element.type)
4 changes: 4 additions & 0 deletions apis/python/tools/desc-soma
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ def main():

for uri in sys.argv[1:]:
tiledbsc.util_tiledb.show_single_cell_group(uri)
print()
print("METADATA:")
soma = tiledbsc.SOMA(uri)
soma.show_metadata(recursively=True)


if __name__ == "__main__":
Expand Down