From 093d46d4c1cbce2e9001752b71ffd7ae798061bc Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 2 Jun 2022 10:01:41 -0400 Subject: [PATCH 1/7] Update to tiledb-py 0.15.2 --- apis/python/setup.cfg | 2 +- apis/python/src/tiledbsc/uns_group.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/apis/python/setup.cfg b/apis/python/setup.cfg index c3e89923d2..c64a482d46 100644 --- a/apis/python/setup.cfg +++ b/apis/python/setup.cfg @@ -32,7 +32,7 @@ zip_safe = False # The numpy pin is to avoid # "ImportError: Numba needs NumPy 1.21 or less" install_requires = - tiledb>=0.15.1 + tiledb>=0.15.2 scipy numpy<1.22 pandas diff --git a/apis/python/src/tiledbsc/uns_group.py b/apis/python/src/tiledbsc/uns_group.py index 4c6b7c99a8..196c347aaa 100644 --- a/apis/python/src/tiledbsc/uns_group.py +++ b/apis/python/src/tiledbsc/uns_group.py @@ -128,8 +128,6 @@ def to_dict_of_matrices(self) -> Dict: s = util.get_start_stamp() print(f"{self._indent}START read {self.uri}") - # TODO: fold this element-enumeration into the TileDB group class. Maybe on the same PR - # where we support somagroup['name'] with overloading of the [] operator. retval = {} for element in grp: name = os.path.basename(element.uri) # TODO: update for tiledb cloud From efe0fd75fde61e1cc8e8a8d06dcb2484af40a1db Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 2 Jun 2022 10:06:47 -0400 Subject: [PATCH 2/7] with-open-as --- apis/python/src/tiledbsc/tiledb_group.py | 31 +++--------------------- apis/python/tests/test_soco_ops.py | 8 ++---- 2 files changed, 6 insertions(+), 33 deletions(-) diff --git a/apis/python/src/tiledbsc/tiledb_group.py b/apis/python/src/tiledbsc/tiledb_group.py index b77216a943..c942bf3d75 100644 --- a/apis/python/src/tiledbsc/tiledb_group.py +++ b/apis/python/src/tiledbsc/tiledb_group.py @@ -66,39 +66,16 @@ def _create(self): tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY ] = self.__class__.__name__ - def _open_withlessly(self, mode="r"): - """ - This is just a convenience wrapper around tiledb.open of the tiledb group - associated with this SOMA element. - """ - assert mode in ["w", "r"] - if mode == "w" and not self.exists(): - self._create() - if mode == "r" and not self.exists(): - raise Exception(f"Does not exist: {self.uri}") - return tiledb.Group(self.uri, mode=mode, ctx=self._ctx) - - @contextmanager def _open(self, mode="r"): """ - This is just a convenience wrapper around tiledb.open of the tiledb group - associated with this SOMA element, supporting Python with-as syntax. - TODO: One TileDB.Py's Group objects have `__enter__` and `__exit__` - method, fold this and _open_withlessly together. + This is just a convenience wrapper around tiledb group-open. + It works asa `with self._open() as G:` as well as `G = self._open(); ...; G.close()`. """ assert mode in ("r", "w") - - # Do this check here, not just in _open_withlessly -- otherwise we get - # UnboundLocalError: local variable 'G' referenced before assignment - # which is super-confusing for the user. if mode == "r" and not self.exists(): raise Exception(f"Does not exist: {self.uri}") - - try: - G = self._open_withlessly(mode) - yield G - finally: - G.close() + # This works in with-open-as contexts because tiledb.Group has __enter__ and __exit__ methods. + return tiledb.Group(self.uri, mode=mode, ctx=self._ctx) def _add_object(self, obj: TileDBObject): """ diff --git a/apis/python/tests/test_soco_ops.py b/apis/python/tests/test_soco_ops.py index 1272a3920b..2085195814 100644 --- a/apis/python/tests/test_soco_ops.py +++ b/apis/python/tests/test_soco_ops.py @@ -28,12 +28,8 @@ def test_import_anndata(tmp_path): soco = tiledbsc.SOMACollection(soco_dir) - # TODO: change this to with-open-as syntax once - # https://github.com/TileDB-Inc/TileDB-Py/pull/1124 - # is in a TileDB-Py release which we articulate a dependency on. - G = tiledb.Group(soma1_dir) - assert G.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY] == "SOMA" - G.close() + with tiledb.Group(soma1_dir) as G: + assert G.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY] == "SOMA" soco._create() assert len(soco._get_member_names()) == 0 From ac9dc8ce515840e21375fe8cf19b1698de25a827 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 2 Jun 2022 10:27:21 -0400 Subject: [PATCH 3/7] AnnotationMatrixGroup updates --- .../src/tiledbsc/annotation_matrix_group.py | 63 +++++++------------ 1 file changed, 22 insertions(+), 41 deletions(-) diff --git a/apis/python/src/tiledbsc/annotation_matrix_group.py b/apis/python/src/tiledbsc/annotation_matrix_group.py index 7f372b6fa6..b90db87669 100644 --- a/apis/python/src/tiledbsc/annotation_matrix_group.py +++ b/apis/python/src/tiledbsc/annotation_matrix_group.py @@ -85,12 +85,9 @@ def to_dict_of_csr(self) -> Dict[str, scipy.sparse.csr_matrix]: Member arrays are returned in sparse CSR format. """ - grp = None - try: # Not all groups have all four of obsm, obsp, varm, and varp. - grp = tiledb.Group(self.uri, mode="r") - except: - pass - if grp == None: + if ( + not self.exists() + ): # Not all groups have all four of obsm, obsp, varm, and varp. if self._verbose: print(f"{self._indent}{self.uri} not found") return {} @@ -99,29 +96,25 @@ def to_dict_of_csr(self) -> Dict[str, scipy.sparse.csr_matrix]: s = util.get_start_stamp() print(f"{self._indent}START read {self.uri}") - # TODO: fold this element-enumeration into the TileDB group class. Maybe on the same PR - # where we support somagroup['name'] with overloading of the [] operator. - matrices_in_group = {} - for element in grp: - with tiledb.open(element.uri) as A: - with tiledb.open(element.uri) as A: - if self._verbose: - s2 = util.get_start_stamp() - print(f"{self._indent}START read {element.uri}") + with self._open() as G: + matrices_in_group = {} + for element in G: + if self._verbose: + s2 = util.get_start_stamp() + print(f"{self._indent}START read {element.uri}") + with tiledb.open(element.uri) as A: df = pd.DataFrame(A[:]) df.set_index(self.dim_name, inplace=True) matrix_name = os.path.basename(element.uri) # e.g. 'X_pca' matrices_in_group[matrix_name] = df.to_numpy() - if self._verbose: - print( - util.format_elapsed( - s2, f"{self._indent}FINISH read {element.uri}" - ) + if self._verbose: + print( + util.format_elapsed( + s2, f"{self._indent}FINISH read {element.uri}" ) - - grp.close() + ) if self._verbose: print(util.format_elapsed(s, f"{self._indent}FINISH read {self.uri}")) @@ -150,37 +143,25 @@ def __getitem__(self, name): member exists. Overloads the `[...]` operator. """ - # TODO: If TileDB-Py were to support `name in G` the line-count could reduce here. with self._open("r") as G: - try: - obj = G[name] # This returns a tiledb.object.Object. - except: + if not name in G: return None - + obj = G[name] # This returns a tiledb.object.Object. if obj.type == tiledb.tiledb.Group: raise Exception( "Internal error: found group element where array element was expected." ) - elif obj.type == tiledb.libtiledb.Array: - return AnnotationMatrix( - uri=obj.uri, name=name, dim_name=self.dim_name, parent=self - ) - else: + if obj.type != tiledb.libtiledb.Array: raise Exception( f"Internal error: found group element neither subgroup nor array: type is {str(obj.type)}" ) + return AnnotationMatrix( + uri=obj.uri, name=name, dim_name=self.dim_name, parent=self + ) def __contains__(self, name): """ Implements the `in` operator, e.g. `"namegoeshere" in soma.obsm/soma.varm`. """ - # TODO: this will get easier once TileDB.group.Group supports `name` in `__contains__`. - # See SC-18057 and https://github.com/single-cell-data/TileDB-SingleCell/issues/113. with self._open("r") as G: - answer = False - try: - # This returns a tiledb.object.Object. - G[name] - return True - except: - return False + return name in G From c9aa3f00a6d7e0a7881bbf94172254e9cfc60361 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 2 Jun 2022 10:27:39 -0400 Subject: [PATCH 4/7] AnnotationPairwiseMatrixGroup updates --- .../src/tiledbsc/annotation_pairwise_matrix_group.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/apis/python/src/tiledbsc/annotation_pairwise_matrix_group.py b/apis/python/src/tiledbsc/annotation_pairwise_matrix_group.py index 9d87a5d9fd..922429b11d 100644 --- a/apis/python/src/tiledbsc/annotation_pairwise_matrix_group.py +++ b/apis/python/src/tiledbsc/annotation_pairwise_matrix_group.py @@ -201,13 +201,5 @@ def __contains__(self, name): """ Implements `"namegoeshere" in soma.obsp/soma.varp`. """ - # TODO: this will get easier once TileDB.group.Group supports `name` in `__contains__`. - # See SC-18057 and https://github.com/single-cell-data/TileDB-SingleCell/issues/113. with self._open("r") as G: - answer = False - try: - # This returns a tiledb.object.Object. - G[name] - return True - except: - return False + return name in G From b3e40e24a28dbbee84b0c7baabde2a1e94c8bfcf Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 2 Jun 2022 10:27:51 -0400 Subject: [PATCH 5/7] AssayMatrixGroup updates --- .../python/src/tiledbsc/assay_matrix_group.py | 38 +++++++------------ 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/apis/python/src/tiledbsc/assay_matrix_group.py b/apis/python/src/tiledbsc/assay_matrix_group.py index ae29cddfc9..3759ad2750 100644 --- a/apis/python/src/tiledbsc/assay_matrix_group.py +++ b/apis/python/src/tiledbsc/assay_matrix_group.py @@ -92,47 +92,35 @@ def __getitem__(self, name): member exists. Overloads the `[...]` operator. """ - # TODO: If TileDB-Py were to support `name in G` the line-count could reduce here. with self._open("r") as G: - try: - obj = G[name] # This returns a tiledb.object.Object. - except: + if not name in G: return None + obj = G[name] # This returns a tiledb.object.Object. if obj.type == tiledb.tiledb.Group: raise Exception( "Internal error: found group element where array element was expected." ) - elif obj.type == tiledb.libtiledb.Array: - return AssayMatrix( - uri=obj.uri, - name=name, - row_dim_name=self.row_dim_name, - col_dim_name=self.col_dim_name, - row_dataframe=self.row_dataframe, - col_dataframe=self.col_dataframe, - parent=self, - ) - - else: + if obj.type != tiledb.libtiledb.Array: raise Exception( f"Internal error: found group element neither subgroup nor array: type is {str(obj.type)}" ) + return AssayMatrix( + uri=obj.uri, + name=name, + row_dim_name=self.row_dim_name, + col_dim_name=self.col_dim_name, + row_dataframe=self.row_dataframe, + col_dataframe=self.col_dataframe, + parent=self, + ) def __contains__(self, name): """ Implements the `in` operator, e.g. `"data" in soma.X`. """ - # TODO: this will get easier once TileDB.group.Group supports `name` in `__contains__`. - # See SC-18057 and https://github.com/single-cell-data/TileDB-SingleCell/issues/113. with self._open("r") as G: - answer = False - try: - # This returns a tiledb.object.Object. - G[name] - return True - except: - return False + return name in G # ---------------------------------------------------------------- def from_matrix_and_dim_values(self, matrix, row_names, col_names) -> None: From ad9f89e7c3d5f32bec1afc77ef07ea1159d83fc8 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 2 Jun 2022 10:28:17 -0400 Subject: [PATCH 6/7] "name in G" updates --- apis/python/src/tiledbsc/soma_collection.py | 9 +-------- apis/python/src/tiledbsc/uns_group.py | 11 +---------- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/apis/python/src/tiledbsc/soma_collection.py b/apis/python/src/tiledbsc/soma_collection.py index ab62f23949..b44579e67a 100644 --- a/apis/python/src/tiledbsc/soma_collection.py +++ b/apis/python/src/tiledbsc/soma_collection.py @@ -74,15 +74,8 @@ def __contains__(self, name: str) -> bool: """ Implements `name in soco` """ - # TODO: this will get easier once TileDB.group.Group supports `name` in `__contains__`. - # See SC-18057 and https://github.com/single-cell-data/TileDB-SingleCell/issues/113. with self._open("r") as G: - answer = False - try: - G[name] # This returns a tiledb.object.Object. - return True - except: - return False + return name in G # At the tiledb-py API level, *all* groups are name-indexable. But here at the tiledbsc-py # level, we implement name-indexing only for some groups: diff --git a/apis/python/src/tiledbsc/uns_group.py b/apis/python/src/tiledbsc/uns_group.py index 196c347aaa..aeeebc0436 100644 --- a/apis/python/src/tiledbsc/uns_group.py +++ b/apis/python/src/tiledbsc/uns_group.py @@ -193,14 +193,5 @@ def __contains__(self, name): """ Implements '"namegoeshere" in soma.uns'. """ - - # TODO: this will get easier once TileDB.group.Group supports `name` in `__contains__`. - # See SC-18057 and https://github.com/single-cell-data/TileDB-SingleCell/issues/113. with self._open("r") as G: - answer = False - try: - # This returns a tiledb.object.Object. - G[name] - return True - except: - return False + return name in G From 146cba8c735456f5654628d8729b0df105f671af Mon Sep 17 00:00:00 2001 From: John Kerl Date: Thu, 2 Jun 2022 10:28:30 -0400 Subject: [PATCH 7/7] with-open-as --- apis/python/tests/test_tiledbsc.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/apis/python/tests/test_tiledbsc.py b/apis/python/tests/test_tiledbsc.py index c307d9c091..4490b953a2 100644 --- a/apis/python/tests/test_tiledbsc.py +++ b/apis/python/tests/test_tiledbsc.py @@ -51,12 +51,8 @@ def test_import_anndata(adata): # raw/var # raw/varm/PCs - # TODO: change this to with-open-as syntax once - # https://github.com/TileDB-Inc/TileDB-Py/pull/1124 - # is in a TileDB-Py release which we articulate a dependency on. - G = tiledb.Group(output_path) - assert G.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY] == "SOMA" - G.close() + with tiledb.Group(output_path) as G: + assert G.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY] == "SOMA" # Check X/data (dense) with tiledb.open(os.path.join(output_path, "X", "data")) as A: