Skip to content

Commit

Permalink
Predicates for is-SOMA and is-SOMA-collection (#176)
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl authored Jun 21, 2022
1 parent 8e9fe52 commit c9bb1f7
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 26 deletions.
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsc/tiledb_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def _set_soma_object_type_metadata(self) -> None:
"""
with self._open("w") as A:
A.meta[
tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY
tiledbsc.util.SOMA_OBJECT_TYPE_METADATA_KEY
] = self.__class__.__name__

def show_metadata(self, recursively=True, indent=""):
Expand Down
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsc/tiledb_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def _set_soma_object_type_metadata(self):
"""
with self._open("w") as G:
G.meta[
tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY
tiledbsc.util.SOMA_OBJECT_TYPE_METADATA_KEY
] = self.__class__.__name__

def _set_soma_object_type_metadata_recursively(self):
Expand Down
76 changes: 75 additions & 1 deletion apis/python/src/tiledbsc/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,84 @@
import time
from typing import Optional, List, Union

# This is for group/array metadata we write, to help nested-structured traversals (especially those
# that start at the SOMACollection level) confidently navigate with a minimum of introspection on
# group contents.
SOMA_OBJECT_TYPE_METADATA_KEY = "soma_object_type"

# ----------------------------------------------------------------
def is_local_path(path: str) -> bool:
def is_soma(uri: str, ctx: Optional[tiledb.Ctx] = None) -> bool:
    """
    Reports whether the given URI refers to a SOMA.

    The decision is made in three steps: the URI must be a TileDB group; if the group carries
    object-type metadata, that metadata is authoritative; otherwise the group is judged by whether
    it has both `obs` and `var` members.
    """
    # SOMAs are stored as TileDB groups -- but so are SOMACollections and SOMA elements such as obs
    # and var -- so anything that is not a group is ruled out here, and groups get a closer look.
    if tiledb.object_type(uri, ctx=ctx) != "group":
        return False

    with tiledb.Group(uri, mode="r", ctx=ctx) as group:
        metadata = group.meta

        # Groups written by early-access/beta-level code may lack object-type metadata. When the
        # metadata is present we trust it outright, whatever it says.
        if SOMA_OBJECT_TYPE_METADATA_KEY in metadata:
            # The literal "SOMA" stands in for `tiledbsc.SOMA.__name__`, which cannot be used here
            # without creating a circular package import.
            return metadata[SOMA_OBJECT_TYPE_METADATA_KEY] == "SOMA"

        # Untagged group: it could be a SOMACollection, a SOMA, a SOMA element, or some manually
        # created TileDB group. Having both `obs` and `var` members is taken as the sign of a SOMA.
        #
        # A group that was intended to hold a SOMA, was created before object-type metadata
        # existed, and has never been populated is reported as not a SOMA; that small risk of a
        # false negative is accepted.
        return "obs" in group and "var" in group


# ----------------------------------------------------------------
def is_soma_collection(uri: str, ctx: Optional[tiledb.Ctx] = None) -> bool:
    """
    Reports whether the given URI refers to a SOMACollection.

    The decision is made in three steps: the URI must be a TileDB group; if the group carries
    object-type metadata, that metadata is authoritative; otherwise the group is treated as a
    SOMACollection unless it has both `obs` and `var` members (which marks it as a SOMA).
    """
    # SOMACollections are stored as TileDB groups -- but so are SOMAs and SOMA elements such as obs
    # and var -- so anything that is not a group is ruled out here, and groups get a closer look.
    if tiledb.object_type(uri, ctx=ctx) != "group":
        return False

    with tiledb.Group(uri, mode="r", ctx=ctx) as group:
        metadata = group.meta

        # Groups written by early-access/beta-level code may lack object-type metadata. When the
        # metadata is present we trust it outright, whatever it says.
        if SOMA_OBJECT_TYPE_METADATA_KEY in metadata:
            # The literal "SOMACollection" stands in for `tiledbsc.SOMACollection.__name__`, which
            # cannot be used here without creating a circular package import.
            return metadata[SOMA_OBJECT_TYPE_METADATA_KEY] == "SOMACollection"

        # Untagged group: it could be a SOMACollection, a SOMA, a SOMA element, or some manually
        # created TileDB group. Having both `obs` and `var` members is taken as the sign of a SOMA.
        # There is a very slight chance of a false negative if someone populated a SOMACollection
        # using beta-level code and added one SOMA named 'obs' and another named 'var'.
        looks_like_soma = "obs" in group and "var" in group

    # Any other untagged group is reported as a SOMACollection. This covers a collection that was
    # created before object-type metadata existed and has never been populated; the small risk of
    # a false positive is accepted.
    return not looks_like_soma


# ----------------------------------------------------------------
def is_local_path(path: str) -> bool:
"""
Returns information about start time of an event. Nominally float seconds since the epoch,
but articulated here as being compatible with the format_elapsed function.
"""
Expand Down
5 changes: 0 additions & 5 deletions apis/python/src/tiledbsc/util_tiledb.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,6 @@
import tiledb
from typing import Optional

# This is for group/array metadata we write, to help nested-structured traversals (especially those
# that start at the SOMACollection level) confidently navigate with a minimum of introspection on
# group contents.
SOMA_OBJECT_TYPE_METADATA_KEY = "soma_object_type"

# ================================================================
def show_single_cell_group(soma_uri: str, ctx: Optional[tiledb.Ctx] = None):
"""
Expand Down
25 changes: 8 additions & 17 deletions apis/python/tests/test_basic_anndata_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,15 @@ def test_import_anndata(adata):
# raw/varm/PCs

with tiledb.Group(output_path) as G:
assert G.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY] == "SOMA"
assert G.meta[tiledbsc.util.SOMA_OBJECT_TYPE_METADATA_KEY] == "SOMA"

# Check X/data (dense)
with tiledb.open(os.path.join(output_path, "X", "data")) as A:
df = A[:]
keys = list(df.keys())
assert keys == ["value", "obs_id", "var_id"]
assert A.ndim == 2
assert (
A.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY] == "AssayMatrix"
)
assert A.meta[tiledbsc.util.SOMA_OBJECT_TYPE_METADATA_KEY] == "AssayMatrix"
# Convenience accessors
assert soma.X["data"].shape() == soma.X.data.shape()

Expand All @@ -72,17 +70,14 @@ def test_import_anndata(adata):
assert df.columns.to_list() == ["obs_id", "var_id", "value"]
# verify sparsity of raw data
assert df.shape[0] == orig.raw.X.nnz
assert (
A.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY] == "AssayMatrix"
)
assert A.meta[tiledbsc.util.SOMA_OBJECT_TYPE_METADATA_KEY] == "AssayMatrix"

# Check obs
with tiledb.open(os.path.join(output_path, "obs")) as A:
df = A.df[:]
assert df.columns.to_list() == orig.obs_keys()
assert (
A.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY]
== "AnnotationDataFrame"
A.meta[tiledbsc.util.SOMA_OBJECT_TYPE_METADATA_KEY] == "AnnotationDataFrame"
)
assert sorted(soma.obs.ids()) == sorted(list(orig.obs_names))
# Convenience accessors
Expand All @@ -95,8 +90,7 @@ def test_import_anndata(adata):
df = A.df[:]
assert df.columns.to_list() == orig.var_keys()
assert (
A.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY]
== "AnnotationDataFrame"
A.meta[tiledbsc.util.SOMA_OBJECT_TYPE_METADATA_KEY] == "AnnotationDataFrame"
)
assert sorted(soma.var.ids()) == sorted(list(orig.var_names))
# Convenience accessors
Expand All @@ -113,7 +107,7 @@ def test_import_anndata(adata):
assert df.shape[0] == orig.obsm[key].shape[0]
assert soma.obsm[key].shape() == orig.obsm[key].shape
assert (
A.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY]
A.meta[tiledbsc.util.SOMA_OBJECT_TYPE_METADATA_KEY]
== "AnnotationMatrix"
)
# Convenience accessors: soma.obsm.X_pca <-> soma.obsm['X_pca']
Expand All @@ -127,7 +121,7 @@ def test_import_anndata(adata):
assert df.shape[0] == orig.varm[key].shape[0]
assert soma.varm[key].shape() == orig.varm[key].shape
assert (
A.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY]
A.meta[tiledbsc.util.SOMA_OBJECT_TYPE_METADATA_KEY]
== "AnnotationMatrix"
)
# Convenience accessors:
Expand All @@ -147,10 +141,7 @@ def test_import_anndata(adata):
shape = soma.obsp[key].df().shape
assert shape[0] == orig.obsp[key].nnz
assert shape[1] == 1
assert (
A.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY]
== "AssayMatrix"
)
assert A.meta[tiledbsc.util.SOMA_OBJECT_TYPE_METADATA_KEY] == "AssayMatrix"
# Convenience accessors:
for key in soma.obsp.keys():
assert getattr(soma.obsp, key).shape() == soma.obsp[key].shape()
Expand Down
12 changes: 11 additions & 1 deletion apis/python/tests/test_soco_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import tiledb
import tiledbsc
import tiledbsc.io
import tiledbsc.util

import pytest
import tempfile
Expand Down Expand Up @@ -29,7 +30,7 @@ def test_import_anndata(tmp_path):
soco = tiledbsc.SOMACollection(soco_dir)

with tiledb.Group(soma1_dir) as G:
assert G.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY] == "SOMA"
assert G.meta[tiledbsc.util.SOMA_OBJECT_TYPE_METADATA_KEY] == "SOMA"

soco.create_unless_exists()
assert len(soco._get_member_names()) == 0
Expand All @@ -43,3 +44,12 @@ def test_import_anndata(tmp_path):
assert len(soco._get_member_names()) == 1
soco.remove(soma2)
assert len(soco._get_member_names()) == 0

assert tiledbsc.util.is_soma(soma1.uri)
assert tiledbsc.util.is_soma(soma2.uri)
assert not tiledbsc.util.is_soma(soma1.obs.uri)
assert not tiledbsc.util.is_soma(soma2.var.uri)

assert not tiledbsc.util.is_soma_collection(soma2.var.uri)
assert not tiledbsc.util.is_soma_collection(soma2.uri)
assert tiledbsc.util.is_soma_collection(soco.uri)

0 comments on commit c9bb1f7

Please sign in to comment.