Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Predicates for is-SOMA and is-SOMA-collection #176

Merged
merged 1 commit into from
Jun 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsc/tiledb_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def _set_soma_object_type_metadata(self) -> None:
"""
with self._open("w") as A:
A.meta[
tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY
tiledbsc.util.SOMA_OBJECT_TYPE_METADATA_KEY
] = self.__class__.__name__

def show_metadata(self, recursively=True, indent=""):
Expand Down
2 changes: 1 addition & 1 deletion apis/python/src/tiledbsc/tiledb_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def _set_soma_object_type_metadata(self):
"""
with self._open("w") as G:
G.meta[
tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY
tiledbsc.util.SOMA_OBJECT_TYPE_METADATA_KEY
] = self.__class__.__name__

def _set_soma_object_type_metadata_recursively(self):
Expand Down
76 changes: 75 additions & 1 deletion apis/python/src/tiledbsc/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,84 @@
import time
from typing import Optional, List, Union

# This is for group/array metadata we write, to help nested-structured traversals (especially those
# that start at the SOMACollection level) confidently navigate with a minimum of introspection on
# group contents.
SOMA_OBJECT_TYPE_METADATA_KEY = "soma_object_type"

# ----------------------------------------------------------------
def is_local_path(path: str) -> bool:
def is_soma(uri: str, ctx: Optional[tiledb.Ctx] = None) -> bool:
    """
    Tells whether the URI points to a SOMA or not.

    :param uri: Location of the TileDB object to inspect.
    :param ctx: Optional TileDB context, forwarded to the TileDB calls made here.
    :return: True if the URI is judged to hold a SOMA, False otherwise.
    """
    # SOMAs are stored as TileDB groups, but so are SOMACollections and SOMA elements such as obs
    # and var. Being a group is therefore necessary but not sufficient: anything else is rejected
    # right away, and groups are examined further below.
    object_kind = tiledb.object_type(uri, ctx=ctx)
    if object_kind != "group":
        return False

    with tiledb.Group(uri, mode="r", ctx=ctx) as group:
        # Object-type metadata, when present, is taken as authoritative. It can be absent if the
        # group was written by early-access/beta-level code that did not yet record it.
        metadata = group.meta
        if SOMA_OBJECT_TYPE_METADATA_KEY in metadata:
            # The literal "SOMA" stands in for `tiledbsc.SOMA.__name__`; using the class itself
            # would need a circular package import.
            return metadata[SOMA_OBJECT_TYPE_METADATA_KEY] == "SOMA"

        # No metadata: the group may be a SOMACollection, a SOMA, a SOMA element, or some manually
        # created TileDB group. Having both "obs" and "var" members is treated as the mark of a
        # SOMA.
        #
        # A group that was meant to hold a SOMA, was created before object-type metadata existed,
        # and has never been populated will be reported as not-a-SOMA. That small risk of a false
        # negative is accepted.
        return "obs" in group and "var" in group


# ----------------------------------------------------------------
def is_soma_collection(uri: str, ctx: Optional[tiledb.Ctx] = None) -> bool:
    """
    Tells whether the URI points to a SOMACollection or not.

    :param uri: Location of the TileDB object to inspect.
    :param ctx: Optional TileDB context, forwarded to the TileDB calls made here.
    :return: True if the URI is judged to hold a SOMACollection, False otherwise.
    """
    # SOMACollections are stored as TileDB groups, but so are SOMAs and SOMA elements such as obs
    # and var. Being a group is therefore necessary but not sufficient: anything else is rejected
    # right away, and groups are examined further below.
    object_kind = tiledb.object_type(uri, ctx=ctx)
    if object_kind != "group":
        return False

    with tiledb.Group(uri, mode="r", ctx=ctx) as group:
        # Object-type metadata, when present, is taken as authoritative. It can be absent if the
        # group was written by early-access/beta-level code that did not yet record it.
        metadata = group.meta
        if SOMA_OBJECT_TYPE_METADATA_KEY in metadata:
            # The literal "SOMACollection" is compared directly rather than via the class's
            # `__name__`, which avoids a circular package import.
            return metadata[SOMA_OBJECT_TYPE_METADATA_KEY] == "SOMACollection"

        # No metadata: the group may be a SOMACollection, a SOMA, a SOMA element, or some manually
        # created TileDB group. Having both "obs" and "var" members is treated as the mark of a
        # SOMA, so such a group is reported as not-a-SOMACollection. There is a very slight chance
        # of a false negative if someone populated a SOMACollection using beta-level code and
        # added one SOMA named 'obs' and another named 'var'.
        looks_like_soma = "obs" in group and "var" in group

        # Any other metadata-less group is reported as a SOMACollection. This covers a collection
        # created before object-type metadata existed and never populated, at the cost of a small
        # risk of false positives.
        return not looks_like_soma


# ----------------------------------------------------------------
def is_local_path(path: str) -> bool:
"""
Returns information about start time of an event. Nominally float seconds since the epoch,
but articulated here as being compatible with the format_elapsed function.
"""
Expand Down
5 changes: 0 additions & 5 deletions apis/python/src/tiledbsc/util_tiledb.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,6 @@
import tiledb
from typing import Optional

# This is for group/array metadata we write, to help nested-structured traversals (especially those
# that start at the SOMACollection level) confidently navigate with a minimum of introspection on
# group contents.
SOMA_OBJECT_TYPE_METADATA_KEY = "soma_object_type"

# ================================================================
def show_single_cell_group(soma_uri: str, ctx: Optional[tiledb.Ctx] = None):
"""
Expand Down
25 changes: 8 additions & 17 deletions apis/python/tests/test_basic_anndata_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,15 @@ def test_import_anndata(adata):
# raw/varm/PCs

with tiledb.Group(output_path) as G:
assert G.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY] == "SOMA"
assert G.meta[tiledbsc.util.SOMA_OBJECT_TYPE_METADATA_KEY] == "SOMA"

# Check X/data (dense)
with tiledb.open(os.path.join(output_path, "X", "data")) as A:
df = A[:]
keys = list(df.keys())
assert keys == ["value", "obs_id", "var_id"]
assert A.ndim == 2
assert (
A.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY] == "AssayMatrix"
)
assert A.meta[tiledbsc.util.SOMA_OBJECT_TYPE_METADATA_KEY] == "AssayMatrix"
# Convenience accessors
assert soma.X["data"].shape() == soma.X.data.shape()

Expand All @@ -72,17 +70,14 @@ def test_import_anndata(adata):
assert df.columns.to_list() == ["obs_id", "var_id", "value"]
# verify sparsity of raw data
assert df.shape[0] == orig.raw.X.nnz
assert (
A.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY] == "AssayMatrix"
)
assert A.meta[tiledbsc.util.SOMA_OBJECT_TYPE_METADATA_KEY] == "AssayMatrix"

# Check obs
with tiledb.open(os.path.join(output_path, "obs")) as A:
df = A.df[:]
assert df.columns.to_list() == orig.obs_keys()
assert (
A.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY]
== "AnnotationDataFrame"
A.meta[tiledbsc.util.SOMA_OBJECT_TYPE_METADATA_KEY] == "AnnotationDataFrame"
)
assert sorted(soma.obs.ids()) == sorted(list(orig.obs_names))
# Convenience accessors
Expand All @@ -95,8 +90,7 @@ def test_import_anndata(adata):
df = A.df[:]
assert df.columns.to_list() == orig.var_keys()
assert (
A.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY]
== "AnnotationDataFrame"
A.meta[tiledbsc.util.SOMA_OBJECT_TYPE_METADATA_KEY] == "AnnotationDataFrame"
)
assert sorted(soma.var.ids()) == sorted(list(orig.var_names))
# Convenience accessors
Expand All @@ -113,7 +107,7 @@ def test_import_anndata(adata):
assert df.shape[0] == orig.obsm[key].shape[0]
assert soma.obsm[key].shape() == orig.obsm[key].shape
assert (
A.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY]
A.meta[tiledbsc.util.SOMA_OBJECT_TYPE_METADATA_KEY]
== "AnnotationMatrix"
)
# Convenience accessors: soma.obsm.X_pca <-> soma.obsm['X_pca']
Expand All @@ -127,7 +121,7 @@ def test_import_anndata(adata):
assert df.shape[0] == orig.varm[key].shape[0]
assert soma.varm[key].shape() == orig.varm[key].shape
assert (
A.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY]
A.meta[tiledbsc.util.SOMA_OBJECT_TYPE_METADATA_KEY]
== "AnnotationMatrix"
)
# Convenience accessors:
Expand All @@ -147,10 +141,7 @@ def test_import_anndata(adata):
shape = soma.obsp[key].df().shape
assert shape[0] == orig.obsp[key].nnz
assert shape[1] == 1
assert (
A.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY]
== "AssayMatrix"
)
assert A.meta[tiledbsc.util.SOMA_OBJECT_TYPE_METADATA_KEY] == "AssayMatrix"
# Convenience accessors:
for key in soma.obsp.keys():
assert getattr(soma.obsp, key).shape() == soma.obsp[key].shape()
Expand Down
12 changes: 11 additions & 1 deletion apis/python/tests/test_soco_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import tiledb
import tiledbsc
import tiledbsc.io
import tiledbsc.util

import pytest
import tempfile
Expand Down Expand Up @@ -29,7 +30,7 @@ def test_import_anndata(tmp_path):
soco = tiledbsc.SOMACollection(soco_dir)

with tiledb.Group(soma1_dir) as G:
assert G.meta[tiledbsc.util_tiledb.SOMA_OBJECT_TYPE_METADATA_KEY] == "SOMA"
assert G.meta[tiledbsc.util.SOMA_OBJECT_TYPE_METADATA_KEY] == "SOMA"

soco.create_unless_exists()
assert len(soco._get_member_names()) == 0
Expand All @@ -43,3 +44,12 @@ def test_import_anndata(tmp_path):
assert len(soco._get_member_names()) == 1
soco.remove(soma2)
assert len(soco._get_member_names()) == 0

assert tiledbsc.util.is_soma(soma1.uri)
assert tiledbsc.util.is_soma(soma2.uri)
assert not tiledbsc.util.is_soma(soma1.obs.uri)
assert not tiledbsc.util.is_soma(soma2.var.uri)

assert not tiledbsc.util.is_soma_collection(soma2.var.uri)
assert not tiledbsc.util.is_soma_collection(soma2.uri)
assert tiledbsc.util.is_soma_collection(soco.uri)