Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SOMA-component dataframe/schema accessors #104

Merged
merged 3 commits into from
May 20, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions apis/python/src/tiledbsc/annotation_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@ def dim_select(self, ids):
with tiledb.open(self.uri) as A: # TODO: with self.open
return A.df[ids]

# ----------------------------------------------------------------
def df(self, ids=None) -> pd.DataFrame:
    """
    Shorthand for `.dim_select()`: `ids` is forwarded unchanged and whatever
    that call produces is returned. Supply `ids` to subselect; leave it as
    `None` to ask for the entire dataframe (the handling of `None` lives in
    `.dim_select()`, not here).
    """
    selected = self.dim_select(ids)
    return selected

# ----------------------------------------------------------------
# TODO: this is a v1 for prototype/demo timeframe -- needs expanding.
def attribute_filter(self, query_string, col_names_to_keep):
Expand Down
8 changes: 8 additions & 0 deletions apis/python/src/tiledbsc/annotation_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ def dim_select(self, ids):
with tiledb.open(self.uri) as A: # TODO: with self.open
return A.df[ids]

# ----------------------------------------------------------------
def df(self, ids=None) -> pd.DataFrame:
    """
    Short spelling of `.dim_select()`. The `ids` argument is handed through
    as-is and the result of `.dim_select()` is returned. Give `ids` to
    subselect; omit it (default `None`) to request the entire dataframe, with
    the `None` case being resolved inside `.dim_select()`.
    """
    result = self.dim_select(ids)
    return result

# ----------------------------------------------------------------
def shape(self):
"""
Expand Down
8 changes: 8 additions & 0 deletions apis/python/src/tiledbsc/annotation_pairwise_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,14 @@ def dim_select(self, ids):
with tiledb.open(self.uri) as A: # TODO: with self.open
return A.df[ids, ids]

# ----------------------------------------------------------------
def df(self, ids=None) -> pd.DataFrame:
    """
    Convenience alias that delegates to `.dim_select()`. `ids` is passed along
    untouched and the value returned by `.dim_select()` is returned to the
    caller. Provide `ids` to subselect; with the default `None` the entire
    dataframe is requested (how `None` is treated is up to `.dim_select()`).
    """
    frame = self.dim_select(ids)
    return frame

# ----------------------------------------------------------------
def from_anndata(self, matrix, dim_values):
"""
Expand Down
16 changes: 16 additions & 0 deletions apis/python/src/tiledbsc/assay_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import scipy
import numpy as np
import pandas as pd

from typing import Optional
import time
Expand Down Expand Up @@ -44,6 +45,13 @@ def __init__(
# We don't have a .shape() method since X is sparse. One should
# instead use the row-counts for the soma's obs and var.

# ----------------------------------------------------------------
def dim_names(self):
    """
    Keystroke-saving accessor returning the two-element tuple
    `(row_dim_name, col_dim_name)`, read from the attributes of the same
    names on this object.
    """
    row_name = self.row_dim_name
    col_name = self.col_dim_name
    return (row_name, col_name)

# ----------------------------------------------------------------
def dim_select(self, obs_ids, var_ids):
"""
Expand All @@ -63,6 +71,14 @@ def dim_select(self, obs_ids, var_ids):
else:
return A.df[obs_ids, var_ids]

# ----------------------------------------------------------------
def df(self, obs_ids=None, var_ids=None) -> pd.DataFrame:
    """
    Shorthand for `.dim_select()`: `obs_ids` and `var_ids` are forwarded
    positionally, in that order, and the result of that call is returned.
    Supply either or both to subselect; leave both as `None` to ask for the
    entire dataframe (the handling of `None` lives in `.dim_select()`).
    """
    selected = self.dim_select(obs_ids, var_ids)
    return selected

# ----------------------------------------------------------------
def from_matrix(self, matrix, row_names, col_names) -> None:
"""
Expand Down
64 changes: 64 additions & 0 deletions apis/python/tests/test_soma_group_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import tiledb
import tiledbsc

import numpy as np

import pytest
import tempfile
import os
Expand Down Expand Up @@ -51,6 +53,16 @@ def test_soma_group_indexing(h5ad_file):
assert soma.X.data.get_dim_names() == ["obs_id", "var_id"]

assert soma.obs.get_dim_names() == ["obs_id"]
assert soma.obs.dim_name == "obs_id"
assert soma.obs.keys() == [
"orig.ident",
"nCount_RNA",
"nFeature_RNA",
"RNA_snn_res.0.8",
"letter.idents",
"groups",
"RNA_snn_res.1",
]
assert set(soma.obs.ids()) == set(
[
b"AAATTCGAATCACG",
Expand Down Expand Up @@ -135,8 +147,27 @@ def test_soma_group_indexing(h5ad_file):
b"TTTAGCTGTACTCT",
]
)
assert soma.obs.df().shape == (80, 7)
assert soma.obs.df(["AAGCAAGAGCTTAG", "TTGGTACTGAATCC"]).shape == (2, 7)
assert list(soma.obs.df().dtypes) == [
np.dtype("int32"),
np.dtype("float64"),
np.dtype("int32"),
np.dtype("int32"),
np.dtype("int32"),
np.dtype("O"),
np.dtype("int32"),
]

assert soma.var.get_dim_names() == ["var_id"]
assert soma.obs.dim_name == "obs_id"
assert soma.var.keys() == [
"vst.mean",
"vst.variance",
"vst.variance.expected",
"vst.variance.standardized",
"vst.variable",
]
assert set(soma.var.ids()) == set(
[
b"AKR1C3",
Expand All @@ -161,11 +192,44 @@ def test_soma_group_indexing(h5ad_file):
b"VDAC3",
]
)
assert soma.var.shape() == (20, 5)
assert soma.var.df(["RUFY1", "AKR1C3"]).shape == (2, 5)
assert list(soma.var.df().dtypes) == [
np.dtype("float64"),
np.dtype("float64"),
np.dtype("float64"),
np.dtype("float64"),
np.dtype("int32"),
]

assert set(soma.obsm.get_member_names()) == set(["X_pca", "X_tsne"])
assert set(soma.obsm.keys()) == set(["X_pca", "X_tsne"])
assert isinstance(soma.obsm["X_pca"], tiledbsc.AnnotationMatrix)
assert soma.obsm["nonesuch"] is None
assert soma.obsm["X_pca"].get_dim_names() == ["obs_id"]
assert soma.obsm["X_pca"].df().shape == (80, 20)
assert list(soma.obsm["X_pca"].df().dtypes) == [
np.dtype("O"),
np.dtype("float64"),
np.dtype("float64"),
np.dtype("float64"),
np.dtype("float64"),
np.dtype("float64"),
np.dtype("float64"),
np.dtype("float64"),
np.dtype("float64"),
np.dtype("float64"),
np.dtype("float64"),
np.dtype("float64"),
np.dtype("float64"),
np.dtype("float64"),
np.dtype("float64"),
np.dtype("float64"),
np.dtype("float64"),
np.dtype("float64"),
np.dtype("float64"),
np.dtype("float64"),
]

assert set(soma.varm.get_member_names()) == set(["PCs"])
assert isinstance(soma.varm["PCs"], tiledbsc.AnnotationMatrix)
Expand Down