From 9c3c83929dc239d12fd82f87c5859942054db126 Mon Sep 17 00:00:00 2001
From: John Kerl <kerl.john.r@gmail.com>
Date: Fri, 20 May 2022 11:41:02 -0400
Subject: [PATCH 1/3] matrix.df() accessors

---
 apis/python/src/tiledbsc/annotation_dataframe.py       | 8 ++++++++
 apis/python/src/tiledbsc/annotation_matrix.py          | 8 ++++++++
 apis/python/src/tiledbsc/annotation_pairwise_matrix.py | 8 ++++++++
 apis/python/src/tiledbsc/assay_matrix.py               | 9 +++++++++
 4 files changed, 33 insertions(+)

diff --git a/apis/python/src/tiledbsc/annotation_dataframe.py b/apis/python/src/tiledbsc/annotation_dataframe.py
index bdd4cec683..7dd46a5846 100644
--- a/apis/python/src/tiledbsc/annotation_dataframe.py
+++ b/apis/python/src/tiledbsc/annotation_dataframe.py
@@ -78,6 +78,14 @@ def dim_select(self, ids):
             with tiledb.open(self.uri) as A:  # TODO: with self.open
                 return A.df[ids]
 
+    # ----------------------------------------------------------------
+    def df(self, ids=None) -> pd.DataFrame:
+        """
+        Keystroke-saving alias for `.dim_select()`; `ids` is forwarded to it unchanged. If `ids`
+        is provided, it's used to subselect rows; if not, the entire dataframe is returned.
+        """
+        return self.dim_select(ids)
+
     # ----------------------------------------------------------------
     # TODO: this is a v1 for prototype/demo timeframe -- needs expanding.
     def attribute_filter(self, query_string, col_names_to_keep):
diff --git a/apis/python/src/tiledbsc/annotation_matrix.py b/apis/python/src/tiledbsc/annotation_matrix.py
index 36c17bdbf4..0a21497051 100644
--- a/apis/python/src/tiledbsc/annotation_matrix.py
+++ b/apis/python/src/tiledbsc/annotation_matrix.py
@@ -43,6 +43,14 @@ def dim_select(self, ids):
             with tiledb.open(self.uri) as A:  # TODO: with self.open
                 return A.df[ids]
 
+    # ----------------------------------------------------------------
+    def df(self, ids=None) -> pd.DataFrame:
+        """
+        Keystroke-saving alias for `.dim_select()`; `ids` is forwarded to it unchanged. If `ids`
+        is provided, it's used to subselect rows; if not, the entire dataframe is returned.
+        """
+        return self.dim_select(ids)
+
     # ----------------------------------------------------------------
     def shape(self):
         """
diff --git a/apis/python/src/tiledbsc/annotation_pairwise_matrix.py b/apis/python/src/tiledbsc/annotation_pairwise_matrix.py
index 2637398083..94b4dbd94b 100644
--- a/apis/python/src/tiledbsc/annotation_pairwise_matrix.py
+++ b/apis/python/src/tiledbsc/annotation_pairwise_matrix.py
@@ -60,6 +60,14 @@ def dim_select(self, ids):
             with tiledb.open(self.uri) as A:  # TODO: with self.open
                 return A.df[ids, ids]
 
+    # ----------------------------------------------------------------
+    def df(self, ids=None) -> pd.DataFrame:
+        """
+        Keystroke-saving alias for `.dim_select()`. If `ids` is provided, it's used to subselect
+        along both dimensions (`A.df[ids, ids]`); if not, the entire dataframe is returned.
+        """
+        return self.dim_select(ids)
+
     # ----------------------------------------------------------------
     def from_anndata(self, matrix, dim_values):
         """
diff --git a/apis/python/src/tiledbsc/assay_matrix.py b/apis/python/src/tiledbsc/assay_matrix.py
index 02d0c44a85..5ab2af228b 100644
--- a/apis/python/src/tiledbsc/assay_matrix.py
+++ b/apis/python/src/tiledbsc/assay_matrix.py
@@ -7,6 +7,7 @@
 
 import scipy
 import numpy as np
+import pandas as pd
 
 from typing import Optional
 import time
@@ -63,6 +64,14 @@ def dim_select(self, obs_ids, var_ids):
                 else:
                     return A.df[obs_ids, var_ids]
 
+    # ----------------------------------------------------------------
+    def df(self, obs_ids=None, var_ids=None) -> pd.DataFrame:
+        """
+        Keystroke-saving alias for `.dim_select()`. If either of `obs_ids` or `var_ids`
+        is provided, it's used to subselect; if neither is, the entire dataframe is returned.
+        """
+        return self.dim_select(obs_ids, var_ids)
+
     # ----------------------------------------------------------------
     def from_matrix(self, matrix, row_names, col_names) -> None:
         """

From aac677c0259c7c63b39cf7350caf9a3c1852dea9 Mon Sep 17 00:00:00 2001
From: John Kerl <kerl.john.r@gmail.com>
Date: Fri, 20 May 2022 12:02:46 -0400
Subject: [PATCH 2/3] unit-test cases

---
 apis/python/src/tiledbsc/assay_matrix.py      |  7 ++
 apis/python/tests/test_soma_group_indexing.py | 64 +++++++++++++++++++
 2 files changed, 71 insertions(+)

diff --git a/apis/python/src/tiledbsc/assay_matrix.py b/apis/python/src/tiledbsc/assay_matrix.py
index 5ab2af228b..b50a24214b 100644
--- a/apis/python/src/tiledbsc/assay_matrix.py
+++ b/apis/python/src/tiledbsc/assay_matrix.py
@@ -45,6 +45,13 @@ def __init__(
     # We don't have a .shape() method since X is sparse. One should
     # instead use the row-counts for the soma's obs and var.
 
+    # ----------------------------------------------------------------
+    def dim_names(self):
+        """
+        Keystroke-saving accessor for `row_dim_name` and `col_dim_name`.
+        """
+        return (self.row_dim_name, self.col_dim_name)
+
     # ----------------------------------------------------------------
     def dim_select(self, obs_ids, var_ids):
         """
diff --git a/apis/python/tests/test_soma_group_indexing.py b/apis/python/tests/test_soma_group_indexing.py
index 8fd73b5876..793bd35b6d 100644
--- a/apis/python/tests/test_soma_group_indexing.py
+++ b/apis/python/tests/test_soma_group_indexing.py
@@ -2,6 +2,8 @@
 import tiledb
 import tiledbsc
 
+import numpy as np
+
 import pytest
 import tempfile
 import os
@@ -51,6 +53,16 @@ def test_soma_group_indexing(h5ad_file):
     assert soma.X.data.get_dim_names() == ["obs_id", "var_id"]
 
     assert soma.obs.get_dim_names() == ["obs_id"]
+    assert soma.obs.dim_name == "obs_id"
+    assert soma.obs.keys() == [
+        "orig.ident",
+        "nCount_RNA",
+        "nFeature_RNA",
+        "RNA_snn_res.0.8",
+        "letter.idents",
+        "groups",
+        "RNA_snn_res.1",
+    ]
     assert set(soma.obs.ids()) == set(
         [
             b"AAATTCGAATCACG",
@@ -135,8 +147,27 @@ def test_soma_group_indexing(h5ad_file):
             b"TTTAGCTGTACTCT",
         ]
     )
+    assert soma.obs.df().shape == (80, 7)
+    assert soma.obs.df(["AAGCAAGAGCTTAG", "TTGGTACTGAATCC"]).shape == (2, 7)
+    assert list(soma.obs.df().dtypes) == [
+        np.dtype("int32"),
+        np.dtype("float64"),
+        np.dtype("int32"),
+        np.dtype("int32"),
+        np.dtype("int32"),
+        np.dtype("O"),
+        np.dtype("int32"),
+    ]
 
     assert soma.var.get_dim_names() == ["var_id"]
+    assert soma.obs.dim_name == "obs_id"
+    assert soma.var.keys() == [
+        "vst.mean",
+        "vst.variance",
+        "vst.variance.expected",
+        "vst.variance.standardized",
+        "vst.variable",
+    ]
     assert set(soma.var.ids()) == set(
         [
             b"AKR1C3",
@@ -161,11 +192,44 @@ def test_soma_group_indexing(h5ad_file):
             b"VDAC3",
         ]
     )
+    assert soma.var.shape() == (20, 5)
+    assert soma.var.df(["RUFY1", "AKR1C3"]).shape == (2, 5)
+    assert list(soma.var.df().dtypes) == [
+        np.dtype("float64"),
+        np.dtype("float64"),
+        np.dtype("float64"),
+        np.dtype("float64"),
+        np.dtype("int32"),
+    ]
 
     assert set(soma.obsm.get_member_names()) == set(["X_pca", "X_tsne"])
+    assert set(soma.obsm.keys()) == set(["X_pca", "X_tsne"])
     assert isinstance(soma.obsm["X_pca"], tiledbsc.AnnotationMatrix)
     assert soma.obsm["nonesuch"] is None
     assert soma.obsm["X_pca"].get_dim_names() == ["obs_id"]
+    assert soma.obsm["X_pca"].df().shape == (80, 20)
+    assert list(soma.obsm["X_pca"].df().dtypes) == [
+        np.dtype("O"),
+        np.dtype("float64"),
+        np.dtype("float64"),
+        np.dtype("float64"),
+        np.dtype("float64"),
+        np.dtype("float64"),
+        np.dtype("float64"),
+        np.dtype("float64"),
+        np.dtype("float64"),
+        np.dtype("float64"),
+        np.dtype("float64"),
+        np.dtype("float64"),
+        np.dtype("float64"),
+        np.dtype("float64"),
+        np.dtype("float64"),
+        np.dtype("float64"),
+        np.dtype("float64"),
+        np.dtype("float64"),
+        np.dtype("float64"),
+        np.dtype("float64"),
+    ]
 
     assert set(soma.varm.get_member_names()) == set(["PCs"])
     assert isinstance(soma.varm["PCs"], tiledbsc.AnnotationMatrix)

From 30f8d254ce67abf7d15328c1e17ebc00a88ed1e6 Mon Sep 17 00:00:00 2001
From: John Kerl <kerl.john.r@gmail.com>
Date: Fri, 20 May 2022 14:31:37 -0400
Subject: [PATCH 3/3] soma.foo.schema() -> soma.foo.tiledb_array_schema()

---
 .../src/tiledbsc/annotation_dataframe.py      |  4 +--
 apis/python/src/tiledbsc/assay_matrix.py      |  7 -----
 apis/python/src/tiledbsc/tiledb_array.py      | 30 +++++++++++++++----
 apis/python/tests/test_soma_group_indexing.py | 12 ++++----
 4 files changed, 33 insertions(+), 20 deletions(-)

diff --git a/apis/python/src/tiledbsc/annotation_dataframe.py b/apis/python/src/tiledbsc/annotation_dataframe.py
index 7dd46a5846..e742d0eecc 100644
--- a/apis/python/src/tiledbsc/annotation_dataframe.py
+++ b/apis/python/src/tiledbsc/annotation_dataframe.py
@@ -61,9 +61,9 @@ def ids(self) -> List[str]:
     def keys(self) -> List[str]:
         """
-        Returns the column names for the `obs` or `var` dataframe.  For obs and varp, `.keys()` is a
-        keystroke-saver for the more general array-schema accessor `get_attr_names`.
+        Returns the column names for the `obs` or `var` dataframe.  For obs and var, `.keys()` is a
+        keystroke-saver for the more general array-schema accessor `attr_names`.
         """
-        return self.get_attr_names()
+        return self.attr_names()
 
     # ----------------------------------------------------------------
     def dim_select(self, ids):
diff --git a/apis/python/src/tiledbsc/assay_matrix.py b/apis/python/src/tiledbsc/assay_matrix.py
index b50a24214b..5ab2af228b 100644
--- a/apis/python/src/tiledbsc/assay_matrix.py
+++ b/apis/python/src/tiledbsc/assay_matrix.py
@@ -45,13 +45,6 @@ def __init__(
     # We don't have a .shape() method since X is sparse. One should
     # instead use the row-counts for the soma's obs and var.
 
-    # ----------------------------------------------------------------
-    def dim_names(self):
-        """
-        Keystroke-saving accessor for `row_dim_name` and `col_dim_name`.
-        """
-        return (self.row_dim_name, self.col_dim_name)
-
     # ----------------------------------------------------------------
     def dim_select(self, obs_ids, var_ids):
         """
diff --git a/apis/python/src/tiledbsc/tiledb_array.py b/apis/python/src/tiledbsc/tiledb_array.py
index f03a41e19a..608fad7df9 100644
--- a/apis/python/src/tiledbsc/tiledb_array.py
+++ b/apis/python/src/tiledbsc/tiledb_array.py
@@ -3,12 +3,14 @@
 from .tiledb_object import TileDBObject
 from .tiledb_group import TileDBGroup
 
-from typing import Optional, List
+from typing import Optional, List, Dict
 
 
 class TileDBArray(TileDBObject):
     """
     Wraps arrays from TileDB-Py by retaining a URI, verbose flag, etc.
+    Also serves as an abstraction layer to hide TileDB-specific details from the API, unless
+    requested.
     """
 
     def __init__(
@@ -37,29 +39,47 @@ def open(self):
         A = tiledb.open(self.uri)
         return A
 
-    def schema(self):
+    def tiledb_array_schema(self):
         """
         Returns the TileDB array schema.
         """
         with tiledb.open(self.uri) as A:
             return A.schema
 
-    def get_dim_names(self) -> List[str]:
+    def dim_names(self) -> List[str]:
         """
         Reads the dimension names from the schema: for example, ['obs_id', 'var_id'].
         """
         with tiledb.open(self.uri) as A:
             return [A.schema.domain.dim(i).name for i in range(A.schema.domain.ndim)]
 
-    def get_attr_names(self) -> List[str]:
+    def dim_names_to_types(self) -> Dict[str, object]:
+        """
+        Returns a dict mapping each dimension name to the `dtype` the schema reports for it.
+        """
+        with tiledb.open(self.uri) as A:
+            dom = A.schema.domain
+            return {dim.name: dim.dtype for dim in map(dom.dim, range(dom.ndim))}
+
+    def attr_names(self) -> List[str]:
         """
         Reads the attribute names from the schema: for example, the list of column names in a dataframe.
         """
         with tiledb.open(self.uri) as A:
             return [A.schema.attr(i).name for i in range(A.schema.nattr)]
 
+    def attr_names_to_types(self) -> Dict[str, object]:
+        """
+        Returns a dict mapping each attribute name to the `dtype` the schema reports for it.
+        """
+        with tiledb.open(self.uri) as A:
+            schema = A.schema
+            return {
+                attr.name: attr.dtype for attr in map(schema.attr, range(schema.nattr))
+            }
+
     def has_attr_name(self, attr_name: str) -> bool:
         """
         Returns true if the array has the specified attribute name, false otherwise.
         """
-        return attr_name in self.get_attr_names()
+        return attr_name in self.attr_names()
diff --git a/apis/python/tests/test_soma_group_indexing.py b/apis/python/tests/test_soma_group_indexing.py
index 793bd35b6d..9bcaffdd37 100644
--- a/apis/python/tests/test_soma_group_indexing.py
+++ b/apis/python/tests/test_soma_group_indexing.py
@@ -50,9 +50,9 @@ def test_soma_group_indexing(h5ad_file):
         ["uns", "varm", "X", "raw", "obsp", "varp", "var", "obsm", "obs"]
     )
     assert set(soma.X.get_member_names()) == set(["data"])
-    assert soma.X.data.get_dim_names() == ["obs_id", "var_id"]
+    assert soma.X.data.dim_names() == ["obs_id", "var_id"]
 
-    assert soma.obs.get_dim_names() == ["obs_id"]
+    assert soma.obs.dim_names() == ["obs_id"]
     assert soma.obs.dim_name == "obs_id"
     assert soma.obs.keys() == [
         "orig.ident",
@@ -159,7 +159,7 @@ def test_soma_group_indexing(h5ad_file):
         np.dtype("int32"),
     ]
 
-    assert soma.var.get_dim_names() == ["var_id"]
-    assert soma.obs.dim_name == "obs_id"
+    assert soma.var.dim_names() == ["var_id"]
+    assert soma.var.dim_name == "var_id"
     assert soma.var.keys() == [
         "vst.mean",
@@ -206,7 +206,7 @@ def test_soma_group_indexing(h5ad_file):
     assert set(soma.obsm.keys()) == set(["X_pca", "X_tsne"])
     assert isinstance(soma.obsm["X_pca"], tiledbsc.AnnotationMatrix)
     assert soma.obsm["nonesuch"] is None
-    assert soma.obsm["X_pca"].get_dim_names() == ["obs_id"]
+    assert soma.obsm["X_pca"].dim_names() == ["obs_id"]
     assert soma.obsm["X_pca"].df().shape == (80, 20)
     assert list(soma.obsm["X_pca"].df().dtypes) == [
         np.dtype("O"),
@@ -235,12 +235,12 @@ def test_soma_group_indexing(h5ad_file):
     assert isinstance(soma.varm["PCs"], tiledbsc.AnnotationMatrix)
     assert soma.varm["nonesuch"] is None
     assert soma.varm.get_member_names() == ["PCs"]
-    assert soma.varm["PCs"].get_dim_names() == ["var_id"]
+    assert soma.varm["PCs"].dim_names() == ["var_id"]
 
     assert set(soma.obsp.get_member_names()) == set(["distances"])
     assert isinstance(soma.obsp["distances"], tiledbsc.AnnotationPairwiseMatrix)
     assert soma.varp["nonesuch"] is None
-    assert soma.obsp["distances"].get_dim_names() == ["obs_id_i", "obs_id_j"]
+    assert soma.obsp["distances"].dim_names() == ["obs_id_i", "obs_id_j"]
 
     assert set(soma.uns.get_member_names()) == set(["neighbors"])
     assert isinstance(soma.uns["neighbors"], tiledbsc.UnsGroup)