diff --git a/apis/python/doc/annotation_dataframe.md b/apis/python/doc/annotation_dataframe.md index b2f685832b..cc23102548 100644 --- a/apis/python/doc/annotation_dataframe.md +++ b/apis/python/doc/annotation_dataframe.md @@ -55,46 +55,59 @@ def keys() -> List[str] Returns the column names for the `obs` or `var` dataframe. For obs and varp, `.keys()` is a keystroke-saver for the more general array-schema accessor `attr_names`. + + +#### keyset + +```python +def keyset() -> Set[str] +``` + +Same as `.keys()` but returns a set. + #### dim\_select ```python -def dim_select(ids) +def dim_select(ids, attrs=None) ``` -Selects a slice out of the dataframe with specified `obs_ids` (for `obs`) or `var_ids` (for `var`). -If `ids` is `None`, the entire dataframe is returned. +Selects a slice out of the dataframe with specified `obs_ids` (for `obs`) or `var_ids` (for +`var`). If `ids` is `None`, the entire dataframe is returned. Similarly, if `attrs` are +provided, they're used for the query; else, all attributes are returned. #### df ```python -def df(ids=None) -> pd.DataFrame +def df(ids=None, attrs=None) -> pd.DataFrame ``` Keystroke-saving alias for `.dim_select()`. If `ids` are provided, they're used -to subselect; if not, the entire dataframe is returned. +to subselect; if not, the entire dataframe is returned. If `attrs` are provided, +they're used for the query; else, all attributes are returned. #### attribute\_filter ```python -def attribute_filter(query_string, col_names_to_keep) +def attribute_filter(query_string, attrs=None) ``` -Selects from obs/var using a TileDB-Py `QueryCondition` string such as -`cell_type == "blood"`. Returns None if the slice is empty. -This is a v1 implementation for the prototype/demo timeframe. +Selects from obs/var using a TileDB-Py `QueryCondition` string such as `cell_type == +"blood"`. If `attrs` is `None`, returns all column names in the dataframe; use `[]` for +`attrs` to select none of them. 
Any column names specified in the `query_string` must be +included in `attrs` if `attrs` is not `None`. Returns `None` if the slice is empty. #### from\_dataframe ```python -def from_dataframe(dataframe: pd.DataFrame, extent: int) -> None +def from_dataframe(dataframe: pd.DataFrame, extent: int = 2048) -> None ``` Populates the `obs` or `var` subgroup for a SOMA object. @@ -104,14 +117,3 @@ Populates the `obs` or `var` subgroup for a SOMA object. - `dataframe`: `anndata.obs`, `anndata.var`, `anndata.raw.var`. - `extent`: TileDB `extent` parameter for the array schema. - - -#### to\_dataframe - -```python -def to_dataframe() -> pd.DataFrame -``` - -Reads the TileDB `obs` or `var` array and returns a type of pandas dataframe -and dimension values. - diff --git a/apis/python/doc/assay_matrix.md b/apis/python/doc/assay_matrix.md index 4ecfda49d2..d22c863f00 100644 --- a/apis/python/doc/assay_matrix.md +++ b/apis/python/doc/assay_matrix.md @@ -62,7 +62,7 @@ Note: currently implemented via data scan -- will be optimized for TileDB core 2 #### dim\_select ```python -def dim_select(obs_ids, var_ids) +def dim_select(obs_ids, var_ids) -> pd.DataFrame ``` Selects a slice out of the matrix with specified `obs_ids` and/or `var_ids`. @@ -80,6 +80,26 @@ def df(obs_ids=None, var_ids=None) -> pd.DataFrame Keystroke-saving alias for `.dim_select()`. If either of `obs_ids` or `var_ids` are provided, they're used to subselect; if not, the entire dataframe is returned. + + +#### csr + +```python +def csr(obs_ids=None, var_ids=None) -> scipy.sparse.csr_matrix +``` + +Like `.df()` but returns results in `scipy.sparse.csr_matrix` format. + + + +#### csc + +```python +def csc(obs_ids=None, var_ids=None) -> scipy.sparse.csc_matrix +``` + +Like `.df()` but returns results in `scipy.sparse.csc_matrix` format. 
+ #### from\_matrix\_and\_dim\_values @@ -91,6 +111,10 @@ def from_matrix_and_dim_values(matrix, row_names, col_names) -> None Imports a matrix -- nominally `scipy.sparse.csr_matrix` or `numpy.ndarray` -- into a TileDB array which is used for `X`, `raw.X`, `obsp` members, and `varp` members. +The `row_names` and `col_names` are row and column labels for the matrix; the matrix may be +`scipy.sparse.csr_matrix`, `scipy.sparse.csc_matrix`, `numpy.ndarray`, etc. +For ingest from `AnnData`, these should be `ann.obs_names` and `ann.var_names`. + #### ingest\_data\_whole diff --git a/apis/python/doc/assay_matrix_group.md b/apis/python/doc/assay_matrix_group.md index e41a6574b0..dc4befbfbb 100644 --- a/apis/python/doc/assay_matrix_group.md +++ b/apis/python/doc/assay_matrix_group.md @@ -86,12 +86,15 @@ def __contains__(name) Implements the `in` operator, e.g. `"data" in soma.X`. - + -#### from\_matrix\_and\_dim\_values +#### add\_layer\_from\_matrix\_and\_dim\_values ```python -def from_matrix_and_dim_values(matrix, row_names, col_names) -> None +def add_layer_from_matrix_and_dim_values(matrix, + row_names: str, + col_names: str, + layer_name="data") -> None ``` Populates the `X` or `raw.X` subgroup for a `SOMA` object. For `X` and `raw.X`, nominally `row_names` will be `anndata.obs_names` and `col_names` will be `anndata.var_names` or `anndata.raw.var_names`. For `obsp` elements, both will be `anndata.obs_names`; for `varp elements, both will be `anndata.var_names`. diff --git a/apis/python/doc/soma.md b/apis/python/doc/soma.md index 0b2e440e10..555398408a 100644 --- a/apis/python/doc/soma.md +++ b/apis/python/doc/soma.md @@ -87,3 +87,35 @@ def var_keys() An alias for `soma.var.ids()`. + + +#### cell\_count + +```python +def cell_count() -> int +``` + +Returns the number of `obs_id` values in `soma.obs`. + + + +#### get\_obs\_value\_counts + +```python +def get_obs_value_counts(obs_label: str) -> pd.DataFrame +``` + +Given an obs label, e.g. 
`cell_type`, returns a dataframe counting the number of different +values for that label in the SOMA. + + + +#### get\_var\_value\_counts + +```python +def get_var_value_counts(var_label: str) -> pd.DataFrame +``` + +Given a var label, e.g. `feature_name`, returns a dataframe counting the number of different +values for that label in the SOMA. + diff --git a/apis/python/doc/soma_collection.md b/apis/python/doc/soma_collection.md index 725f7457c2..95ee166e35 100644 --- a/apis/python/doc/soma_collection.md +++ b/apis/python/doc/soma_collection.md @@ -33,6 +33,16 @@ Create a new `SOMACollection` object. The existing group is opened at the specif - `uri`: URI of the TileDB group - `verbose`: Print status messages + + +#### \_\_len\_\_ + +```python +def __len__() -> int +``` + +Implements `len(soco)`. Returns the number of elements in the collection. + #### add @@ -53,6 +63,16 @@ def remove(soma: SOMA) -> None Removes a `SOMA` from the `SOMACollection`. + + +#### keys + +```python +def keys() -> None +``` + +Returns the names of the SOMAs in the collection. + #### \_\_iter\_\_ @@ -84,3 +104,59 @@ def __getitem__(name) Returns a `SOMA` element at the given name within the group, or `None` if no such member exists. Overloads the `[...]` operator. + + +#### cell\_count + +```python +def cell_count() -> int +``` + +Returns the sum of `soma.cell_count()` over SOMAs in the collection. + + + +#### find\_unique\_obs\_values + +```python +def find_unique_obs_values(obs_label: str) +``` + +Given an `obs` label such as `cell_type` or `tissue`, returns a list of unique values for +that label among all SOMAs in the collection. + + + +#### find\_unique\_var\_values + +```python +def find_unique_var_values(var_label: str) +``` + +Given a `var` label such as `feature_name`, returns a list of unique values for +that label among all SOMAs in the collection. + + + +#### get\_obs\_value\_counts + +```python +def get_obs_value_counts(obs_label: str, do_sum: bool) +``` + +For a given obs label, e.g. 
"cell_type", count the number of occurrences of different values in +SOMAs in the collection. If `do_sum` is false, count the number of SOMAs having that value. If +`do_sum` is true, count the total number of instances of that value across the collection. + + + +#### get\_var\_value\_counts + +```python +def get_var_value_counts(var_label: str, do_sum: bool) +``` + +For a given var label, e.g. "feature_name", count the number of occurrences of different values in +SOMAs in the collection. If `do_sum` is false, count the number of SOMAs having that value. If +`do_sum` is true, count the total number of instances of that value across the collection. + diff --git a/apis/python/doc/tiledb_array.md b/apis/python/doc/tiledb_array.md index 8becfb93a4..851515995f 100644 --- a/apis/python/doc/tiledb_array.md +++ b/apis/python/doc/tiledb_array.md @@ -96,6 +96,16 @@ def has_attr_name(attr_name: str) -> bool Returns true if the array has the specified attribute name, false otherwise. + + +#### has\_attr\_names + +```python +def has_attr_names(attr_names: List[str]) -> bool +``` + +Returns true if the array has all of the specified attribute names, false otherwise. + #### show\_metadata diff --git a/apis/python/doc/util.md b/apis/python/doc/util.md index 540beff85f..f76c787acb 100644 --- a/apis/python/doc/util.md +++ b/apis/python/doc/util.md @@ -2,6 +2,17 @@ # tiledbsc.util + + +#### is\_local\_path + +```python +def is_local_path(path: str) -> bool +``` + +Returns true if the given `path` is a local path, +false otherwise. + #### get\_start\_stamp @@ -24,6 +35,28 @@ def format_elapsed(start_stamp, message: str) Returns the message along with an elapsed-time indicator, with end time relative to start start from `get_start_stamp`. Used for annotating elapsed time of a task. 
+ + +#### X\_and\_ids\_to\_sparse\_matrix + +```python +def X_and_ids_to_sparse_matrix( + Xdf: pd.DataFrame, + row_dim_name: str, + col_dim_name: str, + attr_name: str, + row_labels: List[str], + col_labels: List[str], + return_as: str = "csr" +) -> Union[scipy.sparse.csr_matrix, scipy.sparse.csc_matrix] +``` + +This is needed when we read a TileDB X.df[:]. Since TileDB X is sparse 2D string-dimensioned, +the return value is a dict with three columns -- obs_id, var_id, and value. For +conversion to anndata, we need to make a sparse COO/IJV-format array where the indices are +not strings but ints, matching the obs and var labels. +The `return_as` parameter must be one of `"csr"` or `"csc"`. + ## ETATracker Objects