Skip to content

Commit

Permalink
simplify read_as_pandas
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Oct 19, 2022
1 parent 1606c12 commit 4e89837
Showing 1 changed file with 47 additions and 1 deletion.
48 changes: 47 additions & 1 deletion apis/python/src/tiledbsoma/soma_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

# from .query_condition import QueryCondition
from . import query_condition as qcmodule
from . import util, util_arrow, util_tiledb
from . import util, util_arrow
from .logging import log_io
from .soma_collection import SOMACollectionBase
from .tiledb_array import TileDBArray
Expand Down Expand Up @@ -216,6 +216,52 @@ def read_all(
)
)

def read_as_pandas(
    self,
    *,
    ids: Optional[Ids] = None,
    value_filter: Optional[str] = None,
    column_names: Optional[Sequence[str]] = None,
    result_order: Optional[SOMAResultOrder] = None,
    # to rename index to 'obs_id' or 'var_id', if desired, for anndata
    id_column_name: Optional[str] = None,
) -> Iterator[pd.DataFrame]:
    """
    Reads from SOMA storage into memory, yielding one Pandas dataframe per
    batch. For ``to_anndata``, as well as for any interactive use where the
    user wants a Pandas dataframe. Returns a generator over dataframes for
    batched read. See also ``read_as_pandas_all`` for a convenience wrapper.

    :param ids: optional IDs restricting which rows are read; forwarded to ``read``.
    :param value_filter: optional filter expression; forwarded to ``read``.
    :param column_names: optional subset of columns to return; forwarded to ``read``.
    :param result_order: optional result ordering; forwarded to ``read``.
    :param id_column_name: if provided, the dataframe index is renamed to
        this value (e.g. ``'obs_id'`` or ``'var_id'``) for AnnData use.
    """
    for tbl in self.read(
        ids=ids,
        value_filter=value_filter,
        column_names=column_names,
        result_order=result_order,
    ):
        df = tbl.to_pandas()
        # Bug fix: id_column_name was previously accepted but silently
        # ignored; apply the documented index rename here.
        if id_column_name is not None:
            df = df.rename_axis(id_column_name)
        yield df

def read_as_pandas_all(
    self,
    *,
    ids: Optional[Ids] = None,
    value_filter: Optional[str] = None,
    column_names: Optional[Sequence[str]] = None,
    result_order: Optional[SOMAResultOrder] = None,
    # to rename index to 'obs_id' or 'var_id', if desired, for anndata
    id_column_name: Optional[str] = None,
) -> pd.DataFrame:
    """
    Convenience wrapper around ``read_as_pandas``: drains the batched read
    and concatenates every partial result into a single dataframe. Its
    nominal use is to simplify unit-test cases.
    """
    # All keyword arguments are forwarded unchanged to the batched reader.
    batches = self.read_as_pandas(
        ids=ids,
        value_filter=value_filter,
        column_names=column_names,
        result_order=result_order,
        id_column_name=id_column_name,
    )
    return pd.concat(batches)

def _get_is_sparse(self) -> bool:
if self._cached_is_sparse is None:

Expand Down

0 comments on commit 4e89837

Please sign in to comment.