From 103c53e4edc317032b79df1b50cc63435d60f345 Mon Sep 17 00:00:00 2001 From: Lilly Date: Thu, 30 Nov 2023 10:54:05 +0100 Subject: [PATCH 1/4] Added smillie_2019 dataloader and renamed smillie to tasccoda_example --- pertpy/data/__init__.py | 3 ++- pertpy/data/_datasets.py | 33 +++++++++++++++++++++++++++--- pertpy/plot/_coda.py | 4 ++-- pertpy/tools/_coda/_base_coda.py | 2 +- pertpy/tools/_coda/_tasccoda.py | 16 +++++++-------- tests/tools/_coda/test_tasccoda.py | 2 +- 6 files changed, 44 insertions(+), 16 deletions(-) diff --git a/pertpy/data/__init__.py b/pertpy/data/__init__.py index 7a5a965a..f176cf06 100644 --- a/pertpy/data/__init__.py +++ b/pertpy/data/__init__.py @@ -38,11 +38,12 @@ schraivogel_2020_tap_screen_chr11, sciplex3_raw, shifrut_2018, - smillie, + smillie_2019, srivatsan_2020_sciplex2, srivatsan_2020_sciplex3, srivatsan_2020_sciplex4, stephenson_2021_subsampled, + tasccoda_example, tian_2019_day7neuron, tian_2019_ipsc, tian_2021_crispra, diff --git a/pertpy/data/_datasets.py b/pertpy/data/_datasets.py index 09f3c674..4621d8b6 100644 --- a/pertpy/data/_datasets.py +++ b/pertpy/data/_datasets.py @@ -133,8 +133,10 @@ def sciplex3_raw() -> AnnData: # pragma: no cover return adata -def smillie() -> AnnData: # pragma: no cover - """scRNA-seq data of the small intestine of mice under Ulcerative Colitis. +def tasccoda_example() -> AnnData: # pragma: no cover + """Example for the coda part of a mudata object (mudata['coda']) when preparing a dataset for processing with tascCODA. + Created using the smillie dataset, which comprises scRNA-seq data of the small intestine of mice under Ulcerative Colitis. + The full dataset containing the actual count data can be obtained via smillie_2019(). References: Smillie, Christopher S et al. “Intra- and Inter-cellular Rewiring of the Human Colon during Ulcerative Colitis.” @@ -143,7 +145,7 @@ def smillie() -> AnnData: # pragma: no cover Returns: :class:`~anndata.AnnData` object of the dataset. """ - output_file_name = "smillie.h5ad" + output_file_name = "tasccoda_smillie.h5ad" output_file_path = settings.datasetdir.__str__() + "/" + output_file_name if not Path(output_file_path).exists(): _download( @@ -1460,3 +1462,28 @@ def dong_2023() -> AnnData: # pragma: no cover adata = sc.read_h5ad(output_file_path) return adata + + +def smillie_2019() -> AnnData: # pragma: no cover + """scRNA-seq data of the small intestine of mice under Ulcerative Colitis. + The resulting AnnData when preparing this dataset for processing with tascCODA is available via tasccoda_example(). + + References: + Smillie, Christopher S et al. “Intra- and Inter-cellular Rewiring of the Human Colon during Ulcerative Colitis.” + Cell vol. 178,3 (2019): 714-730.e22. doi:10.1016/j.cell.2019.06.029 + + Returns: + :class:`~anndata.AnnData` object of the dataset. + """ + output_file_name = "smillie_2019.h5ad.zip" + output_file_path = settings.datasetdir.__str__() + "/" + output_file_name.replace(".zip", "") + if not Path(output_file_path).exists(): + _download( + url="https://figshare.com/ndownloader/files/43317285", + output_file_name=output_file_name, + output_path=settings.datasetdir, + is_zip=True, + ) + adata = sc.read_h5ad(output_file_path) + + return adata diff --git a/pertpy/plot/_coda.py b/pertpy/plot/_coda.py index d62b88e0..4fc59f8b 100644 --- a/pertpy/plot/_coda.py +++ b/pertpy/plot/_coda.py @@ -726,7 +726,7 @@ def draw_tree( # pragma: no cover Examples: Example with tascCODA: >>> import pertpy as pt - >>> adata = pt.dt.smillie() + >>> adata = pt.dt.tasccoda_example() >>> tasccoda = pt.tl.Tasccoda() >>> mdata = tasccoda.load( >>> adata, type="sample_level", @@ -815,7 +815,7 @@ def draw_effects( # pragma: no cover Examples: Example with tascCODA: >>> import pertpy as pt - >>> adata = pt.dt.smillie() + >>> adata = pt.dt.tasccoda_example() >>> tasccoda = pt.tl.Tasccoda() >>> mdata = tasccoda.load( >>> adata, type="sample_level", diff --git a/pertpy/tools/_coda/_base_coda.py b/pertpy/tools/_coda/_base_coda.py index 42fa2119..492e36d4 100644 --- a/pertpy/tools/_coda/_base_coda.py +++ b/pertpy/tools/_coda/_base_coda.py @@ -1005,7 +1005,7 @@ def get_node_df(self, data: AnnData | MuData, modality_key: str = "coda"): Examples: Example with tascCODA (works only for model of type tree_agg, i.e. a tascCODA model): >>> import pertpy as pt - >>> adata = pt.dt.smillie() + >>> adata = pt.dt.tasccoda_example() >>> tasccoda = pt.tl.Tasccoda() >>> mdata = tasccoda.load( >>> adata, type="sample_level", diff --git a/pertpy/tools/_coda/_tasccoda.py b/pertpy/tools/_coda/_tasccoda.py index a4e2ea03..3df790d5 100644 --- a/pertpy/tools/_coda/_tasccoda.py +++ b/pertpy/tools/_coda/_tasccoda.py @@ -104,7 +104,7 @@ def load( Examples: >>> import pertpy as pt - >>> adata = pt.dt.smillie() + >>> adata = pt.dt.tasccoda_example() >>> tasccoda = pt.tl.Tasccoda() >>> mdata = tasccoda.load( >>> adata, type="sample_level", @@ -176,7 +176,7 @@ def prepare( Examples: >>> import pertpy as pt - >>> adata = pt.dt.smillie() + >>> adata = pt.dt.tasccoda_example() >>> tasccoda = pt.tl.Tasccoda() >>> mdata = tasccoda.load( >>> adata, type="sample_level", @@ -322,7 +322,7 @@ def set_init_mcmc_states(self, rng_key: None, ref_index: np.ndarray, sample_adat Examples: >>> import pertpy as pt - >>> adata = pt.dt.smillie() + >>> adata = pt.dt.tasccoda_example() >>> tasccoda = pt.tl.Tasccoda() >>> mdata = tasccoda.load( >>> adata, type="sample_level", @@ -486,7 +486,7 @@ def make_arviz( # type: ignore Examples: >>> import pertpy as pt - >>> adata = pt.dt.smillie() + >>> adata = pt.dt.tasccoda_example() >>> tasccoda = pt.tl.Tasccoda() >>> mdata = tasccoda.load( >>> adata, type="sample_level", @@ -597,7 +597,7 @@ def run_nuts( """ Examples: >>> import pertpy as pt - >>> adata = pt.dt.smillie() + >>> adata = pt.dt.tasccoda_example() >>> tasccoda = pt.tl.Tasccoda() >>> mdata = tasccoda.load( >>> adata, type="sample_level", @@ -617,7 +617,7 @@ def summary(self, data: AnnData | MuData, extended: bool = False, modality_key: """ Examples: >>> import pertpy as pt - >>> adata = pt.dt.smillie() + >>> adata = pt.dt.tasccoda_example() >>> tasccoda = pt.tl.Tasccoda() >>> mdata = tasccoda.load( >>> adata, type="sample_level", @@ -638,7 +638,7 @@ def credible_effects(self, data: AnnData | MuData, modality_key: str = "coda", e """ Examples: >>> import pertpy as pt - >>> adata = pt.dt.smillie() + >>> adata = pt.dt.tasccoda_example() >>> tasccoda = pt.tl.Tasccoda() >>> mdata = tasccoda.load( >>> adata, type="sample_level", @@ -659,7 +659,7 @@ def set_fdr(self, data: AnnData | MuData, est_fdr: float, modality_key: str = "c """ Examples: >>> import pertpy as pt - >>> adata = pt.dt.smillie() + >>> adata = pt.dt.tasccoda_example() >>> tasccoda = pt.tl.Tasccoda() >>> mdata = tasccoda.load( >>> adata, type="sample_level", diff --git a/tests/tools/_coda/test_tasccoda.py b/tests/tools/_coda/test_tasccoda.py index a577ae0a..0913ca56 100644 --- a/tests/tools/_coda/test_tasccoda.py +++ b/tests/tools/_coda/test_tasccoda.py @@ -20,7 +20,7 @@ class TesttascCODA: @pytest.fixture def smillie_adata(self): - smillie_adata = pt.dt.smillie() + smillie_adata = pt.dt.tasccoda_example() smillie_adata = sc.pp.subsample(smillie_adata, 0.1, copy=True) return smillie_adata From fd11536a11e35acbdbfdad16691fe14085860287 Mon Sep 17 00:00:00 2001 From: Lilly May <93096564+Lilly-May@users.noreply.github.com> Date: Thu, 30 Nov 2023 11:21:03 +0100 Subject: [PATCH 2/4] Update pertpy/data/_datasets.py Co-authored-by: Lukas Heumos --- pertpy/data/_datasets.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pertpy/data/_datasets.py b/pertpy/data/_datasets.py index 4621d8b6..db417690 100644 --- a/pertpy/data/_datasets.py +++ b/pertpy/data/_datasets.py @@ -1466,6 +1466,7 @@ def dong_2023() -> AnnData: # pragma: no cover def smillie_2019() -> AnnData: # pragma: no cover """scRNA-seq data of the small intestine of mice under Ulcerative Colitis. + The resulting AnnData when preparing this dataset for processing with tascCODA is available via tasccoda_example(). References: From 11a8cda4dedcd01d46cb2d10657490a569654ce1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 30 Nov 2023 10:21:13 +0000 Subject: [PATCH 3/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pertpy/data/_datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pertpy/data/_datasets.py b/pertpy/data/_datasets.py index db417690..ce1898be 100644 --- a/pertpy/data/_datasets.py +++ b/pertpy/data/_datasets.py @@ -1466,7 +1466,7 @@ def dong_2023() -> AnnData: # pragma: no cover def smillie_2019() -> AnnData: # pragma: no cover """scRNA-seq data of the small intestine of mice under Ulcerative Colitis. - + The resulting AnnData when preparing this dataset for processing with tascCODA is available via tasccoda_example(). References: From df9fdcd23703f7f301b635f43e781d59daad6166 Mon Sep 17 00:00:00 2001 From: Lilly Date: Thu, 30 Nov 2023 11:27:19 +0100 Subject: [PATCH 4/4] Fixed docs explanation for tasccoda_example --- pertpy/data/_datasets.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pertpy/data/_datasets.py b/pertpy/data/_datasets.py index ce1898be..3bd8f2a6 100644 --- a/pertpy/data/_datasets.py +++ b/pertpy/data/_datasets.py @@ -134,7 +134,9 @@ def sciplex3_raw() -> AnnData: # pragma: no cover def tasccoda_example() -> AnnData: # pragma: no cover - """Example for the coda part of a mudata object (mudata['coda']) when preparing a dataset for processing with tascCODA. + """Example for the coda part of a mudata object. + + Resulting AnnData object (mudata['coda']) when preparing a dataset for processing with tascCODA. Created using the smillie dataset, which comprises scRNA-seq data of the small intestine of mice under Ulcerative Colitis. The full dataset containing the actual count data can be obtained via smillie_2019().