From 087843ddb16f24825c9c474b5d82fb8d1272ae3f Mon Sep 17 00:00:00 2001 From: cobioda Date: Thu, 19 Dec 2024 11:28:07 +0100 Subject: [PATCH] v0.1 --- README.md | 14 +-- docs/notebooks/example.ipynb | 175 ++++++++++++++++++++++++++++++++++- 2 files changed, 178 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index d3f13d9..9ab8432 100644 --- a/README.md +++ b/README.md @@ -7,16 +7,16 @@ ---

- Documentation - Examples + Documentation • + ExamplesPreprint

[![Tests][badge-tests]][link-tests] [![Documentation][badge-docs]][link-docs] -[badge-tests]: https://img.shields.io/github/actions/workflow/status/bfxomics/scispy/test.yaml?branch=main -[link-tests]: https://github.com/bfxomics/scispy/actions/workflows/test.yml +[badge-tests]: https://img.shields.io/github/actions/workflow/status/cobioda/scispy/test.yaml?branch=main +[link-tests]: https://github.com/cobioda/scispy/actions/workflows/test.yml [badge-docs]: https://img.shields.io/readthedocs/scispy @@ -28,14 +28,14 @@

- +

## Features - **Read in-situ spatial-omics assays experiments**: build on top of spatialdata package - **Automatic cell type annotation**: scanvi implementation -- **Import anatomical .csv shape file from xenium explorer**: as anndata observation +- **Import anatomical .csv shape file from xenium explorer**: as anndata observations - **Automatic run pseudobulk data analysis**: using decoupler and pydeseq2 packages - **Compute cell type proportion in region**: integrating statistical test in case of replicates - **Produce high quality spatial figures**: build on top of spatialdata_plot package @@ -45,7 +45,7 @@ Please refer to the [documentation][link-docs]. In particular, the - [API documentation][link-api]. -- [Tutorials][link-tutorial] +- [Tutorials][https://scispy.readthedocs.io/en/latest/docs/notebooks/example.ipynb] ## Installation diff --git a/docs/notebooks/example.ipynb b/docs/notebooks/example.ipynb index 431cbeb..66f8958 100644 --- a/docs/notebooks/example.ipynb +++ b/docs/notebooks/example.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Example notebook" + "# xenium tutorial" ] }, { @@ -13,9 +13,40 @@ "metadata": {}, "outputs": [], "source": [ - "import numpy as np\n", + "import scispy as scis\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import scanpy as sc\n", + "import anndata as ad\n", "from anndata import AnnData\n", - "import scispy" + "import spatialdata as sd\n", + "from spatialdata import SpatialData\n", + "from spatialdata import polygon_query\n", + "from matplotlib import pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### load raw data and perform scanpy analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sdata = scis.io.load_xenium('xenium_output_dir')\n", + "scis.pp.run_scanpy(sdata)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### annotate your spatial cells using a single-cell reference" ] }, { @@ -24,7 +55,143 @@ "metadata": {}, "outputs": [], "source": [ - "adata = AnnData(np.random.normal(size=(20, 10)))" + "scref = sc.read_h5ad('scref.h5ad')\n", + "scis.pp.scvi_annotate(sdata['table'], \n", + " scref, \n", + " label_ref='celltype', \n", + " label_key='celltype')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### add anatomical regions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scis.tl.add_to_shapes(sdata, \n", + " shape_key='shapes', \n", + " scale_factor=1/0.2125, \n", + " target_coordinates='global', \n", + " shape_file='coordinates.csv')\n", + "\n", + "sdata['shapes'].name = sdata['shapes'].name.astype('category')\n", + "\n", + "f, ax = plt.subplots(figsize=(8, 5))\n", + "sdata.pl.render_images(cmap='Blues').pl.show(ax=ax)\n", + "sdata.pl.render_shapes(elements='shapes', color='name', fill_alpha=0.8).pl.show(\"global\",ax=ax)\n", + "ax.set_title('my_sample')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sdata['table'].obs['anatomy'] = 'unknown'\n", + "for i, shape in sdata['shapes'].iterrows():\n", + " try:\n", + " sd_shape = polygon_query(sdata, polygon=shape.geometry, target_coordinate_system=\"global\")\n", + " sdata['table'].obs.anatomy[sdata['table'].obs['cell_id'].isin(sd_shape.table.obs['cell_id'])] = shape['name']\n", + " except:\n", + " print(shape['name'], \" contains invalid coordinates\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### save sdata object" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sdata.write(\"sdata_1\")\n", + "\n", + "#import spatialdata_xenium_explorer\n", + "#spatialdata_xenium_explorer.write(\"explorer_1\", sdata, shapes_key='cell_boundaries', points_key='transcripts', gene_column='feature_name', layer='counts', polygon_max_vertices=40, mode='-it')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### concatenate sdata objects" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "adatas = []\n", + "samples = ['sdata_1','sdata_2','sdata_3','sdata_4']\n", + "for s in samples:\n", + " sdata = SpatialData.read(s)\n", + " adatas += [sdata.table]\n", + "\n", + "adata = ad.concat(adatas)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### visualize cell type proportions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scis.tl.scis_prop(adata, \n", + " celltype='celltype', \n", + " zone='anatomy', \n", + " top=10, \n", + " replicate='sample', \n", + " condition='genotype', \n", + " condition_order=['KO','CTRL'],\n", + " figsize=(6,4))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### run pseudobulk analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df = scis.tl.run_pseudobulk(sdata['table'], \n", + " replicate_key='sample', \n", + " cond_key='genotype', \n", + " cond_1='KO', \n", + " cond_2='CTRL', \n", + " groups_key='celltype')\n", + "\n", + "# plot results as seaborn heatmap\n", + "pivlfc = pd.pivot_table(df, values=[\"log2FoldChange\"], index=[\"index\"], columns=['celltype'], fill_value=0)\n", + "sns.clustermap(pivlfc, cmap=\"vlag\", figsize=(2, 8))" ] } ],