diff --git a/tabula_sapiens_tutorial.ipynb b/tabula_sapiens_tutorial.ipynb
new file mode 100644
index 0000000..2722a92
--- /dev/null
+++ b/tabula_sapiens_tutorial.ipynb
@@ -0,0 +1,1585 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e0bca785",
+ "metadata": {
+ "id": "e0bca785"
+ },
+ "source": [
+ "# Using Tabula Sapiens as a reference for annotating new datasets"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e01a8a33-3e2f-4cce-b159-73a448e1d712",
+ "metadata": {
+ "id": "e01a8a33-3e2f-4cce-b159-73a448e1d712"
+ },
+ "source": [
+ "This notebook allows you to annotate your data with a number of annotation methods using the Tabula Sapiens dataset as the reference.\n",
+ "\n",
+ "Initial setup:\n",
+ "1. Make sure a GPU is enabled (Runtime -> Change Runtime Type -> Hardware Accelerator -> GPU)\n",
+ "2. We also highly recommend getting Colab Pro for access to an extended-RAM session.\n",
+ "\n",
+ "\n",
+ "Integration Methods Provided:\n",
+ "- scVI [(Lopez et al. 2018)](https://www.nature.com/articles/s41592-018-0229-2)\n",
+ "- BBKNN [(Polański et al. 2020)](https://academic.oup.com/bioinformatics/article/36/3/964/5545955)\n",
+ "- Scanorama [(Hie et al. 2019)](https://www.nature.com/articles/s41587-019-0113-3)\n",
+ "- Harmony [(Korsunsky et al. 2019)](https://www.nature.com/articles/s41592-019-0619-0)\n",
+ "\n",
+ "Annotation Methods:\n",
+ "- KNN on integrated spaces\n",
+ "- scANVI [(Xu et al. 2021)](https://www.embopress.org/doi/full/10.15252/msb.20209620)\n",
+ "- OnClass [(Wang et al. 2020)](https://www.biorxiv.org/content/10.1101/810234v2)\n",
+ "- CellTypist [(Domínguez Conde et al. 2022)](https://www.science.org/doi/10.1126/science.abl5197)\n",
+ "- SVM\n",
+ "- RandomForest\n",
+ "\n",
+ "To use the notebook, simply connect to your Google Drive account, set the necessary arguments, select your methods, and run all the code blocks!\n",
+ "\n",
+ "**User action is only required in Steps 1-3.**\n",
+ "\n",
+ "Last edited: 08/27/2024\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "45661f72-94d4-47b3-b941-53a2e8bab666",
+ "metadata": {
+ "id": "45661f72-94d4-47b3-b941-53a2e8bab666"
+ },
+ "source": [
+ "## Step 1: Setup environment\n",
+ "\n",
+ "We omit the output of those lines for readability."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "h41Q6U5wMwyP",
+ "metadata": {
+ "id": "h41Q6U5wMwyP",
+ "outputId": "03c4cf1b-7fde-4628-cb87-012a39e33360",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Cloning into 'PopV'...\n",
+ "remote: Enumerating objects: 1215, done.\u001b[K\n",
+ "remote: Counting objects: 100% (474/474), done.\u001b[K\n",
+ "remote: Compressing objects: 100% (212/212), done.\u001b[K\n",
+ "remote: Total 1215 (delta 245), reused 435 (delta 238), pack-reused 741 (from 1)\u001b[K\n",
+ "Receiving objects: 100% (1215/1215), 282.87 MiB | 15.42 MiB/s, done.\n",
+ "Resolving deltas: 100% (715/715), done.\n",
+ "Updating files: 100% (66/66), done.\n"
+ ]
+ }
+ ],
+ "source": [
+ "clone_github_repo = (\n",
+ " True # Set to True if running outside of an already existing GitHub repository.\n",
+ ")\n",
+ "if clone_github_repo:\n",
+ " !git clone https://github.com/czbiohub/PopV.git"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "TFNOSbM8fn4s",
+ "metadata": {
+ "id": "TFNOSbM8fn4s",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "7fa6609f-2d71-4140-a235-13b5e5aeacf2"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Collecting popv\n",
+ " Downloading popv-0.4.2-py3-none-any.whl.metadata (9.4 kB)\n",
+ "Collecting anndata>0.8.0 (from popv)\n",
+ " Downloading anndata-0.10.9-py3-none-any.whl.metadata (6.9 kB)\n",
+ "Collecting bbknn>1.5.0 (from popv)\n",
+ " Downloading bbknn-1.6.0-py3-none-any.whl.metadata (8.2 kB)\n",
+ "Collecting celltypist>1.3.0 (from popv)\n",
+ " Downloading celltypist-1.6.3-py3-none-any.whl.metadata (43 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.7/43.7 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: gdown>4.6.0 in /usr/local/lib/python3.10/dist-packages (from popv) (5.1.0)\n",
+ "Requirement already satisfied: h5py>3.7.0 in /usr/local/lib/python3.10/dist-packages (from popv) (3.11.0)\n",
+ "Collecting harmony-pytorch>0.1.6 (from popv)\n",
+ " Downloading harmony_pytorch-0.1.8-py3-none-any.whl.metadata (3.9 kB)\n",
+ "Requirement already satisfied: huggingface-hub>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from popv) (0.23.5)\n",
+ "Collecting obonet>=1.0 (from popv)\n",
+ " Downloading obonet-1.1.0-py3-none-any.whl.metadata (6.8 kB)\n",
+ "Collecting onclass>=1.3 (from popv)\n",
+ " Downloading OnClass-1.3-py3-none-any.whl.metadata (653 bytes)\n",
+ "Collecting pandas<2.0.0,>=1.4.0 (from popv)\n",
+ " Downloading pandas-1.5.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n",
+ "Collecting scanorama>1.7.2 (from popv)\n",
+ " Downloading scanorama-1.7.4-py3-none-any.whl.metadata (525 bytes)\n",
+ "Collecting scanpy>1.9.0 (from popv)\n",
+ " Downloading scanpy-1.10.2-py3-none-any.whl.metadata (9.3 kB)\n",
+ "Collecting scikit-learn<1.2,>0.21.2 (from popv)\n",
+ " Downloading scikit_learn-1.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)\n",
+ "Collecting scikit-misc>=0.1 (from popv)\n",
+ " Downloading scikit_misc-0.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.0 kB)\n",
+ "Collecting scvi-tools>1.0.0 (from popv)\n",
+ " Downloading scvi_tools-1.1.6-py3-none-any.whl.metadata (17 kB)\n",
+ "Requirement already satisfied: tensorflow>2.11.0 in /usr/local/lib/python3.10/dist-packages (from popv) (2.17.0)\n",
+ "Requirement already satisfied: transformers>4.25.0 in /usr/local/lib/python3.10/dist-packages (from popv) (4.42.4)\n",
+ "Collecting array-api-compat!=1.5,>1.4 (from anndata>0.8.0->popv)\n",
+ " Downloading array_api_compat-1.8-py3-none-any.whl.metadata (1.5 kB)\n",
+ "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anndata>0.8.0->popv) (1.2.2)\n",
+ "Requirement already satisfied: natsort in /usr/local/lib/python3.10/dist-packages (from anndata>0.8.0->popv) (8.4.0)\n",
+ "Requirement already satisfied: numpy>=1.23 in /usr/local/lib/python3.10/dist-packages (from anndata>0.8.0->popv) (1.26.4)\n",
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from anndata>0.8.0->popv) (24.1)\n",
+ "Requirement already satisfied: scipy>1.8 in /usr/local/lib/python3.10/dist-packages (from anndata>0.8.0->popv) (1.13.1)\n",
+ "Requirement already satisfied: Cython in /usr/local/lib/python3.10/dist-packages (from bbknn>1.5.0->popv) (3.0.11)\n",
+ "Collecting annoy (from bbknn>1.5.0->popv)\n",
+ " Downloading annoy-1.17.3.tar.gz (647 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m647.5/647.5 kB\u001b[0m \u001b[31m39.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "Collecting pynndescent (from bbknn>1.5.0->popv)\n",
+ " Downloading pynndescent-0.5.13-py3-none-any.whl.metadata (6.8 kB)\n",
+ "Collecting umap-learn (from bbknn>1.5.0->popv)\n",
+ " Downloading umap_learn-0.5.6-py3-none-any.whl.metadata (21 kB)\n",
+ "Requirement already satisfied: openpyxl>=3.0.4 in /usr/local/lib/python3.10/dist-packages (from celltypist>1.3.0->popv) (3.1.5)\n",
+ "Requirement already satisfied: click>=7.1.2 in /usr/local/lib/python3.10/dist-packages (from celltypist>1.3.0->popv) (8.1.7)\n",
+ "Requirement already satisfied: requests>=2.23.0 in /usr/local/lib/python3.10/dist-packages (from celltypist>1.3.0->popv) (2.32.3)\n",
+ "Collecting leidenalg>=0.9.0 (from celltypist>1.3.0->popv)\n",
+ " Downloading leidenalg-0.10.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)\n",
+ "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from gdown>4.6.0->popv) (4.12.3)\n",
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from gdown>4.6.0->popv) (3.15.4)\n",
+ "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from gdown>4.6.0->popv) (4.66.5)\n",
+ "Requirement already satisfied: torch>=1.12 in /usr/local/lib/python3.10/dist-packages (from harmony-pytorch>0.1.6->popv) (2.4.0+cu121)\n",
+ "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from harmony-pytorch>0.1.6->popv) (5.9.5)\n",
+ "Requirement already satisfied: threadpoolctl in /usr/local/lib/python3.10/dist-packages (from harmony-pytorch>0.1.6->popv) (3.5.0)\n",
+ "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.11.1->popv) (2024.6.1)\n",
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.11.1->popv) (6.0.2)\n",
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.11.1->popv) (4.12.2)\n",
+ "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from obonet>=1.0->popv) (3.3)\n",
+ "Collecting sentence-transformers (from onclass>=1.3->popv)\n",
+ " Downloading sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)\n",
+ "Collecting fbpca>=1.0 (from onclass>=1.3->popv)\n",
+ " Downloading fbpca-1.0.tar.gz (11 kB)\n",
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "Requirement already satisfied: matplotlib>=2.0.2 in /usr/local/lib/python3.10/dist-packages (from onclass>=1.3->popv) (3.7.1)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas<2.0.0,>=1.4.0->popv) (2.8.2)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<2.0.0,>=1.4.0->popv) (2024.1)\n",
+ "Collecting geosketch>=1.0 (from scanorama>1.7.2->popv)\n",
+ " Downloading geosketch-1.2-py3-none-any.whl.metadata (406 bytes)\n",
+ "Collecting intervaltree>=3.1.0 (from scanorama>1.7.2->popv)\n",
+ " Downloading intervaltree-3.1.0.tar.gz (32 kB)\n",
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from scanpy>1.9.0->popv) (1.4.2)\n",
+ "Collecting legacy-api-wrap>=1.4 (from scanpy>1.9.0->popv)\n",
+ " Downloading legacy_api_wrap-1.4-py3-none-any.whl.metadata (1.8 kB)\n",
+ "Requirement already satisfied: numba>=0.56 in /usr/local/lib/python3.10/dist-packages (from scanpy>1.9.0->popv) (0.60.0)\n",
+ "Requirement already satisfied: patsy in /usr/local/lib/python3.10/dist-packages (from scanpy>1.9.0->popv) (0.5.6)\n",
+ "Requirement already satisfied: seaborn>=0.13 in /usr/local/lib/python3.10/dist-packages (from scanpy>1.9.0->popv) (0.13.1)\n",
+ "Collecting session-info (from scanpy>1.9.0->popv)\n",
+ " Downloading session_info-1.0.0.tar.gz (24 kB)\n",
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "Requirement already satisfied: statsmodels>=0.13 in /usr/local/lib/python3.10/dist-packages (from scanpy>1.9.0->popv) (0.14.2)\n",
+ "Collecting docrep>=0.3.2 (from scvi-tools>1.0.0->popv)\n",
+ " Downloading docrep-0.3.2.tar.gz (33 kB)\n",
+ " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "Requirement already satisfied: flax in /usr/local/lib/python3.10/dist-packages (from scvi-tools>1.0.0->popv) (0.8.4)\n",
+ "Requirement already satisfied: jax>=0.4.4 in /usr/local/lib/python3.10/dist-packages (from scvi-tools>1.0.0->popv) (0.4.26)\n",
+ "Requirement already satisfied: jaxlib>=0.4.3 in /usr/local/lib/python3.10/dist-packages (from scvi-tools>1.0.0->popv) (0.4.26+cuda12.cudnn89)\n",
+ "Collecting lightning<2.2,>=2.0 (from scvi-tools>1.0.0->popv)\n",
+ " Downloading lightning-2.1.4-py3-none-any.whl.metadata (57 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.2/57.2 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting ml-collections>=0.1.1 (from scvi-tools>1.0.0->popv)\n",
+ " Downloading ml_collections-0.1.1.tar.gz (77 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ "Collecting mudata>=0.1.2 (from scvi-tools>1.0.0->popv)\n",
+ " Downloading mudata-0.3.0-py3-none-any.whl.metadata (8.3 kB)\n",
+ "Collecting numpyro>=0.12.1 (from scvi-tools>1.0.0->popv)\n",
+ " Downloading numpyro-0.15.2-py3-none-any.whl.metadata (36 kB)\n",
+ "Requirement already satisfied: optax in /usr/local/lib/python3.10/dist-packages (from scvi-tools>1.0.0->popv) (0.2.2)\n",
+ "Collecting pyro-ppl>=1.6.0 (from scvi-tools>1.0.0->popv)\n",
+ " Downloading pyro_ppl-1.9.1-py3-none-any.whl.metadata (7.8 kB)\n",
+ "Requirement already satisfied: rich>=12.0.0 in /usr/local/lib/python3.10/dist-packages (from scvi-tools>1.0.0->popv) (13.8.0)\n",
+ "Collecting torchmetrics>=0.11.0 (from scvi-tools>1.0.0->popv)\n",
+ " Downloading torchmetrics-1.4.1-py3-none-any.whl.metadata (20 kB)\n",
+ "Requirement already satisfied: absl-py>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow>2.11.0->popv) (1.4.0)\n",
+ "Requirement already satisfied: astunparse>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow>2.11.0->popv) (1.6.3)\n",
+ "Requirement already satisfied: flatbuffers>=24.3.25 in /usr/local/lib/python3.10/dist-packages (from tensorflow>2.11.0->popv) (24.3.25)\n",
+ "Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow>2.11.0->popv) (0.6.0)\n",
+ "Requirement already satisfied: google-pasta>=0.1.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow>2.11.0->popv) (0.2.0)\n",
+ "Requirement already satisfied: libclang>=13.0.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow>2.11.0->popv) (18.1.1)\n",
+ "Requirement already satisfied: ml-dtypes<0.5.0,>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow>2.11.0->popv) (0.4.0)\n",
+ "Requirement already satisfied: opt-einsum>=2.3.2 in /usr/local/lib/python3.10/dist-packages (from tensorflow>2.11.0->popv) (3.3.0)\n",
+ "Requirement already satisfied: protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 in /usr/local/lib/python3.10/dist-packages (from tensorflow>2.11.0->popv) (3.20.3)\n",
+ "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from tensorflow>2.11.0->popv) (71.0.4)\n",
+ "Requirement already satisfied: six>=1.12.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow>2.11.0->popv) (1.16.0)\n",
+ "Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow>2.11.0->popv) (2.4.0)\n",
+ "Requirement already satisfied: wrapt>=1.11.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow>2.11.0->popv) (1.16.0)\n",
+ "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /usr/local/lib/python3.10/dist-packages (from tensorflow>2.11.0->popv) (1.64.1)\n",
+ "Requirement already satisfied: tensorboard<2.18,>=2.17 in /usr/local/lib/python3.10/dist-packages (from tensorflow>2.11.0->popv) (2.17.0)\n",
+ "Requirement already satisfied: keras>=3.2.0 in /usr/local/lib/python3.10/dist-packages (from tensorflow>2.11.0->popv) (3.4.1)\n",
+ "Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in /usr/local/lib/python3.10/dist-packages (from tensorflow>2.11.0->popv) (0.37.1)\n",
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers>4.25.0->popv) (2024.5.15)\n",
+ "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers>4.25.0->popv) (0.4.4)\n",
+ "Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers>4.25.0->popv) (0.19.1)\n",
+ "Requirement already satisfied: wheel<1.0,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from astunparse>=1.6.0->tensorflow>2.11.0->popv) (0.44.0)\n",
+ "Requirement already satisfied: sortedcontainers<3.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from intervaltree>=3.1.0->scanorama>1.7.2->popv) (2.4.0)\n",
+ "Requirement already satisfied: namex in /usr/local/lib/python3.10/dist-packages (from keras>=3.2.0->tensorflow>2.11.0->popv) (0.0.8)\n",
+ "Requirement already satisfied: optree in /usr/local/lib/python3.10/dist-packages (from keras>=3.2.0->tensorflow>2.11.0->popv) (0.12.1)\n",
+ "Collecting igraph<0.12,>=0.10.0 (from leidenalg>=0.9.0->celltypist>1.3.0->popv)\n",
+ " Downloading igraph-0.11.6-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.9 kB)\n",
+ "Collecting lightning-utilities<2.0,>=0.8.0 (from lightning<2.2,>=2.0->scvi-tools>1.0.0->popv)\n",
+ " Downloading lightning_utilities-0.11.6-py3-none-any.whl.metadata (5.2 kB)\n",
+ "Collecting pytorch-lightning (from lightning<2.2,>=2.0->scvi-tools>1.0.0->popv)\n",
+ " Downloading pytorch_lightning-2.4.0-py3-none-any.whl.metadata (21 kB)\n",
+ "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.0.2->onclass>=1.3->popv) (1.2.1)\n",
+ "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.0.2->onclass>=1.3->popv) (0.12.1)\n",
+ "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.0.2->onclass>=1.3->popv) (4.53.1)\n",
+ "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.0.2->onclass>=1.3->popv) (1.4.5)\n",
+ "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.0.2->onclass>=1.3->popv) (9.4.0)\n",
+ "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=2.0.2->onclass>=1.3->popv) (3.1.4)\n",
+ "Requirement already satisfied: contextlib2 in /usr/local/lib/python3.10/dist-packages (from ml-collections>=0.1.1->scvi-tools>1.0.0->popv) (21.6.0)\n",
+ "Requirement already satisfied: llvmlite<0.44,>=0.43.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba>=0.56->scanpy>1.9.0->popv) (0.43.0)\n",
+ "Requirement already satisfied: multipledispatch in /usr/local/lib/python3.10/dist-packages (from numpyro>=0.12.1->scvi-tools>1.0.0->popv) (1.0.0)\n",
+ "Requirement already satisfied: et-xmlfile in /usr/local/lib/python3.10/dist-packages (from openpyxl>=3.0.4->celltypist>1.3.0->popv) (1.1.0)\n",
+ "Collecting pyro-api>=0.1.1 (from pyro-ppl>=1.6.0->scvi-tools>1.0.0->popv)\n",
+ " Downloading pyro_api-0.1.2-py3-none-any.whl.metadata (2.5 kB)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.23.0->celltypist>1.3.0->popv) (3.3.2)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.23.0->celltypist>1.3.0->popv) (3.8)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.23.0->celltypist>1.3.0->popv) (2.0.7)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.23.0->celltypist>1.3.0->popv) (2024.7.4)\n",
+ "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=12.0.0->scvi-tools>1.0.0->popv) (3.0.0)\n",
+ "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=12.0.0->scvi-tools>1.0.0->popv) (2.16.1)\n",
+ "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.18,>=2.17->tensorflow>2.11.0->popv) (3.7)\n",
+ "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.18,>=2.17->tensorflow>2.11.0->popv) (0.7.2)\n",
+ "Requirement already satisfied: werkzeug>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from tensorboard<2.18,>=2.17->tensorflow>2.11.0->popv) (3.0.4)\n",
+ "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.12->harmony-pytorch>0.1.6->popv) (1.13.2)\n",
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.12->harmony-pytorch>0.1.6->popv) (3.1.4)\n",
+ "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->gdown>4.6.0->popv) (2.6)\n",
+ "Requirement already satisfied: msgpack in /usr/local/lib/python3.10/dist-packages (from flax->scvi-tools>1.0.0->popv) (1.0.8)\n",
+ "Requirement already satisfied: orbax-checkpoint in /usr/local/lib/python3.10/dist-packages (from flax->scvi-tools>1.0.0->popv) (0.6.1)\n",
+ "Requirement already satisfied: tensorstore in /usr/local/lib/python3.10/dist-packages (from flax->scvi-tools>1.0.0->popv) (0.1.64)\n",
+ "Requirement already satisfied: chex>=0.1.86 in /usr/local/lib/python3.10/dist-packages (from optax->scvi-tools>1.0.0->popv) (0.1.86)\n",
+ "Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /usr/local/lib/python3.10/dist-packages (from requests[socks]->gdown>4.6.0->popv) (1.7.1)\n",
+ "Collecting stdlib_list (from session-info->scanpy>1.9.0->popv)\n",
+ " Downloading stdlib_list-0.10.0-py3-none-any.whl.metadata (3.3 kB)\n",
+ "Requirement already satisfied: toolz>=0.9.0 in /usr/local/lib/python3.10/dist-packages (from chex>=0.1.86->optax->scvi-tools>1.0.0->popv) (0.12.1)\n",
+ "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/lib/python3.10/dist-packages (from fsspec[http]<2025.0,>=2022.5.0->lightning<2.2,>=2.0->scvi-tools>1.0.0->popv) (3.10.5)\n",
+ "Collecting texttable>=1.6.2 (from igraph<0.12,>=0.10.0->leidenalg>=0.9.0->celltypist>1.3.0->popv)\n",
+ " Downloading texttable-1.7.0-py2.py3-none-any.whl.metadata (9.8 kB)\n",
+ "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=12.0.0->scvi-tools>1.0.0->popv) (0.1.2)\n",
+ "Requirement already satisfied: MarkupSafe>=2.1.1 in /usr/local/lib/python3.10/dist-packages (from werkzeug>=1.0.1->tensorboard<2.18,>=2.17->tensorflow>2.11.0->popv) (2.1.5)\n",
+ "Requirement already satisfied: etils[epath,epy] in /usr/local/lib/python3.10/dist-packages (from orbax-checkpoint->flax->scvi-tools>1.0.0->popv) (1.7.0)\n",
+ "Requirement already satisfied: nest_asyncio in /usr/local/lib/python3.10/dist-packages (from orbax-checkpoint->flax->scvi-tools>1.0.0->popv) (1.6.0)\n",
+ "Requirement already satisfied: humanize in /usr/local/lib/python3.10/dist-packages (from orbax-checkpoint->flax->scvi-tools>1.0.0->popv) (4.10.0)\n",
+ "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.12->harmony-pytorch>0.1.6->popv) (1.3.0)\n",
+ "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2025.0,>=2022.5.0->lightning<2.2,>=2.0->scvi-tools>1.0.0->popv) (2.4.0)\n",
+ "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2025.0,>=2022.5.0->lightning<2.2,>=2.0->scvi-tools>1.0.0->popv) (1.3.1)\n",
+ "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2025.0,>=2022.5.0->lightning<2.2,>=2.0->scvi-tools>1.0.0->popv) (24.2.0)\n",
+ "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2025.0,>=2022.5.0->lightning<2.2,>=2.0->scvi-tools>1.0.0->popv) (1.4.1)\n",
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2025.0,>=2022.5.0->lightning<2.2,>=2.0->scvi-tools>1.0.0->popv) (6.0.5)\n",
+ "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2025.0,>=2022.5.0->lightning<2.2,>=2.0->scvi-tools>1.0.0->popv) (1.9.4)\n",
+ "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]<2025.0,>=2022.5.0->lightning<2.2,>=2.0->scvi-tools>1.0.0->popv) (4.0.3)\n",
+ "Requirement already satisfied: importlib_resources in /usr/local/lib/python3.10/dist-packages (from etils[epath,epy]->orbax-checkpoint->flax->scvi-tools>1.0.0->popv) (6.4.4)\n",
+ "Requirement already satisfied: zipp in /usr/local/lib/python3.10/dist-packages (from etils[epath,epy]->orbax-checkpoint->flax->scvi-tools>1.0.0->popv) (3.20.1)\n",
+ "Downloading popv-0.4.2-py3-none-any.whl (37 kB)\n",
+ "Downloading anndata-0.10.9-py3-none-any.whl (128 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.0/129.0 kB\u001b[0m \u001b[31m10.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading bbknn-1.6.0-py3-none-any.whl (14 kB)\n",
+ "Downloading celltypist-1.6.3-py3-none-any.whl (7.3 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.3/7.3 MB\u001b[0m \u001b[31m79.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading harmony_pytorch-0.1.8-py3-none-any.whl (8.5 kB)\n",
+ "Downloading obonet-1.1.0-py3-none-any.whl (9.1 kB)\n",
+ "Downloading OnClass-1.3-py3-none-any.whl (11 kB)\n",
+ "Downloading pandas-1.5.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.1 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.1/12.1 MB\u001b[0m \u001b[31m117.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading scanorama-1.7.4-py3-none-any.whl (12 kB)\n",
+ "Downloading scanpy-1.10.2-py3-none-any.whl (2.1 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m84.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading scikit_learn-1.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30.5 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m30.5/30.5 MB\u001b[0m \u001b[31m71.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading scikit_misc-0.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (188 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m188.8/188.8 kB\u001b[0m \u001b[31m18.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading scvi_tools-1.1.6-py3-none-any.whl (387 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m387.7/387.7 kB\u001b[0m \u001b[31m33.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading array_api_compat-1.8-py3-none-any.whl (38 kB)\n",
+ "Downloading geosketch-1.2-py3-none-any.whl (8.2 kB)\n",
+ "Downloading legacy_api_wrap-1.4-py3-none-any.whl (15 kB)\n",
+ "Downloading leidenalg-0.10.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m80.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading lightning-2.1.4-py3-none-any.whl (2.0 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m77.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading mudata-0.3.0-py3-none-any.whl (39 kB)\n",
+ "Downloading numpyro-0.15.2-py3-none-any.whl (348 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m348.1/348.1 kB\u001b[0m \u001b[31m31.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading pynndescent-0.5.13-py3-none-any.whl (56 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.9/56.9 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading pyro_ppl-1.9.1-py3-none-any.whl (755 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m756.0/756.0 kB\u001b[0m \u001b[31m51.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading torchmetrics-1.4.1-py3-none-any.whl (866 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m866.2/866.2 kB\u001b[0m \u001b[31m57.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading umap_learn-0.5.6-py3-none-any.whl (85 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.7/85.7 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m227.1/227.1 kB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading igraph-0.11.6-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m87.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading lightning_utilities-0.11.6-py3-none-any.whl (26 kB)\n",
+ "Downloading pyro_api-0.1.2-py3-none-any.whl (11 kB)\n",
+ "Downloading pytorch_lightning-2.4.0-py3-none-any.whl (815 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m815.2/815.2 kB\u001b[0m \u001b[31m51.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading stdlib_list-0.10.0-py3-none-any.whl (79 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.8/79.8 kB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading texttable-1.7.0-py2.py3-none-any.whl (10 kB)\n",
+ "Building wheels for collected packages: annoy, docrep, fbpca, intervaltree, ml-collections, session-info\n",
+ " Building wheel for annoy (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for annoy: filename=annoy-1.17.3-cp310-cp310-linux_x86_64.whl size=550688 sha256=42a17e3fbbae325ccd700119474995187d618ccaa587a7d7de88e9e953fdf5fe\n",
+ " Stored in directory: /root/.cache/pip/wheels/64/8a/da/f714bcf46c5efdcfcac0559e63370c21abe961c48e3992465a\n",
+ " Building wheel for docrep (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for docrep: filename=docrep-0.3.2-py3-none-any.whl size=19876 sha256=df36c9e1c40ceea212c559f64a05244cb0476f60f09eaad8797e6346b72b3c37\n",
+ " Stored in directory: /root/.cache/pip/wheels/c3/64/48/03c38d8d906159eaa210b3c548fdb590eb3e2a4a5745ae2172\n",
+ " Building wheel for fbpca (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for fbpca: filename=fbpca-1.0-py3-none-any.whl size=11373 sha256=10e56763d553a8cedbce87e7f38a0df2bf5a3502a757e3a1e4203a67099bfaf1\n",
+ " Stored in directory: /root/.cache/pip/wheels/3c/ea/60/8d1c9fbbc99492a1775b36a5e29c8c1ef309cc5821bd5a219d\n",
+ " Building wheel for intervaltree (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for intervaltree: filename=intervaltree-3.1.0-py2.py3-none-any.whl size=26096 sha256=59f78fe61f5a57097bcf156471ca579a916b664cca0f1c0d80365a8d58726bfe\n",
+ " Stored in directory: /root/.cache/pip/wheels/fa/80/8c/43488a924a046b733b64de3fac99252674c892a4c3801c0a61\n",
+ " Building wheel for ml-collections (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for ml-collections: filename=ml_collections-0.1.1-py3-none-any.whl size=94507 sha256=a1eccff8dd133ea3988c7985470210207b59be86ffa6b02451cc05771dfe74f2\n",
+ " Stored in directory: /root/.cache/pip/wheels/7b/89/c9/a9b87790789e94aadcfc393c283e3ecd5ab916aed0a31be8fe\n",
+ " Building wheel for session-info (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for session-info: filename=session_info-1.0.0-py3-none-any.whl size=8023 sha256=e3c32b3268028a8ad4c71645321084701c9f2a171875ffd458e7f9de8f39bc0e\n",
+ " Stored in directory: /root/.cache/pip/wheels/6a/aa/b9/eb5d4031476ec10802795b97ccf937b9bd998d68a9b268765a\n",
+ "Successfully built annoy docrep fbpca intervaltree ml-collections session-info\n",
+ "Installing collected packages: texttable, pyro-api, fbpca, array-api-compat, annoy, stdlib_list, scikit-misc, obonet, ml-collections, lightning-utilities, legacy-api-wrap, intervaltree, igraph, docrep, session-info, scikit-learn, pandas, leidenalg, torchmetrics, pyro-ppl, pynndescent, numpyro, harmony-pytorch, geosketch, anndata, umap-learn, scanorama, pytorch-lightning, mudata, sentence-transformers, scanpy, lightning, bbknn, scvi-tools, onclass, celltypist, popv\n",
+ " Attempting uninstall: scikit-learn\n",
+ " Found existing installation: scikit-learn 1.3.2\n",
+ " Uninstalling scikit-learn-1.3.2:\n",
+ " Successfully uninstalled scikit-learn-1.3.2\n",
+ " Attempting uninstall: pandas\n",
+ " Found existing installation: pandas 2.1.4\n",
+ " Uninstalling pandas-2.1.4:\n",
+ " Successfully uninstalled pandas-2.1.4\n",
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "bigframes 1.15.0 requires scikit-learn>=1.2.2, but you have scikit-learn 1.1.3 which is incompatible.\n",
+ "cudf-cu12 24.4.1 requires pandas<2.2.2dev0,>=2.0, but you have pandas 1.5.3 which is incompatible.\n",
+ "google-colab 1.0.0 requires pandas==2.1.4, but you have pandas 1.5.3 which is incompatible.\n",
+ "xarray 2024.6.0 requires pandas>=2.0, but you have pandas 1.5.3 which is incompatible.\u001b[0m\u001b[31m\n",
+ "\u001b[0mSuccessfully installed anndata-0.10.9 annoy-1.17.3 array-api-compat-1.8 bbknn-1.6.0 celltypist-1.6.3 docrep-0.3.2 fbpca-1.0 geosketch-1.2 harmony-pytorch-0.1.8 igraph-0.11.6 intervaltree-3.1.0 legacy-api-wrap-1.4 leidenalg-0.10.2 lightning-2.1.4 lightning-utilities-0.11.6 ml-collections-0.1.1 mudata-0.3.0 numpyro-0.15.2 obonet-1.1.0 onclass-1.3 pandas-1.5.3 popv-0.4.2 pynndescent-0.5.13 pyro-api-0.1.2 pyro-ppl-1.9.1 pytorch-lightning-2.4.0 scanorama-1.7.4 scanpy-1.10.2 scikit-learn-1.1.3 scikit-misc-0.5.1 scvi-tools-1.1.6 sentence-transformers-3.0.1 session-info-1.0.0 stdlib_list-0.10.0 texttable-1.7.0 torchmetrics-1.4.1 umap-learn-0.5.6\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Pin popv to the release this notebook was last executed with (see the recorded install log),\n",
+ "# so that a newer release cannot silently change the results of this tutorial.\n",
+ "!pip install popv==0.4.2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Replace the anndata release pulled in by the popv install with anndata 0.10.8.\n",
+ "# NOTE(review): the reason for preferring 0.10.8 is not recorded in this notebook - confirm and document it.\n",
+ "!pip install anndata==0.10.8"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "85kQrlZ-FYaz",
+ "outputId": "18bd8865-ee2d-4498-fc26-65ebbfcc3b43"
+ },
+ "id": "85kQrlZ-FYaz",
+ "execution_count": 3,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Collecting anndata==0.10.8\n",
+ " Downloading anndata-0.10.8-py3-none-any.whl.metadata (6.6 kB)\n",
+ "Requirement already satisfied: array-api-compat!=1.5,>1.4 in /usr/local/lib/python3.10/dist-packages (from anndata==0.10.8) (1.8)\n",
+ "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anndata==0.10.8) (1.2.2)\n",
+ "Requirement already satisfied: h5py>=3.1 in /usr/local/lib/python3.10/dist-packages (from anndata==0.10.8) (3.11.0)\n",
+ "Requirement already satisfied: natsort in /usr/local/lib/python3.10/dist-packages (from anndata==0.10.8) (8.4.0)\n",
+ "Requirement already satisfied: numpy>=1.23 in /usr/local/lib/python3.10/dist-packages (from anndata==0.10.8) (1.26.4)\n",
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from anndata==0.10.8) (24.1)\n",
+ "Requirement already satisfied: pandas!=2.1.0rc0,!=2.1.2,>=1.4 in /usr/local/lib/python3.10/dist-packages (from anndata==0.10.8) (1.5.3)\n",
+ "Requirement already satisfied: scipy>1.8 in /usr/local/lib/python3.10/dist-packages (from anndata==0.10.8) (1.13.1)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas!=2.1.0rc0,!=2.1.2,>=1.4->anndata==0.10.8) (2.8.2)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas!=2.1.0rc0,!=2.1.2,>=1.4->anndata==0.10.8) (2024.1)\n",
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas!=2.1.0rc0,!=2.1.2,>=1.4->anndata==0.10.8) (1.16.0)\n",
+ "Downloading anndata-0.10.8-py3-none-any.whl (124 kB)\n",
+ "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/124.4 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m124.4/124.4 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hInstalling collected packages: anndata\n",
+ " Attempting uninstall: anndata\n",
+ " Found existing installation: anndata 0.10.9\n",
+ " Uninstalling anndata-0.10.9:\n",
+ " Successfully uninstalled anndata-0.10.9\n",
+ "Successfully installed anndata-0.10.8\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "MlQ3oQoRtd1C",
+ "metadata": {
+ "id": "MlQ3oQoRtd1C"
+ },
+ "source": [
+ "## Restart the Session after installation (User Action Required)\n",
+ "\n",
+ "Runtime -> \"Restart session\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "935b37ea-29ee-4134-b793-fb1be48d1156",
+ "metadata": {
+ "id": "935b37ea-29ee-4134-b793-fb1be48d1156",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "37f0b213-6927-42e8-c0f9-1e394a20ceae"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "WARNING:tensorflow:From /usr/local/lib/python3.10/dist-packages/tensorflow/python/compat/v2_compat.py:98: disable_resource_variables (from tensorflow.python.ops.resource_variables_toggle) is deprecated and will be removed in a future version.\n",
+ "Instructions for updating:\n",
+ "non-resource variables are not supported in the long term\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%capture\n",
+ "import popv\n",
+ "import numpy as np\n",
+ "import scanpy as sc\n",
+ "import os\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9039153f-9c09-4486-a5b0-fec6c733bf8e",
+ "metadata": {
+ "id": "9039153f-9c09-4486-a5b0-fec6c733bf8e"
+ },
+ "source": [
+ "## Step 2: Load your data (User Action Required)\n",
+ "Here we provide three options to load your data:\n",
+ "1. Connect to Google Drive (highly recommended)\n",
+ "2. Download your data from the cloud and save into this session or on Google Drive.\n",
+ "3. Upload your data manually into this session (files are not persistent and will be deleted when session is closed)\n",
+ "\n",
+ "As an example, we use a subsampled version of the [Lung Cell Atlas](https://hlca.ds.czbiohub.org/) \\[1] for our query data.\n",
+ "\n",
+ "\\[1] Travaglini, K. et al. A molecular cell atlas of the human lung from single-cell RNA sequencing. *Nature* **587**, 619–625(2020)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "f71c3163",
+ "metadata": {
+ "id": "f71c3163"
+ },
+ "outputs": [],
+ "source": [
+ "# Move the working directory two levels up; the relative paths used below (\"tmp_testing\", \"tmp\", ...)\n",
+ "# are resolved from there. In the recorded run the resulting directory is '/'.\n",
+ "os.chdir(\"../../\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "db0a86c6",
+ "metadata": {
+ "id": "db0a86c6",
+ "outputId": "c6fd82d4-8ec6-498b-a4b9-5204e85d7668",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 36
+ }
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "'/'"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "string"
+ }
+ },
+ "metadata": {},
+ "execution_count": 3
+ }
+ ],
+ "source": [
+ "# Show the current working directory to confirm where relative paths will resolve.\n",
+ "%pwd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "cfa9ac7b",
+ "metadata": {
+ "id": "cfa9ac7b"
+ },
+ "outputs": [],
+ "source": [
+ "# Folder for the example query dataset; created on the first run and reused afterwards.\n",
+ "output_folder = \"tmp_testing\"\n",
+ "os.makedirs(output_folder, exist_ok=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "lkm3ELAfV8-D",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "lkm3ELAfV8-D",
+ "outputId": "4e92d0c5-4058-484c-e56c-dfbc5215b6df"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "downloading\n",
+ "--2024-08-29 03:31:42-- https://www.dropbox.com/s/mrf8y7emfupo4he/LCA.h5ad?dl=1\n",
+ "Resolving www.dropbox.com (www.dropbox.com)... 162.125.81.18, 2620:100:6031:18::a27d:5112\n",
+ "Connecting to www.dropbox.com (www.dropbox.com)|162.125.81.18|:443... connected.\n",
+ "HTTP request sent, awaiting response... 302 Found\n",
+ "Location: https://www.dropbox.com/scl/fi/5henh04air005wccgxbje/LCA.h5ad?rlkey=yqw49zvvkipjfo5a92tkyan6v&dl=1 [following]\n",
+ "--2024-08-29 03:31:43-- https://www.dropbox.com/scl/fi/5henh04air005wccgxbje/LCA.h5ad?rlkey=yqw49zvvkipjfo5a92tkyan6v&dl=1\n",
+ "Reusing existing connection to www.dropbox.com:443.\n",
+ "HTTP request sent, awaiting response... 302 Found\n",
+ "Location: https://ucdc6774ffd790e04d8786582413.dl.dropboxusercontent.com/cd/0/inline/CZgQQzNmnh9taqmKnT2qqpfHeYEUr_ycPzaRoro8NnF5tmjfGBCRY3PBXHaEZfypsQmrd3zxqEPVyMxdEodQfrsmXWs8kcBie6ZyTsKTZG4mPqEGhVfXtXdEIY9ESId4sctG6qEVkaQN8deSUYxonWHu/file?dl=1# [following]\n",
+ "--2024-08-29 03:31:44-- https://ucdc6774ffd790e04d8786582413.dl.dropboxusercontent.com/cd/0/inline/CZgQQzNmnh9taqmKnT2qqpfHeYEUr_ycPzaRoro8NnF5tmjfGBCRY3PBXHaEZfypsQmrd3zxqEPVyMxdEodQfrsmXWs8kcBie6ZyTsKTZG4mPqEGhVfXtXdEIY9ESId4sctG6qEVkaQN8deSUYxonWHu/file?dl=1\n",
+ "Resolving ucdc6774ffd790e04d8786582413.dl.dropboxusercontent.com (ucdc6774ffd790e04d8786582413.dl.dropboxusercontent.com)... 162.125.81.15, 2620:100:6031:15::a27d:510f\n",
+ "Connecting to ucdc6774ffd790e04d8786582413.dl.dropboxusercontent.com (ucdc6774ffd790e04d8786582413.dl.dropboxusercontent.com)|162.125.81.15|:443... connected.\n",
+ "HTTP request sent, awaiting response... 302 Found\n",
+ "Location: /cd/0/inline2/CZiyd0BPxmA4Rb6SJQfGYQnWMlhrw8mmCLBaWfR2_HXq3qDYt8_sZ4PUIZTPBD1o41tzmdGq3oQa1XJQrim1hKSO3ElZcGj638nT8SLupYL5MlDJUJASJHXbgVG5xxAMdsGm89CCaFvTQiwGS5Z1v0bAZgyyYHYPMjYJVVA1JCo_gVmTdvd2KpXVvUD4NQeV6yXPBfZUnZC3L-ImZL3RIpFP8qLm8chpXMzF7je_YISCYdu284eMNIWdFiYMTgXkeKdLutmSHGC2JhjDwGG2Jcp5KW6ln58lHmcjCKQ273wy0JyBU5HU508Kj7ncpk4QbOYJj2MLI0a3YnMNd-M4nkLLPp_d9AL24YtFj6fiZfXAmxyXr1D9Wsdj5XHnRkH2ieo/file?dl=1 [following]\n",
+ "--2024-08-29 03:31:44-- https://ucdc6774ffd790e04d8786582413.dl.dropboxusercontent.com/cd/0/inline2/CZiyd0BPxmA4Rb6SJQfGYQnWMlhrw8mmCLBaWfR2_HXq3qDYt8_sZ4PUIZTPBD1o41tzmdGq3oQa1XJQrim1hKSO3ElZcGj638nT8SLupYL5MlDJUJASJHXbgVG5xxAMdsGm89CCaFvTQiwGS5Z1v0bAZgyyYHYPMjYJVVA1JCo_gVmTdvd2KpXVvUD4NQeV6yXPBfZUnZC3L-ImZL3RIpFP8qLm8chpXMzF7je_YISCYdu284eMNIWdFiYMTgXkeKdLutmSHGC2JhjDwGG2Jcp5KW6ln58lHmcjCKQ273wy0JyBU5HU508Kj7ncpk4QbOYJj2MLI0a3YnMNd-M4nkLLPp_d9AL24YtFj6fiZfXAmxyXr1D9Wsdj5XHnRkH2ieo/file?dl=1\n",
+ "Reusing existing connection to ucdc6774ffd790e04d8786582413.dl.dropboxusercontent.com:443.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 354684075 (338M) [application/binary]\n",
+ "Saving to: ‘tmp_testing/LCA.h5ad’\n",
+ "\n",
+ "tmp_testing/LCA.h5a 100%[===================>] 338.25M 11.3MB/s in 21s \n",
+ "\n",
+ "2024-08-29 03:32:06 (15.9 MB/s) - ‘tmp_testing/LCA.h5ad’ saved [354684075/354684075]\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Where the query data comes from: \"source\" selects the loading branch below (\"gdrive\", \"local\",\n",
+ "# anything else downloads \"link\" with wget), \"path\" is the .h5ad file that is read.\n",
+ "input_file = {\n",
+ "    \"source\": \"wget\",\n",
+ "    \"path\": \"tmp_testing/LCA.h5ad\",\n",
+ "    \"link\": \"https://www.dropbox.com/s/mrf8y7emfupo4he/LCA.h5ad?dl=1\",\n",
+ "}\n",
+ "\n",
+ "if input_file[\"source\"] == \"gdrive\":\n",
+ "    # OPTION 1: Connect to Google Drive\n",
+ "    # This is the recommended method especially for large datasets\n",
+ "    from google.colab import drive\n",
+ "\n",
+ "    drive.mount(\"/content/drive\")\n",
+ "    query_adata = sc.read(input_file[\"path\"])\n",
+ "elif input_file[\"source\"] == \"local\":\n",
+ "    # OPTION 2: Uploading data manually\n",
+ "    # Click the folder icon on the left navigation bar, and select the upload icon\n",
+ "    # Note: Manually uploaded data is automatically deleted when the colab session ends\n",
+ "    # This is not recommended if your dataset is very large\n",
+ "    query_adata = sc.read(input_file[\"path\"])\n",
+ "else:\n",
+ "    # OPTION 3: Downloading from the cloud (Dropbox, AWS, Google Drive, etc)\n",
+ "    # Google Colab supports wget, curl, and gdown commands\n",
+ "    # It is recommended to download the data into Google Drive and read from there.\n",
+ "    # This way your data will be persistent.\n",
+ "    print(\"downloading\")\n",
+ "    try:\n",
+ "        # Both arguments are quoted so that shell metacharacters in the link (e.g. '&' in\n",
+ "        # share links with several query parameters) reach wget unchanged.\n",
+ "        !wget -O \"{input_file['path']}\" \"{input_file['link']}\"\n",
+ "        query_adata = sc.read(input_file[\"path\"])\n",
+ "    except Exception as err:\n",
+ "        # Catch Exception (not a bare except) so that interrupting the kernel is not turned into\n",
+ "        # a download error, and chain the original error to keep its traceback.\n",
+ "        raise Exception(\n",
+ "            \"Default download failed with wget. Use custom downloader or check provided link \"\n",
+ "            + input_file[\"link\"]\n",
+ "        ) from err\n",
+ "\n",
+ "query_adata.obs_names_make_unique()\n",
+ "# Downsample to reduce RAM usage. Not necessary with Google Colab Pro.\n",
+ "sc.pp.subsample(query_adata, 0.2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "eQwL4OPPu-o7",
+ "metadata": {
+ "id": "eQwL4OPPu-o7"
+ },
+ "source": [
+ "## Step 3 (User Action Required): Setting Up Annotation Parameters\n",
+ "\n",
+ "Here is where you set the parameters for the automated annotation.\n",
+ "\n",
+ "Arguments:\n",
+ "- **tissue:** Tabula Sapiens tissue to annotate your data with. Available tissues: [\"Bladder\", \"Blood\", \"Bone_Marrow\", \"Kidney\", \"Large_Intestine\", \"Lung\",\"Lymph_Node\", \"Pancreas\", \"Small_Intestine\", \"Spleen\", \"Thymus\",\"Trachea\", \"Vasculature\"]\n",
+ "- **save_location:** location to save results to. By default will save to a folder named `annotation_results`. It is highly recommended you provide a Google Drive folder here.\n",
+ "- **query_batch_key:** key in `query_adata.obs` for batch correction. Set to None for no batch correction.\n",
+ "- **algorithms:** these are the methods to run. By default, will run all methods.\n",
+ "Options: [\"knn_on_scvi_pred\", \"scanvi_pred\", \"knn_on_bbknn_pred\", \"svm_pred\", \"rf_pred\", \"onclass_pred\", \"knn_on_scanorama_pred\"]\n",
+ "\n",
+ "\n",
+ "Lesser used parameters\n",
+ "- **query_labels_key**: scANVI has the option to use labeled cells in the query dataset during training. To use some prelabeled cells from the query dataset, set `query_labels_key` to the corresponding key in `query_adata.obs`\n",
+ "- **unknown_celltype_label**: If `query_labels_key` is not None, will treat everything not labeled `unknown_celltype_label` as a labeled cell"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "RBolKuGDvP0Z",
+ "metadata": {
+ "id": "RBolKuGDvP0Z"
+ },
+ "outputs": [],
+ "source": [
+ "\"\"\"\n",
+ "tissue options:\n",
+ "['Bladder','Blood','Bone_Marrow','Fat',\n",
+ "'Heart','Kidney','Large_Intestine','Liver',\n",
+ "'Lung','Lymph_Node','Mammary','Muscle',\n",
+ "'Pancreas','Prostate','Salivary Gland',\n",
+ "'Skin','Small_Intestine','Spleen',\n",
+ "'Thymus','Trachea','Vasculature']\n",
+ "\"\"\"\n",
+ "# Reference tissue; also the key used below to look up the reference-data and pretrained-model download links.\n",
+ "tissue = \"Lung\"\n",
+ "\n",
+ "# Passed to Process_Query as query_batch_key (a key in query_adata.obs, see the description above).\n",
+ "query_batch_key = \"donor_method\"\n",
+ "# None keeps the default described above (run all methods).\n",
+ "algorithms = None\n",
+ "\n",
+ "# Lesser used parameters\n",
+ "# Both are passed to Process_Query; see the description above for their meaning.\n",
+ "query_labels_key = None\n",
+ "unknown_celltype_label = \"unknown\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ffB8B0dUceGb",
+ "metadata": {
+ "id": "ffB8B0dUceGb"
+ },
+ "source": [
+ "## Step 4: Downloading Reference Data and Pretrained Models\n",
+ "No more user input required! Just run all the following code blocks.\n",
+ "\n",
+ "**NOTE: PopV has only been evaluated with the Lung/Thymus/Lymph_Node as a reference dataset. Different tissues have different annotation quality, and the Tabula Sapiens community is actively improving the annotation quality. We strongly expect cell annotation to improve once the updated annotation is released. Upon release, the Zenodo repository will be updated.**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "Zty7C8HAZwwr",
+ "metadata": {
+ "id": "Zty7C8HAZwwr"
+ },
+ "outputs": [],
+ "source": [
+ "import requests\n",
+ "\n",
+ "# Query the Zenodo record that lists the reference dataset files.\n",
+ "res = requests.get(\"https://zenodo.org/api/records/7587774\")\n",
+ "# Stop here with a clear HTTP error instead of a confusing KeyError on \"files\" below.\n",
+ "res.raise_for_status()\n",
+ "# Map tissue name -> download URL. The slice drops the first 3 and the last 14 characters of each\n",
+ "# file name, e.g. \"TS_Lung_filtered.h5ad\" -> \"Lung\".\n",
+ "tissue_download_path = {\n",
+ "    ind[\"key\"][3:-14]: ind[\"links\"][\"self\"] for ind in res.json()[\"files\"]\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "cfc3c4a5",
+ "metadata": {
+ "id": "cfc3c4a5"
+ },
+ "outputs": [],
+ "source": [
+ "# Query the Zenodo record that lists the pretrained model archives.\n",
+ "res = requests.get(\"https://zenodo.org/api/records/7580707\")\n",
+ "# Stop here with a clear HTTP error instead of a confusing KeyError on \"files\" below.\n",
+ "res.raise_for_status()\n",
+ "# Map tissue name -> download URL. The slice drops the first 18 and the last 10 characters of each\n",
+ "# file name, e.g. \"pretrained_models_Lung_ts.tar.gz\" -> \"Lung\".\n",
+ "pretrained_models_download_path = {\n",
+ "    ind[\"key\"][18:-10]: ind[\"links\"][\"self\"] for ind in res.json()[\"files\"]\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "3d63bea8",
+ "metadata": {
+ "id": "3d63bea8",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "412dbb83-838e-4acb-8cf1-3e27032737ec"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "--2024-08-29 03:32:14-- https://zenodo.org/api/records/7587774/files/TS_Lung_filtered.h5ad/content\n",
+ "Resolving zenodo.org (zenodo.org)... 188.185.79.172, 188.184.98.238, 188.184.103.159, ...\n",
+ "Connecting to zenodo.org (zenodo.org)|188.185.79.172|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 1953465248 (1.8G) [application/octet-stream]\n",
+ "Saving to: ‘tmp/TS_Lung.h5ad’\n",
+ "\n",
+ "tmp/TS_Lung.h5ad 100%[===================>] 1.82G 11.7MB/s in 2m 48s \n",
+ "\n",
+ "2024-08-29 03:35:02 (11.1 MB/s) - ‘tmp/TS_Lung.h5ad’ saved [1953465248/1953465248]\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# From here on output_folder points at \"tmp\" (it was \"tmp_testing\" for the query download above).\n",
+ "output_folder = \"tmp\"\n",
+ "refdata_url = tissue_download_path[tissue]\n",
+ "if not os.path.exists(output_folder):\n",
+ "    os.mkdir(output_folder)\n",
+ "output_fn = f\"{output_folder}/TS_{tissue}.h5ad\"\n",
+ "if not os.path.exists(output_fn):\n",
+ "    # Download to a temporary name and rename only after wget succeeded, so that an interrupted\n",
+ "    # download is never mistaken for a complete file when the cell is run again.\n",
+ "    partial_fn = f\"{output_fn}.part\"\n",
+ "    !wget -O $partial_fn $refdata_url && mv $partial_fn $output_fn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "ab9a16a4",
+ "metadata": {
+ "id": "ab9a16a4",
+ "outputId": "98821fb8-a9ed-4a29-c80d-ca10a2476ff6",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# NOTE(review): output_model_tar_fn and model_url are first assigned in the next cell, so on a fresh\n",
+ "# kernel this prints an empty line (as in the recorded output). Re-run it after the next cell to see the values.\n",
+ "!echo $output_model_tar_fn $model_url"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "66e75578",
+ "metadata": {
+ "id": "66e75578",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "2ed95646-033d-4986-ec02-83e1027665ae"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "--2024-08-29 03:35:03-- https://zenodo.org/api/records/7580707/files/pretrained_models_Lung_ts.tar.gz/content\n",
+ "Resolving zenodo.org (zenodo.org)... 188.184.103.159, 188.184.98.238, 188.185.79.172, ...\n",
+ "Connecting to zenodo.org (zenodo.org)|188.184.103.159|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 75525173 (72M) [application/octet-stream]\n",
+ "Saving to: ‘tmp/pretrained_model_Lung.tar.gz’\n",
+ "\n",
+ "tmp/pretrained_mode 100%[===================>] 72.03M 21.3MB/s in 4.4s \n",
+ "\n",
+ "2024-08-29 03:35:08 (16.5 MB/s) - ‘tmp/pretrained_model_Lung.tar.gz’ saved [75525173/75525173]\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "model_url = pretrained_models_download_path[tissue]\n",
+ "output_model_tar_fn = f\"{output_folder}/pretrained_model_{tissue}.tar.gz\"\n",
+ "output_model_fn = f\"{output_folder}/pretrained_model_{tissue}\"\n",
+ "if not os.path.exists(output_model_fn):\n",
+ "    os.mkdir(output_model_fn)\n",
+ "# Download the archive only when it is not already present.\n",
+ "needs_download = not os.path.exists(output_model_tar_fn)\n",
+ "if needs_download:\n",
+ "    !wget -O $output_model_tar_fn $model_url\n",
+ "# Unpack after a fresh download, and also when the archive is already there but the model folder\n",
+ "# is still empty (previously that case left an empty folder behind and skipped the extraction).\n",
+ "if needs_download or not os.listdir(output_model_fn):\n",
+ "    !tar -xzf $output_model_tar_fn -C $output_model_fn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "c268bde8",
+ "metadata": {
+ "id": "c268bde8"
+ },
+ "outputs": [],
+ "source": [
+ "# read in the reference dataset\n",
+ "# Load the downloaded reference dataset and randomly keep 5% of its cells.\n",
+ "ref_adata = sc.read_h5ad(output_fn)\n",
+ "sc.pp.subsample(ref_adata, fraction=0.05)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6a830cd8-b897-498c-b3f8-1dddff8e5aa8",
+ "metadata": {
+ "id": "6a830cd8-b897-498c-b3f8-1dddff8e5aa8"
+ },
+ "source": [
+ "### Setup reference data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "33ad9aa6-271f-425b-ba5b-8f554001b0c0",
+ "metadata": {
+ "id": "33ad9aa6-271f-425b-ba5b-8f554001b0c0"
+ },
+ "outputs": [],
+ "source": [
+ "# The following parameters are specific to the Tabula Sapiens dataset: ref_labels_key names the\n",
+ "# annotated cell-type column and ref_batch_key the batch column that is corrected for during model training.\n",
+ "ref_labels_key = \"cell_ontology_class\"\n",
+ "ref_batch_key = \"donor_assay\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "4e54d258-a49b-430f-818c-d16a2cf068ed",
+ "metadata": {
+ "id": "4e54d258-a49b-430f-818c-d16a2cf068ed"
+ },
+ "outputs": [],
+ "source": [
+ "# Number of reference cells per label: the size of the smallest cell-type group, but never below 500.\n",
+ "cells_per_label = ref_adata.obs.groupby(ref_labels_key).size()\n",
+ "min_celltype_size = np.min(cells_per_label)\n",
+ "n_samples_per_label = np.max((min_celltype_size, 500))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "de507788-dfc3-4b54-979b-f0472ac014f5",
+ "metadata": {
+ "id": "de507788-dfc3-4b54-979b-f0472ac014f5"
+ },
+ "source": [
+ "### Preprocess query with ref dataset\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Set popv's num_threads option to 1 before preprocessing.\n",
+ "# NOTE(review): presumably this restricts popv to a single worker thread - confirm against the popv Config documentation.\n",
+ "popv.Config.num_threads = 1"
+ ],
+ "metadata": {
+ "id": "dmWdD5u4JfpI"
+ },
+ "id": "dmWdD5u4JfpI",
+ "execution_count": 15,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "db50776c-e9d2-4198-8b19-b0cdebd6b167",
+ "metadata": {
+ "id": "db50776c-e9d2-4198-8b19-b0cdebd6b167"
+ },
+ "outputs": [],
+ "source": [
+ "from popv.preprocessing import Process_Query\n",
+ "\n",
+ "# Hand the query and reference data to popv's Process_Query, using the keys chosen above;\n",
+ "# only the resulting `.adata` attribute is kept.\n",
+ "adata = Process_Query(\n",
+ "    query_adata,\n",
+ "    ref_adata,\n",
+ "    query_labels_key=query_labels_key,\n",
+ "    query_batch_key=query_batch_key,\n",
+ "    ref_labels_key=ref_labels_key,\n",
+ "    ref_batch_key=ref_batch_key,\n",
+ "    unknown_celltype_label=unknown_celltype_label,\n",
+ "    # Folder the pretrained models were extracted into above.\n",
+ "    save_path_trained_models=output_model_fn,\n",
+ "    # NOTE(review): relative path - presumably resolved against the current working directory; confirm the folder exists.\n",
+ "    cl_obo_folder=\"content/PopV/resources/ontology/\",\n",
+ "    prediction_mode=\"inference\",  # 'fast' mode gives fast results (does not include BBKNN and Scanorama and makes more inaccurate predictions)\n",
+ "    n_samples_per_label=n_samples_per_label,\n",
+ "    # A GPU runtime is expected here (see the initial setup notes at the top of the notebook).\n",
+ "    accelerator=\"cuda\",\n",
+ "    compute_embedding=True,\n",
+ "    hvg=None,\n",
+ ").adata"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "e58408ba",
+ "metadata": {
+ "id": "e58408ba",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "ee9e7c18-a031-4075-c6e8-955fd3be08af"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "AnnData object with n_obs × n_vars = 16797 × 4000\n",
+ " obs: 'tissue_in_publication', 'donor_id', 'free_annotation', 'compartment', 'cell_type_ontology_term_id', 'cell_type', 'assay', 'sex', 'tissue', 'cell_ontology_class', 'cell_ontology_class_tissue', 'donor_tissue', 'assay_correct', 'donor_assay', '_batch_annotation', '_labels_annotation', '_ref_subsample', 'method', 'donor', 'cell_ontology_type', 'donor_method', 'cell_ontology_id', '_dataset', 'n_counts'\n",
+ " var: 'mean', 'std'\n",
+ " uns: 'Filtered_cells', 'log1p', 'unknown_celltype_label', '_pretrained_scvi_path', '_save_path_trained_models', '_prediction_mode', '_cl_obo_file', '_cl_ontology_file', '_nlp_emb_file', '_accelerator', '_devices', '_compute_embedding', '_return_probabilities', 'prediction_keys'\n",
+ " obsm: 'X_pca', 'X_scvi', 'X_scvi_umap', 'X_umap'\n",
+ " layers: 'scvi_counts', 'scaled_counts'"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 17
+ }
+ ],
+ "source": [
+ "# Display a summary of the AnnData object returned by Process_Query.\n",
+ "adata"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "1e6b602d-8e13-4e1d-b31e-632a0c4a2284",
+ "metadata": {
+ "collapsed": true,
+ "id": "1e6b602d-8e13-4e1d-b31e-632a0c4a2284",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "e398bfab-a32e-43b4-be09-d6cca20aecc0"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\tInitialization is completed.\n",
+ "\tCompleted 1 / 10 iteration(s).\n",
+ "\tCompleted 2 / 10 iteration(s).\n",
+ "\tCompleted 3 / 10 iteration(s).\n",
+ "\tCompleted 4 / 10 iteration(s).\n",
+ "\tCompleted 5 / 10 iteration(s).\n",
+ "Reach convergence after 5 iteration(s).\n",
+ "Found 4000 genes among all datasets\n",
+ "[[0. 0.10779221 0.53405573 0.00453858 0.80134159 0.00917431\n",
+ " 0.08 0.05882353 0.09090909 0.05615293 0.0625 ]\n",
+ " [0. 0. 0.14025974 0.60060514 0.12857143 0.83944954\n",
+ " 0.03555556 0.05064935 0.38636364 0.05714286 0.45833333]\n",
+ " [0. 0. 0. 0.36006051 0.50616612 0.01376147\n",
+ " 0.36444444 0.19236884 0.25 0.16726404 0.08333333]\n",
+ " [0. 0. 0. 0. 0.05597579 0.72477064\n",
+ " 0.01777778 0.01210287 0.34090909 0.00955795 0.33333333]\n",
+ " [0. 0. 0. 0. 0. 0.32568807\n",
+ " 0.22666667 0.44197138 0.63636364 0.49820789 0.41666667]\n",
+ " [0. 0. 0. 0. 0. 0.\n",
+ " 0.01777778 0.02522936 0.70454545 0.07568807 0.625 ]\n",
+ " [0. 0. 0. 0. 0. 0.\n",
+ " 0. 0.17333333 0. 0.62666667 0.04166667]\n",
+ " [0. 0. 0. 0. 0. 0.\n",
+ " 0. 0. 0.43181818 0.54958184 0.22916667]\n",
+ " [0. 0. 0. 0. 0. 0.\n",
+ " 0. 0. 0. 0.20454545 0.52083333]\n",
+ " [0. 0. 0. 0. 0. 0.\n",
+ " 0. 0. 0. 0. 0.60416667]\n",
+ " [0. 0. 0. 0. 0. 0.\n",
+ " 0. 0. 0. 0. 0. ]]\n",
+ "Processing datasets (1, 5)\n",
+ "Processing datasets (0, 4)\n",
+ "Processing datasets (3, 5)\n",
+ "Processing datasets (5, 8)\n",
+ "Processing datasets (4, 8)\n",
+ "Processing datasets (6, 9)\n",
+ "Processing datasets (5, 10)\n",
+ "Processing datasets (9, 10)\n",
+ "Processing datasets (1, 3)\n",
+ "Processing datasets (7, 9)\n",
+ "Processing datasets (0, 2)\n",
+ "Processing datasets (8, 10)\n",
+ "Processing datasets (2, 4)\n",
+ "Processing datasets (4, 9)\n",
+ "Processing datasets (1, 10)\n",
+ "Processing datasets (4, 7)\n",
+ "Processing datasets (7, 8)\n",
+ "Processing datasets (4, 10)\n",
+ "Processing datasets (1, 8)\n",
+ "Processing datasets (2, 6)\n",
+ "Processing datasets (2, 3)\n",
+ "Processing datasets (3, 8)\n",
+ "Processing datasets (3, 10)\n",
+ "Processing datasets (4, 5)\n",
+ "Processing datasets (2, 8)\n",
+ "Processing datasets (7, 10)\n",
+ "Processing datasets (4, 6)\n",
+ "Processing datasets (8, 9)\n",
+ "Processing datasets (2, 7)\n",
+ "Processing datasets (6, 7)\n",
+ "Processing datasets (2, 9)\n",
+ "Processing datasets (1, 2)\n",
+ "Processing datasets (1, 4)\n",
+ "Processing datasets (0, 1)\n",
+ "\u001b[34mINFO \u001b[0m File tmp/pretrained_model_Lung/scvi/model.pt already downloaded \n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True\n",
+ "INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores\n",
+ "INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs\n",
+ "INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs\n",
+ "INFO:lightning.pytorch.utilities.rank_zero:You are using a CUDA device ('NVIDIA A100-SXM4-40GB') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision\n",
+ "INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 200/200: 100%|██████████| 200/200 [03:50<00:00, 1.16s/it, v_num=1, train_loss_step=1.95e+3, train_loss_epoch=1.92e+3]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=200` reached.\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\rEpoch 200/200: 100%|██████████| 200/200 [03:50<00:00, 1.15s/it, v_num=1, train_loss_step=1.95e+3, train_loss_epoch=1.92e+3]\n",
+ "\u001b[34mINFO \u001b[0m Input AnnData not setup with scvi-tools. attempting to transfer AnnData setup \n",
+ "\u001b[34mINFO \u001b[0m File tmp/pretrained_model_Lung/\u001b[35m/scanvi/\u001b[0m\u001b[95mmodel.pt\u001b[0m already downloaded \n",
+ "\u001b[34mINFO \u001b[0m Training for \u001b[1;36m20\u001b[0m epochs. \n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True\n",
+ "INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores\n",
+ "INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs\n",
+ "INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs\n",
+ "INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 20/20: 100%|██████████| 20/20 [00:15<00:00, 1.39it/s, v_num=1, train_loss_step=1.71e+3, train_loss_epoch=2.01e+3]"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\rEpoch 20/20: 100%|██████████| 20/20 [00:15<00:00, 1.31it/s, v_num=1, train_loss_step=1.71e+3, train_loss_epoch=2.01e+3]\n",
+ "\u001b[34mINFO \u001b[0m AnnData object appears to be a copy. Attempting to transfer setup. \n"
+ ]
+ }
+ ],
+ "source": [
+ "from popv.annotation import annotate_data\n",
+ "\n",
+ "# Run the annotation methods on the preprocessed data and write the results below output_folder.\n",
+ "# The `algorithms` selection from Step 3 is forwarded so that the setting actually takes effect;\n",
+ "# None keeps the default of running all methods.\n",
+ "annotate_data(adata, methods=algorithms, save_path=f\"{output_folder}/popv_output\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "a13f55b3",
+ "metadata": {
+ "id": "a13f55b3",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 36
+ },
+ "outputId": "66244885-db46-4851-bfd5-49d212ab341f"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "'tmp/pretrained_model_Lung/'"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "string"
+ }
+ },
+ "metadata": {},
+ "execution_count": 19
+ }
+ ],
+ "source": [
+ "# Show the trained-model folder recorded in adata.uns.\n",
+ "adata.uns[\"_save_path_trained_models\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "57be6575",
+ "metadata": {
+ "id": "57be6575"
+ },
+ "outputs": [],
+ "source": [
+ "# Optional: save the full anndata with all objects\n",
+ "# adata.write(f'{output_folder}/query_and_reference_popv.h5ad')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "mPpRz3mdrr0M",
+ "metadata": {
+ "id": "mPpRz3mdrr0M",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "c09d29e3-be95-43d5-a456-bb23cc33860d"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "AnnData object with n_obs × n_vars = 16797 × 4000\n",
+ " obs: 'tissue_in_publication', 'donor_id', 'free_annotation', 'compartment', 'cell_type_ontology_term_id', 'cell_type', 'assay', 'sex', 'tissue', 'cell_ontology_class', 'cell_ontology_class_tissue', 'donor_tissue', 'assay_correct', 'donor_assay', '_batch_annotation', '_labels_annotation', '_ref_subsample', 'method', 'donor', 'cell_ontology_type', 'donor_method', 'cell_ontology_id', '_dataset', 'n_counts', 'popv_celltypist_prediction', 'popv_celltypist_prediction_probabilities', 'popv_knn_on_bbknn_prediction', 'popv_knn_on_bbknn_prediction_probabilities', 'popv_knn_on_harmony_prediction', 'popv_knn_on_harmony_prediction_probabilities', 'popv_knn_on_scanorama_prediction', 'popv_knn_on_scanorama_prediction_probabilities', 'subsampled_labels', '_scvi_batch', '_scvi_labels', 'popv_knn_on_scvi_prediction', 'popv_knn_on_scvi_prediction_probabilities', '_labels_annotation_cell_ontology_id', 'popv_onclass_prediction', 'popv_onclass_seen', 'popv_onclass_prediction_probabilities', 'popv_onclass_seen_probabilities', 'popv_rf_prediction', 'popv_rf_prediction_probabilities', 'popv_scanvi_prediction', 'popv_scanvi_prediction_probabilities', 'popv_svm_prediction', 'popv_svm_prediction_probabilities', 'popv_majority_vote_prediction', 'popv_majority_vote_score', 'popv_prediction', 'popv_prediction_score', 'popv_prediction_depth', 'popv_prediction_onclass_relative_depth', 'popv_parent'\n",
+ " var: 'mean', 'std'\n",
+ " uns: 'Filtered_cells', 'log1p', 'unknown_celltype_label', '_pretrained_scvi_path', '_save_path_trained_models', '_prediction_mode', '_cl_obo_file', '_cl_ontology_file', '_nlp_emb_file', '_accelerator', '_devices', '_compute_embedding', '_return_probabilities', 'prediction_keys', 'neighbors', 'over_clustering', '_scvi_uuid', '_scvi_manager_uuid', 'prediction_keys_seen'\n",
+ " obsm: 'X_pca', 'X_scvi', 'X_scvi_umap', 'X_umap', 'X_bbknn_umap_popv', 'X_pca_harmony', 'X_umap_harmony_popv', 'X_scanorama', 'X_umap_scanorma_popv', 'X_scvi_umap_popv', 'onclass_probabilities', 'X_scanvi', 'X_scanvi_umap_popv'\n",
+ " layers: 'scvi_counts', 'scaled_counts'\n",
+ " obsp: 'connectivities', 'distances'"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 21
+ }
+ ],
+ "source": [
+ "# Display the AnnData summary again; after annotate_data it also lists the popv_* prediction columns in .obs.\n",
+ "adata"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Collect every label that occurs in the prediction columns or in the query's own annotation\n",
+ "# and give each one a colour from scanpy's 102-colour default palette.\n",
+ "# zip() stops at the shorter input, so labels beyond the palette length get no colour.\n",
+ "label_columns = list(adata.uns['prediction_keys']) + ['cell_ontology_type']\n",
+ "cell_types = pd.unique(adata.obs[label_columns].values.ravel('K'))\n",
+ "palette = sc.plotting.palettes.default_102\n",
+ "celltype_colors = dict(zip(cell_types, palette))"
+ ],
+ "metadata": {
+ "id": "4o_4nhUJT2oF"
+ },
+ "id": "4o_4nhUJT2oF",
+ "execution_count": 22,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "31479a95",
+ "metadata": {
+ "id": "31479a95",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "outputId": "2b9530ac-bcce-4e2f-997d-1e8cb99901b1"
+ },
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "