From 8fb99b31928fde63928f7c7b6655fa03d68468f3 Mon Sep 17 00:00:00 2001
From: Emanuele Bezzi <ebezzi@chanzuckerberg.com>
Date: Mon, 6 May 2024 13:06:56 -0700
Subject: [PATCH] [docs] New embeddings notebook spellcheck (#1125)

* update embedding notebook

* lint

* lint

* remove unused dependency

* [docs] corrections to new embedding notebook

---------

Co-authored-by: Pablo Garcia-Nieto <pgarcia-nieto@chanzuckerberg.com>
---
 .../comp_bio_embedding_exploration.ipynb         | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/api/python/notebooks/analysis_demo/comp_bio_embedding_exploration.ipynb b/api/python/notebooks/analysis_demo/comp_bio_embedding_exploration.ipynb
index a9453aa79..8e872a8ce 100644
--- a/api/python/notebooks/analysis_demo/comp_bio_embedding_exploration.ipynb
+++ b/api/python/notebooks/analysis_demo/comp_bio_embedding_exploration.ipynb
@@ -35,8 +35,8 @@
     "\n",
     "**Disclaimers** \n",
     "\n",
-    "1. These embeddings were explored in-depth in a [cellxgene](https://github.com/chanzuckerberg/cellxgene) instance and not all of the insights gleaned there will be expanded on here.\n",
-    "2. Most of the following examples utilize UMAP to visualize embeddings in a 2D scatter plot, however as shown [here](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1011288) and [here](https://www.cell.com/cell-systems/pdf/S2405-4712(23)00209-0.pdf), biological interpretations from these visualizations may be innacurate.\n",
+    "1. These embeddings were explored in depth in a [cellxgene](https://github.com/chanzuckerberg/cellxgene) instance, and not all the insights gleaned there will be expanded on here.\n",
+    "2. Most of the following examples utilize UMAP to visualize embeddings in a 2D scatter plot; however, as shown [here](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1011288) and [here](https://www.cell.com/cell-systems/abstract/S2405-4712%2823%2900209-0), biological interpretations from these visualizations may be inaccurate.\n",
     "\n",
     "\n",
     "\n",
@@ -77,7 +77,7 @@
     "warnings.filterwarnings(\"ignore\")\n",
     "\n",
     "\n",
-    "def remove_missing_embbeding_cells(adata: anndata.AnnData, emb_names: List[str]):\n",
+    "def remove_missing_embedding_cells(adata: anndata.AnnData, emb_names: List[str]):\n",
     "    \"\"\"Embeddings with missing data contain all NaN,\n",
     "    so we must find the intersection of non-NaN rows in the fetched embeddings\n",
     "    and subset the AnnData accordingly.\n",
@@ -133,7 +133,7 @@
     "CENSUS_VERSION = \"2023-12-15\"\n",
     "EXPERIMENT_NAME = \"homo_sapiens\"\n",
     "\n",
-    "# These are embeddings avialable to this Census version\n",
+    "# These are the embeddings available for this Census version\n",
     "embedding_names = [\"geneformer\", \"scvi\", \"scgpt\", \"uce\"]"
    ]
   },
@@ -176,7 +176,7 @@
     "# Let's subset to 150K\n",
     "n_subset_cells = 150000\n",
     "\n",
-    "print(\"Selecting \", n_subset_cells, \" random cells\")\n",
+    "print(\"Selecting\", n_subset_cells, \"random cells\")\n",
     "idx_rand = np.random.choice(obs_df.shape[0], size=n_subset_cells, replace=False)\n",
     "soma_joinids_subset = obs_df[\"soma_joinid\"].values[idx_rand].tolist()"
    ]
@@ -207,7 +207,7 @@
     "    obs_embeddings=embedding_names,\n",
     ")\n",
     "\n",
-    "adata = remove_missing_embbeding_cells(adata, embedding_names)\n",
+    "adata = remove_missing_embedding_cells(adata, embedding_names)\n",
     "adata = generate_umaps_from_embeddings(adata, embedding_names)"
    ]
   },
@@ -616,7 +616,7 @@
     "    obs_embeddings=embedding_names,\n",
     ")\n",
     "\n",
-    "adata = remove_missing_embbeding_cells(adata, embedding_names)\n",
+    "adata = remove_missing_embedding_cells(adata, embedding_names)\n",
     "adata = generate_umaps_from_embeddings(adata, embedding_names)"
    ]
   },
@@ -729,7 +729,7 @@
     "    obs_embeddings=embedding_names,\n",
     ")\n",
     "\n",
-    "adata = remove_missing_embbeding_cells(adata, embedding_names)\n",
+    "adata = remove_missing_embedding_cells(adata, embedding_names)\n",
     "adata = generate_umaps_from_embeddings(adata, embedding_names)"
    ]
   },