diff --git a/docs/scrna2.ipynb b/docs/scrna2.ipynb index c5e6fbf..677901e 100644 --- a/docs/scrna2.ipynb +++ b/docs/scrna2.ipynb @@ -193,8 +193,20 @@ "metadata": {}, "source": [ "None of the cell type names are valid.\n", - "\n", - "We'll now search the public ontology and add the name found in the dataset as a synonym to the top match found in the public ontology." + "We'll now look up the non-validated cell types in the public ontology and create a mapping from each original name to its standardized name." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "hide-output" + ] + }, + "outputs": [], + "source": [ + "curator.non_validated[\"cell_type_untrusted\"]" ] }, { @@ -207,23 +219,25 @@ }, "outputs": [], "source": [ - "bionty = bt.CellType.public() # access the public ontology through bionty\n", - "name_mapping = {}\n", - "for invalid_name in adata_validated.obs[\"cell_type_untrusted\"].unique():\n", - " ontology_id = bionty.search(invalid_name).index[0] # top search hit through iloc[0]\n", - " record = bt.CellType.from_source(ontology_id=ontology_id)\n", - " name_mapping[invalid_name] = record.name # map the original name to standardized name\n", - " record.save()\n", - " # record.add_synonym(name) # optionally save the invalid name as synonym so that it becomes searchable\n", - "# print the mapping\n", - "print(name_mapping)" + "ct_public_lo = bt.CellType.public().lookup()\n", + "name_mapping = {\n", + " \"Dendritic cells\": ct_public_lo.dendritic_cell.name,\n", + " \"CD19+ B\": ct_public_lo.b_cell_cd19_positive.name,\n", + " \"CD4+/CD45RO+ Memory\": ct_public_lo.effector_memory_cd45ra_positive_alpha_beta_t_cell_terminally_differentiated.name,\n", + " \"CD8+ Cytotoxic T\": ct_public_lo.cd8_positive_alpha_beta_cytotoxic_t_cell.name,\n", + " \"CD4+/CD25 T Reg\": ct_public_lo.cd4_positive_cd25_positive_alpha_beta_regulatory_t_cell.name,\n", + " \"CD14+ Monocytes\": ct_public_lo.cd14_positive_monocyte.name,\n", + " \"CD56+ NK\": 
ct_public_lo.cd56_positive_cd161_positive_immature_natural_killer_cell_human.name,\n", + " \"CD8+/CD45RA+ Naive Cytotoxic\": ct_public_lo.cd8_positive_alpha_beta_memory_t_cell_cd45ro_positive.name,\n", + " \"CD34+\": ct_public_lo.cd34_positive_cd56_positive_cd117_positive_common_innate_lymphoid_precursor_human.name\n", + "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "We can now standardize cell type names using the search-based mapper:" + "We can now standardize cell type names using the lookup-based mapper:" ] }, { @@ -476,7 +490,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "lamindb", "language": "python", "name": "python3" }, @@ -490,7 +504,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.17" + "version": "3.11.7" }, "nbproject": { "id": "ManDYgmftZ8C",