From 006395b34034f0a19090767cc59eabf2f9e7b8f8 Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 19:04:36 -0800 Subject: [PATCH 01/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/mouse_thymus_cite_totalvi with DVC --- data/.gitignore | 1 + data/mouse_thymus_cite_totalvi.dvc | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 data/mouse_thymus_cite_totalvi.dvc diff --git a/data/.gitignore b/data/.gitignore index cfffa51..c29c7d1 100644 --- a/data/.gitignore +++ b/data/.gitignore @@ -1 +1,2 @@ /mouse_thymus_cite.h5mu +/mouse_thymus_cite_totalvi diff --git a/data/mouse_thymus_cite_totalvi.dvc b/data/mouse_thymus_cite_totalvi.dvc new file mode 100644 index 0000000..76f534a --- /dev/null +++ b/data/mouse_thymus_cite_totalvi.dvc @@ -0,0 +1,6 @@ +outs: +- md5: d153ca5de73d12ff2ce714dc41451fcc.dir + size: 72953633 + nfiles: 1 + hash: md5 + path: mouse_thymus_cite_totalvi From 2b942c2a05166422df1535dc0627745dbc5b4046 Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 19:10:38 -0800 Subject: [PATCH 02/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/mouse_thymus_cite_totalvi with DVC --- data/mouse_thymus_cite_totalvi.dvc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/mouse_thymus_cite_totalvi.dvc b/data/mouse_thymus_cite_totalvi.dvc index 76f534a..2b5c362 100644 --- a/data/mouse_thymus_cite_totalvi.dvc +++ b/data/mouse_thymus_cite_totalvi.dvc @@ -1,5 +1,5 @@ outs: -- md5: d153ca5de73d12ff2ce714dc41451fcc.dir +- md5: ca64a2dc33acb804857624547930db8f.dir size: 72953633 nfiles: 1 hash: md5 From 2706c387c3ae828c61eb0c25da4c639d7cdeb181 Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 19:36:49 -0800 Subject: [PATCH 03/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/mouse_thymus_cite.h5mu with DVC --- data/mouse_thymus_cite.h5mu.dvc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/mouse_thymus_cite.h5mu.dvc b/data/mouse_thymus_cite.h5mu.dvc index 4b19806..158441c 100644 --- a/data/mouse_thymus_cite.h5mu.dvc +++ b/data/mouse_thymus_cite.h5mu.dvc @@ -1,5 +1,5 @@ outs: -- md5: 0932a99a0e1571da676b2cc4881d189a - size: 6628168757 +- md5: 22b9936c775f8121d02e6234eef28623 + size: 5073938312 hash: md5 path: mouse_thymus_cite.h5mu From 4dcf01e62f168ff470cf8490b218ab7f41ef12d1 Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 19:38:23 -0800 Subject: [PATCH 04/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/mouse_thymus_cite_totalvi with DVC --- data/mouse_thymus_cite_totalvi.dvc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/mouse_thymus_cite_totalvi.dvc b/data/mouse_thymus_cite_totalvi.dvc index 2b5c362..d773a33 100644 --- a/data/mouse_thymus_cite_totalvi.dvc +++ b/data/mouse_thymus_cite_totalvi.dvc @@ -1,6 +1,6 @@ outs: -- md5: ca64a2dc33acb804857624547930db8f.dir - size: 72953633 +- md5: 7a75a6f2be0db755e093c54a6959c810.dir + size: 19740513 nfiles: 1 hash: md5 path: mouse_thymus_cite_totalvi From 65feb57b042c62e6f633b71c2e97a4275460212a Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 19:41:48 -0800 Subject: [PATCH 05/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/haniffa_covid_pbmc.h5mu with DVC --- data/.gitignore | 1 + data/haniffa_covid_pbmc.h5mu.dvc | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 data/haniffa_covid_pbmc.h5mu.dvc diff --git a/data/.gitignore b/data/.gitignore index c29c7d1..5b7cde7 100644 --- a/data/.gitignore +++ b/data/.gitignore @@ -1,2 +1,3 @@ /mouse_thymus_cite.h5mu /mouse_thymus_cite_totalvi +/haniffa_covid_pbmc.h5mu diff --git a/data/haniffa_covid_pbmc.h5mu.dvc b/data/haniffa_covid_pbmc.h5mu.dvc new file mode 100644 index 0000000..93b8cb5 --- /dev/null +++ b/data/haniffa_covid_pbmc.h5mu.dvc @@ -0,0 +1,5 @@ +outs: +- md5: 4e811c9890e6865ba5d6b90407893fe3 + size: 14964638351 + hash: md5 + path: haniffa_covid_pbmc.h5mu From 29d1322af9a6c820a32953ccc97037f7b8317abc Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 19:46:13 -0800 Subject: [PATCH 06/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/mouse_thymus_cite_totalvi with DVC --- data/mouse_thymus_cite_totalvi.dvc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/mouse_thymus_cite_totalvi.dvc b/data/mouse_thymus_cite_totalvi.dvc index d773a33..73961bc 100644 --- a/data/mouse_thymus_cite_totalvi.dvc +++ b/data/mouse_thymus_cite_totalvi.dvc @@ -1,6 +1,6 @@ outs: -- md5: 7a75a6f2be0db755e093c54a6959c810.dir - size: 19740513 +- md5: 45440ebf423731ce3fde73e9aa1efeff.dir + size: 19740577 nfiles: 1 hash: md5 path: mouse_thymus_cite_totalvi From 06738a2b5559fe8baf34460aa2cd484d42ac979b Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 19:56:20 -0800 Subject: [PATCH 07/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/mouse_thymus_cite_totalvi with DVC --- data/mouse_thymus_cite_totalvi.dvc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/mouse_thymus_cite_totalvi.dvc b/data/mouse_thymus_cite_totalvi.dvc index 73961bc..55581b0 100644 --- a/data/mouse_thymus_cite_totalvi.dvc +++ b/data/mouse_thymus_cite_totalvi.dvc @@ -1,5 +1,5 @@ outs: -- md5: 45440ebf423731ce3fde73e9aa1efeff.dir +- md5: 45714ffeb6515fd005fcdce47796fdc4.dir size: 19740577 nfiles: 1 hash: md5 From fbfdfc4d3c9e4a009c6f4b783990f2acc7e92e67 Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 19:59:34 -0800 Subject: [PATCH 08/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/mouse_thymus_cite_totalvi with DVC --- data/mouse_thymus_cite_totalvi.dvc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/mouse_thymus_cite_totalvi.dvc b/data/mouse_thymus_cite_totalvi.dvc index 55581b0..705a4ee 100644 --- a/data/mouse_thymus_cite_totalvi.dvc +++ b/data/mouse_thymus_cite_totalvi.dvc @@ -1,5 +1,5 @@ outs: -- md5: 45714ffeb6515fd005fcdce47796fdc4.dir +- md5: e22d18251deccac94275ce60325dcec1.dir size: 19740577 nfiles: 1 hash: md5 From 8c540ada5f08d6980122381603093bfa21711d65 Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 21:20:54 -0800 Subject: [PATCH 09/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/neurips_bone_marrow_cite.h5mu with DVC --- data/.gitignore | 1 + data/neurips_bone_marrow_cite.h5mu.dvc | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 data/neurips_bone_marrow_cite.h5mu.dvc diff --git a/data/.gitignore b/data/.gitignore index 5b7cde7..a5468ae 100644 --- a/data/.gitignore +++ b/data/.gitignore @@ -1,3 +1,4 @@ /mouse_thymus_cite.h5mu /mouse_thymus_cite_totalvi /haniffa_covid_pbmc.h5mu +/neurips_bone_marrow_cite.h5mu diff --git a/data/neurips_bone_marrow_cite.h5mu.dvc b/data/neurips_bone_marrow_cite.h5mu.dvc new file mode 100644 index 0000000..aa21696 --- /dev/null +++ b/data/neurips_bone_marrow_cite.h5mu.dvc @@ -0,0 +1,5 @@ +outs: +- md5: 43c9f25f3ed5cd338df4c74f6a8bf64c + size: 914992298 + hash: md5 + path: neurips_bone_marrow_cite.h5mu From 0e1847ab89cdc9fb1f9e618e976acddfb2f40b59 Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 22:06:48 -0800 Subject: [PATCH 10/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/neurips_bone_marrow_cite.h5mu with DVC --- data/neurips_bone_marrow_cite.h5mu.dvc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/neurips_bone_marrow_cite.h5mu.dvc b/data/neurips_bone_marrow_cite.h5mu.dvc index aa21696..1071bd7 100644 --- a/data/neurips_bone_marrow_cite.h5mu.dvc +++ b/data/neurips_bone_marrow_cite.h5mu.dvc @@ -1,5 +1,5 @@ outs: -- md5: 43c9f25f3ed5cd338df4c74f6a8bf64c - size: 914992298 +- md5: 3f68254232dfe342a45a612684f36d92 + size: 881794010 hash: md5 path: neurips_bone_marrow_cite.h5mu From b2b444f4d37be3c165a570d1e9e1eb68971820a3 Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 22:31:09 -0800 Subject: [PATCH 11/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/heart_cell_atlas.h5mu with DVC --- data/.gitignore | 1 + data/heart_cell_atlas.h5mu.dvc | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 data/heart_cell_atlas.h5mu.dvc diff --git a/data/.gitignore b/data/.gitignore index a5468ae..d584f9e 100644 --- a/data/.gitignore +++ b/data/.gitignore @@ -2,3 +2,4 @@ /mouse_thymus_cite_totalvi /haniffa_covid_pbmc.h5mu /neurips_bone_marrow_cite.h5mu +/heart_cell_atlas.h5mu diff --git a/data/heart_cell_atlas.h5mu.dvc b/data/heart_cell_atlas.h5mu.dvc new file mode 100644 index 0000000..ae6ed94 --- /dev/null +++ b/data/heart_cell_atlas.h5mu.dvc @@ -0,0 +1,5 @@ +outs: +- md5: 767bf9739d979b6a808164528f70802b + size: 28528362 + hash: md5 + path: heart_cell_atlas.h5mu From 9f220c8c16ae5fa00a74bcfa7e0ffbaf59ca4bfb Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 22:32:14 -0800 Subject: [PATCH 12/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/bone_marrow_cite_totalvi with DVC --- data/.gitignore | 1 + data/bone_marrow_cite_totalvi.dvc | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 data/bone_marrow_cite_totalvi.dvc diff --git a/data/.gitignore b/data/.gitignore index d584f9e..b042d58 100644 --- a/data/.gitignore +++ b/data/.gitignore @@ -3,3 +3,4 @@ /haniffa_covid_pbmc.h5mu /neurips_bone_marrow_cite.h5mu /heart_cell_atlas.h5mu +/bone_marrow_cite_totalvi diff --git a/data/bone_marrow_cite_totalvi.dvc b/data/bone_marrow_cite_totalvi.dvc new file mode 100644 index 0000000..c204929 --- /dev/null +++ b/data/bone_marrow_cite_totalvi.dvc @@ -0,0 +1,6 @@ +outs: +- md5: d9b1e348df00254a404d5f53a5e10cbd.dir + size: 19834273 + nfiles: 1 + hash: md5 + path: bone_marrow_cite_totalvi From 5f98697c415c2609c83c9284d7f854321d385cc8 Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 22:34:45 -0800 Subject: [PATCH 13/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/heart_cell_atlas_scvi with DVC --- data/.gitignore | 1 + data/heart_cell_atlas_scvi.dvc | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 data/heart_cell_atlas_scvi.dvc diff --git a/data/.gitignore b/data/.gitignore index b042d58..0567b00 100644 --- a/data/.gitignore +++ b/data/.gitignore @@ -4,3 +4,4 @@ /neurips_bone_marrow_cite.h5mu /heart_cell_atlas.h5mu /bone_marrow_cite_totalvi +/heart_cell_atlas_scvi diff --git a/data/heart_cell_atlas_scvi.dvc b/data/heart_cell_atlas_scvi.dvc new file mode 100644 index 0000000..65f889f --- /dev/null +++ b/data/heart_cell_atlas_scvi.dvc @@ -0,0 +1,6 @@ +outs: +- md5: 5d7b4318da7edea125a8ed6369c1e44a.dir + size: 3371610 + nfiles: 1 + hash: md5 + path: heart_cell_atlas_scvi From 512458281f30eb4dd11e157021faaaa255c44318 Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 22:38:12 -0800 Subject: [PATCH 14/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/heart_cell_atlas.h5ad with DVC --- data/.gitignore | 1 + data/heart_cell_atlas.h5ad.dvc | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 data/heart_cell_atlas.h5ad.dvc diff --git a/data/.gitignore b/data/.gitignore index 0567b00..5b771c8 100644 --- a/data/.gitignore +++ b/data/.gitignore @@ -5,3 +5,4 @@ /heart_cell_atlas.h5mu /bone_marrow_cite_totalvi /heart_cell_atlas_scvi +/heart_cell_atlas.h5ad diff --git a/data/heart_cell_atlas.h5ad.dvc b/data/heart_cell_atlas.h5ad.dvc new file mode 100644 index 0000000..62c9c14 --- /dev/null +++ b/data/heart_cell_atlas.h5ad.dvc @@ -0,0 +1,5 @@ +outs: +- md5: 767bf9739d979b6a808164528f70802b + size: 28528362 + hash: md5 + path: heart_cell_atlas.h5ad From 6b8b19eca19517a316763f50fbcada3e2a82a30d Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 22:38:24 -0800 Subject: [PATCH 15/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/hlca_core.h5ad with DVC --- data/.gitignore | 1 + data/hlca_core.h5ad.dvc | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 data/hlca_core.h5ad.dvc diff --git a/data/.gitignore b/data/.gitignore index 5b771c8..96c5f84 100644 --- a/data/.gitignore +++ b/data/.gitignore @@ -6,3 +6,4 @@ /bone_marrow_cite_totalvi /heart_cell_atlas_scvi /heart_cell_atlas.h5ad +/hlca_core.h5ad diff --git a/data/hlca_core.h5ad.dvc b/data/hlca_core.h5ad.dvc new file mode 100644 index 0000000..2da940e --- /dev/null +++ b/data/hlca_core.h5ad.dvc @@ -0,0 +1,5 @@ +outs: +- md5: 923ab873d03cff49cfd53a9063e6c1ed + size: 1940424488 + hash: md5 + path: hlca_core.h5ad From 93343a6fbac73d1c4622b0b5a32f653677ec49cd Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 22:41:22 -0800 Subject: [PATCH 16/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/heart_cell_atlas_scvi with DVC --- data/heart_cell_atlas_scvi.dvc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/heart_cell_atlas_scvi.dvc b/data/heart_cell_atlas_scvi.dvc index 65f889f..d9d9f69 100644 --- a/data/heart_cell_atlas_scvi.dvc +++ b/data/heart_cell_atlas_scvi.dvc @@ -1,5 +1,5 @@ outs: -- md5: 5d7b4318da7edea125a8ed6369c1e44a.dir +- md5: 75670e08aee1039bc163245eb6fc2152.dir size: 3371610 nfiles: 1 hash: md5 From 0d8f2c3481bd4047c84714b1db49dd6ee288532a Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 22:48:55 -0800 Subject: [PATCH 17/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/hlca_reference_scanvi with DVC --- data/.gitignore | 1 + data/hlca_reference_scanvi.dvc | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 data/hlca_reference_scanvi.dvc diff --git a/data/.gitignore b/data/.gitignore index 96c5f84..18e3eb5 100644 --- a/data/.gitignore +++ b/data/.gitignore @@ -7,3 +7,4 @@ /heart_cell_atlas_scvi /heart_cell_atlas.h5ad /hlca_core.h5ad +/hlca_reference_scanvi diff --git a/data/hlca_reference_scanvi.dvc b/data/hlca_reference_scanvi.dvc new file mode 100644 index 0000000..b897e1b --- /dev/null +++ b/data/hlca_reference_scanvi.dvc @@ -0,0 +1,6 @@ +outs: +- md5: 5c77b351ae8e005c3180d45cf1281586.dir + size: 5825950 + nfiles: 1 + hash: md5 + path: hlca_reference_scanvi From ead797a9cf122713edfb50132a2af984cc442aa0 Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 22:55:51 -0800 Subject: [PATCH 18/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/test_data.h5ad with DVC --- data/.gitignore | 1 + data/test_data.h5ad.dvc | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 data/test_data.h5ad.dvc diff --git a/data/.gitignore b/data/.gitignore index 18e3eb5..02c6c4a 100644 --- a/data/.gitignore +++ b/data/.gitignore @@ -8,3 +8,4 @@ /heart_cell_atlas.h5ad /hlca_core.h5ad /hlca_reference_scanvi +/test_data.h5ad diff --git a/data/test_data.h5ad.dvc b/data/test_data.h5ad.dvc new file mode 100644 index 0000000..867a3fb --- /dev/null +++ b/data/test_data.h5ad.dvc @@ -0,0 +1,5 @@ +outs: +- md5: c953ede658081b53caa02ce334156c92 + size: 1008864 + hash: md5 + path: test_data.h5ad From c9a6269939e012b9dd10b81a28bb3fc203477840 Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 22:56:05 -0800 Subject: [PATCH 19/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/test_scvi with DVC --- data/.gitignore | 1 + data/test_scvi.dvc | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 data/test_scvi.dvc diff --git a/data/.gitignore b/data/.gitignore index 02c6c4a..95b9e20 100644 --- a/data/.gitignore +++ b/data/.gitignore @@ -9,3 +9,4 @@ /hlca_core.h5ad /hlca_reference_scanvi /test_data.h5ad +/test_scvi diff --git a/data/test_scvi.dvc b/data/test_scvi.dvc new file mode 100644 index 0000000..dcb37e1 --- /dev/null +++ b/data/test_scvi.dvc @@ -0,0 +1,6 @@ +outs: +- md5: 96c9aa4e2b723743f2dbc22a017f0ebe.dir + size: 309658 + nfiles: 1 + hash: md5 + path: test_scvi From 5cb48edff2ec55aee093944b74f78151c6be7256 Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 22:59:54 -0800 Subject: [PATCH 20/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/test_data.h5ad with DVC --- data/test_data.h5ad.dvc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/test_data.h5ad.dvc b/data/test_data.h5ad.dvc index 867a3fb..adfc425 100644 --- a/data/test_data.h5ad.dvc +++ b/data/test_data.h5ad.dvc @@ -1,5 +1,5 @@ outs: -- md5: c953ede658081b53caa02ce334156c92 +- md5: 91bb92467e4f291e0ba0c1663dfabc96 size: 1008864 hash: md5 path: test_data.h5ad From 891514a1759ec475b903aaa6aa5bf3d4d0aee412 Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 23:00:10 -0800 Subject: [PATCH 21/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/test_scvi with DVC --- data/test_scvi.dvc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/test_scvi.dvc b/data/test_scvi.dvc index dcb37e1..822211b 100644 --- a/data/test_scvi.dvc +++ b/data/test_scvi.dvc @@ -1,5 +1,5 @@ outs: -- md5: 96c9aa4e2b723743f2dbc22a017f0ebe.dir +- md5: 0155f7292347d68142c70584a7607eae.dir size: 309658 nfiles: 1 hash: md5 From 0b9cfa44f5c74f046eb58769a3ad6b71c18aa81e Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 23:05:50 -0800 Subject: [PATCH 22/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/neurips_bone_marrow_cite.h5mu with DVC From 78681a123a9ee57bc674e7281f03ba3c203754ab Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 23:21:23 -0800 Subject: [PATCH 23/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/mouse_thymus_cite.h5mu with DVC From 4032ac5dd7408b3a1fefaa6e4f325ed80036560e Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 23:30:17 -0800 Subject: [PATCH 24/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/bone_marrow_cite_totalvi with DVC --- data/bone_marrow_cite_totalvi.dvc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/bone_marrow_cite_totalvi.dvc b/data/bone_marrow_cite_totalvi.dvc index c204929..892e55c 100644 --- a/data/bone_marrow_cite_totalvi.dvc +++ b/data/bone_marrow_cite_totalvi.dvc @@ -1,5 +1,5 @@ outs: -- md5: d9b1e348df00254a404d5f53a5e10cbd.dir +- md5: 731bf03911d0a1cd4188d9e9d8d4aca0.dir size: 19834273 nfiles: 1 hash: md5 From 00e86632dfa26c8357ba226fd3a4dea84a2a8d08 Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Tue, 10 Dec 2024 23:41:58 -0800 Subject: [PATCH 25/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/mouse_thymus_cite_totalvi with DVC --- data/mouse_thymus_cite_totalvi.dvc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/mouse_thymus_cite_totalvi.dvc b/data/mouse_thymus_cite_totalvi.dvc index 705a4ee..d0f1cbe 100644 --- a/data/mouse_thymus_cite_totalvi.dvc +++ b/data/mouse_thymus_cite_totalvi.dvc @@ -1,6 +1,6 @@ outs: -- md5: e22d18251deccac94275ce60325dcec1.dir - size: 19740577 +- md5: 9bf5749a13af50d972b5f23a0c272095.dir + size: 19781665 nfiles: 1 hash: md5 path: mouse_thymus_cite_totalvi From e0474958e88ddf6097ca73055ac7950f10456a17 Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Wed, 11 Dec 2024 00:12:17 -0800 Subject: [PATCH 26/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/neurips_bone_marrow_cite.h5mu with DVC From 4a5827e0bfb2170e9c580bd31d3d2ddb15121ea9 Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Wed, 11 Dec 2024 00:31:28 -0800 Subject: [PATCH 27/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/mouse_thymus_cite.h5mu with DVC --- data/mouse_thymus_cite.h5mu.dvc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/mouse_thymus_cite.h5mu.dvc b/data/mouse_thymus_cite.h5mu.dvc index 158441c..c8acdaf 100644 --- a/data/mouse_thymus_cite.h5mu.dvc +++ b/data/mouse_thymus_cite.h5mu.dvc @@ -1,5 +1,5 @@ outs: -- md5: 22b9936c775f8121d02e6234eef28623 - size: 5073938312 +- md5: 3bc874646f7fabd1a41be6c0dabe8402 + size: 3565090008 hash: md5 path: mouse_thymus_cite.h5mu From af4e79a3b07d6b576ce8e0710b0dd39977f69e9c Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Wed, 11 Dec 2024 00:34:33 -0800 Subject: [PATCH 28/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/bone_marrow_cite_totalvi with DVC --- data/bone_marrow_cite_totalvi.dvc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/bone_marrow_cite_totalvi.dvc b/data/bone_marrow_cite_totalvi.dvc index 892e55c..4996c71 100644 --- a/data/bone_marrow_cite_totalvi.dvc +++ b/data/bone_marrow_cite_totalvi.dvc @@ -1,5 +1,5 @@ outs: -- md5: 731bf03911d0a1cd4188d9e9d8d4aca0.dir +- md5: 78ef07339eacb072b5adcbee125c9acc.dir size: 19834273 nfiles: 1 hash: md5 From e3d29a61bc69f9ec61b1ec4259a93bd621cfa308 Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Wed, 11 Dec 2024 08:44:24 -0800 Subject: [PATCH 29/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/mouse_thymus_cite.h5mu with DVC From 8a1b5d7462e0fe415a077975a08a0d9c2fd3d53c Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Wed, 11 Dec 2024 09:01:48 -0800 Subject: [PATCH 30/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/mouse_thymus_cite_totalvi with DVC --- data/mouse_thymus_cite_totalvi.dvc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/mouse_thymus_cite_totalvi.dvc b/data/mouse_thymus_cite_totalvi.dvc index d0f1cbe..cd0a2d0 100644 --- a/data/mouse_thymus_cite_totalvi.dvc +++ b/data/mouse_thymus_cite_totalvi.dvc @@ -1,5 +1,5 @@ outs: -- md5: 9bf5749a13af50d972b5f23a0c272095.dir +- md5: f515e7546f9d4c18be2ab57aadcd3114.dir size: 19781665 nfiles: 1 hash: md5 From 8d660a49b861b99ba976247c1b2186b41c7c673a Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Wed, 11 Dec 2024 10:47:09 -0800 Subject: [PATCH 31/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/mouse_thymus_cite.h5mu with DVC --- data/mouse_thymus_cite.h5mu.dvc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/mouse_thymus_cite.h5mu.dvc b/data/mouse_thymus_cite.h5mu.dvc index c8acdaf..4e07b40 100644 --- a/data/mouse_thymus_cite.h5mu.dvc +++ b/data/mouse_thymus_cite.h5mu.dvc @@ -1,5 +1,5 @@ outs: -- md5: 3bc874646f7fabd1a41be6c0dabe8402 - size: 3565090008 +- md5: c6881ba87a9b9234be11cbebd9bef3c0 + size: 3514139984 hash: md5 path: mouse_thymus_cite.h5mu From 172b28c76cae620c9c4f825f5cd337377730432b Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Wed, 11 Dec 2024 10:50:15 -0800 Subject: [PATCH 32/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/haniffa_covid_pbmc.h5mu with DVC --- data/haniffa_covid_pbmc.h5mu.dvc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/haniffa_covid_pbmc.h5mu.dvc b/data/haniffa_covid_pbmc.h5mu.dvc index 93b8cb5..2181c18 100644 --- a/data/haniffa_covid_pbmc.h5mu.dvc +++ b/data/haniffa_covid_pbmc.h5mu.dvc @@ -1,5 +1,5 @@ outs: -- md5: 4e811c9890e6865ba5d6b90407893fe3 - size: 14964638351 +- md5: 1dd19cfb0c6293275551bbdbdf00e309 + size: 13680956370 hash: md5 path: haniffa_covid_pbmc.h5mu From b9e48329e766fac8d442bbfda20a9f04146b0588 Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Wed, 11 Dec 2024 10:56:26 -0800 Subject: [PATCH 33/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/mouse_thymus_cite.h5mu with DVC --- data/mouse_thymus_cite.h5mu.dvc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/mouse_thymus_cite.h5mu.dvc b/data/mouse_thymus_cite.h5mu.dvc index 4e07b40..a8cc920 100644 --- a/data/mouse_thymus_cite.h5mu.dvc +++ b/data/mouse_thymus_cite.h5mu.dvc @@ -1,5 +1,5 @@ outs: -- md5: c6881ba87a9b9234be11cbebd9bef3c0 - size: 3514139984 +- md5: 27ae88708a0e2e42591f8665601ca915 + size: 1807038906 hash: md5 path: mouse_thymus_cite.h5mu From 19eda9b174940c67db10ef70311516af5bc84fac Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Wed, 11 Dec 2024 11:17:07 -0800 Subject: [PATCH 34/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/mouse_thymus_cite_totalvi with DVC --- data/mouse_thymus_cite_totalvi.dvc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/mouse_thymus_cite_totalvi.dvc b/data/mouse_thymus_cite_totalvi.dvc index cd0a2d0..a9c9b76 100644 --- a/data/mouse_thymus_cite_totalvi.dvc +++ b/data/mouse_thymus_cite_totalvi.dvc @@ -1,5 +1,5 @@ outs: -- md5: f515e7546f9d4c18be2ab57aadcd3114.dir +- md5: a2e5acb9b535c112698ea6cad43a6a95.dir size: 19781665 nfiles: 1 hash: md5 From 30513023da7f46b2a827e1bc4875dcd0ef79191c Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Wed, 11 Dec 2024 12:06:20 -0800 Subject: [PATCH 35/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/haniffa_covid_pbmc with DVC --- data/.gitignore | 1 + data/haniffa_covid_pbmc.dvc | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 data/haniffa_covid_pbmc.dvc diff --git a/data/.gitignore b/data/.gitignore index 95b9e20..ad2831b 100644 --- a/data/.gitignore +++ b/data/.gitignore @@ -10,3 +10,4 @@ /hlca_reference_scanvi /test_data.h5ad /test_scvi +/haniffa_covid_pbmc diff --git a/data/haniffa_covid_pbmc.dvc b/data/haniffa_covid_pbmc.dvc new file mode 100644 index 0000000..ec69365 --- /dev/null +++ b/data/haniffa_covid_pbmc.dvc @@ -0,0 +1,6 @@ +outs: +- md5: 9727ed6842ec135c97bdaf9fe9475a10.dir + size: 30431585 + nfiles: 1 + hash: md5 + path: haniffa_covid_pbmc From 8be50faf4d558ab584dd97308fa742a3a8f12bc8 Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Wed, 11 Dec 2024 13:16:21 -0800 Subject: [PATCH 36/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/haniffa_covid_pbmc.h5mu with DVC From 84bc5391c2a37dd92997a05146781ac9442196f7 Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Wed, 11 Dec 2024 13:57:29 -0800 Subject: [PATCH 37/38] Track /home/cane/Documents/scvi-tools/scvi-hub-models/data/haniffa_covid_pbmc.h5mu with DVC --- data/haniffa_covid_pbmc.h5mu.dvc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/haniffa_covid_pbmc.h5mu.dvc b/data/haniffa_covid_pbmc.h5mu.dvc index 2181c18..3663bfe 100644 --- a/data/haniffa_covid_pbmc.h5mu.dvc +++ b/data/haniffa_covid_pbmc.h5mu.dvc @@ -1,5 +1,5 @@ outs: -- md5: 1dd19cfb0c6293275551bbdbdf00e309 - size: 13680956370 +- md5: fcb4c382ba3695eba206c4714fca3d45 + size: 9357617754 hash: md5 path: haniffa_covid_pbmc.h5mu From 7a5886ecd6d4f5d0948a162c8d156e3a625defe1 Mon Sep 17 00:00:00 2001 From: Can Ergen Date: Wed, 11 Dec 2024 21:57:02 -0800 Subject: [PATCH 38/38] Revision done. --- src/scvi_hub_models/__main__.py | 12 +- .../config/haniffa_covid_pbmc.json | 5 +- .../config/heart_cell_atlas.json | 4 + .../config/human_lung_cell_atlas.json | 5 +- .../config/mouse_thymus_cite.json | 7 +- src/scvi_hub_models/config/neurips_cite.json | 7 +- src/scvi_hub_models/config/test_scvi.json | 4 +- src/scvi_hub_models/models/_base_workflow.py | 184 +++--- .../models/_haniffa_covid_pbmc.py | 31 +- .../models/_heart_cell_atlas.py | 9 +- .../models/_human_lung_cell_atlas.py | 17 +- .../models/_mouse_thymus_cite.py | 31 +- src/scvi_hub_models/models/_neurips_cite.py | 49 +- src/scvi_hub_models/models/_test_scvi.py | 27 +- src/scvi_hub_models/test.ipynb | 603 ++++++++++++++++++ 15 files changed, 821 insertions(+), 174 deletions(-) create mode 100644 src/scvi_hub_models/test.ipynb diff --git a/src/scvi_hub_models/__main__.py b/src/scvi_hub_models/__main__.py index f39cf3e..dc083eb 100644 --- a/src/scvi_hub_models/__main__.py +++ b/src/scvi_hub_models/__main__.py @@ -12,7 +12,15 @@ @click.option("--dry_run", type=bool, default=False, help="Dry run the workflow.") @click.option("--config_key", type=str, help="Use a different config file, e.g. for test purpose.") @click.option("--save_dir", type=str, help="Directory to save intermediate results (defaults temporary).") -def run_workflow(model_name: str, dry_run: bool, config_key: str = None, save_dir: str = None) -> None: +@click.option("--reload_data", type=bool, help="Reload the data or get from DVC.") +@click.option("--reload_model", type=bool, help="Reload the model or get from DVC.") +def run_workflow( + model_name: str, + dry_run: bool, + config_key: str = None, + save_dir: str = None, + reload_data: bool = False, + reload_model: bool = False) -> None: """Run the workflow for a specific model.""" from importlib import import_module if not config_key: @@ -22,7 +30,7 @@ def run_workflow(model_name: str, dry_run: bool, config_key: str = None, save_di Workflow = workflow_module._Workflow config = json_data_store[config_key] - workflow = Workflow(save_dir=save_dir, dry_run=dry_run, config=config) + workflow = Workflow(save_dir=save_dir, dry_run=dry_run, config=config, reload_data=reload_data, reload_model=reload_model) workflow.run() diff --git a/src/scvi_hub_models/config/haniffa_covid_pbmc.json b/src/scvi_hub_models/config/haniffa_covid_pbmc.json index fe5bb16..12c9e50 100644 --- a/src/scvi_hub_models/config/haniffa_covid_pbmc.json +++ b/src/scvi_hub_models/config/haniffa_covid_pbmc.json @@ -2,9 +2,11 @@ "model_dir": "haniffa_covid_pbmc", "model_class": "TOTALVI", "repo_name": "scvi-tools/haniffa_covid_pbmc_totalvi", + "reload_data": true, "extra_data_kwargs": { "reference_adata_cxg_id": "c7775e88-49bf-4ba2-a03b-93f00447c958", - "reference_adata_fname": "haniffa_covid_pbmc.h5ad" + "reference_adata_fname": "haniffa_covid_pbmc.h5ad", + "large_training_file_name": "haniffa_covid_pbmc.h5mu" }, "metadata": { "training_data_url": "https://datasets.cellxgene.cziscience.com/5ad66a4f-d619-4cb3-8015-a87c755647b3.h5ad", @@ -16,6 +18,7 @@ "description": "CITE-seq to measure RNA and surface proteins in thymocytes from wild-type and T cell lineage-restricted mice to generate a comprehensive timeline of cell state for each T cell lineage.", "references": "Steier, Z., Aylard, D.A., McIntyre, L.L. et al. Single-cell multiomic analysis of thymocyte development reveals drivers of CD4+ T cell and CD8+ T cell lineage commitment. Nat Immunol 24, 1579–1590 (2023). https://doi.org/10.1038/s41590-023-01584-0." }, + "criticism_settings": { "n_samples": 3, "cell_type_key": "cell_type" diff --git a/src/scvi_hub_models/config/heart_cell_atlas.json b/src/scvi_hub_models/config/heart_cell_atlas.json index 71ee449..70a77f5 100644 --- a/src/scvi_hub_models/config/heart_cell_atlas.json +++ b/src/scvi_hub_models/config/heart_cell_atlas.json @@ -2,6 +2,10 @@ "model_dir": "heart_cell_atlas_scvi", "model_class": "SCVI", "repo_name": "scvi-tools/heart-cell-atlas-scvi", + "extra_data_kwargs": { + "reference_adata_fname": "heart_cell_atlas.h5ad", + "large_training_file_name": "heart_cell_atlas.h5ad" + }, "metadata": { "training_data_url": "https://www.heartcellatlas.org/#DataSources", "tissues": ["heart"], diff --git a/src/scvi_hub_models/config/human_lung_cell_atlas.json b/src/scvi_hub_models/config/human_lung_cell_atlas.json index 3054b4b..a26c61e 100644 --- a/src/scvi_hub_models/config/human_lung_cell_atlas.json +++ b/src/scvi_hub_models/config/human_lung_cell_atlas.json @@ -1,5 +1,5 @@ { - "model_dir": "hlca_scanvi_reference", + "model_dir": "hlca_reference_scanvi", "model_class": "SCANVI", "repo_name": "scvi-tools/human-lung-cell-atlas-scanvi", "extra_data_kwargs": { @@ -7,7 +7,8 @@ "legacy_model_hash": "a7cd60f4342292b3cba54545bcd8a34decdc8e6b82163f009273d543e7e3910e", "legacy_model_dir": "hlca_scanvi_reference_legacy", "reference_adata_cxg_id": "066943a2-fdac-4b29-b348-40cede398e4e", - "reference_adata_fname": "hlca_core.h5ad" + "reference_adata_fname": "hlca_core.h5ad", + "large_training_file_name": "hlca_core.h5ad" }, "metadata": { "training_data_url": "https://cellxgene.cziscience.com/collections/6f6d381a-7701-4781-935c-db10d30de293", diff --git a/src/scvi_hub_models/config/mouse_thymus_cite.json b/src/scvi_hub_models/config/mouse_thymus_cite.json index 06b590a..5c2ca42 100644 --- a/src/scvi_hub_models/config/mouse_thymus_cite.json +++ b/src/scvi_hub_models/config/mouse_thymus_cite.json @@ -1,11 +1,10 @@ { - "model_dir": "mouse_thymus_cite", + "model_dir": "mouse_thymus_cite_totalvi", "model_class": "TOTALVI", - "repo_name": "scvi-tools/mouse_thymus_totalvi", - "reload_data": true, + "repo_name": "scvi-tools/mouse_thymus_cite_totalvi", "extra_data_kwargs": { "reference_adata_cxg_id": "c14c54f8-85d8-45db-9de7-6ab572cc748a", - "reference_adata_fname": "thymus_cite.h5ad", + "reference_adata_fname": "mouse_thymus_cite.h5ad", "large_training_file_name": "mouse_thymus_cite.h5mu" }, "metadata": { diff --git a/src/scvi_hub_models/config/neurips_cite.json b/src/scvi_hub_models/config/neurips_cite.json index 8d7eec6..e8cf6e0 100644 --- a/src/scvi_hub_models/config/neurips_cite.json +++ b/src/scvi_hub_models/config/neurips_cite.json @@ -1,9 +1,12 @@ { - "model_dir": "bone_marrow_cite", + "model_dir": "bone_marrow_cite_totalvi", "model_class": "TOTALVI", "repo_name": "scvi-tools/bone_marrow_cite_totalvi", "extra_data_kwargs": { - "reference_adata_fname": "bmmc_cite.h5ad" + "url": "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE194122&format=file&file=GSE194122%5Fopenproblems%5Fneurips2021%5Fcite%5FBMMC%5Fprocessed%2Eh5ad%2Egz", + "hash": "b9b50fade9349719cba23c97c6515d3501a32ee3735fe95fe51221d2e8a5f361", + "reference_adata_fname": "bmmc_cite.h5ad.gz", + "large_training_file_name": "neurips_bone_marrow_cite.h5mu" }, "metadata": { "training_data_url": "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE194122&format=file&file=GSE194122%5Fopenproblems%5Fneurips2021%5Fcite%5FBMMC%5Fprocessed%2Eh5ad%2Egz", diff --git a/src/scvi_hub_models/config/test_scvi.json b/src/scvi_hub_models/config/test_scvi.json index 3ba0f83..104b9e1 100644 --- a/src/scvi_hub_models/config/test_scvi.json +++ b/src/scvi_hub_models/config/test_scvi.json @@ -2,9 +2,11 @@ "model_dir": "test_scvi", "model_class": "SCVI", "repo_name": "scvi-tools/test-scvi", + "extra_data_kwargs": { + "large_training_file_name": "test_data.h5ad" + }, "collection_name": "test", "minify_model": false, - "extra_data_kwargs": {}, "metadata": { "tissues": ["synthetic"], "data_modalities": ["rna"], diff --git a/src/scvi_hub_models/models/_base_workflow.py b/src/scvi_hub_models/models/_base_workflow.py index 08d19c8..0787577 100644 --- a/src/scvi_hub_models/models/_base_workflow.py +++ b/src/scvi_hub_models/models/_base_workflow.py @@ -14,28 +14,9 @@ from scvi.hub import HubMetadata, HubModel, HubModelCardHelper from scvi.model.base import BaseModelClass -import subprocess, os -from pydrive.auth import GoogleAuth -from pydrive.drive import GoogleDrive - - -def upload_gdrive(path): - remote_url = subprocess.check_output(["dvc", "remote", "list"], text=True).split()[-1] - - # Extract the Google Drive folder ID from the remote URL - folder_id = remote_url.split("gdrive://")[-1] - - # Check if DVC detected an update - if "modified" in subprocess.check_output(["dvc", "diff", "--json"], text=True): - GoogleAuth().LocalWebserverAuth() - drive = GoogleDrive(GoogleAuth()) - file = drive.CreateFile({"title": os.path.basename(path), "parents": [{"id": folder_id}]}) - file.SetContentFile(path) - file.Upload() - print(f"Uploaded {path} to Google Drive folder {folder_id}.") - # Specify your repository and target file -repo_path = "." + +repo_path = os.path.abspath(Path(__file__).parent.parent.parent.parent) dvc_repo = Repo(repo_path) git_repo = git.Repo(repo_path) @@ -67,17 +48,25 @@ class BaseModelWorkflow: config A :class:`~frozendict.frozendict` containing the configuration for the workflow. Can only be set once. + reload_data + If ``True``, the data will be reloaded. Otherwise, it will be pulled from DVC. Defaults to ``False``. + reload_model + If ``True``, the model will be reloaded. Otherwise, it will be pulled from DVC. Defaults to ``False``. """ def __init__( self, save_dir: str | None = None, dry_run: bool = False, - config: frozendict | None = None + config: frozendict | None = None, + reload_data: bool = True, + reload_model: bool = True, ): self.save_dir = save_dir self.dry_run = dry_run self.config = config + self.reload_data = reload_data + self.reload_model = reload_model @property def save_dir(self): @@ -114,22 +103,41 @@ def config(self, value: frozendict): value = frozendict(value) self._config = value + @property + def reload_data(self): + return self._reload_data + + @reload_data.setter + def reload_data(self, value: bool): + if hasattr(self, "_reload_data"): + raise AttributeError("`reload_data` can only be set once.") + self._reload_data = value + + @property + def reload_model(self): + return self._reload_model + + @reload_model.setter + def reload_model(self, value: bool): + if hasattr(self, "_reload_model"): + raise AttributeError("`reload_model` can only be set once.") + self._reload_model = value + def get_adata(self) -> anndata.AnnData | None: """Download and load the dataset.""" logger.info("Loading dataset.") if self.dry_run: return None - if self.config['reload_data']: - path_file = os.path.join('data/', self.config['extra_data_kwargs']['large_training_file_name']) - adata = self.download_adata() + if self.reload_data: + path_file = os.path.join(f'{repo_path}/data/', self.config['extra_data_kwargs']['large_training_file_name']) + print(path_file) + adata = self.download_adata(path_file) dvc_repo.add(path_file) git_repo.index.commit(f"Track {path_file} with DVC") - print(f"Pushing {path_file} to DVC remote...") dvc_repo.push() git_repo.remote().push() - upload_gdrive(path_file) else: - path_file = os.path.join('data/', self.config['extra_data_kwargs']['large_training_file_name']) + path_file = os.path.join(f'{repo_path}/data/', self.config['extra_data_kwargs']['large_training_file_name']) dvc_repo.pull([path_file]) if path_file.endswith(".h5mu"): adata = mudata.read_h5mu(path_file) @@ -137,34 +145,40 @@ def get_adata(self) -> anndata.AnnData | None: adata = anndata.read_h5ad(path_file) return adata - def _get_adata(self, url: str, hash: str, file_path: str) -> str: + def get_model(self, adata) -> BaseModelClass | None: + """Download and load the model.""" + logger.info("Loading model.") + if self.dry_run: + return None + if self.reload_model: + path_file = os.path.join(f'{repo_path}/data/', self.config['model_dir']) + model = self.load_model(adata) + model.save(path_file, overwrite=True, save_anndata=False) + dvc_repo.add(path_file) + git_repo.index.commit(f"Track {path_file} with DVC") + dvc_repo.push() + git_repo.remote().push() + else: + path_file = os.path.join(f'{repo_path}/data/', self.config['model_dir']) + dvc_repo.pull([path_file]) + model = self.default_load_model(adata, self.config['model_class'], path_file) + return model + + def _get_adata(self, url: str, hash: str, file_path: str, processor: str | None = None) -> str: logger.info("Downloading and reading data.") if self.dry_run: return None - retrieve( + file_out = retrieve( url=url, known_hash=hash, fname=file_path, path=self.save_dir, - processor=None, - ) - return anndata.read_h5ad(os.path.join(self.save_dir, file_path)) - - def _download_model(self, url: str, hash: str, file_path: str) -> str: - logger.info("Downloading model.") - if self.dry_run: - return None - - return retrieve( - url=url, # - known_hash=hash, #config["adata_hashes"][tissue], - fname=file_path, # f"{tissue}_adata.h5ad" - path=self.save_dir, - processor=None, + processor=processor, ) + return anndata.read_h5ad(file_out) - def _load_model(self, model_path: str, adata: anndata.AnnData, model_name: str): + def default_load_model(self, adata: anndata.AnnData, model_name: str, model_path: str | None = None) -> BaseModelClass: """Load the model.""" logger.info("Loading model.") if self.dry_run: @@ -178,48 +192,20 @@ def _load_model(self, model_path: str, adata: anndata.AnnData, model_name: str): elif model_name == "CondSCVI": from scvi.model import CondSCVI model_cls = CondSCVI + elif model_name == "TOTALVI": + from scvi.model import TOTALVI + model_cls = TOTALVI elif model_name == "Stereoscope": from scvi.external import RNAStereoscope model_cls = RNAStereoscope else: raise ValueError(f"Model {model_name} not recognized.") - model = model_cls.load(os.path.join(self.save_dir, model_path), adata=adata) + if model_path is None: + model_path = os.path.join(self.save_dir, self.config["model_dir"]) + model = model_cls.load(model_path, adata=adata) return model - def _create_hub_model( - self, - model_path: str, - training_data_url: str | None = None - ) -> HubModel | None: - logger.info("Creating the HubModel.") - if self.dry_run: - return None - - if training_data_url is None: - training_data_url = self.config.get("training_data_url", None) - - metadata = self.config["metadata"] - hub_metadata = HubMetadata.from_dir( - model_path, - anndata_version=anndata_version - ) - model_card = HubModelCardHelper.from_dir( - model_path, - anndata_version=anndata_version, - license_info=metadata.get("license_info", "mit"), - data_modalities=metadata.get("data_modalities", None), - tissues=metadata.get("tissues", None), - data_is_annotated=metadata.get("data_is_annotated", False), - data_is_minified=metadata.get("data_is_minified", False), - training_data_url=training_data_url, - training_code_url=metadata.get("training_code_url", None), - description=metadata.get("description", None), - references=metadata.get("references", None), - ) - - return HubModel(model_path, hub_metadata, model_card) - def _minify_and_save_model( self, model: BaseModelClass, @@ -252,11 +238,47 @@ def _minify_and_save_model( qzm, qzv = model.get_latent_representation(give_mean=False, return_dist=True) adata.obsm[qzm_key] = qzm adata.obsm[qzv_key] = qzv - model.minify_adata(use_latent_qzm_key=qzm_key, use_latent_qzv_key=qzv_key) + if isinstance(adata, mudata.MuData): + model.minify_mudata(use_latent_qzm_key=qzm_key, use_latent_qzv_key=qzv_key) + else: + model.minify_adata(use_latent_qzm_key=qzm_key, use_latent_qzv_key=qzv_key) model.save(mini_model_path, overwrite=True, save_anndata=True) return mini_model_path + def _create_hub_model( + self, + model_path: str, + training_data_url: str | None = None + ) -> HubModel | None: + logger.info("Creating the HubModel.") + if self.dry_run: + return None + + if training_data_url is None: + training_data_url = self.config.get("training_data_url", None) + + metadata = self.config["metadata"] + hub_metadata = HubMetadata.from_dir( + model_path, + anndata_version=anndata_version + ) + model_card = HubModelCardHelper.from_dir( + model_path, + anndata_version=anndata_version, + license_info=metadata.get("license_info", "mit"), + data_modalities=metadata.get("data_modalities", None), + tissues=metadata.get("tissues", None), + data_is_annotated=metadata.get("data_is_annotated", False), + data_is_minified=metadata.get("data_is_minified", False), + training_data_url=training_data_url, + training_code_url=metadata.get("training_code_url", None), + description=metadata.get("description", None), + references=metadata.get("references", None), + ) + + return HubModel(model_path, hub_metadata, model_card) + def _upload_hub_model(self, hub_model: HubModel, repo_name: str | None = None, **kwargs) -> HubModel: """Upload the HubModel to Hugging Face.""" collection_name = self.config.get("collection_name", None) diff --git a/src/scvi_hub_models/models/_haniffa_covid_pbmc.py b/src/scvi_hub_models/models/_haniffa_covid_pbmc.py index dc47600..6262d18 100644 --- a/src/scvi_hub_models/models/_haniffa_covid_pbmc.py +++ b/src/scvi_hub_models/models/_haniffa_covid_pbmc.py @@ -18,15 +18,12 @@ def _load_adata(self) -> AnnData: adata_path = os.path.join(self.save_dir, self.config['extra_data_kwargs']["reference_adata_fname"]) if not os.path.exists(adata_path): - # TODO for next LTX remove census_version='latest'. - download_source_h5ad(self.config['extra_data_kwargs']["reference_adata_cxg_id"], to_path=adata_path, census_version='latest') + download_source_h5ad(self.config['extra_data_kwargs']["reference_adata_cxg_id"], to_path=adata_path) return sc.read_h5ad(adata_path) def _preprocess_adata(self, adata: AnnData) -> AnnData: import scanpy as sc - print(adata, adata.X.data) - sc.pp.filter_genes(adata, min_counts=3) adata.layers["counts"] = adata.X.copy() sc.pp.highly_variable_genes( @@ -38,20 +35,28 @@ def _preprocess_adata(self, adata: AnnData) -> AnnData: batch_key="sample_id", span=1.0, ) - protein_adata = AnnData(adata.obsm["protein_expression"]) + protein_adata = AnnData( + adata.uns['antibody_raw.X'].toarray(), + obs=adata.obs, + var=adata.uns['antibody_features']) + protein_adata.obs_names = adata.obs_names - del adata.obsm["protein_expression"] - adata = MuData({"rna": adata, "protein": protein_adata}) + del adata.uns['antibody_raw.X'] + del adata.uns['antibody_features'] + del adata.uns['antibody_X'] + del adata.uns['neighbors'] + mdata = MuData({"rna": adata, "protein": protein_adata}) - return adata + return mdata - def load_adata(self) -> AnnData | None: + def download_adata(self, path) -> AnnData | None: """Download and load the dataset.""" logger.info(f"Saving dataset to {self.save_dir} and preprocessing.") if self.dry_run: return None adata = self._load_adata() mdata = self._preprocess_adata(adata) + mdata.write_h5mu(path) return mdata def _initialize_model(self, mdata: MuData) -> TOTALVI: @@ -59,7 +64,7 @@ def _initialize_model(self, mdata: MuData) -> TOTALVI: mdata, rna_layer="counts", protein_layer=None, - batch_key="sample_id", + batch_key="donor_id", modalities={ "rna_layer": "rna", "protein_layer": "protein", @@ -70,11 +75,11 @@ def _initialize_model(self, mdata: MuData) -> TOTALVI: def _train_model(self, model: TOTALVI) -> TOTALVI: """Train the scVI model.""" - model.train(max_epochs=200) + model.train(max_epochs=50) return model - def get_model(self, adata) -> TOTALVI | None: + def load_model(self, adata) -> TOTALVI | None: """Initialize and train the scVI model.""" logger.info("Training the scVI model.") if self.dry_run: @@ -85,7 +90,7 @@ def get_model(self, adata) -> TOTALVI | None: def run(self): super().run() - mdata = self.load_adata() + mdata = self.get_adata() model = self.get_model(mdata) model_path = self._minify_and_save_model(model, mdata) hub_model = self._create_hub_model(model_path) diff --git a/src/scvi_hub_models/models/_heart_cell_atlas.py b/src/scvi_hub_models/models/_heart_cell_atlas.py index 65c3822..abea1b5 100644 --- a/src/scvi_hub_models/models/_heart_cell_atlas.py +++ b/src/scvi_hub_models/models/_heart_cell_atlas.py @@ -31,13 +31,14 @@ def _preprocess_adata(self, adata: AnnData) -> AnnData: return adata - def load_adata(self) -> AnnData | None: + def download_adata(self, path) -> AnnData | None: """Download and load the dataset.""" logger.info(f"Saving heart cell atlas dataset to {self.save_dir}.") if self.dry_run: return None adata = self._load_adata() adata = self._preprocess_adata(adata) + adata.write_h5ad(path) return adata def _initialize_model(self, adata: AnnData) -> SCVI: @@ -51,11 +52,11 @@ def _initialize_model(self, adata: AnnData) -> SCVI: def _train_model(self, model: SCVI) -> SCVI: """Train the scVI model.""" - model.train(max_epochs=5) + model.train(max_epochs=200) return model - def get_model(self, adata) -> SCVI | None: + def load_model(self, adata) -> SCVI | None: """Initialize and train the scVI model.""" logger.info("Training the scVI model.") if self.dry_run: @@ -66,7 +67,7 @@ def get_model(self, adata) -> SCVI | None: def run(self): super().run() - adata = self.load_adata() + adata = self.get_adata() model = self.get_model(adata) model_path = self._minify_and_save_model(model, adata) hub_model = self._create_hub_model(model_path) diff --git a/src/scvi_hub_models/models/_human_lung_cell_atlas.py b/src/scvi_hub_models/models/_human_lung_cell_atlas.py index f68baef..6dd8101 100644 --- a/src/scvi_hub_models/models/_human_lung_cell_atlas.py +++ b/src/scvi_hub_models/models/_human_lung_cell_atlas.py @@ -10,6 +10,9 @@ class _Workflow(BaseModelWorkflow): + def load_model(self, adata: anndata.AnnData) -> BaseModelWorkflow: + return self.default_load_model(adata, self.config['model_class']) + def _download_model(self): from pathlib import Path @@ -23,7 +26,6 @@ def _download_model(self): path=self.save_dir, ) untarred = sorted(untarred) - print(untarred) return str(Path(untarred[0]).parent) def _get_model(self) -> str: @@ -64,7 +66,7 @@ def _preprocess_reference_adata(self, adata: anndata.AnnData, model_path: str) - # .X does not contain raw counts initially adata.X = adata.raw.X - _, genes, _, _ = _load_saved_files(model_path, load_adata=False) + _, genes, _, _ = _load_saved_files(os.path.join(self.save_dir, self.config["model_dir"]), load_adata=False) adata = adata[:, adata.var.index.isin(genes)].copy() # get rid of some var columns that we dont need @@ -108,13 +110,14 @@ def _download_embedding_adata(self) -> str: adata = anndata.io.read_h5ad(adata) return adata[adata.obs["core_or_extension"] == "core"].copy() - def _get_adata(self, model_path: str) -> anndata.AnnData: + def download_adata(self, path) -> anndata.AnnData: logging.info("Loading data.") if self.dry_run: return None ref_adata = self._download_reference_adata() - ref_adata = self._preprocess_reference_adata(ref_adata, model_path) + ref_adata = self._preprocess_reference_adata(ref_adata, self.model_path) ref_adata = self._postprocess_reference_adata(ref_adata) + ref_adata.write_h5ad(path) return ref_adata @property @@ -124,9 +127,9 @@ def id(self) -> str: def run(self): super().run() - model_path = self._get_model() - adata = self._get_adata(model_path) - model = self._load_model(model_path, adata, "SCANVI") + self._get_model() + adata = self.get_adata() + model = self.get_model(adata) model_path = self._minify_and_save_model(model, adata) hub_model = self._create_hub_model(model_path) hub_model = self._upload_hub_model(hub_model) diff --git a/src/scvi_hub_models/models/_mouse_thymus_cite.py b/src/scvi_hub_models/models/_mouse_thymus_cite.py index 56c1ec8..255b1ee 100644 --- a/src/scvi_hub_models/models/_mouse_thymus_cite.py +++ b/src/scvi_hub_models/models/_mouse_thymus_cite.py @@ -23,28 +23,35 @@ def _load_adata(self) -> AnnData: return sc.read_h5ad(adata_path) def _preprocess_adata(self, adata: AnnData) -> AnnData: - import scanpy as sc - - print(adata, adata.X.data) - - sc.pp.filter_genes(adata, min_counts=3) matching_indices = [adata.raw.var_names.get_loc(gene) for gene in adata.var_names] adata.layers["counts"] = adata.raw.X[:, matching_indices].copy() - protein_adata = AnnData(adata.obsm["protein_expression"]) + sc.pp.highly_variable_genes( + adata, + n_top_genes=4000, + subset=True, + layer="counts", + flavor="seurat_v3", + span=1.0, + ) + protein_adata = AnnData(adata.obsm["protein_expression"], obs=adata.obs) protein_adata.obs_names = adata.obs_names del adata.obsm["protein_expression"] - adata = MuData({"rna": adata, "protein": protein_adata}) + del adata.obsm['denoised_genes'] + del adata.obsm['denoised_proteins'] + del adata.uns['AB_adata'] + mdata = MuData({"rna": adata, "protein": protein_adata}) + print(mdata) - return adata + return mdata - def download_adata(self) -> AnnData | None: + def download_adata(self, path) -> AnnData | None: """Download and load the dataset.""" - logger.info(f"Saving dataset to {self.save_dir} and preprocessing.") + logger.info(f"Saving dataset to {path} and preprocessing.") if self.dry_run: return None adata = self._load_adata() mdata = self._preprocess_adata(adata) - mdata.write_h5mu(f'data/{self.config['extra_data_kwargs']['large_training_file_name']}') + mdata.write_h5mu(path) return mdata def _initialize_model(self, mdata: MuData) -> TOTALVI: @@ -67,7 +74,7 @@ def _train_model(self, model: TOTALVI) -> TOTALVI: return model - def get_model(self, adata) -> TOTALVI | None: + def load_model(self, adata) -> TOTALVI | None: """Initialize and train the scVI model.""" logger.info("Training the scVI model.") if self.dry_run: diff --git a/src/scvi_hub_models/models/_neurips_cite.py b/src/scvi_hub_models/models/_neurips_cite.py index 74f6cfd..af4c470 100644 --- a/src/scvi_hub_models/models/_neurips_cite.py +++ b/src/scvi_hub_models/models/_neurips_cite.py @@ -1,9 +1,9 @@ import logging -import os import scanpy as sc from anndata import AnnData from mudata import MuData +from pooch import Decompress from scvi.model import TOTALVI from scvi_hub_models.models import BaseModelWorkflow @@ -12,52 +12,45 @@ class _Workflow(BaseModelWorkflow): - - def _load_adata(self) -> AnnData: - from cellxgene_census import download_source_h5ad - - adata_path = os.path.join(self.save_dir, self.config['extra_data_kwargs']["reference_adata_fname"]) - if not os.path.exists(adata_path): - # TODO for next LTS remove census_version='latest'. - download_source_h5ad(self.config['extra_data_kwargs']["reference_adata_cxg_id"], to_path=adata_path, census_version='latest') - return sc.read_h5ad(adata_path) - def _preprocess_adata(self, adata: AnnData) -> AnnData: - import scanpy as sc - - sc.pp.filter_genes(adata, min_counts=3) - adata.layers["counts"] = adata.X.copy() + rna = adata[:, adata.var['feature_types']=='GEX'].copy() + protein = adata[:, adata.var['feature_types']=='ADT'].copy() + protein.layers["counts"] = protein.layers["counts"].toarray() + sc.pp.filter_genes(rna, min_counts=3) sc.pp.highly_variable_genes( - adata, + rna, n_top_genes=4000, subset=True, layer="counts", flavor="seurat_v3", - batch_key="sample_id", + batch_key="Site", span=1.0, ) - protein_adata = AnnData(adata.obsm["protein_expression"]) - protein_adata.obs_names = adata.obs_names - del adata.obsm["protein_expression"] - adata = MuData({"rna": adata, "protein": protein_adata}) + adata = MuData({"rna": rna, "protein": protein}) return adata - def load_adata(self) -> AnnData | None: + def download_adata(self, path) -> AnnData | None: """Download and load the dataset.""" - logger.info(f"Saving dataset to {self.save_dir} and preprocessing.") + logger.info(f"Saving dataset to {path} and preprocessing.") if self.dry_run: return None - adata = self._load_adata() + adata = self._get_adata( + url=self.config["extra_data_kwargs"]["url"], + hash=self.config["extra_data_kwargs"]["hash"], + file_path=self.config["extra_data_kwargs"]["reference_adata_fname"], + processor=Decompress(), + ) mdata = self._preprocess_adata(adata) + mdata.write_h5mu(path) return mdata def _initialize_model(self, mdata: MuData) -> TOTALVI: TOTALVI.setup_mudata( mdata, rna_layer="counts", - protein_layer=None, - batch_key="sample_id", + protein_layer="counts", + batch_key="batch", modalities={ "rna_layer": "rna", "protein_layer": "protein", @@ -72,7 +65,7 @@ def _train_model(self, model: TOTALVI) -> TOTALVI: return model - def get_model(self, adata) -> TOTALVI | None: + def load_model(self, adata) -> TOTALVI | None: """Initialize and train the scVI model.""" logger.info("Training the scVI model.") if self.dry_run: @@ -83,7 +76,7 @@ def get_model(self, adata) -> TOTALVI | None: def run(self): super().run() - mdata = self.load_adata() + mdata = self.get_adata() model = self.get_model(mdata) model_path = self._minify_and_save_model(model, mdata) hub_model = self._create_hub_model(model_path) diff --git a/src/scvi_hub_models/models/_test_scvi.py b/src/scvi_hub_models/models/_test_scvi.py index b72913d..7c326b1 100644 --- a/src/scvi_hub_models/models/_test_scvi.py +++ b/src/scvi_hub_models/models/_test_scvi.py @@ -12,29 +12,23 @@ class _Workflow(BaseModelWorkflow): - def load_dataset(self) -> AnnData | None: + def download_adata(self, path) -> AnnData | None: from scvi.data import synthetic_iid logger.info("Loading synthetic dataset.") if self.dry_run: return None + adata = synthetic_iid() + adata.write_h5ad(path) + return adata - return synthetic_iid() - - def initialize_model(self, adata: AnnData | None) -> SCVI | None: - logger.info("Initializing the scVI model.") + def load_model(self, adata: AnnData) -> SCVI: + logger.info("Training the scVI model.") if self.dry_run: return None - SCVI.setup_anndata(adata) - return SCVI(adata) - - def train_model(self, model: SCVI | None) -> SCVI | None: - logger.info("Training the scVI model.") - if self.dry_run: - return model - - model.train(max_epochs=1) + model = SCVI(adata) + model.train(max_epochs=10) return model @property @@ -44,9 +38,8 @@ def id(self) -> str: def run(self): super().run() - adata = self.load_dataset() - model = self.initialize_model(adata) - model = self.train_model(model) + adata = self.get_adata() + model = self.get_model(adata) model_path = self._minify_and_save_model(model, adata) hub_model = self._create_hub_model(model_path) hub_model = self._upload_hub_model(hub_model) diff --git a/src/scvi_hub_models/test.ipynb b/src/scvi_hub_models/test.ipynb new file mode 100644 index 0000000..c730192 --- /dev/null +++ b/src/scvi_hub_models/test.ipynb @@ -0,0 +1,603 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.insert(0, '/home/cane/Documents/scvi-tools/scvi-hub-models/src')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "import os\n", + "\n", + "import scanpy as sc\n", + "from anndata import AnnData\n", + "from mudata import MuData\n", + "from scvi.model import TOTALVI\n", + "\n", + "from scvi_hub_models.models import BaseModelWorkflow\n", + "\n", + "logger = logging.getLogger(__name__)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/cane/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/mudata/_core/mudata.py:1531: FutureWarning: From 0.4 .update() will not pull obs/var columns from individual modalities by default anymore. Set mudata.set_options(pull_on_update=False) to adopt the new behaviour, which will become the default. Use new pull_obs/pull_var and push_obs/push_var methods for more flexibility.\n", + " self._update_attr(\"var\", axis=0, join_common=join_common)\n", + "/home/cane/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/mudata/_core/mudata.py:1429: FutureWarning: From 0.4 .update() will not pull obs/var columns from individual modalities by default anymore. Set mudata.set_options(pull_on_update=False) to adopt the new behaviour, which will become the default. Use new pull_obs/pull_var and push_obs/push_var methods for more flexibility.\n", + " self._update_attr(\"obs\", axis=1, join_common=join_common)\n" + ] + } + ], + "source": [ + "import mudata\n", + "mu = mudata.read_h5mu('/home/cane/Documents/scvi-tools/scvi-hub-models/test/mini_totalvi/mdata.h5mu')" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
MuData object with n_obs × n_vars = 72042 × 4111\n",
+       "  obs:\t'_scvi_labels', 'observed_lib_size'\n",
+       "  var:\t'cv_gene'\n",
+       "  uns:\t'_scvi_adata_minify_type', '_scvi_manager_uuid', '_scvi_uuid'\n",
+       "  obsm:\t'totalvi_latent_qzm', 'totalvi_latent_qzv'\n",
+       "  2 modalities\n",
+       "    rna:\t72042 x 4000\n",
+       "      obs:\t'percent_mito', 'n_counts', 'n_genes', 'n_protein_counts', 'n_proteins', 'leiden_totalVI_res1.4', 'leiden_totalVI_res1.0', 'leiden_totalVI_res0.6', 'annotations_clean', 'mean_pseudotime', 'Pseudotime_bin', 'curve1', 'curve2', 'difference', 'weight_curve1', 'weight_curve2', 'UMIs_RNA', 'UMIs_protein', 'n_genes_pt', 'n_proteins_pt', 'percent_mito_pt', 'Experiment', 'slingshot_clusters', 'organism_ontology_term_id', 'disease_ontology_term_id', 'sex_ontology_term_id', 'tissue_type', 'tissue_ontology_term_id', 'suspension_type', 'assay_ontology_term_id', 'cell_type_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'is_primary_data', 'development_stage_ontology_term_id', 'batch_indices', 'sample_id', 'Location', 'donor_id', 'sample_weeks', 'genotype', 'Lineage_by_genotypeSlingshot', 'Lineage_by_genotype', 'cell_type', 'assay', 'disease', 'organism', 'sex', 'tissue', 'self_reported_ethnicity', 'development_stage', 'observation_joinid', '_scvi_batch', 'cv_cell'\n",
+       "      var:\t'gene_id', 'gene_name', 'expression_type', 'n_cells', 'feature_is_filtered', 'feature_name', 'feature_reference', 'feature_biotype', 'feature_length', 'feature_type', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'cv_gene'\n",
+       "      uns:\t'AB_adata', 'annotations_clean_colors', 'batch_condition', 'batch_indices_colors', 'citation', 'hvg', 'leiden', 'neighbors', 'protein_names', 'schema_reference', 'schema_version', 'title', 'totalVI_genes', 'totalVI_proteins', 'umap'\n",
+       "      obsm:\t'X_totalVI', 'X_umap', 'denoised_genes', 'denoised_proteins'\n",
+       "      varm:\t'lfc_model', 'lfc_raw'\n",
+       "      layers:\t'counts'\n",
+       "    protein:\t72042 x 111\n",
+       "      obs:\t'percent_mito', 'n_counts', 'n_genes', 'n_protein_counts', 'n_proteins', 'leiden_totalVI_res1.4', 'leiden_totalVI_res1.0', 'leiden_totalVI_res0.6', 'annotations_clean', 'mean_pseudotime', 'Pseudotime_bin', 'curve1', 'curve2', 'difference', 'weight_curve1', 'weight_curve2', 'UMIs_RNA', 'UMIs_protein', 'n_genes_pt', 'n_proteins_pt', 'percent_mito_pt', 'Experiment', 'slingshot_clusters', 'organism_ontology_term_id', 'disease_ontology_term_id', 'sex_ontology_term_id', 'tissue_type', 'tissue_ontology_term_id', 'suspension_type', 'assay_ontology_term_id', 'cell_type_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'is_primary_data', 'development_stage_ontology_term_id', 'batch_indices', 'sample_id', 'Location', 'donor_id', 'sample_weeks', 'genotype', 'Lineage_by_genotypeSlingshot', 'Lineage_by_genotype', 'cell_type', 'assay', 'disease', 'organism', 'sex', 'tissue', 'self_reported_ethnicity', 'development_stage', 'observation_joinid', '_scvi_batch', 'cv_cell'\n",
+       "      var:\t'cv_gene'\n",
+       "      varm:\t'lfc_model', 'lfc_raw'
" + ], + "text/plain": [ + "MuData object with n_obs × n_vars = 72042 × 4111\n", + " obs:\t'_scvi_labels', 'observed_lib_size'\n", + " var:\t'cv_gene'\n", + " uns:\t'_scvi_adata_minify_type', '_scvi_manager_uuid', '_scvi_uuid'\n", + " obsm:\t'totalvi_latent_qzm', 'totalvi_latent_qzv'\n", + " 2 modalities\n", + " rna:\t72042 x 4000\n", + " obs:\t'percent_mito', 'n_counts', 'n_genes', 'n_protein_counts', 'n_proteins', 'leiden_totalVI_res1.4', 'leiden_totalVI_res1.0', 'leiden_totalVI_res0.6', 'annotations_clean', 'mean_pseudotime', 'Pseudotime_bin', 'curve1', 'curve2', 'difference', 'weight_curve1', 'weight_curve2', 'UMIs_RNA', 'UMIs_protein', 'n_genes_pt', 'n_proteins_pt', 'percent_mito_pt', 'Experiment', 'slingshot_clusters', 'organism_ontology_term_id', 'disease_ontology_term_id', 'sex_ontology_term_id', 'tissue_type', 'tissue_ontology_term_id', 'suspension_type', 'assay_ontology_term_id', 'cell_type_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'is_primary_data', 'development_stage_ontology_term_id', 'batch_indices', 'sample_id', 'Location', 'donor_id', 'sample_weeks', 'genotype', 'Lineage_by_genotypeSlingshot', 'Lineage_by_genotype', 'cell_type', 'assay', 'disease', 'organism', 'sex', 'tissue', 'self_reported_ethnicity', 'development_stage', 'observation_joinid', '_scvi_batch', 'cv_cell'\n", + " var:\t'gene_id', 'gene_name', 'expression_type', 'n_cells', 'feature_is_filtered', 'feature_name', 'feature_reference', 'feature_biotype', 'feature_length', 'feature_type', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'cv_gene'\n", + " uns:\t'AB_adata', 'annotations_clean_colors', 'batch_condition', 'batch_indices_colors', 'citation', 'hvg', 'leiden', 'neighbors', 'protein_names', 'schema_reference', 'schema_version', 'title', 'totalVI_genes', 'totalVI_proteins', 'umap'\n", + " obsm:\t'X_totalVI', 'X_umap', 'denoised_genes', 'denoised_proteins'\n", + " varm:\t'lfc_model', 'lfc_raw'\n", + " layers:\t'counts'\n", + " protein:\t72042 x 111\n", + " obs:\t'percent_mito', 'n_counts', 'n_genes', 'n_protein_counts', 'n_proteins', 'leiden_totalVI_res1.4', 'leiden_totalVI_res1.0', 'leiden_totalVI_res0.6', 'annotations_clean', 'mean_pseudotime', 'Pseudotime_bin', 'curve1', 'curve2', 'difference', 'weight_curve1', 'weight_curve2', 'UMIs_RNA', 'UMIs_protein', 'n_genes_pt', 'n_proteins_pt', 'percent_mito_pt', 'Experiment', 'slingshot_clusters', 'organism_ontology_term_id', 'disease_ontology_term_id', 'sex_ontology_term_id', 'tissue_type', 'tissue_ontology_term_id', 'suspension_type', 'assay_ontology_term_id', 'cell_type_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'is_primary_data', 'development_stage_ontology_term_id', 'batch_indices', 'sample_id', 'Location', 'donor_id', 'sample_weeks', 'genotype', 'Lineage_by_genotypeSlingshot', 'Lineage_by_genotype', 'cell_type', 'assay', 'disease', 'organism', 'sex', 'tissue', 'self_reported_ethnicity', 'development_stage', 'observation_joinid', '_scvi_batch', 'cv_cell'\n", + " var:\t'cv_gene'\n", + " varm:\t'lfc_model', 'lfc_raw'" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mu" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "from scvi_hub_models.config import json_data_store" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from scvi_hub_models.models import _neurips_cite\n", + "Workflow = _neurips_cite._Workflow" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "self = Workflow(config=json_data_store['neurips_cite'], save_dir='.', reload_model=True, reload_data=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'bmmc_cite.h5ad.gz'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "self.config['extra_data_kwargs']['reference_adata_fname']" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Decompressing '/home/cane/.cache/pooch/bmmc_cite.h5ad.gz' to '/home/cane/.cache/pooch/bmmc_cite.h5ad.gz.decomp' using method 'auto'.\n" + ] + } + ], + "source": [ + "from pooch import retrieve\n", + "from pooch import Decompress\n", + "\n", + "adata_path = retrieve(\n", + " url=self.config['extra_data_kwargs']['url'],\n", + " known_hash=\"b9b50fade9349719cba23c97c6515d3501a32ee3735fe95fe51221d2e8a5f361\",\n", + " fname='bmmc_cite.h5ad.gz',\n", + " processor=Decompress(),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/cane/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/anndata/_core/anndata.py:1758: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", + " utils.warn_names_duplicates(\"var\")\n" + ] + } + ], + "source": [ + "import anndata\n", + "\n", + "ad = anndata.read_h5ad('/home/cane/.cache/pooch/bmmc_cite.h5ad')" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AnnData object with n_obs × n_vars = 90261 × 14087\n", + " obs: 'GEX_n_genes_by_counts', 'GEX_pct_counts_mt', 'GEX_size_factors', 'GEX_phase', 'ADT_n_antibodies_by_counts', 'ADT_total_counts', 'ADT_iso_count', 'cell_type', 'batch', 'ADT_pseudotime_order', 'GEX_pseudotime_order', 'Samplename', 'Site', 'DonorNumber', 'Modality', 'VendorLot', 'DonorID', 'DonorAge', 'DonorBMI', 'DonorBloodType', 'DonorRace', 'Ethnicity', 'DonorGender', 'QCMeds', 'DonorSmoker', 'is_train'\n", + " var: 'feature_types', 'gene_id'\n", + " uns: 'dataset_id', 'genome', 'organism'\n", + " obsm: 'ADT_X_pca', 'ADT_X_umap', 'ADT_isotype_controls', 'GEX_X_pca', 'GEX_X_umap'\n", + " layers: 'counts'" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ad" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/home/cane/Documents/scvi-tools/scvi-hub-models/data/neurips_bone_marrow_cite.h5mu\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/cane/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/anndata/_core/anndata.py:1758: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.\n", + " utils.warn_names_duplicates(\"var\")\n", + "/home/cane/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/mudata/_core/mudata.py:1531: FutureWarning: From 0.4 .update() will not pull obs/var columns from individual modalities by default anymore. Set mudata.set_options(pull_on_update=False) to adopt the new behaviour, which will become the default. Use new pull_obs/pull_var and push_obs/push_var methods for more flexibility.\n", + " self._update_attr(\"var\", axis=0, join_common=join_common)\n", + "/home/cane/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/mudata/_core/mudata.py:931: UserWarning: Cannot join columns with the same name because var_names are intersecting.\n", + " warnings.warn(\n", + "/home/cane/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/mudata/_core/mudata.py:1429: FutureWarning: From 0.4 .update() will not pull obs/var columns from individual modalities by default anymore. Set mudata.set_options(pull_on_update=False) to adopt the new behaviour, which will become the default. Use new pull_obs/pull_var and push_obs/push_var methods for more flexibility.\n", + " self._update_attr(\"obs\", axis=1, join_common=join_common)\n", + "/home/cane/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/mudata/_core/mudata.py:1531: FutureWarning: From 0.4 .update() will not pull obs/var columns from individual modalities by default anymore. Set mudata.set_options(pull_on_update=False) to adopt the new behaviour, which will become the default. Use new pull_obs/pull_var and push_obs/push_var methods for more flexibility.\n", + " self._update_attr(\"var\", axis=0, join_common=join_common)\n", + "/home/cane/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/mudata/_core/mudata.py:1429: FutureWarning: From 0.4 .update() will not pull obs/var columns from individual modalities by default anymore. Set mudata.set_options(pull_on_update=False) to adopt the new behaviour, which will become the default. Use new pull_obs/pull_var and push_obs/push_var methods for more flexibility.\n", + " self._update_attr(\"obs\", axis=1, join_common=join_common)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "mdata = self.get_adata()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "GEX_n_genes_by_counts                         893\n",
+       "GEX_pct_counts_mt                        6.723979\n",
+       "GEX_size_factors                         0.356535\n",
+       "GEX_phase                                      G1\n",
+       "ADT_n_antibodies_by_counts                    115\n",
+       "ADT_total_counts                           2828.0\n",
+       "ADT_iso_count                                 5.0\n",
+       "cell_type                     Naive CD20+ B IGKC+\n",
+       "batch                                        s1d1\n",
+       "ADT_pseudotime_order                          NaN\n",
+       "GEX_pseudotime_order                          NaN\n",
+       "Samplename                      site1_donor1_cite\n",
+       "Site                                        site1\n",
+       "DonorNumber                                donor1\n",
+       "Modality                                     cite\n",
+       "VendorLot                                 3054455\n",
+       "DonorID                                     15078\n",
+       "DonorAge                                       34\n",
+       "DonorBMI                                     24.8\n",
+       "DonorBloodType                                 B-\n",
+       "DonorRace                                   White\n",
+       "Ethnicity                      HISPANIC OR LATINO\n",
+       "DonorGender                                  Male\n",
+       "QCMeds                                      False\n",
+       "DonorSmoker                             Nonsmoker\n",
+       "is_train                                    train\n",
+       "_scvi_batch                                     0\n",
+       "cv_cell                                  7.578709\n",
+       "Name: GCATTAGCATAAGCGG-1-s1d1, dtype: object"
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "mdata['rna'].obs.iloc[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO     Computing empirical prior initialization for protein background.                                          \n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "GPU available: True (cuda), used: True\n",
+      "TPU available: False, using: 0 TPU cores\n",
+      "HPU available: False, using: 0 HPUs\n",
+      "You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision\n",
+      "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n",
+      "/home/cane/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/torch/optim/lr_scheduler.py:62: UserWarning: The verbose parameter is deprecated. Please use get_last_lr() to access the learning rate.\n",
+      "  warnings.warn(\n",
+      "/home/cane/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/lightning/pytorch/core/optimizer.py:316: The lr scheduler dict contains the key(s) ['monitor'], but the keys will be ignored. You need to call `lr_scheduler.step()` manually in manual optimization.\n",
+      "/home/cane/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=23` in the `DataLoader` to improve performance.\n",
+      "/home/cane/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=23` in the `DataLoader` to improve performance.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch 200/200: 100%|██████████| 200/200 [22:05<00:00,  7.33s/it, v_num=1, train_loss_step=1.58e+3, train_loss_epoch=1.6e+3]"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "`Trainer.fit` stopped: `max_epochs=200` reached.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch 200/200: 100%|██████████| 200/200 [22:05<00:00,  6.63s/it, v_num=1, train_loss_step=1.58e+3, train_loss_epoch=1.6e+3]\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['rna', 'protein']\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/cane/Documents/scvi-tools/src/scvi/criticism/_ppc.py:293: UserWarning: n_top_genes_fallback=100 is greater than 10% of the number ofgenes f(134) in the dataset. Setting it to 10%.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "ename": "ValueError",
+     "evalue": "Only one class present in y_true. ROC AUC score is not defined in that case.",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[23], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m model \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_model(mdata)\n\u001b[0;32m----> 2\u001b[0m model_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_minify_and_save_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmdata\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m      3\u001b[0m hub_model \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_create_hub_model(model_path)\n\u001b[1;32m      4\u001b[0m hub_model \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_upload_hub_model(hub_model)\n",
+      "File \u001b[0;32m~/Documents/scvi-tools/scvi-hub-models/src/scvi_hub_models/models/_base_workflow.py:239\u001b[0m, in \u001b[0;36m_minify_and_save_model\u001b[0;34m(self, model, adata)\u001b[0m\n\u001b[1;32m    237\u001b[0m qzm, qzv \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mget_latent_representation(give_mean\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, return_dist\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m    238\u001b[0m adata\u001b[38;5;241m.\u001b[39mobsm[qzm_key] \u001b[38;5;241m=\u001b[39m qzm\n\u001b[0;32m--> 239\u001b[0m adata\u001b[38;5;241m.\u001b[39mobsm[qzv_key] \u001b[38;5;241m=\u001b[39m qzv\n\u001b[1;32m    240\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(adata, mudata\u001b[38;5;241m.\u001b[39mMuData):\n\u001b[1;32m    241\u001b[0m     model\u001b[38;5;241m.\u001b[39mminify_mudata(use_latent_qzm_key\u001b[38;5;241m=\u001b[39mqzm_key, use_latent_qzv_key\u001b[38;5;241m=\u001b[39mqzv_key)\n",
+      "File \u001b[0;32m~/Documents/scvi-tools/src/scvi/criticism/_create_criticism_report.py:76\u001b[0m, in \u001b[0;36mcreate_criticism_report\u001b[0;34m(model, adata, skip_metrics, n_samples, label_key, save_folder)\u001b[0m\n\u001b[1;32m     74\u001b[0m md_cell_wise_cv, md_gene_wise_cv, md_de \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m     75\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m modalities:\n\u001b[0;32m---> 76\u001b[0m     md_cell_wise_cv_, md_gene_wise_cv_, md_de_ \u001b[38;5;241m=\u001b[39m \u001b[43mcompute_metrics\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m     77\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43madata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mskip_metrics\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_samples\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlabel_key\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodality\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mi\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     78\u001b[0m     md_cell_wise_cv \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModality: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mi\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m md_cell_wise_cv_ \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m     79\u001b[0m     md_gene_wise_cv \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModality: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mi\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m md_gene_wise_cv_ \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n",
+      "File \u001b[0;32m~/Documents/scvi-tools/src/scvi/criticism/_create_criticism_report.py:116\u001b[0m, in \u001b[0;36mcompute_metrics\u001b[0;34m(model, adata, skip_metrics, n_samples, label_key, modality)\u001b[0m\n\u001b[1;32m    114\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m label_key \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m labels_state_registry\u001b[38;5;241m.\u001b[39moriginal_key \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_scvi_labels\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m    115\u001b[0m     label_key \u001b[38;5;241m=\u001b[39m labels_state_registry\u001b[38;5;241m.\u001b[39moriginal_key\n\u001b[0;32m--> 116\u001b[0m \u001b[43mppc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdifferential_expression\u001b[49m\u001b[43m(\u001b[49m\u001b[43mde_groupby\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlabel_key\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mp_val_thresh\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.2\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m    117\u001b[0m summary_df \u001b[38;5;241m=\u001b[39m ppc\u001b[38;5;241m.\u001b[39mmetrics[METRIC_DIFF_EXP][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msummary\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mset_index(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgroup\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    118\u001b[0m summary_df \u001b[38;5;241m=\u001b[39m summary_df\u001b[38;5;241m.\u001b[39mdrop(columns\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n",
+      "File \u001b[0;32m~/Documents/scvi-tools/src/scvi/utils/_dependencies.py:24\u001b[0m, in \u001b[0;36mdependencies..decorator..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m     21\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(fn)\n\u001b[1;32m     22\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrapper\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m     23\u001b[0m     error_on_missing_dependencies(\u001b[38;5;241m*\u001b[39mmodules)\n\u001b[0;32m---> 24\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/Documents/scvi-tools/src/scvi/criticism/_ppc.py:430\u001b[0m, in \u001b[0;36mPosteriorPredictiveCheck.differential_expression\u001b[0;34m(self, de_groupby, de_method, n_samples, cell_scale_factor, p_val_thresh, n_top_genes_fallback)\u001b[0m\n\u001b[1;32m    427\u001b[0m         true \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mzeros_like(pred)\n\u001b[1;32m    428\u001b[0m         true[np\u001b[38;5;241m.\u001b[39margsort(raw_adj_p_vals)[:n_top_genes_fallback]] \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m--> 430\u001b[0m     roc_aucs\u001b[38;5;241m.\u001b[39mappend(\u001b[43mroc_auc_score\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpred\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m    431\u001b[0m     pr_aucs\u001b[38;5;241m.\u001b[39mappend(average_precision_score(true, pred))\n\u001b[1;32m    433\u001b[0m \u001b[38;5;66;03m# Compute means over samples\u001b[39;00m\n",
+      "File \u001b[0;32m~/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/sklearn/utils/_param_validation.py:213\u001b[0m, in \u001b[0;36mvalidate_params..decorator..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    207\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m    208\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[1;32m    209\u001b[0m         skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[1;32m    210\u001b[0m             prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[1;32m    211\u001b[0m         )\n\u001b[1;32m    212\u001b[0m     ):\n\u001b[0;32m--> 213\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    214\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m InvalidParameterError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m    215\u001b[0m     \u001b[38;5;66;03m# When the function is just a wrapper around an estimator, we allow\u001b[39;00m\n\u001b[1;32m    216\u001b[0m     \u001b[38;5;66;03m# the function to delegate validation to the estimator, but we replace\u001b[39;00m\n\u001b[1;32m    217\u001b[0m     \u001b[38;5;66;03m# the name of the estimator by the name of the function in the error\u001b[39;00m\n\u001b[1;32m    218\u001b[0m     \u001b[38;5;66;03m# message to avoid confusion.\u001b[39;00m\n\u001b[1;32m    219\u001b[0m     msg \u001b[38;5;241m=\u001b[39m re\u001b[38;5;241m.\u001b[39msub(\n\u001b[1;32m    220\u001b[0m         \u001b[38;5;124mr\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparameter of \u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124mw+ must be\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m    221\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparameter of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfunc\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__qualname__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m must be\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m    222\u001b[0m         \u001b[38;5;28mstr\u001b[39m(e),\n\u001b[1;32m    223\u001b[0m     )\n",
+      "File \u001b[0;32m~/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/sklearn/metrics/_ranking.py:640\u001b[0m, in \u001b[0;36mroc_auc_score\u001b[0;34m(y_true, y_score, average, sample_weight, max_fpr, multi_class, labels)\u001b[0m\n\u001b[1;32m    638\u001b[0m     labels \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39munique(y_true)\n\u001b[1;32m    639\u001b[0m     y_true \u001b[38;5;241m=\u001b[39m label_binarize(y_true, classes\u001b[38;5;241m=\u001b[39mlabels)[:, \u001b[38;5;241m0\u001b[39m]\n\u001b[0;32m--> 640\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_average_binary_score\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    641\u001b[0m \u001b[43m        \u001b[49m\u001b[43mpartial\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_binary_roc_auc_score\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmax_fpr\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmax_fpr\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    642\u001b[0m \u001b[43m        \u001b[49m\u001b[43my_true\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    643\u001b[0m \u001b[43m        \u001b[49m\u001b[43my_score\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    644\u001b[0m \u001b[43m        \u001b[49m\u001b[43maverage\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    645\u001b[0m \u001b[43m        \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample_weight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    646\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    647\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:  \u001b[38;5;66;03m# multilabel-indicator\u001b[39;00m\n\u001b[1;32m    648\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m _average_binary_score(\n\u001b[1;32m    649\u001b[0m         partial(_binary_roc_auc_score, max_fpr\u001b[38;5;241m=\u001b[39mmax_fpr),\n\u001b[1;32m    650\u001b[0m         y_true,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    653\u001b[0m         sample_weight\u001b[38;5;241m=\u001b[39msample_weight,\n\u001b[1;32m    654\u001b[0m     )\n",
+      "File \u001b[0;32m~/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/sklearn/metrics/_base.py:76\u001b[0m, in \u001b[0;36m_average_binary_score\u001b[0;34m(binary_metric, y_true, y_score, average, sample_weight)\u001b[0m\n\u001b[1;32m     73\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{0}\u001b[39;00m\u001b[38;5;124m format is not supported\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(y_type))\n\u001b[1;32m     75\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m y_type \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbinary\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m---> 76\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mbinary_metric\u001b[49m\u001b[43m(\u001b[49m\u001b[43my_true\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_score\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample_weight\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     78\u001b[0m check_consistent_length(y_true, y_score, sample_weight)\n\u001b[1;32m     79\u001b[0m y_true \u001b[38;5;241m=\u001b[39m check_array(y_true)\n",
+      "File \u001b[0;32m~/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/sklearn/metrics/_ranking.py:382\u001b[0m, in \u001b[0;36m_binary_roc_auc_score\u001b[0;34m(y_true, y_score, sample_weight, max_fpr)\u001b[0m\n\u001b[1;32m    380\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Binary roc auc score.\"\"\"\u001b[39;00m\n\u001b[1;32m    381\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(np\u001b[38;5;241m.\u001b[39munique(y_true)) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m2\u001b[39m:\n\u001b[0;32m--> 382\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m    383\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOnly one class present in y_true. ROC AUC score \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    384\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mis not defined in that case.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    385\u001b[0m     )\n\u001b[1;32m    387\u001b[0m fpr, tpr, _ \u001b[38;5;241m=\u001b[39m roc_curve(y_true, y_score, sample_weight\u001b[38;5;241m=\u001b[39msample_weight)\n\u001b[1;32m    388\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m max_fpr \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m max_fpr \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m:\n",
+      "\u001b[0;31mValueError\u001b[0m: Only one class present in y_true. ROC AUC score is not defined in that case."
+     ]
+    }
+   ],
+   "source": [
+    "model = self.get_model(mdata)\n",
+    "model_path = self._minify_and_save_model(model, mdata)\n",
+    "hub_model = self._create_hub_model(model_path)\n",
+    "hub_model = self._upload_hub_model(hub_model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO     File ../mini_totalvi/model.pt already downloaded                                                          \n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/cane/Documents/scvi-tools/src/scvi/model/base/_save_load.py:76: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
+      "  model = torch.load(model_path, map_location=map_location)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "INFO     File ../mini_totalvi/model.pt already downloaded                                                          \n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/cane/Documents/scvi-tools/src/scvi/model/base/_save_load.py:76: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
+      "  model = torch.load(model_path, map_location=map_location)\n",
+      "No files have been modified since last commit. Skipping to prevent empty commit.\n",
+      "No files have been modified since last commit. Skipping to prevent empty commit.\n"
+     ]
+    }
+   ],
+   "source": [
+    "hub_model = self._create_hub_model(model_path)\n",
+    "hub_model = self._upload_hub_model(hub_model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/cane/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/mudata/_core/mudata.py:1531: FutureWarning: From 0.4 .update() will not pull obs/var columns from individual modalities by default anymore. Set mudata.set_options(pull_on_update=False) to adopt the new behaviour, which will become the default. Use new pull_obs/pull_var and push_obs/push_var methods for more flexibility.\n",
+      "  self._update_attr(\"var\", axis=0, join_common=join_common)\n",
+      "/home/cane/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/mudata/_core/mudata.py:1429: FutureWarning: From 0.4 .update() will not pull obs/var columns from individual modalities by default anymore. Set mudata.set_options(pull_on_update=False) to adopt the new behaviour, which will become the default. Use new pull_obs/pull_var and push_obs/push_var methods for more flexibility.\n",
+      "  self._update_attr(\"obs\", axis=1, join_common=join_common)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "
MuData object with n_obs × n_vars = 72042 × 16104\n",
+       "  obs:\t'_scvi_labels', 'cv_cell'\n",
+       "  var:\t'cv_gene'\n",
+       "  uns:\t'_scvi_uuid', '_scvi_manager_uuid'\n",
+       "  2 modalities\n",
+       "    rna:\t72042 x 15993\n",
+       "      obs:\t'percent_mito', 'n_counts', 'n_genes', 'n_protein_counts', 'n_proteins', 'leiden_totalVI_res1.4', 'leiden_totalVI_res1.0', 'leiden_totalVI_res0.6', 'annotations_clean', 'mean_pseudotime', 'Pseudotime_bin', 'curve1', 'curve2', 'difference', 'weight_curve1', 'weight_curve2', 'UMIs_RNA', 'UMIs_protein', 'n_genes_pt', 'n_proteins_pt', 'percent_mito_pt', 'Experiment', 'slingshot_clusters', 'organism_ontology_term_id', 'disease_ontology_term_id', 'sex_ontology_term_id', 'tissue_type', 'tissue_ontology_term_id', 'suspension_type', 'assay_ontology_term_id', 'cell_type_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'is_primary_data', 'development_stage_ontology_term_id', 'batch_indices', 'sample_id', 'Location', 'donor_id', 'sample_weeks', 'genotype', 'Lineage_by_genotypeSlingshot', 'Lineage_by_genotype', 'cell_type', 'assay', 'disease', 'organism', 'sex', 'tissue', 'self_reported_ethnicity', 'development_stage', 'observation_joinid', '_scvi_batch'\n",
+       "      var:\t'gene_id', 'gene_name', 'expression_type', 'n_cells', 'feature_is_filtered', 'feature_name', 'feature_reference', 'feature_biotype', 'feature_length', 'feature_type', 'n_counts'\n",
+       "      uns:\t'AB_adata', 'annotations_clean_colors', 'batch_condition', 'batch_indices_colors', 'citation', 'leiden', 'neighbors', 'protein_names', 'schema_reference', 'schema_version', 'title', 'totalVI_genes', 'totalVI_proteins', 'umap'\n",
+       "      obsm:\t'X_totalVI', 'X_umap', 'denoised_genes', 'denoised_proteins'\n",
+       "      layers:\t'counts'\n",
+       "    protein:\t72042 x 111\n",
+       "      obs:\t'percent_mito', 'n_counts', 'n_genes', 'n_protein_counts', 'n_proteins', 'leiden_totalVI_res1.4', 'leiden_totalVI_res1.0', 'leiden_totalVI_res0.6', 'annotations_clean', 'mean_pseudotime', 'Pseudotime_bin', 'curve1', 'curve2', 'difference', 'weight_curve1', 'weight_curve2', 'UMIs_RNA', 'UMIs_protein', 'n_genes_pt', 'n_proteins_pt', 'percent_mito_pt', 'Experiment', 'slingshot_clusters', 'organism_ontology_term_id', 'disease_ontology_term_id', 'sex_ontology_term_id', 'tissue_type', 'tissue_ontology_term_id', 'suspension_type', 'assay_ontology_term_id', 'cell_type_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'is_primary_data', 'development_stage_ontology_term_id', 'batch_indices', 'sample_id', 'Location', 'donor_id', 'sample_weeks', 'genotype', 'Lineage_by_genotypeSlingshot', 'Lineage_by_genotype', 'cell_type', 'assay', 'disease', 'organism', 'sex', 'tissue', 'self_reported_ethnicity', 'development_stage', 'observation_joinid', '_scvi_batch'
" + ], + "text/plain": [ + "MuData object with n_obs × n_vars = 72042 × 16104\n", + " obs:\t'_scvi_labels', 'cv_cell'\n", + " var:\t'cv_gene'\n", + " uns:\t'_scvi_uuid', '_scvi_manager_uuid'\n", + " 2 modalities\n", + " rna:\t72042 x 15993\n", + " obs:\t'percent_mito', 'n_counts', 'n_genes', 'n_protein_counts', 'n_proteins', 'leiden_totalVI_res1.4', 'leiden_totalVI_res1.0', 'leiden_totalVI_res0.6', 'annotations_clean', 'mean_pseudotime', 'Pseudotime_bin', 'curve1', 'curve2', 'difference', 'weight_curve1', 'weight_curve2', 'UMIs_RNA', 'UMIs_protein', 'n_genes_pt', 'n_proteins_pt', 'percent_mito_pt', 'Experiment', 'slingshot_clusters', 'organism_ontology_term_id', 'disease_ontology_term_id', 'sex_ontology_term_id', 'tissue_type', 'tissue_ontology_term_id', 'suspension_type', 'assay_ontology_term_id', 'cell_type_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'is_primary_data', 'development_stage_ontology_term_id', 'batch_indices', 'sample_id', 'Location', 'donor_id', 'sample_weeks', 'genotype', 'Lineage_by_genotypeSlingshot', 'Lineage_by_genotype', 'cell_type', 'assay', 'disease', 'organism', 'sex', 'tissue', 'self_reported_ethnicity', 'development_stage', 'observation_joinid', '_scvi_batch'\n", + " var:\t'gene_id', 'gene_name', 'expression_type', 'n_cells', 'feature_is_filtered', 'feature_name', 'feature_reference', 'feature_biotype', 'feature_length', 'feature_type', 'n_counts'\n", + " uns:\t'AB_adata', 'annotations_clean_colors', 'batch_condition', 'batch_indices_colors', 'citation', 'leiden', 'neighbors', 'protein_names', 'schema_reference', 'schema_version', 'title', 'totalVI_genes', 'totalVI_proteins', 'umap'\n", + " obsm:\t'X_totalVI', 'X_umap', 'denoised_genes', 'denoised_proteins'\n", + " layers:\t'counts'\n", + " protein:\t72042 x 111\n", + " obs:\t'percent_mito', 'n_counts', 'n_genes', 'n_protein_counts', 'n_proteins', 'leiden_totalVI_res1.4', 'leiden_totalVI_res1.0', 'leiden_totalVI_res0.6', 'annotations_clean', 'mean_pseudotime', 'Pseudotime_bin', 'curve1', 'curve2', 'difference', 'weight_curve1', 'weight_curve2', 'UMIs_RNA', 'UMIs_protein', 'n_genes_pt', 'n_proteins_pt', 'percent_mito_pt', 'Experiment', 'slingshot_clusters', 'organism_ontology_term_id', 'disease_ontology_term_id', 'sex_ontology_term_id', 'tissue_type', 'tissue_ontology_term_id', 'suspension_type', 'assay_ontology_term_id', 'cell_type_ontology_term_id', 'self_reported_ethnicity_ontology_term_id', 'is_primary_data', 'development_stage_ontology_term_id', 'batch_indices', 'sample_id', 'Location', 'donor_id', 'sample_weeks', 'genotype', 'Lineage_by_genotypeSlingshot', 'Lineage_by_genotype', 'cell_type', 'assay', 'disease', 'organism', 'sex', 'tissue', 'self_reported_ethnicity', 'development_stage', 'observation_joinid', '_scvi_batch'" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mdata.update()\n", + "mdata['protein'].obs = mdata['rna'].obs\n", + "mdata" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['rna', 'protein']\n" + ] + }, + { + "ename": "KeyError", + "evalue": "\"Values ['rna', 'protein'], from ['rna', 'protein'], are not valid obs/ var names or indices.\"", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[19], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m model_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_minify_and_save_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmdata\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Documents/scvi-tools/scvi-hub-models/src/scvi_hub_models/models/_base_workflow.py:267\u001b[0m, in \u001b[0;36mBaseModelWorkflow._minify_and_save_model\u001b[0;34m(self, model, adata)\u001b[0m\n\u001b[1;32m 265\u001b[0m os\u001b[38;5;241m.\u001b[39mmakedirs(mini_model_path)\n\u001b[1;32m 266\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcreate_criticism_report\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mTrue\u001b[39;00m) \u001b[38;5;129;01mand\u001b[39;00m model\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m \u001b[38;5;129;01min\u001b[39;00m SUPPORTED_PPC_MODELS:\n\u001b[0;32m--> 267\u001b[0m \u001b[43mcreate_criticism_report\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 268\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 269\u001b[0m \u001b[43m \u001b[49m\u001b[43msave_folder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmini_model_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 270\u001b[0m \u001b[43m \u001b[49m\u001b[43mn_samples\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcriticism_settings\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mn_samples\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 271\u001b[0m \u001b[43m \u001b[49m\u001b[43mlabel_key\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcriticism_settings\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcell_type_key\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 272\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mminify_model\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mTrue\u001b[39;00m) \u001b[38;5;129;01mand\u001b[39;00m model\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m \u001b[38;5;129;01min\u001b[39;00m SUPPORTED_MINIFIED_MODELS:\n\u001b[1;32m 275\u001b[0m qzm_key \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmodel_name\u001b[38;5;241m.\u001b[39mlower()\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_latent_qzm\u001b[39m\u001b[38;5;124m\"\u001b[39m\n", + "File \u001b[0;32m~/Documents/scvi-tools/src/scvi/criticism/_create_criticism_report.py:76\u001b[0m, in \u001b[0;36mcreate_criticism_report\u001b[0;34m(model, adata, skip_metrics, n_samples, label_key, save_folder)\u001b[0m\n\u001b[1;32m 74\u001b[0m md_cell_wise_cv, md_gene_wise_cv, md_de \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 75\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m [modalities]:\n\u001b[0;32m---> 76\u001b[0m md_cell_wise_cv_, md_gene_wise_cv_, md_de_ \u001b[38;5;241m=\u001b[39m \u001b[43mcompute_metrics\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 77\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43madata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mskip_metrics\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_samples\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlabel_key\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodality\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mi\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 78\u001b[0m md_cell_wise_cv \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModality: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mi\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m md_cell_wise_cv_ \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 79\u001b[0m md_gene_wise_cv \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModality: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mi\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m md_gene_wise_cv_ \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n", + "File \u001b[0;32m~/Documents/scvi-tools/src/scvi/criticism/_create_criticism_report.py:99\u001b[0m, in \u001b[0;36mcompute_metrics\u001b[0;34m(model, adata, skip_metrics, n_samples, label_key, modality)\u001b[0m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompute_metrics\u001b[39m(model, adata, skip_metrics, n_samples, label_key, modality\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[1;32m 98\u001b[0m models_dict \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel\u001b[39m\u001b[38;5;124m\"\u001b[39m: model}\n\u001b[0;32m---> 99\u001b[0m ppc \u001b[38;5;241m=\u001b[39m \u001b[43mPPC\u001b[49m\u001b[43m(\u001b[49m\u001b[43madata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodels_dict\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_samples\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_samples\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodality\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodality\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 100\u001b[0m \u001b[38;5;66;03m# run ppc+cv\u001b[39;00m\n\u001b[1;32m 101\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m METRIC_CV_CELL \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m skip_metrics:\n", + "File \u001b[0;32m~/Documents/scvi-tools/src/scvi/criticism/_ppc.py:85\u001b[0m, in \u001b[0;36mPosteriorPredictiveCheck.__init__\u001b[0;34m(self, adata, models_dict, count_layer_key, n_samples, indices, modality)\u001b[0m\n\u001b[1;32m 83\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(adata, MuData):\n\u001b[1;32m 84\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m modality \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModality must be defined for MuData.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m---> 85\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39madata \u001b[38;5;241m=\u001b[39m \u001b[43madata\u001b[49m\u001b[43m[\u001b[49m\u001b[43mmodality\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 86\u001b[0m raw_counts \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 87\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39madata\u001b[38;5;241m.\u001b[39mlayers[count_layer_key] \u001b[38;5;28;01mif\u001b[39;00m count_layer_key \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 88\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39madata\u001b[38;5;241m.\u001b[39mX\n\u001b[1;32m 89\u001b[0m )\n\u001b[1;32m 90\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "File \u001b[0;32m~/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/mudata/_core/mudata.py:516\u001b[0m, in \u001b[0;36mMuData.__getitem__\u001b[0;34m(self, index)\u001b[0m\n\u001b[1;32m 514\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmod[index]\n\u001b[1;32m 515\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 516\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mMuData\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mas_view\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/mudata/_core/mudata.py:166\u001b[0m, in \u001b[0;36mMuData.__init__\u001b[0;34m(self, data, feature_types_names, as_view, index, **kwargs)\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_init_common()\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m as_view:\n\u001b[0;32m--> 166\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_init_as_view\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 167\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m 169\u001b[0m \u001b[38;5;66;03m# Add all modalities to a MuData object\u001b[39;00m\n", + "File \u001b[0;32m~/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/mudata/_core/mudata.py:265\u001b[0m, in \u001b[0;36mMuData._init_as_view\u001b[0;34m(self, mudata_ref, index)\u001b[0m\n\u001b[1;32m 262\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_init_as_view\u001b[39m(\u001b[38;5;28mself\u001b[39m, mudata_ref: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMuData\u001b[39m\u001b[38;5;124m\"\u001b[39m, index):\n\u001b[1;32m 263\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01manndata\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_core\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mindex\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m _normalize_indices\n\u001b[0;32m--> 265\u001b[0m obsidx, varidx \u001b[38;5;241m=\u001b[39m \u001b[43m_normalize_indices\u001b[49m\u001b[43m(\u001b[49m\u001b[43mindex\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmudata_ref\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mobs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmudata_ref\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvar\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 267\u001b[0m \u001b[38;5;66;03m# to handle single-element subsets, otherwise when subsetting a Dataframe\u001b[39;00m\n\u001b[1;32m 268\u001b[0m \u001b[38;5;66;03m# we get a Series\u001b[39;00m\n\u001b[1;32m 269\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(obsidx, Integral):\n", + "File \u001b[0;32m~/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/anndata/_core/index.py:32\u001b[0m, in \u001b[0;36m_normalize_indices\u001b[0;34m(index, names0, names1)\u001b[0m\n\u001b[1;32m 30\u001b[0m index \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mtuple\u001b[39m(i\u001b[38;5;241m.\u001b[39mvalues \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(i, pd\u001b[38;5;241m.\u001b[39mSeries) \u001b[38;5;28;01melse\u001b[39;00m i \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m index)\n\u001b[1;32m 31\u001b[0m ax0, ax1 \u001b[38;5;241m=\u001b[39m unpack_index(index)\n\u001b[0;32m---> 32\u001b[0m ax0 \u001b[38;5;241m=\u001b[39m \u001b[43m_normalize_index\u001b[49m\u001b[43m(\u001b[49m\u001b[43max0\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnames0\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 33\u001b[0m ax1 \u001b[38;5;241m=\u001b[39m _normalize_index(ax1, names1)\n\u001b[1;32m 34\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ax0, ax1\n", + "File \u001b[0;32m~/.local/share/hatch/env/virtual/scvi-tools/eVVa01t5/scvi-tools/lib/python3.12/site-packages/anndata/_core/index.py:99\u001b[0m, in \u001b[0;36m_normalize_index\u001b[0;34m(indexer, index)\u001b[0m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m np\u001b[38;5;241m.\u001b[39many(positions \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m0\u001b[39m):\n\u001b[1;32m 98\u001b[0m not_found \u001b[38;5;241m=\u001b[39m indexer[positions \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m0\u001b[39m]\n\u001b[0;32m---> 99\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\n\u001b[1;32m 100\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mValues \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlist\u001b[39m(not_found)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, from \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlist\u001b[39m(indexer)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 101\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mare not valid obs/ var names or indices.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 102\u001b[0m )\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m positions \u001b[38;5;66;03m# np.ndarray[int]\u001b[39;00m\n\u001b[1;32m 104\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mIndexError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnknown indexer \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mindexer\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m of type \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(indexer)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[0;31mKeyError\u001b[0m: \"Values ['rna', 'protein'], from ['rna', 'protein'], are not valid obs/ var names or indices.\"" + ] + } + ], + "source": [ + "model_path = self._minify_and_save_model(model, mdata)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "hub_model = self._create_hub_model(model_path)\n", + "hub_model = self._upload_hub_model(hub_model)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "scvi-tools", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}