From 7f0ec8762d40339940005de72f66ea8d36a89de2 Mon Sep 17 00:00:00 2001 From: gwaygenomics Date: Wed, 10 Oct 2018 10:33:06 -0400 Subject: [PATCH 1/2] make sure columns are dropped --- 2.evaluate-classifier-pdx.ipynb | 188 ++++++++++++++++++++++++--- scripts/2.evaluate-classifier-pdx.py | 4 +- 2 files changed, 172 insertions(+), 20 deletions(-) diff --git a/2.evaluate-classifier-pdx.ipynb b/2.evaluate-classifier-pdx.ipynb index ff17113..92d6679 100644 --- a/2.evaluate-classifier-pdx.ipynb +++ b/2.evaluate-classifier-pdx.ipynb @@ -252,8 +252,8 @@ "ALK 18\n", "KRAS 16\n", "NF1 12\n", - "DMD 10\n", "NRAS 10\n", + "DMD 10\n", "CIC 8\n", "SOS1 7\n", "HRAS 4\n", @@ -281,13 +281,13 @@ "text/plain": [ "Missense_Mutation 112\n", "Shallow_Del 17\n", - "Fusion 13\n", "Nonsense_Mutation 13\n", + "Fusion 13\n", "Frame_Shift_Del 5\n", - "Frame_Shift_Ins 3\n", "Splice_Site 3\n", - "Deep_Del 1\n", + "Frame_Shift_Ins 3\n", "In_Frame_Del 1\n", + "Deep_Del 1\n", "Name: Variant_Classification, dtype: int64" ] }, @@ -309,8 +309,8 @@ "data": { "text/plain": [ "Osteosarcoma 45\n", - "BCP-ALL 26\n", "Neuroblastoma 26\n", + "BCP-ALL 26\n", "Glioblastoma 13\n", "Ewing Sarcoma 12\n", "Ph-likeALL 7\n", @@ -318,15 +318,15 @@ "MLL-ALL 4\n", "Medulloblastoma 4\n", "Alveolar Rhabdomyosarcoma 4\n", - "Wilms 4\n", "T-ALL 4\n", - "Astrocytoma 3\n", + "Wilms 4\n", "ATRT 3\n", - "ETP-ALL 2\n", + "Astrocytoma 3\n", "Other Sarcoma 2\n", + "ETP-ALL 2\n", "Colon Carcinoma 1\n", - "Small Cell Carcinoma 1\n", "Rhabdoid 1\n", + "Small Cell Carcinoma 1\n", "Name: Histology.Detailed, dtype: int64" ] }, @@ -902,7 +902,155 @@ "cell_type": "code", "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ModelALKBRAFCICDMDHRASKRASNF1NRASPTPN11SOS1TP53Histology_Fullras_status
0ALL-0310000000000MLL-ALL1
1ALL-0700000100000BCP-ALL1
2ALL-0800010000000T-ALL1
3ALL-10800000100000Ph-likeALL1
4ALL-11500000000001BCP-ALL0
\n", + "
" + ], + "text/plain": [ + " Model ALK BRAF CIC DMD HRAS KRAS NF1 NRAS PTPN11 SOS1 TP53 \\\n", + "0 ALL-03 1 0 0 0 0 0 0 0 0 0 0 \n", + "1 ALL-07 0 0 0 0 0 1 0 0 0 0 0 \n", + "2 ALL-08 0 0 0 1 0 0 0 0 0 0 0 \n", + "3 ALL-108 0 0 0 0 0 1 0 0 0 0 0 \n", + "4 ALL-115 0 0 0 0 0 0 0 0 0 0 1 \n", + "\n", + " Histology_Full ras_status \n", + "0 MLL-ALL 1 \n", + "1 BCP-ALL 1 \n", + "2 T-ALL 1 \n", + "3 Ph-likeALL 1 \n", + "4 BCP-ALL 0 " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Ras Pathway Alterations\n", "ras_genes = ['ALK', 'NF1', 'PTPN11', 'BRAF', 'CIC', 'KRAS', 'HRAS', 'NRAS', 'DMD', 'SOS1']\n", @@ -911,7 +1059,9 @@ " full_status_df\n", " .assign(ras_status = full_status_df.loc[:, ras_genes]\n", " .max(axis='columns'))\n", - ")" + ")\n", + "\n", + "full_status_df.head()" ] }, { @@ -1103,22 +1253,22 @@ "T-ALL 19\n", "Ph-likeALL 15\n", "Wilms 13\n", - "Ewing Sarcoma 10\n", "MLL-ALL 10\n", + "Ewing Sarcoma 10\n", "ATRT 8\n", + "Alveolar Rhabdomyosarcoma 7\n", "Embryonal Rhabdomyosarcoma 7\n", "PNET 7\n", - "Alveolar Rhabdomyosarcoma 7\n", "ETP-ALL 6\n", - "Glioblastoma 6\n", "Ependymoma 6\n", + "Glioblastoma 6\n", "Rhabdoid 5\n", - "Other Sarcoma 3\n", "Ph+-ALL 3\n", + "Other Sarcoma 3\n", "DIPG 2\n", + "Astrocytoma 2\n", "Small Cell Carcinoma 2\n", "Other Renal 2\n", - "Astrocytoma 2\n", "CNS germinoma 1\n", "Colon Carcinoma 1\n", "Name: Histology-Detailed, dtype: int64" @@ -1992,7 +2142,7 @@ "scores_file = os.path.join(\"results\", \"classifier_scores_with_clinical_and_alterations.tsv\")\n", "genes = ras_genes + ['TP53']\n", "\n", - "scores_df.drop(['Model_x', 'Model_y', 'Histology_Full'], axis='columns')\n", + "scores_df = scores_df.drop(['Model_x', 'Model_y', 'Histology_Full'], axis='columns')\n", "scores_df[genes] = scores_df[genes].fillna(value=0)\n", "\n", "scores_df.sort_values(by='sample_id').to_csv(scores_file, sep='\\t', index=False)" @@ -2020,9 +2170,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:target-classification]", + "display_name": "Python [conda env:expression-classification]", "language": "python", - "name": "conda-env-target-classification-py" + "name": "conda-env-expression-classification-py" }, "language_info": { "codemirror_mode": { diff --git a/scripts/2.evaluate-classifier-pdx.py b/scripts/2.evaluate-classifier-pdx.py index dee2f18..074a9e1 100644 --- a/scripts/2.evaluate-classifier-pdx.py +++ b/scripts/2.evaluate-classifier-pdx.py @@ -134,6 +134,8 @@ .max(axis='columns')) ) +full_status_df.head() + # ## Load Clinical Data Information # @@ -437,7 +439,7 @@ scores_file = os.path.join("results", "classifier_scores_with_clinical_and_alterations.tsv") genes = ras_genes + ['TP53'] -scores_df.drop(['Model_x', 'Model_y', 'Histology_Full'], axis='columns') +scores_df = scores_df.drop(['Model_x', 'Model_y', 'Histology_Full'], axis='columns') scores_df[genes] = scores_df[genes].fillna(value=0) scores_df.sort_values(by='sample_id').to_csv(scores_file, sep='\t', index=False) From aba81444f0754f729eedd58ce1034c816bd30392 Mon Sep 17 00:00:00 2001 From: gwaygenomics Date: Wed, 10 Oct 2018 10:33:37 -0400 Subject: [PATCH 2/2] rerun pipeline and refresh results --- figures/NF1_confidence_scores.pdf | Bin 19722 -> 19722 bytes figures/NF1_predictions.pdf | Bin 19136 -> 19136 bytes figures/Ras_confidence_scores.pdf | Bin 28608 -> 28608 bytes figures/Ras_predictions.pdf | Bin 18118 -> 18118 bytes figures/TP53_confidence_scores.pdf | Bin 20832 -> 20832 bytes ...53_osteosarcoma_variant_classification.pdf | Bin 19759 -> 19759 bytes figures/TP53_predictions.pdf | Bin 18444 -> 18444 bytes figures/TP53_variant_classification.pdf | Bin 24747 -> 24747 bytes figures/classifier_precision_recall_curve.pdf | Bin 17155 -> 17155 bytes figures/classifier_roc_curve.pdf | Bin 15950 -> 15950 bytes figures/histology_NF1_predictions.pdf | Bin 38737 -> 38737 bytes figures/histology_Ras_predictions.pdf | Bin 41067 -> 41067 bytes figures/histology_TP53_predictions.pdf | Bin 41317 -> 41317 bytes html/2.evaluate-classifier-pdx.html | 171 ++++++++++++++++-- html/3.explore-variants.html | 8 +- 15 files changed, 161 insertions(+), 18 deletions(-) diff --git a/figures/NF1_confidence_scores.pdf b/figures/NF1_confidence_scores.pdf index e6aba3ebee00dfe47797b1281e7e48176d584c8c..959aa18131b0b4929c827a2ec5c9cc48b52151d8 100644 GIT binary patch delta 23 ecmeC0#n?5AaRavphoOO?fsvV^>1JV%?<@dTat7}J delta 23 ecmeC0#n?5AaRavphk=Qqsey^9@n&IwmGQt-#tk!FISdU94UEhT%r`G|m1hM2XYU7* delta 23 fcmX>wmGQt-#tk!FISfn;O$|&;OgArdm1hM2Xfg+v diff --git a/figures/Ras_confidence_scores.pdf b/figures/Ras_confidence_scores.pdf index 5234b894ffa235093f354833272d3fedc2b1b9fe..474fcd5edc8f6b08d02595bbe0cd1e01f90d9602 100644 GIT binary patch delta 23 fcmX?bpYgzb#tmOHI1CL84UEhTO*a3{ILrnBeys_{ delta 23 fcmX?bpYgzb#tmOHI1Ee-O$|&;jW_?zILrnBe)b8? diff --git a/figures/Ras_predictions.pdf b/figures/Ras_predictions.pdf index 85ada0220302ef4687847fca38b4bb6e8c78355d..be7cfc7a706632d18e6ea64fe4267eeeb1ae9141 100644 GIT binary patch delta 23 fcmX@s%XqApal?B@4nqS&10yp7^UdEK&$9pkZ+Hmc delta 23 fcmX@s%XqApal?B@4g(WIQv(wd)6L%<&$9pkZ@UQQ diff --git a/figures/TP53_confidence_scores.pdf b/figures/TP53_confidence_scores.pdf index c15833ef7cc9072849966628c239cfc8d263f7f6..86842583a1fd06cb523544145848045f3f3181f4 100644 GIT binary patch delta 23 ecmaE`i1EQ9#toJ}9EJvl21aIvrkfpo1Ivm$1DI=tOoP| diff --git a/figures/TP53_variant_classification.pdf b/figures/TP53_variant_classification.pdf index 1ec3ff5fe87e9306d27321fce5f0a8a3344aa6db..1117aa1b216e71dc54feee1cddffd6f24bdbf324 100644 GIT binary patch delta 23 fcmZ2|ka6`v#tl!RI1CL84UEhTO*g-Z+Rq9AbOi|> delta 23 fcmZ2|ka6`v#tl!RI1Ee-O$|&;O*X%Y+Rq9AbWRB+ diff --git a/figures/classifier_precision_recall_curve.pdf b/figures/classifier_precision_recall_curve.pdf index 0c18d4db0a5e18055a22a2a9bebb94cfc732f8b0..8a72266656846cc827f777031b2936bd2459e12b 100644 GIT binary patch delta 23 ecmZo}V{C3?+@NE}VQ64zU}R=sw%Nq)JPQC*pa#DH delta 23 ecmZo}V{C3?+@NE}VPIltYG7hwy4l3;JPQC*=?2CC diff --git a/figures/classifier_roc_curve.pdf b/figures/classifier_roc_curve.pdf index 226cfc3856618fc633191faf4624f16c86b6add7..c8a7efb9ab4c3fab286bfa6c1d475e6efcfc9c0a 100644 GIT binary patch delta 21 ccmX?CbFOAXvL%P1fuVtsnSt5nY|F( diff --git a/figures/histology_Ras_predictions.pdf b/figures/histology_Ras_predictions.pdf index 5051a3cc768289be64506ebd9395782acb64edb9..81d442a68434e724f4bf447ec2f543e0843be20b 100644 GIT binary patch delta 23 fcmaETfa&!CrVW3la~K*J8W@=wSZrpQkLoad Status MatrixLoad Status Matrix
Missense_Mutation    112
 Shallow_Del           17
-Nonsense_Mutation     13
 Fusion                13
+Nonsense_Mutation     13
 Frame_Shift_Del        5
 Splice_Site            3
 Frame_Shift_Ins        3
-In_Frame_Del           1
 Deep_Del               1
+In_Frame_Del           1
 Name: Variant_Classification, dtype: int64
@@ -12150,18 +12150,18 @@

Load Status MatrixExtract Gene Status.assign(ras_status = full_status_df.loc[:, ras_genes] .max(axis='columns')) ) + +full_status_df.head() +
+
+ + +
+ +
Out[12]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ModelALKBRAFCICDMDHRASKRASNF1NRASPTPN11SOS1TP53Histology_Fullras_status
0ALL-0310000000000MLL-ALL1
1ALL-0700000100000BCP-ALL1
2ALL-0800010000000T-ALL1
3ALL-10800000100000Ph-likeALL1
4ALL-11500000000001BCP-ALL0
+
+
+ +
+ +
+
+