From e6847905383a1b9ca4ba948d6cb1be31dade1252 Mon Sep 17 00:00:00 2001
From: mbowcut2 <55161542+mbowcut2@users.noreply.github.com>
Date: Thu, 14 Mar 2024 15:11:00 -0600
Subject: [PATCH] Bug Fix - 367 (#35)

* - Fixed references to ref_names_for_pe

* removed extra tabs

* trying to match empty line, no tabs

* - changed references to ref_names[0]

* Mckay/pd warnings (#45)

* refactor errors='ignore' to try except

* refactored integer slice to iloc[]

* moved to_numeric try except to function

* Refactor to_numeric_ignore_errors to to_numeric_ignore_columns

This change is slightly cleaner because it addresses the root issue that some
columns are strings (and can therefore not be converted to numeric types). Now
if an error does occur when converting the dfs to numeric types it won't be
swallowed up.

* Add documentation to to_numeric_ignore_columns

---------

Co-authored-by: Cole Lyman <cole@colelyman.com>

---------

Co-authored-by: Cole Lyman <cole@colelyman.com>
---
 CRISPResso2/CRISPRessoCORE.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/CRISPResso2/CRISPRessoCORE.py b/CRISPResso2/CRISPRessoCORE.py
index 60402b46..52eaff43 100644
--- a/CRISPResso2/CRISPRessoCORE.py
+++ b/CRISPResso2/CRISPRessoCORE.py
@@ -4791,14 +4791,14 @@ def get_scaffold_len(row, scaffold_start_loc, scaffold_seq):
                     mod_pcts.append(np.concatenate(([ref_name, 'Deletions'], np.array(ref1_all_deletion_count_vectors[ref_name]).astype(float)/tot)))
                     mod_pcts.append(np.concatenate(([ref_name, 'Substitutions'], np.array(ref1_all_substitution_count_vectors[ref_name]).astype(float)/tot)))
                     mod_pcts.append(np.concatenate(([ref_name, 'All_modifications'], np.array(ref1_all_indelsub_count_vectors[ref_name]).astype(float)/tot)))
-                    mod_pcts.append(np.concatenate(([ref_name, 'Total'], [counts_total[ref_name]]*refs[ref_names_for_pe[0]]['sequence_length'])))
-                colnames = ['Batch', 'Modification']+list(refs[ref_names_for_pe[0]]['sequence'])
+                    mod_pcts.append(np.concatenate(([ref_name, 'Total'], [counts_total[ref_name]]*refs[ref_names[0]]['sequence_length'])))
+                colnames = ['Batch', 'Modification']+list(refs[ref_names[0]]['sequence'])
                 pe_modification_percentage_summary_df = to_numeric_ignore_columns(pd.DataFrame(mod_pcts, columns=colnames), {'Batch', 'Modification'})
 
-                sgRNA_intervals = refs[ref_names_for_pe[0]]['sgRNA_intervals']
-                sgRNA_names = refs[ref_names_for_pe[0]]['sgRNA_names']
-                sgRNA_mismatches = refs[ref_names_for_pe[0]]['sgRNA_mismatches']
-                include_idxs_list = refs[ref_names_for_pe[0]]['include_idxs']
+                sgRNA_intervals = refs[ref_names[0]]['sgRNA_intervals']
+                sgRNA_names = refs[ref_names[0]]['sgRNA_names']
+                sgRNA_mismatches = refs[ref_names[0]]['sgRNA_mismatches']
+                include_idxs_list = refs[ref_names[0]]['include_idxs']
 
                 plot_root = _jp('11a.Prime_editing_nucleotide_percentage_quilt')
                 pro_output_name = f'plot_{os.path.basename(plot_root)}.json'