From 3fd84d09b5353fbb5e41ad7ff3e9fbfba8f84817 Mon Sep 17 00:00:00 2001 From: Trevor Martin <60452953+trevormartinj7@users.noreply.github.com> Date: Mon, 22 Jan 2024 12:26:46 -0700 Subject: [PATCH] Failed batch runs (#33) * Reports, add reports to packages, colors, ordered pandas sort (#28) * Sort by #Reads instead of %Reads to avoid floating point errors * Fix x-axis spacing on some reports * Add break to header matching loop to prevent match statements being printed after failure * Check all headers and only error if there are unmatched values * Fix indent * Remove missing_header variable * Fix tick marks * Squashed 'CRISPResso2/CRISPRessoReports/' changes from 7d9b4e5..e18807d * X-axis tick fix on fig 6a * Fix function name from styles to config * Squashed 'CRISPResso2/CRISPRessoReports/' changes from e18807d..e9da7bf git-subtree-dir: CRISPResso2/CRISPRessoReports git-subtree-split: e9da7bff794058e1fcdb3dc9ced79871c6a30e18 * Add CRISPRessoReports to packages * Colors only with pro * changed tuple to list for matplotlib change (#31) * wgs and batch failed runs implementation * Added failed run functionality including shared function, edits to Report, and displaying with HTML and Javascript * Merge CRISPRessoReports master into failed-batch-runs * Cole's failed-batch-runs review and changes (#36) * Fix showing link to report in CLI (only show in web) * Remove styling of jumbotron The p-5 added some weird space at the top of the container, the rounded-3 did not make a difference (because there is no background), and the h-100 also did not make a difference. * Remove extra spaces at end of the line * Remove color legend from figure caption in plot 4f * Refactor fig_reports.html partial to reduce duplication * Add opacity to custom colors on allele quilt plot * Remove extra spaces * Change default color of deletion It looked too similar to `N` and was difficult to tell apart. 
* Refactor plot 10c, refactor displaying of figures This commit adds flexbox to the plots, this was mainly for plots 10b and 10c because their alignment was off. * Add more plots to get the correct percentages for width * Remove setting the height of the plots * Check for failed batch info before retrieving it in `make_multi_report_from_folder` * Fix extraneous whitespace in `fig_reports` partial * Only load certain resources when on web mode * Move jQuery import to bottom of the page to improve performance * Extract out report footer buttons to partial * Fix too many closing divs in batchReport.html * Refactor failed runs to be a partial * Move the failed run JS to the partial This has the benefit of keeping the relevant code close, and also prevents the error that we were running into before where `chevronIcon` wasn't found when there were no failed runs (because the element wasn't there). * Remove `report_name` id because it probably has spaces * Move existing Plotly plots to batchReport from multiReport * Fix typo in fig 11c and resize it to 40% --------- Co-authored-by: Samuel Nichols Co-authored-by: mbowcut2 <55161542+mbowcut2@users.noreply.github.com> Co-authored-by: Cole Lyman --- CRISPResso2/CRISPRessoAggregateCORE.py | 2 +- CRISPResso2/CRISPRessoBatchCORE.py | 21 +- CRISPResso2/CRISPRessoCORE.py | 41 +- CRISPResso2/CRISPRessoCompareCORE.py | 2 +- CRISPResso2/CRISPRessoMetaCORE.py | 2 +- CRISPResso2/CRISPRessoPlot.py | 121 ++- CRISPResso2/CRISPRessoPooledCORE.py | 33 +- CRISPResso2/CRISPRessoPooledWGSCompareCORE.py | 6 +- .../.github/workflows/.pylintrc | 7 + .../.github/workflows/pylint.yml | 26 + .../CRISPRessoReport.py | 371 +++++--- CRISPResso2/CRISPRessoReports/README.md | 191 ++++ CRISPResso2/CRISPRessoReports/__init__.py | 0 .../templates/CRISPResso_justcup.png | Bin .../templates/batchReport.html} | 92 +- .../templates/favicon.ico | Bin .../CRISPRessoReports/templates/layout.html | 226 +++++ .../templates/multiReport.html | 168 ++++ 
.../templates/pooledReport.html | 101 +++ .../CRISPRessoReports/templates/report.html | 714 +++++++++++++++ .../shared/partials/failed_runs.html | 65 ++ .../shared/partials/fig_reports.html | 34 + .../shared/partials/fig_summaries.html | 15 + .../templates/shared/partials/log_params.html | 30 + .../partials/report_footer_buttons.html | 9 + .../templates/wgsReport.html | 99 ++ CRISPResso2/CRISPRessoShared.py | 135 ++- CRISPResso2/CRISPRessoWGSCORE.py | 12 +- CRISPResso2/default_style.json | 13 + CRISPResso2/templates/layout.html | 104 --- CRISPResso2/templates/report.html | 854 ------------------ MANIFEST.in | 2 +- setup.py | 3 +- 33 files changed, 2264 insertions(+), 1235 deletions(-) create mode 100644 CRISPResso2/CRISPRessoReports/.github/workflows/.pylintrc create mode 100644 CRISPResso2/CRISPRessoReports/.github/workflows/pylint.yml rename CRISPResso2/{ => CRISPRessoReports}/CRISPRessoReport.py (68%) create mode 100644 CRISPResso2/CRISPRessoReports/README.md create mode 100644 CRISPResso2/CRISPRessoReports/__init__.py rename CRISPResso2/{ => CRISPRessoReports}/templates/CRISPResso_justcup.png (100%) rename CRISPResso2/{templates/multiReport.html => CRISPRessoReports/templates/batchReport.html} (69%) rename CRISPResso2/{ => CRISPRessoReports}/templates/favicon.ico (100%) create mode 100644 CRISPResso2/CRISPRessoReports/templates/layout.html create mode 100644 CRISPResso2/CRISPRessoReports/templates/multiReport.html create mode 100644 CRISPResso2/CRISPRessoReports/templates/pooledReport.html create mode 100644 CRISPResso2/CRISPRessoReports/templates/report.html create mode 100644 CRISPResso2/CRISPRessoReports/templates/shared/partials/failed_runs.html create mode 100644 CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_reports.html create mode 100644 CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_summaries.html create mode 100644 CRISPResso2/CRISPRessoReports/templates/shared/partials/log_params.html create mode 100644 
CRISPResso2/CRISPRessoReports/templates/shared/partials/report_footer_buttons.html create mode 100644 CRISPResso2/CRISPRessoReports/templates/wgsReport.html create mode 100644 CRISPResso2/default_style.json delete mode 100644 CRISPResso2/templates/layout.html delete mode 100644 CRISPResso2/templates/report.html diff --git a/CRISPResso2/CRISPRessoAggregateCORE.py b/CRISPResso2/CRISPRessoAggregateCORE.py index 748a0794..689bef98 100644 --- a/CRISPResso2/CRISPRessoAggregateCORE.py +++ b/CRISPResso2/CRISPRessoAggregateCORE.py @@ -18,7 +18,7 @@ from datetime import datetime from CRISPResso2 import CRISPRessoShared from CRISPResso2 import CRISPRessoPlot -from CRISPResso2 import CRISPRessoReport +from CRISPResso2.CRISPRessoReports import CRISPRessoReport from CRISPResso2.CRISPRessoMultiProcessing import get_max_processes, run_plot diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py index 833853f6..92e6625e 100644 --- a/CRISPResso2/CRISPRessoBatchCORE.py +++ b/CRISPResso2/CRISPRessoBatchCORE.py @@ -16,7 +16,7 @@ from CRISPResso2 import CRISPRessoShared from CRISPResso2 import CRISPRessoPlot from CRISPResso2 import CRISPRessoMultiProcessing -from CRISPResso2 import CRISPRessoReport +from CRISPResso2.CRISPRessoReports import CRISPRessoReport import logging @@ -91,6 +91,7 @@ def main(): crispresso_options_for_batch = list(crispresso_options-options_to_ignore) CRISPRessoShared.check_file(args.batch_settings) + config = CRISPRessoShared.check_custom_config(args) if args.zip_output and not args.place_report_in_output_folder: warn('Invalid arguement combination: If zip_output is True then place_report_in_output_folder must also be True. 
Setting place_report_in_output_folder to True.') @@ -301,12 +302,16 @@ def main(): amplicon_names = {} amplicon_counts = {} completed_batch_arr = [] + failed_batch_arr = [] + failed_batch_arr_desc = [] for idx, row in batch_params.iterrows(): batch_name = CRISPRessoShared.slugify(row["name"]) folder_name = os.path.join(OUTPUT_DIRECTORY, 'CRISPResso_on_%s' % batch_name) - run_data_file = os.path.join(folder_name, 'CRISPResso2_info.json') - if not os.path.isfile(run_data_file): - info("Skipping folder '%s'. Cannot find run data at '%s'."%(folder_name, run_data_file)) + # check if run failed + failed_run_bool, failed_status_string = CRISPRessoShared.check_if_failed_run(folder_name, info) + if failed_run_bool: + failed_batch_arr.append(batch_name) + failed_batch_arr_desc.append(failed_status_string) run_datas.append(None) continue @@ -326,6 +331,8 @@ def main(): completed_batch_arr.append(batch_name) + crispresso2_info['results']['failed_batch_arr'] = failed_batch_arr + crispresso2_info['results']['failed_batch_arr_desc'] = failed_batch_arr_desc crispresso2_info['results']['completed_batch_arr'] = completed_batch_arr # make sure amplicon names aren't super long @@ -596,6 +603,7 @@ def main(): 'save_also_png': save_png, 'sgRNA_intervals': sub_sgRNA_intervals, 'quantification_window_idxs': include_idxs, + 'custom_colors': config['colors'], } debug('Plotting nucleotide percentage quilt for amplicon {0}, sgRNA {1}'.format(amplicon_name, sgRNA)) plot( @@ -620,6 +628,7 @@ def main(): 'save_also_png': save_png, 'sgRNA_intervals': sub_sgRNA_intervals, 'quantification_window_idxs': include_idxs, + 'custom_colors': config['colors'] } debug('Plotting nucleotide conversion map for amplicon {0}, sgRNA {1}'.format(amplicon_name, sgRNA)) plot( @@ -646,6 +655,7 @@ def main(): 'save_also_png': save_png, 'sgRNA_intervals': consensus_sgRNA_intervals, 'quantification_window_idxs': include_idxs, + 'custom_colors': config['colors'], } debug('Plotting nucleotide quilt for 
{0}'.format(amplicon_name)) plot( @@ -669,6 +679,7 @@ def main(): 'save_also_png': save_png, 'sgRNA_intervals': consensus_sgRNA_intervals, 'quantification_window_idxs': include_idxs, + 'custom_colors': config['colors'] } debug('Plotting nucleotide conversion map for {0}'.format(amplicon_name)) plot( @@ -692,6 +703,7 @@ def main(): 'mod_pct_df': modification_percentage_summary_df, 'fig_filename_root': this_nuc_pct_quilt_plot_name, 'save_also_png': save_png, + 'custom_colors': config['colors'], } debug('Plotting nucleotide quilt for {0}'.format(amplicon_name)) plot( @@ -710,6 +722,7 @@ def main(): 'conversion_nuc_from': args.conversion_nuc_from, 'conversion_nuc_to': args.conversion_nuc_to, 'save_also_png': save_png, + 'custom_colors': config['colors'] } debug('Plotting BE nucleotide conversion map for {0}'.format(amplicon_name)) plot( diff --git a/CRISPResso2/CRISPRessoCORE.py b/CRISPResso2/CRISPRessoCORE.py index efba0ead..916e54ad 100644 --- a/CRISPResso2/CRISPRessoCORE.py +++ b/CRISPResso2/CRISPRessoCORE.py @@ -28,7 +28,7 @@ from CRISPResso2 import CRISPRessoCOREResources -from CRISPResso2 import CRISPRessoReport +from CRISPResso2.CRISPRessoReports import CRISPRessoReport from CRISPResso2 import CRISPRessoShared from CRISPResso2 import CRISPRessoPlot from CRISPResso2 import CRISPResso2Align @@ -1436,7 +1436,7 @@ def rreplace(s, old, new): - def get_prime_editing_guides(this_amp_seq, this_amp_name, ref0_seq, prime_edited_seq, prime_editing_extension_seq_dna, prime_editing_pegRNA_extension_quantification_window_size, + def get_prime_editing_guides(this_amp_seq, this_amp_name, ref0_seq, prime_edited_seq, prime_editing_extension_seq_dna, prime_editing_pegRNA_extension_quantification_window_size, nicking_qw_center, nicking_qw_size,aln_matrix,needleman_wunsch_gap_open,needleman_wunsch_gap_extend, prime_editing_gap_open, prime_editing_gap_extend): """ gets prime editing guide sequences for this amplicon @@ -1451,7 +1451,7 @@ def get_prime_editing_guides(this_amp_seq, 
this_amp_name, ref0_seq, prime_edited aln_matrix: matrix specifying alignment substitution scores in the NCBI format needleman_wunsch_gap_open: alignment penalty assignment used to determine similarity of two sequences. needleman_wunsch_gap_extend: alignment penalty assignment used to determine similarity of two sequences. - prime_editing_gap_open: alignment penalty assignment used to determine similarity of two pegRNA components. For prime editing the gap open is usually larger while the extension penalty is lower/zero to accomodate insertions of large sequences. + prime_editing_gap_open: alignment penalty assignment used to determine similarity of two pegRNA components. For prime editing the gap open is usually larger while the extension penalty is lower/zero to accomodate insertions of large sequences. prime_editing_gap_extend: alignment penalty assignment used to determine similarity of two pegRNA components """ pe_guides = [] @@ -3368,6 +3368,7 @@ def save_count_vectors_to_file(vectors, vectorNames, refSeq, filename): info('Making Plots...') ############################################################################################################################################### save_png = True + config = CRISPRessoShared.check_custom_config(args) if args.suppress_report: save_png = False @@ -3412,6 +3413,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ else: process_pool = None process_futures = None + plot = partial( CRISPRessoMultiProcessing.run_plot, num_processes=n_processes, @@ -3597,6 +3599,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ 'sgRNA_names': sgRNA_names, 'sgRNA_mismatches': sgRNA_mismatches, 'quantification_window_idxs': include_idxs_list, + 'custom_colors': config["colors"], } debug('Plotting nucleotide quilt across amplicon') plot(CRISPRessoPlot.plot_nucleotide_quilt, plot_2a_input) @@ -3643,6 +3646,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, 
ref_sequence, ref_total_ 'sgRNA_names': sgRNA_names, 'sgRNA_mismatches': sgRNA_mismatches, 'quantification_window_idxs': new_include_idx, + 'custom_colors': config["colors"], } debug('Plotting nucleotide distribuition around {0} for {1}'.format(sgRNA_legend, ref_name)) plot(CRISPRessoPlot.plot_nucleotide_quilt, plot_2b_input) @@ -3820,6 +3824,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ ), }, 'plot_root': plot_root, + 'custom_colors': config["colors"], 'save_also_png': save_png, } debug('Plotting amplication modifications for {0}'.format(ref_name)) @@ -3849,12 +3854,13 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ 'Mutation position distribution', ref_name, ), 'plot_root': plot_root, + 'custom_colors': config["colors"], 'save_also_png': save_png, } debug('Plotting modification frequency for {0}'.format(ref_name)) plot(CRISPRessoPlot.plot_modification_frequency, plot_4b_input) crispresso2_info['results']['refs'][ref_name]['plot_4b_root'] = os.path.basename(plot_root) - crispresso2_info['results']['refs'][ref_name]['plot_4b_caption'] = "Figure 4b: Frequency of insertions (red), deletions (purple), and substitutions (green) across the entire amplicon, including modifications outside of the quantification window." + crispresso2_info['results']['refs'][ref_name]['plot_4b_caption'] = "Figure 4b: Frequency of insertions, deletions, and substitutions across the entire amplicon, including modifications outside of the quantification window." 
crispresso2_info['results']['refs'][ref_name]['plot_4b_data'] = [('Modification frequency', os.path.basename(mod_count_filename))] plot_root = _jp( @@ -3877,6 +3883,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ ), 'ref_name': ref_name, 'plot_root': plot_root, + 'custom_colors': config["colors"], 'save_also_png': save_png, } debug('Plotting quantification window locations for {0}'.format(ref_name)) @@ -3885,7 +3892,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ plot_4c_input, ) crispresso2_info['results']['refs'][ref_name]['plot_4c_root'] = os.path.basename(plot_root) - crispresso2_info['results']['refs'][ref_name]['plot_4c_caption'] = "Figure 4c: Frequency of insertions (red), deletions (purple), and substitutions (green) across the entire amplicon, considering only modifications that overlap with the quantification window." + crispresso2_info['results']['refs'][ref_name]['plot_4c_caption'] = "Figure 4c: Frequency of insertions, deletions, and substitutions across the entire amplicon, considering only modifications that overlap with the quantification window." 
crispresso2_info['results']['refs'][ref_name]['plot_4c_data'] = [('Modification frequency in quantification window', os.path.basename(quant_window_mod_count_filename))] #Position dependent indels plot @@ -3933,6 +3940,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ 'n_total': N_TOTAL, 'ref_len': ref_len, 'ref_name': ref_names[0], + 'custom_colors': config["colors"], 'save_also_png': save_png, } if ref_name == ref_names[0]: @@ -3947,7 +3955,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ plot_4e_input['plot_title'] = 'Mutation position distribution in %s reads with reference to %s'%(ref_name, ref_names[0]) plot_4e_input['plot_root'] = plot_root crispresso2_info['results']['refs'][ref_names[0]]['plot_4f_root'] = os.path.basename(plot_root) - crispresso2_info['results']['refs'][ref_names[0]]['plot_4f_caption'] = "Figure 4f: Positions of modifications in HDR reads with respect to the reference sequence ("+ref_names[0]+"). Insertions: red, deletions: purple, substitutions: green. All modifications (including those outside the quantification window) are shown." + crispresso2_info['results']['refs'][ref_names[0]]['plot_4f_caption'] = f"Figure 4f: Positions of modifications in HDR reads with respect to the reference sequence ({ref_names[0]}). All modifications (including those outside the quantification window) are shown." 
crispresso2_info['results']['refs'][ref_names[0]]['plot_4f_data'] = [] debug('Plotting global modifications with respect to reference') plot( @@ -4002,6 +4010,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ 'quantification_window_idxs': include_idxs_list, 'sgRNA_names': sgRNA_names, 'sgRNA_mismatches': sgRNA_mismatches, + 'custom_colors': config["colors"], } debug('Plotting HDR nucleotide quilt') plot(CRISPRessoPlot.plot_nucleotide_quilt, plot_4g_input) @@ -4102,12 +4111,13 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ ref_name, ), 'plot_root': plot_root, + 'custom_colors': config["colors"], 'save_also_png': save_png, } debug('Plotting non-coding mutation positions for {0}'.format(ref_name)) plot(CRISPRessoPlot.plot_non_coding_mutations, plot_7_input) crispresso2_info['results']['refs'][ref_name]['plot_7_root'] = os.path.basename(plot_root) - crispresso2_info['results']['refs'][ref_name]['plot_7_caption'] = "Figure 7: Reads with insertions (red), deletions (purple), and substitutions (green) mapped to reference amplicon position exclusively in noncoding region/s (that is, without mutations affecting coding sequences). The predicted cleavage site is indicated by a vertical dashed line. Only sequence positions directly adjacent to insertions or directly affected by deletions or substitutions are plotted." + crispresso2_info['results']['refs'][ref_name]['plot_7_caption'] = "Figure 7: Reads with insertions, deletions, and substitutions mapped to reference amplicon position exclusively in noncoding region/s (that is, without mutations affecting coding sequences). The predicted cleavage site is indicated by a vertical dashed line. Only sequence positions directly adjacent to insertions or directly affected by deletions or substitutions are plotted." 
crispresso2_info['results']['refs'][ref_name]['plot_7_data'] = [] plot_root = _jp('8.'+ref_plot_name+'Potential_splice_sites_pie_chart') @@ -4141,6 +4151,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ 'fig_filename_root': fig_filename_root, 'save_also_png': save_png, 'quantification_window_idxs': include_idxs_list, + 'custom_colors': config['colors'] } debug('Plotting substitutions across reference for {0}'.format(ref_name)) plot(CRISPRessoPlot.plot_subs_across_ref, plot_10a_input) @@ -4158,7 +4169,8 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ 'Substitution frequency\nin entire amplicon', ref_name, ), 'fig_filename_root': fig_filename_root, - 'save_also_png': save_png + 'save_also_png': save_png, + 'custom_colors': config['colors'] } debug('Plotting substitution frequency barplot for {0}'.format(ref_name)) plot(CRISPRessoPlot.plot_sub_freqs, plot_10b_input) @@ -4172,7 +4184,8 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ 'alt_nuc_counts': alt_nuc_counts, 'plot_title': get_plot_title_with_ref_name('Substitution frequency\nin quantification window', ref_name), 'fig_filename_root': fig_filename_root, - 'save_also_png': save_png + 'save_also_png': save_png, + 'custom_colors': config['colors'] } debug('Plotting substitution frequency barplot in quantification window for {0}'.format(ref_name)) plot(CRISPRessoPlot.plot_sub_freqs, plot_10c_input) @@ -4263,6 +4276,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ 'sgRNA_names': sgRNA_names, 'sgRNA_mismatches': sgRNA_mismatches, 'annotate_wildtype_allele': args.annotate_wildtype_allele, + 'custom_colors': config["colors"], } debug('Plotting allele distribution around cut for {0}'.format(ref_name)) plot(CRISPRessoPlot.plot_alleles_table, plot_9_input) @@ -4349,6 +4363,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ 'conversion_nuc_from': 
args.conversion_nuc_from, 'fig_filename_root': fig_filename_root, 'save_also_png': save_png, + 'custom_colors': config['colors'], } debug('Plotting conversion at {0}s around the {1} for {2}'.format(args.conversion_nuc_from, sgRNA_legend, ref_name)) plot( @@ -4368,6 +4383,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ 'conversion_nuc_from': args.conversion_nuc_from, 'fig_filename_root': fig_filename_root, 'save_also_png': save_png, + 'custom_colors': config['colors'] } debug('Plotting non-reference conversion at {0}s around the {1} for {2}'.format(args.conversion_nuc_from, sgRNA_legend, ref_name)) plot( @@ -4389,7 +4405,8 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_ ), 'conversion_nuc_from': args.conversion_nuc_from, 'fig_filename_root': fig_filename_root, - 'save_also_png': save_png + 'save_also_png': save_png, + 'custom_colors': config['colors'] } debug('Plotting scaled non-reference conversion at {0}s around the {1} for {2}'.format(args.conversion_nuc_from, sgRNA_legend, ref_name)) plot( @@ -4581,6 +4598,7 @@ def get_scaffold_len(row, scaffold_start_loc, scaffold_seq): 'sgRNA_names': sgRNA_names, 'sgRNA_mismatches': sgRNA_mismatches, 'quantification_window_idxs': include_idxs_list, + 'custom_colors': config['colors'] } info('Plotting prime editing nucleotide percentage quilt', {'percent_complete': 96}) plot(CRISPRessoPlot.plot_nucleotide_quilt, plot_11a_input) @@ -4639,6 +4657,7 @@ def get_scaffold_len(row, scaffold_start_loc, scaffold_seq): 'sgRNA_names': sgRNA_names, 'sgRNA_mismatches': sgRNA_mismatches, 'quantification_window_idxs': new_include_idx, + 'custom_colors': config['colors'] } info('Plotting nucleotide quilt', {'percent_complete': 97}) plot(CRISPRessoPlot.plot_nucleotide_quilt, plot_11b_input) @@ -4659,7 +4678,7 @@ def get_scaffold_len(row, scaffold_start_loc, scaffold_seq): plot_11c_input, ) crispresso2_info['results']['general_plots']['plot_11c_root'] = 
os.path.basename(plot_root) - crispresso2_info['results']['general_plots']['plot_11c_caption'] = "Figure 11a: Scaffold insertion lengths and deletion lengths in reads that contain a scaffold insertion. 'Length matching scaffold' shows the number of basepairs immediately after the pegRNA extension sequence that exactly match the scaffold RNA sequence. 'Insertion length' shows the length of the insertion immediately after the pegRNA extension sequence (including bases that do not match the scaffold sequence)." + crispresso2_info['results']['general_plots']['plot_11c_caption'] = "Figure 11c: Scaffold insertion lengths and deletion lengths in reads that contain a scaffold insertion. 'Length matching scaffold' shows the number of basepairs immediately after the pegRNA extension sequence that exactly match the scaffold RNA sequence. 'Insertion length' shows the length of the insertion immediately after the pegRNA extension sequence (including bases that do not match the scaffold sequence)." crispresso2_info['results']['general_plots']['plot_11c_data'] = [('Scaffold insertion alleles with insertion sizes', os.path.basename(scaffold_insertion_sizes_filename))] # join plotting pool diff --git a/CRISPResso2/CRISPRessoCompareCORE.py b/CRISPResso2/CRISPRessoCompareCORE.py index 64c77c53..fa3e0f7f 100644 --- a/CRISPResso2/CRISPRessoCompareCORE.py +++ b/CRISPResso2/CRISPRessoCompareCORE.py @@ -11,7 +11,7 @@ import argparse from CRISPResso2 import CRISPRessoShared from CRISPResso2 import CRISPRessoPlot -from CRISPResso2 import CRISPRessoReport +from CRISPResso2.CRISPRessoReports import CRISPRessoReport import logging diff --git a/CRISPResso2/CRISPRessoMetaCORE.py b/CRISPResso2/CRISPRessoMetaCORE.py index 0be6400c..e2e0b843 100644 --- a/CRISPResso2/CRISPRessoMetaCORE.py +++ b/CRISPResso2/CRISPRessoMetaCORE.py @@ -15,7 +15,7 @@ from CRISPResso2 import CRISPRessoShared from CRISPResso2 import CRISPRessoPlot from CRISPResso2 import CRISPRessoMultiProcessing -from CRISPResso2 import 
CRISPRessoReport +from CRISPResso2.CRISPRessoReports import CRISPRessoReport import logging diff --git a/CRISPResso2/CRISPRessoPlot.py b/CRISPResso2/CRISPRessoPlot.py index 16cf2a9c..4f19ce07 100644 --- a/CRISPResso2/CRISPRessoPlot.py +++ b/CRISPResso2/CRISPRessoPlot.py @@ -72,13 +72,30 @@ def get_nuc_color(nuc, alpha): return (charSum, (1-charSum), (2*charSum*(1-charSum))) -def get_color_lookup(nucs, alpha): - colorLookup = {} - for nuc in nucs: - colorLookup[nuc] = get_nuc_color(nuc, alpha) - return colorLookup +def get_color_lookup(nucs, alpha, custom_colors=None): + if custom_colors is None: + colorLookup = {} + for nuc in nucs: + colorLookup[nuc] = get_nuc_color(nuc, alpha) + return colorLookup + else: + get_color = lambda x, y, z: (x / 255.0, y / 255.0, z / 255.0, alpha) + colors = {} + for nuc in nucs: + if nuc == 'INS': + rgb = (193, 129, 114) + else: + rgb = hex_to_rgb(custom_colors[nuc]) + colors[nuc] = get_color(rgb[0], rgb[1], rgb[2]) + return colors + -def plot_nucleotide_quilt(nuc_pct_df,mod_pct_df,fig_filename_root,save_also_png=False,sgRNA_intervals=None,min_text_pct=0.5,max_text_pct=0.95,quantification_window_idxs=None,sgRNA_names=None,sgRNA_mismatches=None,shade_unchanged=True,group_column='Batch'): +def hex_to_rgb(value): + value = value.lstrip('#') + lv = len(value) + return tuple(int(value[i:i + lv // 3], 16) for i in range(0, lv, lv // 3)) + +def plot_nucleotide_quilt(nuc_pct_df,mod_pct_df,fig_filename_root, custom_colors, save_also_png=False,sgRNA_intervals=None,min_text_pct=0.5,max_text_pct=0.95,quantification_window_idxs=None,sgRNA_names=None,sgRNA_mismatches=None,shade_unchanged=True,group_column='Batch'): """ Plots a nucleotide quilt with each square showing the percentage of each base at that position in the reference nuc_pct_df: dataframe with percents of each base (ACTGN-) at each position @@ -111,8 +128,9 @@ def plot_nucleotide_quilt(nuc_pct_df,mod_pct_df,fig_filename_root,save_also_png= 
samplesList.append(nuc_pct_df.iloc[sample_row_start, 0]) # make a color map of fixed colors - color_lookup = get_color_lookup(['A', 'T', 'C', 'G', 'N', 'INS', '-'], alpha=1) - unchanged_color_lookup = get_color_lookup(['A', 'T', 'C', 'G', 'N', 'INS', '-'], alpha=0.3) + color_lookup = get_color_lookup(['A', 'T', 'C', 'G', 'N', 'INS', '-'], alpha=1, custom_colors=custom_colors) + unchanged_color_lookup = get_color_lookup(['A', 'T', 'C', 'G', 'N', 'INS', '-'], alpha=0.3, + custom_colors=custom_colors) #fig = plt.figure(figsize=(amp_len/2.0,nSamples*2)) #fig = plt.figure(figsize=(amp_len,nSamples)) @@ -497,6 +515,7 @@ def plot_amplicon_modifications( y_max, plot_titles, plot_root, + custom_colors, save_also_png=False, ): fig, ax = plt.subplots(figsize=(10, 10)) @@ -535,9 +554,9 @@ def plot_amplicon_modifications( ax.plot( all_indelsub_count_vectors, - 'r', lw=3, label=plot_titles['combined'], + color=custom_colors['Deletion'] ) if cut_points: @@ -648,6 +667,7 @@ def plot_modification_frequency( y_max, plot_title, plot_root, + custom_colors, save_also_png=False, ): fig, ax = plt.subplots(figsize=(10, 10)) @@ -685,13 +705,13 @@ def plot_modification_frequency( ax.add_patch(p) ax.plot( - all_insertion_count_vectors, 'r', lw=3, label='Insertions', + all_insertion_count_vectors, lw=3, label='Insertions', color=custom_colors['Insertion'] ) ax.plot( - all_deletion_count_vectors, 'm', lw=3, label='Deletions', + all_deletion_count_vectors, lw=3, label='Deletions', color=custom_colors['Deletion'] ) ax.plot( - all_substitution_count_vectors, 'g', lw=3, label='Substitutions', + all_substitution_count_vectors, lw=3, label='Substitutions', color=custom_colors['Substitution'] ) y_max = max( @@ -816,6 +836,7 @@ def plot_quantification_window_locations( ref_name, plot_title, plot_root, + custom_colors, save_also_png, ): fig, ax = plt.subplots(figsize=(10, 10)) @@ -859,11 +880,9 @@ def plot_quantification_window_locations( ) ax.add_patch(p) - ax.plot(insertion_count_vectors, 'r', 
linewidth=3, label='Insertions') - ax.plot(deletion_count_vectors, 'm', linewidth=3, label='Deletions') - ax.plot( - substitution_count_vectors, 'g', linewidth=3, label='Substitutions', - ) + ax.plot(insertion_count_vectors, linewidth=3, label='Insertions', color=custom_colors['Insertion']) + ax.plot(deletion_count_vectors, linewidth=3, label='Deletions', color=custom_colors['Deletion']) + ax.plot(substitution_count_vectors, linewidth=3, label='Substitutions', color=custom_colors['Substitution']) if cut_points: added_legend = False @@ -1089,6 +1108,7 @@ def plot_global_modifications_reference( ref_name, plot_title, plot_root, + custom_colors, save_also_png=False, ): fig, ax = plt.subplots(figsize=(10, 10)) @@ -1103,13 +1123,13 @@ def plot_global_modifications_reference( 1, ) * 1.1 - ax.plot(ref1_all_insertion_positions, 'r', linewidth=3, label='Insertions') - ax.plot(ref1_all_deletion_positions, 'm', linewidth=3, label='Deletions') + ax.plot(ref1_all_insertion_positions, linewidth=3, label='Insertions', color=custom_colors['Insertion']) + ax.plot(ref1_all_deletion_positions, linewidth=3, label='Deletions', color=custom_colors['Deletion']) ax.plot( ref1_all_substitution_positions, - 'g', linewidth=3, label='Substitutions', + color=custom_colors['Substitution'] ) ref1_cut_points = ref1['sgRNA_cut_points'] @@ -1405,10 +1425,10 @@ def plot_frameshift_frequency( ax1.bar(x - 0.1, y) ax1.set_xlim(-30.5, 30.5) ax1.set_frame_on(False) - ax1.set_xticks([idx for idx in range(-30, 31) if idx % 3]) + ax1.set_xticks([idx for idx in range(-30, 31) if idx % 3 == 0]) ax1.tick_params( which='both', # both major and minor ticks are affected - bottom=False, # ticks along the bottom edge are off + bottom=True, # ticks along the bottom edge are off top=False, # ticks along the top edge are off labelbottom=True, # labels along the bottom edge are off ) @@ -1416,7 +1436,7 @@ def plot_frameshift_frequency( xmin, xmax = ax1.get_xaxis().get_view_interval() ymin, ymax = 
ax1.get_yaxis().get_view_interval() ax1.set_xticklabels( - map(str, [idx for idx in range(-30, 31) if idx % 3]), + map(str, [idx for idx in range(-30, 31) if idx % 3 == 0]), rotation='vertical', ) ax1.set_title(plot_titles['fs']) @@ -1448,7 +1468,7 @@ def plot_frameshift_frequency( ax2.set_xticks([idx for idx in range(-30, 31) if (idx % 3 == 0)]) ax2.tick_params( which='both', # both major and minor ticks are affected - bottom=False, # ticks along the bottom edge are off + bottom=True, # ticks along the bottom edge are off top=False, # ticks along the top edge are off labelbottom=True, # labels along the bottom edge are off ) @@ -1536,10 +1556,11 @@ def plot_global_frameshift_in_frame_mutations( ax1.bar(x - 0.1, y) ax1.set_xlim(-30.5, 30.5) ax1.set_frame_on(False) - ax1.set_xticks([idx for idx in range(-30, 31) if idx % 3]) + ax1.set_xticks([idx for idx in range(-30, 31) if idx % 3 == 0]) ax1.tick_params( which='both', # both major and minor ticks are affected - bottom=False, # ticks along the bottom edge are off + left=True, + bottom=True, # ticks along the bottom edge are off top=False, # ticks along the top edge are off labelbottom=True, # labels along the bottom edge are off ) @@ -1547,7 +1568,7 @@ def plot_global_frameshift_in_frame_mutations( xmin, xmax = ax1.get_xaxis().get_view_interval() ymin, ymax = ax1.get_yaxis().get_view_interval() ax1.set_xticklabels( - map(str, [idx for idx in range(-30, 31) if idx % 3]), + map(str, [idx for idx in range(-30, 31) if idx % 3 == 0]), rotation='vertical', ) ax1.set_title('Global Frameshift profile') @@ -1659,26 +1680,27 @@ def plot_non_coding_mutations( sgRNA_intervals, plot_title, plot_root, + custom_colors, save_also_png=False, ): fig, ax = plt.subplots(figsize=(10, 10)) ax.plot( insertion_count_vectors_noncoding, - 'r', linewidth=3, label='Insertions', + color=custom_colors['Insertion'] ) ax.plot( deletion_count_vectors_noncoding, - 'm', linewidth=3, label='Deletions', + color=custom_colors['Deletion'] ) ax.plot( 
substitution_count_vectors_noncoding, - 'g', linewidth=3, label='Substitutions', + color=custom_colors['Substitution'] ) y_max = max( @@ -1974,7 +1996,7 @@ def add_sgRNA_to_ax(ax,sgRNA_intervals,sgRNA_y_start,sgRNA_y_height,amp_len,x_of else: ax.text(x_offset+min_sgRNA_x, this_sgRNA_y_start + this_sgRNA_y_height/2, 'sgRNA ', horizontalalignment='right', verticalalignment='center', fontsize=font_size) -def plot_conversion_map(nuc_pct_df,fig_filename_root,conversion_nuc_from,conversion_nuc_to,save_also_png,plotPct = 0.9,min_text_pct=0.3,max_text_pct=0.9,conversion_scale_max=None,sgRNA_intervals=None,quantification_window_idxs=None,sgRNA_names=None,sgRNA_mismatches=None): +def plot_conversion_map(nuc_pct_df,fig_filename_root,conversion_nuc_from,conversion_nuc_to,save_also_png,custom_colors,plotPct = 0.9,min_text_pct=0.3,max_text_pct=0.9,conversion_scale_max=None,sgRNA_intervals=None,quantification_window_idxs=None,sgRNA_names=None,sgRNA_mismatches=None): """ Plots a heatmap of conversion across several sequences :param nuc_pct_df combined df of multiple batches @@ -2034,7 +2056,9 @@ def plot_conversion_map(nuc_pct_df,fig_filename_root,conversion_nuc_from,convers plt.clf() # make a color map of fixed colors (for coloring reference in this example) - color_lookup = get_color_lookup(['A', 'T', 'C', 'G'], alpha=1) + color_lookup = get_color_lookup(['A', 'T', 'C', 'G', 'N', 'INS', '-'], alpha=1, custom_colors=custom_colors) + unchanged_color_lookup = get_color_lookup(['A', 'T', 'C', 'G', 'N', 'INS', '-'], alpha=0.3, + custom_colors=custom_colors) # fig = plt.figure(figsize=(amp_len/2.0,nSamples*2)) fig, ax = plt.subplots(figsize=((amp_len+10)/2.0, (nSamples+1)*2)) @@ -2144,7 +2168,7 @@ def plot_conversion_map(nuc_pct_df,fig_filename_root,conversion_nuc_from,convers plt.close(fig) -def plot_subs_across_ref(ref_len, ref_seq, ref_name, ref_count, all_substitution_base_vectors, plot_title, fig_filename_root, save_also_png, quantification_window_idxs=None): +def 
plot_subs_across_ref(ref_len, ref_seq, ref_name, ref_count, all_substitution_base_vectors, plot_title, fig_filename_root, save_also_png, custom_colors, quantification_window_idxs=None): """ Plots substitutions across the reference sequece - each position on the x axis reprsents a nucleotide in the reference bars at each x posion show the number of times the reference nucleotide was substituted for another reference @@ -2153,8 +2177,7 @@ def plot_subs_across_ref(ref_len, ref_seq, ref_name, ref_count, all_substitution fig, ax = plt.subplots(figsize=(16, 8)) ind = np.arange(ref_len) - alph = ['A', 'C', 'G', 'T', 'N'] - color_lookup = get_color_lookup(alph, alpha=1) + color_lookup = get_color_lookup(['A', 'T', 'C', 'G', 'N', 'INS', '-'], alpha=1, custom_colors=custom_colors) pA = ax.bar(ind, all_substitution_base_vectors[ref_name+"_A"], color=color_lookup['A']) pC = ax.bar(ind, all_substitution_base_vectors[ref_name+"_C"], color=color_lookup['C'], bottom=all_substitution_base_vectors[ref_name+"_A"]) @@ -2210,7 +2233,7 @@ def plot_subs_across_ref(ref_len, ref_seq, ref_name, ref_count, all_substitution fig.savefig(fig_filename_root + '.png', bbox_extra_artists=(lgd,), bbox_inches='tight') plt.close(fig) -def plot_sub_freqs(alt_nuc_counts, plot_title, fig_filename_root, save_also_png): +def plot_sub_freqs(alt_nuc_counts, plot_title, fig_filename_root, save_also_png, custom_colors): """ Plots histogram of substitution frequencies for each nucleotide (from nuc X to nuc Y) input: @@ -2220,8 +2243,7 @@ def plot_sub_freqs(alt_nuc_counts, plot_title, fig_filename_root, save_also_png) #plot all substitution rates fig, ax = plt.subplots(figsize=(8.3, 8)) - alph = ['A', 'C', 'G', 'T', 'N'] - color_lookup = get_color_lookup(alph, alpha=1) + color_lookup = get_color_lookup(['A', 'T', 'C', 'G', 'N', 'INS', '-'], alpha=1, custom_colors=custom_colors) ax.bar([1, 2, 3], [alt_nuc_counts['A']['C'], alt_nuc_counts['A']['G'], alt_nuc_counts['A']['T']], color=color_lookup['A']) ax.bar([5, 6, 
7], [alt_nuc_counts['C']['A'], alt_nuc_counts['C']['G'], alt_nuc_counts['C']['T']], color=color_lookup['C']) @@ -2295,14 +2317,14 @@ def plot_log_nuc_freqs(df_nuc_freq,tot_aln_reads,plot_title,fig_filename_root,sa plt.close(fig) -def plot_conversion_at_sel_nucs(df_subs, ref_name, ref_sequence, plot_title, conversion_nuc_from, fig_filename_root, save_also_png): +def plot_conversion_at_sel_nucs(df_subs, ref_name, ref_sequence, plot_title, conversion_nuc_from, fig_filename_root, save_also_png, custom_colors): ''' Plots the conversion at selected nucleotides Looks for the 'conversion_nuc_from' in the ref_sequence and sets those as 'selected nucleotides' At selected nucleotides, the proportion of each base is shown as a barplot ''' nucs = list(df_subs.index) - color_lookup = get_color_lookup(nucs, alpha=1) + color_lookup = get_color_lookup(['A', 'T', 'C', 'G', 'N', 'INS', '-'], alpha=1, custom_colors=custom_colors) amp_len = len(ref_sequence) fig = plt.figure(figsize=(amp_len, 6)) @@ -2357,14 +2379,14 @@ def plot_conversion_at_sel_nucs(df_subs, ref_name, ref_sequence, plot_title, con fig.savefig(fig_filename_root+'.png', bbox_inches='tight', pad_inches=0.1) plt.close(fig) -def plot_conversion_at_sel_nucs_not_include_ref(df_subs, ref_name, ref_sequence, plot_title, conversion_nuc_from, fig_filename_root, save_also_png): +def plot_conversion_at_sel_nucs_not_include_ref(df_subs, ref_name, ref_sequence, plot_title, conversion_nuc_from, fig_filename_root, save_also_png, custom_colors): ''' Plots the conversion at selected nucleotides but ignores non-substitutions (for example at nucs that are 'C' in the reference, bars show the proportion of A T G (not C)) Looks for the 'conversion_nuc_from' in the ref_sequence and sets those as 'selected nucleotides' At selected nucleotides, the proportion of each substitution is shown as a barplot ''' nucs = list(df_subs.index) - color_lookup = get_color_lookup(nucs, alpha=1) + color_lookup = get_color_lookup(['A', 'T', 'C', 'G', 'N', 
'INS', '-'], alpha=1, custom_colors=custom_colors) amp_len = len(ref_sequence) fig = plt.figure(figsize=(amp_len, 6)) @@ -2429,14 +2451,14 @@ def plot_conversion_at_sel_nucs_not_include_ref(df_subs, ref_name, ref_sequence, fig.savefig(fig_filename_root+'.png', bbox_inches='tight', pad_inches=0.1) plt.close(fig) -def plot_conversion_at_sel_nucs_not_include_ref_scaled(df_subs, ref_name, ref_sequence, plot_title, conversion_nuc_from, fig_filename_root, save_also_png): +def plot_conversion_at_sel_nucs_not_include_ref_scaled(df_subs, ref_name, ref_sequence, plot_title, conversion_nuc_from, fig_filename_root, save_also_png, custom_colors): ''' Plots the conversion at selected nucleotides not including reference base, scaled by number of events Looks for the 'conversion_nuc_from' in the ref_sequence and sets those as 'selected nucleotides' At selected nucleotides, the count of each base is shown as a barplot ''' nucs = list(df_subs.index) - color_lookup = get_color_lookup(nucs, alpha=1) + color_lookup = get_color_lookup(['A', 'T', 'C', 'G', 'N', 'INS', '-'], alpha=1, custom_colors=custom_colors) nucs.remove(conversion_nuc_from) amp_len = len(ref_sequence) @@ -2728,16 +2750,17 @@ def plot_alleles_heatmap(reference_seq,fig_filename_root,X,annot,y_labels,insert INDEL_color = get_nuc_color('N', alpha) if custom_colors is not None: + hex_alpha = '66' # this is equivalent to 40% in hexadecimal if 'A' in custom_colors: - A_color = custom_colors['A'] + A_color = custom_colors['A'] + hex_alpha if 'T' in custom_colors: - T_color = custom_colors['T'] + T_color = custom_colors['T'] + hex_alpha if 'C' in custom_colors: - C_color = custom_colors['C'] + C_color = custom_colors['C'] + hex_alpha if 'G' in custom_colors: - G_color = custom_colors['G'] + G_color = custom_colors['G'] + hex_alpha if 'N' in custom_colors: - INDEL_color = custom_colors['N'] + INDEL_color = custom_colors['N'] + hex_alpha dna_to_numbers={'-':0,'A':1,'T':2,'C':3,'G':4,'N':5} seq_to_numbers= lambda seq: 
[dna_to_numbers[x] for x in seq] diff --git a/CRISPResso2/CRISPRessoPooledCORE.py b/CRISPResso2/CRISPRessoPooledCORE.py index 8d9e7e35..09cb7eae 100644 --- a/CRISPResso2/CRISPRessoPooledCORE.py +++ b/CRISPResso2/CRISPRessoPooledCORE.py @@ -18,7 +18,7 @@ import zipfile from CRISPResso2 import CRISPRessoShared from CRISPResso2 import CRISPRessoMultiProcessing -from CRISPResso2 import CRISPRessoReport +from CRISPResso2.CRISPRessoReports import CRISPRessoReport from CRISPResso2 import CRISPRessoPlot import traceback @@ -663,8 +663,8 @@ def main(): lowercase_default_amplicon_headers = {h.lower(): h for h in default_input_amplicon_headers} headers = [] + unmatched_headers = [] has_header = False - has_unmatched_header_el = False for head in header_els: # Header based on header provided # Look up long name (e.g. qwc -> quantification_window_coordinates) @@ -678,21 +678,23 @@ def main(): match = difflib.get_close_matches(long_head, lowercase_default_amplicon_headers, n=1) if not match: - has_unmatched_header_el = True - warn(f'Unable to find matches for header value "{head}". 
Using the default header values and order.') + unmatched_headers.append(head) else: has_header = True headers.append(lowercase_default_amplicon_headers[match[0]]) if args.debug: info(f'Matching header {head} with {lowercase_default_amplicon_headers[match[0]]}.') - if not has_header or has_unmatched_header_el: + if len(headers) > 5 and not has_header: + raise CRISPRessoShared.BadParameterException('Incorrect number of columns provided without header.') + elif has_header and len(unmatched_headers) > 0: + raise CRISPRessoShared.BadParameterException('Unable to match headers: ' + str(unmatched_headers)) + + if not has_header: # Default header headers = [] for i in range(len(header_els)): headers.append(default_input_amplicon_headers[i]) - if len(headers) > 5: - raise CRISPRessoShared.BadParameterException('Incorrect number of columns provided without header.') if args.debug: info(f'Header variable names in order: {headers}') @@ -880,6 +882,23 @@ def main(): warn('Skipping amplicon [%s] because no reads align to it\n'% idx) CRISPRessoMultiProcessing.run_crispresso_cmds(crispresso_cmds, n_processes_for_pooled, 'amplicon', args.skip_failed, start_end_percent=(16, 80)) + # Initialize array to track failed runs + failed_batch_arr = [] + failed_batch_arr_desc = [] + for cmd in crispresso_cmds: + + # Extract the folder name from the CRISPResso command + folder_name_regex = re.search(r'-o\s+\S+\s+--name\s+(\S+)', cmd) + if folder_name_regex: + folder_name = os.path.join(OUTPUT_DIRECTORY, 'CRISPResso_on_%s' % folder_name_regex.group(1)) + failed_run_bool, failed_status_string = CRISPRessoShared.check_if_failed_run(folder_name, info) + if failed_run_bool: + failed_batch_arr.append(folder_name_regex.group(1)) + failed_batch_arr_desc.append(failed_status_string) + + # Store the failed runs in crispresso2_info for later use + crispresso2_info['results']['failed_batch_arr'] = failed_batch_arr + crispresso2_info['results']['failed_batch_arr_desc'] = failed_batch_arr_desc 
df_template['n_reads']=n_reads_aligned_amplicons df_template['n_reads_aligned_%']=df_template['n_reads']/float(N_READS_ALIGNED)*100 diff --git a/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py b/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py index 15f28624..abac3cea 100644 --- a/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py +++ b/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py @@ -11,7 +11,7 @@ import sys from CRISPResso2 import CRISPRessoShared from CRISPResso2 import CRISPRessoMultiProcessing -from CRISPResso2 import CRISPRessoReport +from CRISPResso2.CRISPRessoReports import CRISPRessoReport import traceback @@ -354,8 +354,12 @@ def main(): report_name = _jp("CRISPResso2PooledWGSCompare_report.html") else: report_name = OUTPUT_DIRECTORY+'.html' + empty_failed_runs_arr = [] + empty_failed_runs_arr_desc = [] CRISPRessoReport.make_multi_report( processed_regions, + empty_failed_runs_arr, + empty_failed_runs_arr_desc, processed_region_html_files, report_name, OUTPUT_DIRECTORY, diff --git a/CRISPResso2/CRISPRessoReports/.github/workflows/.pylintrc b/CRISPResso2/CRISPRessoReports/.github/workflows/.pylintrc new file mode 100644 index 00000000..1b892797 --- /dev/null +++ b/CRISPResso2/CRISPRessoReports/.github/workflows/.pylintrc @@ -0,0 +1,7 @@ +[FORMAT] +max-line-length=150 +max-args=15 +max-locals=40 + +[MESSAGES CONTROL] +disable = E0401, W0719 diff --git a/CRISPResso2/CRISPRessoReports/.github/workflows/pylint.yml b/CRISPResso2/CRISPRessoReports/.github/workflows/pylint.yml new file mode 100644 index 00000000..5b7e7e11 --- /dev/null +++ b/CRISPResso2/CRISPRessoReports/.github/workflows/pylint.yml @@ -0,0 +1,26 @@ +name: Pylint + +on: + push: + branches: + - '*' + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + 
python -m pip install --upgrade pip + pip install pylint + - name: Analysing the code with pylint + run: | + pylint --fail-under=9 $(git ls-files '*.py') --rcfile=/home/runner/work/CRISPRessoReports/CRISPRessoReports/.github/workflows/.pylintrc diff --git a/CRISPResso2/CRISPRessoReport.py b/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py similarity index 68% rename from CRISPResso2/CRISPRessoReport.py rename to CRISPResso2/CRISPRessoReports/CRISPRessoReport.py index 2b21db07..c1e11c7c 100644 --- a/CRISPResso2/CRISPRessoReport.py +++ b/CRISPResso2/CRISPRessoReports/CRISPRessoReport.py @@ -6,9 +6,43 @@ import os from jinja2 import Environment, FileSystemLoader +from jinja_partials import generate_render_partial, render_partial from CRISPResso2 import CRISPRessoShared +def render_template(template_name, jinja2_env, **data): + """Render a template with partials. + + Parameters + ---------- + template_name: str + The name of the template to render. For example, if you have a template + file called `templates/my_template.html` you would pass in + `my_template.html`. + jinja2_env: jinja2.Environment + The Jinja2 environment being used. + **data: keyword arguments of any type + Additional keyword arguments that are passed to the template. + + Returns + ------- + The rendered template. 
+ """ + def custom_partial_render(partial_template_name, **partial_data): + template = jinja2_env.get_template(partial_template_name) + partial_data.update( + render_partial=generate_render_partial( + custom_partial_render, + ), + is_default_user=False, + is_web=False, + ) + return template.render(**partial_data) + return render_partial( + template_name, custom_partial_render, **data, + ) + + def make_report_from_folder(crispresso_report_file, crispresso_folder, _ROOT): """ Makes an html report for a crispresso run @@ -24,121 +58,132 @@ def make_report_from_folder(crispresso_report_file, crispresso_folder, _ROOT): run_data = CRISPRessoShared.load_crispresso_info(crispresso_folder) make_report(run_data, crispresso_report_file, crispresso_folder, _ROOT) -def make_report(run_data, crispresso_report_file, crispresso_folder, _ROOT): - #dicts for each amplicon fig_names[amp_name] = [list of fig names] - # fig_locs[amp_name][fig_name] = figure location - fig_names = {} #all except for the figure 1 (which is common to all amplicons) - fig_locs = {} - fig_titles = {} - fig_captions = {} - fig_datas = {} - sgRNA_based_fig_names = {} -# print('crispresso_report file: ' + crispresso_report_file + ' crispresso_folder : ' + crispresso_folder + ' root: ' + _ROOT) - - def add_fig_if_exists(fig_name, fig_root, fig_title, fig_caption, fig_data, - amplicon_fig_names, amplicon_fig_locs, amplicon_fig_titles, amplicon_fig_captions, amplicon_fig_datas): - """ - Helper function to add figure if the file exists - if fig at filename exists, - amplicon_figs[figname] is set to that file - """ - #fullpath=os.path.join(crispresso_folder,fig_root+'.png') - fullpath=os.path.join(crispresso_folder, fig_root+'.png') -# print('adding file ' + fig_root + ' at ' + fullpath) - if os.path.exists(fullpath): - amplicon_fig_names.append(fig_name) - #amplicon_fig_locs[fig_name]=os.path.basename(fig_root+'.png') - amplicon_fig_locs[fig_name]=os.path.basename(fig_root) - amplicon_fig_titles[fig_name] = 
fig_title - amplicon_fig_captions[fig_name] = fig_caption - amplicon_fig_datas[fig_name] = [] - for (data_caption, data_file) in fig_data: - if os.path.exists(os.path.join(crispresso_folder, data_file)): - amplicon_fig_datas[fig_name].append((data_caption, data_file)) - - global_fig_names= [] +def add_fig_if_exists(fig_name, fig_root, fig_title, fig_caption, fig_data, + amplicon_fig_names, amplicon_figures, crispresso_folder): + """ + Helper function to add figure if the file exists + if fig at filename exists, + amplicon_figs[figname] is set to that file + """ + # fullpath=os.path.join(crispresso_folder,fig_root+'.png') + pngfullpath = os.path.join(crispresso_folder, fig_root + '.png') + htmlfullpath = os.path.join(crispresso_folder, fig_root + '.html') + # print('adding file ' + fig_root + ' at ' + fullpath) + if os.path.exists(pngfullpath) or os.path.exists(htmlfullpath): + amplicon_fig_names.append(fig_name) + # amplicon_fig_locs[fig_name]=os.path.basename(fig_root+'.png') + amplicon_figures['locs'][fig_name] = os.path.basename(fig_root) + amplicon_figures['titles'][fig_name] = fig_title + amplicon_figures['captions'][fig_name] = fig_caption + amplicon_figures['datas'][fig_name] = [] + for (data_caption, data_file) in fig_data: + if os.path.exists(os.path.join(crispresso_folder, data_file)): + amplicon_figures['datas'][fig_name].append((data_caption, data_file)) + if os.path.exists(htmlfullpath): + with open(htmlfullpath, encoding="utf-8") as html: + html_string = "
" + html_string += html.read() + html_string += "
" + amplicon_figures['htmls'][fig_name] = html_string + + +def assemble_figs(run_data, crispresso_folder): + """ + Helper function create the data structre for the figures + """ + figures = {'names': {}, 'locs': {}, 'titles': {}, 'captions': {}, 'datas': {}, 'htmls': {}, 'sgRNA_based_names': {}} + + global_fig_names = [] for fig in ['1a', '1b', '1c', '1d', '5a', '6a', '8a', '11c']: - fig_name = 'plot_'+ fig + fig_name = 'plot_' + fig if fig_name + '_root' in run_data['results']['general_plots']: - add_fig_if_exists(fig_name, run_data['results']['general_plots'][fig_name + '_root'], 'Figure ' + fig, run_data['results']['general_plots'][fig_name + '_caption'], run_data['results']['general_plots'][fig_name+'_data'], - global_fig_names, fig_locs, fig_titles, fig_captions, fig_datas) - + add_fig_if_exists(fig_name, run_data['results']['general_plots'][fig_name + '_root'], 'Figure ' + fig, + run_data['results']['general_plots'][fig_name + '_caption'], + run_data['results']['general_plots'][fig_name + '_data'], + global_fig_names, figures, crispresso_folder) amplicons = [] for amplicon_name in run_data['results']['ref_names']: amplicons.append(amplicon_name) - amplicon_fig_names = [] - amplicon_fig_locs = {} - amplicon_fig_titles = {} - amplicon_fig_captions = {} - amplicon_fig_datas = {} - - + amplicon_figures = {'names': [], 'locs': {}, 'titles': {}, 'captions': {}, 'datas': {}, 'htmls': {}} - for fig in ['2a', '3a', '3b', '4a', '4b', '4c', '4d', '4e', '4f', '4g', '5', '6', '7', '8', '10a', '10b', '10c', '11a']: - fig_name = 'plot_'+ fig + for fig in ['2a', '3a', '3b', '4a', '4b', '4c', '4d', '4e', '4f', '4g', '5', '6', '7', '8', '10a', '10b', '10c', + '11a']: + fig_name = 'plot_' + fig if fig_name + '_root' in run_data['results']['refs'][amplicon_name]: - add_fig_if_exists(fig_name, run_data['results']['refs'][amplicon_name][fig_name + '_root'], 'Figure ' + fig_name, run_data['results']['refs'][amplicon_name][fig_name + '_caption'], 
run_data['results']['refs'][amplicon_name][fig_name + '_data'], - amplicon_fig_names, amplicon_fig_locs, amplicon_fig_titles, amplicon_fig_captions, amplicon_fig_datas) + add_fig_if_exists(fig_name, run_data['results']['refs'][amplicon_name][fig_name + '_root'], + 'Figure ' + fig_name, + run_data['results']['refs'][amplicon_name][fig_name + '_caption'], + run_data['results']['refs'][amplicon_name][fig_name + '_data'], + global_fig_names, amplicon_figures, crispresso_folder) this_sgRNA_based_fig_names = {} for fig in ['2b', '9', '10d', '10e', '10f', '10g', '11b']: - #fig 2b's + # fig 2b's this_fig_names = [] - if 'plot_'+fig+'_roots' in run_data['results']['refs'][amplicon_name]: - for idx, plot_root in enumerate(run_data['results']['refs'][amplicon_name]['plot_'+fig+'_roots']): - fig_name = "plot_"+fig+"_" + str(idx) - add_fig_if_exists(fig_name, plot_root, 'Figure ' + fig_name + ' sgRNA ' + str(idx+1), run_data['results']['refs'][amplicon_name]['plot_'+fig+'_captions'][idx], run_data['results']['refs'][amplicon_name]['plot_'+fig+'_datas'][idx], - this_fig_names, amplicon_fig_locs, amplicon_fig_titles, amplicon_fig_captions, amplicon_fig_datas) + if 'plot_' + fig + '_roots' in run_data['results']['refs'][amplicon_name]: + for idx, plot_root in enumerate(run_data['results']['refs'][amplicon_name]['plot_' + fig + '_roots']): + fig_name = "plot_" + fig + "_" + str(idx) + add_fig_if_exists(fig_name, plot_root, 'Figure ' + fig_name + ' sgRNA ' + str(idx + 1), + run_data['results']['refs'][amplicon_name]['plot_' + fig + '_captions'][idx], + run_data['results']['refs'][amplicon_name]['plot_' + fig + '_datas'][idx], + this_fig_names, amplicon_figures, crispresso_folder) this_sgRNA_based_fig_names[fig] = this_fig_names - fig_names[amplicon_name] = amplicon_fig_names - sgRNA_based_fig_names[amplicon_name] = this_sgRNA_based_fig_names + figures['names'][amplicon_name] = amplicon_figures['names'] + figures['sgRNA_based_names'][amplicon_name] = this_sgRNA_based_fig_names - 
fig_locs[amplicon_name] = amplicon_fig_locs - fig_titles[amplicon_name] = amplicon_fig_titles - fig_captions[amplicon_name] = amplicon_fig_captions - fig_datas[amplicon_name] = amplicon_fig_datas + figures['locs'][amplicon_name] = amplicon_figures['locs'] + figures['titles'][amplicon_name] = amplicon_figures['titles'] + figures['captions'][amplicon_name] = amplicon_figures['captions'] + figures['datas'][amplicon_name] = amplicon_figures['datas'] + figures['htmls'][amplicon_name] = amplicon_figures['htmls'] + data = {'amplicons': amplicons, 'figures': figures} + return data + + +def make_report(run_data, crispresso_report_file, crispresso_folder, _ROOT): + # dicts for each amplicon fig_names[amp_name] = [list of fig names] + # fig_locs[amp_name][fig_name] = figure location + # print('crispresso_report file: ' + crispresso_report_file + ' crispresso_folder : ' + crispresso_folder + ' root: ' + _ROOT) + data = assemble_figs(run_data, crispresso_folder) report_display_name = "" if run_data['running_info']['args'].name != "": report_display_name = run_data['running_info']['args'].name - - #find path between the report and the data (if the report is in another directory vs in the same directory as the data) + # find path between the report and the data (if the report is in another directory vs in the same directory as the data) crispresso_data_path = os.path.relpath(crispresso_folder, os.path.dirname(crispresso_report_file)) if crispresso_data_path == ".": crispresso_data_path = "" else: - crispresso_data_path += "/"; + crispresso_data_path += "/" report_data = { - 'amplicons': amplicons, - 'fig_names': fig_names, - 'sgRNA_based_fig_names': sgRNA_based_fig_names, - 'fig_locs': fig_locs, - 'fig_titles': fig_titles, - 'fig_captions': fig_captions, - 'fig_datas': fig_datas, + 'amplicons': data['amplicons'], + 'figures': data['figures'], 'run_data': run_data, 'report_display_name': report_display_name, 'crispresso_data_path': crispresso_data_path, } - j2_env = 
Environment(loader=FileSystemLoader(os.path.join(_ROOT, 'templates'))) - template = j2_env.get_template('report.html') + j2_env = Environment(loader=FileSystemLoader(os.path.join(_ROOT, 'CRISPRessoReports', 'templates'))) -# dest_dir = os.path.dirname(crispresso_report_file) -# shutil.copy2(os.path.join(_ROOT,'templates','CRISPResso_justcup.png'),dest_dir) -# shutil.copy2(os.path.join(_ROOT,'templates','favicon.ico'),dest_dir) + # dest_dir = os.path.dirname(crispresso_report_file) + # shutil.copy2(os.path.join(_ROOT,'templates','CRISPResso_justcup.png'),dest_dir) + # shutil.copy2(os.path.join(_ROOT,'templates','favicon.ico'),dest_dir) + + with open(crispresso_report_file, 'w', encoding="utf-8") as outfile: + outfile.write(render_template( + 'report.html', j2_env, report_data=report_data, + )) - outfile = open(crispresso_report_file, 'w') - outfile.write(template.render(report_data=report_data)) - outfile.close() def make_batch_report_from_folder(crispressoBatch_report_file, crispresso2_info, batch_folder, _ROOT): batch_names = crispresso2_info['results']['completed_batch_arr'] + failed_runs = crispresso2_info['results']['failed_batch_arr'] + failed_runs_desc = crispresso2_info['results']['failed_batch_arr_desc'] display_names = crispresso2_info['results']['batch_input_names'] window_nuc_pct_quilts = crispresso2_info['results']['general_plots']['window_nuc_pct_quilt_plot_names'] @@ -206,12 +251,12 @@ def make_batch_report_from_folder(crispressoBatch_report_file, crispresso2_info, allele_modification_heatmap_plot['htmls'] = {} for heatmap_plot_name, heatmap_plot_path in allele_modification_heatmap_plot['paths'].items(): - with open(heatmap_plot_path) as fh: + with open(heatmap_plot_path, encoding="utf-8") as fh: allele_modification_heatmap_plot['htmls'][heatmap_plot_name] = fh.read() allele_modification_line_plot['htmls'] = {} for line_plot_name, line_plot_path in allele_modification_line_plot['paths'].items(): - with open(line_plot_path) as fh: + with 
open(line_plot_path, encoding="utf-8") as fh: allele_modification_line_plot['htmls'][line_plot_name] = fh.read() #find path between the report and the data (if the report is in another directory vs in the same directory as the data) @@ -219,8 +264,7 @@ def make_batch_report_from_folder(crispressoBatch_report_file, crispresso2_info, if crispresso_data_path == ".": crispresso_data_path = "" else: - crispresso_data_path += "/"; - + crispresso_data_path += "/" sub_html_files = {} run_names = [] @@ -230,9 +274,9 @@ def make_batch_report_from_folder(crispressoBatch_report_file, crispresso2_info, crispresso_folder = os.path.join(batch_folder, sub_folder) run_data = CRISPRessoShared.load_crispresso_info(crispresso_folder) if 'running_info' not in run_data: - raise Exception('CRISPResso run %s has no report. Cannot add to batch report.'% sub_folder) + raise Exception(f'CRISPResso run {sub_folder} has no report. Cannot add to batch report.') - this_sub_html_file = sub_folder+".html" + this_sub_html_file = sub_folder + ".html" if run_data['running_info']['args'].place_report_in_output_folder: this_sub_html_file = os.path.join(sub_folder, run_data['running_info']['report_filename']) sub_html_files[display_name] = this_sub_html_file @@ -241,15 +285,18 @@ def make_batch_report_from_folder(crispressoBatch_report_file, crispresso2_info, output_title = 'CRISPResso Batch Output' if crispresso2_info['running_info']['args'].name != '': - output_title += '
{0}'.format(crispresso2_info['running_info']['args'].name) + output_title += f"
{crispresso2_info['running_info']['args'].name}" make_multi_report( run_names, + failed_runs, + failed_runs_desc, sub_html_files, crispressoBatch_report_file, batch_folder, _ROOT, output_title, + 'batch', summary_plots={ 'names': summary_plot_names, 'titles': summary_plot_titles, @@ -269,33 +316,38 @@ def make_pooled_report_from_folder(crispresso_report_file, crispresso2_info, fol names_arr = crispresso2_info['results']['good_region_names'] output_title = 'CRISPResso Pooled Output' if crispresso2_info['running_info']['args'].name != '': - output_title += '
{0}'.format(crispresso2_info['running_info']['args'].name) - make_multi_report_from_folder(crispresso2_info, names_arr, output_title, crispresso_report_file, folder, _ROOT) + output_title += f"
{crispresso2_info['running_info']['args'].name}" + make_multi_report_from_folder(crispresso2_info, names_arr, output_title, crispresso_report_file, folder, _ROOT, 'pooled') + def make_compare_report_from_folder(crispresso_report_file, crispresso2_info, folder, _ROOT): names_arr = [] output_title = 'CRISPResso Compare Output' if crispresso2_info['running_info']['args'].name != '': - output_title += '
{0}'.format(crispresso2_info['running_info']['args'].name) - make_multi_report_from_folder(crispresso2_info, names_arr, output_title, crispresso_report_file, folder, _ROOT) + output_title += "
{crispresso2_info['running_info']['args'].name}" + make_multi_report_from_folder(crispresso2_info, names_arr, output_title, crispresso_report_file, folder, _ROOT, 'compare') + def make_meta_report_from_folder(crispresso_report_file, crispresso2_info, folder, _ROOT): names_arr = crispresso2_info['meta_names_arr'] input_names = crispresso2_info['meta_input_names'] output_title = 'CRISPresso Meta Output' if crispresso2_info['running_info']['args'].name != '': - output_title += '
{0}'.format(crispresso2_info['running_info']['args'].name) - make_multi_report_from_folder(crispresso2_info, names_arr, output_title, crispresso_report_file, folder, _ROOT, display_names=input_names) + output_title += "
{crispresso2_info['running_info']['args'].name}" + make_multi_report_from_folder(crispresso2_info, names_arr, output_title, crispresso_report_file, folder, _ROOT, 'meta', + display_names=input_names) + def make_wgs_report_from_folder(crispresso_report_file, crispresso2_info, folder, _ROOT): names_arr = crispresso2_info['results']['good_region_names'] - display_names = crispresso2_info['results']['good_region_display_names'] output_title = 'CRISPResso WGS Output' if crispresso2_info['running_info']['args'].name != '': - output_title += '
{0}'.format(crispresso2_info['running_info']['args'].name) - make_multi_report_from_folder(crispresso2_info, names_arr, output_title, crispresso_report_file, folder, _ROOT, display_names) + output_title += "
{crispresso2_info['running_info']['args'].name}" + make_multi_report_from_folder(crispresso2_info, names_arr, output_title, crispresso_report_file, folder, _ROOT, 'wgs') + -def make_multi_report_from_folder(crispresso2_info,names_arr,report_name,crispresso_report_file,folder,_ROOT,display_names=None): +def make_multi_report_from_folder(crispresso2_info, names_arr, report_name, crispresso_report_file, folder, _ROOT, crispresso_tool, + display_names=None): """ Prepares information to make a report of multiple CRISPResso runs - like CRISPRessoWGS or CRISPRessoPooled @@ -306,10 +358,11 @@ def make_multi_report_from_folder(crispresso2_info,names_arr,report_name,crispre crispresso_report_file (string): path to write report to folder (string): folder containing crispresso runs _ROOT (string): location of crispresso assets (images, templates, etc) - display_names (dict): report_name->display_name; Titles to be shown for crispresso runs (if different from names_arr, e.g. if display_names have spaces or bad chars, they won't be the same as names_arr) + display_names (dict): report_name->display_name; Titles to be shown for crispresso runs + (if different from names_arr, e.g. 
if display_names have spaces or bad chars, they won't be the same as names_arr) Returns: - Nothin + Nothing """ summary_plot_names = [] @@ -326,6 +379,14 @@ def make_multi_report_from_folder(crispresso2_info,names_arr,report_name,crispre summary_plot_datas = crispresso2_info['results']['general_plots']['summary_plot_datas'] run_names = [] + if 'failed_batch_arr' in crispresso2_info['results']: + failed_runs = crispresso2_info['results']['failed_batch_arr'] + else: + failed_runs = [] + if 'failed_batch_arr' in crispresso2_info['results']: + failed_runs_desc = crispresso2_info['results']['failed_batch_arr_desc'] + else: + failed_runs_desc = [] sub_html_files = {} sub_2a_labels = {} sub_2a_pdfs = {} @@ -335,15 +396,15 @@ def make_multi_report_from_folder(crispresso2_info,names_arr,report_name,crispre if display_names is not None: display_name = display_names[name] - folder_name = 'CRISPResso_on_%s' % name + folder_name = f'CRISPResso_on_{name}' sub_folder = os.path.join(folder, folder_name) run_data = CRISPRessoShared.load_crispresso_info(sub_folder) if 'running_info' not in run_data: - raise Exception('CRISPResso run %s has no report. Cannot add to report.'% sub_folder) + raise Exception(f'CRISPResso run {sub_folder} has no report. 
Cannot add to report.') run_names.append(display_name) - this_sub_html_file = os.path.basename(folder_name)+".html" + this_sub_html_file = os.path.basename(folder_name) + ".html" if run_data['running_info']['args'].place_report_in_output_folder: this_sub_html_file = os.path.join(os.path.basename(sub_folder), run_data['running_info']['report_filename']) sub_html_files[display_name] = this_sub_html_file @@ -352,9 +413,9 @@ def make_multi_report_from_folder(crispresso2_info,names_arr,report_name,crispre this_sub_2a_pdfs = [] for ref_name in run_data['results']['ref_names']: if 'plot_2a_root' in run_data['results']['refs'][ref_name]: - pdf_file = run_data['results']['refs'][ref_name]['plot_2a_root']+".pdf" + pdf_file = run_data['results']['refs'][ref_name]['plot_2a_root'] + ".pdf" if os.path.exists(pdf_file): - this_sub_2a_pdfs.append(run_data['results']['refs'][ref_name]['plot_2a_root']+".pdf") + this_sub_2a_pdfs.append(run_data['results']['refs'][ref_name]['plot_2a_root'] + ".pdf") this_sub_2a_labels.append("Nucleotide distribution across " + ref_name) sub_2a_labels[display_name] = this_sub_2a_labels @@ -362,11 +423,14 @@ def make_multi_report_from_folder(crispresso2_info,names_arr,report_name,crispre make_multi_report( run_names, + failed_runs, + failed_runs_desc, sub_html_files, crispresso_report_file, folder, _ROOT, report_name, + crispresso_tool, summary_plots={ 'names': summary_plot_names, 'titles': summary_plot_titles, @@ -378,24 +442,22 @@ def make_multi_report_from_folder(crispresso2_info,names_arr,report_name,crispre def make_multi_report( run_names, + failed_runs, + failed_runs_desc, sub_html_files, crispresso_multi_report_file, crispresso_folder, _ROOT, report_name, - window_nuc_pct_quilts=[], - nuc_pct_quilts=[], - window_nuc_conv_plots=[], - nuc_conv_plots=[], - summary_plots={ - 'names': [], - 'titles': [], - 'labels': [], - 'datas': [], - }, - compact_plots_to_show={}, - allele_modification_heatmap_plot={}, - allele_modification_line_plot={}, + 
crispresso_tool, + window_nuc_pct_quilts=None, + nuc_pct_quilts=None, + window_nuc_conv_plots=None, + nuc_conv_plots=None, + summary_plots=None, + compact_plots_to_show=None, + allele_modification_heatmap_plot=None, + allele_modification_line_plot=None, ): """ Makes an HTML report for a run containing multiple crispresso runs @@ -407,7 +469,6 @@ def make_multi_report( report_name (string): description of report type to be shown at top of report crispresso_folder (string): absolute path to the crispresso output _ROOT (string): absolute path to the crispresso executable - summary_plots (dict): a dict with the following keys: names (list): list of plot names - keys for following dicts titles (dict): dict of plot_name->plot_title @@ -430,10 +491,17 @@ def fill_default(dictionary, key, default_type=list): dictionary[key] = default_type() j2_env = Environment( - loader=FileSystemLoader(os.path.join(_ROOT, 'templates')), + loader=FileSystemLoader(os.path.join(_ROOT, 'CRISPRessoReports', 'templates')), ) j2_env.filters['dirname'] = dirname - template = j2_env.get_template('multiReport.html') + if crispresso_tool == 'batch': + template = 'batchReport.html' + elif crispresso_tool == 'pooled': + template = 'pooledReport.html' + elif crispresso_tool == 'wgs': + template = 'wgsReport.html' + else: + template = 'multiReport.html' crispresso_data_path = os.path.relpath( crispresso_folder, os.path.dirname(crispresso_multi_report_file), @@ -443,6 +511,10 @@ def fill_default(dictionary, key, default_type=list): else: crispresso_data_path += "/" + if allele_modification_heatmap_plot is None: + allele_modification_heatmap_plot = {} + if allele_modification_line_plot is None: + allele_modification_line_plot = {} dictionaries = [ allele_modification_heatmap_plot, allele_modification_line_plot, ] @@ -460,22 +532,39 @@ def fill_default(dictionary, key, default_type=list): key, default_type, ) - - with open(crispresso_multi_report_file, 'w') as outfile: - outfile.write(template.render( - 
window_nuc_pct_quilts=window_nuc_pct_quilts, - nuc_pct_quilts=nuc_pct_quilts, - window_nuc_conv_plots=window_nuc_conv_plots, - nuc_conv_plots=nuc_conv_plots, + if summary_plots is None: + summary_plots={ + 'names': [], + 'titles': [], + 'labels': [], + 'datas': [], + } + + for html in sub_html_files: + sub_html_files[html] = crispresso_data_path + sub_html_files[html] + with open(crispresso_multi_report_file, 'w', encoding="utf-8") as outfile: + outfile.write(render_template( + template, + j2_env, + window_nuc_pct_quilts=[] if window_nuc_pct_quilts is None else window_nuc_pct_quilts, + nuc_pct_quilts=[] if nuc_pct_quilts is None else nuc_pct_quilts, + window_nuc_conv_plots=[] if window_nuc_conv_plots is None else window_nuc_conv_plots, + nuc_conv_plots=[] if nuc_conv_plots is None else nuc_conv_plots, crispresso_data_path=crispresso_data_path, - summary_plot_names=summary_plots['names'], - summary_plot_titles=summary_plots['titles'], - summary_plot_labels=summary_plots['labels'], - summary_plot_datas=summary_plots['datas'], + report_data={ + 'names': summary_plots['names'], + 'titles': summary_plots['titles'], + 'labels': summary_plots['labels'], + 'datas': summary_plots['datas'], + 'htmls': [], + 'crispresso_data_path': crispresso_data_path, + }, run_names=run_names, + failed_runs=failed_runs, + failed_runs_desc=failed_runs_desc, sub_html_files=sub_html_files, report_name=report_name, - compact_plots_to_show=compact_plots_to_show, + compact_plots_to_show=[] if compact_plots_to_show is None else compact_plots_to_show, allele_modification_heatmap_plot_names=allele_modification_heatmap_plot['names'], allele_modification_heatmap_plot_htmls=allele_modification_heatmap_plot['htmls'], allele_modification_heatmap_plot_titles=allele_modification_heatmap_plot['titles'], @@ -497,7 +586,7 @@ def make_aggregate_report( _ROOT, folder_arr, crispresso_html_reports, - compact_plots_to_show={}, + compact_plots_to_show=None, display_names=None, ): """ @@ -512,10 +601,11 @@ def 
make_aggregate_report( folder_arr (arr of strings): paths to the aggregated crispresso folders crispresso_html_reports (dict): folder->html_path; Paths to the aggregated crispresso run html reports compact_plots_to_show (dict): name=>{'href': path to target(report) when user clicks on image, 'img': path to png image to show} - display_names (dict): folder->display_name; Titles to be shown for crispresso runs (if different from names_arr, e.g. if display_names have spaces or bad chars, they won't be the same as names_arr) + display_names (dict): folder->display_name; Titles to be shown for crispresso runs + (if different from names_arr, e.g. if display_names have spaces or bad chars, they won't be the same as names_arr) Returns: - Nothin + Nothing """ summary_plots = {} if 'summary_plot_names' in crispresso2_info['results']['general_plots']: @@ -589,7 +679,7 @@ def make_aggregate_report( run_names = [] sub_html_files = {} - for idx, folder in enumerate(folder_arr): + for folder in folder_arr: display_name = folder if display_names is not None: display_name = display_names[folder] @@ -597,7 +687,8 @@ def make_aggregate_report( run_names.append(display_name) sub_html_file = os.path.relpath(crispresso_html_reports[folder], crispresso_report_folder) sub_html_files[display_name] = sub_html_file - + if compact_plots_to_show is None: + compact_plots_to_show = {} for compact_plot in compact_plots_to_show: old_href = compact_plots_to_show[compact_plot]['href'] compact_plots_to_show[compact_plot]['href'] = os.path.relpath(old_href, crispresso_report_folder) @@ -606,16 +697,22 @@ def make_aggregate_report( allele_modification_heatmap_plot['htmls'] = {} for heatmap_plot_name, heatmap_plot_path in allele_modification_heatmap_plot['paths'].items(): - with open(heatmap_plot_path) as fh: + with open(heatmap_plot_path, encoding="utf-8") as fh: allele_modification_heatmap_plot['htmls'][heatmap_plot_name] = fh.read() allele_modification_line_plot['htmls'] = {} for line_plot_name, 
line_plot_path in allele_modification_line_plot['paths'].items(): - with open(line_plot_path) as fh: + with open(line_plot_path, encoding="utf-8") as fh: allele_modification_line_plot['htmls'][line_plot_name] = fh.read() + # make_multi_report expects two arrays here for other calls of this function + empty_failed_runs = [] + empty_failed_runs_desc = [] + make_multi_report( run_names, + empty_failed_runs, + empty_failed_runs_desc, sub_html_files, crispresso_report_file, crispresso_report_folder, diff --git a/CRISPResso2/CRISPRessoReports/README.md b/CRISPResso2/CRISPRessoReports/README.md new file mode 100644 index 00000000..b31cff80 --- /dev/null +++ b/CRISPResso2/CRISPRessoReports/README.md @@ -0,0 +1,191 @@ +# CRISPRessoReports + +This repo holds the shared reports code and HTML templates that are used by the CLI and web projects. + +Take care when committing into these files so as not to mix unrelated git histories. + +## How do I work with this repo? + +Step 1 only needs to be done once per cloned repo, the other steps will need to be done more frequently. + +1. Add the remote to this repo to the "parent" repo (i.e. `CRISPResso2` or `C2Web`). **You should only have to do this once.** + +``` shell +git remote add reports https://github.com/edilytics/CRISPRessoReports.git +``` + +*Note:* you can change the `reports` above to whatever you would like, just don't forget to replace `reports` with it in the other steps. + +2. In the parent repo, fetch the latest changes from `CRISPRessoReports`. + +``` shell +git fetch reports +``` + +3. Checkout a `CRISPRessoReports` branch. + +``` shell +git checkout -b master-reports reports/master +``` + +*Note:* you can obviously change the names of these branches (or select a branch other than `master`). I would recommend naming all branches from `CRISPRessoReports` with the same prefix or suffix (i.e. `*-reports`) because it can get confusing to keep all of the branches straight... + +4. 
Read `CRISPRessoReports` into the parent repo as a subtree. This is when the code in `CRISPRessoReports` will finally be added to the parent repo. + +**Very important**, switch to the branch in the parent repo where you want the code to be added! For example: + +``` shell +git checkout +``` + +Then, you will read the commits into wherever `CRISPRessoReports` are stored for that repo. **Note:** you should only have to do this if `CRISPRessoReports` has not been added to the parent repo, if it is already there, do not repeat this step. + +``` shell +git read-tree --prefix=CRISPResso2/CRISPRessoReports -u master-reports +``` + +Run `git status` and you should see the files added! + +5. Stage and commit your files as you normally would. + +### How do I pull commits that are in `CRISPRessoReports` into the parent repo? + +1. In the parent repo, switch to the `-reports` branch. + +``` shell +git checkout -reports +``` + +2. Pull the changes from `CRISPRessoReports`. + +``` shell +git pull +``` + +You should see the updates that you are looking for. + +3. **Very important**, switch back to whichever branch you are working on in the parent repo. + +``` shell +git checkout +``` + +4. Merge the changes in and resolve any merge conflicts. + +``` shell +git merge --squash -Xsubtree="CRISPResso2/CRISPRessoReports" --no-commit --allow-unrelated-histories -reports +``` + +*Note:* You may need to change the value of the `-Xsubtree` parameter to match where `CRISPRessoReports` is located in the parent repo. + +5. Commit your changes and resolve merge conflicts. + +Also, note that the default commit message may have a summary of all commits, please shorten it to be descriptive of the most recent changes. + +### How do I push commits that are in my parent repo's `CRISPRessoReports` into the shared `CRISPRessoReports` repo? + +1. In the parent repo, switch to (or create) the branch on `CRISPRessoReports` that will have the changes you push. 
+ +If you are creating a new branch based off of `CRISPRessoReports` master, run this: + +``` shell +git checkout -b -reports reports/master +``` + +Or if you would like to push to an existing branch on `CRISPRessoReports`, run this: + +``` shell +git checkout -reports +``` + +2. Merge the changes in and resolve any merge conflicts. + +``` shell +git merge --squash -Xsubtree="CRISPResso2/CRISPRessoReports" --no-commit --allow-unrelated-histories +``` + +*Note:* `` is the branch of the parent repo that contains the changes inside the `CRISPRessoReports` sub-directory. + +### I am working on a feature that requires changing `CRISPRessoReports`, what do I do? + +If a feature that you are working on requires changes to CRISPRessoReports, you will need to perform a few steps to get set up. + +1. Create a feature branch in the parent repo, based on the parent repo master. + +``` shell +git checkout -b origin/master +``` + +2. Create a feature branch on `CRISPRessoReports`. + +Checkout your local `CRISPRessoReports` master branch. + +``` shell +git checkout master-reports +``` + +Pull in the latest changes. + +``` shell +git pull +``` + +Create the `CRISPRessoReports` feature branch based on `reports/master`. + +``` shell +git checkout -b -reports reports/master +``` + +*Note:* if your branch is named `cool-feature` in the parent repo, then follow the convention of naming the corresponding `CRISPRessoReports` branch `cool-feature-reports`. + +If you run `git status` at this point you should see any directories in the parent repo as untracked files, this is normal and expected. + +3. Switch back to the feature-branch in the parent repo, and develop your changes. + +``` shell +git checkout +``` + +*Note:* you can mingle your changes in `CRISPRessoReports` and the parent repo in the same commits. + +4. Merge and push your changes up to `CRISPRessoReports`. + +Switch to the `-reports` branch. 
+ +``` shell +git checkout -reports +``` + +Merge the changes from the parent repo into the `-reports` branch. + +``` shell +git merge --squash -Xsubtree="CRISPResso2/CRISPRessoReports" --no-commit --allow-unrelated-histories +``` + +# FAQ + +## There are lots of merge conflicts, how do I just accept all of the incoming changes? + +If you want to blindly accept all of the incoming changes, you can add the parameter `-Xtheirs` to the `git merge...` command and anything that was a merge conflict before, should now be overwritten by the incoming change. + +## I'm tired of typing `git merge --squash ...`, what can I do?! + +Typing out the `git merge...` command is long and is a big pain. Here are some shortcuts to add to your `.git/config` file in order to make this easier to type. + +``` git-config +[alias] + # merge in branch and resolve merge conflicts + m = "!f() { git merge --squash -Xsubtree='CRISPResso2/CRISPRessoReports' --no-commit --allow-unrelated-histories $1; }; f" + # merge in branch and accept all of the incoming changes + mt = "!f() { git merge --squash -Xtheirs -Xsubtree='CRISPResso2/CRISPRessoReports' --no-commit --allow-unrelated-histories $1; }; f" +``` + +Now you can just run `git m ` to merge `` into your current branch. Or run `git mt ` to accept all of the incoming changes. + +# Sources and helpful links + +- This method was heavily based off of what was described in [this blog post](http://johnatten.com/2013/03/16/git-subtree-merge-the-quick-version/) +- If you want to know more about git merging, the manual is [very helpful](https://git-scm.com/book/en/v2/Git-Tools-Advanced-Merging) +- If you messed up (merging the wrong branch), you can undo it using `git reset --hard `. **Beware:** this can cause you to lose work, so use with care. [Learn more here](https://stackoverflow.com/a/8888015/1947159). 
+- If you need to rewrite git history, try using [git-filter-repo](https://github.com/newren/git-filter-repo) +- After rewriting git history (from a mirror repo), if you can't push to GitHub, [try this](https://stackoverflow.com/a/34266401/1947159) diff --git a/CRISPResso2/CRISPRessoReports/__init__.py b/CRISPResso2/CRISPRessoReports/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/CRISPResso2/templates/CRISPResso_justcup.png b/CRISPResso2/CRISPRessoReports/templates/CRISPResso_justcup.png similarity index 100% rename from CRISPResso2/templates/CRISPResso_justcup.png rename to CRISPResso2/CRISPRessoReports/templates/CRISPResso_justcup.png diff --git a/CRISPResso2/templates/multiReport.html b/CRISPResso2/CRISPRessoReports/templates/batchReport.html similarity index 69% rename from CRISPResso2/templates/multiReport.html rename to CRISPResso2/CRISPRessoReports/templates/batchReport.html index 4f346efe..1255424c 100644 --- a/CRISPResso2/templates/multiReport.html +++ b/CRISPResso2/CRISPRessoReports/templates/batchReport.html @@ -1,5 +1,6 @@ {% extends "layout.html" %} {% block head %} + + + + + + + + {% if is_web %} + + + + + {% else %} + + {% endif %} + + + + {% block head %}{% endblock %} + + + + + {% if is_web %} +
+ {{ self.help_block() }} +
+ + {% with messages = get_flashed_messages(with_categories=true) %} + {% if messages %} + {% for category, message in messages %} + {% if category == "error" %} + + {% else %} + + {% endif %} + {% endfor %} + {% endif %} + {% endwith %} + {% endif %} + + {# if default user (normal crispresso mode) #} + {% if not is_web or is_default_user %} +
+
+
+ {% if is_web %} +
+ {% else %} +
+ {% endif %} +
+
+

CRISPResso2

+

Analysis of genome editing outcomes from deep sequencing data

+
+
+
+
+ + + {% else %} + {# if doing user sessions #} + +
+ {% endif %} + +
+
+
+
+ {% block help_block %} {% endblock %} +
+
+
+
+ + {% block content %}{% endblock %} + + {% if not is_web or is_default_user %} +
+
+ +
+
+ {% else %} +
+
+ +
+
+ {% endif %} +
+ + + + + + + + {% block foot %}{% endblock %} + + + diff --git a/CRISPResso2/CRISPRessoReports/templates/multiReport.html b/CRISPResso2/CRISPRessoReports/templates/multiReport.html new file mode 100644 index 00000000..10d30063 --- /dev/null +++ b/CRISPResso2/CRISPRessoReports/templates/multiReport.html @@ -0,0 +1,168 @@ +{% extends "layout.html" %} +{% block head %} + + + + +{% endblock %} + +{% block content %} + +
+
+ +
+
+ + {% if run_names|length > 0 %} +
+
+
{{report_name}}
+
+
+
+ {% for run_name in run_names %} + {{run_name}} + {% endfor %} +
+
+
+ {% endif %} + + {{render_partial('shared/partials/failed_runs.html', failed_runs=failed_runs, failed_runs_desc=failed_runs_desc)}} + + {% if window_nuc_pct_quilts|length > 0 %} +
+
+
Nucleotide percentages around guides
+
+
+ {% for plot_name in window_nuc_pct_quilts %} +
{{report_data['titles'][plot_name]}}
+ {{ render_partial('shared/partials/fig_summaries.html', report_data=report_data, plot_name=plot_name) }} + {% endfor %} +
+
+ {% endif %} + + {% if nuc_pct_quilts|length > 0 %} +
+
+
Nucleotide percentages in the entire amplicon
+
+
+ {% for plot_name in nuc_pct_quilts %} +
{{report_data['titles'][plot_name]}}
+ {{ render_partial('shared/partials/fig_summaries.html', report_data=report_data, plot_name=plot_name) }} + {% endfor %} +
+
+ {% endif %} + + {% if window_nuc_conv_plots|length > 0 %} +
+
+
Conversion of target bases around guides
+
+
+ {% for plot_name in window_nuc_conv_plots %} +
{{report_data['titles'][plot_name]}}
+ {{ render_partial('shared/partials/fig_summaries.html', report_data=report_data, plot_name=plot_name) }} + {% endfor %} +
+
+ {% endif %} + + {% if nuc_conv_plots|length > 0 %} +
+
+
Conversion of target bases in the entire amplicon
+
+
+ {% for plot_name in nuc_conv_plots %} +
{{report_data['titles'][plot_name]}}
+ {{ render_partial('shared/partials/fig_summaries.html', report_data=report_data, plot_name=plot_name) }} + {% endfor %} +
+
+ {% endif %} + + {% if report_data['names']|length > 0 %} + {% for plot_name in report_data['names'] %} +
+
+
{{report_data['titles'][plot_name]}}
+
+
+ {{ render_partial('shared/partials/fig_summaries.html', report_data=report_data, plot_name=plot_name) }} +
+
+ {% endfor %} + {% endif %} + + {% if compact_plots_to_show|length > 0 %} +
+
+
Summary Plots
+
+
+
+ {% for compact_plot in compact_plots_to_show %} + + {% endfor %} +
+
+
+ {% endif %} + + {{render_partial('shared/partials/report_footer_buttons.html', report_zip_filename=report_zip_filename, report_path=report_path)}} + +
{# jumbotron_content #} +
{# jumbotron #} + +
{# column #} + +
+{% endblock %} + +{% block foot %} +{% endblock %} diff --git a/CRISPResso2/CRISPRessoReports/templates/pooledReport.html b/CRISPResso2/CRISPRessoReports/templates/pooledReport.html new file mode 100644 index 00000000..ba944d90 --- /dev/null +++ b/CRISPResso2/CRISPRessoReports/templates/pooledReport.html @@ -0,0 +1,101 @@ +{% extends "layout.html" %} +{% block head %} + + +{% endblock %} + +{% block content %} + +
+
+
+ +
+
+ + {% if run_names|length > 0 %} +
+
+
{{report_name}}
+
+
+
+ {% for region_name in run_names %} + {{region_name}} + {% endfor %} +
+
+
+ {% endif %} + + {{render_partial('shared/partials/failed_runs.html', failed_runs=failed_runs, failed_runs_desc=failed_runs_desc)}} + + {% if report_data['names']|length > 0 %} + {% for plot_name in report_data['names'] %} +
+
+
{{report_data['titles'][plot_name]}}
+
+
+ {{ render_partial('shared/partials/fig_summaries.html', report_data=report_data, plot_name=plot_name) }} +
+
+ {% endfor %} + {% endif %} + + {{render_partial('shared/partials/report_footer_buttons.html', report_zip_filename=report_zip_filename, report_path=report_path)}} + + {# data bit for web version: #} + {# +

Data: {{data_label}}

+ #} +
{# jumbotron_content #} +
{# jumbotron #} + +
{# column #} + +
+
+{% endblock %} + +{% block foot %} +{% endblock %} diff --git a/CRISPResso2/CRISPRessoReports/templates/report.html b/CRISPResso2/CRISPRessoReports/templates/report.html new file mode 100644 index 00000000..9214d168 --- /dev/null +++ b/CRISPResso2/CRISPRessoReports/templates/report.html @@ -0,0 +1,714 @@ +{% extends "layout.html" %} +{% block head %} + + +{% endblock %} + +{% block content %} +
+
+
+
+
+
+ {% if report_data['report_display_name'] != '' %} +
{{report_data['report_display_name']}}
+ {% endif %} +
CRISPResso2 run information
+ +
+
+
+
+ {{ render_partial('shared/partials/fig_reports.html', report_data=report_data, fig_name='plot_1a')}} +
+ {{ render_partial('shared/partials/log_params.html', report_data=report_data) }} +
+
+
+ +
+
+
Allele assignments
+ +
+
+
+
+ {{ render_partial('shared/partials/fig_reports.html', report_data=report_data, fig_name='plot_1b')}} +
+
+ {{ render_partial('shared/partials/fig_reports.html', report_data=report_data, fig_name='plot_1c')}} +
+ {% if report_data['figures']['locs']['plot_1d'] %} +
+ {{ render_partial('shared/partials/fig_reports.html', report_data=report_data, fig_name='plot_1d')}} +
+ {% endif %} +
+
+
{# end card #} + + {# start global coding sequence report #} + {% if report_data['figures']['locs']['plot_5a'] %} +
+
+
Global frameshift analysis
+
+
+ {{ render_partial('shared/partials/fig_reports.html', report_data=report_data, fig_name='plot_5a')}} +
+
+ {% endif %} + + {% if report_data['figures']['locs']['plot_6a'] %} +
+
+
Global frameshift mutagenesis profiles
+
+
+ {{ render_partial('shared/partials/fig_reports.html', report_data=report_data, fig_name='plot_6a')}} +
+
+ {% endif %} + + {% if report_data['figures']['locs']['plot_8a'] %} +
+
+
Global splicing analysis
+
+
+ {{ render_partial('shared/partials/fig_reports.html', report_data=report_data, fig_name='plot_8a')}} +
+
+ {% endif %} + {# end of global coding sequence analysis #} + + {# start hdr summary #} + {% if report_data['figures']['locs'][report_data.amplicons[0]]['plot_4g'] %} +
+
+ {% if report_data.amplicons|length == 1 %} {# if only one amplicon #} +
HDR summary plot
+ {% else %} +
HDR summary report (all reads aligned to {{report_data.amplicons[0]}})
+ {% endif %} +
+
+ {{ render_partial('shared/partials/fig_reports.html', report_data=report_data, fig_name='plot_4g', amplicon_name=report_data.amplicons[0])}} +
+
+ {% endif %} + {# end HDR summary #} + + {# start prime editing report #} + {% if report_data['figures']['locs'][report_data.amplicons[0]]['plot_11a'] %} +
+
+ {% if report_data.amplicons|length == 1 %} {# if only one amplicon #} +
Prime editing report
+ {% else %} +
Prime editing report (all reads aligned to {{report_data.amplicons[0]}})
+ {% endif %} +
+
+ {{ render_partial('shared/partials/fig_reports.html', report_data=report_data, fig_name='plot_11a', amplicon_name=report_data.amplicons[0])}} +
+
+ {% endif %} + + {% if report_data['figures']['sgRNA_based_names'][report_data.amplicons[0]] and report_data['figures']['sgRNA_based_names'][report_data.amplicons[0]]['11b']%} +
+
+ {% if report_data.amplicons|length == 1 %} {# if only one amplicon #} +
Prime editing summary plots at analysis positions
+ {% else %} +
Prime editing summary plots at analysis positions (aligned to {{report_data.amplicons[0]}})
+ {% endif %} +
+
+ {% for fig_name in report_data['figures']['sgRNA_based_names'][report_data.amplicons[0]]['11b'] %} +
+ {{ render_partial('shared/partials/fig_reports.html', report_data=report_data, fig_name=fig_name, amplicon_name=report_data.amplicons[0])}} +
+ {% endfor %} +
+
+ {% endif %} + {# end plot 11b for prime editing #} + + {% if report_data['figures']['locs']['plot_11c'] %} +
+
+
Scaffold insertions
+
+
+ {{ render_partial('shared/partials/fig_reports.html', report_data=report_data, fig_name='plot_11c')}} +
+
+ {% endif %} + {# end prime editing section #} + + {% if report_data.amplicons|length == 1 %} +
{# if only one amplicon, just a normal div #} + {% else %} + {# If there is more than one amplicon, print a navigation to show each amplicon #} +

Reads are aligned to each amplicon sequence separately. Quantification and visualization of these reads are shown for each amplicon below:

+
+
+
Amplicons
+ +
+ {% endif %} {# end if report contains more than one amplicon #} + + + {% if report_data.amplicons|length == 1 %} {# if only one amplicon, just a normal div #} +
+ {% else %} + {# jumbotron_content #} +
{# jumbotron #} +
{# column #} +
+ +
+{% endblock %} + +{% block foot %} + +{% endblock %} diff --git a/CRISPResso2/CRISPRessoReports/templates/shared/partials/failed_runs.html b/CRISPResso2/CRISPRessoReports/templates/shared/partials/failed_runs.html new file mode 100644 index 00000000..015dda0f --- /dev/null +++ b/CRISPResso2/CRISPRessoReports/templates/shared/partials/failed_runs.html @@ -0,0 +1,65 @@ +{% if failed_runs|length > 0 %} +
+
+ +
Failed Runs
+ +
+ +
+
+ {% for failed_run in failed_runs %} + {# Toggle the description visibility on click #} + + {{failed_run}} + + {# Initially hide the description and add a light background with dark text for readability #} + + {% endfor %} +
+
+
+ + +{% endif %} diff --git a/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_reports.html b/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_reports.html new file mode 100644 index 00000000..e21c6853 --- /dev/null +++ b/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_reports.html @@ -0,0 +1,34 @@ +
+ {# Set the width based on the plot #} + {%- if fig_name in ['plot_1a', 'plot_1b', 'plot_1c', 'plot_1d', 'plot_3a', 'plot_4a', 'plot_4b', 'plot_4c', 'plot_4e', 'plot_4f', 'plot_5a', 'plot_7', 'plot_8', 'plot_8a', 'plot_11c'] -%} + {% set width = '40%' %} + {%- elif fig_name in ['plot_10b', 'plot_10c'] -%} + {% set width = '35%' %} + {%- elif fig_name in ['plot_10a'] -%} + {% set width = '70%' %} + {%- else -%} + {% set width = '100%' %} + {%- endif -%} + + {%- if amplicon_name is defined -%} + {%- if 'htmls' in report_data['figures'] and fig_name in report_data['figures']['htmls'][amplicon_name] -%} + {{report_data['figures']['htmls'][amplicon_name][fig_name]|safe}} + {%- else -%} + + {% endif -%} + + {%- for (data_label,data_path) in report_data['figures']['datas'][amplicon_name][fig_name] %} +

Data: {{data_label}}

+ {%- endfor -%} + {%- else %} + {%- if 'htmls' in report_data['figures'] and fig_name in report_data['figures']['htmls'] -%} + {{report_data['figures']['htmls'][fig_name]|safe}} + {%- else -%} + + {% endif -%} + + {%- for (data_label,data_path) in report_data['figures']['datas'][fig_name] %} +

Data: {{data_label}}

+ {%- endfor -%} + {%- endif %} +
diff --git a/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_summaries.html b/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_summaries.html new file mode 100644 index 00000000..63d40dea --- /dev/null +++ b/CRISPResso2/CRISPRessoReports/templates/shared/partials/fig_summaries.html @@ -0,0 +1,15 @@ +
+ {% if report_data['htmls'] and report_data['htmls'][plot_name]%} + {{report_data['htmls'][plot_name]|safe}} + {% else %} + {% if plot_name in ['Nucleotide_conversion_map', 'Nucleotide_percentage_quilt'] %} + + {% else %} + + {% endif %} + {% endif %} + + {% for (data_label,data_path) in report_data['datas'][plot_name] %} +

Data: {{data_label}}

+ {% endfor %} +
diff --git a/CRISPResso2/CRISPRessoReports/templates/shared/partials/log_params.html b/CRISPResso2/CRISPRessoReports/templates/shared/partials/log_params.html new file mode 100644 index 00000000..ad56e16a --- /dev/null +++ b/CRISPResso2/CRISPRessoReports/templates/shared/partials/log_params.html @@ -0,0 +1,30 @@ +
+

CRISPResso version: {{report_data['run_data']['running_info']['version']}}

+

Run completed: {{report_data['run_data']['running_info']['end_time_string']}}

+

Amplicon sequence:

{{report_data['run_data']['running_info']['args']['amplicon_seq']}}

+ {% if report_data['run_data']['running_info']['args']['guide_seq'] != '' %} +

Guide sequence:

{{report_data['run_data']['running_info']['args']['guide_seq']}}

+ {% endif %} +

Command used:

{{report_data['run_data']['running_info']['command_used']}}

+

Parameters:

{{report_data['run_data']['running_info']['args_string']}}

+ {% if is_web and 'metadata' in report_data and report_data['metadata'].keys() %} +

Metadata:

+ + + + + + + + + {% for key in report_data['metadata'].keys() %} + + + + + {% endfor %} + +
KeyValue
{{key}}{{report_data['metadata'][key]}}
+ {% endif %} +

Running log

+
diff --git a/CRISPResso2/CRISPRessoReports/templates/shared/partials/report_footer_buttons.html b/CRISPResso2/CRISPRessoReports/templates/shared/partials/report_footer_buttons.html new file mode 100644 index 00000000..fa6ab71d --- /dev/null +++ b/CRISPResso2/CRISPRessoReports/templates/shared/partials/report_footer_buttons.html @@ -0,0 +1,9 @@ +
+ {% if is_web -%} + {%- if report_zip_filename -%} + Download report + {% endif %} + Link to report + {% endif %} + +
diff --git a/CRISPResso2/CRISPRessoReports/templates/wgsReport.html b/CRISPResso2/CRISPRessoReports/templates/wgsReport.html new file mode 100644 index 00000000..5f2008a4 --- /dev/null +++ b/CRISPResso2/CRISPRessoReports/templates/wgsReport.html @@ -0,0 +1,99 @@ +{% extends "layout.html" %} +{% block head %} + + + +{% endblock %} + +{% block content %} + +
+
+
+ +
+
+ +
+
+
{{report_name}}
+
+
+
+ {% for region_name in run_names %} + {{region_name}} + {% endfor %} +
+
+
+ + {{render_partial('shared/partials/failed_runs.html', failed_runs=failed_runs, failed_runs_desc=failed_runs_desc)}} + + {% if report_data['names']|length > 0 %} + {% for plot_name in report_data['names'] %} +
+
+
{{report_data['titles'][plot_name]}}
+
+
+ {{ render_partial('shared/partials/fig_summaries.html', report_data=report_data, plot_name=plot_name) }} +
+
+ {% endfor %} + {% endif %} + + {{render_partial('shared/partials/report_footer_buttons.html', report_zip_filename=report_zip_filename, report_path=report_path)}} + {# data bit for web version: #} + {# +

Data: {{data_label}}

+ #} +
{# jumbotron_content #} +
{# jumbotron #} + +
{# column #} + +
+
+{% endblock %} + +{% block foot %} +{% endblock %} diff --git a/CRISPResso2/CRISPRessoShared.py b/CRISPResso2/CRISPRessoShared.py index 9cbdb8b2..d6a0ed93 100644 --- a/CRISPResso2/CRISPRessoShared.py +++ b/CRISPResso2/CRISPRessoShared.py @@ -9,6 +9,9 @@ import errno import gzip import json +import sys +import importlib.util + import numpy as np import os import pandas as pd @@ -19,6 +22,7 @@ import subprocess as sb import unicodedata import logging +from inspect import getmodule, stack from CRISPResso2 import CRISPResso2Align from CRISPResso2 import CRISPRessoCOREResources @@ -30,39 +34,51 @@ class FlashException(Exception): pass + class TrimmomaticException(Exception): pass + class NoReadsAlignedException(Exception): pass + class AlignmentException(Exception): pass + class SgRNASequenceException(Exception): pass + class NTException(Exception): pass + class ExonSequenceException(Exception): pass + class DuplicateSequenceIdException(Exception): pass + class NoReadsAfterQualityFilteringException(Exception): pass + class BadParameterException(Exception): pass + class AutoException(Exception): pass + class OutputFolderIncompleteException(Exception): pass + class InstallationException(Exception): pass @@ -301,6 +317,7 @@ def getCRISPRessoArgParser(parser_title="CRISPResso Parameters", required_params parser.add_argument('--suppress_plots', help='Suppress output plots', action='store_true') parser.add_argument('--write_cleaned_report', action='store_true', help=argparse.SUPPRESS) # trims working directories from output in report (for web access) + parser.add_argument('--config_file', help='File path to JSON file with config elements', type=str) # base editor parameters parser.add_argument('--base_editor_output', @@ -529,7 +546,7 @@ def clean_filename(filename): validFilenameChars = "+-_.()%s%s" % (string.ascii_letters, string.digits) filename = slugify(str(filename).replace(' ', '_')) cleanedFilename = unicodedata.normalize('NFKD', filename) - return(''.join(c for c in 
def check_if_failed_run(folder_name, info):
    """Determine whether the CRISPResso run in `folder_name` failed or is incomplete.

    A run is considered successful only when both 'CRISPResso2_info.json' and
    'CRISPResso_status.txt' exist in the folder and the status file reports
    100% completion.

    Parameters
    ----------
    folder_name : str
        Path to the CRISPResso output folder to inspect.
    info : callable
        Logging callback (e.g. ``logger.info``) that accepts a single message string.

    Returns
    -------
    tuple of (bool, str)
        ``(True, reason)`` when the run FAILED or is incomplete, where `reason`
        is a human-readable description; ``(False, "")`` when the run completed.
    """
    run_data_file = os.path.join(folder_name, 'CRISPResso2_info.json')
    status_info = os.path.join(folder_name, 'CRISPResso_status.txt')

    missing_files = [path for path in (run_data_file, status_info) if not os.path.isfile(path)]
    if missing_files:
        # Report the file that is actually missing, not always the info JSON.
        info("Skipping folder '%s'. Cannot find run data status file at '%s'." % (folder_name, missing_files[0]))
        if "CRISPRessoPooled" in folder_name:
            unit = "amplicon"
        elif "CRISPRessoWGS" in folder_name:
            unit = "region"
        else:
            unit = "sample"
        return True, f"CRISPResso failed for this {unit}! Please check your input files and parameters."

    try:
        with open(status_info) as fh:
            file_contents = fh.read()
    except (OSError, UnicodeDecodeError) as e:
        # Route the error through the logging callback instead of printing to stdout.
        info("Skipping folder '%s'. Cannot parse status file '%s' (%s)." % (folder_name, status_info, e))
        return True, "Cannot parse status file '%s'." % (status_info)

    # The status file is expected to look like '<percent>% <message>', e.g. '100.00% Done'.
    search_result = re.search(r'(\d+\.\d+)% (.+)', file_contents)
    if not search_result:
        # Unrecognized status format: surface the raw contents as the failure reason.
        return True, file_contents

    percent_complete, status = search_result.groups()
    # Compare numerically so that '100.0' and '100.00' are both treated as complete.
    if float(percent_complete) != 100.0:
        info("Skipping folder '%s'. Run is not complete (%s)." % (folder_name, status))
        return True, status
    return False, ""


def is_C2Pro_installed():
    """Return True if the optional `crispressoPro` package can be located, False otherwise."""
    try:
        return importlib.util.find_spec("crispressoPro") is not None
    except Exception:
        # Best-effort probe: any failure while locating the module means "not installed".
        # Unlike a bare `except:`, this does not swallow KeyboardInterrupt/SystemExit.
        return False


def check_custom_config(args):
    """Return the style configuration for reports, honoring a user-provided JSON file.

    Custom configuration is only applied when CRISPRessoPro is installed and
    `args.config_file` is set; otherwise the built-in defaults are returned.

    Parameters
    ----------
    args : argparse.Namespace (or any object with a `config_file` attribute)
        All arguments passed into the CRISPResso run.

    Returns
    -------
    dict
        A dict with a 'colors' key mapping report items to hex color values.
        This is either the built-in default configuration, or the configuration
        loaded from the user's JSON file with any missing color filled in from
        the defaults.
    """
    config = {
        "colors": {
            'Substitution': '#0000FF',
            'Insertion': '#008000',
            'Deletion': '#FF0000',
            'A': '#7FC97F',
            'T': '#BEAED4',
            'C': '#FDC086',
            'G': '#FFFF99',
            'N': '#C8C8C8',
            '-': '#1E1E1E',
        }
    }

    # Custom styling is only available when CRISPRessoPro is installed
    if not is_C2Pro_installed():
        return config

    config_file = getattr(args, 'config_file', None)
    if not config_file:
        return config

    # Log under the caller's module name; fall back to this module when the
    # caller's module cannot be determined (getmodule may return None).
    caller_module = getmodule(stack()[1][0])
    logger = logging.getLogger(caller_module.__name__ if caller_module is not None else __name__)

    try:
        with open(config_file, "r") as json_file:
            custom_config = json.load(json_file)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError
        logger.warning("Cannot read json file '%s', defaulting style parameters. (%s)" % (config_file, e))
        return config

    if not isinstance(custom_config, dict) or not isinstance(custom_config.get('colors'), dict):
        logger.warning("Json file does not contain the colors key. Defaulting all values.")
        return config

    # Fill in any color the user did not specify
    for key, default_color in config['colors'].items():
        if key not in custom_config['colors']:
            logger.warning(f"Value for {key} not provided, defaulting")
            custom_config['colors'][key] = default_color

    return custom_config
crispresso2_info['results']['failed_batch_arr_desc'] = failed_batch_arr_desc crispresso2_info['results']['alignment_stats']['samples_quantification_summary_filename'] = os.path.basename(samples_quantification_summary_filename) crispresso2_info['results']['regions'] = df_regions crispresso2_info['results']['all_region_names'] = all_region_names diff --git a/CRISPResso2/default_style.json b/CRISPResso2/default_style.json new file mode 100644 index 00000000..9876b670 --- /dev/null +++ b/CRISPResso2/default_style.json @@ -0,0 +1,13 @@ +{ + "colors": { + "Substitution": "#0000FF", + "Insertion": "#008000", + "Deletion": "#FF0000", + "A": "#7FC97F", + "T": "#BEAED4", + "C": "#FDC086", + "G": "#FFFF99", + "N": "#C8C8C8", + "-": "#C1C1C1" + } +} \ No newline at end of file diff --git a/CRISPResso2/templates/layout.html b/CRISPResso2/templates/layout.html deleted file mode 100644 index 5eba4687..00000000 --- a/CRISPResso2/templates/layout.html +++ /dev/null @@ -1,104 +0,0 @@ - - - - CRISPResso2 Report - - - - - - - - - - - - - - - - - - - {% block head %}{% endblock %} - - - - -
- -
-
-
-
-
-

CRISPResso2

-

Analysis of genome editing outcomes from deep sequencing data

-
-
- -
-
- -
- {% block content %}{% endblock %} - -
- -
- - -
- -
- - - - - - - - - {% block foot %}{% endblock %} - - - diff --git a/CRISPResso2/templates/report.html b/CRISPResso2/templates/report.html deleted file mode 100644 index 37cc4bae..00000000 --- a/CRISPResso2/templates/report.html +++ /dev/null @@ -1,854 +0,0 @@ -{% extends "layout.html" %} -{% block head %} - - -{% endblock %} - -{% block content %} -
-
- -
-
- -
-
- {% if report_data['report_display_name'] != '' %} -
{{report_data['report_display_name']}}
- {% endif %} -
CRISPResso2 run information
- -
-
-
-
- - - {% for (data_label,data_path) in report_data['fig_datas']['plot_1a'] %} -

Data: {{data_label}}

- {% endfor %} -
-
-

CRISPResso version: {{report_data['run_data']['running_info']['version']}}

-

Run completed: {{report_data['run_data']['running_info']['end_time_string']}}

-

Amplicon sequence:

{{report_data['run_data']['running_info']['args']['amplicon_seq']}}

- {% if report_data['run_data']['running_info']['args']['guide_seq'] != '' %} -

Guide sequence:

{{report_data['run_data']['running_info']['args']['guide_seq']}}

- {% endif %} -

Command used:

{{report_data['run_data']['running_info']['command_used']}}

-

Parameters:

{{report_data['run_data']['running_info']['args_string']}}

-

Running log

-
-
-
-
- -
-
-
Allele assignments
- -
-
-
-
- - - {% for (data_label,data_path) in report_data['fig_datas']['plot_1b'] %} -

Data: {{data_label}}

- {% endfor %} -
-
- - - {% for (data_label,data_path) in report_data['fig_datas']['plot_1c'] %} -

Data: {{data_label}}

- {% endfor %} -
- {% if report_data['fig_locs']['plot_1d'] %} -
- - - {% for (data_label,data_path) in report_data['fig_datas']['plot_1d'] %} -

Data: {{data_label}}

- {% endfor %} -
- {% endif %} -
- -
-
{# end card #} - - {# start global coding sequence report #} - {% if report_data['fig_locs']['plot_5a'] %} -
-
-
Global frameshift analysis
-
-
- - - {% for (data_label,data_path) in report_data['fig_datas']['plot_5a'] %} -

Data: {{data_label}}

- {% endfor %} -
-
- {% endif %} - - {% if report_data['fig_locs']['plot_6a'] %} -
-
-
Global frameshift mutagenesis profiles
-
-
- - - {% for (data_label,data_path) in report_data['fig_datas']['plot_6a'] %} -

Data: {{data_label}}

- {% endfor %} -
-
- {% endif %} - - {% if report_data['fig_locs']['plot_8a'] %} -
-
-
Global splicing analysis
-
-
- - - {% for (data_label,data_path) in report_data['fig_datas']['plot_8a'] %} -

Data: {{data_label}}

- {% endfor %} -
-
- {% endif %} - {# end of global coding sequence analysis #} - - {# start hdr summary #} - {% if report_data['fig_locs'][report_data.amplicons[0]]['plot_4g'] %} -
-
- {% if report_data.amplicons|length == 1 %} {# if only one amplicon #} -
HDR summary plot
- {% else %} -
HDR summary report (all reads aligned to {{report_data.amplicons[0]}})
- {% endif %} -
-
- - - {% for (data_label,data_path) in report_data['fig_datas'][report_data.amplicons[0]]['plot_4g'] %} -

Data: {{data_label}}

- {% endfor %} -
-
- {% endif %} - {# end HDR summary #} - - {# start prime editing report #} - {% if report_data['fig_locs'][report_data.amplicons[0]]['plot_11a'] %} -
-
- {% if report_data.amplicons|length == 1 %} {# if only one amplicon #} -
Prime editing report
- {% else %} -
Prime editing report (all reads aligned to {{report_data.amplicons[0]}})
- {% endif %} -
-
- - - {% for (data_label,data_path) in report_data['fig_datas'][report_data.amplicons[0]]['plot_11a'] %} -

Data: {{data_label}}

- {% endfor %} -
-
- {% endif %} - - {% if report_data['sgRNA_based_fig_names'][report_data.amplicons[0]] and report_data['sgRNA_based_fig_names'][report_data.amplicons[0]]['11b']%} -
-
- {% if report_data.amplicons|length == 1 %} {# if only one amplicon #} -
Prime editing summary plots at analysis positions
- {% else %} -
Prime editing summary plots at analysis positions (aligned to {{report_data.amplicons[0]}})
- {% endif %} -
-
- {% for fig_name in report_data['sgRNA_based_fig_names'][report_data.amplicons[0]]['11b'] %} -
- - - {% for (data_label,data_path) in report_data['fig_datas'][report_data.amplicons[0]][fig_name] %} -

Data: {{data_label}}

- {% endfor %} -
- {% endfor %} -
-
- {% endif %} {# end plot 11b for prime editing #} - - {% if report_data['fig_locs']['plot_11c'] %} -
-
-
Scaffold insertions
-
-
- - - {% for (data_label,data_path) in report_data['fig_datas']['plot_11c'] %} -

Data: {{data_label}}

- {% endfor %} -
-
- {% endif %} - - {# end prime editing section #} - - - {% if report_data.amplicons|length == 1 %} -
{# if only one amplicon, just a normal div #} - {% else %} - {# If there is more than one amplicon, print a navigation to show each amplicon #} -

Reads are aligned to each amplicon sequence separately. Quantification and visualization of these reads are shown for each amplicon below:

-
-
-
Amplicons
- -
- {% endif %} {# end if report contains more than one amplicon #} - - - {% if report_data.amplicons|length == 1 %} {# if only one amplicon, just a normal div #} -
- {% else %} - {# jumbotron_content #} -
{# jumbrotron #} - -
{# column #} - -
-{% endblock %} - -{% block foot %} - -{% endblock %} diff --git a/MANIFEST.in b/MANIFEST.in index 59f8ed4c..cc437970 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,2 @@ include CRISPResso2/EDNAFULL -include CRISPResso2/templates/* +include CRISPResso2/CRISPRessoReports/templates/* diff --git a/setup.py b/setup.py index cba9ac76..20cc3598 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,7 @@ def main(): url='http://github.com/pinellolab/CRISPResso2', package_dir={'CRISPResso2' : 'CRISPResso2'}, include_package_data = True, - packages=['CRISPResso2'], + packages=['CRISPResso2', 'CRISPResso2.CRISPRessoReports'], entry_points=entry_points, description="Software pipeline for the analysis of genome editing outcomes from deep sequencing data", classifiers=[ @@ -89,6 +89,7 @@ def main(): 'matplotlib', # '>=1.3.1,<=2.2.3', 'seaborn', # '>0.7.1,<0.10', 'jinja2', + 'jinja_partials', 'scipy', 'numpy', 'plotly',