Skip to content

Commit

Permalink
Prime editing refinement and Pooled filesystem demand reduction
Browse files Browse the repository at this point in the history
- Prime editing parameters renamed, nicking guides match with flexibility
- Prime editing extension seq is shown as a guide (with no cut site)
- Prime editing summary plot included in report
- Nucleotide plots are shaded when no changes from the reference sequence
- sgRNA annotations are plotted on multiple lines if they overlap
- N's don't count as substitutions
- extended read analysis data available with --write_detailed_allele_table flag
  • Loading branch information
kclem committed May 10, 2020
1 parent 8c58471 commit 4e1d6b2
Show file tree
Hide file tree
Showing 9 changed files with 2,629 additions and 1,970 deletions.
2,970 changes: 1,484 additions & 1,486 deletions CRISPResso2/CRISPResso2Align.c

Large diffs are not rendered by default.

730 changes: 583 additions & 147 deletions CRISPResso2/CRISPRessoCORE.py

Large diffs are not rendered by default.

585 changes: 300 additions & 285 deletions CRISPResso2/CRISPRessoCOREResources.c

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion CRISPResso2/CRISPRessoCOREResources.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def find_indels_substitutions(_read_seq_al,_ref_seq_al,_include_indx):

if c in nucSet:
ref_positions.append(idx)
if ref_seq_al[idx_c]!=read_seq_al[idx_c] and read_seq_al[idx_c]!='-' and ref_seq_al[idx_c]!='-':
if ref_seq_al[idx_c]!=read_seq_al[idx_c] and read_seq_al[idx_c]!='-' and read_seq_al[idx_c] != 'N':
all_substitution_positions.append(idx)
all_substitution_values.append(read_seq_al[idx_c])
if idx in _include_indx:
Expand Down
236 changes: 196 additions & 40 deletions CRISPResso2/CRISPRessoPlot.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions CRISPResso2/CRISPRessoPooledCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -1299,6 +1299,7 @@ def default_sigpipe():

if RUNNING_MODE=='ONLY_GENOME' or RUNNING_MODE=='AMPLICONS_AND_GENOME':
files_to_remove+=[bam_filename_genome]
files_to_remove+=[bam_filename_genome+".bai"]

if RUNNING_MODE=='ONLY_AMPLICONS':
files_to_remove+=[bam_filename_amplicons,amplicon_fa_filename]
Expand Down
4 changes: 2 additions & 2 deletions CRISPResso2/CRISPRessoReport.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,14 @@ def add_fig_if_exists(fig_name,fig_root,fig_title,fig_caption,fig_data,



for fig in ['2a','3a','3b','4a','4b','4c','4d','4e','4f','5','6','7','8','10a','10b','10c']:
for fig in ['2a','3a','3b','4a','4b','4c','4d','4e','4f','5','6','7','8','10a','10b','10c','11a']:
fig_name = 'plot_'+ fig
if fig_name + '_root' in run_data['refs'][amplicon_name]:
add_fig_if_exists(fig_name,run_data['refs'][amplicon_name][fig_name + '_root'],'Figure ' + fig_name,run_data['refs'][amplicon_name][fig_name + '_caption'],run_data['refs'][amplicon_name][fig_name + '_data'],
amplicon_fig_names,amplicon_fig_locs,amplicon_fig_titles,amplicon_fig_captions,amplicon_fig_datas)

this_sgRNA_based_fig_names = {}
for fig in ['2b','9','10d','10e','10f','10g']:
for fig in ['2b','9','10d','10e','10f','10g','11b']:
#fig 2b's
this_fig_names = []
if 'plot_'+fig+'_roots' in run_data['refs'][amplicon_name]:
Expand Down
27 changes: 18 additions & 9 deletions CRISPResso2/CRISPRessoShared.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
'''
CRISPResso2 - Kendell Clement and Luca Pinello 2018
CRISPResso2 - Kendell Clement and Luca Pinello 2020
Software pipeline for the analysis of genome editing outcomes from deep sequencing data
(c) 2018 The General Hospital Corporation. All Rights Reserved.
(c) 2020 The General Hospital Corporation. All Rights Reserved.
'''

import argparse
Expand Down Expand Up @@ -31,7 +31,7 @@
else:
import cPickle as cp #python 2.7

__version__ = "2.0.36"
__version__ = "2.0.37"

###EXCEPTIONS############################
class FlashException(Exception):
Expand Down Expand Up @@ -142,12 +142,14 @@ def getCRISPRessoArgParser(parserTitle = "CRISPResso Parameters",requiredParams=
parser.add_argument('--plot_window_size','--offset_around_cut_to_plot', type=int, help='Defines the size of the window extending from the quantification window center to plot. Nucleotides within plot_window_size of the quantification_window_center for each guide are plotted.', default=20)
parser.add_argument('--min_frequency_alleles_around_cut_to_plot', type=float, help='Minimum %% reads required to report an allele in the alleles table plot.', default=0.2)
parser.add_argument('--expand_allele_plots_by_quantification', help='If set, alleles with different modifications in the quantification window (but not necessarily in the plotting window (e.g. for another sgRNA)) are plotted on separate lines, even though they may have the same apparent sequence. To force the allele plot and the allele table to be the same, set this parameter. If unset, all alleles with the same sequence will be collapsed into one row.', action='store_true')
parser.add_argument('--allele_plot_pcts_only_for_assigned_reference',help='If set, in the allele plots, the percentages will show the percentage as a percent of reads aligned to the assigned reference. Default behavior is to show percentage as a percent of all reads.',action='store_true')
parser.add_argument('-qwc','--quantification_window_coordinates', type=str, help='Bp positions in the amplicon sequence specifying the quantification window. This parameter overrides values of the "--quantification_window_center", "--cleavage_offset", "--window_around_sgrna" or "--window_around_sgrna" values. Any indels/substitutions outside this window are excluded. Indexes are 0-based, meaning that the first nucleotide is position 0. Ranges are separted by the dash sign (e.g. "start-stop"), and multiple ranges can be separated by the underscore (_). ' +
'A value of 0 disables this filter. (can be comma-separated list of values, corresponding to amplicon sequences given in --amplicon_seq e.g. 5-10,5-10_20-30 would specify the 5th-10th bp in the first reference and the 5th-10th and 20th-30th bp in the second reference)', default=None)

#verbosity parameters
parser.add_argument('--keep_intermediate',help='Keep all the intermediate files',action='store_true')
parser.add_argument('--dump',help='Dump numpy arrays and pandas dataframes to file for debugging purposes',action='store_true')
parser.add_argument('--write_detailed_allele_table',help='If set, a detailed allele table will be written including alignment scores for each read sequence.',action='store_true')

#report style parameters
parser.add_argument('--max_rows_alleles_around_cut_to_plot', type=int, help='Maximum number of rows to report in the alleles table plot. ', default=50)
Expand All @@ -162,10 +164,11 @@ def getCRISPRessoArgParser(parserTitle = "CRISPResso Parameters",requiredParams=
parser.add_argument('--conversion_nuc_to', help='For base editor plots, this is the nucleotide produced by the base editor',default='T')

#prime editing parameters
parser.add_argument('--prime_editing_spacer_seq',type=str,help="Spacer sgRNA sequence used in prime editing. The spacer should not include the PAM sequence. The sequence should be given in the RNA 5'->3' order, so for Cas9, the PAM would be on the right side of the sequence.",default='')
parser.add_argument('--prime_editing_extension_seq',type=str,help="Extension sequence used in prime editing. The sequence should be given such that the left end of the sequence overlaps with the primer binding site (PBS) and the right end of the sequence is the 3' flap.",default='')
parser.add_argument('--prime_editing_extension_quantification_window_size',type=int,help="Quantification window size (in bp) at flap site for measuring modifications anchored at the right side of the extension sequence. Similar to the --quantification_window parameter, the total length of the quantification window will be 2x this parameter. Default: 5bp (10bp total window size)",default=5)
parser.add_argument('--prime_editing_nicking_guide_seq',type=str,help="Nicking sgRNA sequence used in prime editing. The sgRNA should not include the PAM sequence. The sequence should be given in the RNA 5'-3' order, so for Cas9, the PAM would be on the right side of the sequence",default='')
parser.add_argument('--prime_editing_pegRNA_spacer_seq',type=str,help="pegRNA spacer sgRNA sequence used in prime editing. The spacer should not include the PAM sequence. The sequence should be given in the RNA 5'->3' order, so for Cas9, the PAM would be on the right side of the given sequence.",default='')
parser.add_argument('--prime_editing_pegRNA_extension_seq',type=str,help="Extension sequence used in prime editing. The sequence should be given in the RNA 5'->3' order, such that the sequence starts with the RT template including the edit, followed by the Primer-binding site (PBS).",default='')
parser.add_argument('--prime_editing_pegRNA_extension_quantification_window_size',type=int,help="Quantification window size (in bp) at flap site for measuring modifications anchored at the right side of the extension sequence. Similar to the --quantification_window parameter, the total length of the quantification window will be 2x this parameter. Default: 5bp (10bp total window size)",default=5)
parser.add_argument('--prime_editing_pegRNA_scaffold_sequence',type=str,help="If given, reads containing any of this scaffold sequence before extension sequence (provided by --prime_editing_extension_seq) will be classified as 'Scaffold Incorporated'. The sequence should be given in the 5'->3' order such that the RT template directly follows this sequence. A common value is 'GGCACCGAGUCGGUGC'.",default='')
parser.add_argument('--prime_editing_nicking_guide_seq',type=str,help="Nicking sgRNA sequence used in prime editing. The sgRNA should not include the PAM sequence. The sequence should be given in the RNA 5'->3' order, so for Cas9, the PAM would be on the right side of the sequence",default='')

#special running modes
parser.add_argument('--crispresso1_mode', help='Parameter usage as in CRISPResso 1',action='store_true')
Expand Down Expand Up @@ -680,6 +683,8 @@ def get_row_around_cut(row,cut_point,offset):


def get_dataframe_around_cut(df_alleles, cut_point,offset,collapse_by_sequence=True):
if df_alleles.shape[0] == 0:
return df_alleles
ref1 = df_alleles['Reference_Sequence'].iloc[0]
ref1 = ref1.replace('-','')
if (cut_point + offset + 1 > len(ref1)):
Expand Down Expand Up @@ -796,7 +801,7 @@ def get_amplicon_info_for_guides(ref_seq,guides,guide_mismatches,guide_names,qua
this_sgRNA_cut_points.append(cut_p)
this_sgRNA_plot_cut_points.append(guide_plot_cut_points[guide_idx])
this_sgRNA_intervals.append((m.start(),m.start() + len(current_guide_seq)-1))
this_sgRNA_mismatches.append(guide_mismatches[guide_idx])
this_sgRNA_mismatches.append([len(current_guide_seq)-x for x in guide_mismatches[guide_idx]])

if quantification_window_sizes[guide_idx] > 0:
st=max(0,cut_p-quantification_window_sizes[guide_idx]+1)
Expand Down Expand Up @@ -892,10 +897,14 @@ def set_guide_array(vals, guides, property_name):
"""
#if only one is given, set it for all guides
vals_array = vals.split(",")
ret_array = [int(vals_array[0])]*max(1,len(guides))
ret_array = [int(vals_array[0])]*len(guides)
#len(vals_array) is always one -- don't freak out if it's longer than 0 guides
if len(vals_array) > 1 and len(vals_array) > len(guides):
raise BadParameterException("More %s values were given than guides. Guides: %d %ss: %d"%(property_name,len(guides),property_name,len(vals_array)))

if len(guides) == 0:
return []

for idx, val in enumerate(vals_array):
if val != '':
ret_array[idx] = int(val)
Expand Down
44 changes: 44 additions & 0 deletions CRISPResso2/templates/report.html
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,49 @@ <h5>Global splicing analysis</h5>
{% endif %}
{# end of global coding sequence analysis #}

{# start prime editing report #}
{% if report_data['fig_locs'][report_data.amplicons[0]]['plot_11a'] %}
<div class='card text-center mb-2'>
<div class='card-header'>
{% if report_data.amplicons|length == 1 %} {# if only one amplicon #}
<h5>Prime editing report</h5>
{% else %}
<h5>Prime editing report (all reads aligned to {{report_data.amplicons[0]}})</h5>
{% endif %}
</div>
<div class='card-body'>
<a href="{{report_data['crispresso_data_path']}}{{report_data['fig_locs'][report_data.amplicons[0]]['plot_11a']}}.pdf"><img src="{{report_data['crispresso_data_path']}}{{report_data['fig_locs'][report_data.amplicons[0]]['plot_11a']}}.png" width='80%' ></a>
<label class="labelpadding">{{report_data['fig_captions'][report_data.amplicons[0]]['plot_11a']}}</label>
{% for (data_label,data_path) in report_data['fig_datas'][report_data.amplicons[0]]['plot_11a'] %}
<p class="m-0"><small>Data: <a href="{{report_data['crispresso_data_path']}}{{data_path}}">{{data_label}}</a></small></p>
{% endfor %}
</div>
</div>
{% endif %}

{% if report_data['sgRNA_based_fig_names'][report_data.amplicons[0]] and report_data['sgRNA_based_fig_names'][report_data.amplicons[0]]['11b']%}
<div class='card text-center mb-2'>
<div class='card-header'>
{% if report_data.amplicons|length == 1 %} {# if only one amplicon #}
<h5>Prime editing summary plots at analysis positions</h5>
{% else %}
<h5>Prime editing summary plots at analysis positions (aligned to {{report_data.amplicons[0]}})</h5>
{% endif %}
</div>
<div class='card-body'>
{% for fig_name in report_data['sgRNA_based_fig_names'][report_data.amplicons[0]]['11b'] %}
<div class='mb-4'>
<a href="{{report_data['crispresso_data_path']}}{{report_data['fig_locs'][report_data.amplicons[0]][fig_name]}}.pdf"><img src="{{report_data['crispresso_data_path']}}{{report_data['fig_locs'][report_data.amplicons[0]][fig_name]}}.png" width='95%' ></a>
<label class="labelpadding">{{report_data['fig_captions'][report_data.amplicons[0]][fig_name]}}</label>
{% for (data_label,data_path) in report_data['fig_datas'][report_data.amplicons[0]][fig_name] %}
<p class="m-0"><small>Data: <a href="{{report_data['crispresso_data_path']}}{{data_path}}">{{data_label}}</a></small></p>
{% endfor %}
</div>
{% endfor %}
</div>
</div>
{% endif %} {# end plot 11b for prime editing #}


{% if report_data.amplicons|length == 1 %}
<div> {# if only one amplicon, just a normal div #}
Expand Down Expand Up @@ -649,6 +692,7 @@ <h5>Base editing for {{amplicon_name}}</h5>
</div>
</div> {# end card #}
{% endif %} {# end base editing card #}

</div> {# end this amplicon tab #} <!--end amp tab -->

{% endfor %}
Expand Down

0 comments on commit 4e1d6b2

Please sign in to comment.