From 897752d510d360e61be80254e0ebfc9b3c160e06 Mon Sep 17 00:00:00 2001 From: Kendell Clement Date: Thu, 18 Jul 2024 14:31:54 -0600 Subject: [PATCH] Asymmetrical cut point (#457) * add cut_point_ind to plot_alleles_heatmap for asymmetrical plotting * Cole asymmetrical cut point (#453) * Pin versions of numpy and matplotlib in CI environment (#84) (#452) * Reduce duplication and implement cut_point_ind in plot_alleles_heatmap_hist --------- Co-authored-by: Cole Lyman --- CRISPResso2/CRISPRessoPlot.py | 78 +++++++++-- scripts/plotCustomAllelePlot.py | 238 +++++--------------------------- 2 files changed, 101 insertions(+), 215 deletions(-) diff --git a/CRISPResso2/CRISPRessoPlot.py b/CRISPResso2/CRISPRessoPlot.py index 99a11273..3c7fe830 100644 --- a/CRISPResso2/CRISPRessoPlot.py +++ b/CRISPResso2/CRISPRessoPlot.py @@ -3061,6 +3061,7 @@ def plot_alleles_heatmap( custom_colors, SAVE_ALSO_PNG=False, plot_cut_point=True, + cut_point_ind=None, sgRNA_intervals=None, sgRNA_names=None, sgRNA_mismatches=None, @@ -3077,6 +3078,7 @@ def plot_alleles_heatmap( -per_element_annot_kws: annotations for each cell (e.g. bold for substitutions, etc.) -SAVE_ALSO_PNG: whether to write png file as well -plot_cut_point: if false, won't draw 'predicted cleavage' line + -cut_point_ind: index of cut point (if None, will be plot in the middle calculated as len(reference_seq)/2) -sgRNA_intervals: locations where sgRNA is located -sgRNA_mismatches: array (for each sgRNA_interval) of locations in sgRNA where there are mismatches -sgRNA_names: array (for each sgRNA_interval) of names of sgRNAs (otherwise empty) @@ -3198,7 +3200,9 @@ def plot_alleles_heatmap( #cut point vertical line if plot_cut_point: - ax_hm.vlines([plot_nuc_len/2], *ax_hm.get_ylim(), linestyles='dashed') + if cut_point_ind is None: + cut_point_ind = [plot_nuc_len / 2] + ax_hm.vlines(cut_point_ind,*ax_hm.get_ylim(),linestyles='dashed') ax_hm_ref.yaxis.tick_right() @@ -3233,7 +3237,7 @@ def plot_alleles_heatmap( fig.savefig(fig_filename_root+'.png', bbox_inches='tight', bbox_extra_artists=(lgd,)) plt.close(fig) -def plot_alleles_heatmap_hist(reference_seq,fig_filename_root,X,annot,y_labels,insertion_dict,per_element_annot_kws,count_values,custom_colors,SAVE_ALSO_PNG=False,plot_cut_point=True,sgRNA_intervals=None,sgRNA_names=None,sgRNA_mismatches=None,**kwargs): +def plot_alleles_heatmap_hist(reference_seq,fig_filename_root,X,annot,y_labels,insertion_dict,per_element_annot_kws,count_values,custom_colors,SAVE_ALSO_PNG=False,plot_cut_point=True,cut_point_ind=None,sgRNA_intervals=None,sgRNA_names=None,sgRNA_mismatches=None,**kwargs): """ Plots alleles in a heatmap (nucleotides color-coded for easy visualization) input: @@ -3246,6 +3250,7 @@ def plot_alleles_heatmap_hist(reference_seq,fig_filename_root,X,annot,y_labels,i -per_element_annot_kws: annotations for each cell (e.g. bold for substitutions, etc.) -SAVE_ALSO_PNG: whether to write png file as well -plot_cut_point: if false, won't draw 'predicted cleavage' line + -cut_point_ind: index of cut point (if None, will be plot in the middle calculated as len(reference_seq)/2) -sgRNA_intervals: locations where sgRNA is located -sgRNA_mismatches: array (for each sgRNA_interval) of locations in sgRNA where there are mismatches -sgRNA_names: array (for each sgRNA_interval) of names of sgRNAs (otherwise empty) @@ -3328,7 +3333,9 @@ def plot_alleles_heatmap_hist(reference_seq,fig_filename_root,X,annot,y_labels,i #cut point vertical line if plot_cut_point: - ax_hm.vlines([plot_nuc_len/2], *ax_hm.get_ylim(), linestyles='dashed') + if cut_point_ind is None: + cut_point_ind = [plot_nuc_len / 2] + ax_hm.vlines(cut_point_ind, *ax_hm.get_ylim(), linestyles='dashed') #create boxes for ins for idx, lss in insertion_dict.items(): @@ -3368,7 +3375,7 @@ def plot_alleles_heatmap_hist(reference_seq,fig_filename_root,X,annot,y_labels,i plt.savefig(fig_filename_root+'.png', bbox_inches='tight', bbox_extra_artists=(lgd,), pad_inches=0.1) plt.close() -def plot_alleles_table(reference_seq,df_alleles,fig_filename_root,custom_colors,MIN_FREQUENCY=0.5,MAX_N_ROWS=100,SAVE_ALSO_PNG=False,plot_cut_point=True,sgRNA_intervals=None,sgRNA_names=None,sgRNA_mismatches=None,annotate_wildtype_allele='****',**kwargs): +def plot_alleles_table(reference_seq,df_alleles,fig_filename_root,custom_colors,MIN_FREQUENCY=0.5,MAX_N_ROWS=100,SAVE_ALSO_PNG=False,plot_cut_point=True,cut_point_ind=None,sgRNA_intervals=None,sgRNA_names=None,sgRNA_mismatches=None,annotate_wildtype_allele='****',**kwargs): """ plots an allele table for a dataframe with allele frequencies input: @@ -3379,6 +3386,7 @@ def plot_alleles_table(reference_seq,df_alleles,fig_filename_root,custom_colors, MAX_N_ROWS: max rows to plot SAVE_ALSO_PNG: whether to write png file as well plot_cut_point: if false, won't draw 'predicted cleavage' line + cut_point_ind: index of cut point (if None, will be plot in the middle calculated as len(reference_seq)/2) sgRNA_intervals: locations where sgRNA is located sgRNA_mismatches: array (for each sgRNA_interval) of locations in sgRNA where there are mismatches sgRNA_names: array (for each sgRNA_interval) of names of sgRNAs (otherwise empty) @@ -3390,9 +3398,23 @@ def plot_alleles_table(reference_seq,df_alleles,fig_filename_root,custom_colors, for ix, is_ref in enumerate(is_reference): if is_ref: y_labels[ix] += annotate_wildtype_allele - plot_alleles_heatmap(reference_seq, fig_filename_root, X, annot, y_labels, insertion_dict, per_element_annot_kws, custom_colors, SAVE_ALSO_PNG, plot_cut_point, sgRNA_intervals, sgRNA_names, sgRNA_mismatches) -def plot_alleles_table_from_file(alleles_file_name,fig_filename_root,custom_colors,MIN_FREQUENCY=0.5,MAX_N_ROWS=100,SAVE_ALSO_PNG=False,plot_cut_point=True,sgRNA_intervals=None,sgRNA_names=None,sgRNA_mismatches=None,annotate_wildtype_allele='',**kwargs): + plot_alleles_heatmap(reference_seq=reference_seq, + fig_filename_root=fig_filename_root, + X=X, + annot=annot, + y_labels=y_labels, + insertion_dict=insertion_dict, + per_element_annot_kws=per_element_annot_kws, + custom_colors=custom_colors, + SAVE_ALSO_PNG=SAVE_ALSO_PNG, + plot_cut_point=plot_cut_point, + cut_point_ind=cut_point_ind, + sgRNA_intervals=sgRNA_intervals, + sgRNA_names=sgRNA_names, + sgRNA_mismatches=sgRNA_mismatches) + +def plot_alleles_table_from_file(alleles_file_name,fig_filename_root,custom_colors,MIN_FREQUENCY=0.5,MAX_N_ROWS=100,SAVE_ALSO_PNG=False,plot_cut_point=True,cut_point_ind=None,sgRNA_intervals=None,sgRNA_names=None,sgRNA_mismatches=None,annotate_wildtype_allele='',**kwargs): """ plots an allele table for a dataframe with allele frequencies infers the reference sequence by finding reference sequences without gaps (-) @@ -3405,6 +3427,7 @@ def plot_alleles_table_from_file(alleles_file_name,fig_filename_root,custom_colo MAX_N_ROWS: max rows to plot SAVE_ALSO_PNG: whether to write png file as well plot_cut_point: if false, won't draw 'predicted cleavage' line + cut_point_ind: index of cut point (if None, will be plot in the middle calculated as len(reference_seq)/2) sgRNA_intervals: locations where sgRNA is located sgRNA_mismatches: array (for each sgRNA_interval) of locations in sgRNA where there are mismatches sgRNA_names: array (for each sgRNA_interval) of names of sgRNAs (otherwise empty) @@ -3426,7 +3449,20 @@ def plot_alleles_table_from_file(alleles_file_name,fig_filename_root,custom_colo for ix, is_ref in enumerate(is_reference): if is_ref: y_labels[ix] += annotate_wildtype_allele - plot_alleles_heatmap(reference_seq, fig_filename_root, X, annot, y_labels, insertion_dict, per_element_annot_kws, custom_colors, SAVE_ALSO_PNG, plot_cut_point, sgRNA_intervals, sgRNA_names, sgRNA_mismatches) + plot_alleles_heatmap(reference_seq=reference_seq, + fig_filename_root=fig_filename_root, + X=X, + annot=annot, + y_labels=y_labels, + insertion_dict=insertion_dict, + per_element_annot_kws=per_element_annot_kws, + custom_colors=custom_colors, + SAVE_ALSO_PNG=SAVE_ALSO_PNG, + plot_cut_point=plot_cut_point, + cut_point_ind=cut_point_ind, + sgRNA_intervals=sgRNA_intervals, + sgRNA_names=sgRNA_names, + sgRNA_mismatches=sgRNA_mismatches) def plot_alleles_tables_from_folder(crispresso_output_folder,fig_filename_root,custom_colors,MIN_FREQUENCY=None,MAX_N_ROWS=None,SAVE_ALSO_PNG=False,plot_cut_point=True,sgRNA_intervals=None,sgRNA_names=None,sgRNA_mismatches=None,**kwargs): """ @@ -3495,7 +3531,19 @@ def plot_alleles_tables_from_folder(crispresso_output_folder,fig_filename_root,c new_sgRNA_intervals += [(int_start - new_sel_cols_start - 1, int_end - new_sel_cols_start - 1)] X, annot, y_labels, insertion_dict, per_element_annot_kws, is_reference = prep_alleles_table(df_alleles, ref_seq_around_cut, MAX_N_ROWS, MIN_FREQUENCY) - plot_alleles_heatmap(ref_seq_around_cut, fig_filename_root+"_"+ref_name+"_"+sgRNA_label, X, annot, y_labels, insertion_dict, per_element_annot_kws, custom_colors, SAVE_ALSO_PNG, plot_cut_point, new_sgRNA_intervals, sgRNA_names, sgRNA_mismatches) + plot_alleles_heatmap(reference_seq=ref_seq_around_cut, + fig_filename_root=fig_filename_root+"_"+ref_name+"_"+sgRNA_label, + X=X, + annot=annot, + y_labels=y_labels, + insertion_dict=insertion_dict, + per_element_annot_kws=per_element_annot_kws, + custom_colors=custom_colors, + SAVE_ALSO_PNG=SAVE_ALSO_PNG, + plot_cut_point=plot_cut_point, + sgRNA_intervals=new_sgRNA_intervals, + sgRNA_names=sgRNA_names, + sgRNA_mismatches=sgRNA_mismatches) plot_count += 1 print('Plotted ' + str(plot_count) + ' plots') @@ -3517,7 +3565,19 @@ def plot_alleles_table_compare(reference_seq,df_alleles,sample_name_1,sample_nam custom_colors: dict of colors to plot (e.g. colors['A'] = (1,0,0,0.4) # red,blue,green,alpha ) """ X, annot, y_labels, insertion_dict, per_element_annot_kws = prep_alleles_table_compare(df_alleles, sample_name_1, sample_name_2, MAX_N_ROWS, MIN_FREQUENCY) - plot_alleles_heatmap(reference_seq, fig_filename_root, X, annot, y_labels, insertion_dict, per_element_annot_kws, custom_colors, SAVE_ALSO_PNG, plot_cut_point, sgRNA_intervals, sgRNA_names, sgRNA_mismatches) + plot_alleles_heatmap(reference_seq=reference_seq, + fig_filename_root=fig_filename_root, + X=X, + annot=annot, + y_labels=y_labels, + insertion_dict=insertion_dict, + per_element_annot_kws=per_element_annot_kws, + custom_colors=custom_colors, + SAVE_ALSO_PNG=SAVE_ALSO_PNG, + plot_cut_point=plot_cut_point, + sgRNA_intervals=sgRNA_intervals, + sgRNA_names=sgRNA_names, + sgRNA_mismatches=sgRNA_mismatches) def plot_nucleotide_quilt_from_folder(crispresso_output_folder,fig_filename_root,save_also_png=False,sgRNA_intervals=None,min_text_pct=0.5,max_text_pct=0.95,quantification_window_idxs=None,sgRNA_names=None,sgRNA_mismatches=None,shade_unchanged=True,**kwargs): """ diff --git a/scripts/plotCustomAllelePlot.py b/scripts/plotCustomAllelePlot.py index a3094158..0025706e 100644 --- a/scripts/plotCustomAllelePlot.py +++ b/scripts/plotCustomAllelePlot.py @@ -44,19 +44,19 @@ def main(): def arrStr_to_arr(val): return [int(x) for x in val[1:-1].split(",")] -def get_row_around_cut_assymetrical(row,cut_point,plot_left,plot_right): +def get_row_around_cut_asymmetrical(row,cut_point,plot_left,plot_right): cut_idx=row['ref_positions'].index(cut_point) return row['Aligned_Sequence'][cut_idx-plot_left+1:cut_idx+plot_right+1],row['Reference_Sequence'][cut_idx-plot_left+1:cut_idx+plot_right+1],row['Read_Status']=='UNMODIFIED',row['n_deleted'],row['n_inserted'],row['n_mutated'],row['#Reads'], row['%Reads'] -def get_dataframe_around_cut_assymetrical(df_alleles, cut_point,plot_left,plot_right,collapse_by_sequence=True): +def get_dataframe_around_cut_asymmetrical(df_alleles, cut_point,plot_left,plot_right,collapse_by_sequence=True): if df_alleles.shape[0] == 0: return df_alleles ref1 = df_alleles['Reference_Sequence'].iloc[0] ref1 = ref1.replace('-','') if (cut_point + plot_right + 1 > len(ref1)): - raise(BadParameterException('The plotting window cannot extend past the end of the amplicon. Amplicon length is ' + str(len(ref1)) + ' but plot extends to ' + str(cut_point+plot_right+1))) + raise(CRISPRessoShared.BadParameterException('The plotting window cannot extend past the end of the amplicon. Amplicon length is ' + str(len(ref1)) + ' but plot extends to ' + str(cut_point+plot_right+1))) - df_alleles_around_cut=pd.DataFrame(list(df_alleles.apply(lambda row: get_row_around_cut_assymetrical(row,cut_point,plot_left,plot_right),axis=1).values), + df_alleles_around_cut=pd.DataFrame(list(df_alleles.apply(lambda row: get_row_around_cut_asymmetrical(row,cut_point,plot_left,plot_right),axis=1).values), columns=['Aligned_Sequence','Reference_Sequence','Unedited','n_deleted','n_inserted','n_mutated','#Reads','%Reads']) df_alleles_around_cut=df_alleles_around_cut.groupby(['Aligned_Sequence','Reference_Sequence','Unedited','n_deleted','n_inserted','n_mutated']).sum().reset_index().set_index('Aligned_Sequence') @@ -118,7 +118,7 @@ def plot_alleles_tables_from_folder(crispresso_output_folder,fig_filename_root,p plot_cut_point = plot_center ref_seq_around_cut=refs[ref_name]['sequence'][cut_point-plot_left+1:cut_point+plot_right+1] - df_alleles_around_cut=get_dataframe_around_cut_assymetrical(df_alleles, cut_point, plot_left, plot_right) + df_alleles_around_cut=get_dataframe_around_cut_asymmetrical(df_alleles, cut_point, plot_left, plot_right) this_allele_count = len(df_alleles_around_cut.index) if this_allele_count < 1: print('No reads found for ' + ref_name) @@ -133,7 +133,19 @@ def plot_alleles_tables_from_folder(crispresso_output_folder,fig_filename_root,p new_sgRNA_intervals += [(int_start - new_sel_cols_start - 1,int_end - new_sel_cols_start - 1)] fig_filename_root = fig_filename_root+"_"+ref_name+"_"+sgRNA_label - plot_alleles_table(ref_seq_around_cut,df_alleles=df_alleles_around_cut,fig_filename_root=fig_filename_root,cut_point_ind=cut_point-new_sel_cols_start,custom_colors=custom_colors,MIN_FREQUENCY=MIN_FREQUENCY,MAX_N_ROWS=MAX_N_ROWS,SAVE_ALSO_PNG=SAVE_ALSO_PNG,plot_cut_point=plot_cut_point,sgRNA_intervals=new_sgRNA_intervals,sgRNA_names=sgRNA_names,sgRNA_mismatches=sgRNA_mismatches,annotate_wildtype_allele=crispresso2_info['running_info']['args'].annotate_wildtype_allele) + CRISPRessoPlot.plot_alleles_table(ref_seq_around_cut, + df_alleles=df_alleles_around_cut, + fig_filename_root=fig_filename_root, + cut_point_ind=cut_point-new_sel_cols_start, + custom_colors=custom_colors, + MIN_FREQUENCY=MIN_FREQUENCY, + MAX_N_ROWS=MAX_N_ROWS, + SAVE_ALSO_PNG=SAVE_ALSO_PNG, + plot_cut_point=plot_cut_point, + sgRNA_intervals=new_sgRNA_intervals, + sgRNA_names=sgRNA_names, + sgRNA_mismatches=sgRNA_mismatches, + annotate_wildtype_allele=crispresso2_info['running_info']['args'].annotate_wildtype_allele) plot_count += 1 else: @@ -147,7 +159,7 @@ def plot_alleles_tables_from_folder(crispresso_output_folder,fig_filename_root,p plot_idxs = sgRNA_plot_idxs[ind] ref_seq_around_cut=refs[ref_name]['sequence'][cut_point-plot_left+1:cut_point+plot_right+1] - df_alleles_around_cut=get_dataframe_around_cut_assymetrical(df_alleles, cut_point, plot_left, plot_right) + df_alleles_around_cut=get_dataframe_around_cut_asymmetrical(df_alleles, cut_point, plot_left, plot_right) this_allele_count = len(df_alleles_around_cut.index) if this_allele_count < 1: print('No reads found for ' + ref_name) @@ -162,209 +174,23 @@ def plot_alleles_tables_from_folder(crispresso_output_folder,fig_filename_root,p new_sgRNA_intervals += [(int_start - new_sel_cols_start - 1,int_end - new_sel_cols_start - 1)] fig_filename_root = fig_filename_root+"_"+ref_name+"_"+sgRNA_label - plot_alleles_table(ref_seq_around_cut,df_alleles=df_alleles_around_cut,fig_filename_root=fig_filename_root,cut_point_ind=cut_point-new_sel_cols_start,custom_colors=custom_colors,MIN_FREQUENCY=MIN_FREQUENCY,MAX_N_ROWS=MAX_N_ROWS,SAVE_ALSO_PNG=SAVE_ALSO_PNG,plot_cut_point=plot_cut_point,sgRNA_intervals=new_sgRNA_intervals,sgRNA_names=sgRNA_names,sgRNA_mismatches=sgRNA_mismatches,annotate_wildtype_allele=crispresso2_info['running_info']['args'].annotate_wildtype_allele) + CRISPRessoPlot.plot_alleles_table(ref_seq_around_cut, + df_alleles=df_alleles_around_cut, + fig_filename_root=fig_filename_root, + cut_point_ind=cut_point-new_sel_cols_start, + custom_colors=custom_colors, + MIN_FREQUENCY=MIN_FREQUENCY, + MAX_N_ROWS=MAX_N_ROWS, + SAVE_ALSO_PNG=SAVE_ALSO_PNG, + plot_cut_point=plot_cut_point, + sgRNA_intervals=new_sgRNA_intervals, + sgRNA_names=sgRNA_names, + sgRNA_mismatches=sgRNA_mismatches, + annotate_wildtype_allele=crispresso2_info['running_info']['args'].annotate_wildtype_allele) plot_count += 1 print('Plotted ' + str(plot_count) + ' plots') -def plot_alleles_table(reference_seq,df_alleles,fig_filename_root,custom_colors,MIN_FREQUENCY=0.5,MAX_N_ROWS=100,SAVE_ALSO_PNG=False,plot_cut_point=True,cut_point_ind=None,sgRNA_intervals=None,sgRNA_names=None,sgRNA_mismatches=None,annotate_wildtype_allele='****',): - """ - plots an allele table for a dataframe with allele frequencies - input: - reference_seq: the reference amplicon sequence to plot - df_alleles: merged dataframe (should include columns "#Reads','%Reads') - fig_filename: figure filename to plot (not including '.pdf' or '.png') - MIN_FREQUENCY: sum of alleles % must add to this to be plotted - MAX_N_ROWS: max rows to plot - SAVE_ALSO_PNG: whether to write png file as well - plot_cut_point: if false, won't draw 'predicted cleavage' line - cut_point_ind: index to plot cut point at - sgRNA_intervals: locations where sgRNA is located - sgRNA_mismatches: array (for each sgRNA_interval) of locations in sgRNA where there are mismatches - sgRNA_names: array (for each sgRNA_interval) of names of sgRNAs (otherwise empty) - custom_colors: dict of colors to plot (e.g. colors['A'] = (1,0,0,0.4) # red,blue,green,alpha ) - annotate_wildtype_allele: string to add to the end of the wildtype allele (e.g. ** or '') - """ - X,annot,y_labels,insertion_dict,per_element_annot_kws,is_reference = CRISPRessoPlot.prep_alleles_table(df_alleles,reference_seq,MAX_N_ROWS,MIN_FREQUENCY) - if annotate_wildtype_allele != '': - for ix, is_ref in enumerate(is_reference): - if is_ref: - y_labels[ix] += annotate_wildtype_allele - plot_alleles_heatmap(reference_seq,fig_filename_root,X,annot,y_labels,insertion_dict,per_element_annot_kws,custom_colors,SAVE_ALSO_PNG,plot_cut_point,cut_point_ind,sgRNA_intervals,sgRNA_names,sgRNA_mismatches) - -def plot_alleles_heatmap(reference_seq,fig_filename_root,X,annot,y_labels,insertion_dict,per_element_annot_kws,custom_colors,SAVE_ALSO_PNG=False,plot_cut_point=True,cut_point_ind=None,sgRNA_intervals=None,sgRNA_names=None,sgRNA_mismatches=None): - """ - Plots alleles in a heatmap (nucleotides color-coded for easy visualization) - input: - -reference_seq: sequence of reference allele to plot - -fig_filename: figure filename to plot (not including '.pdf' or '.png') - -X: list of numbers representing nucleotides of the allele - -annot: list of nucleotides (letters) of the allele - -y_labels: list of labels for each row/allele - -insertion_dict: locations of insertions -- red squares will be drawn around these - -per_element_annot_kws: annotations for each cell (e.g. bold for substitutions, etc.) - -SAVE_ALSO_PNG: whether to write png file as well - -plot_cut_point: if false, won't draw 'predicted cleavage' line - -cut_point_ind: index to plot cut point at - -sgRNA_intervals: locations where sgRNA is located - -sgRNA_mismatches: array (for each sgRNA_interval) of locations in sgRNA where there are mismatches - -sgRNA_names: array (for each sgRNA_interval) of names of sgRNAs (otherwise empty) - -custom_colors: dict of colors to plot (e.g. colors['A'] = (1,0,0,0.4) # red,blue,green,alpha ) - """ - plot_nuc_len=len(reference_seq) - - # make a color map of fixed colors - alpha=0.4 - A_color=CRISPRessoPlot.get_nuc_color('A',alpha) - T_color=CRISPRessoPlot.get_nuc_color('T',alpha) - C_color=CRISPRessoPlot.get_nuc_color('C',alpha) - G_color=CRISPRessoPlot.get_nuc_color('G',alpha) - INDEL_color = CRISPRessoPlot.get_nuc_color('N',alpha) - - if custom_colors is not None: - if 'A' in custom_colors: - A_color = custom_colors['A'] - if 'T' in custom_colors: - T_color = custom_colors['T'] - if 'C' in custom_colors: - C_color = custom_colors['C'] - if 'G' in custom_colors: - G_color = custom_colors['G'] - if 'N' in custom_colors: - INDEL_color = custom_colors['N'] - - dna_to_numbers={'-':0,'A':1,'T':2,'C':3,'G':4,'N':5} - seq_to_numbers= lambda seq: [dna_to_numbers[x] for x in seq] - - cmap = colors_mpl.ListedColormap([INDEL_color, A_color,T_color,C_color,G_color,INDEL_color]) - - #ref_seq_around_cut=reference_seq[max(0,cut_point-plot_nuc_len/2+1):min(len(reference_seq),cut_point+plot_nuc_len/2+1)] - -# print('per element anoot kws: ' + per_element_annot_kws) - if len(per_element_annot_kws) > 1: - per_element_annot_kws=np.vstack(per_element_annot_kws[::-1]) - else: - per_element_annot_kws=np.array(per_element_annot_kws) - ref_seq_hm=np.expand_dims(seq_to_numbers(reference_seq),1).T - ref_seq_annot_hm=np.expand_dims(list(reference_seq),1).T - - annot=annot[::-1] - X=X[::-1] - - N_ROWS=len(X) - N_COLUMNS=plot_nuc_len - - if N_ROWS < 1: - fig=plt.figure() - ax = fig.add_subplot(111) - plt.text(0.5, 0.5,'No Alleles',horizontalalignment='center',verticalalignment='center',transform = ax.transAxes) - ax.set_clip_on(False) - - plt.savefig(fig_filename_root+'.pdf',bbox_inches='tight') - if SAVE_ALSO_PNG: - plt.savefig(fig_filename_root+'.png',bbox_inches='tight') - plt.close() - return - - sgRNA_rows = [] - num_sgRNA_rows = 0 - - if sgRNA_intervals and len(sgRNA_intervals) > 0: - sgRNA_rows = CRISPRessoPlot.get_rows_for_sgRNA_annotation(sgRNA_intervals,plot_nuc_len) - num_sgRNA_rows = max(sgRNA_rows) + 1 - fig=plt.figure(figsize=(plot_nuc_len*0.3,(N_ROWS+1 + num_sgRNA_rows)*0.6)) - gs1 = gridspec.GridSpec(N_ROWS+2,N_COLUMNS) - gs2 = gridspec.GridSpec(N_ROWS+2,N_COLUMNS) - #ax_hm_ref heatmap for the reference - ax_hm_ref=plt.subplot(gs1[0:1, :]) - ax_hm=plt.subplot(gs2[2:, :]) - else: - fig=plt.figure(figsize=(plot_nuc_len*0.3,(N_ROWS+1)*0.6)) - gs1 = gridspec.GridSpec(N_ROWS+1,N_COLUMNS) - gs2 = gridspec.GridSpec(N_ROWS+1,N_COLUMNS) - #ax_hm_ref heatmap for the reference - ax_hm_ref=plt.subplot(gs1[0, :]) - ax_hm=plt.subplot(gs2[1:, :]) - - - CRISPRessoPlot.custom_heatmap(ref_seq_hm,annot=ref_seq_annot_hm,annot_kws={'size':16},cmap=cmap,fmt='s',ax=ax_hm_ref,vmin=0,vmax=5,square=True) - CRISPRessoPlot.custom_heatmap(X,annot=np.array(annot),annot_kws={'size':16},cmap=cmap,fmt='s',ax=ax_hm,vmin=0,vmax=5,square=True, per_element_annot_kws=per_element_annot_kws) - - ax_hm.yaxis.tick_right() - ax_hm.yaxis.set_ticklabels(y_labels[::-1],rotation=True,va='center') - ax_hm.xaxis.set_ticks([]) - - if sgRNA_intervals and len(sgRNA_intervals) > 0: - this_sgRNA_y_start = -1*num_sgRNA_rows - this_sgRNA_y_height = num_sgRNA_rows - 0.3 - CRISPRessoPlot.add_sgRNA_to_ax(ax_hm_ref,sgRNA_intervals,sgRNA_y_start=this_sgRNA_y_start,sgRNA_y_height=this_sgRNA_y_height,amp_len=plot_nuc_len,font_size='small',clip_on=False,sgRNA_names=sgRNA_names,sgRNA_mismatches=sgRNA_mismatches,x_offset=0,label_at_zero=True,sgRNA_rows=sgRNA_rows) - -# todo -- add sgRNAs below reference plot -# if sgRNA_intervals: -# ax_hm_anno=plt.subplot(gs3[2, :]) -# sgRNA_y_start = 0.3 -## sgRNA_y_height = 0.1 -# sgRNA_y_height = 10 -# min_sgRNA_x = None -# for idx,sgRNA_int in enumerate(sgRNA_intervals): -# ax_hm_anno.add_patch( -# patches.Rectangle((2+sgRNA_int[0], sgRNA_y_start), 1+sgRNA_int[1]-sgRNA_int[0], sgRNA_y_height,facecolor=(0,0,0,0.15)) -# ) -# #set left-most sgrna start -# if not min_sgRNA_x: -# min_sgRNA_x = sgRNA_int[0] -# if sgRNA_int[0] < min_sgRNA_x: -# min_sgRNA_x = sgRNA_int[0] -# ax_hm_anno.text(2+min_sgRNA_x,sgRNA_y_start + sgRNA_y_height/2,'sgRNA ',horizontalalignment='right',verticalalignment='center') - - #print lines - - - #create boxes for ins - for idx,lss in insertion_dict.items(): - for ls in lss: - ax_hm.add_patch(patches.Rectangle((ls[0],N_ROWS-idx-1),ls[1]-ls[0],1,linewidth=3,edgecolor='r',fill=False)) - - #cut point vertical line - if plot_cut_point: - if cut_point_ind is None: - ax_hm.vlines([plot_nuc_len/2],*ax_hm.get_ylim(),linestyles='dashed') - else: - ax_hm.vlines(cut_point_ind,*ax_hm.get_ylim(),linestyles='dashed') - - ax_hm_ref.yaxis.tick_right() - ax_hm_ref.xaxis.set_ticks([]) - ax_hm_ref.yaxis.set_ticklabels(['Reference'],rotation=True,va='center') - - - - gs2.update(left=0,right=1, hspace=0.05,wspace=0,top=1*(((N_ROWS)*1.13))/(N_ROWS)) - gs1.update(left=0,right=1, hspace=0.05,wspace=0,) - - sns.set_context(rc={'axes.facecolor':'white','lines.markeredgewidth': 1,'mathtext.fontset' : 'stix','text.usetex':True,'text.latex.unicode':True} ) - - proxies = [matplotlib.lines.Line2D([0], [0], linestyle='none', mfc='black', - mec='none', marker=r'$\mathbf{{{}}}$'.format('bold'),ms=18), - matplotlib.lines.Line2D([0], [0], linestyle='none', mfc='none', - mec='r', marker='s',ms=8,markeredgewidth=2.5), - matplotlib.lines.Line2D([0], [0], linestyle='none', mfc='none', - mec='black', marker='_',ms=2,)] - descriptions=['Substitutions','Insertions','Deletions'] - - if plot_cut_point: - proxies.append( - matplotlib.lines.Line2D([0], [1], linestyle='--',c='black',ms=6)) - descriptions.append('Predicted cleavage position') - - #ax_hm_ref.legend(proxies, descriptions, numpoints=1, markerscale=2, loc='center', bbox_to_anchor=(0.5, 4),ncol=1) - lgd = ax_hm.legend(proxies, descriptions, numpoints=1, markerscale=2, loc='upper center', bbox_to_anchor=(0.5, 0),ncol=1,fancybox=True,shadow=False) - - plt.savefig(fig_filename_root+'.pdf',bbox_inches='tight',bbox_extra_artists=(lgd,)) - if SAVE_ALSO_PNG: - plt.savefig(fig_filename_root+'.png',bbox_inches='tight',bbox_extra_artists=(lgd,)) - plt.close() - - if __name__ == "__main__": # execute only if run as a script main()