Skip to content

Commit

Permalink
Move read filtering to after merging in CRISPResso (#39)
Browse files Browse the repository at this point in the history
* Move read filtering to after merging

This is in an effort to be consistent with the behavior and results of
CRISPRessoPooled.

* Properly assign the correct file names for read filtering

* Add space around operators
  • Loading branch information
Colelyman authored and mbowcut2 committed Mar 26, 2024
1 parent f6e1906 commit bb06075
Showing 1 changed file with 26 additions and 38 deletions.
64 changes: 26 additions & 38 deletions CRISPResso2/CRISPRessoCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -2117,44 +2117,6 @@ def get_prime_editing_guides(this_amp_seq, this_amp_name, ref0_seq, prime_edited

info('Done!', {'percent_complete': 4})

if args.min_average_read_quality>0 or args.min_single_bp_quality>0 or args.min_bp_quality_or_N>0:
if args.bam_input != '':
raise CRISPRessoShared.BadParameterException('The read filtering options are not available with bam input')
info('Filtering reads with average bp quality < %d and single bp quality < %d and replacing bases with quality < %d with N ...' % (args.min_average_read_quality, args.min_single_bp_quality, args.min_bp_quality_or_N))
min_av_quality = None
if args.min_average_read_quality > 0:
min_av_quality = args.min_average_read_quality

min_single_bp_quality = None
if args.min_single_bp_quality > 0:
min_single_bp_quality = args.min_single_bp_quality

min_bp_quality_or_N = None
if args.min_bp_quality_or_N > 0:
min_bp_quality_or_N = args.min_bp_quality_or_N

if args.fastq_r2!='':
output_filename_r1=_jp(os.path.basename(args.fastq_r1.replace('.fastq', '')).replace('.gz', '')+'_filtered.fastq.gz')
output_filename_r2=_jp(os.path.basename(args.fastq_r2.replace('.fastq', '')).replace('.gz', '')+'_filtered.fastq.gz')

from CRISPResso2 import filterFastqs
filterFastqs.filterFastqs(fastq_r1=args.fastq_r1, fastq_r2=args.fastq_r2, fastq_r1_out=output_filename_r1, fastq_r2_out=output_filename_r2, min_bp_qual_in_read=min_single_bp_quality, min_av_read_qual=min_av_quality, min_bp_qual_or_N=min_bp_quality_or_N)

args.fastq_r1 = output_filename_r1
args.fastq_r2 = output_filename_r2
files_to_remove += [output_filename_r1]
files_to_remove += [output_filename_r2]

else:
output_filename_r1=_jp(os.path.basename(args.fastq_r1.replace('.fastq', '')).replace('.gz', '')+'_filtered.fastq.gz')

from CRISPResso2 import filterFastqs
filterFastqs.filterFastqs(fastq_r1=args.fastq_r1, fastq_r1_out=output_filename_r1, min_bp_qual_in_read=min_single_bp_quality, min_av_read_qual=min_av_quality, min_bp_qual_or_N=min_bp_quality_or_N)

args.fastq_r1 = output_filename_r1
files_to_remove += [output_filename_r1]


#Trim and merge reads
if args.bam_input != '' and args.trim_sequences:
raise CRISPRessoShared.BadParameterException('Read trimming options are not available with bam input')
Expand Down Expand Up @@ -2292,7 +2254,33 @@ def get_prime_editing_guides(this_amp_seq, this_amp_name, ref0_seq, prime_edited
info('Wrote force-merged reads to ' + new_merged_filename)

info('Done!', {'percent_complete': 7})
else: # single end reads with no trimming
processed_output_filename = args.fastq_r1

if args.min_average_read_quality > 0 or args.min_single_bp_quality > 0 or args.min_bp_quality_or_N > 0:
if args.bam_input != '':
raise CRISPRessoShared.BadParameterException('The read filtering options are not available with bam input')
info('Filtering reads with average bp quality < %d and single bp quality < %d and replacing bases with quality < %d with N ...' % (args.min_average_read_quality, args.min_single_bp_quality, args.min_bp_quality_or_N))
min_av_quality = None
if args.min_average_read_quality > 0:
min_av_quality = args.min_average_read_quality

min_single_bp_quality = None
if args.min_single_bp_quality > 0:
min_single_bp_quality = args.min_single_bp_quality

min_bp_quality_or_N = None
if args.min_bp_quality_or_N > 0:
min_bp_quality_or_N = args.min_bp_quality_or_N

output_filename_r1 = _jp(os.path.basename(
processed_output_filename.replace('.fastq', '')).replace('.gz', '') + '_filtered.fastq.gz',
)

from CRISPResso2 import filterFastqs
filterFastqs.filterFastqs(fastq_r1=processed_output_filename, fastq_r1_out=output_filename_r1, min_bp_qual_in_read=min_single_bp_quality, min_av_read_qual=min_av_quality, min_bp_qual_or_N=min_bp_quality_or_N)

processed_output_filename = output_filename_r1

#count reads
N_READS_AFTER_PREPROCESSING = 0
Expand Down

0 comments on commit bb06075

Please sign in to comment.