Skip to content

Commit

Permalink
Fix multiprocessing error, don't start pool when only using single th…
Browse files Browse the repository at this point in the history
…read (#302)

* Update README to have consistent use of `--base_editor_output` (#16)

* Add files via upload

* Only start process pools when using multiple processes

This is mainly to solve the issue when running on AWS Lambda, but this should
improve single core performance overall.

---------

Co-authored-by: Kendell Clement <[email protected]>
  • Loading branch information
Colelyman and kclem authored May 11, 2023
1 parent 92a705c commit 1cd54bc
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 23 deletions.
23 changes: 14 additions & 9 deletions CRISPResso2/CRISPRessoAggregateCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,12 @@ def main():
else:
n_processes = int(args.n_processes)

process_pool = ProcessPoolExecutor(n_processes)
process_futures = []
if n_processes > 1:
process_pool = ProcessPoolExecutor(n_processes)
process_futures = []
else:
process_pool = None
process_futures = None

plot = partial(
run_plot,
Expand Down Expand Up @@ -845,13 +849,14 @@ def main():
crispresso2Aggregate_info_file, crispresso2_info,
)

wait(process_futures)
if args.debug:
debug('Plot pool results:')
for future in process_futures:
debug('future: ' + str(future))
future_results = [f.result() for f in process_futures] #required to raise exceptions thrown from within future
process_pool.shutdown()
if n_processes > 1:
wait(process_futures)
if args.debug:
debug('Plot pool results:')
for future in process_futures:
debug('future: ' + str(future))
future_results = [f.result() for f in process_futures] #required to raise exceptions thrown from within future
process_pool.shutdown()

info('Analysis Complete!', {'percent_complete': 100})
print(CRISPRessoShared.get_crispresso_footer())
Expand Down
10 changes: 7 additions & 3 deletions CRISPResso2/CRISPRessoBatchCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,8 +332,12 @@ def main():
if args.suppress_report:
save_png = False

process_futures = []
process_pool = ProcessPoolExecutor(n_processes_for_batch)
if n_processes_for_batch > 1:
process_pool = ProcessPoolExecutor(n_processes_for_batch)
process_futures = []
else:
process_pool = None
process_futures = None

plot = partial(
CRISPRessoMultiProcessing.run_plot,
Expand Down Expand Up @@ -854,7 +858,7 @@ def main():
for line in infile:
outfile.write(batch_name + "\t" + line)

if not args.suppress_batch_summary_plots:
if not args.suppress_batch_summary_plots and n_processes_for_batch > 1:
wait(process_futures)
if args.debug:
debug('CRISPResso batch results:')
Expand Down
27 changes: 16 additions & 11 deletions CRISPResso2/CRISPRessoCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -3175,7 +3175,7 @@ def save_count_vectors_to_file(vectors, vectorNames, refSeq, filename):
refs[ref_name]['ref_plot_name'] = ref_plot_name
continue

if len(ref_plot_name) > 21 and not args.suppress_amplicon_name_truncation:
if len(ref_plot_name) > 21 and not args.suppress_amplicon_name_truncation:
ref_plot_name = ref_plot_name[0:21] #truncate to 21 characters if too long to avoid filename issues

#make sure (truncated) ref plot name is unique
Expand Down Expand Up @@ -3380,8 +3380,12 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_

############

process_pool = ProcessPoolExecutor(n_processes)
process_futures = []
if n_processes > 1:
process_pool = ProcessPoolExecutor(n_processes)
process_futures = []
else:
process_pool = None
process_futures = None
plot = partial(
CRISPRessoMultiProcessing.run_plot,
num_processes=n_processes,
Expand Down Expand Up @@ -3428,7 +3432,7 @@ def count_alternate_alleles(sub_base_vectors, ref_name, ref_sequence, ref_total_
crispresso2_info['results']['general_plots']['plot_1c_data'] = [('Quantification of editing', os.path.basename(quant_of_editing_freq_filename))]
debug('Plotting read class pie chart and bar plot', {'percent_complete': 44})
plot(CRISPRessoPlot.plot_class_piechart_and_barplot, plot_1bc_input)
# to test, run: plot_pool.apply_async(CRISPRessoPlot.plot_class_piechart_and_barplot, kwds=plot_1bc_input).get()
# to test, run: process_pool.apply_async(CRISPRessoPlot.plot_class_piechart_and_barplot, kwds=plot_1bc_input).get()


#1d for dsODN
Expand Down Expand Up @@ -4633,13 +4637,14 @@ def get_scaffold_len(row, scaffold_start_loc, scaffold_seq):
crispresso2_info['results']['general_plots']['plot_11c_data'] = [('Scaffold insertion alleles with insertion sizes', os.path.basename(scaffold_insertion_sizes_filename))]

# join plotting pool
wait(process_futures)
if args.debug:
debug('Plot pool results:')
for future in process_futures:
debug('future: ' + str(future))
future_results = [f.result() for f in process_futures] #required to raise exceptions thrown from within future
process_pool.shutdown()
if n_processes > 1:
wait(process_futures)
if args.debug:
debug('Plot pool results:')
for future in process_futures:
debug('future: ' + str(future))
future_results = [f.result() for f in process_futures] #required to raise exceptions thrown from within future
process_pool.shutdown()

info('Done!')

Expand Down

0 comments on commit 1cd54bc

Please sign in to comment.