From 460848a1e0dfd84acf6f9ba0ee1eca78de1cc56c Mon Sep 17 00:00:00 2001 From: Samuel Nichols Date: Mon, 5 Feb 2024 13:03:29 -0700 Subject: [PATCH] Cole/fix assigning multiple qwc (#37) * Remove extraneous whitespace * Implement the ability to assign multiple quantification window coordinates * Updating documentation for quantification window coordinates * Update to allow 0 to prevent qwc being set. Update description. --------- Co-authored-by: Cole Lyman --- CRISPResso2/CRISPRessoCORE.py | 20 +++++++++++++------- CRISPResso2/CRISPRessoShared.py | 5 +++-- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/CRISPResso2/CRISPRessoCORE.py b/CRISPResso2/CRISPRessoCORE.py index 26ddbdf2..92f8483d 100644 --- a/CRISPResso2/CRISPRessoCORE.py +++ b/CRISPResso2/CRISPRessoCORE.py @@ -2205,15 +2205,21 @@ def get_prime_editing_guides(this_amp_seq, this_amp_name, ref0_seq, prime_edited refs[ref_name]['contains_guide'] = refs[clone_ref_name]['contains_guide'] #quantification window coordinates override other options - if amplicon_quant_window_coordinates_arr[clone_ref_idx] != "" and amplicon_quant_window_coordinates_arr[this_ref_idx] != '0': + if amplicon_quant_window_coordinates_arr[clone_ref_idx] != "": if amplicon_quant_window_coordinates_arr[this_ref_idx] != "": - this_include_idxs = get_include_idxs_from_quant_window_coordinates(amplicon_quant_window_coordinates_arr[this_ref_idx]) + this_quant_window_coordinates = amplicon_quant_window_coordinates_arr[this_ref_idx] else: - this_include_idxs = get_cloned_include_idxs_from_quant_window_coordinates( - amplicon_quant_window_coordinates_arr[clone_ref_idx], - s1inds.copy(), - ) - + this_quant_window_coordinates = amplicon_quant_window_coordinates_arr[clone_ref_idx] + this_include_idxs = [] + these_coords = this_quant_window_coordinates.split("_") + for coord in these_coords: + coordRE = re.match(r'^(\d+)-(\d+)$', coord) + if coordRE: + start = s1inds[int(coordRE.group(1))] + end = s1inds[int(coordRE.group(2)) + 1] + 
this_include_idxs.extend(range(start, end)) + else: + raise NTException("Cannot parse analysis window coordinate '" + str(coord)) #subtract any indices in 'exclude_idxs' -- e.g. in case some of the cloned include_idxs were near the read ends (excluded) this_exclude_idxs = sorted(list(set(refs[ref_name]['exclude_idxs']))) this_include_idxs = sorted(list(set(np.setdiff1d(this_include_idxs, this_exclude_idxs)))) diff --git a/CRISPResso2/CRISPRessoShared.py b/CRISPResso2/CRISPRessoShared.py index dcfde350..c39b6e4c 100644 --- a/CRISPResso2/CRISPRessoShared.py +++ b/CRISPResso2/CRISPRessoShared.py @@ -220,8 +220,9 @@ def getCRISPRessoArgParser(tool, parser_title="CRISPResso Parameters"): help='If set, in the allele plots, the percentages will show the percentage as a percent of reads aligned to the assigned reference. Default behavior is to show percentage as a percent of all reads.', action='store_true') parser.add_argument('-qwc', '--quantification_window_coordinates', type=str, - help='Bp positions in the amplicon sequence specifying the quantification window. This parameter overrides values of the "--quantification_window_center", "--cleavage_offset", "--window_around_sgrna" or "--window_around_sgrna" values. Any indels/substitutions outside this window are excluded. Indexes are 0-based, meaning that the first nucleotide is position 0. Ranges are separted by the dash sign (e.g. "start-stop"), and multiple ranges can be separated by the underscore (_). ' + - 'A value of 0 disables this filter. (can be comma-separated list of values, corresponding to amplicon sequences given in --amplicon_seq e.g. 5-10,5-10_20-30 would specify the 5th-10th bp in the first reference and the 5th-10th and 20th-30th bp in the second reference)', + help='Bp positions in the amplicon sequence specifying the quantification window. This parameter overrides values of the "--quantification_window_center", "--cleavage_offset", "--window_around_sgrna" or "--window_around_sgrna" values. 
Any indels/substitutions outside this window are excluded. Indexes are 0-based, meaning that the first nucleotide is position 0. Ranges are separated by the dash sign (e.g. "start-stop"), and multiple ranges can be separated by the underscore (_) (can be comma-separated list of values, corresponding to amplicon sequences given in --amplicon_seq e.g. 5-10,5-10_20-30 would specify the 6th-11th bp in the first reference and the 6th-11th and 21st-31st bp in the second reference). ' + + 'A value of 0 disables this filter for a particular amplicon (e.g. 0,90-110 This would disable the quantification window for the first amplicon and specify the quantification window of 90-110 for the second). ' + + 'Note that if there are multiple amplicons provided, and only one quantification window coordinate is provided, the same quantification window will be used for all amplicons and be adjusted to account for insertions/deletions.', default=None) parser.add_argument('--annotate_wildtype_allele', type=str, help='Wildtype alleles in the allele table plots will be marked with this string (e.g. **).',