Skip to content

Commit

Permalink
Cole/fix assigning multiple qwc (#37)
Browse files Browse the repository at this point in the history
* Remove extraneous whitespace

* Implement the ability to assign multiple quantification window coordinates

* Updating documentation for quantification window coordinates

* Update to allow 0 to prevent qwc being set. Update description.

---------

Co-authored-by: Cole Lyman <[email protected]>
  • Loading branch information
2 people authored and mbowcut2 committed Nov 8, 2024
1 parent ebd8f11 commit 460848a
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 9 deletions.
20 changes: 13 additions & 7 deletions CRISPResso2/CRISPRessoCORE.py
Original file line number Diff line number Diff line change
Expand Up @@ -2205,15 +2205,21 @@ def get_prime_editing_guides(this_amp_seq, this_amp_name, ref0_seq, prime_edited
refs[ref_name]['contains_guide'] = refs[clone_ref_name]['contains_guide']

#quantification window coordinates override other options
if amplicon_quant_window_coordinates_arr[clone_ref_idx] != "" and amplicon_quant_window_coordinates_arr[this_ref_idx] != '0':
if amplicon_quant_window_coordinates_arr[clone_ref_idx] != "":
if amplicon_quant_window_coordinates_arr[this_ref_idx] != "":
this_include_idxs = get_include_idxs_from_quant_window_coordinates(amplicon_quant_window_coordinates_arr[this_ref_idx])
this_quant_window_coordinates = amplicon_quant_window_coordinates_arr[this_ref_idx]
else:
this_include_idxs = get_cloned_include_idxs_from_quant_window_coordinates(
amplicon_quant_window_coordinates_arr[clone_ref_idx],
s1inds.copy(),
)

this_quant_window_coordinates = amplicon_quant_window_coordinates_arr[clone_ref_idx]
this_include_idxs = []
these_coords = this_quant_window_coordinates.split("_")
for coord in these_coords:
coordRE = re.match(r'^(\d+)-(\d+)$', coord)
if coordRE:
start = s1inds[int(coordRE.group(1))]
end = s1inds[int(coordRE.group(2)) + 1]
this_include_idxs.extend(range(start, end))
else:
raise NTException("Cannot parse analysis window coordinate '" + str(coord))
#subtract any indices in 'exclude_idxs' -- e.g. in case some of the cloned include_idxs were near the read ends (excluded)
this_exclude_idxs = sorted(list(set(refs[ref_name]['exclude_idxs'])))
this_include_idxs = sorted(list(set(np.setdiff1d(this_include_idxs, this_exclude_idxs))))
Expand Down
5 changes: 3 additions & 2 deletions CRISPResso2/CRISPRessoShared.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,9 @@ def getCRISPRessoArgParser(tool, parser_title="CRISPResso Parameters"):
help='If set, in the allele plots, the percentages will show the percentage as a percent of reads aligned to the assigned reference. Default behavior is to show percentage as a percent of all reads.',
action='store_true')
parser.add_argument('-qwc', '--quantification_window_coordinates', type=str,
help='Bp positions in the amplicon sequence specifying the quantification window. This parameter overrides values of the "--quantification_window_center", "--cleavage_offset", "--window_around_sgrna" or "--window_around_sgrna" values. Any indels/substitutions outside this window are excluded. Indexes are 0-based, meaning that the first nucleotide is position 0. Ranges are separted by the dash sign (e.g. "start-stop"), and multiple ranges can be separated by the underscore (_). ' +
'A value of 0 disables this filter. (can be comma-separated list of values, corresponding to amplicon sequences given in --amplicon_seq e.g. 5-10,5-10_20-30 would specify the 5th-10th bp in the first reference and the 5th-10th and 20th-30th bp in the second reference)',
help='Bp positions in the amplicon sequence specifying the quantification window. This parameter overrides values of the "--quantification_window_center", "--cleavage_offset", "--window_around_sgrna" or "--window_around_sgrna" values. Any indels/substitutions outside this window are excluded. Indexes are 0-based, meaning that the first nucleotide is position 0. Ranges are separated by the dash sign (e.g. "start-stop"), and multiple ranges can be separated by the underscore (_) (can be comma-separated list of values, corresponding to amplicon sequences given in --amplicon_seq e.g. 5-10,5-10_20-30 would specify the 6th-11th bp in the first reference and the 6th-11th and 21st-31st bp in the second reference). ' +
'A value of 0 disables this filter for a particular amplicon (e.g. 0,90-110 would disable the quantification window for the first amplicon and specify the quantification window of 90-110 for the second). ' +
'Note that if there are multiple amplicons provided, and only one quantification window coordinate is provided, the same quantification window will be used for all amplicons and be adjusted to account for insertions/deletions.',
default=None)
parser.add_argument('--annotate_wildtype_allele', type=str,
help='Wildtype alleles in the allele table plots will be marked with this string (e.g. **).',
Expand Down

0 comments on commit 460848a

Please sign in to comment.