diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml index fae5a96b..16941ebd 100644 --- a/.github/workflows/integration_tests.yml +++ b/.github/workflows/integration_tests.yml @@ -2,6 +2,9 @@ name: Run Integration Tests on: push: + + pull_request: + types: [opened, reopened] jobs: build: @@ -37,7 +40,7 @@ jobs: run: | mkdir ../CRISPResso2_copy cp -r * ../CRISPResso2_copy - + - name: Copy C2_tests repo uses: actions/checkout@master with: diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 67b57b6f..564df075 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -2,6 +2,9 @@ name: Pylint on: push: + + pull_request: + types: [opened, reopened] jobs: build: diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index fb2a86dc..5cec2da2 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -2,6 +2,9 @@ name: Run Pytest on: push: + + pull_request: + types: [opened, reopened] jobs: build: diff --git a/CRISPResso2/CRISPRessoCORE.py b/CRISPResso2/CRISPRessoCORE.py index 9a7c278e..2531a0a7 100644 --- a/CRISPResso2/CRISPRessoCORE.py +++ b/CRISPResso2/CRISPRessoCORE.py @@ -136,6 +136,79 @@ def get_n_reads_bam(bam_filename,bam_chr_loc=""): ######################################### + +def split_quant_window_coordinates(quant_window_coordinates): + """Split the quantification window coordinates to be iterated over. + + Parameters + ---------- + quant_window_coordinates: str + The quantification window coordinates, in the form "5-10_100-101", where + the "_" delimits separate ranges and the "-" delimits the range itself. + + Returns + ------- + list of tuples + Where each element is a tuple and the first element of the tuple is the + start of the range and the second element is the end of the range. + """ + coord_re = re.compile(r'^(\d+)-(\d+)$') + try: + return [tuple(map(int, coord_re.match(c).groups())) for c in quant_window_coordinates.split('_')] + except: + raise CRISPRessoShared.BadParameterException("Cannot parse analysis window coordinate '" + str(quant_window_coordinates)) + + +def get_include_idxs_from_quant_window_coordinates(quant_window_coordinates): + """Get the include_idxs from the quantification window coordinates. + + Parameters + ---------- + quant_window_coordinates: str + The quantification window coordinates, in the form "5-10_100-101", where + the "_" delimits separate ranges and the "-" delimits the range itself. + + Returns + ------- + list of int + The include_idxs to be used for quantification, which is the quantification + window coordinates expanded to include all individual indexes contained therein. + """ + return [ + i + for coord in split_quant_window_coordinates(quant_window_coordinates) + for i in range(coord[0], coord[1] + 1) + ] + + +def get_cloned_include_idxs_from_quant_window_coordinates(quant_window_coordinates, idxs): + """Get the include_idxs from the quantification window coordinates, but adjusted according to s1ind. + + Parameters + ---------- + quant_window_coordinates: str + The quantification window coordinates, in the form "5-10_100-101", where + the "_" delimits separate ranges and the "-" delimits the range itself. + idxs: list of int + The index values mapped to the amplicon from which this is being cloned. + + Returns + ------- + list of int + The include_idxs to be used for quantification, which is the quantification + window coordinates expanded to include all individual indexes contained therein, + but adjusted according to s1inds. + """ + include_idxs = [] + for i in range(1, len(idxs)): + if abs(idxs[i-1]) == idxs[i]: + idxs[i] = -1 * abs(idxs[i]) + for coord in split_quant_window_coordinates(quant_window_coordinates): + include_idxs.extend(idxs[coord[0]:coord[1] + 1]) + + return list(filter(lambda x: x >= 0, include_idxs)) + + def get_pe_scaffold_search(prime_edited_ref_sequence, prime_editing_pegRNA_extension_seq, prime_editing_pegRNA_scaffold_seq, prime_editing_pegRNA_scaffold_min_match_length): """ For prime editing, determines the scaffold string to search for (the shortest substring of args.prime_editing_pegRNA_scaffold_seq not in the prime-edited reference sequence) @@ -1966,21 +2039,15 @@ def get_prime_editing_guides(this_amp_seq, this_amp_name, ref0_seq, prime_edited refs[ref_name]['contains_guide'] = refs[clone_ref_name]['contains_guide'] #quantification window coordinates override other options - if amplicon_quant_window_coordinates_arr[clone_ref_idx] != "": + if amplicon_quant_window_coordinates_arr[clone_ref_idx] != "" and amplicon_quant_window_coordinates_arr[this_ref_idx] != '0': if amplicon_quant_window_coordinates_arr[this_ref_idx] != "": - this_quant_window_coordinates = amplicon_quant_window_coordinates_arr[this_ref_idx] + this_include_idxs = get_include_idxs_from_quant_window_coordinates(amplicon_quant_window_coordinates_arr[this_ref_idx]) else: - this_quant_window_coordinates = amplicon_quant_window_coordinates_arr[clone_ref_idx] - this_include_idxs = [] - these_coords = this_quant_window_coordinates.split("_") - for coord in these_coords: - coordRE = re.match(r'^(\d+)-(\d+)$', coord) - if coordRE: - start = s1inds[int(coordRE.group(1))] - end = s1inds[int(coordRE.group(2)) + 1] - this_include_idxs.extend(range(start, end)) - else: - raise NTException("Cannot parse analysis window coordinate '" + str(coord)) + this_include_idxs = get_cloned_include_idxs_from_quant_window_coordinates( + amplicon_quant_window_coordinates_arr[clone_ref_idx], + s1inds.copy(), + ) + #subtract any indices in 'exclude_idxs' -- e.g. in case some of the cloned include_idxs were near the read ends (excluded) this_exclude_idxs = sorted(list(set(refs[ref_name]['exclude_idxs']))) this_include_idxs = sorted(list(set(np.setdiff1d(this_include_idxs, this_exclude_idxs)))) @@ -2117,44 +2184,6 @@ def get_prime_editing_guides(this_amp_seq, this_amp_name, ref0_seq, prime_edited info('Done!', {'percent_complete': 4}) - if args.min_average_read_quality>0 or args.min_single_bp_quality>0 or args.min_bp_quality_or_N>0: - if args.bam_input != '': - raise CRISPRessoShared.BadParameterException('The read filtering options are not available with bam input') - info('Filtering reads with average bp quality < %d and single bp quality < %d and replacing bases with quality < %d with N ...' % (args.min_average_read_quality, args.min_single_bp_quality, args.min_bp_quality_or_N)) - min_av_quality = None - if args.min_average_read_quality > 0: - min_av_quality = args.min_average_read_quality - - min_single_bp_quality = None - if args.min_single_bp_quality > 0: - min_single_bp_quality = args.min_single_bp_quality - - min_bp_quality_or_N = None - if args.min_bp_quality_or_N > 0: - min_bp_quality_or_N = args.min_bp_quality_or_N - - if args.fastq_r2!='': - output_filename_r1=_jp(os.path.basename(args.fastq_r1.replace('.fastq', '')).replace('.gz', '')+'_filtered.fastq.gz') - output_filename_r2=_jp(os.path.basename(args.fastq_r2.replace('.fastq', '')).replace('.gz', '')+'_filtered.fastq.gz') - - from CRISPResso2 import filterFastqs - filterFastqs.filterFastqs(fastq_r1=args.fastq_r1, fastq_r2=args.fastq_r2, fastq_r1_out=output_filename_r1, fastq_r2_out=output_filename_r2, min_bp_qual_in_read=min_single_bp_quality, min_av_read_qual=min_av_quality, min_bp_qual_or_N=min_bp_quality_or_N) - - args.fastq_r1 = output_filename_r1 - args.fastq_r2 = output_filename_r2 - files_to_remove += [output_filename_r1] - files_to_remove += [output_filename_r2] - - else: - output_filename_r1=_jp(os.path.basename(args.fastq_r1.replace('.fastq', '')).replace('.gz', '')+'_filtered.fastq.gz') - - from CRISPResso2 import filterFastqs - filterFastqs.filterFastqs(fastq_r1=args.fastq_r1, fastq_r1_out=output_filename_r1, min_bp_qual_in_read=min_single_bp_quality, min_av_read_qual=min_av_quality, min_bp_qual_or_N=min_bp_quality_or_N) - - args.fastq_r1 = output_filename_r1 - files_to_remove += [output_filename_r1] - - #Trim and merge reads if args.bam_input != '' and args.trim_sequences: raise CRISPRessoShared.BadParameterException('Read trimming options are not available with bam input') @@ -2292,7 +2321,33 @@ def get_prime_editing_guides(this_amp_seq, this_amp_name, ref0_seq, prime_edited info('Wrote force-merged reads to ' + new_merged_filename) info('Done!', {'percent_complete': 7}) + else: # single end reads with no trimming + processed_output_filename = args.fastq_r1 + + if args.min_average_read_quality > 0 or args.min_single_bp_quality > 0 or args.min_bp_quality_or_N > 0: + if args.bam_input != '': + raise CRISPRessoShared.BadParameterException('The read filtering options are not available with bam input') + info('Filtering reads with average bp quality < %d and single bp quality < %d and replacing bases with quality < %d with N ...' % (args.min_average_read_quality, args.min_single_bp_quality, args.min_bp_quality_or_N)) + min_av_quality = None + if args.min_average_read_quality > 0: + min_av_quality = args.min_average_read_quality + + min_single_bp_quality = None + if args.min_single_bp_quality > 0: + min_single_bp_quality = args.min_single_bp_quality + + min_bp_quality_or_N = None + if args.min_bp_quality_or_N > 0: + min_bp_quality_or_N = args.min_bp_quality_or_N + + output_filename_r1 = _jp(os.path.basename( + processed_output_filename.replace('.fastq', '')).replace('.gz', '') + '_filtered.fastq.gz', + ) + + from CRISPResso2 import filterFastqs + filterFastqs.filterFastqs(fastq_r1=processed_output_filename, fastq_r1_out=output_filename_r1, min_bp_qual_in_read=min_single_bp_quality, min_av_read_qual=min_av_quality, min_bp_qual_or_N=min_bp_quality_or_N) + processed_output_filename = output_filename_r1 #count reads N_READS_AFTER_PREPROCESSING = 0 diff --git a/CRISPResso2/CRISPRessoReports/README.md b/CRISPResso2/CRISPRessoReports/README.md index b31cff80..39487ca6 100644 --- a/CRISPResso2/CRISPRessoReports/README.md +++ b/CRISPResso2/CRISPRessoReports/README.md @@ -86,10 +86,16 @@ Also, note that the default commit message may have a summary of all commits, pl 1. In the parent repo, switch to (or create) the branch on `CRISPRessoReports` that will have the changes you push. -If you are creating a new branch based off of `CRISPRessoReports` master, run this: +If you are creating a new branch based off of `CRISPRessoReports` master, run this to switch to the reports master branch: ``` shell -git checkout -b -reports reports/master +git checkout reports/master +``` + +Then, run to actually create (and switch to) the branch that you will be working with: + +``` shell +git checkout -b -reports ``` Or if you would like to push to an existing branch on `CRISPRessoReports`, run this: @@ -106,6 +112,18 @@ git merge --squash -Xsubtree="CRISPResso2/CRISPRessoReports" --no-commit --allow *Note:* `` is the branch of the parent repo that contains the changes inside the `CRISPRessoReports` sub-directory. +3. Push to `CRISPRessoReports`. + +``` shell +git push +``` + +4. Switch back to your branch on `CRISPResso` or `C2Web`. + +``` shell +git checkout +``` + ### I am working on a feature that requires changing `CRISPRessoReports`, what do I do? If a feature that you are working on requires changes to CRISPRessoReports, you will need to perform a few steps to get setup. @@ -113,7 +131,7 @@ If a feature that you are working on requires changes to CRISPRessoReports, you 1. Create a feature branch in the parent repo, based on the parent repo master. ``` shell -git checkout -b origin/master +git checkout -b ``` 2. Create a feature branch on `CRISPRessoReports`. diff --git a/CRISPResso2/CRISPRessoReports/templates/batchReport.html b/CRISPResso2/CRISPRessoReports/templates/batchReport.html index 1255424c..49a6c9ea 100644 --- a/CRISPResso2/CRISPRessoReports/templates/batchReport.html +++ b/CRISPResso2/CRISPRessoReports/templates/batchReport.html @@ -45,7 +45,6 @@ {% endblock %} {% block content %} -
diff --git a/CRISPResso2/CRISPRessoReports/templates/multiReport.html b/CRISPResso2/CRISPRessoReports/templates/multiReport.html index 10d30063..2831d9fc 100644 --- a/CRISPResso2/CRISPRessoReports/templates/multiReport.html +++ b/CRISPResso2/CRISPRessoReports/templates/multiReport.html @@ -46,7 +46,7 @@ {% endblock %} {% block content %} - +
@@ -61,7 +61,7 @@
{{report_name}}
{% for run_name in run_names %} - {{run_name}} + {{run_name}} {% endfor %}
@@ -162,6 +162,7 @@
Summary Plots
{# column #}
+
{% endblock %} {% block foot %} diff --git a/CRISPResso2/CRISPRessoReports/templates/pooledReport.html b/CRISPResso2/CRISPRessoReports/templates/pooledReport.html index ba944d90..899f39c1 100644 --- a/CRISPResso2/CRISPRessoReports/templates/pooledReport.html +++ b/CRISPResso2/CRISPRessoReports/templates/pooledReport.html @@ -44,7 +44,6 @@ {% endblock %} {% block content %} -
@@ -60,8 +59,8 @@
{{report_name}}
{% for region_name in run_names %} - {{region_name}} - {% endfor %} + {{region_name}} + {% endfor %}
diff --git a/CRISPResso2/CRISPRessoReports/templates/wgsReport.html b/CRISPResso2/CRISPRessoReports/templates/wgsReport.html index 5f2008a4..47ab9002 100644 --- a/CRISPResso2/CRISPRessoReports/templates/wgsReport.html +++ b/CRISPResso2/CRISPRessoReports/templates/wgsReport.html @@ -45,7 +45,6 @@ {% endblock %} {% block content %} -
@@ -60,8 +59,8 @@
{{report_name}}
{% for region_name in run_names %} - {{region_name}} - {% endfor %} + {{region_name}} + {% endfor %}
diff --git a/CRISPResso2/CRISPRessoShared.py b/CRISPResso2/CRISPRessoShared.py index 26a212a6..8e82c4e3 100644 --- a/CRISPResso2/CRISPRessoShared.py +++ b/CRISPResso2/CRISPRessoShared.py @@ -1677,7 +1677,47 @@ def set_guide_array(vals, guides, property_name): for idx, val in enumerate(vals_array): if val != '': ret_array[idx] = int(val) - return ret_array + return ret_array + + +def get_relative_coordinates(to_sequence, from_sequence): + """Given an alignment, get the relative coordinates of the second sequence to the first. + + For example, from_sequence[i] matches to to_sequence[inds[i]]. A `-1` + indicates a gap at the beginning of `to_sequence`. + + Parameters + ---------- + to_sequence : str + The alignment of the first sequence (where the coordinates are relative to) + from_sequence : str + The alignment of the second sequence + + Returns + ------- + s1inds_gap_left : list of int + The relative coordinates of the second sequence to the first, where gaps + in the first sequence are filled with the left value. + s1inds_gap_right : list of int + The relative coordinates of the second sequence to the first, where gaps + in the first sequence are filled with the right value. + """ + s1inds_gap_left = [] + s1inds_gap_right = [] + s1idx_left = -1 + s1idx_right = 0 + s2idx = -1 + for ix in range(len(to_sequence)): + if to_sequence[ix] != "-": + s1idx_left += 1 + if from_sequence[ix] != "-": + s2idx += 1 + s1inds_gap_left.append(s1idx_left) + s1inds_gap_right.append(s1idx_right) + if to_sequence[ix] != "-": + s1idx_right += 1 + + return s1inds_gap_left, s1inds_gap_right def get_alignment_coordinates(to_sequence, from_sequence, aln_matrix, needleman_wunsch_gap_open, @@ -1701,23 +1741,7 @@ def get_alignment_coordinates(to_sequence, from_sequence, aln_matrix, needleman_ gap_open=needleman_wunsch_gap_open, gap_extend=needleman_wunsch_gap_extend, gap_incentive=this_gap_incentive) - # print(fws1) - # print(fws2) - s1inds_l = [] - s1inds_r = [] - s1ix_l = -1 - s1ix_r = 0 - s2ix = -1 - for ix in range(len(fws1)): - if fws1[ix] != "-": - s1ix_l += 1 - if fws2[ix] != "-": - s2ix += 1 - s1inds_l.append(s1ix_l) - s1inds_r.append(s1ix_r) - if fws1[ix] != "-": - s1ix_r += 1 - return s1inds_l, s1inds_r + return get_relative_coordinates(fws1, fws2) def get_mismatches(seq_1, seq_2, aln_matrix, needleman_wunsch_gap_open, needleman_wunsch_gap_extend): diff --git a/tests/unit_tests/test_CRISPRessoCORE.py b/tests/unit_tests/test_CRISPRessoCORE.py index 268b6727..9a14549f 100644 --- a/tests/unit_tests/test_CRISPRessoCORE.py +++ b/tests/unit_tests/test_CRISPRessoCORE.py @@ -1,7 +1,7 @@ """Unit tests for CRISPResso2CORE.""" import pytest -from CRISPResso2 import CRISPRessoCORE +from CRISPResso2 import CRISPRessoCORE, CRISPRessoShared def test_get_consensus_alignment_from_pairs(): """Tests for generating consensus alignments from paired reads.""" @@ -93,6 +93,106 @@ def test_get_consensus_alignment_from_pairs(): assert ref_seq == "ATCGATCGAT" assert score == 50 #double check this score... should be 5/10 + +def test_split_quant_window_coordinates_single(): + assert [(5, 10)] == CRISPRessoCORE.split_quant_window_coordinates('5-10') + + +def test_split_quant_window_coordinates_multiple(): + assert CRISPRessoCORE.split_quant_window_coordinates('2-5_10-12') == [(2, 5), (10, 12)] + + +def test_split_quant_window_coordinates_error(): + with pytest.raises(CRISPRessoShared.BadParameterException): + CRISPRessoCORE.split_quant_window_coordinates('a-5') + + +def test_split_quant_window_coordinates_empty(): + with pytest.raises(CRISPRessoShared.BadParameterException): + CRISPRessoCORE.split_quant_window_coordinates('_') + + +def test_split_quant_window_coordinates_partially_empty(): + with pytest.raises(CRISPRessoShared.BadParameterException): + CRISPRessoCORE.split_quant_window_coordinates('1-3_') + + +def test_split_quant_window_coordinates_blank(): + with pytest.raises(CRISPRessoShared.BadParameterException): + CRISPRessoCORE.split_quant_window_coordinates('') + + +def test_get_include_idxs_from_quant_window_coordinates(): + quant_window_coordinates = '1-10_12-20' + assert CRISPRessoCORE.get_include_idxs_from_quant_window_coordinates(quant_window_coordinates) == [*list(range(1, 11)), *list(range(12, 21))] + + +def test_get_cloned_include_idxs_from_quant_window_coordinates(): + quant_window_coordinates = '1-10_12-20' + s1inds = list(range(22)) + assert CRISPRessoCORE.get_cloned_include_idxs_from_quant_window_coordinates(quant_window_coordinates, s1inds) == [*list(range(1, 11)), *list(range(12, 21))] + + +def test_get_cloned_include_idxs_from_quant_window_coordinates_insertion_beginning(): + quant_window_coordinates = '1-10_12-20' + # represents a 5bp insertion at the beginning (left) + s1inds = list(range(5, 27)) + assert CRISPRessoCORE.get_cloned_include_idxs_from_quant_window_coordinates(quant_window_coordinates, s1inds) == [*list(range(6, 16)), *list(range(17, 26))] + +def test_get_cloned_include_idxs_from_quant_window_coordinates_deletion_beginning(): + quant_window_coordinates = '1-10_12-20' + # represents a 5bp deletion at the beginning (left) + s1inds = [-1, -1, -1, -1, -1 ] + list(range(26)) + assert CRISPRessoCORE.get_cloned_include_idxs_from_quant_window_coordinates(quant_window_coordinates, s1inds) == [*list(range(0, 6)), *list(range(7, 16))] + +def test_get_cloned_include_idxs_from_quant_window_coordinates_deletion(): + quant_window_coordinates = '10-20_35-40' + # represents a 7bp deletion in the middle + s1inds = list(range(23)) + [22, 22, 22, 22, 22, 22, 22] + list(range(23, 34)) + assert CRISPRessoCORE.get_cloned_include_idxs_from_quant_window_coordinates(quant_window_coordinates, s1inds) == [*list(range(10, 21)), *list(range(35-7, 41-7))] + +def test_get_cloned_include_idxs_from_quant_window_coordinates_deletion_modified(): + quant_window_coordinates = '10-25_35-40' + # represents a 7bp deletion in the middle, where part of the QW is deleted + # [0, 1, 3, 4, ... , 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 24, ... , 33] + s1inds = list(range(23)) + [22, 22, 22, 22, 22, 22, 22] + list(range(23, 34)) + assert CRISPRessoCORE.get_cloned_include_idxs_from_quant_window_coordinates(quant_window_coordinates, s1inds) == [*list(range(10, 23)), *list(range(35-7, 41-7))] + + +def test_get_cloned_include_idxs_from_quant_window_coordinates_deletion_end_modified(): + # 5 bp deletion at end of 20 bp sequence + quant_window_coordinates = '1-5_10-20' + s1inds = [*list(range(16)), *[15, 15, 15, 15, 15]] + assert CRISPRessoCORE.get_cloned_include_idxs_from_quant_window_coordinates(quant_window_coordinates, s1inds) == [*list(range(1, 6)), *list(range(10, 16))] + +def test_get_cloned_include_idxs_from_quant_window_coordinates_insertion_and_deletion(): + # 5 bp deletion and 5 bp insertion + quant_window_coordinates = '1-5_10-20' + s1inds = [0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 5, 6, 7, 8, 9, 15, 16, 17, 18, 19, 20] + assert CRISPRessoCORE.get_cloned_include_idxs_from_quant_window_coordinates(quant_window_coordinates, s1inds) == [*list(range(1, 6)), *[6, 7, 8, 9, 15, 16, 17, 18, 19, 20]] + +def test_get_cloned_include_idxs_from_quant_window_coordinates_insertion_and_deletion_modified(): + quant_window_coordinates = '1-5_10-20' + s1inds = [0, 1, 2, 2, 4, 5, 6, 7, 7, 7, 7, 7, 7, 8, 9, 10, 15, 16, 17, 18, 19] + assert CRISPRessoCORE.get_cloned_include_idxs_from_quant_window_coordinates(quant_window_coordinates, s1inds) == [*[1,2,4,5], *[8, 9, 10, 15, 16, 17, 18, 19]] + +def test_get_cloned_include_idxs_from_quant_window_coordinates_insertion_across_qw(): + # 6 bp insertion in middle of 4 bp sequence + quant_window_coordinates = '1-4' + s1inds = [0,1,2,9,10] + assert CRISPRessoCORE.get_cloned_include_idxs_from_quant_window_coordinates(quant_window_coordinates, s1inds) == [1,2,9,10] + +def test_get_cloned_include_idxs_from_quant_window_coordinates_deletion_entire_qw(): + # 5 bp deletion of entire qw + quant_window_coordinates = '1-4_7-10' + s1inds = [0, 1, 2, 3, 4, 5, 6, 6, 6, 6, 6] + assert CRISPRessoCORE.get_cloned_include_idxs_from_quant_window_coordinates(quant_window_coordinates, s1inds) == [1, 2, 3, 4] + +def test_get_cloned_include_idxs_from_quant_window_coordinates_include_zero(): + quant_window_coordinates = '0-5' + s1inds = [0, 1, 2, 3, 4, 5] + assert CRISPRessoCORE.get_cloned_include_idxs_from_quant_window_coordinates(quant_window_coordinates, s1inds) == [0, 1, 2, 3, 4, 5] + if __name__ == "__main__": # execute only if run as a script test_get_consensus_alignment_from_pairs() diff --git a/tests/unit_tests/test_CRISPRessoShared.py b/tests/unit_tests/test_CRISPRessoShared.py index da1bc479..f6ce67a0 100644 --- a/tests/unit_tests/test_CRISPRessoShared.py +++ b/tests/unit_tests/test_CRISPRessoShared.py @@ -20,4 +20,57 @@ def test_get_mismatches(): -5, -3, ) - assert len(mismatch_cords) == 6 + assert len(mismatch_cords) == 6 + +def test_get_relative_coordinates(): + s1inds_gap_left, s1inds_gap_right = CRISPRessoShared.get_relative_coordinates('ATCGT', 'TTCGT') + assert s1inds_gap_left == [0, 1, 2, 3, 4] + assert s1inds_gap_right == [0, 1, 2, 3, 4] + + +def test_get_relative_coordinates_to_gap(): + # unaligned sequences + seq_1 = 'TTCGT' + seq_2 = 'TTCT' + + # aligned_sequences + to_sequence = 'TTC-T' + from_sequence = 'TTCGT' + + s1inds_gap_left, s1inds_gap_right = CRISPRessoShared.get_relative_coordinates(to_sequence, from_sequence) + assert s1inds_gap_left == [0, 1, 2, 2, 3] + assert s1inds_gap_right == [0, 1, 2, 3, 3] + + + assert seq_1[0] == seq_2[s1inds_gap_left[0]] + assert seq_1[1] == seq_2[s1inds_gap_left[1]] + assert seq_1[2] == seq_2[s1inds_gap_left[2]] + assert seq_1[4] == seq_2[s1inds_gap_left[4]] + + +def test_get_relative_coordinates_start_gap(): + s1inds_gap_left, s1inds_gap_right = CRISPRessoShared.get_relative_coordinates('--CGT', 'TTCGT') + assert s1inds_gap_left == [-1, -1, 0, 1, 2] + assert s1inds_gap_right == [0, 0, 0, 1, 2] + + +def test_get_relative_coordinates_from_gap(): + s1inds_gap_left, s1inds_gap_right = CRISPRessoShared.get_relative_coordinates('ATCGT', 'ATC-T') + assert s1inds_gap_left == [0, 1, 2, 4] + assert s1inds_gap_right == [0, 1, 2, 4] + +def test_get_relative_coordinates_end_gap(): + s1inds_gap_left, s1inds_gap_right = CRISPRessoShared.get_relative_coordinates('ATC--', 'ATCGT') + assert s1inds_gap_left == [0, 1, 2, 2, 2] + assert s1inds_gap_right == [0, 1, 2, 3, 3] + +def test_get_relative_coordinates_multiple_gaps(): + s1inds_gap_left, s1inds_gap_right = CRISPRessoShared.get_relative_coordinates('AT--TC--G--CC', 'ATCGTCGCGTTCC') + assert s1inds_gap_left == [0, 1, 1, 1, 2, 3, 3, 3, 4, 4, 4, 5, 6] + assert s1inds_gap_right == [0, 1, 2, 2, 2, 3, 4, 4, 4, 5, 5, 5, 6] + +def test_get_relative_coordinates_ind_and_dels(): + s1inds_gap_left, s1inds_gap_right = CRISPRessoShared.get_relative_coordinates('ATG--C', 'A-GCTC') + assert s1inds_gap_left == [0, 2, 2, 2, 3] + assert s1inds_gap_right == [0, 2, 3, 3, 3] +