From 9911f1fb73e82c381acd1785c3ad32a1e41fc019 Mon Sep 17 00:00:00 2001 From: Daniel Standage Date: Fri, 22 May 2020 16:42:59 -0400 Subject: [PATCH] Replace all double quotes with single quotes --- lusSTR/annot.py | 326 ++++++++++++++++++++++++------------------------ 1 file changed, 163 insertions(+), 163 deletions(-) diff --git a/lusSTR/annot.py b/lusSTR/annot.py index 45f66113..d3bb8cf9 100644 --- a/lusSTR/annot.py +++ b/lusSTR/annot.py @@ -42,18 +42,18 @@ def get_annotation(sequence, repeat_list): for element in re.split(repeat, sequence): parts = element.split(',') for i in parts: - if i == "": + if i == '': count += 1 else: if count == 1: final.append(repeat) elif count >= 2: - final.append(f"[{repeat}]{count}") + final.append(f'[{repeat}]{count}') count = 1 final.append(i) - if parts[-1] == "" and count > 2: - final.append(f"[{repeat}]{count-1}") - elif parts[-1] == "" and count <= 2: + if parts[-1] == '' and count > 2: + final.append(f'[{repeat}]{count-1}') + elif parts[-1] == '' and count <= 2: final.append(repeat) tmp = ' '.join(final) elif k > 0: @@ -68,21 +68,21 @@ def get_annotation(sequence, repeat_list): for element in re.split(repeat, tmp): parts = element.split(',') for i in parts: - if i == "": + if i == '': count += 1 else: if count == 1: final.append(repeat) elif count >= 2: - final.append(f"[{repeat}]{count}") + final.append(f'[{repeat}]{count}') count = 1 final.append(i) - if parts[-1] == "" and count > 2: - final.append(f"[{repeat}]{count-1}") - elif parts[-1] == "" and count <= 2: + if parts[-1] == '' and count > 2: + final.append(f'[{repeat}]{count-1}') + elif parts[-1] == '' and count <= 2: final.append(repeat) tmp = ' '.join(final) - return re.sub(" ", " ", tmp) + return re.sub(' ', ' ', tmp) def split_by_n(sequence, n): @@ -104,13 +104,13 @@ def split_string(sequence, n, repeat_list): strings = get_annotation(sequence, repeat_list) final_string = list() for unit in strings.split(' '): - if len(unit) > n and "[" not in unit: + if len(unit) > n and '[' not in unit: for x in split_by_n(unit, n): final_string.append(x) else: final_string.append(unit) final_string_formatted = ' '.join(final_string) - return re.sub(" ", " ", final_string_formatted) + return re.sub(' ', ' ', final_string_formatted) def rev_complement_anno(sequence): @@ -140,22 +140,22 @@ def rev_comp_forward_strand_bracket( sequence is a microvariant or not. ''' if locusid in cannot_split_list: - if locusid == "D19S433": - forward_strand_bracketed_form = D19_annotation(rev_sequence, repeat_list, "CCTT") - elif locusid == "D1S1656": - forward_strand_bracketed_form = D1_annotation(rev_sequence, repeat_list, "CACA") - elif locusid == "D7S820": + if locusid == 'D19S433': + forward_strand_bracketed_form = D19_annotation(rev_sequence, repeat_list, 'CCTT') + elif locusid == 'D1S1656': + forward_strand_bracketed_form = D1_annotation(rev_sequence, repeat_list, 'CACA') + elif locusid == 'D7S820': forward_strand_bracketed_form = D7_anno(rev_sequence, allele, n, repeat_list) else: forward_strand_bracketed_form = split_string(rev_sequence, n, repeat_list) - elif locusid == "FGA": + elif locusid == 'FGA': if len(rev_sequence) % n != 0: forward_strand_bracketed_form = FGA_anno(rev_sequence, repeat_list) else: forward_strand_bracketed_form = loci_need_split_anno(rev_sequence, n) else: forward_strand_bracketed_form = loci_need_split_anno(rev_sequence, n) - return re.sub(" ", " ", forward_strand_bracketed_form) + return re.sub(' ', ' ', forward_strand_bracketed_form) def rev_comp_uas_output_bracket(forward_bracket, n): @@ -173,25 +173,25 @@ def rev_comp_uas_output_bracket(forward_bracket, n): for j in ind: if j.isalpha(): reverse_strand_form.append(complement[j]) - elif j == "[": - reverse_strand_form.append("]") - elif j == "]": - reverse_strand_form.append("[") + elif j == '[': + reverse_strand_form.append(']') + elif j == ']': + reverse_strand_form.append('[') else: reverse_strand_form.append(j) reverse_form_anno_tmp = ''.join(reversed(reverse_strand_form)) reverse_form_anno_final = list() for unit in reverse_form_anno_tmp.split(' '): - if "[" in unit: + if '[' in unit: if len(unit) == (n+3): - final_string = f"{unit[1:(len(unit))]}{unit[0]}" + final_string = f'{unit[1:(len(unit))]}{unit[0]}' else: - final_string = f"{unit[2:(len(unit))]}{unit[1]}{unit[0]}" + final_string = f'{unit[2:(len(unit))]}{unit[1]}{unit[0]}' reverse_form_anno_final.append(final_string) else: reverse_form_anno_final.append(unit) reverse_strand_bracketed_form = ' '.join(reverse_form_anno_final) - return re.sub(" ", " ", reverse_strand_bracketed_form) + return re.sub(' ', ' ', reverse_strand_bracketed_form) def get_blocks(sequence, n): @@ -225,9 +225,9 @@ def loci_need_split_anno(sequence, n): if count == 1: alleles.append(unit) else: - alleles.append(f"[{unit}]{count}") + alleles.append(f'[{unit}]{count}') alleles_final = ' '.join(alleles) - return re.sub(" ", " ", alleles_final) + return re.sub(' ', ' ', alleles_final) def traditional_str_allele(sequence, n, n_sub_out): @@ -240,7 +240,7 @@ def traditional_str_allele(sequence, n, n_sub_out): else: allele_tmp = int(len(new_seq)/n) allele_dec = int(len(new_seq) % n) - trad_allele = f"{allele_tmp}.{allele_dec}" + trad_allele = f'{allele_tmp}.{allele_dec}' return trad_allele @@ -252,13 +252,13 @@ def extract(s, single_repeat): finalcount = 0 for m in re.finditer(single_repeat, s): count = s[m.end()+1:m.end()+3] - if count == "" or count[0] == "[" or count[0] == " " or count.isalpha(): + if count == '' or count[0] == '[' or count[0] == ' ' or count.isalpha(): count = 1 try: if float(count) > float(finalcount): finalcount = count try: - if str(finalcount)[1] == " ": + if str(finalcount)[1] == ' ': finalcount = finalcount[0] except IndexError: count = count @@ -281,24 +281,24 @@ def lus_anno(sequence, lus, sec, tert, locusid, str_allele): The D21S11 locus also requires a separate function because both the lus and secondary motif are the same, but differ based on the location of the repeat. ''' - if locusid == "D21S11": + if locusid == 'D21S11': lus_allele, sec_allele, tert_allele = lus_D21_anno(sequence, lus, sec, tert) else: lus_allele = extract(sequence, lus) - if sec != "": + if sec != '': sec_allele = extract(sequence, sec) - if tert != "": + if tert != '': tert_allele = extract(sequence, tert) - elif locusid == "D7S820": - if str(sequence)[-1] == "T" and isinstance(str_allele, str): + elif locusid == 'D7S820': + if str(sequence)[-1] == 'T' and isinstance(str_allele, str): tert_allele = 1 else: tert_allele = 0 else: - tert_allele = "" + tert_allele = '' else: - sec_allele = "" - tert_allele = "" + sec_allele = '' + tert_allele = '' return lus_allele, sec_allele, tert_allele @@ -306,8 +306,8 @@ def D21_lus_sec(sequence, repeat, tert): ''' Function to identify the number of LUS and secondary motif alleles for the D21S11 locus. - A separate function is required because the LUS repeat motif is the last "TCTA" repeat set and - the secondary repeat motif is the first set of "TCTA" repeats in the sequence. + A separate function is required because the LUS repeat motif is the last 'TCTA' repeat set and + the secondary repeat motif is the first set of 'TCTA' repeats in the sequence. ''' remaining = list() lus_sec = list() @@ -325,7 +325,7 @@ def D21_lus_sec(sequence, repeat, tert): else: parts = element.split('[,]') for i in parts: - if i != "": + if i != '': repeats = extract(i, repeat) lus_sec.append(repeats) if lus_allele is None: @@ -344,13 +344,13 @@ def extract_D21_tert(s, single_repeat): finalcount = 0 for m in re.finditer(single_repeat, s): count = s[m.end()+1:m.end()+3] - if count == "" or count[0] == "[" or count[0] == " " or count.isalpha(): + if count == '' or count[0] == '[' or count[0] == ' ' or count.isalpha(): count = 1 try: if float(count) > float(finalcount): finalcount = count try: - if str(finalcount)[1] == " ": + if str(finalcount)[1] == ' ': finalcount = finalcount[0] except IndexError: count = count @@ -380,7 +380,7 @@ def D21_bracket(sequence, no_of_split_bases, repeats): ''' forward_strand_bracketed_form = split_string(sequence, no_of_split_bases, repeats) prev = 0 - for m in re.finditer("]", forward_strand_bracketed_form): + for m in re.finditer(']', forward_strand_bracketed_form): prev = m.end() if ( prev == (len(forward_strand_bracketed_form) - 1) or @@ -390,24 +390,24 @@ def D21_bracket(sequence, no_of_split_bases, repeats): else: first_string = forward_strand_bracketed_form[:prev+2] second_string = forward_strand_bracketed_form[prev+2:] - second_string_final = re.sub(" ", "", second_string) + second_string_final = re.sub(' ', '', second_string) if len(second_string_final) % 4 == 0: split_second_string = loci_need_split_anno(second_string_final, 4) - final_string = f"{first_string} {second_string}" + final_string = f'{first_string} {second_string}' elif len(second_string_final) == 6: third_string = second_string_final[-6:-4] fourth_string = second_string_final[-4:] - final_string = f"{first_string} {third_string} {fourth_string}" + final_string = f'{first_string} {third_string} {fourth_string}' elif len(second_string_final) % 4 == 2: third_string = second_string_final[:-6] fourth_string = second_string_final[-6:-4] last_string = second_string_final[-4:] third_string_final = loci_need_split_anno(third_string, 4) - final_string = f"{first_string} {third_string_final} {fourth_string} {last_string}" + final_string = f'{first_string} {third_string_final} {fourth_string} {last_string}' else: third_string = loci_need_split_anno(second_string_final, 4) - final_string = f"{first_string} {third_string}" - return re.sub(" ", " ", final_string) + final_string = f'{first_string} {third_string}' + return re.sub(' ', ' ', final_string) def split_sequence_into_two_strings(sequence, repeat_for_split): @@ -437,7 +437,7 @@ def D1_annotation(sequence, repeat_list, repeat_for_split): final = list() first_string, second_string = split_sequence_into_two_strings(sequence_filt, repeat_for_split) final.append(sequence[:2]) - if first_string == "": + if first_string == '': final.append(repeat_for_split) else: final.append(loci_need_split_anno(first_string, 4)) @@ -446,7 +446,7 @@ def D1_annotation(sequence, repeat_list, repeat_for_split): else: final.append(loci_need_split_anno(second_string, 4)) final_string = ' '.join(final) - return re.sub(" ", " ", final_string) + return re.sub(' ', ' ', final_string) def D19_annotation(sequence, repeat_list, repeat_for_split): @@ -483,7 +483,7 @@ def D19_annotation(sequence, repeat_list, repeat_for_split): else: final.append(loci_need_split_anno(second_string, 4)) final_string = ' '.join(final) - return re.sub(" ", " ", final_string) + return re.sub(' ', ' ', final_string) def FGA_anno(sequence, repeat_list): @@ -491,9 +491,9 @@ def FGA_anno(sequence, repeat_list): Function to create the forward strand bracketed annotation for FGA locus. A specialized function is required because which repeat unit should be identified differs - based on its location in the sequence. For example, the "GGAA" repeat should be identified at - the beginning of the sequence; the "GAAA" repeat should be identified at the end of the - sequence; and the repeat "AAAG" should be identified within the two end repeats. + based on its location in the sequence. For example, the 'GGAA' repeat should be identified at + the beginning of the sequence; the 'GAAA' repeat should be identified at the end of the + sequence; and the repeat 'AAAG' should be identified within the two end repeats. Simply identifying repeat units in a specified order does not result in the final annotation which is consistent with previously published annotation for this locus. @@ -503,7 +503,7 @@ def FGA_anno(sequence, repeat_list): if (len(sequence) % 4 == 0): final_string = loci_need_split_anno(sequence, 4) else: - for m in re.finditer("GGAA", sequence): + for m in re.finditer('GGAA', sequence): if prev == 0 or m.start() == prev: prev = m.end() else: @@ -511,10 +511,10 @@ def FGA_anno(sequence, repeat_list): first_string = sequence[:prev] second_string = sequence[prev:] prev = 0 - for m in re.finditer("AAAA", second_string): + for m in re.finditer('AAAA', second_string): prev = m.start() break - if second_string[prev:(prev+6)] == "AAAAAA": + if second_string[prev:(prev+6)] == 'AAAAAA': third_string = second_string[:prev+2] fourth_string = second_string[prev+2:] elif prev == 0: @@ -527,32 +527,32 @@ def FGA_anno(sequence, repeat_list): final.append(split_string(third_string, 4, repeat_list)) count = 0 tmp = list() - for element in re.split("GAAA", fourth_string): + for element in re.split('GAAA', fourth_string): parts = element.split(',') for i in parts: - if i == "": + if i == '': count += 1 else: if count == 1: - tmp.append("GAAA") + tmp.append('GAAA') elif count >= 2: - tmp.append("[GAAA]" + str(count)) + tmp.append('[GAAA]' + str(count)) count = 1 - if i == "AAAAAA": - tmp.append("AA AAAA") + if i == 'AAAAAA': + tmp.append('AA AAAA') elif len(i) > 4: for x in split_by_n(i, 4): tmp.append(x) else: tmp.append(i) - if parts[-1] == "" and count > 2: - tmp.append("[GAAA]" + str(count-1)) - elif parts[-1] == "" and count <= 2: - tmp.append("GAAA") + if parts[-1] == '' and count > 2: + tmp.append('[GAAA]' + str(count-1)) + elif parts[-1] == '' and count <= 2: + tmp.append('GAAA') last_string_final = ' '.join(tmp) final.append(last_string_final) final_string = ' '.join(final) - return re.sub(" ", " ", final_string) + return re.sub(' ', ' ', final_string) def TH01_annotation(sequence, repeat_list): @@ -560,12 +560,12 @@ def TH01_annotation(sequence, repeat_list): Function to create bracketed annotation for the TH01 locus. A separate function is required for the microvariants of the TH01 locus because of the - insertion of the "ATG" between the repeat units "AATG". + insertion of the 'ATG' between the repeat units 'AATG'. ''' strings = get_annotation(sequence, repeat_list) final_string = list() for unit in strings.split(' '): - if "[" not in unit and len(unit) > 3 and (len(unit) % 4 != 0) and unit[:3] == "ATG": + if '[' not in unit and len(unit) > 3 and (len(unit) % 4 != 0) and unit[:3] == 'ATG': group1 = unit[:3] final_string.append(group1) for x in split_by_n(unit[3:], n=4): @@ -592,8 +592,8 @@ def PentaD_annotation(sequence, no_of_repeat_bases, repeat_list): first_string = sequence[:5] second_string = sequence[5:] second_string_anno = split_string(second_string, no_of_repeat_bases, repeat_list) - final_string = f"{first_string} {second_string_anno}" - return re.sub(" ", " ", final_string) + final_string = f'{first_string} {second_string_anno}' + return re.sub(' ', ' ', final_string) def D7_anno(sequence, allele, n, repeat_list): @@ -608,24 +608,24 @@ def D7_anno(sequence, allele, n, repeat_list): forward_strand_bracketed_form = split_string(sequence, n, repeat_list) else: if re.search(r'\d{1,2}.1', allele): - if sequence[-1] == "T": + if sequence[-1] == 'T': forward_strand_bracketed_form = split_string(sequence, n, repeat_list) else: forward_strand_bracketed_form = ( - f"{sequence[0]} " - f"{split_string(sequence[1:], n, repeat_list)}" + f'{sequence[0]} ' + f'{split_string(sequence[1:], n, repeat_list)}' ) elif re.search(r'\d{1,2}.2', allele): new_repeat_list = [ - "TATC", - "TGTC", - "AATC" + 'TATC', + 'TGTC', + 'AATC' ] forward_strand_bracketed_form = split_string(sequence, n, new_repeat_list) else: forward_strand_bracketed_form = ( - f"{sequence[:3]} " - f"{split_string(sequence[3:], n, repeat_list)}" + f'{sequence[:3]} ' + f'{split_string(sequence[3:], n, repeat_list)}' ) return forward_strand_bracketed_form @@ -634,11 +634,11 @@ def D13_anno(sequence, repeats): if len(sequence) < 110: bracketed_form = loci_need_split_anno(sequence, 4) else: - for m in re.finditer("GGGC", sequence): + for m in re.finditer('GGGC', sequence): break_point = m.end() bracketed_form = ( - f"{loci_need_split_anno(sequence[:break_point], 4)} " - f"{split_string(sequence[break_point:], 4, repeats)}" + f'{loci_need_split_anno(sequence[:break_point], 4)} ' + f'{split_string(sequence[break_point:], 4, repeats)}' ) return bracketed_form @@ -654,7 +654,7 @@ def resolve_uas_sequence(sequence, str_data, kit, locus, n): trim5 = str_data['Power_5'] trim3 = str_data['Power_3'] - if str_data['ReverseCompNeeded'] == "No": + if str_data['ReverseCompNeeded'] == 'No': uas_sequence = full_seq_to_uas(sequence, trim5, trim3) else: uas_from_full = full_seq_to_uas(sequence, trim5, trim3) @@ -685,40 +685,40 @@ def flank_5(full_seq, front, locus, n): 'D19S433', 'FGA', 'TPOX', 'CSF1PO', 'D3S1358', 'D6S1043', 'TH01', 'D9S1122' ] flank_seq = full_seq[:front] - if locus == "D8S1179": - flank = "" - elif locus == "D13S317": + if locus == 'D8S1179': + flank = '' + elif locus == 'D13S317': flank = ( - f"{flank_seq[:2]} {loci_need_split_anno(flank_seq[2:14], 4)} {flank_seq[14]} " - f"{flank_seq[15]} {flank_seq[16:19]} {loci_need_split_anno(flank_seq[19:], 4)}" + f'{flank_seq[:2]} {loci_need_split_anno(flank_seq[2:14], 4)} {flank_seq[14]} ' + f'{flank_seq[15]} {flank_seq[16:19]} {loci_need_split_anno(flank_seq[19:], 4)}' ) - elif locus == "D20S482": + elif locus == 'D20S482': flank = ( - f"{flank_seq[:2]} {flank_seq[2:6]} {flank_seq[6]} {flank_seq[7:10]} " - f"{flank_seq[10:]} {flank_seq[14:18]}" + f'{flank_seq[:2]} {flank_seq[2:6]} {flank_seq[6]} {flank_seq[7:10]} ' + f'{flank_seq[10:]} {flank_seq[14:18]}' ) - elif locus == "D2S441": - flank = f"{flank_seq[:4]} {flank_seq[4]} {loci_need_split_anno(flank_seq[5:], 4)}" - elif locus == "D7S820": - flank = f"{flank_seq[0]} {loci_need_split_anno(flank_seq[1:13], 4)} {flank_seq[13:]}" - elif locus == "D16S539": + elif locus == 'D2S441': + flank = f'{flank_seq[:4]} {flank_seq[4]} {loci_need_split_anno(flank_seq[5:], 4)}' + elif locus == 'D7S820': + flank = f'{flank_seq[0]} {loci_need_split_anno(flank_seq[1:13], 4)} {flank_seq[13:]}' + elif locus == 'D16S539': flank = ( - f"{flank_seq[:2]} {flank_seq[2:6]} {flank_seq[6]} " - f"{loci_need_split_anno(flank_seq[7:], 4)}" + f'{flank_seq[:2]} {flank_seq[2:6]} {flank_seq[6]} ' + f'{loci_need_split_anno(flank_seq[7:], 4)}' ) - elif locus == "D1S1656": - flank = f"{flank_seq[:3]} {loci_need_split_anno(flank_seq[3:], 4)}" - elif locus == "PentaD": + elif locus == 'D1S1656': + flank = f'{flank_seq[:3]} {loci_need_split_anno(flank_seq[3:], 4)}' + elif locus == 'PentaD': flank = ( - f"{loci_need_split_anno(flank_seq[:20], 5)} {flank_seq[20]} {flank_seq[21:25]} " - f"{loci_need_split_anno(flank_seq[25:], 5)}" + f'{loci_need_split_anno(flank_seq[:20], 5)} {flank_seq[20]} {flank_seq[21:25]} ' + f'{loci_need_split_anno(flank_seq[25:], 5)}' ) - elif locus == "vWA": - flank = f"{flank_seq[:3]} {loci_need_split_anno(flank_seq[3:], 4)}" - elif locus == "D10S1248": - flank = f"{flank_seq[:2]} {loci_need_split_anno(flank_seq[2:], 4)}" - elif locus == "D22S1045": - flank = f"{flank_seq[0]} {loci_need_split_anno(flank_seq[1:], 3)}" + elif locus == 'vWA': + flank = f'{flank_seq[:3]} {loci_need_split_anno(flank_seq[3:], 4)}' + elif locus == 'D10S1248': + flank = f'{flank_seq[:2]} {loci_need_split_anno(flank_seq[2:], 4)}' + elif locus == 'D22S1045': + flank = f'{flank_seq[0]} {loci_need_split_anno(flank_seq[1:], 3)}' elif locus in invariant_loci: flank_rev = loci_need_split_anno(flank_seq[::-1], n) flank = flank_rev[::-1] @@ -732,28 +732,28 @@ def flank_3(full_seq, back, locus, n): 'vWA', 'D19S433', 'D6S1043', 'PentaE', 'TH01', 'PentaD' ] flank_seq = full_seq[-back:] - if locus == "D1S1656" or locus == "FGA": - flank = "" - elif locus == "CSF1PO": - flank = f"{flank_seq[0]} {loci_need_split_anno(flank_seq[1:-1], 4)} {flank_seq[-1]}" - elif locus == "D18S51": + if locus == 'D1S1656' or locus == 'FGA': + flank = '' + elif locus == 'CSF1PO': + flank = f'{flank_seq[0]} {loci_need_split_anno(flank_seq[1:-1], 4)} {flank_seq[-1]}' + elif locus == 'D18S51': flank = ( - f"{flank_seq[:2]} {loci_need_split_anno(flank_seq[2:30], 4)} {flank_seq[30:33]} " - f"{flank_seq[33]} {loci_need_split_anno(flank_seq[34:42], 4)} {flank_seq[42:44]} " - f"{flank_seq[44:]}" + f'{flank_seq[:2]} {loci_need_split_anno(flank_seq[2:30], 4)} {flank_seq[30:33]} ' + f'{flank_seq[33]} {loci_need_split_anno(flank_seq[34:42], 4)} {flank_seq[42:44]} ' + f'{flank_seq[44:]}' ) - elif locus == "D16S539": + elif locus == 'D16S539': flank = ( - f"{loci_need_split_anno(flank_seq[:12], 4)} {flank_seq[12:15]} {flank_seq[15]} " - f"{loci_need_split_anno(flank_seq[16:28], 4)} {flank_seq[28:31]} " - f"{flank_seq[31:33]} {flank_seq[33]} {flank_seq[-2:]}" + f'{loci_need_split_anno(flank_seq[:12], 4)} {flank_seq[12:15]} {flank_seq[15]} ' + f'{loci_need_split_anno(flank_seq[16:28], 4)} {flank_seq[28:31]} ' + f'{flank_seq[31:33]} {flank_seq[33]} {flank_seq[-2:]}' ) - elif locus == "D7S820": + elif locus == 'D7S820': flank = loci_need_split_anno(flank_seq, 4) - elif locus == "D21S11": + elif locus == 'D21S11': flank = ( - f"{flank_seq[:2]} {flank_seq[2]} {loci_need_split_anno(flank_seq[3:11], 4)} " - f"{flank_seq[-1]}" + f'{flank_seq[:2]} {flank_seq[2]} {loci_need_split_anno(flank_seq[3:11], 4)} ' + f'{flank_seq[-1]}' ) elif locus in invariant_loci: flank = loci_need_split_anno(flank_seq, n) @@ -762,10 +762,10 @@ def flank_3(full_seq, back, locus, n): def main(args): cannot_split = [ - "D19S433", "D6S1043", "TH01", "D21S11", "D1S1656", "D7S820", "D5S818", "D12S391", - "D9S1122", "PentaE" + 'D19S433', 'D6S1043', 'TH01', 'D21S11', 'D1S1656', 'D7S820', 'D5S818', 'D12S391', + 'D9S1122', 'PentaE' ] - must_split = ["D13S317", "D18S51"] + must_split = ['D13S317', 'D18S51'] data = pd.read_csv(args.input) list_of_lists = [] @@ -779,8 +779,8 @@ def main(args): project = data.iloc[i, 4] analysis = data.iloc[i, 5] except IndexError: - project = "NA" - analysis = "NA" + project = 'NA' + analysis = 'NA' repeats = str_dict[locus]['Repeats'] no_of_repeat_bases = len(str_dict[locus]['LUS']) no_of_sub_bases = str_dict[locus]['BasesToSubtract'] @@ -800,7 +800,7 @@ def main(args): havetosplit = locus in must_split split_incompatible = len(uas_sequence) % no_of_repeat_bases != 0 and not havetosplit if cantsplit or split_incompatible: - if str_dict[locus]['ReverseCompNeeded'] == "Yes": + if str_dict[locus]['ReverseCompNeeded'] == 'Yes': reverse_comp_sequence = rev_complement_anno(uas_sequence) forward_strand_bracketed_form = rev_comp_forward_strand_bracket( reverse_comp_sequence, no_of_repeat_bases, repeats, locus, cannot_split, @@ -809,13 +809,13 @@ def main(args): reverse_strand_bracketed_form = rev_comp_uas_output_bracket( forward_strand_bracketed_form, no_of_repeat_bases ) - elif locus == "D21S11": + elif locus == 'D21S11': forward_strand_bracketed_form = D21_bracket( uas_sequence, no_of_split_bases, repeats ) - elif locus == "TH01" and (len(uas_sequence) % no_of_repeat_bases != 0): + elif locus == 'TH01' and (len(uas_sequence) % no_of_repeat_bases != 0): forward_strand_bracketed_form = TH01_annotation(uas_sequence, repeats) - elif locus == "PentaD": + elif locus == 'PentaD': forward_strand_bracketed_form = PentaD_annotation( uas_sequence, no_of_repeat_bases, repeats ) @@ -827,7 +827,7 @@ def main(args): forward_strand_bracketed_form, lus, sec, tert, locus, str_allele ) else: - if locus == "D18S51": + if locus == 'D18S51': if type(str_allele) == str: forward_strand_bracketed_form = split_string( uas_sequence, no_of_repeat_bases, repeats @@ -836,9 +836,9 @@ def main(args): forward_strand_bracketed_form = loci_need_split_anno( uas_sequence, no_of_repeat_bases ) - elif locus == "D13S317": + elif locus == 'D13S317': forward_strand_bracketed_form = D13_anno(uas_sequence, repeats) - elif str_dict[locus]['ReverseCompNeeded'] == "Yes": + elif str_dict[locus]['ReverseCompNeeded'] == 'Yes': reverse_comp_sequence = rev_complement_anno(uas_sequence) forward_strand_bracketed_form = rev_comp_forward_strand_bracket( reverse_comp_sequence, no_of_repeat_bases, repeats, locus, cannot_split, @@ -847,7 +847,7 @@ def main(args): reverse_strand_bracketed_form = rev_comp_uas_output_bracket( forward_strand_bracketed_form, no_of_repeat_bases ) - elif locus == "PentaD": + elif locus == 'PentaD': forward_strand_bracketed_form = PentaD_annotation( uas_sequence, no_of_repeat_bases, repeats ) @@ -859,20 +859,20 @@ def main(args): forward_strand_bracketed_form, lus, sec, tert, locus, str_allele ) - if locus == "PentaD": - if str_allele == "2.2": + if locus == 'PentaD': + if str_allele == '2.2': lus_final = 5 - elif str_allele == "3.2": + elif str_allele == '3.2': lus_final = 6 - lus_final_output = f"{str_allele}_{lus_final}" - if sec_final == "": + lus_final_output = f'{str_allele}_{lus_final}' + if sec_final == '': lus_plus = lus_final_output else: - if tert_final == "": - lus_plus = f"{str_allele}_{lus_final}_{sec_final}" + if tert_final == '': + lus_plus = f'{str_allele}_{lus_final}_{sec_final}' else: - lus_plus = f"{str_allele}_{lus_final}_{sec_final}_{tert_final}" - if str_dict[locus]['ReverseCompNeeded'] == "Yes": + lus_plus = f'{str_allele}_{lus_final}_{sec_final}_{tert_final}' + if str_dict[locus]['ReverseCompNeeded'] == 'Yes': summary = [ sampleid, project, analysis, locus, uas_sequence, reverse_comp_sequence, str_allele, forward_strand_bracketed_form, reverse_strand_bracketed_form, @@ -886,14 +886,14 @@ def main(args): ] list_of_lists.append(summary) - if not args.uas and args.kit == "forenseq": - if flank_5_anno == "": - full_bracketed_anno = f"{forward_strand_bracketed_form} {flank_3_anno}" - elif flank_3_anno == "": - full_bracketed_anno = f"{flank_5_anno} {forward_strand_bracketed_form}" + if not args.uas and args.kit == 'forenseq': + if flank_5_anno == '': + full_bracketed_anno = f'{forward_strand_bracketed_form} {flank_3_anno}' + elif flank_3_anno == '': + full_bracketed_anno = f'{flank_5_anno} {forward_strand_bracketed_form}' else: full_bracketed_anno = ( - f"{flank_5_anno} {forward_strand_bracketed_form} {flank_3_anno}" + f'{flank_5_anno} {forward_strand_bracketed_form} {flank_3_anno}' ) flank_summary = [ sampleid, project, analysis, locus, reads, str_allele, sequence, flank_5_anno, @@ -915,7 +915,7 @@ def main(args): 'Full_Sequence', '5_Flank_Anno', 'UAS_Region_Anno', '3_Flank_Anno' ] final_flank_output = pd.DataFrame(flanks_list, columns=flanks_columns) - final_flank_output.to_csv(f"{name}_flanks_anno.txt", sep="\t", index=False) + final_flank_output.to_csv(f'{name}_flanks_anno.txt', sep='\t', index=False) if args.combine: final_output = final_output.groupby([ 'SampleID', 'Project', 'Analysis', 'Locus', 'UAS_Output_Sequence', @@ -923,8 +923,8 @@ def main(args): 'Forward_Strand_Bracketed_form', 'UAS_Output_Bracketed_Form', 'LUS', 'LUS_Plus' ], as_index=False)['Reads'].sum() - final_output.to_csv(args.out, sep="\t", index=False) + final_output.to_csv(args.out, sep='\t', index=False) else: - final_output.to_csv(f"{name}_no_combined_reads.txt", sep="\t", index=False) + final_output.to_csv(f'{name}_no_combined_reads.txt', sep='\t', index=False) else: - final_output.to_csv(args.out, sep="\t", index=False) + final_output.to_csv(args.out, sep='\t', index=False)