-
Notifications
You must be signed in to change notification settings - Fork 0
/
not_matches.py
52 lines (42 loc) · 2.32 KB
/
not_matches.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import sys
from collections import defaultdict, Counter
def normalize_primer_name(primer_name: str) -> str:
    """Return *primer_name* without the "Primer Names: " label or padding.

    Per the original author's note, previous versions resulted in duplicate
    names for some primers; passing every name through this function gives
    each primer a single canonical spelling.

    Args:
        primer_name: Raw primer name, possibly carrying the list label
            and/or surrounding whitespace.

    Returns:
        The primer name with every "Primer Names: " occurrence removed and
        leading/trailing whitespace stripped.
    """
    without_label = primer_name.strip().replace("Primer Names: ", "")
    # Strip a second time: removing the label can expose whitespace that sat
    # between the label and the name, which would otherwise leave " P1" and
    # "P1" as two distinct primers.
    return without_label.strip()
def count_primer_frequencies(input_file):
    """Parse a fasta/primer dictionary file and tally primer usage.

    The parser recognises two kinds of lines and ignores everything else:

    * ``Fasta Name: <name>`` at the start of a line opens a new fasta entry.
    * ``Primer Names: <p1>, <p2>, ...`` (with any leading indentation) lists
      the primers belonging to the most recently opened fasta entry.

    Args:
        input_file: Path of the text file to read.

    Returns:
        A 4-tuple ``(primer_frequencies, primer_occurrences,
        fasta_associations, all_fasta_names)``:

        * ``primer_frequencies`` -- ``Counter`` of how many times each primer
          name was listed (repeats within one entry are counted).
        * ``primer_occurrences`` -- ``defaultdict(set)`` mapping each primer
          name to the fasta names it was listed under.
        * ``fasta_associations`` -- a second, independent mapping with the
          same content as ``primer_occurrences``; kept so callers that unpack
          four values keep working.
        * ``all_fasta_names`` -- set of every fasta name seen, including
          entries that list no primers.

    Raises:
        OSError: If ``input_file`` cannot be opened.
    """
    fasta_label = "Fasta Name: "
    primer_label = "Primer Names:"

    primer_frequencies = Counter()
    primer_occurrences = defaultdict(set)
    fasta_associations = defaultdict(set)
    all_fasta_names = set()

    fasta_name = None
    with open(input_file, "r") as file:
        # Iterate the handle directly so large files are streamed rather than
        # loaded into memory in one go.
        for line in file:
            if line.startswith(fasta_label):
                # Slice the label off the front instead of replacing it
                # anywhere in the line, so only the prefix is removed.
                fasta_name = line[len(fasta_label):].strip()
                all_fasta_names.add(fasta_name)
                continue

            entry = line.strip()
            # Primer lines only count once a (non-empty) fasta entry is open.
            if not fasta_name or not entry.startswith(primer_label):
                continue

            listed = entry[len(primer_label):].strip()
            if not listed:
                # An empty primer list must not be counted as a primer that
                # is literally called "Primer Names:".
                continue

            primer_names = [
                normalize_primer_name(raw_name) for raw_name in listed.split(", ")
            ]
            # Drop names that normalise to nothing (e.g. a trailing ", ").
            primer_names = [name for name in primer_names if name]

            primer_frequencies.update(primer_names)
            for primer_name in primer_names:
                primer_occurrences[primer_name].add(fasta_name)
                fasta_associations[primer_name].add(fasta_name)

    return primer_frequencies, primer_occurrences, fasta_associations, all_fasta_names
def main():
    """Command-line entry point: print primer statistics for one input file.

    Expects exactly one command-line argument, the fasta/primer dictionary
    file (the output file from primer_dictionaries.py). Prints, for every
    primer, how often it was listed and under how many distinct fasta names,
    followed by the fasta names each primer is *not* associated with.

    Raises:
        SystemExit: With a usage message (written to stderr, exit status 1)
            when the argument count is wrong.
    """
    if len(sys.argv) != 2:
        # sys.exit(<str>) writes the message to stderr and exits with status
        # 1, so shells and pipelines can detect the misuse.
        sys.exit("Usage: python3 not_matches.py fasta_primer_dictionary.txt")

    input_file = sys.argv[1]  # this is the output file from primer_dictionaries.py
    primer_frequencies, primer_occurrences, fasta_associations, all_fasta_names = (
        count_primer_frequencies(input_file)
    )

    for primer_name, frequency in primer_frequencies.items():
        occurrence_count = len(primer_occurrences[primer_name])
        print(f"Primer Name: {primer_name}, Frequency: {frequency}, Occurrences: {occurrence_count}")

    # Author's note: this output only accounts for fasta names or seq_ids that
    # have amplicons generated from
    # extract_amplicon_from_primersearch_output_UnO.py
    print("\nFasta names not associated with each individual primer:")
    for primer_name in primer_frequencies:
        unused_fasta_files = all_fasta_names - fasta_associations[primer_name]
        if unused_fasta_files:
            print(f"Primer Name: {primer_name}")
            # Sets have no stable order; sort so repeated runs on the same
            # input produce identical, diff-able output.
            for fasta in sorted(unused_fasta_files):
                print(f" {fasta}")
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()