Changed behavior for --ambiguity-usage ploid parameter

ablab · Feb 27, 2023 · c44fbb8 · alexeigurevich · Mar 9, 2023 · alexeigurevich
1 parent de9c373
commit c44fbb8
Showing 1 changed file with 25 additions and 10 deletions.
diff --git a/quast_libs/ca_utils/analyze_contigs.py b/quast_libs/ca_utils/analyze_contigs.py
@@ -10,6 +10,7 @@
 from quast_libs.ca_utils.analyze_misassemblies import process_misassembled_contig, IndelsInfo, find_all_sv, Misassembly
 from quast_libs.ca_utils.best_set_selection import get_best_aligns_sets, get_used_indexes, score_single_align
 from quast_libs.ca_utils.misc import ref_labels_by_chromosomes
+from quast_libs.diputils import DipQuastAnalyzer
 
 
 def add_potential_misassembly(ref, misassemblies_by_ref, refs_with_translocations):
@@ -202,17 +203,31 @@ def analyze_contigs(ca_output, contigs_fpath, unaligned_fpath, unaligned_info_fp
                         top_aligns = top_aligns[1:]
                         for align in top_aligns:
                             ca_output.stdout_f.write('\t\t\tSkipping alignment ' + str(align) + '\n')
-                    # This is a template for future "ploid" ambiguity-usage flag, need to change it later:
                     elif qconfig.ambiguity_usage == "ploid":
-                        ca_output.stdout_f.write('\t\tUsing only first of these alignment (option --ambiguity-usage is set to "one"):\n')
-                        ca_output.stdout_f.write('\t\t\tAlignment: %s\n' % str(top_aligns[0]))
-                        ca_output.icarus_out_f.write(top_aligns[0].icarus_report_str() + '\n')
-                        ref_aligns.setdefault(top_aligns[0].ref, []).append(top_aligns[0])
-                        aligned_lengths.append(top_aligns[0].len2)
-                        contigs_aligned_lengths[-1] = top_aligns[0].len2
-                        ca_output.coords_filtered_f.write(top_aligns[0].coords_str() + '\n')
-                        top_aligns = top_aligns[1:]
-                        for align in top_aligns:
+                        dip_dict_haplotypes = DipQuastAnalyzer().fill_dip_dict_by_chromosomes(qconfig.reference) # MOVE HIGHER!
+                        ploidy = len(dip_dict_haplotypes)
+                        ca_output.stdout_f.write(f'\t\tThere are {ploidy} haplotypes. Using no more than one alignment for each haplotype\n')
+                        used_haplotypes = []
+                        skipped_aligns = []
+                        while len(top_aligns):
+                            if len(used_haplotypes) == ploidy:
+                                break
+                            for key, value in dip_dict_haplotypes.items(): # Create method for this later!
+                                if top_aligns[0].ref in value:
+                                    haplotype = key
+                            if haplotype not in used_haplotypes:
+                                ca_output.stdout_f.write('\t\t\tAlignment: %s\n' % str(top_aligns[0]))
+                                ca_output.icarus_out_f.write(top_aligns[0].icarus_report_str() + '\n')
+                                ref_aligns.setdefault(top_aligns[0].ref, []).append(top_aligns[0])
+                                aligned_lengths.append(top_aligns[0].len2)
+                                contigs_aligned_lengths[-1] = top_aligns[0].len2
+                                ca_output.coords_filtered_f.write(top_aligns[0].coords_str() + '\n')
+                                used_haplotypes.append(haplotype)
+                                top_aligns = top_aligns[1:]
+                            else:
+                                skipped_aligns.append(top_aligns[0])
+                                top_aligns = top_aligns[1:]
+                        for align in skipped_aligns:
                             ca_output.stdout_f.write('\t\t\tSkipping alignment ' + str(align) + '\n')
                     elif qconfig.ambiguity_usage == "all":
                         ca_output.stdout_f.write('\t\tUsing all these alignments (option --ambiguity-usage is set to "all"):\n')