From 1085ce0e52bf9940990746a9c5d81c6ab7000d8b Mon Sep 17 00:00:00 2001 From: Katherine Eaton Date: Tue, 8 Nov 2022 11:03:04 -0600 Subject: [PATCH] script: postprocess count alleles outside regions as intermissions for #195 --- sc2rf/postprocess.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/sc2rf/postprocess.py b/sc2rf/postprocess.py index 61d70ff..562cb0d 100644 --- a/sc2rf/postprocess.py +++ b/sc2rf/postprocess.py @@ -586,6 +586,24 @@ def main( alleles_by_parent[clade] = [] alleles_by_parent[clade].append(allele) + # Add in alleles that were not assigned to any region + for allele in alleles_split: + allele_nuc = allele.split("|")[2] + # Skip missing data + if allele_nuc == "N": + continue + + allele_coord = int(allele.split("|")[0]) + allele_in_region = False + for start_coord in regions_filter: + end_coord = regions_filter[start_coord]["end"] + if allele_coord >= start_coord and allele_coord <= end_coord: + allele_in_region = True + + # Alleles not assigned to any region are counted as intermissions + if not allele_in_region: + intermission_alleles.append(allele) + # Identify the "minor" parent (least number of alleles) # minor_parent = None minor_num_alleles = len(alleles_split)