script: add extra logic for auto-passing lineages

ktmeaton · Feb 22, 2023 · 7e586d1 · 7e586d1
1 parent 0ebe5e9
commit 7e586d1
Showing 1 changed file with 27 additions and 9 deletions.
diff --git a/sc2rf/postprocess.py b/sc2rf/postprocess.py
@@ -408,23 +408,41 @@ def main(
             sc2rf_details_dict[strain] = []
 
     # ---------------------------------------------------------------------
-    # Auto-pass lineages from nextclade assignment
+    # Auto-pass lineages from the nextclade assignment that were also detected by sc2rf
 
     if nextclade and nextclade_auto_pass:
 
         logger.info(
             "Auto-passing lineages: {}".format(",".join(nextclade_auto_pass_lineages))
         )
 
-        # If already in the df, set the status to positive, update details
-        for rec in df[df["strain"].isin(auto_pass_df.index)].iterrows():
+        for rec in auto_pass_df.iterrows():
             strain = rec[0]
-            strain_orig = rec[1]["strain"]
-            lineage = auto_pass_df.loc[strain_orig]["Nextclade_pango"]
-            details = "nextclade-auto-pass {}".format(lineage)
-            sc2rf_details_dict[strain].append(details)
-            df.at[strain, "sc2rf_status"] = "positive"
-            df.at[strain, "sc2rf_details"] = ";".join(sc2rf_details_dict[strain])
+
+            # If already in the df, set the status to positive, update details
+            if strain in list(df.index):
+
+                strain_orig = df.loc[strain]["strain"]
+                lineage = auto_pass_df.loc[strain_orig]["Nextclade_pango"]
+                details = "nextclade-auto-pass {}".format(lineage)
+                sc2rf_details_dict[strain].append(details)
+
+                df.at[strain, "sc2rf_status"] = "positive"
+                df.at[strain, "sc2rf_details"] = ";".join(sc2rf_details_dict[strain])
+            # If it's not in the dataframe, add it as a new row
+            else:
+                lineage = rec[1]["Nextclade_pango"]
+                details = "nextclade-auto-pass {}".format(lineage)
+                sc2rf_details_dict[strain] = [details]
+
+                # new row
+                row_dict = {col: [NO_DATA_CHAR] for col in df.columns}
+                row_dict["strain"] = strain
+                row_dict["sc2rf_status"] = "positive"
+                row_dict["sc2rf_details"] = ";".join(sc2rf_details_dict[strain])
+
+                row_df = pd.DataFrame(row_dict).set_index("strain")
+                df = pd.concat([df, row_df], ignore_index=False)
 
     # -------------------------------------------------------------------------
     # Begin Post-Processing