From 7e586d1d1ee0d449f54faf953eaa6e2ce0f8da13 Mon Sep 17 00:00:00 2001
From: Katherine Eaton <katherine.eaton@phac-aspc.gc.ca>
Date: Wed, 22 Feb 2023 09:41:07 -0600
Subject: [PATCH] script: add extra logic for auto-passing lineages

---
 sc2rf/postprocess.py | 36 +++++++++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/sc2rf/postprocess.py b/sc2rf/postprocess.py
index ae56a5f..5961096 100644
--- a/sc2rf/postprocess.py
+++ b/sc2rf/postprocess.py
@@ -408,7 +408,7 @@ def main(
             sc2rf_details_dict[strain] = []
 
     # ---------------------------------------------------------------------
-    # Auto-pass lineages from nextclade assignment
+    # Auto-pass lineages from nextclade assignment that were also detected by sc2rf
 
     if nextclade and nextclade_auto_pass:
 
@@ -416,15 +416,33 @@ def main(
             "Auto-passing lineages: {}".format(",".join(nextclade_auto_pass_lineages))
         )
 
-        # If already in the df, set the status to positive, update details
-        for rec in df[df["strain"].isin(auto_pass_df.index)].iterrows():
+        for rec in auto_pass_df.iterrows():
             strain = rec[0]
-            strain_orig = rec[1]["strain"]
-            lineage = auto_pass_df.loc[strain_orig]["Nextclade_pango"]
-            details = "nextclade-auto-pass {}".format(lineage)
-            sc2rf_details_dict[strain].append(details)
-            df.at[strain, "sc2rf_status"] = "positive"
-            df.at[strain, "sc2rf_details"] = ";".join(sc2rf_details_dict[strain])
+
+            # If already in the df, set the status to positive, update details
+            if strain in list(df.index):
+
+                strain_orig = df.loc[strain]["strain"]
+                lineage = auto_pass_df.loc[strain_orig]["Nextclade_pango"]
+                details = "nextclade-auto-pass {}".format(lineage)
+                sc2rf_details_dict[strain].append(details)
+
+                df.at[strain, "sc2rf_status"] = "positive"
+                df.at[strain, "sc2rf_details"] = ";".join(sc2rf_details_dict[strain])
+            # If it's not in the dataframe, add it
+            else:
+                lineage = rec[1]["Nextclade_pango"]
+                details = "nextclade-auto-pass {}".format(lineage)
+                sc2rf_details_dict[strain] = [details]
+
+                # new row
+                row_dict = {col: [NO_DATA_CHAR] for col in df.columns}
+                row_dict["strain"] = strain
+                row_dict["sc2rf_status"] = "positive"
+                row_dict["sc2rf_details"] = ";".join(sc2rf_details_dict[strain])
+
+                row_df = pd.DataFrame(row_dict).set_index("strain")
+                df = pd.concat([df, row_df], ignore_index=False)
 
     # -------------------------------------------------------------------------
     # Begin Post-Processing