diff --git a/workflow/scripts/plot-lineages-over-time.py b/workflow/scripts/plot-lineages-over-time.py index bea202b12..17fd019f3 100644 --- a/workflow/scripts/plot-lineages-over-time.py +++ b/workflow/scripts/plot-lineages-over-time.py @@ -34,10 +34,17 @@ def plot_lineages_over_time(sm_input, sm_output, dates, sm_output_table): pangolin_calls["lineage_count"] = pd.Series() # mask low occurrences - threshold = len(pangolin_calls) / 10 - pangolin_calls.loc[pangolin_calls["lineage_count"] < threshold, "lineage"] = ( - "other (<" + str(threshold) + " occ.)" - ) + print(pangolin_calls["lineage"].value_counts()) + df = pd.DataFrame(pangolin_calls["lineage"].value_counts()) + df.sort_values(by=["lineage"]) + if len(df.index) > 10: + pangolin_calls.loc[ + ~df.head(10).isin(pangolin_calls["lineage"]), "lineage" + ] = "other occ." + else: + pangolin_calls.loc[ + pangolin_calls["lineage_count"] < 0, "lineage" + ] = "other occ." pangolin_calls.rename(columns={"lineage": "Lineage", "date": "Date"}, inplace=True) diff --git a/workflow/scripts/plot-variants-over-time.py b/workflow/scripts/plot-variants-over-time.py index 4be2b776a..7520402cc 100644 --- a/workflow/scripts/plot-variants-over-time.py +++ b/workflow/scripts/plot-variants-over-time.py @@ -84,10 +84,16 @@ def plot_variants_over_time(sm_output, sm_output_table): ].transform(lambda s: s.count()) # mask low occurrences - threshold = len(calls) / 10 - calls.loc[calls["total occurrence"] < threshold, "alteration"] = ( - "other (<" + str(threshold) + " occ.)" - ) + print(calls["alteration"].value_counts()) + df = pd.DataFrame(calls["alteration"].value_counts()) + df.sort_values(by=["alteration"]) + if len(df.index) > 10: + # print(calls.loc[calls["alteration"].isin(df.head(10).index)]) + calls.loc[ + ~calls["alteration"].isin(df.head(10).index), "alteration" + ] = "other occ." + else: + calls.loc[calls["total occurrence"] < 0, "alteration"] = "other occ." calls.rename(columns={"alteration": "Alteration", "date": "Date"}, inplace=True)