Skip to content

Commit

Permalink
fix: relative aggregation sample plot (#412)
Browse files Browse the repository at this point in the history
* Changed plots occurrence threshold to relative

* fmt

* Name changed: number of occurrences in plot

* fmt

* Changed threshold values for variant/lineage plots

* Changed way of thresholding lineages/variants

* fmt

* Changed name for other occ. in plot

Co-authored-by: Thomas Battenfeld <[email protected]>
  • Loading branch information
AKBrueggemann and thomasbtf authored Jan 4, 2022
1 parent 0e8f993 commit 6a8155e
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 8 deletions.
15 changes: 11 additions & 4 deletions workflow/scripts/plot-lineages-over-time.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,17 @@ def plot_lineages_over_time(sm_input, sm_output, dates, sm_output_table):
pangolin_calls["lineage_count"] = pd.Series()

# mask low occurrences
threshold = len(pangolin_calls) / 10
pangolin_calls.loc[pangolin_calls["lineage_count"] < threshold, "lineage"] = (
"other (<" + str(threshold) + " occ.)"
)
print(pangolin_calls["lineage"].value_counts())
df = pd.DataFrame(pangolin_calls["lineage"].value_counts())
df.sort_values(by=["lineage"])
if len(df.index) > 10:
pangolin_calls.loc[
~df.head(10).isin(pangolin_calls["lineage"]), "lineage"
] = "other occ."
else:
pangolin_calls.loc[
pangolin_calls["lineage_count"] < 0, "lineage"
] = "other occ."

pangolin_calls.rename(columns={"lineage": "Lineage", "date": "Date"}, inplace=True)

Expand Down
14 changes: 10 additions & 4 deletions workflow/scripts/plot-variants-over-time.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,16 @@ def plot_variants_over_time(sm_output, sm_output_table):
].transform(lambda s: s.count())

# mask low occurrences
threshold = len(calls) / 10
calls.loc[calls["total occurrence"] < threshold, "alteration"] = (
"other (<" + str(threshold) + " occ.)"
)
print(calls["alteration"].value_counts())
df = pd.DataFrame(calls["alteration"].value_counts())
df.sort_values(by=["alteration"])
if len(df.index) > 10:
# print(calls.loc[calls["alteration"].isin(df.head(10).index)])
calls.loc[
~calls["alteration"].isin(df.head(10).index), "alteration"
] = "other occ."
else:
calls.loc[calls["total occurrence"] < 0, "alteration"] = "other occ."

calls.rename(columns={"alteration": "Alteration", "date": "Date"}, inplace=True)

Expand Down

0 comments on commit 6a8155e

Please sign in to comment.