From fb4d4e5fd9ebc79c43b6bbc954a43ba80056fcfa Mon Sep 17 00:00:00 2001 From: Alex Thomas Date: Wed, 23 Feb 2022 11:37:24 +0000 Subject: [PATCH 1/9] refactor voc table generation --- .../scripts/generate-lineage-variant-table.py | 132 ++++++++---------- 1 file changed, 56 insertions(+), 76 deletions(-) diff --git a/workflow/scripts/generate-lineage-variant-table.py b/workflow/scripts/generate-lineage-variant-table.py index b8e8876e8..ae13a6f7a 100644 --- a/workflow/scripts/generate-lineage-variant-table.py +++ b/workflow/scripts/generate-lineage-variant-table.py @@ -24,6 +24,22 @@ def has_numbers(inputString): return any(char.isdigit() for char in inputString) +def rename_enumeration(list_length): + append_dict = { + 1: "st", 2: "nd", 3: "rd", + 21: "st", 22: "nd", 23: "rd", + 31: "st", 32: "nd", 33: "rd", + 41: "st", 42: "nd", 43: "rd", + 51: "st", 52: "nd", 53: "rd", + } + range_list = list(range(1, list_length + 1)) + for i in range(len(range_list)): + if range_list[i] in append_dict: + range_list[i] = str(range_list[i]) + append_dict[range_list[i]] + else: + range_list[i] = str(range_list[i]) + "th" + return range_list + variants_df = pd.DataFrame() lineage_df = pd.DataFrame() @@ -56,17 +72,6 @@ def has_numbers(inputString): ignore_index=True, ) -# count occurences of signatures (x) in lineage columns and get sorted list -lineage_dict = dict(lineage_df.count()) -lineage_dict = dict( - sorted(lineage_dict.items(), key=lambda item: item[1], reverse=True) -) -top5_lineages = list(lineage_dict.keys()) - -# only include variant names (index=0) + top 5 variants (index=1-6) and reorder -lineage_df.drop(labels=top5_lineages[7:], axis=1, inplace=True) -lineage_df = lineage_df[top5_lineages[:7]] - # aggregate both dataframes by summing up repeating rows for VAR (maximum=1) and multiply Prob_not_present variants_df = ( variants_df.groupby(["Mutations"]) @@ -74,9 +79,7 @@ def has_numbers(inputString): func={"Frequency": lambda x: min(sum(x), 1.0), "Prob_not_present": np.prod}, axis=1, ) - .reset_index() ) -pd.set_option("display.max_rows", None) # new column for 1-prob_not_present = prob_present variants_df["Probability"] = 1.0 - variants_df["Prob_not_present"] @@ -88,35 +91,32 @@ def has_numbers(inputString): lineage_df = lineage_df.replace({"x": 1}) lineage_df = ( lineage_df.groupby(["Mutations"]) - .agg(func={column: np.max for column in top5_lineages[:7]}) + .agg(func={column: np.max for column in lineage_df.columns}) .reset_index(drop=True) ) lineage_df = lineage_df.replace({1: "x", 0: ""}) -# calculate Jaccard coefficient for top 5 lineages and save row as df to append after sorting +# calculate Jaccard coefficient for each lineage +# iterate over lineages in columns (mutations as index) +lineage_df.set_index("Mutations", inplace=True) jaccard_coefficient = {} -for lineage in range(1, len(top5_lineages[:6])): - jaccard_coefficient[top5_lineages[lineage]] = round( - variants_df[ - variants_df["Mutations"].isin( - lineage_df[lineage_df[top5_lineages[lineage]] == "x"]["Mutations"] - ) - ]["Prob X VAF"].sum() - / variants_df["Prob X VAF"].sum(), - 3, - ) +for lineage in lineage_df.columns: + lineage_defining_variants = variants_df.index.isin(lineage_df.index[lineage_df[lineage] == "x"]) + lineage_defining_non_variants = ~variants_df.index.isin(lineage_df.index[lineage_df[lineage] == "x"]) + print(lineage_defining_variants) + jaccard_coefficient[lineage] = round((variants_df[lineage_defining_variants]["Prob X VAF"].sum() + variants_df[lineage_defining_non_variants]["Prob_not_present"].sum()) / len(variants_df),3,) + jaccard_row = pd.DataFrame( {"Mutations": "Similarity", **jaccard_coefficient}, index=[0] ) - # merge variants dataframe and lineage dataframe -variants_df = variants_df.merge(lineage_df, left_on="Mutations", right_on="Mutations") +variants_df = variants_df.merge(lineage_df, left_index=True, right_index=True) # add feature column for sorting -variants_df["Features"] = variants_df["Mutations"].str.extract(r"(.+)[:].+|\*") +variants_df["Features"] = variants_df.index.to_series().str.extract(r"(.+)[:].+|\*") # position of variant for sorting and change type -variants_df["Position"] = variants_df["Mutations"].str.extract( +variants_df["Position"] = variants_df.index.to_series().str.extract( r"(.*:?[A-Z]+|\*$|-)([0-9]+)([A-Z]+$|\*$|-)$" )[1] variants_df = variants_df.astype({"Position": "int64"}) @@ -128,70 +128,50 @@ def has_numbers(inputString): sorterIndex = dict(zip(sorter, range(len(sorter)))) variants_df["Features_Rank"] = variants_df["Features"].map(sorterIndex) -# define categories for sorting -variants_df.loc[ - (variants_df[top5_lineages[1]] == "x") & (variants_df["Probability"] >= 0.95), - "Order", -] = 0 -variants_df.loc[ - (variants_df[top5_lineages[1]] == "x") & (variants_df["Probability"] <= 0.05), - "Order", -] = 1 -variants_df.loc[ - (variants_df[top5_lineages[1]] != "x") & (variants_df["Probability"] >= 0.95), - "Order", -] = 2 -variants_df.loc[ - (variants_df[top5_lineages[1]] == "x") - & ((variants_df["Probability"] > 0.05) & (variants_df["Probability"] < 0.95)), - "Order", -] = 3 -variants_df.loc[ - (variants_df[top5_lineages[1]] != "x") & (variants_df["Probability"] <= 0.05), - "Order", -] = 4 -variants_df.loc[ - (variants_df[top5_lineages[1]] != "x") - & ((variants_df["Probability"] > 0.05) & (variants_df["Probability"] < 0.95)), - "Order", -] = 5 - -top5_lineages_row_df = pd.DataFrame( - {"Mutations": "Lineage", **{x: x for x in top5_lineages[1:6]}}, index=[0] -) - -# sort final DF -variants_df["Prob X VAF"].replace([0, 0.0], np.NaN, inplace=True) -variants_df.sort_values( - by=["Order", "Features_Rank", "Position"], - ascending=[True, True, True], - na_position="last", - inplace=True, +# row for lineage name after renaming columns (column names can't be formatted) +lineages_row_df = pd.DataFrame( + {"Mutations": "Lineage", **{x: x for x in list(lineage_df.columns) if x != "Mutations"}}, index=[0] ) # concat row with Jaccard coefficient, drop unneccesary columns, sort with Jaccard coefficient, round -variants_df = pd.concat([jaccard_row, variants_df]).reset_index(drop=True) -variants_df = pd.concat([top5_lineages_row_df, variants_df]).reset_index(drop=True) -variants_df = variants_df[ - ["Mutations", "Probability", "Frequency", *top5_lineages[1:6]] -] +variants_df.reset_index(inplace=True) +variants_df = pd.concat([jaccard_row, variants_df]) +variants_df = pd.concat([lineages_row_df, variants_df]) +all_columns = variants_df.columns +first_columns = ["Mutations", "Probability", "Frequency"] +rest_columns = [item for item in all_columns if item not in first_columns] + variants_df = variants_df.round({"Probability": 5, "Frequency": 5}) variants_df.set_index("Mutations", inplace=True) variants_df.sort_values( by="Similarity", axis=1, na_position="first", ascending=False, inplace=True ) -# rename top 5 hits +# rename hits ascending variants_df.rename( columns={ x: y for x, y in zip( - list(variants_df.columns)[2:], - ["Highest similarity", "2nd", "3rd", "4th", "5th"], + list(variants_df.columns)[7:], + rename_enumeration(len(list(variants_df.columns)[7:])), ) }, errors="raise", inplace=True, ) +# sort final DF +variants_df.loc[variants_df["1st"] == "x", "Order",] = 1 +variants_df.loc[variants_df["1st"] != "x", "Order",] = 2 +variants_df.at["Similarity", "Order"] = 0 +variants_df.at["Lineage", "Order"] = 0 +variants_df["Prob X VAF"].replace([0, 0.0], np.NaN, inplace=True) +variants_df.sort_values( + by=["Order", "Features_Rank", "Position"], + ascending=[True, True, True], + na_position="last", + inplace=True, +) +# drop unwanted columns +variants_df.drop(columns=["Prob_not_present", "Prob X VAF", "Features", "Position", "Features_Rank", "Order"], inplace=True) # output variant_df variants_df.to_csv(snakemake.output.variant_table, index=True, sep=",") From e8f7275f3d2459e2865a5f5ed7262ae9eba5ed78 Mon Sep 17 00:00:00 2001 From: Alex Thomas Date: Wed, 23 Feb 2022 11:38:59 +0000 Subject: [PATCH 2/9] fmt --- .../scripts/generate-lineage-variant-table.py | 77 ++++++++++++++----- 1 file changed, 58 insertions(+), 19 deletions(-) diff --git a/workflow/scripts/generate-lineage-variant-table.py b/workflow/scripts/generate-lineage-variant-table.py index ae13a6f7a..d28591df6 100644 --- a/workflow/scripts/generate-lineage-variant-table.py +++ b/workflow/scripts/generate-lineage-variant-table.py @@ -26,12 +26,22 @@ def has_numbers(inputString): def rename_enumeration(list_length): append_dict = { - 1: "st", 2: "nd", 3: "rd", - 21: "st", 22: "nd", 23: "rd", - 31: "st", 32: "nd", 33: "rd", - 41: "st", 42: "nd", 43: "rd", - 51: "st", 52: "nd", 53: "rd", - } + 1: "st", + 2: "nd", + 3: "rd", + 21: "st", + 22: "nd", + 23: "rd", + 31: "st", + 32: "nd", + 33: "rd", + 41: "st", + 42: "nd", + 43: "rd", + 51: "st", + 52: "nd", + 53: "rd", + } range_list = list(range(1, list_length + 1)) for i in range(len(range_list)): if range_list[i] in append_dict: @@ -40,6 +50,7 @@ def rename_enumeration(list_length): range_list[i] = str(range_list[i]) + "th" return range_list + variants_df = pd.DataFrame() lineage_df = pd.DataFrame() @@ -73,12 +84,9 @@ def rename_enumeration(list_length): ) # aggregate both dataframes by summing up repeating rows for VAR (maximum=1) and multiply Prob_not_present -variants_df = ( - variants_df.groupby(["Mutations"]) - .agg( - func={"Frequency": lambda x: min(sum(x), 1.0), "Prob_not_present": np.prod}, - axis=1, - ) +variants_df = variants_df.groupby(["Mutations"]).agg( + func={"Frequency": lambda x: min(sum(x), 1.0), "Prob_not_present": np.prod}, + axis=1, ) # new column for 1-prob_not_present = prob_present @@ -101,10 +109,21 @@ def rename_enumeration(list_length): lineage_df.set_index("Mutations", inplace=True) jaccard_coefficient = {} for lineage in lineage_df.columns: - lineage_defining_variants = variants_df.index.isin(lineage_df.index[lineage_df[lineage] == "x"]) - lineage_defining_non_variants = ~variants_df.index.isin(lineage_df.index[lineage_df[lineage] == "x"]) + lineage_defining_variants = variants_df.index.isin( + lineage_df.index[lineage_df[lineage] == "x"] + ) + lineage_defining_non_variants = ~variants_df.index.isin( + lineage_df.index[lineage_df[lineage] == "x"] + ) print(lineage_defining_variants) - jaccard_coefficient[lineage] = round((variants_df[lineage_defining_variants]["Prob X VAF"].sum() + variants_df[lineage_defining_non_variants]["Prob_not_present"].sum()) / len(variants_df),3,) + jaccard_coefficient[lineage] = round( + ( + variants_df[lineage_defining_variants]["Prob X VAF"].sum() + + variants_df[lineage_defining_non_variants]["Prob_not_present"].sum() + ) + / len(variants_df), + 3, + ) jaccard_row = pd.DataFrame( {"Mutations": "Similarity", **jaccard_coefficient}, index=[0] @@ -130,7 +149,11 @@ def rename_enumeration(list_length): # row for lineage name after renaming columns (column names can't be formatted) lineages_row_df = pd.DataFrame( - {"Mutations": "Lineage", **{x: x for x in list(lineage_df.columns) if x != "Mutations"}}, index=[0] + { + "Mutations": "Lineage", + **{x: x for x in list(lineage_df.columns) if x != "Mutations"}, + }, + index=[0], ) # concat row with Jaccard coefficient, drop unneccesary columns, sort with Jaccard coefficient, round @@ -159,8 +182,14 @@ def rename_enumeration(list_length): inplace=True, ) # sort final DF -variants_df.loc[variants_df["1st"] == "x", "Order",] = 1 -variants_df.loc[variants_df["1st"] != "x", "Order",] = 2 +variants_df.loc[ + variants_df["1st"] == "x", + "Order", +] = 1 +variants_df.loc[ + variants_df["1st"] != "x", + "Order", +] = 2 variants_df.at["Similarity", "Order"] = 0 variants_df.at["Lineage", "Order"] = 0 variants_df["Prob X VAF"].replace([0, 0.0], np.NaN, inplace=True) @@ -171,7 +200,17 @@ def rename_enumeration(list_length): inplace=True, ) # drop unwanted columns -variants_df.drop(columns=["Prob_not_present", "Prob X VAF", "Features", "Position", "Features_Rank", "Order"], inplace=True) +variants_df.drop( + columns=[ + "Prob_not_present", + "Prob X VAF", + "Features", + "Position", + "Features_Rank", + "Order", + ], + inplace=True, +) # output variant_df variants_df.to_csv(snakemake.output.variant_table, index=True, sep=",") From 68221f383d52477d0938c405ba0f4c2e5ded633e Mon Sep 17 00:00:00 2001 From: Alex Thomas Date: Wed, 23 Feb 2022 14:16:23 +0000 Subject: [PATCH 3/9] top 10 and minor fix --- resources/lineage-variant-table-formatter.js | 22 ++++++++++++++++++- .../scripts/generate-lineage-variant-table.py | 7 +++--- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/resources/lineage-variant-table-formatter.js b/resources/lineage-variant-table-formatter.js index 706af1a6a..4aabbfcd9 100644 --- a/resources/lineage-variant-table-formatter.js +++ b/resources/lineage-variant-table-formatter.js @@ -162,7 +162,7 @@ } } }, - "Highest similarity": function (value) { + "1st": function (value) { let result = this["lineage helper"](value); return result; }, @@ -182,4 +182,24 @@ let result = this["lineage helper"](value); return result; }, + "6th": function (value) { + let result = this["lineage helper"](value); + return result; + }, + "7th": function (value) { + let result = this["lineage helper"](value); + return result; + }, + "8th": function (value) { + let result = this["lineage helper"](value); + return result; + }, + "9th": function (value) { + let result = this["lineage helper"](value); + return result; + }, + "10th": function (value) { + let result = this["lineage helper"](value); + return result; + }, }; diff --git a/workflow/scripts/generate-lineage-variant-table.py b/workflow/scripts/generate-lineage-variant-table.py index d28591df6..2ed70f79e 100644 --- a/workflow/scripts/generate-lineage-variant-table.py +++ b/workflow/scripts/generate-lineage-variant-table.py @@ -112,10 +112,7 @@ def rename_enumeration(list_length): lineage_defining_variants = variants_df.index.isin( lineage_df.index[lineage_df[lineage] == "x"] ) - lineage_defining_non_variants = ~variants_df.index.isin( - lineage_df.index[lineage_df[lineage] == "x"] - ) - print(lineage_defining_variants) + lineage_defining_non_variants = ~lineage_defining_variants jaccard_coefficient[lineage] = round( ( variants_df[lineage_defining_variants]["Prob X VAF"].sum() @@ -211,6 +208,8 @@ def rename_enumeration(list_length): ], inplace=True, ) +# drop other lineages, top 10 only +variants_df.drop(variants_df.columns[12:], axis=1, inplace=True) # output variant_df variants_df.to_csv(snakemake.output.variant_table, index=True, sep=",") From 188821983898da6749f93e1c51086107ee87ab46 Mon Sep 17 00:00:00 2001 From: Alex Thomas Date: Fri, 25 Feb 2022 15:25:23 +0000 Subject: [PATCH 4/9] add formatting for read depth column --- resources/lineage-variant-table-formatter.js | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/resources/lineage-variant-table-formatter.js b/resources/lineage-variant-table-formatter.js index 4aabbfcd9..ea3b8cd56 100644 --- a/resources/lineage-variant-table-formatter.js +++ b/resources/lineage-variant-table-formatter.js @@ -108,6 +108,16 @@ var lighting = (0.9 - parseFloat(prob) * 0.4) * 100; return `${prob}`; }, + ReadDepth: function (depth) { + depth = parseInt(depth) + if (!isNaN(depth)) { + if (depth < 10) { + return `${depth}`; + } else { + return `${depth}`; + } + } + }, "lineage helper": function (value) { if (isNaN(value)) { var variant_colors = { @@ -140,7 +150,7 @@ if (value == "x") { return `${"\u2713"}`; } else { - const match = /^(?.{3})\s(?.+)\s?.*$/i.exec(value); + const match = /^(?.{1,3})\s(?.+)\s?.*$/i.exec(value); var parent = match.groups?.parent; var version = match.groups?.version; From e7cd83327776b226e1134615a37b8852c1c20d12 Mon Sep 17 00:00:00 2001 From: Alex Thomas Date: Fri, 25 Feb 2022 15:26:17 +0000 Subject: [PATCH 5/9] add read depth, fix prob calc --- .../scripts/generate-lineage-variant-table.py | 33 ++++++++++++++----- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/workflow/scripts/generate-lineage-variant-table.py b/workflow/scripts/generate-lineage-variant-table.py index 2ed70f79e..5e82d1ff8 100644 --- a/workflow/scripts/generate-lineage-variant-table.py +++ b/workflow/scripts/generate-lineage-variant-table.py @@ -16,10 +16,18 @@ def phred_to_prob(phred): if phred is None: - return 0 + return pd.NA return 10 ** (-phred / 10) +# np.prod returns 1's as values for a pd series with NaN's. A list would return NaN's +def prod_prob_not_present(probs): + if pd.isna(probs).any(): + return pd.NA + else: + return np.prod(probs) + + def has_numbers(inputString): return any(char.isdigit() for char in inputString) @@ -60,9 +68,12 @@ def rename_enumeration(list_length): if "SIGNATURES" in record.info: signatures = record.info.get("SIGNATURES", ("#ERROR0",)) vaf = record.samples[0]["AF"][0] + dp = record.samples[0]["DP"] prob_not_present = phred_to_prob( record.info["PROB_ABSENT"][0] ) + phred_to_prob(record.info["PROB_ARTIFACT"][0]) + if pd.isna(prob_not_present): + vaf = pd.NA lineages = record.info["LINEAGES"] for signature in signatures: # generate df with all signatures + VAF and Prob_not_present from calculation @@ -70,6 +81,7 @@ def rename_enumeration(list_length): { "Mutations": signature, "Frequency": vaf, + "ReadDepth": dp, "Prob_not_present": prob_not_present, }, ignore_index=True, @@ -85,7 +97,7 @@ def rename_enumeration(list_length): # aggregate both dataframes by summing up repeating rows for VAR (maximum=1) and multiply Prob_not_present variants_df = variants_df.groupby(["Mutations"]).agg( - func={"Frequency": lambda x: min(sum(x), 1.0), "Prob_not_present": np.prod}, + func={"Frequency": lambda x: min(sum(x), 1.0), "Prob_not_present": prod_prob_not_present, "ReadDepth": np.min}, axis=1, ) @@ -157,11 +169,9 @@ def rename_enumeration(list_length): variants_df.reset_index(inplace=True) variants_df = pd.concat([jaccard_row, variants_df]) variants_df = pd.concat([lineages_row_df, variants_df]) -all_columns = variants_df.columns -first_columns = ["Mutations", "Probability", "Frequency"] -rest_columns = [item for item in all_columns if item not in first_columns] -variants_df = variants_df.round({"Probability": 5, "Frequency": 5}) + +variants_df = variants_df.round({"Probability": 2, "Frequency": 2}) variants_df.set_index("Mutations", inplace=True) variants_df.sort_values( by="Similarity", axis=1, na_position="first", ascending=False, inplace=True @@ -171,8 +181,8 @@ def rename_enumeration(list_length): columns={ x: y for x, y in zip( - list(variants_df.columns)[7:], - rename_enumeration(len(list(variants_df.columns)[7:])), + list(variants_df.columns)[8:], + rename_enumeration(len(list(variants_df.columns)[8:])), ) }, errors="raise", @@ -208,8 +218,13 @@ def rename_enumeration(list_length): ], inplace=True, ) +all_columns = variants_df.columns +first_columns = ["Probability", "Frequency", "ReadDepth"] +rest_columns = [item for item in all_columns if item not in first_columns] +variants_df = variants_df[[*first_columns, *rest_columns]] + # drop other lineages, top 10 only -variants_df.drop(variants_df.columns[12:], axis=1, inplace=True) +variants_df.drop(variants_df.columns[13:], axis=1, inplace=True) # output variant_df variants_df.to_csv(snakemake.output.variant_table, index=True, sep=",") From 00e5d2df3732747eac0d7d82d9e022700af4d21f Mon Sep 17 00:00:00 2001 From: Alex Thomas Date: Fri, 25 Feb 2022 15:26:45 +0000 Subject: [PATCH 6/9] fmt --- workflow/scripts/generate-lineage-variant-table.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/workflow/scripts/generate-lineage-variant-table.py b/workflow/scripts/generate-lineage-variant-table.py index 5e82d1ff8..f8f543d90 100644 --- a/workflow/scripts/generate-lineage-variant-table.py +++ b/workflow/scripts/generate-lineage-variant-table.py @@ -97,7 +97,11 @@ def rename_enumeration(list_length): # aggregate both dataframes by summing up repeating rows for VAR (maximum=1) and multiply Prob_not_present variants_df = variants_df.groupby(["Mutations"]).agg( - func={"Frequency": lambda x: min(sum(x), 1.0), "Prob_not_present": prod_prob_not_present, "ReadDepth": np.min}, + func={ + "Frequency": lambda x: min(sum(x), 1.0), + "Prob_not_present": prod_prob_not_present, + "ReadDepth": np.min, + }, axis=1, ) From 076c7746380262508b76b5a1aa408c8c346fc873 Mon Sep 17 00:00:00 2001 From: Alex Thomas Date: Fri, 25 Feb 2022 16:48:03 +0000 Subject: [PATCH 7/9] fix formatting, add exp notation --- resources/lineage-variant-table-formatter.js | 30 +++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/resources/lineage-variant-table-formatter.js b/resources/lineage-variant-table-formatter.js index ea3b8cd56..c980d5682 100644 --- a/resources/lineage-variant-table-formatter.js +++ b/resources/lineage-variant-table-formatter.js @@ -101,12 +101,32 @@ } }, Frequency: function (vaf) { - var lighting = (0.9 - parseFloat(vaf) * 0.4) * 100; - return `${vaf}`; + vaf = parseFloat(vaf) + if (!isNaN(vaf)) { + var lighting = (0.9 - vaf * 0.4) * 100; + if (vaf < 0.1) { + vaf = vaf.toExponential(2) + } else { + vaf = vaf.toFixed(2) + } + return `${vaf}`; + } else { + return " " + } }, Probability: function (prob) { - var lighting = (0.9 - parseFloat(prob) * 0.4) * 100; - return `${prob}`; + prob = parseFloat(prob) + if (!isNaN(prob)) { + var lighting = (0.9 - prob * 0.4) * 100; + if (prob < 0.1) { + prob = prob.toExponential(2) + } else { + prob = prob.toFixed(2) + } + return `${prob}`; + } else { + return " " + } }, ReadDepth: function (depth) { depth = parseInt(depth) @@ -116,6 +136,8 @@ } else { return `${depth}`; } + } else { + return " " } }, "lineage helper": function (value) { From e7ea7f83f85418eab15970a6c2513c2f966cc396 Mon Sep 17 00:00:00 2001 From: Alexander Thomas <77535027+alethomas@users.noreply.github.com> Date: Thu, 3 Mar 2022 14:36:31 +0100 Subject: [PATCH 8/9] Update workflow/scripts/generate-lineage-variant-table.py Co-authored-by: Thomas Battenfeld <46334240+thomasbtf@users.noreply.github.com> --- .../scripts/generate-lineage-variant-table.py | 37 +++++++------------ 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/workflow/scripts/generate-lineage-variant-table.py b/workflow/scripts/generate-lineage-variant-table.py index f8f543d90..72c2cb8d8 100644 --- a/workflow/scripts/generate-lineage-variant-table.py +++ b/workflow/scripts/generate-lineage-variant-table.py @@ -32,31 +32,20 @@ def has_numbers(inputString): return any(char.isdigit() for char in inputString) +def add_number_suffix(number): + number = str(number) + + if number.endswith("1") and number != "11": + return f"{number}st" + elif number.endswith("2") and number != "12": + return f"{number}nd" + elif number.endswith("3") and number != "13": + return f"{number}rd" + else: + return f"{number}th" + def rename_enumeration(list_length): - append_dict = { - 1: "st", - 2: "nd", - 3: "rd", - 21: "st", - 22: "nd", - 23: "rd", - 31: "st", - 32: "nd", - 33: "rd", - 41: "st", - 42: "nd", - 43: "rd", - 51: "st", - 52: "nd", - 53: "rd", - } - range_list = list(range(1, list_length + 1)) - for i in range(len(range_list)): - if range_list[i] in append_dict: - range_list[i] = str(range_list[i]) + append_dict[range_list[i]] - else: - range_list[i] = str(range_list[i]) + "th" - return range_list + return [add_number_suffix(x) for in in range(1, list_length+1)] variants_df = pd.DataFrame() From a53bdb188b9dd980969127b0389ec70c31577abc Mon Sep 17 00:00:00 2001 From: Alex Thomas Date: Thu, 3 Mar 2022 15:50:26 +0000 Subject: [PATCH 9/9] fmt --- workflow/scripts/generate-lineage-variant-table.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/workflow/scripts/generate-lineage-variant-table.py b/workflow/scripts/generate-lineage-variant-table.py index 72c2cb8d8..6784c48a0 100644 --- a/workflow/scripts/generate-lineage-variant-table.py +++ b/workflow/scripts/generate-lineage-variant-table.py @@ -39,13 +39,14 @@ def add_number_suffix(number): return f"{number}st" elif number.endswith("2") and number != "12": return f"{number}nd" - elif number.endswith("3") and number != "13": + elif number.endswith("3") and number != "13": return f"{number}rd" else: return f"{number}th" + def rename_enumeration(list_length): - return [add_number_suffix(x) for in in range(1, list_length+1)] + return [add_number_suffix(x) for x in range(1, list_length + 1)] variants_df = pd.DataFrame()