Skip to content

Commit

Permalink
fixed even dist and tax_abundance
Browse files Browse the repository at this point in the history
  • Loading branch information
farchaab committed May 6, 2024
1 parent 46597c5 commit 19e0bad
Showing 1 changed file with 10 additions and 10 deletions.
20 changes: 10 additions & 10 deletions mess/workflow/scripts/calculate_cov.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,18 @@ def get_lognormal_dist(df, mu, sigma):
return df


def get_even_dist(table, cols):
    """
    Add a ``tax_abundance`` column holding the relative frequency of each
    unique value (or combination of values) found in ``cols``.

    Parameters
    ----------
    table : pandas.DataFrame
        Input table; it is not modified.
    cols : str or list of str
        Column(s) of ``table`` whose distinct values define the groups.

    Returns
    -------
    pandas.DataFrame
        ``table`` merged with the per-group proportions, i.e. every row
        carries the fraction of rows of ``table`` that share its ``cols``
        values in a new ``tax_abundance`` column.
    """
    abundances = (
        table[cols]
        .value_counts(normalize=True)
        # Name the Series *before* reset_index(): the default name of the
        # counts column differs between pandas versions ("proportion" in
        # pandas >= 2.0, 0 / the column name before), so renaming a
        # hard-coded column afterwards silently does nothing on some
        # versions. Also never use inplace=True in a chain: it returns None.
        .rename("tax_abundance")
        .reset_index()
    )

    # merge() joins on the columns shared by both frames (the ``cols`` keys).
    return table.merge(abundances)


"""
Expand Down Expand Up @@ -99,15 +100,14 @@ def get_even_dist(df, cols):
df["seq_abundance"] = df["bases"] / df["sum_bases"]
else:
if "tax_abundance" in entry_df.columns:
df["sum_seq_length"] = df.groupby("samplename")[
"total_sequence_length"
].transform("sum")
df["sum_cov"] = bases / df["sum_seq_length"]
df["cov_sim"] = df["sum_cov"] * df["tax_abundance"]
df["cov_obtained"] = bases / df["total_sequence_length"]
df["sum_cov_obtained"] = df.groupby("samplename")["cov_obtained"].transform("sum")
df["cov_sim"] = df["tax_abundance"] * df["sum_cov_obtained"]
df["sum_cov"] = df.groupby("samplename")["cov_sim"].transform("sum")
df["bases"] = df["cov_sim"] * df["total_sequence_length"]
df["sum_bases"] = df.groupby("samplename")["bases"].transform("sum")
df["seq_abundance"] = df["bases"] / df["sum_bases"]
df["reads"] = df["bases"] / (snakemake.params.read_len * p)
df["seq_abundance"] = df["bases"] / df["sum_bases"]

if "seq_abundance" in entry_df.columns:
df["bases"] = df["seq_abundance"] * bases
Expand Down

0 comments on commit 19e0bad

Please sign in to comment.