Skip to content

Commit

Permalink
added back number_of_contigs
Browse files: browse the repository at this point in the history
  • Loading branch information
farchaab committed Oct 11, 2024
1 parent e9ab869 commit 72dfab8
Showing 1 changed file with 8 additions and 5 deletions.
13 changes: 8 additions & 5 deletions mess/workflow/scripts/calculate_cov.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -93,7 +93,7 @@ def get_even_dist(table):
df["tax_abundance"] = df["proportion"] / df["count"]
df["genome_bases"] = df["total_sequence_length"] * df["tax_abundance"]
df["sum_genome_bases"] = df.groupby("samplename")["genome_bases"].transform("sum")
- df["cov_obtained"] = bases / (df["sum_genome_bases"] * p)
+ df["cov_obtained"] = bases / df["sum_genome_bases"]
df["cov_sim"] = df["tax_abundance"] * df["cov_obtained"]
df["sum_cov"] = df.groupby("samplename")["cov_sim"].transform("sum")
df["bases"] = df["cov_sim"] * df["total_sequence_length"]
Expand All @@ -117,7 +117,7 @@ def get_even_dist(table):
df["sum_genome_bases"] = df.groupby("samplename")["genome_bases"].transform(
"sum"
)
- df["cov_obtained"] = bases / (df["sum_genome_bases"] * p)
+ df["cov_obtained"] = bases / df["sum_genome_bases"]
df["cov_sim"] = df["tax_abundance"] * df["cov_obtained"]
df["sum_cov"] = df.groupby("samplename")["cov_sim"].transform("sum")
df["bases"] = df["cov_sim"] * df["total_sequence_length"]
Expand All @@ -133,7 +133,7 @@ def get_even_dist(table):
df["tax_abundance"] = df["cov_sim"] / df["sum_cov"]

if "reads" in entry_df.columns:
- df["bases"] = df["reads"] * (snakemake.params.read_len * p)
+ df["bases"] = df["reads"] * snakemake.params.read_len * p
df["sum_bases"] = df.groupby("samplename")["bases"].transform("sum")
df["seq_abundance"] = df["bases"] / df["sum_bases"]
df["cov_sim"] = df["bases"] / df["total_sequence_length"]
Expand Down Expand Up @@ -165,6 +165,7 @@ def get_even_dist(table):
"path",
"tax_id",
"total_sequence_length",
+ "number_of_contigs",
"reads",
"bases",
"seq_abundance",
Expand All @@ -178,5 +179,7 @@ def get_even_dist(table):
df["tax_abundance"] = df["tax_abundance"].apply(lambda x: round(x, 3))
df["seq_abundance"] = df["seq_abundance"].apply(lambda x: round(x, 3))

- df = df.astype({"seed": int, "tax_id": int, "total_sequence_length": int})
- df[cols].set_index(["samplename", "fasta"]).to_csv(snakemake.output[0], sep="\t") # type: ignore
+ df = df.astype(
+ {"seed": int, "tax_id": int, "total_sequence_length": int, "number_of_contigs": int}
+ )
+ df[cols].set_index(["samplename", "fasta"]).to_csv(snakemake.output[0], sep="\t") # type: ignore

0 comments on commit 72dfab8

Please sign in to comment.