diff --git a/mess/workflow/scripts/calculate_cov.py b/mess/workflow/scripts/calculate_cov.py index 4168a9c..edb5711 100644 --- a/mess/workflow/scripts/calculate_cov.py +++ b/mess/workflow/scripts/calculate_cov.py @@ -93,7 +93,7 @@ def get_even_dist(table): df["tax_abundance"] = df["proportion"] / df["count"] df["genome_bases"] = df["total_sequence_length"] * df["tax_abundance"] df["sum_genome_bases"] = df.groupby("samplename")["genome_bases"].transform("sum") - df["cov_obtained"] = bases / (df["sum_genome_bases"] * p) + df["cov_obtained"] = bases / df["sum_genome_bases"] df["cov_sim"] = df["tax_abundance"] * df["cov_obtained"] df["sum_cov"] = df.groupby("samplename")["cov_sim"].transform("sum") df["bases"] = df["cov_sim"] * df["total_sequence_length"] @@ -117,7 +117,7 @@ def get_even_dist(table): df["sum_genome_bases"] = df.groupby("samplename")["genome_bases"].transform( "sum" ) - df["cov_obtained"] = bases / (df["sum_genome_bases"] * p) + df["cov_obtained"] = bases / df["sum_genome_bases"] df["cov_sim"] = df["tax_abundance"] * df["cov_obtained"] df["sum_cov"] = df.groupby("samplename")["cov_sim"].transform("sum") df["bases"] = df["cov_sim"] * df["total_sequence_length"] @@ -133,7 +133,7 @@ def get_even_dist(table): df["tax_abundance"] = df["cov_sim"] / df["sum_cov"] if "reads" in entry_df.columns: - df["bases"] = df["reads"] * (snakemake.params.read_len * p) + df["bases"] = df["reads"] * snakemake.params.read_len * p df["sum_bases"] = df.groupby("samplename")["bases"].transform("sum") df["seq_abundance"] = df["bases"] / df["sum_bases"] df["cov_sim"] = df["bases"] / df["total_sequence_length"] @@ -165,6 +165,7 @@ def get_even_dist(table): "path", "tax_id", "total_sequence_length", + "number_of_contigs", "reads", "bases", "seq_abundance", @@ -178,5 +179,7 @@ def get_even_dist(table): df["tax_abundance"] = df["tax_abundance"].apply(lambda x: round(x, 3)) df["seq_abundance"] = df["seq_abundance"].apply(lambda x: round(x, 3)) -df = df.astype({"seed": int, "tax_id": int, "total_sequence_length": int}) -df[cols].set_index(["samplename", "fasta"]).to_csv(snakemake.output[0], sep="\t") # type: ignore +df = df.astype( + {"seed": int, "tax_id": int, "total_sequence_length": int, "number_of_contigs": int} +) +df[cols].set_index(["samplename", "fasta"]).to_csv(snakemake.output[0], sep="\t") # type: ignore \ No newline at end of file