From b5fa3a84e27ca97f5614932924ec30fcc7f0436c Mon Sep 17 00:00:00 2001 From: farchaab Date: Fri, 25 Oct 2024 12:03:42 +0200 Subject: [PATCH] added rotate column in table --- mess/workflow/scripts/calculate_cov.py | 8 +++++--- mess/workflow/scripts/split_contigs.py | 12 ++++++------ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/mess/workflow/scripts/calculate_cov.py b/mess/workflow/scripts/calculate_cov.py index edb5711..b06fc84 100644 --- a/mess/workflow/scripts/calculate_cov.py +++ b/mess/workflow/scripts/calculate_cov.py @@ -93,7 +93,7 @@ def get_even_dist(table): df["tax_abundance"] = df["proportion"] / df["count"] df["genome_bases"] = df["total_sequence_length"] * df["tax_abundance"] df["sum_genome_bases"] = df.groupby("samplename")["genome_bases"].transform("sum") - df["cov_obtained"] = bases / df["sum_genome_bases"] + df["cov_obtained"] = bases / df["sum_genome_bases"] df["cov_sim"] = df["tax_abundance"] * df["cov_obtained"] df["sum_cov"] = df.groupby("samplename")["cov_sim"].transform("sum") df["bases"] = df["cov_sim"] * df["total_sequence_length"] @@ -117,7 +117,7 @@ def get_even_dist(table): df["sum_genome_bases"] = df.groupby("samplename")["genome_bases"].transform( "sum" ) - df["cov_obtained"] = bases / df["sum_genome_bases"] + df["cov_obtained"] = bases / df["sum_genome_bases"] df["cov_sim"] = df["tax_abundance"] * df["cov_obtained"] df["sum_cov"] = df.groupby("samplename")["cov_sim"].transform("sum") df["bases"] = df["cov_sim"] * df["total_sequence_length"] @@ -173,6 +173,8 @@ def get_even_dist(table): "tax_abundance", "seed", ] +if "rotate" in entry_df.columns: + cols.append("rotate") df["reads"] = df["reads"].apply(lambda x: int(round(x))) df["bases"] = df["bases"].apply(lambda x: int(round(x))) @@ -182,4 +184,4 @@ def get_even_dist(table): df = df.astype( {"seed": int, "tax_id": int, "total_sequence_length": int, "number_of_contigs": int} ) -df[cols].set_index(["samplename", "fasta"]).to_csv(snakemake.output[0], sep="\t") # type: ignore \ No newline at end of file +df[cols].to_csv(snakemake.output[0], sep="\t", index=False) # type: ignore diff --git a/mess/workflow/scripts/split_contigs.py b/mess/workflow/scripts/split_contigs.py index 6750dc4..5a519b9 100644 --- a/mess/workflow/scripts/split_contigs.py +++ b/mess/workflow/scripts/split_contigs.py @@ -35,17 +35,17 @@ def split_fasta(fa, outdir): cols = ["samplename", "fasta", "contig", "contig_length", "tax_id", "seed", "cov_sim"] -if snakemake.params.rotate > 1: - cols += ["n", "random_start"] +if snakemake.params.circular: + cols += ["n", "random_start", "rotate"] + if "rotate" not in df.columns: + df.loc[:, "rotate"] = [snakemake.params.rotate] * len(df) df["random_start"] = df.apply( - lambda row: get_random_start( - row["seed"], row["contig_length"], snakemake.params.rotate - ), + lambda row: get_random_start(row["seed"], row["contig_length"], row["rotate"]), axis=1, ) df_expanded = df.explode("random_start").reset_index(drop=True) df_expanded["n"] = df_expanded.groupby(["samplename", "contig"]).cumcount() + 1 df = df_expanded - df["cov_sim"] = df["cov_sim"] / snakemake.params.rotate + df["cov_sim"] = df["cov_sim"] / df["rotate"] df[cols].to_csv(snakemake.output.tsv, sep="\t", index=False)