Skip to content

Commit

Permalink
added rule for assembly reports removal
Browse files Browse the repository at this point in the history
  • Loading branch information
farchaab committed Oct 3, 2023
1 parent 979e8d5 commit 815789d
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 8 deletions.
2 changes: 2 additions & 0 deletions assembly_finder/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ rule all_download:
f"{outdir}/assemblies/sha256.txt",
f"{outdir}/assembly_summary.tsv",
f"{outdir}/sequence_summary.tsv",
f"{outdir}/taxonomy_summary.tsv",
f"{outdir}/clean.txt",
30 changes: 22 additions & 8 deletions assembly_finder/bin/find_assemblies.smk
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,10 @@ else:
# Replace empty values with default params
# Drop empty entries
# Set entry as index
entry_df = (
entry_df.replace(
empty_to_val,
)
.dropna()
.set_index("entry")
)
entry_df = entry_df.replace(
empty_to_val,
).dropna()
entry_df = entry_df.astype({"entry": str}).set_index("entry")
# Get entry list
entries = list(entry_df.index)

Expand Down Expand Up @@ -325,6 +322,23 @@ rule get_summaries:
tax_df.to_csv(output[2], sep="\t", index=None)


rule clean_reports:
input:
f"{outdir}/assembly_summary.tsv",
f"{outdir}/sequence_summary.tsv",
f"{outdir}/taxonomy_summary.tsv",
reports=lambda wildcards: downloads(wildcards, "_assembly_report.txt"),
output:
temp(f"{outdir}/clean.txt"),
params:
asmdir=f"{outdir}/assemblies",
shell:
"""
rm {input.reports}
touch {output}
"""


rule format_checksum:
input:
f"{outdir}/assemblies/checksums.txt",
Expand All @@ -351,7 +365,7 @@ rule verify_checksums:
f"{outdir}/assemblies/aspera-checks.txt",
lambda wildcards: downloads(wildcards, ".fna.gz"),
output:
f"{outdir}/assemblies/sha256.txt",
temp(f"{outdir}/assemblies/sha256.txt"),
params:
asmdir=f"{outdir}/assemblies",
shell:
Expand Down

0 comments on commit 815789d

Please sign in to comment.