Skip to content

Commit

Permalink
fix: better catching of singlem error messages
Browse files Browse the repository at this point in the history
  • Loading branch information
rhysnewell committed Mar 10, 2024
1 parent 2d94753 commit 49581ae
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 5 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ ask you to set these environment variables upon first running and if they are no
the `aviary configure` subcommand to reset the environment variables:

```commandline
aviary configure -o logs/ --eggnog-db-path /shared/db/eggnog/ --gtdb-path /shared/db/gtdb/ --checkm2-db-path /shared/db/checkm2db/ --download
aviary configure -o logs/ --eggnog-db-path /shared/db/eggnog/ --gtdb-path /shared/db/gtdb/ --checkm2-db-path /shared/db/checkm2db/ --singlem-metapackage-path /shared/db/singlem/ --download
```

This command will check whether the databases exist at the given locations; if they don't, aviary will download and change
Expand Down
2 changes: 1 addition & 1 deletion aviary/modules/annotation/annotation.smk
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ rule download_gtdb:

# Uncompress and pipe output to TQDM
'echo "[INFO] - Extracting archive..."; '
'tar xvzf "$TARGET_TAR" -C "${{TARGET_DIR}}" --strip 1; '
'tar -xvzf "$TARGET_TAR" -C "${{TARGET_DIR}}" --strip 1; '

# Remove the file after successful extraction
'rm "$TARGET_TAR"; '
Expand Down
2 changes: 2 additions & 0 deletions aviary/modules/binning/binning.smk
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,8 @@ rule checkm_das_tool:
rule singlem_pipe_reads:
output:
"data/singlem_out/metagenome.combined_otu_table.csv"
params:
package_path = os.environ["SINGLEM_METAPACKAGE_PATH"]
threads: min(config["max_threads"], 48)
resources:
mem_mb = lambda wildcards, attempt: min(int(config["max_memory"])*1024, 8*1024*attempt),
Expand Down
2 changes: 2 additions & 0 deletions aviary/modules/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ def __init__(self,
self.skip_binners = ["none"]
self.skip_abundances = False
self.binning_only = False
self.skip_taxonomy = False
self.skip_singlem = False

try:
self.assembly = args.assembly
Expand Down
20 changes: 17 additions & 3 deletions aviary/scripts/singlem_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,13 +137,16 @@ def __init__(self, threads: int, output_dir: str, read_container: ReadContainer,

def run(self):
    """Generate, log, and execute the SingleM commands, then merge the results.

    Opens ``self.logf`` in append mode and keeps it open for the whole run so
    that the helper methods can write to the same log handle.  Steps:

    1. ``self.create_commands()`` is called (it is expected to populate
       ``self.commands``).
    2. Every command in ``self.commands`` is written to the log, one per
       line, with its arguments joined by spaces.
    3. ``self.run_commands(logf)`` executes the commands.
    4. ``self.combine_otu_tables(logf)`` merges the per-sample outputs.
    """
    with open(self.logf, "a") as logf:
        # Each log entry is newline-terminated; an unterminated duplicate of
        # this message would otherwise fuse with the following entry.
        logf.write("generating SingleM commands\n")
        self.create_commands()

        # Record the exact command lines so a failed run can be reproduced.
        for command in self.commands:
            logf.write(" ".join(command) + "\n")

        logf.write("running SingleM commands\n")
        self.run_commands(logf)
        self.combine_otu_tables(logf)

def combine_otu_tables(self, logf):
logf.write("combining SingleM otu tables")
logf.write("combining SingleM otu tables\n")
intermidate_otu_tables = glob.glob(os.path.join(self.intermediate_dir, "*.csv"))
summarise_cmd = f"singlem summarise --input-otu-tables {' '.join(intermidate_otu_tables)} --output-otu-table {os.path.join(self.output_dir, 'metagenome.combined_otu_table.csv')}".split()
try:
Expand All @@ -160,16 +163,19 @@ def create_commands(self):
self._create_longread_commands()
self._create_shortread_commands()


def run_commands(self, logf):
    """Launch every command in ``self.commands`` and wait for all to finish.

    Each command is started exactly once as a child process.  Its stdout is
    captured in a fresh temporary file and its stderr is sent straight to
    ``logf`` so SingleM error messages end up in the log.  The
    ``(process, stdout_file)`` pair is appended to ``self.process_queue``.

    Args:
        logf: Open, writable log file.  It must be backed by a real file
            descriptor because it is handed to ``subprocess.Popen`` as the
            child's stderr.
    """
    for command in self.commands:
        # NOTE(review): the temporary file is not closed here; presumably
        # ``_check_processes`` reads and closes it once the process is done.
        stdout_file = tempfile.TemporaryFile()
        # Start the command once only: a second Popen for the same command
        # would run it twice and leave the first child untracked.
        process = subprocess.Popen(command, stdout=stdout_file, stderr=logf)
        self.process_queue.append((process, stdout_file))

        # Throttle: once the queue holds ``self.threads`` entries, hand
        # control to ``_check_processes`` before launching more.
        if len(self.process_queue) >= self.threads:
            self._check_processes(self.threads + 1, logf)

    # write how many processes are left
    logf.write(f"waiting for {len(self.process_queue)} processes to finish\n")
    # Drain the queue; ``_check_processes`` is expected to remove entries
    # as their processes complete.
    while len(self.process_queue) > 0:
        self._check_processes(0, logf)

Expand Down Expand Up @@ -215,8 +221,16 @@ def run_singlem(
singlem_container = SingleMContainer(threads, output_dir, read_container, log)
singlem_container.run()

def valid_path(path: str) -> bool:
    """Report whether *path* refers to an existing filesystem entry.

    Args:
        path: Filesystem path to test.

    Returns:
        The result of ``os.path.exists(path)``.
    """
    path_exists = os.path.exists(path)
    return path_exists

if __name__ == '__main__':
# check if SINGLEM_METAPACKAGE_PATH environment variable is set and path is valid
# if not then, error and exit
os.environ["SINGLEM_METAPACKAGE_PATH"] = snakemake.params.package_path
if "SINGLEM_METAPACKAGE_PATH" not in os.environ or not valid_path(os.environ["SINGLEM_METAPACKAGE_PATH"]):
raise ValueError("SINGLEM_METAPACKAGE_PATH environment variable not set. Please set using 'aviary configure' or manually. Exiting.")

long_reads = snakemake.config['long_reads']
short_reads_1 = snakemake.config['short_reads_1']
short_reads_2 = snakemake.config['short_reads_2']
Expand Down

0 comments on commit 49581ae

Please sign in to comment.