From 20c5aeac357b45138f3f40422bd27e3d6878d0da Mon Sep 17 00:00:00 2001 From: David Danko Date: Mon, 7 Feb 2022 11:46:42 -0500 Subject: [PATCH 1/5] fix: issues with mouse db and db module list --- cap2/pipeline/databases/__init__.py | 3 ++- cap2/pipeline/databases/mouse_removal_db.py | 15 ++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/cap2/pipeline/databases/__init__.py b/cap2/pipeline/databases/__init__.py index 5db82f4c..3ed2ad77 100644 --- a/cap2/pipeline/databases/__init__.py +++ b/cap2/pipeline/databases/__init__.py @@ -4,7 +4,7 @@ from .hmp_db import HmpDB from .uniref import Uniref90 from .kraken2_db import Kraken2DB, BrakenKraken2DB - +from .fast_kraken2_db import FastKraken2DB MODULES = [ HumanRemovalDB, @@ -12,4 +12,5 @@ Uniref90, Kraken2DB, BrakenKraken2DB, + FastKraken2DB, ] diff --git a/cap2/pipeline/databases/mouse_removal_db.py b/cap2/pipeline/databases/mouse_removal_db.py index 1effa25b..2e5f137b 100644 --- a/cap2/pipeline/databases/mouse_removal_db.py +++ b/cap2/pipeline/databases/mouse_removal_db.py @@ -1,6 +1,6 @@ import luigi -from os.path import join, dirname +from os.path import join, dirname, isfile from glob import glob import subprocess from os import makedirs @@ -36,13 +36,14 @@ def __init__(self, *args, **kwargs): def download_mouse_genome(self): local_dir = join(self.config.db_dir, 'GRCm39') makedirs(local_dir, exist_ok=True) - cmd = ( - 'wget ' - f'--directory-prefix={local_dir} ' - f'{MOUSE_GENOME_URL} ' - ) - self.run_cmd(cmd) local_path = join(local_dir, 'GCA_000001635.9_GRCm39_genomic.fna.gz') + if not isfile(local_path): + cmd = ( + 'wget ' + f'--directory-prefix={local_dir} ' + f'{MOUSE_GENOME_URL} ' + ) + self.run_cmd(cmd) return local_path @property From 37b7f87833d9b6decd9802e56233c08d558b9af0 Mon Sep 17 00:00:00 2001 From: David C Danko Date: Mon, 7 Feb 2022 12:04:37 -0500 Subject: [PATCH 2/5] chore: add jpeg to aptitude install --- .circleci/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 3cfe09e9..cd6d43b9 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -13,6 +13,7 @@ jobs: command: | apt update apt install -y wget + apt-get install -y libjpeg-dev zlib1g-dev cd $HOME wget "https://repo.anaconda.com/miniconda/Miniconda3-4.7.10-Linux-x86_64.sh" -O miniconda.sh printf '%s' "8a324adcc9eaf1c09e22a992bb6234d91a94146840ee6b11c114ecadafc68121 miniconda.sh" | sha256sum -c From ec77c050aa69a592d3dca49dbf045b960c3fb6d3 Mon Sep 17 00:00:00 2001 From: David C Danko Date: Mon, 7 Feb 2022 12:48:27 -0500 Subject: [PATCH 3/5] fix: more aptitude installs to support pillow --- .circleci/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index cd6d43b9..47acad93 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -14,6 +14,7 @@ jobs: apt update apt install -y wget apt-get install -y libjpeg-dev zlib1g-dev + apt-get install build-essential libssl-dev libffi-dev libxml2-dev libxslt1-dev zlib1g-dev cd $HOME wget "https://repo.anaconda.com/miniconda/Miniconda3-4.7.10-Linux-x86_64.sh" -O miniconda.sh printf '%s' "8a324adcc9eaf1c09e22a992bb6234d91a94146840ee6b11c114ecadafc68121 miniconda.sh" | sha256sum -c From 4d22f95ce4816a41916df4fa38c5a3cc0f0fdb95 Mon Sep 17 00:00:00 2001 From: David C Danko Date: Mon, 7 Feb 2022 12:55:51 -0500 Subject: [PATCH 4/5] fix: add -y --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 47acad93..43074df9 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -14,7 +14,7 @@ jobs: apt update apt install -y wget apt-get install -y libjpeg-dev zlib1g-dev - apt-get install build-essential libssl-dev libffi-dev libxml2-dev libxslt1-dev zlib1g-dev + apt-get install -y build-essential libssl-dev libffi-dev libxml2-dev libxslt1-dev zlib1g-dev cd $HOME wget "https://repo.anaconda.com/miniconda/Miniconda3-4.7.10-Linux-x86_64.sh" -O miniconda.sh printf '%s' "8a324adcc9eaf1c09e22a992bb6234d91a94146840ee6b11c114ecadafc68121 miniconda.sh" | sha256sum -c From eb8a1f0322e77ed3973fa69eb4580f18a193011e Mon Sep 17 00:00:00 2001 From: David C Danko Date: Mon, 7 Feb 2022 13:54:54 -0500 Subject: [PATCH 5/5] fix: safe load yaml --- cap2/pipeline/preprocessing/error_correct_reads.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cap2/pipeline/preprocessing/error_correct_reads.py b/cap2/pipeline/preprocessing/error_correct_reads.py index 71b20189..889d17af 100644 --- a/cap2/pipeline/preprocessing/error_correct_reads.py +++ b/cap2/pipeline/preprocessing/error_correct_reads.py @@ -2,7 +2,7 @@ import luigi import subprocess from os.path import join, dirname, basename -from yaml import load +from yaml import safe_load from shutil import rmtree from ..config import PipelineConfig @@ -93,7 +93,7 @@ def _run_paired(self): cmd += f' -t {self.cores} -o {outdir}' self.run_cmd(cmd) # runs error correction but leaves output in a dir config_path = f'{self.sample_name}.error_correction_out/corrected/corrected.yaml' - spades_out = load(open(config_path).read()) + spades_out = safe_load(open(config_path).read()) ec_r1 = spades_out[0]['left reads'] assert len(ec_r1) == 1 ec_r2 = spades_out[0]['right reads']