diff --git a/.circleci/config.yml b/.circleci/config.yml index 3cfe09e9..43074df9 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -13,6 +13,8 @@ jobs: command: | apt update apt install -y wget + apt-get install -y libjpeg-dev zlib1g-dev + apt-get install -y build-essential libssl-dev libffi-dev libxml2-dev libxslt1-dev zlib1g-dev cd $HOME wget "https://repo.anaconda.com/miniconda/Miniconda3-4.7.10-Linux-x86_64.sh" -O miniconda.sh printf '%s' "8a324adcc9eaf1c09e22a992bb6234d91a94146840ee6b11c114ecadafc68121 miniconda.sh" | sha256sum -c diff --git a/cap2/pipeline/databases/__init__.py b/cap2/pipeline/databases/__init__.py index 5db82f4c..3ed2ad77 100644 --- a/cap2/pipeline/databases/__init__.py +++ b/cap2/pipeline/databases/__init__.py @@ -4,7 +4,7 @@ from .hmp_db import HmpDB from .uniref import Uniref90 from .kraken2_db import Kraken2DB, BrakenKraken2DB - +from .fast_kraken2_db import FastKraken2DB MODULES = [ HumanRemovalDB, @@ -12,4 +12,5 @@ Uniref90, Kraken2DB, BrakenKraken2DB, + FastKraken2DB, ] diff --git a/cap2/pipeline/databases/mouse_removal_db.py b/cap2/pipeline/databases/mouse_removal_db.py index 1effa25b..2e5f137b 100644 --- a/cap2/pipeline/databases/mouse_removal_db.py +++ b/cap2/pipeline/databases/mouse_removal_db.py @@ -1,6 +1,6 @@ import luigi -from os.path import join, dirname +from os.path import join, dirname, isfile from glob import glob import subprocess from os import makedirs @@ -36,13 +36,14 @@ def __init__(self, *args, **kwargs): def download_mouse_genome(self): local_dir = join(self.config.db_dir, 'GRCm39') makedirs(local_dir, exist_ok=True) - cmd = ( - 'wget ' - f'--directory-prefix={local_dir} ' - f'{MOUSE_GENOME_URL} ' - ) - self.run_cmd(cmd) local_path = join(local_dir, 'GCA_000001635.9_GRCm39_genomic.fna.gz') + if not isfile(local_path): + cmd = ( + 'wget ' + f'--directory-prefix={local_dir} ' + f'{MOUSE_GENOME_URL} ' + ) + self.run_cmd(cmd) return local_path @property diff --git a/cap2/pipeline/preprocessing/error_correct_reads.py b/cap2/pipeline/preprocessing/error_correct_reads.py index 71b20189..889d17af 100644 --- a/cap2/pipeline/preprocessing/error_correct_reads.py +++ b/cap2/pipeline/preprocessing/error_correct_reads.py @@ -2,7 +2,7 @@ import luigi import subprocess from os.path import join, dirname, basename -from yaml import load +from yaml import safe_load from shutil import rmtree from ..config import PipelineConfig @@ -93,7 +93,7 @@ def _run_paired(self): cmd += f' -t {self.cores} -o {outdir}' self.run_cmd(cmd) # runs error correction but leaves output in a dir config_path = f'{self.sample_name}.error_correction_out/corrected/corrected.yaml' - spades_out = load(open(config_path).read()) + spades_out = safe_load(open(config_path).read()) ec_r1 = spades_out[0]['left reads'] assert len(ec_r1) == 1 ec_r2 = spades_out[0]['right reads']