Skip to content

Commit

Permalink
Merge pull request #202 from rhysnewell/fix-batch-read-type
Browse files (browse the repository at this point in the history)
fix: batch checks read type correctly
  • Loading branch information
rhysnewell authored Apr 28, 2024
2 parents bf90668 + 51cfd30 commit 9d97ef3
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 43 deletions.
24 changes: 24 additions & 0 deletions aviary/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,25 @@
__version__ = "0.9.0"


# CONSTANTS
# Defined once at package level so that the command-line parser
# (aviary/aviary.py) and the batch processor (aviary/modules/processor.py)
# validate against the same lists instead of keeping separate copies.

# Accepted long read type identifiers. Used as the argparse `choices` for the
# `longread_type` option and to validate the `long_read_type` column of each
# row in a batch file.
LONG_READ_TYPES = ["ont", "ont_hq", "rs", "sq", "ccs", "hifi"]

# Accepted Medaka model names. Used as the argparse `choices` for
# `--medaka-model` in both the long read argument group and the batch options.
MEDAKA_MODELS = [
"r103_fast_g507", "r103_fast_snp_g507", "r103_fast_variant_g507", "r103_hac_g507", "r103_hac_snp_g507",
"r103_hac_variant_g507", "r103_min_high_g345", "r103_min_high_g360", "r103_prom_high_g360", "r103_prom_snp_g3210",
"r103_prom_variant_g3210", "r103_sup_g507", "r103_sup_snp_g507", "r103_sup_variant_g507", "r1041_e82_260bps_fast_g632",
"r1041_e82_260bps_fast_variant_g632", "r1041_e82_260bps_hac_g632", "r1041_e82_260bps_hac_variant_g632", "r1041_e82_260bps_sup_g632",
"r1041_e82_260bps_sup_variant_g632", "r1041_e82_400bps_fast_g615", "r1041_e82_400bps_fast_g632",
"r1041_e82_400bps_fast_variant_g615", "r1041_e82_400bps_fast_variant_g632", "r1041_e82_400bps_hac_g615",
"r1041_e82_400bps_hac_g632", "r1041_e82_400bps_hac_variant_g615", "r1041_e82_400bps_hac_variant_g632", "r1041_e82_400bps_sup_g615",
"r1041_e82_400bps_sup_variant_g615", "r104_e81_fast_g5015", "r104_e81_fast_variant_g5015", "r104_e81_hac_g5015",
"r104_e81_hac_variant_g5015", "r104_e81_sup_g5015", "r104_e81_sup_g610", "r104_e81_sup_variant_g610", "r10_min_high_g303",
"r10_min_high_g340", "r941_e81_fast_g514", "r941_e81_fast_variant_g514", "r941_e81_hac_g514", "r941_e81_hac_variant_g514",
"r941_e81_sup_g514", "r941_e81_sup_variant_g514", "r941_min_fast_g303", "r941_min_fast_g507", "r941_min_fast_snp_g507",
"r941_min_fast_variant_g507", "r941_min_hac_g507", "r941_min_hac_snp_g507", "r941_min_hac_variant_g507", "r941_min_high_g303",
"r941_min_high_g330", "r941_min_high_g340_rle", "r941_min_high_g344", "r941_min_high_g351", "r941_min_high_g360", "r941_min_sup_g507",
"r941_min_sup_snp_g507", "r941_min_sup_variant_g507", "r941_prom_fast_g303", "r941_prom_fast_g507", "r941_prom_fast_snp_g507",
"r941_prom_fast_variant_g507", "r941_prom_hac_g507", "r941_prom_hac_snp_g507", "r941_prom_hac_variant_g507", "r941_prom_high_g303",
"r941_prom_high_g330", "r941_prom_high_g344", "r941_prom_high_g360", "r941_prom_high_g4011", "r941_prom_snp_g303", "r941_prom_snp_g322",
"r941_prom_snp_g360", "r941_prom_sup_g507", "r941_prom_sup_snp_g507", "r941_prom_sup_variant_g507", "r941_prom_variant_g303",
"r941_prom_variant_g322", "r941_prom_variant_g360", "r941_sup_plant_g610", "r941_sup_plant_variant_g610"
]
35 changes: 12 additions & 23 deletions aviary/aviary.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
###############################################################################
import aviary.config.config as Config
from aviary.modules.processor import Processor, process_batch
from .__init__ import __version__
from .__init__ import __version__, MEDAKA_MODELS, LONG_READ_TYPES
__author__ = "Rhys Newell"
__copyright__ = "Copyright 2022"
__credits__ = ["Rhys Newell"]
Expand Down Expand Up @@ -478,34 +478,15 @@ def main():
'reads, "ont" for Oxford Nanopore and "ont_hq" for Oxford Nanopore high quality reads (Guppy5+ or Q20) \n',
dest='longread_type',
default="ont",
choices=["ont","ont_hq", "rs", "sq", "ccs", "hifi"],
choices=LONG_READ_TYPES,
)

long_read_group.add_argument(
'--medaka-model', '--medaka_model',
help='Medaka model to use for polishing long reads. \n',
dest='medaka_model',
default="r941_min_hac_g507",
choices=[
"r103_fast_g507", "r103_fast_snp_g507", "r103_fast_variant_g507", "r103_hac_g507", "r103_hac_snp_g507",
"r103_hac_variant_g507", "r103_min_high_g345", "r103_min_high_g360", "r103_prom_high_g360", "r103_prom_snp_g3210",
"r103_prom_variant_g3210", "r103_sup_g507", "r103_sup_snp_g507", "r103_sup_variant_g507", "r1041_e82_260bps_fast_g632",
"r1041_e82_260bps_fast_variant_g632", "r1041_e82_260bps_hac_g632", "r1041_e82_260bps_hac_variant_g632", "r1041_e82_260bps_sup_g632",
"r1041_e82_260bps_sup_variant_g632", "r1041_e82_400bps_fast_g615", "r1041_e82_400bps_fast_g632",
"r1041_e82_400bps_fast_variant_g615", "r1041_e82_400bps_fast_variant_g632", "r1041_e82_400bps_hac_g615",
"r1041_e82_400bps_hac_g632", "r1041_e82_400bps_hac_variant_g615", "r1041_e82_400bps_hac_variant_g632", "r1041_e82_400bps_sup_g615",
"r1041_e82_400bps_sup_variant_g615", "r104_e81_fast_g5015", "r104_e81_fast_variant_g5015", "r104_e81_hac_g5015",
"r104_e81_hac_variant_g5015", "r104_e81_sup_g5015", "r104_e81_sup_g610", "r104_e81_sup_variant_g610", "r10_min_high_g303",
"r10_min_high_g340", "r941_e81_fast_g514", "r941_e81_fast_variant_g514", "r941_e81_hac_g514", "r941_e81_hac_variant_g514",
"r941_e81_sup_g514", "r941_e81_sup_variant_g514", "r941_min_fast_g303", "r941_min_fast_g507", "r941_min_fast_snp_g507",
"r941_min_fast_variant_g507", "r941_min_hac_g507", "r941_min_hac_snp_g507", "r941_min_hac_variant_g507", "r941_min_high_g303",
"r941_min_high_g330", "r941_min_high_g340_rle", "r941_min_high_g344", "r941_min_high_g351", "r941_min_high_g360", "r941_min_sup_g507",
"r941_min_sup_snp_g507", "r941_min_sup_variant_g507", "r941_prom_fast_g303", "r941_prom_fast_g507", "r941_prom_fast_snp_g507",
"r941_prom_fast_variant_g507", "r941_prom_hac_g507", "r941_prom_hac_snp_g507", "r941_prom_hac_variant_g507", "r941_prom_high_g303",
"r941_prom_high_g330", "r941_prom_high_g344", "r941_prom_high_g360", "r941_prom_high_g4011", "r941_prom_snp_g303", "r941_prom_snp_g322",
"r941_prom_snp_g360", "r941_prom_sup_g507", "r941_prom_sup_snp_g507", "r941_prom_sup_variant_g507", "r941_prom_variant_g303",
"r941_prom_variant_g322", "r941_prom_variant_g360", "r941_sup_plant_g610", "r941_sup_plant_variant_g610"
]
choices=MEDAKA_MODELS
)

long_read_group.add_argument(
Expand Down Expand Up @@ -1109,7 +1090,7 @@ def main():
type=str2bool,
nargs='?',
const=True,
default=True
default=False
)

batch_options.add_argument(
Expand All @@ -1136,6 +1117,14 @@ def main():
default='95'
)

batch_options.add_argument(
'--medaka-model', '--medaka_model',
help='Medaka model to use for polishing long reads. \n',
dest='medaka_model',
default="r941_min_hac_g507",
choices=MEDAKA_MODELS
)

add_workflow_arg(
batch_options,
['get_bam_indices', 'recover_mags', 'annotate', 'lorikeet'],
Expand Down
47 changes: 27 additions & 20 deletions aviary/modules/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,15 @@
import logging
import os
import subprocess
import copy
from pathlib import Path
from glob import glob

# Local imports
from snakemake import utils
from snakemake.io import load_configfile
from ruamel.yaml import YAML # used for yaml reading with comments
from aviary import LONG_READ_TYPES

BATCH_HEADER=['sample', 'short_reads_1', 'short_reads_2', 'long_reads', 'long_read_type', 'assembly', 'coassemble']

Expand Down Expand Up @@ -555,36 +557,41 @@ def process_batch(args, prefix):
s2 = check_batch_input(batch.iloc[i, 2], "none", split=True)
l = check_batch_input(batch.iloc[i, 3], "none", split=True)
l_type = check_batch_input(batch.iloc[i, 4], "ont", split=False)
if l_type not in LONG_READ_TYPES:
logging.error(f"Unknown long read type {l_type} specified.")
logging.error(f"Valid long read types: {LONG_READ_TYPES}")
sys.exit(1)
assembly = check_batch_input(batch.iloc[i, 5], None, split=False)
coassemble = check_batch_input(batch.iloc[i, 6], False, split=False)


new_args = copy.deepcopy(args)
# update the value of args
args.output = f"{prefix}/{sample}"
runs.append(args.output)
args.pe1 = s1
args.pe2 = s2
new_args.output = f"{prefix}/{sample}"
runs.append(new_args.output)
new_args.pe1 = s1
new_args.pe2 = s2

args.longreads = l
args.longread_type = l_type
args.assembly = assembly
args.coassemble = coassemble
new_args.longreads = l
new_args.longread_type = l_type
new_args.assembly = assembly
new_args.coassemble = coassemble

# ensure output folder exists
if not os.path.exists(args.output):
os.makedirs(args.output)
if not os.path.exists(new_args.output):
os.makedirs(new_args.output)

# setup processor for this line
processor = Processor(args)
processor = Processor(new_args)
processor.make_config()

processor.run_workflow(cores=int(args.n_cores),
dryrun=args.dryrun,
clean=args.clean,
conda_frontend=args.conda_frontend,
snakemake_args=args.cmds,
rerun_triggers=args.rerun_triggers,
profile=args.snakemake_profile,
cluster_retries=args.cluster_retries,
processor.run_workflow(cores=int(new_args.n_cores),
dryrun=new_args.dryrun,
clean=new_args.clean,
conda_frontend=new_args.conda_frontend,
snakemake_args=new_args.cmds,
rerun_triggers=new_args.rerun_triggers,
profile=new_args.snakemake_profile,
cluster_retries=new_args.cluster_retries,
write_to_script=write_to_script)

if args.cluster:
Expand Down

0 comments on commit 9d97ef3

Please sign in to comment.