diff --git a/pipes/WDL/tasks/tasks_assembly.wdl b/pipes/WDL/tasks/tasks_assembly.wdl index 20a85f600..7d577bd4b 100644 --- a/pipes/WDL/tasks/tasks_assembly.wdl +++ b/pipes/WDL/tasks/tasks_assembly.wdl @@ -15,7 +15,7 @@ task assemble { String sample_name = basename(basename(reads_unmapped_bam, ".bam"), ".taxfilt") Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-assemble:2.3.2.0" + String docker = "quay.io/broadinstitute/viral-assemble:2.3.6.1" } parameter_meta{ reads_unmapped_bam: { @@ -115,7 +115,7 @@ task select_references { Int? skani_s Int? skani_c - String docker = "quay.io/broadinstitute/viral-assemble:2.3.2.0" + String docker = "quay.io/broadinstitute/viral-assemble:2.3.6.1" Int machine_mem_gb = 4 Int cpu = 2 Int disk_size = 100 @@ -206,7 +206,7 @@ task scaffold { Float? scaffold_min_pct_contig_aligned Int? machine_mem_gb - String docker="quay.io/broadinstitute/viral-assemble:2.3.2.0" + String docker="quay.io/broadinstitute/viral-assemble:2.3.6.1" # do this in multiple steps in case the input doesn't actually have "assembly1-x" in the name String sample_name = basename(basename(contigs_fasta, ".fasta"), ".assembly1-spades") @@ -583,7 +583,7 @@ task align_reads { Boolean skip_mark_dupes = false Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.3.2" + String docker = "quay.io/broadinstitute/viral-core:2.4.0" String sample_name = basename(basename(basename(reads_unmapped_bam, ".bam"), ".taxfilt"), ".clean") } @@ -720,7 +720,7 @@ task refine_assembly_with_aligned_reads { Int min_coverage = 3 Int machine_mem_gb = 15 - String docker = "quay.io/broadinstitute/viral-assemble:2.3.2.0" + String docker = "quay.io/broadinstitute/viral-assemble:2.3.6.1" } Int disk_size = 375 @@ -845,7 +845,7 @@ task refine_2x_and_plot { String? plot_coverage_novoalign_options = "-r Random -l 40 -g 40 -x 20 -t 100 -k" Int? 
machine_mem_gb - String docker = "quay.io/broadinstitute/viral-assemble:2.3.2.0" + String docker = "quay.io/broadinstitute/viral-assemble:2.3.6.1" # do this in two steps in case the input doesn't actually have "cleaned" in the name String sample_name = basename(basename(reads_unmapped_bam, ".bam"), ".cleaned") @@ -981,7 +981,7 @@ task run_discordance { String out_basename = "run" Int min_coverage = 4 - String docker = "quay.io/broadinstitute/viral-core:2.3.2" + String docker = "quay.io/broadinstitute/viral-core:2.4.0" } parameter_meta { reads_aligned_bam: { diff --git a/pipes/WDL/tasks/tasks_demux.wdl b/pipes/WDL/tasks/tasks_demux.wdl index 9a63dc521..86d209970 100644 --- a/pipes/WDL/tasks/tasks_demux.wdl +++ b/pipes/WDL/tasks/tasks_demux.wdl @@ -6,7 +6,7 @@ task merge_tarballs { String out_filename Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.3.2" + String docker = "quay.io/broadinstitute/viral-core:2.4.0" } Int disk_size = 2625 @@ -163,7 +163,7 @@ task illumina_demux { Int? machine_mem_gb Int disk_size = 2625 - String docker = "quay.io/broadinstitute/viral-core:2.3.2" + String docker = "quay.io/broadinstitute/viral-core:2.4.0" } parameter_meta { diff --git a/pipes/WDL/tasks/tasks_interhost.wdl b/pipes/WDL/tasks/tasks_interhost.wdl index e8ca286ce..93e875dba 100644 --- a/pipes/WDL/tasks/tasks_interhost.wdl +++ b/pipes/WDL/tasks/tasks_interhost.wdl @@ -160,7 +160,7 @@ task multi_align_mafft_ref { Float? mafft_gapOpeningPenalty Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-phylo:2.1.20.2" + String docker = "quay.io/broadinstitute/viral-phylo:2.3.6.0" } String fasta_basename = basename(reference_fasta, '.fasta') @@ -207,7 +207,7 @@ task multi_align_mafft { Float? mafft_gapOpeningPenalty Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-phylo:2.1.20.2" + String docker = "quay.io/broadinstitute/viral-phylo:2.3.6.0" } Int disk_size = 200 @@ -351,7 +351,7 @@ task index_ref { File? 
novocraft_license Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.3.2" + String docker = "quay.io/broadinstitute/viral-core:2.4.0" } Int disk_size = 100 @@ -476,7 +476,7 @@ task merge_vcfs_gatk { File ref_fasta Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-phylo:2.1.20.2" + String docker = "quay.io/broadinstitute/viral-phylo:2.3.6.0" String output_prefix = "merged" } diff --git a/pipes/WDL/tasks/tasks_intrahost.wdl b/pipes/WDL/tasks/tasks_intrahost.wdl index 83dc49599..10eb1b4f1 100644 --- a/pipes/WDL/tasks/tasks_intrahost.wdl +++ b/pipes/WDL/tasks/tasks_intrahost.wdl @@ -136,7 +136,7 @@ task lofreq { File reference_fasta String out_basename = basename(aligned_bam, '.bam') - String docker = "quay.io/biocontainers/lofreq:2.1.5--py38h588ecb2_4" + String docker = "quay.io/broadinstitute/viral-phylo:2.3.6.0" } Int disk_size = 200 command <<< @@ -145,8 +145,13 @@ task lofreq { lofreq version | grep version | sed 's/.* \(.*\)/\1/g' | tee LOFREQ_VERSION # make local copies because CWD is writeable but localization dir isn't always - cp "~{reference_fasta}" reference.fasta cp "~{aligned_bam}" aligned.bam + python3<' reference.fasta | tr -d '\nNn' | wc -c) == "0" ]; then @@ -191,7 +196,7 @@ task isnvs_per_sample { Boolean removeDoublyMappedReads = true Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-phylo:2.1.20.2" + String docker = "quay.io/broadinstitute/viral-phylo:2.3.6.0" String sample_name = basename(basename(basename(mapped_bam, ".bam"), ".all"), ".mapped") } @@ -234,7 +239,7 @@ task isnvs_vcf { Boolean naiveFilter = false Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-phylo:2.1.20.2" + String docker = "quay.io/broadinstitute/viral-phylo:2.3.6.0" } parameter_meta { @@ -308,7 +313,7 @@ task annotate_vcf_snpeff { String? emailAddress Int? 
machine_mem_gb - String docker = "quay.io/broadinstitute/viral-phylo:2.1.20.2" + String docker = "quay.io/broadinstitute/viral-phylo:2.3.6.0" String output_basename = basename(basename(in_vcf, ".gz"), ".vcf") } diff --git a/pipes/WDL/tasks/tasks_megablast.wdl b/pipes/WDL/tasks/tasks_megablast.wdl index 7b9822545..d93a359c1 100755 --- a/pipes/WDL/tasks/tasks_megablast.wdl +++ b/pipes/WDL/tasks/tasks_megablast.wdl @@ -15,7 +15,7 @@ task trim_rmdup_subsamp { Int cpu = 16 Int disk_size_gb = 100 - String docker = "quay.io/broadinstitute/viral-assemble:2.3.2.0" + String docker = "quay.io/broadinstitute/viral-assemble:2.3.6.1" } parameter_meta { @@ -36,7 +36,7 @@ task trim_rmdup_subsamp { command <<< set -ex o pipefail assembly.py --version | tee VERSION - #BAM ->FASTQ-> OutBam? https://github.com/broadinstitute/viral-assemble/blob/80bcc1da5c6a0174362ca9fd8bc0b49ee0b4103b/assembly.py#L91 + #BAM ->FASTQ-> OutBam? https://github.com/broadinstitute/viral-assemble/blob/80bcc1da5c6a0174362ca9fd8bc0b49ee0b4103b/assembly.py#L91 assembly.py trim_rmdup_subsamp \ "~{inBam}" \ "~{clipDb}" \ diff --git a/pipes/WDL/tasks/tasks_ncbi.wdl b/pipes/WDL/tasks/tasks_ncbi.wdl index abb5a3d1d..31278f662 100644 --- a/pipes/WDL/tasks/tasks_ncbi.wdl +++ b/pipes/WDL/tasks/tasks_ncbi.wdl @@ -6,7 +6,7 @@ task download_fasta { Array[String]+ accessions String emailAddress - String docker = "quay.io/broadinstitute/viral-phylo:2.1.20.2" + String docker = "quay.io/broadinstitute/viral-phylo:2.3.6.0" } command { @@ -38,7 +38,7 @@ task download_annotations { String emailAddress String combined_out_prefix - String docker = "quay.io/broadinstitute/viral-phylo:2.1.20.2" + String docker = "quay.io/broadinstitute/viral-phylo:2.3.6.0" } command <<< @@ -85,7 +85,7 @@ task annot_transfer { File reference_fasta Array[File]+ reference_feature_table - String docker = "quay.io/broadinstitute/viral-phylo:2.1.20.2" + String docker = "quay.io/broadinstitute/viral-phylo:2.3.6.0" } parameter_meta { @@ -139,7 +139,7 @@ task align_and_annot_transfer_single { Array[File]+ 
reference_fastas Array[File]+ reference_feature_tables - String docker = "quay.io/broadinstitute/viral-phylo:2.1.20.2" + String docker = "quay.io/broadinstitute/viral-phylo:2.3.6.0" } parameter_meta { @@ -192,7 +192,7 @@ task structured_comments { File? filter_to_ids - String docker = "quay.io/broadinstitute/viral-core:2.3.2" + String docker = "quay.io/broadinstitute/viral-core:2.4.0" } String out_base = basename(assembly_stats_tsv, '.txt') command <<< @@ -272,7 +272,7 @@ task rename_fasta_header { String out_basename = basename(genome_fasta, ".fasta") - String docker = "quay.io/broadinstitute/viral-core:2.3.2" + String docker = "quay.io/broadinstitute/viral-core:2.4.0" } command { set -e @@ -437,7 +437,7 @@ task sra_meta_prep { Boolean paired String out_name = "sra_metadata.tsv" - String docker="quay.io/broadinstitute/viral-core:2.3.2" + String docker="quay.io/broadinstitute/viral-core:2.4.0" } Int disk_size = 100 parameter_meta { @@ -653,7 +653,7 @@ task biosample_to_genbank { File? filter_to_ids Boolean s_dropout_note = true - String docker = "quay.io/broadinstitute/viral-phylo:2.1.20.2" + String docker = "quay.io/broadinstitute/viral-phylo:2.3.6.0" } String base = basename(biosample_attributes, ".txt") command { @@ -851,7 +851,7 @@ task prepare_genbank { String? assembly_method_version Int? 
machine_mem_gb - String docker = "quay.io/broadinstitute/viral-phylo:2.1.20.2" + String docker = "quay.io/broadinstitute/viral-phylo:2.3.6.0" } parameter_meta { diff --git a/pipes/WDL/tasks/tasks_nextstrain.wdl b/pipes/WDL/tasks/tasks_nextstrain.wdl index 98b1733ab..52d874982 100644 --- a/pipes/WDL/tasks/tasks_nextstrain.wdl +++ b/pipes/WDL/tasks/tasks_nextstrain.wdl @@ -11,9 +11,15 @@ task taxid_to_nextclade_dataset_name { 2697049 : 'sars-cov-2', 641809 : 'flu_h1n1pdm_ha', 335341 : 'flu_h3n2_ha', + 119210 : 'flu_h3n2_ha', 518987 : 'flu_vic_ha', 208893 : 'rsv_a', 208895 : 'rsv_b', + 11234 : 'nextstrain/measles/N450/WHO-2012', + 11053 : 'nextstrain/dengue/all', + 11060 : 'nextstrain/dengue/all', + 11069 : 'nextstrain/dengue/all', + 11070 : 'nextstrain/dengue/all', 10244 : 'MPXV', 619591 : 'hMPXV' } @@ -43,13 +49,11 @@ task nextclade_one_sample { File genome_fasta File? root_sequence File? auspice_reference_tree_json - File? qc_config_json + File? pathogen_json File? gene_annotations_json - File? pcr_primers_csv - File? virus_properties String? dataset_name Int disk_size = 50 - String docker = "nextstrain/nextclade:2.14.0" + String docker = "nextstrain/nextclade:3.9.1" } String basename = basename(genome_fasta, ".fasta") command <<< @@ -67,21 +71,19 @@ task nextclade_one_sample { DATASET_ARG="--input-dataset ." 
python3< MEM_BYTES >>> runtime { - docker: "quay.io/broadinstitute/viral-core:2.3.2" + docker: "quay.io/broadinstitute/viral-core:2.4.0" memory: "1 GB" cpu: cpus disks: "local-disk " + disk_size + " LOCAL" @@ -430,7 +430,7 @@ task tsv_join { runtime { memory: "~{machine_mem_gb} GB" cpu: 4 - docker: "quay.io/broadinstitute/viral-core:2.3.2" + docker: "quay.io/broadinstitute/viral-core:2.4.0" disks: "local-disk " + disk_size + " HDD" disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x4" @@ -517,7 +517,7 @@ task tsv_stack { input { Array[File]+ input_tsvs String out_basename - String docker = "quay.io/broadinstitute/viral-core:2.3.2" + String docker = "quay.io/broadinstitute/viral-core:2.4.0" } Int disk_size = 50 @@ -780,7 +780,7 @@ task filter_sequences_by_length { File sequences_fasta Int min_non_N = 1 - String docker = "quay.io/broadinstitute/viral-core:2.3.2" + String docker = "quay.io/broadinstitute/viral-core:2.4.0" Int disk_size = 750 } parameter_meta { diff --git a/pipes/WDL/workflows/augur_from_assemblies.wdl b/pipes/WDL/workflows/augur_from_assemblies.wdl index e94ee5574..08c3ab87e 100644 --- a/pipes/WDL/workflows/augur_from_assemblies.wdl +++ b/pipes/WDL/workflows/augur_from_assemblies.wdl @@ -13,6 +13,7 @@ workflow augur_from_assemblies { input { Array[File]+ assembly_fastas + Array[File]? contextual_genome_fastas Array[File]+ sample_metadata_tsvs File ref_fasta @@ -29,6 +30,10 @@ workflow augur_from_assemblies { description: "Set of assembled genomes to align and build trees. These must represent a single chromosome/segment of a genome only. Fastas may be one-sequence-per-individual or a concatenated multi-fasta (unaligned) or a mixture of the two. They may be compressed (gz, bz2, zst, lz4), uncompressed, or a mixture.", patterns: ["*.fasta", "*.fa", "*.fasta.gz", "*.fasta.zst"] } + contextual_genome_fastas: { + description: "Set of near-complete contextual genomes to include in tree build. 
Each fasta provided must represent a single chromosome/segment of a genome. Fastas may be one-sequence-per-individual or a concatenated multi-fasta (unaligned) or a mixture of the two. They may be compressed (gz, bz2, zst, lz4), uncompressed, or a mixture. ", + patterns: ["*.fasta", "*.fa", "*.fasta.gz", "*.fasta.zst"] + } sample_metadata_tsvs: { description: "Tab-separated metadata file that contain binning variables and values. Must contain all samples: output will be filtered to the IDs present in this file.", patterns: ["*.txt", "*.tsv"] @@ -54,7 +59,7 @@ workflow augur_from_assemblies { call utils.zcat { input: - infiles = assembly_fastas, + infiles = flatten([assembly_fastas, select_first([contextual_genome_fastas,[]])]), output_name = "all_samples_combined_assembly.fasta" } call utils.filter_sequences_by_length { diff --git a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl index 89258cfe1..0af33b228 100644 --- a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl +++ b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl @@ -150,21 +150,32 @@ workflow scaffold_and_refine_multitaxa { "sample": '{"entityType":"sample","entityName":"' + sample_id + '"}' } - scatter(h in assembly_header) { - String stat_by_taxon = stats_by_taxon[h] + if(assembly_length_unambiguous > min_scaffold_unambig) { + scatter(h in assembly_header) { + String stat_by_taxon = stats_by_taxon[h] + } } } ### summary stats - call utils.concatenate { - input: - infiles = [write_tsv([assembly_header]), write_tsv(stat_by_taxon)], - output_name = "assembly_metadata-~{sample_id}.tsv" + if (length(select_all(stat_by_taxon)) > 0) { + call utils.concatenate as assembly_stats_non_empty { + input: + infiles = [write_tsv([assembly_header]), write_tsv(select_all(stat_by_taxon))], + output_name = "assembly_metadata-~{sample_id}.tsv" + } + } + if (length(select_all(stat_by_taxon)) == 0) { + call utils.concatenate as assembly_stats_empty { + 
input: + infiles = [write_tsv([assembly_header])], + output_name = "assembly_metadata-~{sample_id}.tsv" + } } output { Array[Map[String,String]] assembly_stats_by_taxon = stats_by_taxon - File assembly_stats_by_taxon_tsv = concatenate.combined + File assembly_stats_by_taxon_tsv = select_first([assembly_stats_non_empty.combined, assembly_stats_empty.combined]) String assembly_method = "viral-ngs/scaffold_and_refine_multitaxa" #String assembly_top_taxon_id = select_references.top_matches_per_cluster_basenames[0] diff --git a/requirements-modules.txt b/requirements-modules.txt index 9221cd99c..c0cab07d4 100644 --- a/requirements-modules.txt +++ b/requirements-modules.txt @@ -1,12 +1,12 @@ -broadinstitute/viral-core=2.3.2 -broadinstitute/viral-assemble=2.3.2.0 +broadinstitute/viral-core=2.4.0 +broadinstitute/viral-assemble=2.3.6.1 broadinstitute/viral-classify=2.2.4.2 -broadinstitute/viral-phylo=2.1.20.2 +broadinstitute/viral-phylo=2.3.6.0 broadinstitute/py3-bio=0.1.2 broadinstitute/beast-beagle-cuda=1.10.5pre broadinstitute/ncbi-tools=2.10.7.10 broadinstitute/delphy=0.999 nextstrain/base=build-20240318T173028Z andersenlabapps/ivar=1.3.1 -quay.io/staphb/pangolin=4.3.1-pdata-1.23.1 -nextstrain/nextclade=2.14.0 +quay.io/staphb/pangolin=4.3.1-pdata-1.30 +nextstrain/nextclade=3.9.1 diff --git a/test/input/WDL/miniwdl-local/test_inputs-genbank-local.json b/test/input/WDL/miniwdl-local/test_inputs-genbank-local.json deleted file mode 100644 index 1c938870e..000000000 --- a/test/input/WDL/miniwdl-local/test_inputs-genbank-local.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "genbank.molType": "cRNA", - "genbank.reference_accessions": ["KM821997.1", "KM821998.1"], - "genbank.coverage_table": "test/input/genbank/coverage_report-RUN1-workflow.txt", - "genbank.alignments_bams": [], - "genbank.sequencingTech": "Illumina MiSeq", - "genbank.biosample_attributes": "test/input/genbank/biosample-attributes-lasv.txt", - "genbank.prep_genbank.assembly_method": "placeholder assembly software", 
- "genbank.prep_genbank.assembly_method_version": "5.4.3.2.1", - "genbank.comment": "this is only a test -- DO NOT SUBMIT to NCBI", - "genbank.author_list": "Odia,Ikponmwosa, Ehaine,Philomena, Oguzie,Judith,U. Siddle,Katherine,J. Mehta,Samar, Barnes,Kayla,M. Winnicki,Sarah,M. Brehio,Patrick, Shah,Rickey,R. Chak,Bridget, Yozwiak,Nathan, Amao,Adebayo, Nosamiefan,Iguosadolo, Birren,Bruce,W. Park,Daniel,J. Folarin,Onikepe, Okokhere,Peter, Sabeti,Pardis,C. Happi,Christian,T.", - "genbank.author_sbt_defaults_yaml": "test/input/genbank/default_sbt_values.yaml", - "genbank.author_sbt_j2_template": "test/input/genbank/author_template.sbt.j2", - "genbank.assemblies_fasta": [ - "test/input/LASV_NGA_2018_0026.fasta", - "test/input/LASV_NGA_2018_0097.fasta", - "test/input/LASV_NGA_2018_0541.fasta" - ], - "genbank.email_address": "viral-ngs@broadinstitute.org" -} -