From 8b7731c276cb8d51affc15c252625235bb29ec18 Mon Sep 17 00:00:00 2001 From: Chris Tomkins-Tinch Date: Tue, 5 Nov 2024 15:12:37 -0500 Subject: [PATCH] add deduplication of sequences by ID in augur_from_assemblies (#564) add deduplication of sequences by ID in augur_from_assemblies, after filtering by length, prior to MSA via MAFFT. --- pipes/WDL/workflows/augur_from_assemblies.wdl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/augur_from_assemblies.wdl b/pipes/WDL/workflows/augur_from_assemblies.wdl index 08c3ab87e..0eb30b244 100644 --- a/pipes/WDL/workflows/augur_from_assemblies.wdl +++ b/pipes/WDL/workflows/augur_from_assemblies.wdl @@ -67,9 +67,13 @@ workflow augur_from_assemblies { sequences_fasta = zcat.combined, min_non_N = min_unambig_genome } + call nextstrain.nextstrain_deduplicate_sequences as dedup_seqs { + input: + sequences_fasta = filter_sequences_by_length.filtered_fasta + } call nextstrain.mafft_one_chr as mafft { input: - sequences = filter_sequences_by_length.filtered_fasta, + sequences = dedup_seqs.sequences_deduplicated_fasta, ref_fasta = ref_fasta, basename = "all_samples_aligned.fasta" }