Skip to content

Commit

Permalink
Merge pull request #347 from UPHL-BioNGS/update-20240702
Browse files Browse the repository at this point in the history
Update 20240702
  • Loading branch information
erinyoung authored Jul 9, 2024
2 parents c3740a9 + 5be5d8c commit 6f3584d
Show file tree
Hide file tree
Showing 25 changed files with 860 additions and 621 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/check_versions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ jobs:
echo "New version for $base! Upgrade to $latest_version from $workflow_version." | tee -a versions.txt
issue_text="$issue_text<br>- $base from $workflow_version to $latest_version "
fi
docker rmi $base:latest
done
latest_nextclade_version=$(docker run nextstrain/nextclade:latest nextclade --version | awk '{print $2}')
Expand Down
137 changes: 132 additions & 5 deletions .github/workflows/github_actions.config
Original file line number Diff line number Diff line change
@@ -1,7 +1,134 @@
params.vadr = false

process {
withName:ivar_consensus{
memory = '4 GB'
}
errorStrategy = { task.attempt < 2 ? 'retry' : 'terminate'}
withName:aci{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:artic{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:artic_read_filtering{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:bbnorm{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:bcftools_variants{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:bwa{
publishDir = [ path: "cecret", mode: 'link', pattern: 'logs/*/*log' ]
}
withName:download{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:fasta_prep{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:summary{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:unzip{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:fastp{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:fastqc{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:freyja_variants{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:freyja_demix{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:freyja_aggregate{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:heatcluster{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:igv_reports{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:iqtree2{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:ivar_consensus{
memory = '4 GB'
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:ivar_variants{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:ivar_trim{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:kraken2{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:mafft{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:minimap2{
publishDir = [ path: "cecret", mode: 'link', pattern: 'logs/*/*log' ]
}
withName:multiqc_combine{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:nextclade_dataset{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:nextclade{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:pango_collapse{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:pangolin{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:phytreeviz{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:samtools_stats{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:samtools_coverage{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:samtools_flagstat{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:samtools_depth{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:samtools_ampliconstats{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:samtools_plot_ampliconstats{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:samtools_sort{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:samtools_filter{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:samtools_ampliconclip{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:samtools_markdup{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:seqyclean{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:snpdists{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
withName:vadr{
publishDir = [ path: "cecret", mode: 'link', saveAs: { filename -> filename.equals('versions.yml') ? null : filename }]
}
}

4 changes: 4 additions & 0 deletions .github/workflows/test_kraken2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,9 @@ jobs:
cat cecret*/cecret_results.txt
- name: Kraken2 results
run: |
wc -l cecret/kraken2/*_kraken2_report.txt
- name: Clean
run: rm -rf work .nextflow*
37 changes: 37 additions & 0 deletions .github/workflows/test_mpx_yale.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
name: Test mpx workflow with yale primers

on: [pull_request, workflow_dispatch]

run-name: mpx_yale

jobs:

test:
runs-on: ubuntu-20.04
steps:
- name: Checkout
uses: actions/checkout@master

- name: Install Nextflow
run: |
wget -qO- get.nextflow.io | bash
sudo mv nextflow /usr/local/bin/
- name: Download reads
run: |
mkdir reads
cd reads
wget -q ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR206/024/SRR20689724/SRR20689724_1.fastq.gz
wget -q ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR206/024/SRR20689724/SRR20689724_2.fastq.gz
cd ../
- name: Run Cecret
run: |
nextflow run . -profile docker,mpx_yale -c .github/workflows/github_actions.config --maxcpus 2 --medcpus 2
ls cecret*
head cecret*/cecret_results.txt
- name: Clean
run: rm -rf work .nextflow*
3 changes: 2 additions & 1 deletion .github/workflows/test_primers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ jobs:
'ncov_V3',
'ncov_V4',
'ncov_V4.1',
'ncov_V5.3.2'
'ncov_V5.3.2',
'mpx_yale'
]

steps:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_profile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
- name: Run Cecret
run: |
nextflow run . -profile docker,test -c .github/workflows/github_actions.config --maxcpus 2 --medcpus 2 --cleaner 'fastp' --aligner 'minimap2' --mpileup_depth 200 --vadr false
nextflow run . -profile docker,test -c .github/workflows/github_actions.config --maxcpus 2 --medcpus 2 --cleaner 'fastp' --aligner 'minimap2' --mpileup_depth 200
ls cecret*
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_profile1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
- name: Run Cecret
run: |
nextflow run . -profile docker,test1 -c .github/workflows/github_actions.config --maxcpus 2 --medcpus 2 --cleaner 'fastp' --aligner 'minimap2' --mpileup_depth 200 --vadr false
nextflow run . -profile docker,test1 -c .github/workflows/github_actions.config --maxcpus 2 --medcpus 2 --cleaner 'fastp' --aligner 'minimap2' --mpileup_depth 200
ls cecret*
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test_profile2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
- name: Run Cecret
run: |
nextflow run . -profile docker,test2 -c .github/workflows/github_actions.config --maxcpus 2 --medcpus 2 --cleaner 'fastp' --aligner 'minimap2' --mpileup_depth 200 --vadr false
nextflow run . -profile docker,test2 -c .github/workflows/github_actions.config --maxcpus 2 --medcpus 2 --cleaner 'fastp' --aligner 'minimap2' --mpileup_depth 200
ls cecret*
Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -230,14 +230,17 @@ params.minimum_depth = 10
The defaults for Cecret continue to be for SARS-CoV-2, but there are growing demands for a workflow for Monkeypox Virus. As such, there are a few parameters that might benefit the **End User**.

### Using the Monkeypox profile
There are three profiles for Monkeypox Virus sequencing : `mpx`, `mpx_idt` and `mpx_primalseq`. The `mpx` profile has some defaults for a metagenomic-type sequencing, while `mpx_idt` is for libraries prepped with [IDT](https://www.idtdna.com/)'s primers, and `mpx_primalseq` which has been [validated](https://www.medrxiv.org/content/10.1101/2022.10.14.22280783v1.full-text) with Illumina library prep methods and sequencing platforms.
There are four profiles for Monkeypox Virus sequencing: `mpx`, `mpx_idt`, `mpx_yale`, and `mpx_primalseq`. The `mpx` profile has some defaults for a metagenomic-type sequencing, `mpx_idt` is for libraries prepped with [IDT](https://www.idtdna.com/)'s primers, `mpx_yale` is for primalseq amplicons called against Yale's reference, and `mpx_primalseq` has been [validated](https://www.medrxiv.org/content/10.1101/2022.10.14.22280783v1.full-text) with Illumina library prep methods and sequencing platforms.
```
# metagenomic
nextflow run UPHL-BioNGS/Cecret -profile singularity,mpx
# using IDT's primers
nextflow run UPHL-BioNGS/Cecret -profile singularity,mpx_idt
# using primalseq with Yale's reference
nextflow run UPHL-BioNGS/Cecret -profile singularity,mpx_yale
# using Illumina library prep methods and sequencing platforms
nextflow run UPHL-BioNGS/Cecret -profile singularity,mpx_primalseq
```
Expand Down
35 changes: 22 additions & 13 deletions bin/combine_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
aci_file = 'aci_coverage_summary.csv'
ampliconstats_file = 'ampliconstats.summary'
samtools_coverage_file = 'samtools_coverage_summary.tsv'
pangolin_file = 'multiqc_data/multiqc_pangolin.txt'
pangolin_file = 'lineage_report.csv'
pango_collapse_file = 'pango_collapse.csv'
nextclade_file = 'multiqc_data/multiqc_nextclade.txt'
nextclade_file = 'nextclade.csv'
vadr_file = 'vadr.csv'
fastp_file = 'multiqc_data/multiqc_general_stats.txt'
fastq_names_file = 'fastq_names.csv'
Expand Down Expand Up @@ -277,9 +277,17 @@
summary_df = summary_df.drop('sample', axis=1)
columns = columns + ['samtools_meandepth_after_trimming', 'samtools_per_1X_coverage_after_trimming']

def vadr_sample_name(s):
if s.count('.') >=1:
if len(s.split(".")[-1]) > 2:
return ''.join(s.split(".")[:-1])
return s

if exists(vadr_file) :
print("Getting results from vadr file " + vadr_file)
vadr_df = pd.read_csv(vadr_file, dtype = str, usecols = ['name', 'p/f', 'model', 'alerts'], index_col= False)
vadr_df = vadr_df[vadr_df['name'] != 'name']
vadr_df = vadr_df[vadr_df['name'] != 'seq']
vadr_df = vadr_df.add_prefix('vadr_')
vadr_columns = list(vadr_df.columns)
vadr_columns.remove('vadr_name')
Expand All @@ -291,7 +299,8 @@
summary_df.drop('vadr_name', axis=1, inplace=True)
columns = ['vadr_p/f'] + columns + vadr_columns
else:
vadr_df['sample_match'] = vadr_df['vadr_name'].str.replace('Consensus_', '', regex = False).str.split(".").str[0]
vadr_df['sample_match'] = vadr_df['vadr_name'].str.replace('Consensus_', '', regex = False).apply(vadr_sample_name)

summary_df = pd.merge(summary_df, vadr_df, left_on = 'sample_id', right_on = 'sample_match', how = 'outer')
summary_df['sample_id'].fillna(summary_df['sample_match'], inplace=True)
summary_df.drop('vadr_name', axis=1, inplace=True)
Expand All @@ -301,42 +310,42 @@
if exists(nextclade_file) :
print("Getting results from nextclade file " + nextclade_file)

use_cols = ['Sample', 'clade', 'qc_overallstatus', 'qc_overallscore']
use_cols = ['seqName', 'clade', 'qc.overallStatus', 'qc.overallScore']

first = pd.read_table(nextclade_file, sep = '\t' , dtype = str, nrows=1)
first = pd.read_table(nextclade_file, sep = ';' , dtype = str, nrows=1)
if 'clade_who' in first.columns:
use_cols.append('clade_who')
if 'outbreak' in first.columns:
use_cols.append('outbreak')
if 'lineage' in first.columns:
use_cols.append('lineage')

nextclade_df = pd.read_table(nextclade_file, sep = '\t' , dtype = str, usecols = use_cols)
nextclade_df = pd.read_table(nextclade_file, sep = ';' , dtype = str, usecols = use_cols)
nextclade_df=nextclade_df.add_prefix('nextclade_')
nextclade_columns = list(nextclade_df.columns)
nextclade_df['sample_match'] = nextclade_df['nextclade_Sample'].str.replace('Consensus_', '', regex = False)
nextclade_columns.remove('nextclade_Sample')
nextclade_df['sample_match'] = nextclade_df['nextclade_seqName'].str.replace('Consensus_', '', regex = False).str.split(' ').str[0]
nextclade_columns.remove('nextclade_seqName')
nextclade_columns.remove('nextclade_clade')

summary_df = pd.merge(summary_df, nextclade_df, left_on = 'sample_id', right_on = 'sample_match', how = 'outer')
summary_df['sample_id'].fillna(summary_df['sample_match'], inplace=True)
summary_df.drop('nextclade_Sample', axis=1, inplace=True)
summary_df.drop('nextclade_seqName', axis=1, inplace=True)
summary_df.drop('sample_match', axis = 1, inplace = True )
columns = ['nextclade_clade'] + columns + nextclade_columns

if exists(pangolin_file) :
print("Getting results from pangolin file " + pangolin_file)

pangolin_df = pd.read_table(pangolin_file, dtype = str)
pangolin_df = pd.read_csv(pangolin_file, dtype = str)
pangolin_df = pangolin_df.add_prefix('pangolin_')
pangolin_columns = list(pangolin_df.columns)
pangolin_df['sample_match'] = pangolin_df['pangolin_Sample'].str.replace('Consensus_', '', regex= False)
pangolin_columns.remove('pangolin_Sample')
pangolin_df['sample_match'] = pangolin_df['pangolin_taxon'].str.replace('Consensus_', '', regex= False).str.split(' ').str[0]
pangolin_columns.remove('pangolin_taxon')
pangolin_columns.remove('pangolin_lineage')

summary_df = pd.merge(summary_df, pangolin_df, left_on = 'sample_id', right_on = 'sample_match', how = 'outer')
summary_df['sample_id'].fillna(summary_df['sample_match'], inplace=True)
summary_df.drop('pangolin_Sample', axis=1, inplace=True)
summary_df.drop('pangolin_taxon', axis=1, inplace=True)
summary_df.drop('sample_match', axis=1, inplace=True)
columns = ['pangolin_lineage'] + columns + pangolin_columns

Expand Down
2 changes: 1 addition & 1 deletion main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,7 @@ ch_reads.ifEmpty { println("No fastq or fastq.gz files were found at ${param

workflow CECRET {
ch_for_dataset = Channel.empty()
ch_for_version = Channel.from("Cecret version", workflow.manifest.version).first()
ch_for_version = Channel.from("Cecret version", workflow.manifest.version).collect()
ch_prealigned = Channel.empty()
ch_versions = Channel.empty()

Expand Down
2 changes: 1 addition & 1 deletion modules/artic.nf
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ process artic {
# time stamp + capturing tool versions
date > \$log
artic --version >> \$log
artic_version=\$(artic --version)
artic_version=\$(artic --version | awk '{print \$NF}')
cp ${reference} schema/cecret/V1/cecret.reference.fasta
cp ${bed} schema/cecret/V1/cecret.scheme.bed
Expand Down
2 changes: 1 addition & 1 deletion modules/bwa.nf
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ process bwa {
# time stamp + capturing tool versions
date > \$log
echo "bwa \$(bwa 2>&1 | grep Version )" >> \$log
bwa_version="bwa : "\$(bwa 2>&1 | grep Version)
bwa_version=\$(bwa 2>&1 | grep Version | awk '{print \$NF}')
# index the reference fasta file
bwa index ${reference_genome}
Expand Down
4 changes: 2 additions & 2 deletions modules/fastp.nf
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ process fastp {
# time stamp + capturing tool versions
date > \$log
fastp --version >> \$log
cleaner_version="\$(fastp --version 2>&1 | head -n 1)"
cleaner_version=\$(fastp --version 2>&1 | awk '{print \$NF}')
fastp ${args} \
-i ${reads[0]} \
Expand All @@ -63,7 +63,7 @@ process fastp {
# time stamp + capturing tool versions
date > \$log
fastp --version >> \$log
cleaner_version="\$(fastp --version 2>&1 | head -n 1)"
cleaner_version=\$(fastp --version 2>&1 | awk '{print \$NF}')
fastp ${args} \
-i ${reads} \
Expand Down
Loading

0 comments on commit 6f3584d

Please sign in to comment.