From 5c01c2f61a2e3fb586811dffa79e3a20733c012d Mon Sep 17 00:00:00 2001 From: MarieLataretu Date: Tue, 15 Jun 2021 17:27:08 +0200 Subject: [PATCH 1/4] fixes #35 --- clean.nf | 40 ++++++++++++++++++++-------------------- modules/utils.nf | 9 ++++----- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/clean.nf b/clean.nf index 4504b79..31458aa 100755 --- a/clean.nf +++ b/clean.nf @@ -182,7 +182,7 @@ include { download_host; check_own; concat_contamination } from './modules/get_h include { minimap2_fasta; minimap2_nano; minimap2_illumina } from './modules/minimap2' include { bbduk } from './modules/bbmap' -include { rename_reads; restore_reads; get_number_of_reads; minimap2Stats; bbdukStats; writeLog } from './modules/utils' +include { compress_reads; get_number_of_reads; minimap2Stats; bbdukStats; writeLog } from './modules/utils' include { fastqc; nanoplot; format_nanoplot_report; quast; multiqc } from './modules/qc' @@ -277,16 +277,16 @@ workflow clean_nano { .mix(rRNAChannel).collect() concat_contamination( nano_input_ch.map{ it -> it[0] }, 'minimap2', contamination ) } - rename_reads(nano_input_ch, 'single') - minimap2_nano(rename_reads.out, concat_contamination.out.fa) + // rename_reads(nano_input_ch, 'single') + minimap2_nano(nano_input_ch, concat_contamination.out.fa) writeLog(nano_input_ch.map{ it -> it[0] }, 'minimap2', nano_input_ch.map{ it -> it[1] }, contamination) - get_number_of_reads(rename_reads.out, 'single') + get_number_of_reads(nano_input_ch, 'single') minimap2Stats(minimap2_nano.out.idxstats.join(get_number_of_reads.out)) - restore_reads(minimap2_nano.out.cleaned_reads.concat(minimap2_nano.out.contaminated_reads), 'single', 'minimap2') + compress_reads(minimap2_nano.out.cleaned_reads.concat(minimap2_nano.out.contaminated_reads), 'single', 'minimap2') emit: stats = minimap2Stats.out.tsv in = nano_input_ch.map{ it -> it.plus(1, 'all') } - out = restore_reads.out + out = compress_reads.out } workflow clean_illumina { @@ -311,25 +311,25 @@ workflow clean_illumina { .mix(rRNAChannel).collect() concat_contamination( illumina_input_ch.map{ it -> it[0] }, params.bbduk ? 'bbduk' : 'minimap2', contamination ) } - rename_reads(illumina_input_ch, 'paired') + // rename_reads(illumina_input_ch, 'paired') if (params.bbduk){ - bbduk(rename_reads.out, concat_contamination.out.fa, 'paired') + bbduk(illumina_input_ch, concat_contamination.out.fa, 'paired') writeLog(illumina_input_ch.map{ it -> it[0] }, 'bbduk', illumina_input_ch.map{ it -> it[1] }, contamination) bbdukStats(bbduk.out.stats) - restore_reads(bbduk.out.cleaned_reads.concat(bbduk.out.contaminated_reads), 'paired', 'bbduk') + compress_reads(bbduk.out.cleaned_reads.concat(bbduk.out.contaminated_reads), 'paired', 'bbduk') stats = bbdukStats.out.tsv } else { - minimap2_illumina(rename_reads.out, concat_contamination.out.fa, 'paired') + minimap2_illumina(illumina_input_ch, concat_contamination.out.fa, 'paired') writeLog(illumina_input_ch.map{ it -> it[0] }, 'minimap2', illumina_input_ch.map{ it -> it[1] }, contamination) - get_number_of_reads(rename_reads.out, 'paired') + get_number_of_reads(illumina_input_ch, 'paired') minimap2Stats(minimap2_illumina.out.idxstats.join(get_number_of_reads.out)) - restore_reads(minimap2_illumina.out.cleaned_reads.concat(minimap2_illumina.out.contaminated_reads), 'paired', 'minimap2') + compress_reads(minimap2_illumina.out.cleaned_reads.concat(minimap2_illumina.out.contaminated_reads), 'paired', 'minimap2') stats = minimap2Stats.out.tsv } emit: stats = stats in = illumina_input_ch.map{ it -> it.plus(1, 'all') } - out = restore_reads.out + out = compress_reads.out } workflow clean_illumina_single { @@ -354,25 +354,25 @@ workflow clean_illumina_single { .mix(rRNAChannel).collect() concat_contamination( illumina_single_end_input_ch.map{ it -> it[0] }, params.bbduk ? 'bbduk' : 'minimap2', contamination ) } - rename_reads(illumina_single_end_input_ch, 'single') + // rename_reads(illumina_single_end_input_ch, 'single') if (params.bbduk){ - bbduk(rename_reads.out, concat_contamination.out.fa, 'single') + bbduk(illumina_single_end_input_ch, concat_contamination.out.fa, 'single') writeLog(illumina_single_end_input_ch.map{ it -> it[0] }, 'bbduk', illumina_single_end_input_ch.map{ it -> it[1] }, contamination) bbdukStats(bbduk.out.stats) stats = bbdukStats.out.tsv - restore_reads(bbduk.out.cleaned_reads.concat(bbduk.out.contaminated_reads), 'single', 'bbduk') + compress_reads(bbduk.out.cleaned_reads.concat(bbduk.out.contaminated_reads), 'single', 'bbduk') } else { - minimap2_illumina(rename_reads.out, concat_contamination.out.fa, 'single') + minimap2_illumina(illumina_single_end_input_ch, concat_contamination.out.fa, 'single') writeLog(illumina_single_end_input_ch.map{ it -> it[0] }, 'minimap2', illumina_single_end_input_ch.map{ it -> it[1] }, contamination) - get_number_of_reads(rename_reads.out, 'single') + get_number_of_reads(illumina_single_end_input_ch, 'single') minimap2Stats(minimap2_illumina.out.idxstats.join(get_number_of_reads.out)) stats = minimap2Stats.out.tsv - restore_reads(minimap2_illumina.out.cleaned_reads.concat(minimap2_illumina.out.contaminated_reads), 'single', 'minimap2') + compress_reads(minimap2_illumina.out.cleaned_reads.concat(minimap2_illumina.out.contaminated_reads), 'single', 'minimap2') } emit: stats = stats in = illumina_single_end_input_ch.map{ it -> it.plus(1, 'all') } - out = restore_reads.out + out = compress_reads.out } workflow qc_fasta { diff --git a/modules/utils.nf b/modules/utils.nf index 6703aa3..783665b 100644 --- a/modules/utils.nf +++ b/modules/utils.nf @@ -47,7 +47,7 @@ process rename_reads { } } -process restore_reads { +process compress_reads { label 'basics' publishDir "${params.output}/${name}/${tool}", mode: 'copy', pattern: "*.gz" @@ -63,13 +63,12 @@ process restore_reads { script: if ( mode == 'paired' ) { """ - # restore the original read IDs - sed 's/DECONTAMINATE/ /g' ${reads}[0] | awk 'BEGIN{LINE=0};{if(LINE % 4 == 0 || LINE == 0){print \$0"/1"}else{print \$0};LINE++;}' | pigz -p ${task.cpus} > ${name}_1.${type}.fastq.gz - sed 's/DECONTAMINATE/ /g' ${reads}[1] | awk 'BEGIN{LINE=0};{if(LINE % 4 == 0 || LINE == 0){print \$0"/2"}else{print \$0};LINE++;}' | pigz -p ${task.cpus} > ${name}_2.${type}.fastq.gz + pigz -p ${task.cpus} > ${name}_1.${type}.fastq.gz + pigz -p ${task.cpus} > ${name}_2.${type}.fastq.gz """ } else { """ - sed 's/DECONTAMINATE/ /g' ${reads} | pigz -p ${task.cpus} > ${name}.${type}.fastq.gz + pigz -p ${task.cpus} > ${name}.${type}.fastq.gz """ } } From 237619446e0676bd8093523cee0b1419fbfe5ed7 Mon Sep 17 00:00:00 2001 From: MarieLataretu Date: Wed, 16 Jun 2021 18:09:29 +0200 Subject: [PATCH 2/4] fixed bug - don't write empty output... --- modules/utils.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/utils.nf b/modules/utils.nf index 783665b..fce0bb4 100644 --- a/modules/utils.nf +++ b/modules/utils.nf @@ -63,12 +63,12 @@ process compress_reads { script: if ( mode == 'paired' ) { """ - pigz -p ${task.cpus} > ${name}_1.${type}.fastq.gz - pigz -p ${task.cpus} > ${name}_2.${type}.fastq.gz + pigz -fc -p ${task.cpus} ${reads[0]} > ${name}_1.${type}.fastq.gz + pigz -fc -p ${task.cpus} ${reads[1]} > ${name}_2.${type}.fastq.gz """ } else { """ - pigz -p ${task.cpus} > ${name}.${type}.fastq.gz + pigz -fc -p ${task.cpus} ${reads} > ${name}.${type}.fastq.gz """ } } From 421cec617b52febf743bff2b0cf780602b1d16f0 Mon Sep 17 00:00:00 2001 From: MarieLataretu Date: Wed, 16 Jun 2021 18:25:20 +0200 Subject: [PATCH 3/4] improved stats --- assets/multiqc_config.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 5ad06da..b2525d2 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,12 +1,13 @@ custom_data: mapping: file_format: 'tsv' - section_name: 'Mapping statistics' - anchor: 'mapping' + section_name: 'Contamination statistics' + description: "Mapped and unmapped reads (or contigs) for the mapping result against the contamination. Short and sweet: mapped = contamination." + anchor: 'contamination' plot_type: 'bargraph' pconfig: - id: 'Mapping' - title: 'Mapping statistics' + id: 'Contamination' + title: 'Contamination statistics' sp: quast: From 5b8f1a575993446b5efabc392fa99656b14b8792 Mon Sep 17 00:00:00 2001 From: MarieLataretu Date: Tue, 22 Jun 2021 14:00:49 +0200 Subject: [PATCH 4/4] actual dcs --- data/controls/dcs.fa.gz | Bin 1225 -> 1252 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/data/controls/dcs.fa.gz b/data/controls/dcs.fa.gz index f3d7caef00818840dce2908d9caf6e344b54f51c..b74254d5744fab6bddf07b444f98724419f68b59 100644 GIT binary patch delta 1240 zcmV;}1Sk8+3FHZXABzYC000000RIL6LPG)o#{_+sxz1Q+3`F}p1y6t!wi3y5;Uk8`bLJ*A@MpW*=Vd$|JHLMV{P&kXKmYOL_rLu0<-6a1`&1u)`uT^CAEvvu zYif6%U-$RcY)|k1y!!StQ}v&B_i0r>zk3^lv z3}(i-?&c!p;kmoiNaVSlyC#$zo69RNCSQ_W)xSc&vwgt;)A_yUm&&DiVT`d6Pig)! z2sXe1TvS7UhkdzQDWdK33cz)+V&?MP_t3?}rPW?SEWMlu!Saf)*MAy3NvyScm`%Cdcp#Vd|=V&-w7YwMqbm;T)5CVV1ecyMwR4| zT?&i1Za_E2`+#@g_m;ZzK*nFEUmTYx_92M4ZP)NenT5%oG(mqJ3Ds+zNmhu5- z7mF2tGLLnvJmDzUF%@V#zb!=D7w3LW*FT=UjH*QptSq_xZY7$ZBadR>$7>lxd0#50 zCKi&SziFYG=qUxXYlcq8mjOm?NFA){d<$edY*0nR!?Ff4J0EaI%awL0RmyJ7A?msy z?JR()mkGC#dBoomK?T~8xez8nS3iEnepp0+5>=#}1+OQ2f(iskf1y}+uNxJ6Q z2z3%UoMysB)pH`EnN`ZsWS&T`jd>OGLOY@$*=nLgrxl=FuEj8M3X)C%KZ}Qr3u)=Q z7>)+&UrlMhY@3|Um`Lk}&=hZ=;;7IT&IaKmSlNbl%PJO#O1Ev>4l9}u(HP5*JV(iY zHVTKn>;?$uGQ>bFEyhZsk$n!+eV-OeOmZSGJogS3695Em>i`KydRbN}2wRNpeCMhZ z-$Os~vpCeNC_Y$pDBR7AZi|lCCq!QNOO6hEeW-Jrax)gzmK1|cd`2;wXfovI?J`wc z$#D%@IoBB<;SW$KX=AigN-jH&@!!3FdfW_R>z;k(TMqAqd&JN?S=6%c*-NQBj7_TZ zssNI;DYm->&J=&>nG!lLQ8b8Jfg3@1TY{11f-&zbw}})8N%zWAWTyi;ly#1eoccO; zHbA@0tpSl(tKu-wAqqX*gj{P(57iHkoHX_$7}0e&h2iiCzK>nX+$bxq9;5kxbd!q# z4jd!;MfYDda$8xJxa>hpTj!kHVyPCc=rXM&hid)-z8j~~-r+cmX2-@p60HQUqs>oZgJ+q+v@)!(=G z`+f(G&+lTs)4%iQJF$9L_TBE8tK0TH_>6xRZ|6Nd`@Oh-w6}Eb*zf+1KC`#~^L9L_ z^K$OKfqhq)SWcN+_c?VDe>T30;h52{yE#vJc;+rN0{L#|u5l&X=HkkW z$x5QD`k(8!x6c`1I=lD$Lb)){jnOy!DUbgS1RG!hF0!G)zSzwa-u77q;5t}2=HlG9 z>3rhyevqnv`)K7X2$ol@UVk-s5?FicVJZ-p&f5-hm&{v!;chSw_7~#0LIQ?>&(N^B z&KXd9bm+745dwSoee1hq*gfa$?ReT8C0+p0MWA~wH|aiRz=eBzXO3$I!WE1Ge2m}y zh2VL&8JD|EIDiQMSRj|W!u|6cU;wIx!t`B&=N-d;+w6t)h1BkVDT3 z(VGh&5H5LcA2I;^Q}JhT4c9^Om@5O%zusrEsZv8&uKou&ja1$_L!>bwim$Ei-g4vq$DUrlLOwna{5Or&)~Xp%Qjaa3p% zXMwO2tSm#TWfcoVrQ5P?n-#@}XpHGco}*+Ng+pI<1B87UVxX2LV?rx&yFCQW``m8sfFj$_cuxla2Ce}F%qzcQco)tQL+xZx%e-eTCG#+UHlfZ_0VHdaY-bCMDgMwiDYRdrXb`mmH-hj= zf|llhF|U_fL<)qYd-4?7=|B!;?c*b-ezu(r&`xu!Lj=~UI1IFjLJub)#~Q;!^}{1O zjr9mdR2@cPIDCTdW0lf3%8aYWXg<~CV1NPJh<4Har$$aI%Mh12h;D12b6PBa)xr^7 zrj_JS?SHnjG_^59q0PR2kq+GUf=2KhZ#Kn}LB+)GuW9df@p)C9+(D5Qq97EZNV8C2 zCWDqEv@?*?d8a?BsfuH#P^F2fj?VgbLSS)@G1#8h_))~k9~8MY81kP1q$XWe!EAL_ z>O`q6nwqqtHIz!-z-%G47#WN-9BCdN>Rve@y3HYioITO7I;89yx&ka6L9tT8D<9_m bes`JS(4sY&?Hu1f{sU0}yOZ(_lg0!dys>5s