diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ff6c69f5..cdcb27d12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,11 +22,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - [#688](https://github.com/nf-core/eager/issues/668) - Allow pipeline to complete, even if Qualimap crashes due to an empty or corrupt BAM file for one sample/library - [#683](https://github.com/nf-core/eager/pull/683) - Sets `--igenomes_ignore` to true by default, as rarely used by users currently and makes resolving configs less complex - Added exit code `140` to re-tryable exit code list to account for certain scheduler wall-time limit fails -- [672](https://github.com/nf-core/eager/issues/672) - Removed java parameter from picard tools which could cause memory issues -- [679](https://github.com/nf-core/eager/issues/679) - Refactor within-process bash conditions to groovy/nextflow, due to incompatibility with some servers environments +- [#672](https://github.com/nf-core/eager/issues/672) - Removed java parameter from picard tools which could cause memory issues +- [#679](https://github.com/nf-core/eager/issues/679) - Refactor within-process bash conditions to groovy/nextflow, due to incompatibility with some server environments - [#690](https://github.com/nf-core/eager/pull/690) - Fixed ANGSD output mode for beagle by setting `-doMajorMinor 1` as default in that case - [#693](https://github.com/nf-core/eager/issues/693) - Fixed broken TSV input validation for the Colour Chemistry column - [#695](https://github.com/nf-core/eager/issues/695) - Fixed incorrect `-profile` order in tutorials (originally written reversed due to [nextflow bug](https://github.com/nextflow-io/nextflow/issues/1792)) +- [#653](https://github.com/nf-core/eager/issues/653) - Fixed file collision errors with sexdeterrmine for two same-named libraries with different strandedness ### `Dependencies` diff --git a/docs/output.md b/docs/output.md index ff4f13ee2..9c4d6deda 100644 --- 
a/docs/output.md +++ b/docs/output.md @@ -600,6 +600,8 @@ Sex.DetERRmine calculates the coverage of your mapped reads on the X and Y chrom When a bedfile of specific sites is provided, Sex.DetERRmine additionally calculates error bars around each relative coverage estimate. For this estimate to be trustworthy, the sites included in the bedfile should be spaced apart enough that a single sequencing read cannot overlap multiple sites. Hence, when a bedfile has not been provided, this error should be ignored. When a suitable bedfile is provided, each observation of a covered site is independent, and the error around the coverage is equal to the binomial error estimate. This error is then propagated during the calculation of relative coverage for the X and Y chromosomes. +> Note that in nf-core/eager this will be run on single- and double-stranded variants of the same library _separately_. This can also help assess differential contamination between libraries. + #### Relative Coverage Theoretically, males are expected to cluster around (0.5, 0.5) in the produced scatter plot, while females are expected to cluster around (1.0, 0.0). In practice, when analysing ancient DNA, these relative coverage on both axes is slightly lower than expected, and individuals can cluster around (0.45, 0.45) and (0.85, 0.05). As the number of covered sites for an individual gets smaller, the confidence on the estimate becomes lower, because it is increasingly more likely to be affected by randomness in the preservation and sequencing of ancient DNA. 
diff --git a/main.nf b/main.nf index 081bbd33a..adc3d7fe6 100644 --- a/main.nf +++ b/main.nf @@ -2226,7 +2226,7 @@ process additional_library_merge { ch_trimmed_formerge.skip_merging .mix(ch_output_from_trimmerge) - .into{ ch_output_from_bamutils; ch_addlibmerge_for_qualimap; ch_for_sexdeterrmine } + .into{ ch_output_from_bamutils; ch_addlibmerge_for_qualimap; ch_for_sexdeterrmine_prep } // General mapping quality statistics for whole reference sequence - e.g. X and % coverage @@ -2633,13 +2633,33 @@ process multivcfanalyzer { // Human biological sex estimation +// Rename each BAM to <baseName>_<strandedness>strand.bam to prevent single/double stranded library sample name-based file conflicts when all BAMs are collected into one sexdeterrmine run +process sexdeterrmine_prep { + label 'sc_small' + + input: + tuple samplename, libraryid, lane, seqtype, organism, strandedness, udg, path(bam), path(bai) from ch_for_sexdeterrmine_prep + + output: + file "*_{single,double}strand.bam" into ch_prepped_for_sexdeterrmine + + when: + params.run_sexdeterrmine + + script: + """ + mv ${bam} ${bam.baseName}_${strandedness}strand.bam + """ + +} + // As we collect all files for a single sex_deterrmine run, we DO NOT use the normal input/output tuple -process sex_deterrmine { +process sexdeterrmine { label 'sc_small' publishDir "${params.outdir}/sex_determination", mode: params.publish_dir_mode input: - path bam from ch_for_sexdeterrmine.map { it[7] }.collect() + path bam from ch_prepped_for_sexdeterrmine.collect() path(bed) from ch_bed_for_sexdeterrmine output: diff --git a/nextflow_schema.json b/nextflow_schema.json index f12b3574a..6453d653e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1294,7 +1294,7 @@ "properties": { "run_sexdeterrmine": { "type": "boolean", - "description": "Turn on sex determination for human reference genomes.", + "description": "Turn on sex determination for human reference genomes. 
This will run on single- and double-stranded variants of a library separately.", "fa_icon": "fas fa-transgender-alt", "help_text": "Specify to run the optional process of sex determination.\n" },