diff --git a/CHANGELOG.md b/CHANGELOG.md index a13e9c038..5fa30d0d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * Added Support for automated tests using [GitHub Actions](https://github.com/features/actions) * Added genotyping capability through GATK UnifiedGenotyper (v3.5), GATK HaplotypeCaller (v4.1) and FreeBayes * Added MultiVCFAnalyzer module +* Added human sex determination module. ### `Fixed` diff --git a/README.md b/README.md index 736934d2c..5ae84d74a 100644 --- a/README.md +++ b/README.md @@ -116,3 +116,5 @@ If you've contributed and you're missing in here, please let me know and I'll ad * **GATK 3.8** DePristo, M. A., Banks, E., Poplin, R., Garimella, K. V., Maguire, J. R., Hartl, C., … Daly, M. J. (2011). A framework for variation discovery and genotyping using next-generation DNA sequencing data. Nature Genetics, 43(5), 491–498. [https://doi.org/10.1038/ng.806](https://doi.org/10.1038/ng.806.) [Download](https://software.broadinstitute.org/gatk/download/) * **GATK 4.X** - no citation available yet * **MultiVCFAnalyzer** Bos, K.I. et al., (2014). Pre-Columbian mycobacterial genomes reveal seals as a source of New World human tuberculosis. Nature, 514(7523), pp.494–497. Available at: [http://dx.doi.org/10.1038/nature13591](http://dx.doi.org/10.1038/nature13591). Download: [https://github.com/alexherbig/MultiVCFAnalyzer](https://github.com/alexherbig/MultiVCFAnalyzer) +* **Sex.DetERRmine.py** Lamnidis, T.C. et al., 2018. Ancient Fennoscandian genomes reveal origin and spread of Siberian ancestry in Europe. Nature communications, 9(1), p.5018. Available at: [http://dx.doi.org/10.1038/s41467-018-07483-5](http://dx.doi.org/10.1038/s41467-018-07483-5). Download: [https://github.com/TCLamnidis/Sex.DetERRmine.git](https://github.com/TCLamnidis/Sex.DetERRmine.git). + diff --git a/docs/output.md b/docs/output.md index 65df5d5f5..c06769968 100644 --- a/docs/output.md +++ b/docs/output.md @@ -411,3 +411,4 @@ Each module has it's own output directory which sit alongside the `MultiQC/` dir * `trimmed_bam/` this contains the BAM files with X number of bases trimmed off as defined with the `--bamutils_clip_left` and `--bamutils_clip_right` flags and corresponding index files. You can use these BAM files for downstream analysis such as re-mapping data with more stringent parameters (if you set trimming to remove the most likely places containing damage in the read). * `genotyping/` this contains all the (gzipped) VCF files produced by your genotyping module. The file suffix will have the genotyping tool name. You will have VCF files corresponding to your deduplicated BAM files, as well as any turned-on downstream processes that create BAMs (e.g. trimmed bams or pmd tools). * `MultiVCFAnalyzer` this contains all output from MultiVCFAnalyzer, including SNP calling statistics, various SNP table(s) and FASTA alignment files. +* `sex_determination/` this contains the output for the sex determination run. This is a single `.tsv` file that includes a table with the Sample Name, the Nr of Autosomal SNPs, Nr of SNPs on the X/Y chromosome, the Nr of reads mapping to the Autosomes, the Nr of reads mapping to the X/Y chromosome, the relative coverage on the X/Y chromosomes, and the standard error associated with the relative coverages. These measures are provided for each bam file, one row per bam. If the `sexdeterrmine_bedfile` option has not been provided, the error bars cannot be trusted, and runtime will be considerably longer. diff --git a/docs/usage.md b/docs/usage.md index 0d69fb4e1..2c4fce65e 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -712,6 +712,18 @@ If you wish to exclude SNP regions from consideration by MultiVCFAnalyzer (such If you wish to include results from SNPEff effect analysis, supply the output from SNPEff in txt format. +## Sex Determination + +An optional process for human DNA. It can be used to calculate the relative coverage of X and Y chromosomes compared to the autosomes (X-/Y-rate). Standard errors for these measurements are also calculated, assuming a binomial distribution of reads across the SNPs. + +### `--run_sexdeterrmine` + +Specify to run the optional process of sex determination. + +### `--sexdeterrmine_bedfile` + +Specify an optional bedfile of the list of SNPs to be used for X-/Y-rate calculation. Running without this parameter will considerably increase runtime, and render the resulting error bars unstrustworthy. Theoretically, any set of SNPs that are distant enough that two SNPs are unlikely to be covered by the same read can be used here. The programme was coded with the 1240K panel in mind. + ## Automatic Resubmission By default, if a pipeline step fails, EAGER2 will resubmit the job with twice the amount of CPU and memory. This will occur two times before failing. diff --git a/main.nf b/main.nf index 3c6f8a801..4c3d2210c 100644 --- a/main.nf +++ b/main.nf @@ -139,6 +139,9 @@ def helpMessage() { --reference_gff_exclude Specify positions to be excluded in '.gff' format. (Optional) --snp_eff_results Specify the output file from SNP effect analysis in '.txt' format. (Optional) + Sex Determination + --run_sexdeterrmine Turn on sex determination. + --sexdeterrmine_bedfile Specify SNP panel in bed format for error bar calculation. (Optional, see documentation) Other options: --outdir The output directory where the results will be saved @@ -284,6 +287,10 @@ params.reference_gff_annotations = 'NA' params.reference_gff_exclude = 'NA' params.snp_eff_results = 'NA' +//Sex.DetERRmine settings +params.run_sexdeterrmine = false +params.sexdeterrmine_bedfile = '' + multiqc_config = file(params.multiqc_config) output_docs = file("$baseDir/docs/output.md") where_are_my_files = file("$baseDir/assets/where_are_my_files.txt") @@ -718,7 +725,7 @@ process fastqc { saveAs: {filename -> filename.indexOf(".zip") > 0 ? "zips/$filename" : "$filename"} when: - !params.bam && !params.skip_fastqc || params.bam && !params.run_convertbam + !params.bam && !params.skip_fastqc || params.bam && params.run_convertbam input: set val(name), file(reads) from ch_convertbam_for_fastqc @@ -881,7 +888,7 @@ process bwa { tag "${name}" publishDir "${params.outdir}/mapping/bwa", mode: 'copy' - when: !params.circularmapper && !params.bwamem || !params.skip_mapping + when: !params.circularmapper && params.bwamem && !params.skip_mapping input: set val(name), file(reads) from ch_adapteremoval_for_bwa @@ -1290,7 +1297,7 @@ process markDup{ if (!params.skip_deduplication) { ch_filtering_for_skiprmdup.mix(ch_output_from_dedup, ch_output_from_markdup) .filter { it =~/.*_rmdup.bam/ } - .into { ch_rmdup_for_skipdamagemanipulation; ch_rmdup_for_damageprofiler; ch_rmdup_for_qualimap; ch_rmdup_for_pmdtools; ch_rmdup_for_bamutils } + .into { ch_rmdup_for_skipdamagemanipulation; ch_rmdup_for_damageprofiler; ch_rmdup_for_qualimap; ch_rmdup_for_pmdtools; ch_rmdup_for_bamutils; ch_for_sexdeterrmine } ch_filteringindex_for_skiprmdup.mix(ch_outputindex_from_dedup, ch_outputindex_from_markdup) .filter { it =~/.*_rmdup.bam.bai|.*_rmdup.bam.csi/ } @@ -1298,7 +1305,7 @@ if (!params.skip_deduplication) { } else { ch_filtering_for_skiprmdup - .into { ch_rmdup_for_skipdamagemanipulation; ch_rmdup_for_damageprofiler; ch_rmdup_for_qualimap; ch_rmdup_for_pmdtools; ch_rmdup_for_bamutils } + .into { ch_rmdup_for_skipdamagemanipulation; ch_rmdup_for_damageprofiler; ch_rmdup_for_qualimap; ch_rmdup_for_pmdtools; ch_rmdup_for_bamutils; ch_for_sexdeterrmine } ch_filteringindex_for_skiprmdup .into { ch_rmdupindex_for_skipdamagemanipulation; ch_rmdupindex_for_damageprofiler; ch_rmdupindex_for_qualimap; ch_rmdupindex_for_pmdtools; ch_rmdupindex_for_bamutils } @@ -1622,8 +1629,7 @@ ch_gatk_download = Channel.value("download") """ } - - /* +/* * Step 12: SNP Table Generation */ @@ -1665,7 +1671,42 @@ if (params.additional_vcf_files == '') { pigz -p ${task.cpus} *.tsv *.txt snpAlignment.fasta snpAlignmentIncludingRefGenome.fasta fullAlignment.fasta rm *.vcf """ + } + /* + * Step XX Sex determintion with error bar calculation. + */ + + process sex_deterrmine{ + publishDir "${params.outdir}/sex_determination", mode:"copy" + + when: + params.run_sexdeterrmine + + input: + val 'Bams' from ch_for_sexdeterrmine.collect() + + output: + file 'SexDet.txt' + + script: + if (params.sexdeterrmine_bedfile == '') { + """ + for i in ${Bams.join(' ')}; do + echo \$i >> bamlist.txt + done + + samtools depth -aa -q30 -Q30 -f bamlist.txt| sexdeterrmine -f bamlist.txt >SexDet.txt + """ + } else { + """ + for i in ${Bams.join(' ')}; do + echo \$i >> bamlist.txt + done + + samtools depth -aa -q30 -Q30 -b ${params.sexdeterrmine_bedfile} -f bamlist.txt| sexdeterrmine -f bamlist.txt >SexDet.txt + """ + } } /*