diff --git a/dockerfiles/sv-base/Dockerfile b/dockerfiles/sv-base/Dockerfile index ac0608caf..bf1cdc19e 100644 --- a/dockerfiles/sv-base/Dockerfile +++ b/dockerfiles/sv-base/Dockerfile @@ -1,7 +1,7 @@ # This is the base dockerfile for the GATK SV pipeline that adds R, a few R packages, and GATK ARG SAMTOOLS_CLOUD_IMAGE=samtools-cloud:latest ARG VIRTUAL_ENV_IMAGE=sv-base-virtual-env:latest -ARG GATK_COMMIT="a33bf19dd3188af0af1bd17bce015eb20ba73227" +ARG GATK_COMMIT="64348bc9750ebf6cc473ecb8c1ced3fc66f05488" ARG GATK_JAR="/opt/gatk.jar" ARG R_INSTALL_PATH=/opt/R @@ -14,8 +14,8 @@ FROM $SAMTOOLS_CLOUD_IMAGE as samtools_cloud FROM $VIRTUAL_ENV_IMAGE as virtual_env_image RUN rm_unneeded_r_library_files.sh -ARG GATK_BUILD_DEP="git git-lfs openjdk-8-jdk" -ARG GATK_RUN_DEP="openjdk-8-jre-headless libgomp1" +ARG GATK_BUILD_DEP="git git-lfs openjdk-17-jdk" +ARG GATK_RUN_DEP="openjdk-17-jre-headless libgomp1" ARG GATK_COMMIT ARG GATK_JAR ARG DEBIAN_FRONTEND=noninteractive diff --git a/src/svtk/svtk/pesr/pe_test.py b/src/svtk/svtk/pesr/pe_test.py index a737e2c0a..bde88cb59 100644 --- a/src/svtk/svtk/pesr/pe_test.py +++ b/src/svtk/svtk/pesr/pe_test.py @@ -89,7 +89,8 @@ def _get_coords(pos, strand): startA, endA = _get_coords(record.pos, strandA) startB, endB = _get_coords(record.stop, strandB) - region = '{0}:{1}-{2}'.format(record.chrom, startA, endA) + # Add 1 because evidence is stored/indexed with 0-based coordinates + region = '{0}:{1}-{2}'.format(record.chrom, startA + 1, endA + 1) try: pairs = self.discfile.fetch(region=region, parser=pysam.asTuple()) diff --git a/src/svtk/svtk/pesr/sr_test.py b/src/svtk/svtk/pesr/sr_test.py index 4fefd6dca..612127de3 100644 --- a/src/svtk/svtk/pesr/sr_test.py +++ b/src/svtk/svtk/pesr/sr_test.py @@ -82,7 +82,7 @@ def test_record(self, record, called, background): # Clean up columns results['name'] = record.id results['bg_frac'] = results.called / \ - (results.background + results.called) + (results.background + results.called) results['bg_frac'] = results.bg_frac.fillna(0) cols = 'name coord pos log_pval called background bg_frac'.split() @@ -120,7 +120,8 @@ def load_counts(self, chrom, pos, strand): """Load pandas DataFrame from tabixfile""" if pos > 0: - region = '{0}:{1}-{1}'.format(chrom, pos) + # Add 1 because evidence is stored/indexed with 0-based coordinates + region = '{0}:{1}-{1}'.format(chrom, pos + 1) try: lines = self.countfile.fetch(region) except ValueError: diff --git a/wdl/BAFTestChromosome.wdl b/wdl/BAFTestChromosome.wdl index 1e53f1229..c4f42da77 100644 --- a/wdl/BAFTestChromosome.wdl +++ b/wdl/BAFTestChromosome.wdl @@ -113,7 +113,6 @@ task BAFTest { set -o pipefail java -Xmx~{java_mem_mb}M -jar ${GATK_JAR} PrintSVEvidence \ - --skip-header \ --sequence-dictionary ~{ref_dict} \ --evidence-file ~{baf_metrics} \ -L "${chrom}:${start}-${end}" \ @@ -121,9 +120,9 @@ task BAFTest { else touch local.BAF.txt bgzip local.BAF.txt + tabix -0 -s1 -b2 -e2 local.BAF.txt.gz fi - tabix -s1 -b2 -e2 local.BAF.txt.gz svtk baf-test ~{bed} local.BAF.txt.gz --batch batch.key > ~{prefix}.metrics >>> diff --git a/wdl/BatchEvidenceMerging.wdl b/wdl/BatchEvidenceMerging.wdl index 4225784af..7f6f04ad4 100644 --- a/wdl/BatchEvidenceMerging.wdl +++ b/wdl/BatchEvidenceMerging.wdl @@ -158,7 +158,7 @@ task MergeEvidence { fi awk '/txt\.gz$/' evidence.list | while read fil; do - tabix -f -s1 -b2 -e2 $fil + tabix -f -0 -s1 -b2 -e2 $fil done /gatk/gatk --java-options "-Xmx~{java_heap_size_mb}m" PrintSVEvidence -F evidence.list --sample-names samples.list --sequence-dictionary ~{reference_dict} -O "~{batch}.~{evidence}.txt.gz" diff --git a/wdl/GenotypeCpxCnvsPerBatch.wdl b/wdl/GenotypeCpxCnvsPerBatch.wdl index aca267819..0db32f435 100644 --- a/wdl/GenotypeCpxCnvsPerBatch.wdl +++ b/wdl/GenotypeCpxCnvsPerBatch.wdl @@ -250,9 +250,9 @@ task RdTestGenotype { else touch local.RD.txt bgzip local.RD.txt + tabix -p bed local.RD.txt.gz fi - tabix -p bed local.RD.txt.gz tabix -p bed ~{bin_exclude} Rscript /opt/RdTest/RdTest.R \ diff --git a/wdl/MatrixQC.wdl b/wdl/MatrixQC.wdl index 902161ac0..e4681f961 100644 --- a/wdl/MatrixQC.wdl +++ b/wdl/MatrixQC.wdl @@ -158,10 +158,9 @@ task PESRBAF_QC { else touch ~{print_ev_output} bgzip ~{print_ev_output} + tabix -f -0 -s 1 -b 2 -e 2 ~{print_ev_output} fi - tabix -f -s 1 -b 2 -e 2 ~{print_ev_output} - /opt/sv-pipeline/00_preprocessing/misc_scripts/nonRD_matrix_QC.sh \ -d ~{distance} \ ~{print_ev_output} \ @@ -238,10 +237,9 @@ task RD_QC { else touch local.RD.txt bgzip local.RD.txt + tabix -f -p bed ~{print_ev_output} fi - tabix -f -p bed ~{print_ev_output} - /opt/sv-pipeline/00_preprocessing/misc_scripts/RD_matrix_QC.sh \ -d ~{distance} \ ~{print_ev_output} \ diff --git a/wdl/PETestChromosome.wdl b/wdl/PETestChromosome.wdl index a573cce13..360db4bf0 100644 --- a/wdl/PETestChromosome.wdl +++ b/wdl/PETestChromosome.wdl @@ -217,7 +217,6 @@ task PETest { if [ -s region.merged.bed ]; then java -Xmx~{java_mem_mb}M -jar ${GATK_JAR} PrintSVEvidence \ - --skip-header \ --sequence-dictionary ~{ref_dict} \ --evidence-file ~{discfile} \ -L region.merged.bed \ @@ -225,9 +224,9 @@ task PETest { else touch local.PE.txt bgzip local.PE.txt + tabix -0 -s1 -b2 -e2 local.PE.txt.gz fi - tabix -s1 -b2 -e2 local.PE.txt.gz svtk pe-test -o ~{window} ~{common_arg} --medianfile ~{medianfile} --samples ~{include_list} ~{vcf} local.PE.txt.gz ~{prefix}.stats >>> runtime { diff --git a/wdl/RDTestChromosome.wdl b/wdl/RDTestChromosome.wdl index df11fabc4..0668fe5d5 100644 --- a/wdl/RDTestChromosome.wdl +++ b/wdl/RDTestChromosome.wdl @@ -176,10 +176,9 @@ task RDTest { else touch local.RD.txt bgzip local.RD.txt + tabix -p bed local.RD.txt.gz fi - tabix -p bed local.RD.txt.gz - Rscript /opt/RdTest/RdTest.R \ -b ~{bed} \ -n ~{prefix} \ diff --git a/wdl/ResolveCpxSv.wdl b/wdl/ResolveCpxSv.wdl index f63e1979e..cba568831 100644 --- a/wdl/ResolveCpxSv.wdl +++ b/wdl/ResolveCpxSv.wdl @@ -345,7 +345,6 @@ task ResolvePrep { if [ -s regions.bed ]; then java -Xmx~{java_mem_mb}M -jar ${GATK_JAR} PrintSVEvidence \ - --skip-header \ --sequence-dictionary ~{ref_dict} \ --evidence-file $GS_PATH_TO_DISC_FILE \ -L regions.bed \ @@ -385,7 +384,7 @@ task ResolvePrep { > discfile.PE.txt.gz fi - tabix -s 1 -b 2 -e 2 -f discfile.PE.txt.gz + tabix -0 -s 1 -b 2 -e 2 -f discfile.PE.txt.gz >>> output { diff --git a/wdl/SRTestChromosome.wdl b/wdl/SRTestChromosome.wdl index 0f945a972..83987975d 100644 --- a/wdl/SRTestChromosome.wdl +++ b/wdl/SRTestChromosome.wdl @@ -218,7 +218,6 @@ task SRTest { if [ -s region.merged.bed ]; then java -Xmx~{java_mem_mb}M -jar ${GATK_JAR} PrintSVEvidence \ - --skip-header \ --sequence-dictionary ~{ref_dict} \ --evidence-file ~{splitfile} \ -L region.merged.bed \ @@ -226,9 +225,9 @@ task SRTest { else touch local.SR.txt bgzip local.SR.txt + tabix -0 -s1 -b2 -e2 local.SR.txt.gz fi - tabix -s1 -b2 -e2 local.SR.txt.gz svtk sr-test -w 50 --log ~{common_arg} --medianfile ~{medianfile} --samples ~{include_list} ~{vcf} local.SR.txt.gz ~{prefix}.stats >>> runtime { diff --git a/wdl/SetSampleIdLegacy.wdl b/wdl/SetSampleIdLegacy.wdl index bcc114582..17957d3a3 100644 --- a/wdl/SetSampleIdLegacy.wdl +++ b/wdl/SetSampleIdLegacy.wdl @@ -122,13 +122,13 @@ task SetSampleId { output_name="~{sample_name}.~{file_type}.txt.gz" if [ ! -f "~{evidence_file}.tbi" ]; then - tabix -s1 -b2 -e2 ~{evidence_file} + tabix -0 -s1 -b2 -e2 ~{evidence_file} fi mkfifo $fifo_name /gatk/gatk --java-options "-Xmx2000m" PrintSVEvidence -F ~{evidence_file} --sequence-dictionary ~{reference_dict} -O $fifo_name & awk '{$~{sample_column}="~{sample_name}"}' < $fifo_name | bgzip -c > $output_name - tabix -s1 -b2 -e2 $output_name + tabix -0 -s1 -b2 -e2 $output_name >>> runtime { diff --git a/wdl/TasksGenotypeBatch.wdl b/wdl/TasksGenotypeBatch.wdl index 7a945ff48..aa1221b3e 100644 --- a/wdl/TasksGenotypeBatch.wdl +++ b/wdl/TasksGenotypeBatch.wdl @@ -344,10 +344,9 @@ task RDTestGenotype { else touch local.RD.txt bgzip local.RD.txt + tabix -p bed local.RD.txt.gz fi - tabix -p bed local.RD.txt.gz - Rscript /opt/RdTest/RdTest.R \ -b ~{bed} \ -c local.RD.txt.gz \ @@ -435,9 +434,9 @@ task CountPE { else touch local.PE.txt bgzip local.PE.txt + tabix -0 -s1 -b2 -e2 local.PE.txt.gz fi - tabix -s1 -b2 -e2 local.PE.txt.gz svtk count-pe -s ~{write_lines(samples)} --medianfile ~{medianfile} ~{vcf} local.PE.txt.gz ~{prefix}.pe_counts.txt gzip ~{prefix}.pe_counts.txt @@ -511,9 +510,9 @@ task CountSR { else touch local.SR.txt bgzip local.SR.txt + tabix -0 -s1 -b2 -e2 local.SR.txt.gz fi - tabix -s1 -b2 -e2 local.SR.txt.gz svtk count-sr -s ~{write_lines(samples)} --medianfile ~{medianfile} ~{vcf} local.SR.txt.gz ~{prefix}.sr_counts.txt /opt/sv-pipeline/04_variant_resolution/scripts/sum_SR.sh ~{prefix}.sr_counts.txt ~{prefix}.sr_sum.txt.gz gzip ~{prefix}.sr_counts.txt