Skip to content

Commit

Permalink
Update GATK to 4.6.0.0 in sv_pipeline_docker (#719)
Browse files (browse the repository at this point in the history)
  • Loading branch information
mwalker174 authored Aug 27, 2024
1 parent 5aa3b8c commit 6b27c39
Show file tree
Hide file tree
Showing 13 changed files with 22 additions and 28 deletions.
6 changes: 3 additions & 3 deletions dockerfiles/sv-base/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# This is the base dockerfile for the GATK SV pipeline that adds R, a few R packages, and GATK
ARG SAMTOOLS_CLOUD_IMAGE=samtools-cloud:latest
ARG VIRTUAL_ENV_IMAGE=sv-base-virtual-env:latest
ARG GATK_COMMIT="a33bf19dd3188af0af1bd17bce015eb20ba73227"
ARG GATK_COMMIT="64348bc9750ebf6cc473ecb8c1ced3fc66f05488"
ARG GATK_JAR="/opt/gatk.jar"
ARG R_INSTALL_PATH=/opt/R

Expand All @@ -14,8 +14,8 @@ FROM $SAMTOOLS_CLOUD_IMAGE as samtools_cloud
FROM $VIRTUAL_ENV_IMAGE as virtual_env_image
RUN rm_unneeded_r_library_files.sh

ARG GATK_BUILD_DEP="git git-lfs openjdk-8-jdk"
ARG GATK_RUN_DEP="openjdk-8-jre-headless libgomp1"
ARG GATK_BUILD_DEP="git git-lfs openjdk-17-jdk"
ARG GATK_RUN_DEP="openjdk-17-jre-headless libgomp1"
ARG GATK_COMMIT
ARG GATK_JAR
ARG DEBIAN_FRONTEND=noninteractive
Expand Down
3 changes: 2 additions & 1 deletion src/svtk/svtk/pesr/pe_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ def _get_coords(pos, strand):
startA, endA = _get_coords(record.pos, strandA)
startB, endB = _get_coords(record.stop, strandB)

region = '{0}:{1}-{2}'.format(record.chrom, startA, endA)
# Add 1 because evidence is stored/indexed with 0-based coordinates
region = '{0}:{1}-{2}'.format(record.chrom, startA + 1, endA + 1)

try:
pairs = self.discfile.fetch(region=region, parser=pysam.asTuple())
Expand Down
5 changes: 3 additions & 2 deletions src/svtk/svtk/pesr/sr_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def test_record(self, record, called, background):
# Clean up columns
results['name'] = record.id
results['bg_frac'] = results.called / \
(results.background + results.called)
(results.background + results.called)
results['bg_frac'] = results.bg_frac.fillna(0)
cols = 'name coord pos log_pval called background bg_frac'.split()

Expand Down Expand Up @@ -120,7 +120,8 @@ def load_counts(self, chrom, pos, strand):
"""Load pandas DataFrame from tabixfile"""

if pos > 0:
region = '{0}:{1}-{1}'.format(chrom, pos)
# Add 1 because evidence is stored/indexed with 0-based coordinates
region = '{0}:{1}-{1}'.format(chrom, pos + 1)
try:
lines = self.countfile.fetch(region)
except ValueError:
Expand Down
3 changes: 1 addition & 2 deletions wdl/BAFTestChromosome.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -113,17 +113,16 @@ task BAFTest {
set -o pipefail

java -Xmx~{java_mem_mb}M -jar ${GATK_JAR} PrintSVEvidence \
--skip-header \
--sequence-dictionary ~{ref_dict} \
--evidence-file ~{baf_metrics} \
-L "${chrom}:${start}-${end}" \
-O local.BAF.txt.gz
else
touch local.BAF.txt
bgzip local.BAF.txt
tabix -0 -s1 -b2 -e2 local.BAF.txt.gz
fi

tabix -s1 -b2 -e2 local.BAF.txt.gz
svtk baf-test ~{bed} local.BAF.txt.gz --batch batch.key > ~{prefix}.metrics

>>>
Expand Down
2 changes: 1 addition & 1 deletion wdl/BatchEvidenceMerging.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ task MergeEvidence {
fi

awk '/txt\.gz$/' evidence.list | while read fil; do
tabix -f -s1 -b2 -e2 $fil
tabix -f -0 -s1 -b2 -e2 $fil
done

/gatk/gatk --java-options "-Xmx~{java_heap_size_mb}m" PrintSVEvidence -F evidence.list --sample-names samples.list --sequence-dictionary ~{reference_dict} -O "~{batch}.~{evidence}.txt.gz"
Expand Down
2 changes: 1 addition & 1 deletion wdl/GenotypeCpxCnvsPerBatch.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -250,9 +250,9 @@ task RdTestGenotype {
else
touch local.RD.txt
bgzip local.RD.txt
tabix -p bed local.RD.txt.gz
fi
tabix -p bed local.RD.txt.gz
tabix -p bed ~{bin_exclude}
Rscript /opt/RdTest/RdTest.R \
Expand Down
6 changes: 2 additions & 4 deletions wdl/MatrixQC.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -158,10 +158,9 @@ task PESRBAF_QC {
else
touch ~{print_ev_output}
bgzip ~{print_ev_output}
tabix -f -0 -s 1 -b 2 -e 2 ~{print_ev_output}
fi

tabix -f -s 1 -b 2 -e 2 ~{print_ev_output}

/opt/sv-pipeline/00_preprocessing/misc_scripts/nonRD_matrix_QC.sh \
-d ~{distance} \
~{print_ev_output} \
Expand Down Expand Up @@ -238,10 +237,9 @@ task RD_QC {
else
touch local.RD.txt
bgzip local.RD.txt
tabix -f -p bed ~{print_ev_output}
fi

tabix -f -p bed ~{print_ev_output}

/opt/sv-pipeline/00_preprocessing/misc_scripts/RD_matrix_QC.sh \
-d ~{distance} \
~{print_ev_output} \
Expand Down
3 changes: 1 addition & 2 deletions wdl/PETestChromosome.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -217,17 +217,16 @@ task PETest {

if [ -s region.merged.bed ]; then
java -Xmx~{java_mem_mb}M -jar ${GATK_JAR} PrintSVEvidence \
--skip-header \
--sequence-dictionary ~{ref_dict} \
--evidence-file ~{discfile} \
-L region.merged.bed \
-O local.PE.txt.gz
else
touch local.PE.txt
bgzip local.PE.txt
tabix -0 -s1 -b2 -e2 local.PE.txt.gz
fi

tabix -s1 -b2 -e2 local.PE.txt.gz
svtk pe-test -o ~{window} ~{common_arg} --medianfile ~{medianfile} --samples ~{include_list} ~{vcf} local.PE.txt.gz ~{prefix}.stats
>>>
runtime {
Expand Down
3 changes: 1 addition & 2 deletions wdl/RDTestChromosome.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,9 @@ task RDTest {
else
touch local.RD.txt
bgzip local.RD.txt
tabix -p bed local.RD.txt.gz
fi

tabix -p bed local.RD.txt.gz

Rscript /opt/RdTest/RdTest.R \
-b ~{bed} \
-n ~{prefix} \
Expand Down
3 changes: 1 addition & 2 deletions wdl/ResolveCpxSv.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,6 @@ task ResolvePrep {
if [ -s regions.bed ]; then
java -Xmx~{java_mem_mb}M -jar ${GATK_JAR} PrintSVEvidence \
--skip-header \
--sequence-dictionary ~{ref_dict} \
--evidence-file $GS_PATH_TO_DISC_FILE \
-L regions.bed \
Expand Down Expand Up @@ -385,7 +384,7 @@ task ResolvePrep {
> discfile.PE.txt.gz
fi
tabix -s 1 -b 2 -e 2 -f discfile.PE.txt.gz
tabix -0 -s 1 -b 2 -e 2 -f discfile.PE.txt.gz
>>>
output {
Expand Down
3 changes: 1 addition & 2 deletions wdl/SRTestChromosome.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -218,17 +218,16 @@ task SRTest {

if [ -s region.merged.bed ]; then
java -Xmx~{java_mem_mb}M -jar ${GATK_JAR} PrintSVEvidence \
--skip-header \
--sequence-dictionary ~{ref_dict} \
--evidence-file ~{splitfile} \
-L region.merged.bed \
-O local.SR.txt.gz
else
touch local.SR.txt
bgzip local.SR.txt
tabix -0 -s1 -b2 -e2 local.SR.txt.gz
fi

tabix -s1 -b2 -e2 local.SR.txt.gz
svtk sr-test -w 50 --log ~{common_arg} --medianfile ~{medianfile} --samples ~{include_list} ~{vcf} local.SR.txt.gz ~{prefix}.stats
>>>
runtime {
Expand Down
4 changes: 2 additions & 2 deletions wdl/SetSampleIdLegacy.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -122,13 +122,13 @@ task SetSampleId {
output_name="~{sample_name}.~{file_type}.txt.gz"

if [ ! -f "~{evidence_file}.tbi" ]; then
tabix -s1 -b2 -e2 ~{evidence_file}
tabix -0 -s1 -b2 -e2 ~{evidence_file}
fi

mkfifo $fifo_name
/gatk/gatk --java-options "-Xmx2000m" PrintSVEvidence -F ~{evidence_file} --sequence-dictionary ~{reference_dict} -O $fifo_name &
awk '{$~{sample_column}="~{sample_name}"}' < $fifo_name | bgzip -c > $output_name
tabix -s1 -b2 -e2 $output_name
tabix -0 -s1 -b2 -e2 $output_name

>>>
runtime {
Expand Down
7 changes: 3 additions & 4 deletions wdl/TasksGenotypeBatch.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -344,10 +344,9 @@ task RDTestGenotype {
else
touch local.RD.txt
bgzip local.RD.txt
tabix -p bed local.RD.txt.gz
fi

tabix -p bed local.RD.txt.gz

Rscript /opt/RdTest/RdTest.R \
-b ~{bed} \
-c local.RD.txt.gz \
Expand Down Expand Up @@ -435,9 +434,9 @@ task CountPE {
else
touch local.PE.txt
bgzip local.PE.txt
tabix -0 -s1 -b2 -e2 local.PE.txt.gz
fi

tabix -s1 -b2 -e2 local.PE.txt.gz
svtk count-pe -s ~{write_lines(samples)} --medianfile ~{medianfile} ~{vcf} local.PE.txt.gz ~{prefix}.pe_counts.txt
gzip ~{prefix}.pe_counts.txt

Expand Down Expand Up @@ -511,9 +510,9 @@ task CountSR {
else
touch local.SR.txt
bgzip local.SR.txt
tabix -0 -s1 -b2 -e2 local.SR.txt.gz
fi

tabix -s1 -b2 -e2 local.SR.txt.gz
svtk count-sr -s ~{write_lines(samples)} --medianfile ~{medianfile} ~{vcf} local.SR.txt.gz ~{prefix}.sr_counts.txt
/opt/sv-pipeline/04_variant_resolution/scripts/sum_SR.sh ~{prefix}.sr_counts.txt ~{prefix}.sr_sum.txt.gz
gzip ~{prefix}.sr_counts.txt
Expand Down

0 comments on commit 6b27c39

Please sign in to comment.