Skip to content

Commit

Permalink
Update Spark scripts to reflect changes from #5386 and #5127.
Browse files (browse the repository at this point in the history)
  • Loading branch information
tomwhite committed Nov 15, 2018
1 parent 626c887 commit 8dcf46f
Show file tree
Hide file tree
Showing 9 changed files with 13 additions and 13 deletions.
4 changes: 2 additions & 2 deletions scripts/spark_eval/exome_md-bqsr-hc_hdfs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@
. utils.sh

time_gatk "MarkDuplicatesSpark -I hdfs:///user/$USER/exome_spark_eval/NA12878.ga2.exome.maq.raw.bam -O hdfs:///user/$USER/exome_spark_eval/out/markdups-sharded --sharded-output true" 96 1 4g 4g
-time_gatk "BQSRPipelineSpark -I hdfs:///user/$USER/exome_spark_eval/out/markdups-sharded -O hdfs:///user/$USER/exome_spark_eval/out/bqsr-sharded --sharded-output true -R hdfs:///user/$USER/exome_spark_eval/Homo_sapiens_assembly18.2bit --known-sites hdfs://${HDFS_HOST_PORT}/user/$USER/exome_spark_eval/dbsnp_138.hg18.vcf" 8 8 32g 4g
-time_gatk "HaplotypeCallerSpark -I hdfs:///user/$USER/exome_spark_eval/out/bqsr-sharded -R hdfs:///user/$USER/exome_spark_eval/Homo_sapiens_assembly18.2bit -O hdfs://${HDFS_HOST_PORT}/user/$USER/exome_spark_eval/out/NA12878.ga2.exome.maq.raw.vcf -pairHMM AVX_LOGLESS_CACHING --max-reads-per-alignment-start 10" 64 1 6g 4g
+time_gatk "BQSRPipelineSpark -I hdfs:///user/$USER/exome_spark_eval/out/markdups-sharded -O hdfs:///user/$USER/exome_spark_eval/out/bqsr-sharded --sharded-output true -R hdfs:///user/$USER/exome_spark_eval/Homo_sapiens_assembly18.fasta --known-sites hdfs://${HDFS_HOST_PORT}/user/$USER/exome_spark_eval/dbsnp_138.hg18.vcf" 8 8 32g 4g
+time_gatk "HaplotypeCallerSpark -I hdfs:///user/$USER/exome_spark_eval/out/bqsr-sharded -R hdfs:///user/$USER/exome_spark_eval/Homo_sapiens_assembly18.fasta -O hdfs://${HDFS_HOST_PORT}/user/$USER/exome_spark_eval/out/NA12878.ga2.exome.maq.raw.vcf -pairHMM AVX_LOGLESS_CACHING --maxReadsPerAlignmentStart 10" 64 1 6g 4g
2 changes: 1 addition & 1 deletion scripts/spark_eval/exome_reads-pipeline_gcs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

. utils.sh

-time_gatk "ReadsPipelineSpark -I gs://broad-spark-eval-test-data/exome/NA12878.ga2.exome.maq.raw.bam -O gs://broad-spark-eval-test-data/exome/NA12878.ga2.exome.maq.raw.vcf -R gs://broad-spark-eval-test-data/exome/Homo_sapiens_assembly18.2bit --known-sites gs://broad-spark-eval-test-data/exome/dbsnp_138.hg18.vcf -pairHMM AVX_LOGLESS_CACHING --max-reads-per-alignment-start 10" 8 8 32g 4g
+time_gatk "ReadsPipelineSpark -I gs://broad-spark-eval-test-data/exome/NA12878.ga2.exome.maq.raw.bam -O gs://broad-spark-eval-test-data/exome/NA12878.ga2.exome.maq.raw.vcf -R gs://broad-spark-eval-test-data/exome/Homo_sapiens_assembly18.fasta --known-sites gs://broad-spark-eval-test-data/exome/dbsnp_138.hg18.vcf -pairHMM AVX_LOGLESS_CACHING --maxReadsPerAlignmentStart 10" 8 8 32g 4g
2 changes: 1 addition & 1 deletion scripts/spark_eval/exome_reads-pipeline_hdfs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

. utils.sh

-time_gatk "ReadsPipelineSpark -I hdfs:///user/$USER/exome_spark_eval/NA12878.ga2.exome.maq.raw.bam -O hdfs://${HDFS_HOST_PORT}/user/$USER/exome_spark_eval/out/NA12878.ga2.exome.maq.raw.vcf -R hdfs://${HDFS_HOST_PORT}/user/$USER/exome_spark_eval/Homo_sapiens_assembly18.fasta --known-sites hdfs://${HDFS_HOST_PORT}/user/$USER/exome_spark_eval/dbsnp_138.hg18.vcf.gz -pairHMM AVX_LOGLESS_CACHING --max-reads-per-alignment-start 10" 20 7 28g 4g
+time_gatk "ReadsPipelineSpark -I hdfs:///user/$USER/exome_spark_eval/NA12878.ga2.exome.maq.raw.bam -O hdfs://${HDFS_HOST_PORT}/user/$USER/exome_spark_eval/out/NA12878.ga2.exome.maq.raw.vcf -R hdfs://${HDFS_HOST_PORT}/user/$USER/exome_spark_eval/Homo_sapiens_assembly18.fasta --known-sites hdfs://${HDFS_HOST_PORT}/user/$USER/exome_spark_eval/dbsnp_138.hg18.vcf.gz -pairHMM AVX_LOGLESS_CACHING --maxReadsPerAlignmentStart 10" 20 7 28g 4g

# Notes
# 20 executors - 2 per node (this is run on a 10 node cluster of n1-standard-16, each with 16 cores, 60g)
Expand Down
4 changes: 2 additions & 2 deletions scripts/spark_eval/genome_md-bqsr-hc_hdfs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@
. utils.sh

time_gatk "MarkDuplicatesSpark -I hdfs:///user/$USER/q4_spark_eval/WGS-G94982-NA12878-no-NC_007605.bam -O hdfs:///user/$USER/q4_spark_eval/out/markdups-sharded --sharded-output true" 256 1 4g 4g
-time_gatk "BQSRPipelineSpark -I hdfs:///user/$USER/q4_spark_eval/out/markdups-sharded -O hdfs:///user/$USER/q4_spark_eval/out/bqsr-sharded --sharded-output true -R hdfs:///user/$USER/q4_spark_eval/human_g1k_v37.2bit --known-sites hdfs://${HDFS_HOST_PORT}/user/$USER/q4_spark_eval/dbsnp_138.b37.vcf" 20 8 42g 4g
-time_gatk "HaplotypeCallerSpark -I hdfs:///user/$USER/q4_spark_eval/out/bqsr-sharded -R hdfs:///user/$USER/q4_spark_eval/human_g1k_v37.2bit -O hdfs://${HDFS_HOST_PORT}/user/$USER/q4_spark_eval/out/WGS-G94982-NA12878.vcf -pairHMM AVX_LOGLESS_CACHING --max-reads-per-alignment-start 10" 60 1 12g 8g
+time_gatk "BQSRPipelineSpark -I hdfs:///user/$USER/q4_spark_eval/out/markdups-sharded -O hdfs:///user/$USER/q4_spark_eval/out/bqsr-sharded --sharded-output true -R hdfs:///user/$USER/q4_spark_eval/human_g1k_v37.fasta --known-sites hdfs://${HDFS_HOST_PORT}/user/$USER/q4_spark_eval/dbsnp_138.b37.vcf" 20 8 42g 4g
+time_gatk "HaplotypeCallerSpark -I hdfs:///user/$USER/q4_spark_eval/out/bqsr-sharded -R hdfs:///user/$USER/q4_spark_eval/human_g1k_v37.fasta -O hdfs://${HDFS_HOST_PORT}/user/$USER/q4_spark_eval/out/WGS-G94982-NA12878.vcf -pairHMM AVX_LOGLESS_CACHING --maxReadsPerAlignmentStart 10" 60 1 12g 8g
2 changes: 1 addition & 1 deletion scripts/spark_eval/genome_reads-pipeline_hdfs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

. utils.sh

-time_gatk "ReadsPipelineSpark -I hdfs:///user/$USER/q4_spark_eval/WGS-G94982-NA12878-no-NC_007605.bam -O hdfs://${HDFS_HOST_PORT}/user/$USER/q4_spark_eval/out/WGS-G94982-NA12878.vcf -R hdfs://${HDFS_HOST_PORT}/user/$USER/q4_spark_eval/human_g1k_v37.fasta --known-sites hdfs://${HDFS_HOST_PORT}/user/$USER/q4_spark_eval/dbsnp_138.b37.vcf.gz -pairHMM AVX_LOGLESS_CACHING --max-reads-per-alignment-start 10" 40 7 20g 8g
+time_gatk "ReadsPipelineSpark -I hdfs:///user/$USER/q4_spark_eval/WGS-G94982-NA12878-no-NC_007605.bam -O hdfs://${HDFS_HOST_PORT}/user/$USER/q4_spark_eval/out/WGS-G94982-NA12878.vcf -R hdfs://${HDFS_HOST_PORT}/user/$USER/q4_spark_eval/human_g1k_v37.fasta --known-sites hdfs://${HDFS_HOST_PORT}/user/$USER/q4_spark_eval/dbsnp_138.b37.vcf.gz -pairHMM AVX_LOGLESS_CACHING --maxReadsPerAlignmentStart 10" 40 7 20g 8g
4 changes: 2 additions & 2 deletions scripts/spark_eval/small_md-bqsr-hc_gcs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@
. utils.sh

time_gatk "MarkDuplicatesSpark -I gs://hellbender/test/resources/large/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.bam -O hdfs:///user/$USER/small_spark_eval/out/markdups-sharded --sharded-output true" 8 1 4g 4g
-time_gatk "BQSRPipelineSpark -I hdfs:///user/$USER/small_spark_eval/out/markdups-sharded -O hdfs:///user/$USER/small_spark_eval/out/bqsr-sharded --sharded-output true -R gs://hellbender/test/resources/large/human_g1k_v37.20.21.2bit --known-sites gs://hellbender/test/resources/large/dbsnp_138.b37.20.21.vcf -L 20:10000000-10100000" 1 8 32g 4g
-time_gatk "HaplotypeCallerSpark -I hdfs:///user/$USER/small_spark_eval/out/bqsr-sharded -R gs://hellbender/test/resources/large/human_g1k_v37.20.21.2bit -O gs://broad-spark-eval-test-data/small/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.vcf -pairHMM AVX_LOGLESS_CACHING" 8 1 4g 4g
+time_gatk "BQSRPipelineSpark -I hdfs:///user/$USER/small_spark_eval/out/markdups-sharded -O hdfs:///user/$USER/small_spark_eval/out/bqsr-sharded --sharded-output true -R gs://hellbender/test/resources/large/human_g1k_v37.20.21.fasta --known-sites gs://hellbender/test/resources/large/dbsnp_138.b37.20.21.vcf -L 20:10000000-10100000" 1 8 32g 4g
+time_gatk "HaplotypeCallerSpark -I hdfs:///user/$USER/small_spark_eval/out/bqsr-sharded -R gs://hellbender/test/resources/large/human_g1k_v37.20.21.fasta -O gs://broad-spark-eval-test-data/small/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.vcf -pairHMM AVX_LOGLESS_CACHING" 8 1 4g 4g
4 changes: 2 additions & 2 deletions scripts/spark_eval/small_md-bqsr-hc_hdfs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@
. utils.sh

time_gatk "MarkDuplicatesSpark -I hdfs:///user/$USER/small_spark_eval/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.bam -O hdfs:///user/$USER/small_spark_eval/out/markdups-sharded --sharded-output true" 8 1 4g 4g
-time_gatk "BQSRPipelineSpark -I hdfs:///user/$USER/small_spark_eval/out/markdups-sharded -O hdfs:///user/$USER/small_spark_eval/out/bqsr-sharded --sharded-output true -R hdfs:///user/$USER/small_spark_eval/human_g1k_v37.20.21.2bit --known-sites hdfs://${HDFS_HOST_PORT}/user/$USER/small_spark_eval/dbsnp_138.b37.20.21.vcf -L 20:10000000-10100000" 1 8 32g 4g
-time_gatk "HaplotypeCallerSpark -I hdfs:///user/$USER/small_spark_eval/out/bqsr-sharded -R hdfs:///user/$USER/small_spark_eval/human_g1k_v37.20.21.2bit -O hdfs://${HDFS_HOST_PORT}/user/$USER/small_spark_eval/out/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.vcf -pairHMM AVX_LOGLESS_CACHING" 8 1 4g 4g
+time_gatk "BQSRPipelineSpark -I hdfs:///user/$USER/small_spark_eval/out/markdups-sharded -O hdfs:///user/$USER/small_spark_eval/out/bqsr-sharded --sharded-output true -R hdfs:///user/$USER/small_spark_eval/human_g1k_v37.20.21.fasta --known-sites hdfs://${HDFS_HOST_PORT}/user/$USER/small_spark_eval/dbsnp_138.b37.20.21.vcf -L 20:10000000-10100000" 1 8 32g 4g
+time_gatk "HaplotypeCallerSpark -I hdfs:///user/$USER/small_spark_eval/out/bqsr-sharded -R hdfs:///user/$USER/small_spark_eval/human_g1k_v37.20.21.fasta -O hdfs://${HDFS_HOST_PORT}/user/$USER/small_spark_eval/out/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.vcf -pairHMM AVX_LOGLESS_CACHING" 8 1 4g 4g
2 changes: 1 addition & 1 deletion scripts/spark_eval/small_reads-pipeline_gcs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

. utils.sh

-time_gatk "ReadsPipelineSpark -I gs://broad-spark-eval-test-data/small/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.bam -O gs://broad-spark-eval-test-data/small/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.vcf -R gs://broad-spark-eval-test-data/small/human_g1k_v37.20.21.2bit --known-sites gs://broad-spark-eval-test-data/small/dbsnp_138.b37.20.21.vcf -pairHMM AVX_LOGLESS_CACHING --max-reads-per-alignment-start 10" 1 8 32g 4g
+time_gatk "ReadsPipelineSpark -I gs://broad-spark-eval-test-data/small/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.bam -O gs://broad-spark-eval-test-data/small/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.vcf -R gs://broad-spark-eval-test-data/small/human_g1k_v37.20.21.fasta --known-sites gs://broad-spark-eval-test-data/small/dbsnp_138.b37.20.21.vcf -pairHMM AVX_LOGLESS_CACHING --maxReadsPerAlignmentStart 10" 1 8 32g 4g
2 changes: 1 addition & 1 deletion scripts/spark_eval/small_reads-pipeline_hdfs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

. utils.sh

-time_gatk "ReadsPipelineSpark -I hdfs:///user/$USER/small_spark_eval/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.bam -O hdfs://${HDFS_HOST_PORT}/user/$USER/small_spark_eval/out/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.vcf -R hdfs:///user/$USER/small_spark_eval/human_g1k_v37.20.21.2bit --known-sites hdfs://${HDFS_HOST_PORT}/user/$USER/small_spark_eval/dbsnp_138.b37.20.21.vcf -pairHMM AVX_LOGLESS_CACHING --max-reads-per-alignment-start 10" 1 8 32g 4g
+time_gatk "ReadsPipelineSpark -I hdfs:///user/$USER/small_spark_eval/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.bam -O hdfs://${HDFS_HOST_PORT}/user/$USER/small_spark_eval/out/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.vcf -R hdfs:///user/$USER/small_spark_eval/human_g1k_v37.20.21.fasta --known-sites hdfs://${HDFS_HOST_PORT}/user/$USER/small_spark_eval/dbsnp_138.b37.20.21.vcf -pairHMM AVX_LOGLESS_CACHING --maxReadsPerAlignmentStart 10" 1 8 32g 4g

0 comments on commit 8dcf46f

Please sign in to comment.