diff --git a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/ClusterBatch.json.tmpl b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/ClusterBatch.json.tmpl index fe71d0bea..714e889cd 100644 --- a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/ClusterBatch.json.tmpl +++ b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/ClusterBatch.json.tmpl @@ -28,5 +28,6 @@ "ClusterBatch.manta_vcf_tar": "${this.std_manta_vcf_tar}", "ClusterBatch.melt_vcf_tar": "${this.std_melt_vcf_tar}", "ClusterBatch.scramble_vcf_tar": "${this.std_scramble_vcf_tar}", - "ClusterBatch.ped_file": "${workspace.cohort_ped_file}" + "ClusterBatch.ped_file": "${workspace.cohort_ped_file}", + "ClusterBatch.N_IQR_cutoff_plotting": "6" } diff --git a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/FilterBatchSites.json.tmpl b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/FilterBatchSites.json.tmpl index 5761fabb3..77990c7bb 100644 --- a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/FilterBatchSites.json.tmpl +++ b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/FilterBatchSites.json.tmpl @@ -8,5 +8,6 @@ "FilterBatchSites.melt_vcf" : "${this.clustered_melt_vcf}", "FilterBatchSites.scramble_vcf" : "${this.clustered_scramble_vcf}", "FilterBatchSites.evidence_metrics": "${this.metrics}", - "FilterBatchSites.evidence_metrics_common": "${this.metrics_common}" + "FilterBatchSites.evidence_metrics_common": "${this.metrics_common}", + "FilterBatchSites.N_IQR_cutoff_plotting": "6" } diff --git a/inputs/templates/test/ClusterBatch/ClusterBatch.json.tmpl b/inputs/templates/test/ClusterBatch/ClusterBatch.json.tmpl index 8c9acb25b..00225f1f7 100644 --- a/inputs/templates/test/ClusterBatch/ClusterBatch.json.tmpl +++ b/inputs/templates/test/ClusterBatch/ClusterBatch.json.tmpl @@ -29,5 +29,6 @@ "ClusterBatch.wham_vcf_tar": {{ 
test_batch.std_wham_vcf_tar | tojson }}, "ClusterBatch.manta_vcf_tar": {{ test_batch.std_manta_vcf_tar | tojson }}, "ClusterBatch.melt_vcf_tar": {{ test_batch.std_melt_vcf_tar | tojson }}, - "ClusterBatch.ped_file": {{ test_batch.ped_file | tojson }} + "ClusterBatch.ped_file": {{ test_batch.ped_file | tojson }}, + "ClusterBatch.N_IQR_cutoff_plotting": "6" } diff --git a/inputs/templates/test/FilterBatch/FilterBatchSites.json.tmpl b/inputs/templates/test/FilterBatch/FilterBatchSites.json.tmpl index cb936e511..a5b3a423b 100644 --- a/inputs/templates/test/FilterBatch/FilterBatchSites.json.tmpl +++ b/inputs/templates/test/FilterBatch/FilterBatchSites.json.tmpl @@ -7,5 +7,6 @@ "FilterBatchSites.wham_vcf" : {{ test_batch.merged_wham_vcf | tojson }}, "FilterBatchSites.melt_vcf" : {{ test_batch.merged_melt_vcf | tojson }}, "FilterBatchSites.evidence_metrics": {{ test_batch.evidence_metrics | tojson }}, - "FilterBatchSites.evidence_metrics_common": {{ test_batch.evidence_metrics_common | tojson }} + "FilterBatchSites.evidence_metrics_common": {{ test_batch.evidence_metrics_common | tojson }}, + "FilterBatchSites.N_IQR_cutoff_plotting": "6" } diff --git a/inputs/templates/test/GATKSVPipelineBatch/GATKSVPipelineBatch.json.tmpl b/inputs/templates/test/GATKSVPipelineBatch/GATKSVPipelineBatch.json.tmpl index 6b61270be..4e925a3de 100644 --- a/inputs/templates/test/GATKSVPipelineBatch/GATKSVPipelineBatch.json.tmpl +++ b/inputs/templates/test/GATKSVPipelineBatch/GATKSVPipelineBatch.json.tmpl @@ -94,7 +94,7 @@ "GATKSVPipelineBatch.GATKSVPipelinePhase1.depth_exclude_overlap_fraction": "0.5", "GATKSVPipelineBatch.GATKSVPipelinePhase1.depth_interval_overlap": "0.8", "GATKSVPipelineBatch.GATKSVPipelinePhase1.depth_clustering_algorithm": "SINGLE_LINKAGE", - + "GATKSVPipelineBatch.N_IQR_cutoff_plotting": "6", "GATKSVPipelineBatch.GATKSVPipelinePhase1.BAF_split_size": "10000", "GATKSVPipelineBatch.GATKSVPipelinePhase1.RD_split_size": "10000", 
"GATKSVPipelineBatch.GATKSVPipelinePhase1.PE_split_size": "10000", @@ -105,7 +105,6 @@ "GATKSVPipelineBatch.outlier_cutoff_table" : {{ test_batch.outlier_cutoff_table | tojson }}, "GATKSVPipelineBatch.GATKSVPipelinePhase1.outlier_cutoff_nIQR": "999999", - "GATKSVPipelineBatch.GenotypeBatch.n_RD_genotype_bins": "100000", "GATKSVPipelineBatch.GenotypeBatch.n_per_split": "5000", "GATKSVPipelineBatch.GenotypeBatch.pesr_exclude_list": {{ reference_resources.pesr_exclude_list | tojson }}, diff --git a/inputs/templates/test/GATKSVPipelinePhase1/GATKSVPipelinePhase1.json.tmpl b/inputs/templates/test/GATKSVPipelinePhase1/GATKSVPipelinePhase1.json.tmpl index c5ec7ce56..471fe461a 100644 --- a/inputs/templates/test/GATKSVPipelinePhase1/GATKSVPipelinePhase1.json.tmpl +++ b/inputs/templates/test/GATKSVPipelinePhase1/GATKSVPipelinePhase1.json.tmpl @@ -45,6 +45,7 @@ "GATKSVPipelinePhase1.outlier_cutoff_table" : {{ test_batch.outlier_cutoff_table | tojson }}, "GATKSVPipelinePhase1.outlier_cutoff_nIQR": "6", + "GATKSVPipelinePhase1.N_IQR_cutoff_plotting": "6", "GATKSVPipelinePhase1.ploidy_sample_psi_scale": "0.001", "GATKSVPipelinePhase1.contig_ploidy_model_tar" : {{ test_batch.contig_ploidy_model_tar | tojson }}, diff --git a/wdl/ClusterBatch.wdl b/wdl/ClusterBatch.wdl index f1ce387c9..e6c55d5ae 100644 --- a/wdl/ClusterBatch.wdl +++ b/wdl/ClusterBatch.wdl @@ -5,6 +5,7 @@ import "DepthClustering.wdl" as depth import "ClusterBatchMetrics.wdl" as metrics import "TasksClusterBatch.wdl" as tasks import "Utils.wdl" as util +import "PlotSVCountsPerSample.wdl" as sv_counts workflow ClusterBatch { input { @@ -48,6 +49,9 @@ workflow ClusterBatch { Int pesr_breakend_window String? pesr_clustering_algorithm + # PlotSVCountsPerSample + Int? N_IQR_cutoff_plotting + # Module metrics parameters # Run module metrics workflow at the end - on by default Boolean? run_module_metrics @@ -81,6 +85,9 @@ workflow ClusterBatch { RuntimeAttr? runtime_attr_gatk_to_svtk_vcf_depth RuntimeAttr? 
runtime_override_concat_vcfs_depth RuntimeAttr? runtime_attr_exclude_intervals_pesr + RuntimeAttr? runtime_attr_count_svs + RuntimeAttr? runtime_attr_plot_svcounts + RuntimeAttr? runtime_attr_cat_outliers_preview } call util.GetSampleIdsFromVcfTar { @@ -282,6 +289,19 @@ workflow ClusterBatch { } } + if (defined(N_IQR_cutoff_plotting)){ + call sv_counts.PlotSVCountsPerSample { + input: + prefix = batch, + vcfs = [ClusterDepth.clustered_vcf, ClusterPESR_manta.clustered_vcf, ClusterPESR_wham.clustered_vcf, ClusterPESR_melt.clustered_vcf, ClusterPESR_scramble.clustered_vcf], + N_IQR_cutoff = select_first([N_IQR_cutoff_plotting]), + sv_pipeline_docker = sv_pipeline_docker, + runtime_attr_count_svs = runtime_attr_count_svs, + runtime_attr_plot_svcounts = runtime_attr_plot_svcounts, + runtime_attr_cat_outliers_preview = runtime_attr_cat_outliers_preview + } + } + output { File clustered_depth_vcf = ClusterDepth.clustered_vcf File clustered_depth_vcf_index = ClusterDepth.clustered_vcf_index @@ -293,7 +313,13 @@ workflow ClusterBatch { File? clustered_melt_vcf_index = ClusterPESR_melt.clustered_vcf_index File? clustered_scramble_vcf = ClusterPESR_scramble.clustered_vcf File? clustered_scramble_vcf_index = ClusterPESR_scramble.clustered_vcf_index - + Array[File]? clustered_sv_counts = PlotSVCountsPerSample.sv_counts + Array[File]? clustered_sv_count_plots = PlotSVCountsPerSample.sv_count_plots + File? clustered_outlier_samples_preview = PlotSVCountsPerSample.outlier_samples_preview + File? clustered_outlier_samples_with_reason = PlotSVCountsPerSample.outlier_samples_with_reason + Int? clustered_num_outlier_samples = PlotSVCountsPerSample.num_outlier_samples File? 
metrics_file_clusterbatch = ClusterBatchMetrics.metrics_file } -} + + +} \ No newline at end of file diff --git a/wdl/FilterBatchSites.wdl b/wdl/FilterBatchSites.wdl index 633486bcb..031204e6c 100644 --- a/wdl/FilterBatchSites.wdl +++ b/wdl/FilterBatchSites.wdl @@ -1,6 +1,7 @@ version 1.0 import "Structs.wdl" +import "PlotSVCountsPerSample.wdl" as sv_counts workflow FilterBatchSites { input { @@ -12,13 +13,19 @@ workflow FilterBatchSites { File? depth_vcf File evidence_metrics File evidence_metrics_common - String sv_pipeline_docker + + # PlotSVCountsPerSample metrics + Int N_IQR_cutoff_plotting = 6 + RuntimeAttr? runtime_attr_adjudicate RuntimeAttr? runtime_attr_rewrite_scores RuntimeAttr? runtime_attr_filter_annotate_vcf RuntimeAttr? runtime_attr_merge_pesr_vcfs - + RuntimeAttr? runtime_attr_count_svs + RuntimeAttr? runtime_attr_plot_svcounts + RuntimeAttr? runtime_attr_cat_outliers_preview + } Array[String] algorithms = ["manta", "wham", "melt", "scramble", "depth"] @@ -58,6 +65,17 @@ workflow FilterBatchSites { } } + call sv_counts.PlotSVCountsPerSample { + input: + prefix = batch, + vcfs=[FilterAnnotateVcf.annotated_vcf[0], FilterAnnotateVcf.annotated_vcf[1], FilterAnnotateVcf.annotated_vcf[2], FilterAnnotateVcf.annotated_vcf[3], FilterAnnotateVcf.annotated_vcf[4]], + N_IQR_cutoff = N_IQR_cutoff_plotting, + sv_pipeline_docker = sv_pipeline_docker, + runtime_attr_count_svs = runtime_attr_count_svs, + runtime_attr_plot_svcounts = runtime_attr_plot_svcounts, + runtime_attr_cat_outliers_preview = runtime_attr_cat_outliers_preview + } + output { File? sites_filtered_manta_vcf = FilterAnnotateVcf.annotated_vcf[0] File? 
sites_filtered_wham_vcf = FilterAnnotateVcf.annotated_vcf[1] @@ -67,7 +85,13 @@ workflow FilterBatchSites { File cutoffs = AdjudicateSV.cutoffs File scores = RewriteScores.updated_scores File RF_intermediate_files = AdjudicateSV.RF_intermediate_files + Array[File] sites_filtered_sv_counts = PlotSVCountsPerSample.sv_counts + Array[File] sites_filtered_sv_count_plots = PlotSVCountsPerSample.sv_count_plots + File sites_filtered_outlier_samples_preview = PlotSVCountsPerSample.outlier_samples_preview + File sites_filtered_outlier_samples_with_reason = PlotSVCountsPerSample.outlier_samples_with_reason + Int sites_filtered_num_outlier_samples = PlotSVCountsPerSample.num_outlier_samples } + } task AdjudicateSV { @@ -79,7 +103,7 @@ task AdjudicateSV { } RuntimeAttr default_attr = object { - cpu_cores: 1, + cpu_cores: 1, mem_gb: 3.75, disk_gb: 10, boot_disk_gb: 10, @@ -101,7 +125,7 @@ task AdjudicateSV { mv *_trainable.txt ~{batch}.RF_intermediate_files/ mv *_testable.txt ~{batch}.RF_intermediate_files/ tar -czvf ~{batch}.RF_intermediate_files.tar.gz ~{batch}.RF_intermediate_files - + >>> runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) @@ -125,7 +149,7 @@ task RewriteScores { } RuntimeAttr default_attr = object { - cpu_cores: 1, + cpu_cores: 1, mem_gb: 3.75, disk_gb: 10, boot_disk_gb: 10, @@ -145,7 +169,7 @@ task RewriteScores { -m ~{metrics} \ -s ~{scores} \ -o ~{batch}.updated_scores - + >>> runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) @@ -170,7 +194,7 @@ task FilterAnnotateVcf { } RuntimeAttr default_attr = object { - cpu_cores: 1, + cpu_cores: 1, mem_gb: 3.75, disk_gb: 10, boot_disk_gb: 10, @@ -200,7 +224,7 @@ task FilterAnnotateVcf { /opt/sv-pipeline/03_variant_filtering/scripts/annotate_RF_evidence.py filtered.corrected_coords.vcf.gz ~{scores} ~{prefix}.with_evidence.vcf bgzip ~{prefix}.with_evidence.vcf - + >>> runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) @@ -212,5 +236,4 
@@ task FilterAnnotateVcf { maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) } -} - +} \ No newline at end of file diff --git a/wdl/GATKSVPipelineBatch.wdl b/wdl/GATKSVPipelineBatch.wdl index 9b4d20678..077b13100 100644 --- a/wdl/GATKSVPipelineBatch.wdl +++ b/wdl/GATKSVPipelineBatch.wdl @@ -63,6 +63,9 @@ workflow GATKSVPipelineBatch { File contig_ploidy_model_tar Array[File] gcnv_model_tars + # PlotSVCountsPerSample metrics from ClusterBatch in GATKSVPipelinePhase1 + Int? N_IQR_cutoff_plotting + File? outlier_cutoff_table File qc_definitions @@ -204,6 +207,7 @@ workflow GATKSVPipelineBatch { counts=counts_files_, bincov_matrix=EvidenceQC.bincov_matrix, bincov_matrix_index=EvidenceQC.bincov_matrix_index, + N_IQR_cutoff_plotting = N_IQR_cutoff_plotting, PE_files=pe_files_, SR_files=sr_files_, SD_files=sd_files_, @@ -211,6 +215,7 @@ workflow GATKSVPipelineBatch { melt_vcfs=melt_vcfs_, scramble_vcfs=scramble_vcfs_, wham_vcfs=wham_vcfs_, + cnmops_chrom_file=autosome_file, cnmops_allo_file=allosome_file, allosome_contigs=allosome_file, @@ -420,6 +425,11 @@ workflow GATKSVPipelineBatch { File? merged_melt_vcf_index = GATKSVPipelinePhase1.melt_vcf_index File? merged_wham_vcf = GATKSVPipelinePhase1.wham_vcf File? merged_wham_vcf_index = GATKSVPipelinePhase1.wham_vcf_index + Array[File]? clustered_sv_counts = GATKSVPipelinePhase1.clustered_sv_counts + Array[File]? clustered_sv_count_plots = GATKSVPipelinePhase1.clustered_sv_count_plots + File? clustered_outlier_samples_preview = GATKSVPipelinePhase1.clustered_outlier_samples_preview + File? clustered_outlier_samples_with_reason = GATKSVPipelinePhase1.clustered_outlier_samples_with_reason + Int? clustered_num_outlier_samples = GATKSVPipelinePhase1.clustered_num_outlier_samples File evidence_metrics = GATKSVPipelinePhase1.evidence_metrics File evidence_metrics_common = GATKSVPipelinePhase1.evidence_metrics_common @@ -432,7 +442,6 @@ workflow GATKSVPipelineBatch { File? 
sites_filtered_wham_vcf = GATKSVPipelinePhase1.sites_filtered_wham_vcf File? sites_filtered_melt_vcf = GATKSVPipelinePhase1.sites_filtered_melt_vcf File? sites_filtered_depth_vcf = GATKSVPipelinePhase1.sites_filtered_depth_vcf - File cutoffs = GATKSVPipelinePhase1.cutoffs File genotyped_pesr_vcf = GenotypeBatch.genotyped_pesr_vcf File genotyped_depth_vcf = GenotypeBatch.genotyped_depth_vcf diff --git a/wdl/GATKSVPipelinePhase1.wdl b/wdl/GATKSVPipelinePhase1.wdl index ab20065b1..3aa03f739 100644 --- a/wdl/GATKSVPipelinePhase1.wdl +++ b/wdl/GATKSVPipelinePhase1.wdl @@ -160,6 +160,8 @@ workflow GATKSVPipelinePhase1 { Int pesr_breakend_window String? pesr_clustering_algorithm + Int? N_IQR_cutoff_plotting + File? baseline_depth_vcf_cluster_batch File? baseline_manta_vcf_cluster_batch File? baseline_wham_vcf_cluster_batch @@ -183,6 +185,9 @@ workflow GATKSVPipelinePhase1 { RuntimeAttr? runtime_attr_gatk_to_svtk_vcf_depth_cluster_batch RuntimeAttr? runtime_override_concat_vcfs_depth_cluster_batch RuntimeAttr? runtime_attr_exclude_intervals_pesr_cluster_batch + RuntimeAttr? runtime_attr_count_svs + RuntimeAttr? runtime_attr_plot_svcounts + RuntimeAttr? 
runtime_attr_cat_outliers_preview ############################################################ ## GenerateBatchMetrics @@ -358,6 +363,7 @@ workflow GATKSVPipelinePhase1 { pesr_interval_overlap=pesr_interval_overlap, pesr_breakend_window=pesr_breakend_window, pesr_clustering_algorithm=pesr_clustering_algorithm, + N_IQR_cutoff_plotting = N_IQR_cutoff_plotting, run_module_metrics=run_clusterbatch_metrics, linux_docker=linux_docker, sv_pipeline_base_docker=sv_pipeline_base_docker, @@ -384,7 +390,10 @@ workflow GATKSVPipelinePhase1 { runtime_attr_svcluster_depth=runtime_attr_svcluster_depth_cluster_batch, runtime_attr_gatk_to_svtk_vcf_depth=runtime_attr_gatk_to_svtk_vcf_depth_cluster_batch, runtime_override_concat_vcfs_depth=runtime_override_concat_vcfs_depth_cluster_batch, - runtime_attr_exclude_intervals_pesr=runtime_attr_exclude_intervals_pesr_cluster_batch + runtime_attr_exclude_intervals_pesr=runtime_attr_exclude_intervals_pesr_cluster_batch, + runtime_attr_count_svs = runtime_attr_count_svs, + runtime_attr_plot_svcounts = runtime_attr_plot_svcounts, + runtime_attr_cat_outliers_preview = runtime_attr_cat_outliers_preview } call batchmetrics.GenerateBatchMetrics as GenerateBatchMetrics { @@ -500,6 +509,11 @@ workflow GATKSVPipelinePhase1 { File? melt_vcf_index = ClusterBatch.clustered_melt_vcf_index File? scramble_vcf = ClusterBatch.clustered_scramble_vcf File? scramble_vcf_index = ClusterBatch.clustered_scramble_vcf_index + Array[File]? clustered_sv_counts = ClusterBatch.clustered_sv_counts + Array[File]? clustered_sv_count_plots = ClusterBatch.clustered_sv_count_plots + File? clustered_outlier_samples_preview = ClusterBatch.clustered_outlier_samples_preview + File? clustered_outlier_samples_with_reason = ClusterBatch.clustered_outlier_samples_with_reason + Int? clustered_num_outlier_samples = ClusterBatch.clustered_num_outlier_samples File? metrics_file_clusterbatch = ClusterBatch.metrics_file_clusterbatch