broadinstitute · samuelklee · Jan 10, 2019 · Dec 21, 2018 · LeeTL1220 · Jan 10, 2019
diff --git a/scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl b/scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl
@@ -90,6 +90,7 @@ workflow CNVSomaticPairWorkflow {
     ##############################################
     Int? max_num_segments_per_chromosome
     Int? min_total_allele_count
+    Int? min_total_allele_count_normal
     Float? genotyping_homozygous_log_ratio_threshold
     Float? genotyping_base_error_rate
     Float? kernel_variance_copy_ratio
@@ -218,6 +219,7 @@ workflow CNVSomaticPairWorkflow {
             normal_allelic_counts = CollectAllelicCountsNormal.allelic_counts,
             max_num_segments_per_chromosome = max_num_segments_per_chromosome,
             min_total_allele_count = min_total_allele_count,
+            min_total_allele_count_normal = min_total_allele_count_normal,
             genotyping_homozygous_log_ratio_threshold = genotyping_homozygous_log_ratio_threshold,
             genotyping_base_error_rate = genotyping_base_error_rate,
             kernel_variance_copy_ratio = kernel_variance_copy_ratio,
@@ -345,7 +347,7 @@ workflow CNVSomaticPairWorkflow {
                 denoised_copy_ratios = DenoiseReadCountsNormal.denoised_copy_ratios,
                 allelic_counts = CollectAllelicCountsNormal.allelic_counts,
                 max_num_segments_per_chromosome = max_num_segments_per_chromosome,
-                min_total_allele_count = min_total_allele_count,
+                min_total_allele_count = min_total_allele_count_normal,
                 genotyping_homozygous_log_ratio_threshold = genotyping_homozygous_log_ratio_threshold,
                 genotyping_base_error_rate = genotyping_base_error_rate,
                 kernel_variance_copy_ratio = kernel_variance_copy_ratio,
@@ -542,6 +544,7 @@ task ModelSegments {
     File? normal_allelic_counts
     Int? max_num_segments_per_chromosome
     Int? min_total_allele_count
+    Int? min_total_allele_count_normal
     Float? genotyping_homozygous_log_ratio_threshold
     Float? genotyping_base_error_rate
     Float? kernel_variance_copy_ratio
@@ -577,6 +580,11 @@ task ModelSegments {
     # If optional output_dir not specified, use "out"
     String output_dir_ = select_first([output_dir, "out"])
 
+    # default values are min_total_allele_count_ = 0 in matched-normal mode
+    #                                            = 30 in case-only mode
+    Int default_min_total_allele_count = if defined(normal_allelic_counts) then 0 else 30
+    Int min_total_allele_count_ = select_first([min_total_allele_count, default_min_total_allele_count])
+
     command <<<
         set -e
         mkdir ${output_dir_}
@@ -586,7 +594,8 @@ task ModelSegments {
             --denoised-copy-ratios ${denoised_copy_ratios} \
             --allelic-counts ${allelic_counts} \
             ${"--normal-allelic-counts " + normal_allelic_counts} \
-            --minimum-total-allele-count ${default="30" min_total_allele_count} \
+            --minimum-total-allele-count-case ${min_total_allele_count_} \
+            --minimum-total-allele-count-normal ${default="30" min_total_allele_count_normal} \
             --genotyping-homozygous-log-ratio-threshold ${default="-10.0" genotyping_homozygous_log_ratio_threshold} \
             --genotyping-base-error-rate ${default="0.05" genotyping_base_error_rate} \
             --maximum-number-of-segments-per-chromosome ${default="1000" max_num_segments_per_chromosome} \
@@ -597,14 +606,14 @@ task ModelSegments {
             --window-size ${sep=" --window-size " window_sizes} \
             --number-of-changepoints-penalty-factor ${default="1.0" num_changepoints_penalty_factor} \
             --minor-allele-fraction-prior-alpha ${default="25.0" minor_allele_fraction_prior_alpha} \
-            --number-of-samples-copy-ratio ${default=100 num_samples_copy_ratio} \
-            --number-of-burn-in-samples-copy-ratio ${default=50 num_burn_in_copy_ratio} \
-            --number-of-samples-allele-fraction ${default=100 num_samples_allele_fraction} \
-            --number-of-burn-in-samples-allele-fraction ${default=50 num_burn_in_allele_fraction} \
+            --number-of-samples-copy-ratio ${default="100" num_samples_copy_ratio} \
+            --number-of-burn-in-samples-copy-ratio ${default="50" num_burn_in_copy_ratio} \
+            --number-of-samples-allele-fraction ${default="100" num_samples_allele_fraction} \
+            --number-of-burn-in-samples-allele-fraction ${default="50" num_burn_in_allele_fraction} \
             --smoothing-credible-interval-threshold-copy-ratio ${default="2.0" smoothing_threshold_copy_ratio} \
             --smoothing-credible-interval-threshold-allele-fraction ${default="2.0" smoothing_threshold_allele_fraction} \
-            --maximum-number-of-smoothing-iterations ${default=10 max_num_smoothing_iterations} \
-            --number-of-smoothing-iterations-per-fit ${default=0 num_smoothing_iterations_per_fit} \
+            --maximum-number-of-smoothing-iterations ${default="10" max_num_smoothing_iterations} \
+            --number-of-smoothing-iterations-per-fit ${default="0" num_smoothing_iterations_per_fit} \
             --output ${output_dir_} \
             --output-prefix ${entity_id}
 

diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/ModelSegments.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/ModelSegments.java
@@ -231,7 +231,8 @@ public final class ModelSegments extends CommandLineProgram {
     public static final String ALLELE_FRACTION_LEGACY_SEGMENTS_FILE_SUFFIX = ".af.igv" + SEGMENTS_FILE_SUFFIX;
 
     //het genotyping argument names
-    public static final String MINIMUM_TOTAL_ALLELE_COUNT_LONG_NAME = "minimum-total-allele-count";
+    public static final String MINIMUM_TOTAL_ALLELE_COUNT_CASE_LONG_NAME = "minimum-total-allele-count-case";
+    public static final String MINIMUM_TOTAL_ALLELE_COUNT_NORMAL_LONG_NAME = "minimum-total-allele-count-normal";
     public static final String GENOTYPING_HOMOZYGOUS_LOG_RATIO_THRESHOLD_LONG_NAME = "genotyping-homozygous-log-ratio-threshold";
     public static final String GENOTYPING_BASE_ERROR_RATE_LONG_NAME = "genotyping-base-error-rate";
 
@@ -248,8 +249,8 @@ public final class ModelSegments extends CommandLineProgram {
     public static final String MINOR_ALLELE_FRACTION_PRIOR_ALPHA_LONG_NAME = "minor-allele-fraction-prior-alpha";
     public static final String NUMBER_OF_SAMPLES_COPY_RATIO_LONG_NAME = "number-of-samples-copy-ratio";
     public static final String NUMBER_OF_BURN_IN_SAMPLES_COPY_RATIO_LONG_NAME = "number-of-burn-in-samples-copy-ratio";
-    public static final String NUM_SAMPLES_ALLELE_FRACTION_LONG_NAME = "number-of-samples-allele-fraction";
-    public static final String NUM_BURN_IN_ALLELE_FRACTION_LONG_NAME = "number-of-burn-in-samples-allele-fraction";
+    public static final String NUMBER_OF_SAMPLES_ALLELE_FRACTION_LONG_NAME = "number-of-samples-allele-fraction";
+    public static final String NUMBER_OF_BURN_IN_SAMPLES_ALLELE_FRACTION_LONG_NAME = "number-of-burn-in-samples-allele-fraction";
 
     //smoothing argument names
     public static final String SMOOTHING_CREDIBLE_INTERVAL_THRESHOLD_COPY_RATIO_LONG_NAME = "smoothing-credible-interval-threshold-copy-ratio";
@@ -292,12 +293,22 @@ public final class ModelSegments extends CommandLineProgram {
     private String outputDir;
 
     @Argument(
-            doc = "Minimum total count for filtering allelic counts, if available.",
-            fullName = MINIMUM_TOTAL_ALLELE_COUNT_LONG_NAME,
+            doc = "Minimum total count for filtering allelic counts in the case sample, if available.  " +
+                    "The default value of zero is appropriate for matched-normal mode; " +
+                    "increase to an appropriate value for case-only mode.",
+            fullName = MINIMUM_TOTAL_ALLELE_COUNT_CASE_LONG_NAME,
             minValue = 0,
             optional = true
     )
-    private int minTotalAlleleCount = 30;
+    private int minTotalAlleleCountCase = 0;
+
+    @Argument(
+            doc = "Minimum total count for filtering allelic counts in the matched-normal sample, if available.",
+            fullName = MINIMUM_TOTAL_ALLELE_COUNT_NORMAL_LONG_NAME,
+            minValue = 0,
+            optional = true
+    )
+    private int minTotalAlleleCountNormal = 30;
 
     @Argument(
             doc = "Log-ratio threshold for genotyping and filtering homozygous allelic counts, if available.  " +
@@ -414,15 +425,15 @@ public final class ModelSegments extends CommandLineProgram {
 
     @Argument(
             doc = "Total number of MCMC samples for allele-fraction model.",
-            fullName = NUM_SAMPLES_ALLELE_FRACTION_LONG_NAME,
+            fullName = NUMBER_OF_SAMPLES_ALLELE_FRACTION_LONG_NAME,
             optional = true,
             minValue = 1
     )
     private int numSamplesAlleleFraction = 100;
 
     @Argument(
             doc = "Number of burn-in samples to discard for allele-fraction model.",
-            fullName = NUM_BURN_IN_ALLELE_FRACTION_LONG_NAME,
+            fullName = NUMBER_OF_BURN_IN_SAMPLES_ALLELE_FRACTION_LONG_NAME,
             optional = true,
             minValue = 0
     )
@@ -619,12 +630,14 @@ private AllelicCountCollection genotypeHets(final SampleLocatableMetadata metada
 
         logger.info("Genotyping heterozygous sites from available allelic counts...");
 
+        AllelicCountCollection filteredAllelicCounts = allelicCounts;
+
         //filter on total count in case sample
-        logger.info(String.format("Filtering allelic counts with total count less than %d...", minTotalAlleleCount));
-        AllelicCountCollection filteredAllelicCounts = new AllelicCountCollection(
+        logger.info(String.format("Filtering allelic counts with total count less than %d...", minTotalAlleleCountCase));
+        filteredAllelicCounts = new AllelicCountCollection(
                 metadata,
-                allelicCounts.getRecords().stream()
-                        .filter(ac -> ac.getTotalReadCount() >= minTotalAlleleCount)
+                filteredAllelicCounts.getRecords().stream()
+                        .filter(ac -> ac.getTotalReadCount() >= minTotalAlleleCountCase)
                         .collect(Collectors.toList()));
         logger.info(String.format("Retained %d / %d sites after filtering on total count...",
                 filteredAllelicCounts.size(), allelicCounts.size()));
@@ -645,6 +658,7 @@ private AllelicCountCollection genotypeHets(final SampleLocatableMetadata metada
         if (normalAllelicCounts == null) {
             //filter on homozygosity in case sample
             logger.info("No matched normal was provided, not running in matched-normal mode...");
+
             logger.info("Performing binomial testing and filtering homozygous allelic counts...");
             hetAllelicCounts = new AllelicCountCollection(
                     metadata,
@@ -672,11 +686,11 @@ private AllelicCountCollection genotypeHets(final SampleLocatableMetadata metada
             }
 
             //filter on total count in matched normal
-            logger.info(String.format("Filtering allelic counts in matched normal with total count less than %d...", minTotalAlleleCount));
+            logger.info(String.format("Filtering allelic counts in matched normal with total count less than %d...", minTotalAlleleCountNormal));
             AllelicCountCollection filteredNormalAllelicCounts = new AllelicCountCollection(
                     normalMetadata,
                     normalAllelicCounts.getRecords().stream()
-                            .filter(ac -> ac.getTotalReadCount() >= minTotalAlleleCount)
+                            .filter(ac -> ac.getTotalReadCount() >= minTotalAlleleCountNormal)
                             .collect(Collectors.toList()));
             logger.info(String.format("Retained %d / %d sites in matched normal after filtering on total count...",
                     filteredNormalAllelicCounts.size(), normalAllelicCounts.size()));