Skip to content

Commit

Permalink
Added separate allele-count thresholds for the normal and tumor in Mo…
Browse files Browse the repository at this point in the history
…delSegments. (#5556)
  • Loading branch information
samuelklee authored Jan 10, 2019
1 parent f9a2e5c commit c3e9818
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 22 deletions.
25 changes: 17 additions & 8 deletions scripts/cnv_wdl/somatic/cnv_somatic_pair_workflow.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ workflow CNVSomaticPairWorkflow {
##############################################
Int? max_num_segments_per_chromosome
Int? min_total_allele_count
Int? min_total_allele_count_normal
Float? genotyping_homozygous_log_ratio_threshold
Float? genotyping_base_error_rate
Float? kernel_variance_copy_ratio
Expand Down Expand Up @@ -218,6 +219,7 @@ workflow CNVSomaticPairWorkflow {
normal_allelic_counts = CollectAllelicCountsNormal.allelic_counts,
max_num_segments_per_chromosome = max_num_segments_per_chromosome,
min_total_allele_count = min_total_allele_count,
min_total_allele_count_normal = min_total_allele_count_normal,
genotyping_homozygous_log_ratio_threshold = genotyping_homozygous_log_ratio_threshold,
genotyping_base_error_rate = genotyping_base_error_rate,
kernel_variance_copy_ratio = kernel_variance_copy_ratio,
Expand Down Expand Up @@ -345,7 +347,7 @@ workflow CNVSomaticPairWorkflow {
denoised_copy_ratios = DenoiseReadCountsNormal.denoised_copy_ratios,
allelic_counts = CollectAllelicCountsNormal.allelic_counts,
max_num_segments_per_chromosome = max_num_segments_per_chromosome,
min_total_allele_count = min_total_allele_count,
min_total_allele_count = min_total_allele_count_normal,
genotyping_homozygous_log_ratio_threshold = genotyping_homozygous_log_ratio_threshold,
genotyping_base_error_rate = genotyping_base_error_rate,
kernel_variance_copy_ratio = kernel_variance_copy_ratio,
Expand Down Expand Up @@ -542,6 +544,7 @@ task ModelSegments {
File? normal_allelic_counts
Int? max_num_segments_per_chromosome
Int? min_total_allele_count
Int? min_total_allele_count_normal
Float? genotyping_homozygous_log_ratio_threshold
Float? genotyping_base_error_rate
Float? kernel_variance_copy_ratio
Expand Down Expand Up @@ -577,6 +580,11 @@ task ModelSegments {
# If optional output_dir not specified, use "out"
String output_dir_ = select_first([output_dir, "out"])

# default values are min_total_allele_count_ = 0 in matched-normal mode
# = 30 in case-only mode
Int default_min_total_allele_count = if defined(normal_allelic_counts) then 0 else 30
Int min_total_allele_count_ = select_first([min_total_allele_count, default_min_total_allele_count])

command <<<
set -e
mkdir ${output_dir_}
Expand All @@ -586,7 +594,8 @@ task ModelSegments {
--denoised-copy-ratios ${denoised_copy_ratios} \
--allelic-counts ${allelic_counts} \
${"--normal-allelic-counts " + normal_allelic_counts} \
--minimum-total-allele-count ${default="30" min_total_allele_count} \
--minimum-total-allele-count-case ${min_total_allele_count_} \
--minimum-total-allele-count-normal ${default="30" min_total_allele_count_normal} \
--genotyping-homozygous-log-ratio-threshold ${default="-10.0" genotyping_homozygous_log_ratio_threshold} \
--genotyping-base-error-rate ${default="0.05" genotyping_base_error_rate} \
--maximum-number-of-segments-per-chromosome ${default="1000" max_num_segments_per_chromosome} \
Expand All @@ -597,14 +606,14 @@ task ModelSegments {
--window-size ${sep=" --window-size " window_sizes} \
--number-of-changepoints-penalty-factor ${default="1.0" num_changepoints_penalty_factor} \
--minor-allele-fraction-prior-alpha ${default="25.0" minor_allele_fraction_prior_alpha} \
--number-of-samples-copy-ratio ${default=100 num_samples_copy_ratio} \
--number-of-burn-in-samples-copy-ratio ${default=50 num_burn_in_copy_ratio} \
--number-of-samples-allele-fraction ${default=100 num_samples_allele_fraction} \
--number-of-burn-in-samples-allele-fraction ${default=50 num_burn_in_allele_fraction} \
--number-of-samples-copy-ratio ${default="100" num_samples_copy_ratio} \
--number-of-burn-in-samples-copy-ratio ${default="50" num_burn_in_copy_ratio} \
--number-of-samples-allele-fraction ${default="100" num_samples_allele_fraction} \
--number-of-burn-in-samples-allele-fraction ${default="50" num_burn_in_allele_fraction} \
--smoothing-credible-interval-threshold-copy-ratio ${default="2.0" smoothing_threshold_copy_ratio} \
--smoothing-credible-interval-threshold-allele-fraction ${default="2.0" smoothing_threshold_allele_fraction} \
--maximum-number-of-smoothing-iterations ${default=10 max_num_smoothing_iterations} \
--number-of-smoothing-iterations-per-fit ${default=0 num_smoothing_iterations_per_fit} \
--maximum-number-of-smoothing-iterations ${default="10" max_num_smoothing_iterations} \
--number-of-smoothing-iterations-per-fit ${default="0" num_smoothing_iterations_per_fit} \
--output ${output_dir_} \
--output-prefix ${entity_id}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,8 @@ public final class ModelSegments extends CommandLineProgram {
public static final String ALLELE_FRACTION_LEGACY_SEGMENTS_FILE_SUFFIX = ".af.igv" + SEGMENTS_FILE_SUFFIX;

//het genotyping argument names
public static final String MINIMUM_TOTAL_ALLELE_COUNT_LONG_NAME = "minimum-total-allele-count";
public static final String MINIMUM_TOTAL_ALLELE_COUNT_CASE_LONG_NAME = "minimum-total-allele-count-case";
public static final String MINIMUM_TOTAL_ALLELE_COUNT_NORMAL_LONG_NAME = "minimum-total-allele-count-normal";
public static final String GENOTYPING_HOMOZYGOUS_LOG_RATIO_THRESHOLD_LONG_NAME = "genotyping-homozygous-log-ratio-threshold";
public static final String GENOTYPING_BASE_ERROR_RATE_LONG_NAME = "genotyping-base-error-rate";

Expand All @@ -248,8 +249,8 @@ public final class ModelSegments extends CommandLineProgram {
public static final String MINOR_ALLELE_FRACTION_PRIOR_ALPHA_LONG_NAME = "minor-allele-fraction-prior-alpha";
public static final String NUMBER_OF_SAMPLES_COPY_RATIO_LONG_NAME = "number-of-samples-copy-ratio";
public static final String NUMBER_OF_BURN_IN_SAMPLES_COPY_RATIO_LONG_NAME = "number-of-burn-in-samples-copy-ratio";
public static final String NUM_SAMPLES_ALLELE_FRACTION_LONG_NAME = "number-of-samples-allele-fraction";
public static final String NUM_BURN_IN_ALLELE_FRACTION_LONG_NAME = "number-of-burn-in-samples-allele-fraction";
public static final String NUMBER_OF_SAMPLES_ALLELE_FRACTION_LONG_NAME = "number-of-samples-allele-fraction";
public static final String NUMBER_OF_BURN_IN_SAMPLES_ALLELE_FRACTION_LONG_NAME = "number-of-burn-in-samples-allele-fraction";

//smoothing argument names
public static final String SMOOTHING_CREDIBLE_INTERVAL_THRESHOLD_COPY_RATIO_LONG_NAME = "smoothing-credible-interval-threshold-copy-ratio";
Expand Down Expand Up @@ -292,12 +293,22 @@ public final class ModelSegments extends CommandLineProgram {
private String outputDir;

@Argument(
doc = "Minimum total count for filtering allelic counts, if available.",
fullName = MINIMUM_TOTAL_ALLELE_COUNT_LONG_NAME,
doc = "Minimum total count for filtering allelic counts in the case sample, if available. " +
"The default value of zero is appropriate for matched-normal mode; " +
"increase to an appropriate value for case-only mode.",
fullName = MINIMUM_TOTAL_ALLELE_COUNT_CASE_LONG_NAME,
minValue = 0,
optional = true
)
private int minTotalAlleleCount = 30;
private int minTotalAlleleCountCase = 0;

@Argument(
doc = "Minimum total count for filtering allelic counts in the matched-normal sample, if available.",
fullName = MINIMUM_TOTAL_ALLELE_COUNT_NORMAL_LONG_NAME,
minValue = 0,
optional = true
)
private int minTotalAlleleCountNormal = 30;

@Argument(
doc = "Log-ratio threshold for genotyping and filtering homozygous allelic counts, if available. " +
Expand Down Expand Up @@ -414,15 +425,15 @@ public final class ModelSegments extends CommandLineProgram {

@Argument(
doc = "Total number of MCMC samples for allele-fraction model.",
fullName = NUM_SAMPLES_ALLELE_FRACTION_LONG_NAME,
fullName = NUMBER_OF_SAMPLES_ALLELE_FRACTION_LONG_NAME,
optional = true,
minValue = 1
)
private int numSamplesAlleleFraction = 100;

@Argument(
doc = "Number of burn-in samples to discard for allele-fraction model.",
fullName = NUM_BURN_IN_ALLELE_FRACTION_LONG_NAME,
fullName = NUMBER_OF_BURN_IN_SAMPLES_ALLELE_FRACTION_LONG_NAME,
optional = true,
minValue = 0
)
Expand Down Expand Up @@ -619,12 +630,14 @@ private AllelicCountCollection genotypeHets(final SampleLocatableMetadata metada

logger.info("Genotyping heterozygous sites from available allelic counts...");

AllelicCountCollection filteredAllelicCounts = allelicCounts;

//filter on total count in case sample
logger.info(String.format("Filtering allelic counts with total count less than %d...", minTotalAlleleCount));
AllelicCountCollection filteredAllelicCounts = new AllelicCountCollection(
logger.info(String.format("Filtering allelic counts with total count less than %d...", minTotalAlleleCountCase));
filteredAllelicCounts = new AllelicCountCollection(
metadata,
allelicCounts.getRecords().stream()
.filter(ac -> ac.getTotalReadCount() >= minTotalAlleleCount)
filteredAllelicCounts.getRecords().stream()
.filter(ac -> ac.getTotalReadCount() >= minTotalAlleleCountCase)
.collect(Collectors.toList()));
logger.info(String.format("Retained %d / %d sites after filtering on total count...",
filteredAllelicCounts.size(), allelicCounts.size()));
Expand All @@ -645,6 +658,7 @@ private AllelicCountCollection genotypeHets(final SampleLocatableMetadata metada
if (normalAllelicCounts == null) {
//filter on homozygosity in case sample
logger.info("No matched normal was provided, not running in matched-normal mode...");

logger.info("Performing binomial testing and filtering homozygous allelic counts...");
hetAllelicCounts = new AllelicCountCollection(
metadata,
Expand Down Expand Up @@ -672,11 +686,11 @@ private AllelicCountCollection genotypeHets(final SampleLocatableMetadata metada
}

//filter on total count in matched normal
logger.info(String.format("Filtering allelic counts in matched normal with total count less than %d...", minTotalAlleleCount));
logger.info(String.format("Filtering allelic counts in matched normal with total count less than %d...", minTotalAlleleCountNormal));
AllelicCountCollection filteredNormalAllelicCounts = new AllelicCountCollection(
normalMetadata,
normalAllelicCounts.getRecords().stream()
.filter(ac -> ac.getTotalReadCount() >= minTotalAlleleCount)
.filter(ac -> ac.getTotalReadCount() >= minTotalAlleleCountNormal)
.collect(Collectors.toList()));
logger.info(String.format("Retained %d / %d sites in matched normal after filtering on total count...",
filteredNormalAllelicCounts.size(), normalAllelicCounts.size()));
Expand Down

0 comments on commit c3e9818

Please sign in to comment.