Skip to content

Commit

Permalink
finished responding to comments
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesemery committed Mar 4, 2022
1 parent cd103b2 commit addef19
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -499,21 +499,9 @@ static Set<Haplotype> filterPileupHaplotypes(final List<Haplotype> onlyNewHaplot
final Map<Kmer, Integer> kmerReadCounts,
final int numPileupHaplotypes,
final int kmerSize ) {
// // make sure we're supposed to look for high entropy
// if ( lookForMismatchEntropy &&
// pileup.getNumberOfElements() >= minReadsAtLocus &&
// (double)mismatchQualities / (double)totalQualities >= mismatchThreshold )
// hasPointEvent = true;

// AH & BG filter
// get reads from assemblyResultSet.regionForGenotyping.reads[x].samRecord mReadBases and mBaseQualities and kmerize them.
// Map<kmer, Integer> # times Kmer in all the reads
// for each kmer in reads - find hapotypes that contain it and
// filter haplotypes that don't have 10% coverage of read kmers
// check if finalizeRegion methods is already applied and it removes the softclipped bases

// get haplotypes from assemblyResultSet and kmerize. for each haplotype create a set of kmers.
// for each haplotype, look up the kmers in the read-map and sum thee counts fo the haplotype score

// Get haplotypes from assemblyResultSet and kmerize. for each haplotype create a set of kmers.
// for each haplotype, look up the kmers in the read-map and sum the counts for the haplotype score
// create a Map<Haplotype, Score>
LinkedHashMap<Haplotype, Integer> haplotypeScores = new LinkedHashMap<>();
for (Haplotype haplotype : onlyNewHaplotypes) {
Expand All @@ -527,6 +515,7 @@ static Set<Haplotype> filterPileupHaplotypes(final List<Haplotype> onlyNewHaplot
haplotypeScores.put(haplotype, hapKmerCount);
}

// Select the top haplotypes in order of how many of their kmers have support in the underlying reads
Map<Haplotype,Integer> topScoringHaplotypes =
haplotypeScores.entrySet().stream()
.sorted(Collections.reverseOrder(Map.Entry.comparingByValue()))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -768,8 +768,6 @@ private boolean containsCalls(final CalledHaplotypes calledHaplotypes) {
*/
private List<VariantContext> referenceModelForNoVariation(final AssemblyRegion region, final boolean needsToBeFinalized, final List<VariantContext> VCpriors) {
if ( emitReferenceConfidence() ) {
//TODO - why the activeRegion cannot manage its own one-time finalization and filtering?
//TODO - perhaps we can remove the last parameter of this method and the three lines bellow?
if ( needsToBeFinalized ) {
AssemblyBasedCallerUtils.finalizeRegion(region, hcArgs.assemblerArgs.errorCorrectReads, hcArgs.dontUseSoftClippedBases, minTailQuality, readsHeader, samplesList, ! hcArgs.doNotCorrectOverlappingBaseQualities, hcArgs.softClipLowQualityEnds, hcArgs.pileupDetectionArgs.usePileupDetection);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,49 +44,49 @@ public final class PileupDetectionArgumentCollection {
* Enables detection of indels from the pileups. (EXPERIMENTAL FEATURE)
*/
@Hidden
@Argument(fullName= PILEUP_DETECTION_ENABLE_INDELS, doc = "Pileup Detection: If enabled, pileup detection code will attempt to detect indels missing from assembly. (Requires `--pileup-detection` argument)", optional = true)
@Argument(fullName= PILEUP_DETECTION_ENABLE_INDELS, doc = "Pileup Detection: If enabled, pileup detection code will attempt to detect indels missing from assembly. (Requires '--"+PILEUP_DETECTION_LONG_NAME+"' argument)", optional = true)
public boolean detectIndels = false;

@Advanced
@Hidden
@Argument(fullName= "num-artificial-haplotypes-to-add-per-allele", doc = "Pileup Detection: This argument limits the maximum number of novel haplotypes to be added to the assembly haplotypes per pileup allele added", optional = true)
@Argument(fullName= "num-artificial-haplotypes-to-add-per-allele", doc = "Pileup Detection: This argument limits the maximum number of novel haplotypes to be added to the assembly haplotypes per pileup allele added. (Requires '--"+PILEUP_DETECTION_LONG_NAME+"'` argument)", optional = true)
public int numHaplotypesToIterate = 5;
@Advanced
@Hidden
@Argument(fullName= "artifical-haplotype-filtering-kmer-size", doc = "Pileup Detection: Controls what size to kmerize reads to in order to select best supported artificial haplotypes", optional = true)
@Argument(fullName= "artifical-haplotype-filtering-kmer-size", doc = "Pileup Detection: Controls what size to kmerize reads to in order to select best supported artificial haplotypes. (Requires '--"+PILEUP_DETECTION_LONG_NAME+"' argument)", optional = true)
public int filteringKmerSize = 10;

/**
* Percentage of reads required to support the alt for a variant to be considered
*/
@Hidden
@Argument(fullName= PILEUP_DETECTION_SNP_THRESHOLD, doc = "Pileup Detection: Percentage of alt supporting reads in order to consider alt SNP", optional = true)
@Argument(fullName= PILEUP_DETECTION_SNP_THRESHOLD, doc = "Pileup Detection: Percentage of alt supporting reads in order to consider alt SNP. (Requires '--"+PILEUP_DETECTION_LONG_NAME+"' argument)", optional = true)
public double snpThreshold = 0.1;
@Hidden
@Argument(fullName= PILEUP_DETECTION_INDEL_THRESHOLD, doc = "Pileup Detection: Percentage of alt supporting reads in order to consider alt indel", optional = true)
@Argument(fullName= PILEUP_DETECTION_INDEL_THRESHOLD, doc = "Pileup Detection: Percentage of alt supporting reads in order to consider alt indel. (Requires '--"+PILEUP_DETECTION_LONG_NAME+"' argument)", optional = true)
public double indelThreshold = 0.5;

@Hidden
@Argument(fullName= PILEUP_DETECTION_ABSOLUTE_ALT_DEPTH, doc = "Pileup Detection: Absolute number of alt reads necessary to be included in pileup events", optional = true)
@Argument(fullName= PILEUP_DETECTION_ABSOLUTE_ALT_DEPTH, doc = "Pileup Detection: Absolute number of alt reads necessary to be included in pileup events. (Requires '--"+PILEUP_DETECTION_LONG_NAME+"' argument)", optional = true)
public double pileupAbsoluteDepth = 0;
@Hidden
@Argument(fullName= PILEUP_DETECTION_INDEL_SNP_BLOCKING_RANGE, doc = "Pileup Detection: Filters out pileup snps within this many bases of an assembled indel", optional = true)
@Argument(fullName= PILEUP_DETECTION_INDEL_SNP_BLOCKING_RANGE, doc = "Pileup Detection: Filters out pileup snps within this many bases of an assembled indel. (Requires '--"+PILEUP_DETECTION_LONG_NAME+"' argument)", optional = true)
public int snpAdajacentToAssemblyIndel = 5;

/**
* Arguments related to the "bad read filtering" where alleles that are supported primarily by reads that fail at least one of a number of heuristics will be filtered out
*/
@Hidden
@Argument(fullName= PILEUP_DETECTION_BAD_READ_RATIO_LONG_NAME, doc = "Pileup Detection: Threshold of Alt reads rejected by bad reads heuristics to allow the variant", optional = true)
@Argument(fullName= PILEUP_DETECTION_BAD_READ_RATIO_LONG_NAME, doc = "Pileup Detection: Threshold of Alt reads rejected by bad reads heuristics to allow the variant. (Requires '--"+PILEUP_DETECTION_LONG_NAME+"' argument)", optional = true)
public double badReadThreshold = 0.0;
@Hidden
@Argument(fullName= PILEUP_DETECTION_PROPER_PAIR_READ_BADNESS_LONG_NAME, doc = "Pileup Detection: Reject alt reads not in proper-pairs", optional = true)
@Argument(fullName= PILEUP_DETECTION_PROPER_PAIR_READ_BADNESS_LONG_NAME, doc = "Pileup Detection: Reject alt reads not in proper-pairs. (Requires '--"+PILEUP_DETECTION_LONG_NAME+"' argument)", optional = true)
public boolean badReadProperPair = true;
@Hidden
@Argument(fullName= PILEUP_DETECTION_EDIT_DISTANCE_BADNESS_LONG_NAME, doc = "Pileup Detection: Reject alt reads with greater than this fraction edit distance from the reference", optional = true)
@Argument(fullName= PILEUP_DETECTION_EDIT_DISTANCE_BADNESS_LONG_NAME, doc = "Pileup Detection: Reject alt reads with greater than this fraction of mismatching bases from the reference (proxied using the NM tag). (Requires '--"+PILEUP_DETECTION_LONG_NAME+"' argument)", optional = true)
public double badReadEditDistance = 0.08;
@Hidden
@Argument(fullName= PILEUP_DETECTION_CHIMERIC_READ_BADNESS_LONG_NAME, doc = "Pileup Detection: Reject reads that are chimeric or supplementary", optional = true)
@Argument(fullName= PILEUP_DETECTION_CHIMERIC_READ_BADNESS_LONG_NAME, doc = "Pileup Detection: Reject reads that are chimeric or supplementary. (Requires '--"+PILEUP_DETECTION_LONG_NAME+"' argument)", optional = true)
public boolean badReadSecondaryOrSupplementary = true;
@Hidden
@Argument(fullName= PILEUP_DETECTION_TLEN_MEAN_LONG_NAME, doc = "Pileup Detection: Mean template length (T LEN) to consider for read badness. Requires '--"+PILEUP_DETECTION_TLEN_STD_LONG_NAME+"' to also be set.", optional = true)
Expand Down

0 comments on commit addef19

Please sign in to comment.