-
Notifications
You must be signed in to change notification settings - Fork 596
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Sort output from SVClusterEngine and fix no-call genotype ploidy bug in JointGermlineCNVSegmentation #7779
Sort output from SVClusterEngine and fix no-call genotype ploidy bug in JointGermlineCNVSegmentation #7779
Changes from all commits
057440f
816d4e0
75cbb6f
2065e0d
2330b8d
822cc4e
e1c3e19
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -147,7 +147,7 @@ public int getEndPosition() { | |
doc="Cluster events whose endpoints are within this distance of each other", optional=true) | ||
public int clusterWindow = CanonicalSVLinkage.DEFAULT_DEPTH_ONLY_PARAMS.getWindow(); | ||
|
||
@Argument(fullName = MODEL_CALL_INTERVALS_LONG_NAME, doc = "gCNV model intervals created with the FilterIntervals tool.") | ||
@Argument(fullName = MODEL_CALL_INTERVALS_LONG_NAME, doc = "gCNV model intervals created with the FilterIntervals tool.", optional=true) | ||
private GATKPath modelCallIntervalList = null; | ||
|
||
@Argument(fullName = BREAKPOINT_SUMMARY_STRATEGY_LONG_NAME, doc = "Strategy to use for choosing a representative value for a breakpoint cluster.", optional = true) | ||
|
@@ -204,6 +204,7 @@ public void onTraversalStart() { | |
//dictionary will not be null because this tool requiresReference() | ||
|
||
final GenomeLocParser parser = new GenomeLocParser(this.dictionary); | ||
|
||
setIntervals(parser); | ||
|
||
final ClusteringParameters clusterArgs = ClusteringParameters.createDepthParameters(clusterIntervalOverlap, clusterWindow, CLUSTER_SAMPLE_OVERLAP_FRACTION); | ||
|
@@ -267,12 +268,12 @@ private VariantContextWriter getVCFWriter() { | |
*/ | ||
@Override | ||
public void apply(final List<VariantContext> variantContexts, final ReferenceContext referenceContext, final List<ReadsContext> readsContexts) { | ||
if (currentContig == null) { | ||
currentContig = variantContexts.get(0).getContig(); //variantContexts should have identical start, so choose 0th arbitrarily | ||
} else if (!variantContexts.get(0).getContig().equals(currentContig)) { | ||
//variantContexts should have identical start, so choose 0th arbitrarily | ||
final String variantContig = variantContexts.get(0).getContig(); | ||
if (currentContig != null && !variantContig.equals(currentContig)) { | ||
processClusters(); | ||
currentContig = variantContexts.get(0).getContig(); | ||
} | ||
currentContig = variantContig; | ||
for (final VariantContext vc : variantContexts) { | ||
final SVCallRecord record = createDepthOnlyFromGCNVWithOriginalGenotypes(vc, minQS, allosomalContigs, refAutosomalCopyNumber, sampleDB); | ||
if (record != null) { | ||
|
@@ -291,13 +292,16 @@ public Object onTraversalSuccess() { | |
return null; | ||
} | ||
|
||
/** | ||
* Force-flushes the defragmenter, adds the resulting calls to the clustering engine, and flushes the clustering | ||
* engine. Since we need to check for variant overlap and reset genotypes, only flush clustering when we hit a | ||
* new contig. | ||
*/ | ||
private void processClusters() { | ||
if (!defragmenter.isEmpty()) { | ||
final List<SVCallRecord> defragmentedCalls = defragmenter.forceFlushAndGetOutput(); | ||
defragmentedCalls.stream().forEachOrdered(clusterEngine::add); | ||
} | ||
final List<SVCallRecord> defragmentedCalls = defragmenter.forceFlush(); | ||
defragmentedCalls.stream().forEachOrdered(clusterEngine::add); | ||
//Jack and Isaac cluster first and then defragment | ||
final List<SVCallRecord> clusteredCalls = clusterEngine.forceFlushAndGetOutput(); | ||
final List<SVCallRecord> clusteredCalls = clusterEngine.forceFlush(); | ||
write(clusteredCalls); | ||
} | ||
|
||
|
@@ -315,12 +319,10 @@ private VariantContext buildAndSanitizeRecord(final SVCallRecord record) { | |
} | ||
|
||
private void write(final List<SVCallRecord> calls) { | ||
final List<VariantContext> sortedCalls = calls.stream() | ||
.sorted(Comparator.comparing(c -> new SimpleInterval(c.getContigA(), c.getPositionA(), c.getPositionB()), //VCs have to be sorted by end as well | ||
IntervalUtils.getDictionaryOrderComparator(dictionary))) | ||
final List<VariantContext> sanitizedRecords = calls.stream() | ||
.map(this::buildAndSanitizeRecord) | ||
.collect(Collectors.toList()); | ||
final Iterator<VariantContext> it = sortedCalls.iterator(); | ||
final Iterator<VariantContext> it = sanitizedRecords.iterator(); | ||
ArrayList<VariantContext> overlappingVCs = new ArrayList<>(calls.size()); | ||
if (!it.hasNext()) { | ||
return; | ||
|
@@ -680,16 +682,28 @@ private static Genotype prepareGenotype(final Genotype g, final Allele refAllele | |
return builder.make(); | ||
} | ||
|
||
/** | ||
* "Fills" genotype alleles so that it has the correct ploidy | ||
* @param builder new alleles will be set for this builder | ||
* @param g non-ref alleles will be carried over from this genotype | ||
* @param ploidy desired ploidy for the new genotype | ||
* @param refAllele desired ref allele for new genotype | ||
*/ | ||
private static void correctGenotypePloidy(final GenotypeBuilder builder, final Genotype g, final int ploidy, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know I dropped the ball, but can you add some comments here? This is for overlapping events, right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. This just modifies the genotypes for input variants so the ploidies are consistent with the ped file and also irons out any no-call/null GTs. |
||
final Allele refAllele) { | ||
final ArrayList<Allele> alleles = new ArrayList<>(g.getAlleles()); | ||
Utils.validate(alleles.size() <= ploidy, "Encountered genotype with ploidy " + ploidy + " but " + | ||
alleles.size() + " alleles."); | ||
while (alleles.size() < ploidy) { | ||
alleles.add(refAllele); | ||
if (g.getAlleles().size() == 1 && g.getAllele(0).isNoCall()) { | ||
// Special case to force interpretation of a single no-call allele as a possible null GT | ||
builder.alleles(Collections.nCopies(ploidy, Allele.NO_CALL)); | ||
} else { | ||
final ArrayList<Allele> alleles = new ArrayList<>(g.getAlleles()); | ||
Utils.validate(alleles.size() <= ploidy, "Encountered genotype with ploidy " + ploidy + | ||
" but " + alleles.size() + " alleles."); | ||
while (alleles.size() < ploidy) { | ||
alleles.add(refAllele); | ||
} | ||
alleles.trimToSize(); | ||
builder.alleles(alleles); | ||
} | ||
alleles.trimToSize(); | ||
builder.alleles(alleles); | ||
} | ||
|
||
private static void addExpectedCopyNumber(final GenotypeBuilder g, final int ploidy) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like this is for special cases, like the tests below, and the task end. Is that true? Regardless, please add some javadoc, esp. for differentiating use cases with the above method.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, added a comment. This is to be used only by forceFlush() in the engine itself, which is only called when we're certain that none of the currently active clusters can change. Yes, this is usually when reaching the end of a contig (or file).