Skip to content

Commit

Permalink
Add FILTER and QUAL fields to SVCallRecord
Browse files Browse the repository at this point in the history
  • Loading branch information
mwalker174 committed May 1, 2023
1 parent d7ed1ac commit 54a8658
Show file tree
Hide file tree
Showing 10 changed files with 160 additions and 81 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ public class SVCallRecord implements SVLocatable {
private final List<Allele> altAlleles;
private final GenotypesContext genotypes;
private final Map<String,Object> attributes;
private final Set<String> filters;
private final Double log10PError;

// CPX related fields
private final GATKSVVCFConstants.ComplexVariantSubtype cpxSubtype;
Expand All @@ -70,8 +72,10 @@ public SVCallRecord(final String id,
final List<Allele> alleles,
final List<Genotype> genotypes,
final Map<String,Object> attributes,
final Set<String> filters,
final Double log10PError,
final SAMSequenceDictionary dictionary) {
this(id, contigA, positionA, strandA, contigB, positionB, strandB, type, cpxSubtype, length, algorithms, alleles, genotypes, attributes);
this(id, contigA, positionA, strandA, contigB, positionB, strandB, type, cpxSubtype, length, algorithms, alleles, genotypes, attributes, filters, log10PError);
validateCoordinates(dictionary);
}

Expand All @@ -88,11 +92,14 @@ protected SVCallRecord(final String id,
final List<String> algorithms,
final List<Allele> alleles,
final List<Genotype> genotypes,
final Map<String, Object> attributes) {
final Map<String, Object> attributes,
final Set<String> filters,
final Double log10PError) {
Utils.nonNull(algorithms);
Utils.nonNull(alleles);
Utils.nonNull(genotypes);
Utils.nonNull(attributes);
Utils.nonNull(filters);
this.id = Utils.nonNull(id);
this.contigA = contigA;
this.positionA = positionA;
Expand All @@ -112,6 +119,8 @@ protected SVCallRecord(final String id,
final Pair<Boolean, Boolean> strands = inferStrands(type, strandA, strandB);
this.strandA = strands.getLeft();
this.strandB = strands.getRight();
this.filters = filters;
this.log10PError = log10PError;
}

/**
Expand Down Expand Up @@ -366,4 +375,16 @@ public SimpleInterval getPositionAInterval() {
public SimpleInterval getPositionBInterval() {
return new SimpleInterval(contigB, positionB, positionB);
}

public Set<String> getFilters() {
return filters;
}

public Double getLog10PError() {
return log10PError;
}

public GATKSVVCFConstants.ComplexVariantSubtype getCpxSubtype() {
return cpxSubtype;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,13 @@ public static VariantContextBuilder getVariantBuilder(final SVCallRecord record)
&& record.getStrandA() != null && record.getStrandB() != null) {
builder.attribute(GATKSVVCFConstants.STRANDS_ATTRIBUTE, getStrandString(record));
}
if (!record.getFilters().isEmpty()) {
builder.filters(record.getFilters());
}
if (record.getLog10PError() != null) {
builder.log10PError(record.getLog10PError());
}

// htsjdk vcf encoder does not allow genotypes to have empty alleles
builder.genotypes(record.getGenotypes().stream().map(SVCallRecordUtils::sanitizeEmptyGenotype).collect(Collectors.toList()));
sanitizeNullAttributes(builder);
Expand Down Expand Up @@ -176,12 +183,12 @@ public static GenotypesContext populateGenotypesForMissingSamplesWithAlleles(fin
public static SVCallRecord copyCallWithNewGenotypes(final SVCallRecord record, final GenotypesContext genotypes) {
return new SVCallRecord(record.getId(), record.getContigA(), record.getPositionA(), record.getStrandA(), record.getContigB(),
record.getPositionB(), record.getStrandB(), record.getType(), record.getComplexSubtype(), record.getLength(), record.getAlgorithms(), record.getAlleles(),
genotypes, record.getAttributes());
genotypes, record.getAttributes(), record.getFilters(), record.getLog10PError());
}
public static SVCallRecord copyCallWithNewAttributes(final SVCallRecord record, final Map<String, Object> attr) {
return new SVCallRecord(record.getId(), record.getContigA(), record.getPositionA(), record.getStrandA(), record.getContigB(),
record.getPositionB(), record.getStrandB(), record.getType(), record.getComplexSubtype(), record.getLength(), record.getAlgorithms(), record.getAlleles(),
record.getGenotypes(), attr);
record.getGenotypes(), attr, record.getFilters(), record.getLog10PError());
}

/**
Expand Down Expand Up @@ -293,10 +300,10 @@ public static Stream<SVCallRecord> convertInversionsToBreakends(final SVCallReco
Utils.validateArg(record.isIntrachromosomal(), "Inversion " + record.getId() + " is not intrachromosomal");
final SVCallRecord positiveBreakend = new SVCallRecord(record.getId(), record.getContigA(),
record.getPositionA(), true, record.getContigB(), record.getPositionB(), true, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null,null,
record.getAlgorithms(), record.getAlleles(), record.getGenotypes(), record.getAttributes(), dictionary);
record.getAlgorithms(), record.getAlleles(), record.getGenotypes(), record.getAttributes(), record.getFilters(), record.getLog10PError(), dictionary);
final SVCallRecord negativeBreakend = new SVCallRecord(record.getId(), record.getContigA(),
record.getPositionA(), false, record.getContigB(), record.getPositionB(), false, GATKSVVCFConstants.StructuralVariantAnnotationType.BND, null,null,
record.getAlgorithms(), record.getAlleles(), record.getGenotypes(), record.getAttributes(), dictionary);
record.getAlgorithms(), record.getAlleles(), record.getGenotypes(), record.getAttributes(), record.getFilters(), record.getLog10PError(), dictionary);
return Stream.of(positiveBreakend, negativeBreakend);
}

Expand Down Expand Up @@ -385,10 +392,11 @@ public static SVCallRecord create(final VariantContext variant, boolean keepVari
positionB = variant.getEnd();
}
}
final Double log10PError = variant.hasLog10PError() ? variant.getLog10PError() : null;

final Map<String, Object> sanitizedAttributes = sanitizeAttributes(attributes);
return new SVCallRecord(id, contigA, positionA, strand1, contigB, positionB, strand2, type, cpxSubtype, length, algorithms,
variant.getAlleles(), variant.getGenotypes(), sanitizedAttributes);
variant.getAlleles(), variant.getGenotypes(), sanitizedAttributes, variant.getFilters(), log10PError);
}

private static Map<String, Object> sanitizeAttributes(final Map<String, Object> attributes) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,8 +182,12 @@ public SVCallRecord collapse(final SVClusterEngine.OutputCluster cluster) {
final Boolean strandA = type == GATKSVVCFConstants.StructuralVariantAnnotationType.CNV ? null : representative.getStrandA();
final Boolean strandB = type == GATKSVVCFConstants.StructuralVariantAnnotationType.CNV ? null : representative.getStrandB();

final Set<String> filters = collapseFilters(items);
final Double quality = collapseQuality(items);

return new SVCallRecord(representative.getId(), representative.getContigA(), start, strandA, representative.getContigB(),
end, strandB, type, representative.getComplexSubtype(), length, algorithms, alleles, genotypes, attributes, dictionary);
end, strandB, type, representative.getComplexSubtype(), length, algorithms, alleles, genotypes, attributes,
filters, quality, dictionary);
}

protected List<Allele> collapseAlleles(final List<Allele> altAlleles, final Allele refAllele) {
Expand All @@ -193,6 +197,21 @@ protected List<Allele> collapseAlleles(final List<Allele> altAlleles, final Alle
return alleles;
}

protected Set<String> collapseFilters(final List<SVCallRecord> items) {
return items.stream()
.map(SVCallRecord::getFilters)
.flatMap(Collection::stream)
.collect(Collectors.toSet());
}

protected Double collapseQuality(final List<SVCallRecord> items) {
if (items.size() == 1) {
return items.get(0).getLog10PError();
} else {
return null;
}
}

/**
* Asserts that the given records are valid for collapsing.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,8 @@ public VariantContext buildVariantContext(final SVCallRecord call) {
// Build new variant
final SVCallRecord finalCall = new SVCallRecord(newId, call.getContigA(), call.getPositionA(), call.getStrandA(),
call.getContigB(), call.getPositionB(), call.getStrandB(), call.getType(), call.getComplexSubtype(), call.getLength(),
call.getAlgorithms(), call.getAlleles(), filledGenotypes, call.getAttributes(), dictionary);
call.getAlgorithms(), call.getAlleles(), filledGenotypes, call.getAttributes(), call.getFilters(),
call.getLog10PError(), dictionary);
final VariantContextBuilder builder = SVCallRecordUtils.getVariantBuilder(finalCall);
if (omitMembers) {
builder.rmAttribute(GATKSVVCFConstants.CLUSTER_MEMBER_IDS_KEY);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package org.broadinstitute.hellbender.tools.sv;

import com.google.common.collect.Lists;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.GenotypesContext;
import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
Expand Down Expand Up @@ -73,7 +76,7 @@ public Object[][] testCreateInvalidCoordinatesData() {
public void testCreateInvalidCoordinates(final String contigA, final int posA, final String contigB, final int posB) {
new SVCallRecord("var1", contigA, posA, true, contigB, posB, false, GATKSVVCFConstants.StructuralVariantAnnotationType.BND,
null, null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), Collections.emptyList(),
Collections.emptyMap(), SVTestUtils.hg38Dict);
Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict);
Assert.fail("Expected exception not thrown");
}

Expand All @@ -90,6 +93,26 @@ public Object[][] testCreateValidCoordinatesData() {
public void testCreateValidCoordinates(final String contigA, final int posA, final String contigB, final int posB) {
new SVCallRecord("var1", contigA, posA, true, contigB, posB, false, GATKSVVCFConstants.StructuralVariantAnnotationType.BND,
null, null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Collections.emptyList(), Collections.emptyList(),
Collections.emptyMap(), SVTestUtils.hg38Dict);
Collections.emptyMap(), Collections.emptySet(), null, SVTestUtils.hg38Dict);
}

public void testGetters() {
final SVCallRecord record = new SVCallRecord("var1", "chr1", 100, true, "chr1", 200, false, GATKSVVCFConstants.StructuralVariantAnnotationType.DEL,
GATKSVVCFConstants.ComplexVariantSubtype.dDUP, null, SVTestUtils.PESR_ONLY_ALGORITHM_LIST, Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL),
GenotypesContext.create(GenotypeBuilder.create("sample1", Lists.newArrayList(Allele.SV_SIMPLE_DEL, Allele.SV_SIMPLE_DEL))),
Collections.singletonMap("TEST_KEY", "TEST_VALUE"), Collections.singleton("TEST_FILTER"), Double.valueOf(30), SVTestUtils.hg38Dict);
Assert.assertEquals(record.getId(), "var1");
Assert.assertEquals(record.getContigA(), "chr1");
Assert.assertEquals(record.getPositionA(), 100);
Assert.assertEquals(record.getStrandA(), Boolean.TRUE);
Assert.assertEquals(record.getContigB(), "chr1");
Assert.assertEquals(record.getPositionB(), 200);
Assert.assertEquals(record.getStrandA(), Boolean.FALSE);
Assert.assertEquals(record.getAlgorithms(), SVTestUtils.PESR_ONLY_ALGORITHM_LIST);
Assert.assertEquals(record.getGenotypes().get("sample1").getAlleles(), Lists.newArrayList(Allele.SV_SIMPLE_DEL, Allele.SV_SIMPLE_DEL));
Assert.assertEquals(record.getAttributes(), Collections.singletonMap("TEST_KEY", "TEST_VALUE"));
Assert.assertEquals(record.getAlleles(), Lists.newArrayList(Allele.REF_N, Allele.SV_SIMPLE_DEL));
Assert.assertEquals(record.getFilters(), Collections.singleton("TEST_FILTER"));
Assert.assertEquals(record.getLog10PError(), Double.valueOf(30));
}
}
Loading

0 comments on commit 54a8658

Please sign in to comment.