Skip to content

Commit

Permalink
-fixes a bug in the liftover logic whereby when the alleles couldn't …
Browse files Browse the repository at this point in the history
…be extended to the "left" because the start of the variant was already at position 1, the resulting VC was corrupt leading to the problem discussed in !1951

- added a unit test (which fails with the same error as the issue prior to the application of the changes)
- _someone_ should add an actual live (with vcf etc....)
- fixes !1951
- other tests are failing...need to look at this more carefully.
  • Loading branch information
yfarjoun committed Apr 5, 2024
1 parent 5b0b4c0 commit b713345
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 18 deletions.
37 changes: 25 additions & 12 deletions src/main/java/picard/util/LiftoverUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -372,15 +372,15 @@ protected static void leftAlignVariant(final VariantContextBuilder builder, fina

// 1. while changes in alleles do
while (changesInAlleles) {
final int oldStart = theStart;
final int oldEnd = theEnd;

changesInAlleles = false;
// 2. if alleles end with the same nucleotide then
if (alleleBasesMap.values().stream()
.collect(Collectors.groupingBy(a -> a[a.length - 1], Collectors.toSet()))
.size() == 1 && theEnd > 1) {
// 3. truncate rightmost nucleotide of each allele
alleleBasesMap.replaceAll((a, v) -> truncateBase(alleleBasesMap.get(a), true));
changesInAlleles = true;
theEnd--;
// 4. end if
}
Expand All @@ -390,20 +390,29 @@ protected static void leftAlignVariant(final VariantContextBuilder builder, fina
.map(a -> a.length)
.anyMatch(l -> l == 0)) {
// 6. extend alleles 1 nucleotide to the left


final byte extraBase;
final boolean extendLeft;
if (theStart > 1) {
extraBase = referenceSequence.getBases()[theStart - 2];
extendLeft = true;
theStart--;
} else {
extraBase = referenceSequence.getBases()[theEnd];
extendLeft = false;
theEnd++;
}

for (final Allele allele : alleleBasesMap.keySet()) {
// the first -1 for zero-base (getBases) versus 1-based (variant position)
// another -1 to get the base prior to the location of the start of the allele
final byte extraBase = (theStart > 1) ?
referenceSequence.getBases()[theStart - 2] :
referenceSequence.getBases()[theEnd];

alleleBasesMap.put(allele, extendOneBase(alleleBasesMap.get(allele), extraBase));
alleleBasesMap.put(allele, extendOneBase(alleleBasesMap.get(allele), extraBase, extendLeft));
}
changesInAlleles = true;
theStart--;

// 7. end if
}
changesInAlleles = theStart!=oldStart && theEnd != oldEnd;
}

// 8. while leftmost nucleotide of each allele are the same and all alleles have length 2 or more do
Expand Down Expand Up @@ -451,12 +460,16 @@ private static byte[] truncateBase(final byte[] allele, final boolean truncateRi
truncateRightmost ? allele.length - 1 : allele.length);
}

//creates a new byte array with the base added at the beginning
private static byte[] extendOneBase(final byte[] bases, final byte base) {
return extendOneBase(bases, base, true);
}

//creates a new byte array with the base added at the beginning
private static byte[] extendOneBase(final byte[] bases, final byte base, final boolean extendLeft) {
final byte[] newBases = new byte[bases.length + 1];

System.arraycopy(bases, 0, newBases, 1, bases.length);
newBases[0] = base;
System.arraycopy(bases, 0, newBases, extendLeft ? 1 : 0, bases.length);
newBases[extendLeft ? 0 : bases.length] = base;

return newBases;
}
Expand Down
58 changes: 52 additions & 6 deletions src/test/java/picard/util/LiftoverVcfTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,7 @@
import htsjdk.samtools.liftover.LiftOver;
import htsjdk.samtools.reference.FastaSequenceFile;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.*;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeBuilder;
Expand Down Expand Up @@ -59,6 +56,10 @@ public class LiftoverVcfTest extends CommandLineProgramTest {
private static final String refString = "CAAAAAAAAAACGTACGTACTCTCTCTCTACGT";
private static final ReferenceSequence REFERENCE = new ReferenceSequence("chr1", 0, refString.getBytes());

private static final String refStringWithRepeat = "CGTCGTCGT";

private static final ReferenceSequence REFERENCE_WITH_REPEATS = new ReferenceSequence("chr1", 0, refStringWithRepeat.getBytes());

public String getCommandLineProgramName() {
return LiftoverVcf.class.getSimpleName();
}
Expand Down Expand Up @@ -174,8 +175,8 @@ public void testChangingTagArguments() {
@Test
public void testReverseComplementFailureDoesNotErrorOut() {
final VariantContextBuilder builder = new VariantContextBuilder().source("test").loc("chr1", 1, 4);
final Allele originalRef = Allele.create("CCCC", true);
final Allele originalAlt = Allele.create("C", false);
final Allele originalRef = Allele.create("TCAT", true);
final Allele originalAlt = Allele.create("T", false);
builder.alleles(Arrays.asList(originalRef, originalAlt));

final Interval interval = new Interval("chr1", 1, 4, true, "test ");
Expand All @@ -185,6 +186,26 @@ public void testReverseComplementFailureDoesNotErrorOut() {

// we don't actually care what the results are here -- we just want to make sure that it doesn't fail
final VariantContextBuilder result = LiftoverUtils.reverseComplementVariantContext(builder.make(), interval, refSeq);
Assert.assertNotNull(result);
}


@Test
public void testReverseComplementWithRepeats() {
final VariantContextBuilder builder = new VariantContextBuilder().source("test").loc("chr1", 3, 6);
final Allele originalRef = Allele.create("CATC", true);
final Allele originalAlt = Allele.create("C", false);
builder.alleles(Arrays.asList(originalRef, originalAlt));

final Interval interval = new Interval("chr1", 3, 6, true, "test ");

final String reference = "ATGATGATGA";
final ReferenceSequence refSeq = new ReferenceSequence("chr1", 10, reference.getBytes());

// we don't actually care what the results are here -- we just want to make sure that it doesn't fail
final VariantContextBuilder result = LiftoverUtils.reverseComplementVariantContext(builder.make(), interval, refSeq);
Assert.assertNotNull(result);
Assert.assertTrue(result.getStart() > 0);
}

@DataProvider(name = "dataTestMissingContigInReference")
Expand Down Expand Up @@ -975,6 +996,31 @@ public void testLeftAlignVariants(final VariantContext source, final ReferenceSe
VcfTestUtils.assertEquals(vcb.make(), result);
}

@Test
public void testLeftAlignInRepeatingStart(){
// reference: CGTCGTCGT
// 123456789
// TCGT->T

final VariantContextBuilder builder = new VariantContextBuilder().source("test1").chr("chr1");
builder.start(3).stop(6).alleles(CollectionUtil.makeList(Allele.create("TCGT",true), Allele.create("T")));
GenotypeBuilder genotypeBuilder = new GenotypeBuilder();
genotypeBuilder.alleles(builder.getAlleles());
builder.genotypes(genotypeBuilder.make());

VariantContext vc = builder.make();
final List<Allele> origAlleles = new ArrayList<>(builder.getAlleles());

// This path mimics the codepath through the LiftoverUtils.reverseComplementVariantContext but without reverseComplementing
LiftoverUtils.leftAlignVariant(builder, vc.getStart(), vc.getEnd(), vc.getAlleles(), REFERENCE_WITH_REPEATS);
builder.genotypes(LiftoverUtils.fixGenotypes(builder.getGenotypes(), origAlleles, builder.getAlleles()));
VariantContext newVc = builder.make();
final String refString = StringUtil.bytesToString(REFERENCE_WITH_REPEATS.getBases(), newVc.getStart() - 1, newVc.getEnd() - newVc.getStart() + 1);

Assert.assertEquals(refString,"CGTC");
}


@DataProvider(name = "indelNoFlipData")
public Iterator<Object[]> indelNoFlipData() {

Expand Down

0 comments on commit b713345

Please sign in to comment.