Skip to content

Commit

Permalink
Allow fingerprinting of SAM files that only have a partial dictionary…
Browse files Browse the repository at this point in the history
… match to the haplotype map (#1955)
  • Loading branch information
yfarjoun authored Apr 5, 2024
1 parent 5b0b4c0 commit 0495b01
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 18 deletions.
10 changes: 9 additions & 1 deletion src/main/java/picard/fingerprint/FingerprintChecker.java
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,15 @@ public Map<FingerprintIdDetails, Fingerprint> fingerprintSamFile(final Path samF
log.info(String.format("Reading an indexed file (%s)", samFile.toUri().toString()));
}

final SamLocusIterator iterator = new SamLocusIterator(in, this.haplotypes.getIntervalList(), in.hasIndex());
// fingerprinting allows that headers differ, but SamLocusIterator doesn't allow the dictionary of the query
// interval list to differ from that of the samfile, leading to an exception thrown.
// At this point we already know that the dictionary of the haplotypes is a proper prefix of that of the sam file
// So we just swap out the header in the interval list.
final IntervalList il = this.haplotypes.getIntervalList();
final IntervalList newIl = new IntervalList(in.getFileHeader());
newIl.addall(il.getIntervals());

final SamLocusIterator iterator = new SamLocusIterator(in, newIl, in.hasIndex());
iterator.setEmitUncoveredLoci(true);
iterator.setMappingQualityScoreCutoff(this.minimumMappingQuality);
iterator.setQualityScoreCutoff(this.minimumBaseQuality);
Expand Down
61 changes: 44 additions & 17 deletions src/test/java/picard/fingerprint/CrosscheckFingerprintsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import picard.PicardException;
import picard.cmdline.CommandLineProgramTest;
import picard.util.TabbedTextFileWithHeaderParser;
import picard.util.TestNGUtil;
import picard.vcf.SamTestUtils;
import picard.vcf.VcfTestUtils;

Expand All @@ -30,7 +31,6 @@
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;

/**
Expand All @@ -40,8 +40,11 @@ public class CrosscheckFingerprintsTest extends CommandLineProgramTest {

private final File TEST_DATA_DIR = new File("testdata/picard/fingerprint/");
private final File HAPLOTYPE_MAP = new File(TEST_DATA_DIR, "Homo_sapiens_assembly19.haplotype_database.subset.txt");
private final File HAPLOTYPE_MAP_SHORT = new File(TEST_DATA_DIR, "Homo_sapiens_assembly19.haplotype_database.subset.short_dictionary.txt");
private final File HAPLOTYPE_MAP_FOR_CRAMS = new File(TEST_DATA_DIR, "Homo_sapiens_assembly19.haplotype_database.subset.shifted.for.crams.txt");

private final File HAPLOTYPE_MAP_FOR_CRAMS_SHORT = new File(TEST_DATA_DIR, "Homo_sapiens_assembly19.haplotype_database.subset.shifted.for.crams.short_dictionary.txt");

private final File NA12891_r1_sam = new File(TEST_DATA_DIR, "NA12891.over.fingerprints.r1.sam");
private final File NA12891_r2_sam = new File(TEST_DATA_DIR, "NA12891.over.fingerprints.r2.sam");
private final File NA12891_r1_sam_shifted_for_cram = new File(TEST_DATA_DIR, "NA12891.over.fingerprints.shifted.for.crams.r1.sam");
Expand Down Expand Up @@ -148,7 +151,6 @@ public String getCommandLineProgramName() {
return CrosscheckFingerprints.class.getSimpleName();
}

@DataProvider(name = "bamFilesRGs")
public Object[][] bamAndCramFilesRGs() {
return new Object[][] {
{NA12891_r1, NA12891_r2, false, 0, (NA12891_r1_RGs + NA12891_r2_RGs) * (NA12891_r1_RGs + NA12891_r2_RGs )},
Expand Down Expand Up @@ -178,10 +180,26 @@ public Object[][] bamAndCramFilesRGs() {
};
}

@Test(dataProvider = "bamFilesRGs")
public void testCrossCheckRGs(final File file1, final File file2, final boolean expectAllMatch, final int expectedRetVal, final int expectedNMetrics) throws IOException {
private Object[][] haplotypeMaps(){
return new Object[][]{
{HAPLOTYPE_MAP, HAPLOTYPE_MAP_FOR_CRAMS},
{HAPLOTYPE_MAP_SHORT, HAPLOTYPE_MAP_FOR_CRAMS_SHORT}};
}
@DataProvider(name = "bamFilesRGs")
public Iterator<Object[]> bamFilesRGsAndHapMaps(){
return TestNGUtil.interaction(bamAndCramFilesRGs(), haplotypeMaps());
}

File metrics = File.createTempFile("Fingerprinting", "NA1291.RG.crosscheck_metrics");
@Test(dataProvider = "bamFilesRGs")
public void testCrossCheckRGs(final File file1,
final File file2,
final boolean expectAllMatch,
final int expectedRetVal,
final int expectedNMetrics,
final File haplotypeMap,
final File haplotypeMapForCram) throws IOException {

final File metrics = File.createTempFile("Fingerprinting", "NA1291.RG.crosscheck_metrics");
metrics.deleteOnExit();

final List<String> args = new ArrayList<>(Arrays.asList("INPUT=" + file1.getAbsolutePath(),
Expand All @@ -193,14 +211,15 @@ public void testCrossCheckRGs(final File file1, final File file2, final boolean

if (file1.getName().endsWith(SamReader.Type.CRAM_TYPE.fileExtension())) {
args.add("R=" + referenceForCrams);
args.add("HAPLOTYPE_MAP=" + HAPLOTYPE_MAP_FOR_CRAMS);
args.add("HAPLOTYPE_MAP=" + haplotypeMapForCram.getAbsolutePath());
} else {
args.add("HAPLOTYPE_MAP=" + HAPLOTYPE_MAP);
args.add("HAPLOTYPE_MAP=" + haplotypeMap.getAbsolutePath());
}

doTest(args.toArray(new String[0]), metrics, expectedRetVal, expectedNMetrics, CrosscheckMetric.DataType.READGROUP, expectAllMatch);
}


@DataProvider(name = "cramsWithNoReference")
public Object[][] cramsWithNoReference() {
return new Object[][]{
Expand All @@ -212,7 +231,7 @@ public Object[][] cramsWithNoReference() {

@Test(dataProvider = "cramsWithNoReference")
public void testCramsWithNoReference(final File file1, final File file2) throws IOException {
File metrics = File.createTempFile("Fingerprinting", "NA12891.RG.crosscheck_metrics");
final File metrics = File.createTempFile("Fingerprinting", "NA12891.RG.crosscheck_metrics");
metrics.deleteOnExit();

final String[] args = {"INPUT=" + file1.getAbsolutePath(),
Expand All @@ -225,8 +244,11 @@ public void testCramsWithNoReference(final File file1, final File file2) throws
Assert.assertEquals(crossChecker.instanceMain(args), 1);
}

@DataProvider(name = "cramBamComparison")
public Object[][] cramBamComparison() {
private Object[][] haplotypeMapsForCram(){
return new Object[][]{{HAPLOTYPE_MAP_FOR_CRAMS}, {HAPLOTYPE_MAP_FOR_CRAMS_SHORT}};
}

private Object[][] cramBamComparison() {
return new Object[][]{
{NA12891_r1_shifted_bam, NA12891_r2_shifted_bam, NA12891_r1_cram, NA12891_r2_cram},
{NA12891_r1_shifted_bam, NA12892_r1_shifted_bam, NA12891_r1_cram, NA12892_r1_cram},
Expand All @@ -237,25 +259,30 @@ public Object[][] cramBamComparison() {
};
}

@DataProvider(name = "cramBamComparison")
public Iterator<Object[]> cramBamComparisonWithHapMap() {
return TestNGUtil.interaction(cramBamComparison(), haplotypeMapsForCram());
}

@Test(dataProvider = "cramBamComparison")
public void testCramBamComparison(final File bam1, final File bam2, final File cram1, final File cram2) throws IOException {
File metricsBam = File.createTempFile("Fingerprinting.bam.comparison", "crosscheck_metrics");
public void testCramBamComparison(final File bam1, final File bam2, final File cram1, final File cram2, final File haplotypeMap) throws IOException {
final File metricsBam = File.createTempFile("Fingerprinting.bam.comparison", "crosscheck_metrics");
metricsBam.deleteOnExit();
File metricsCram = File.createTempFile("Fingerprinting.cram.comparison", "crosscheck_metrics");
final File metricsCram = File.createTempFile("Fingerprinting.cram.comparison", "crosscheck_metrics");
metricsCram.deleteOnExit();

final List<String> argsBam = new ArrayList<>(Arrays.asList("INPUT=" + bam1.getAbsolutePath(),
"INPUT=" + bam2.getAbsolutePath(),
"OUTPUT=" + metricsBam.getAbsolutePath(),
"LOD_THRESHOLD=" + -2.0,
"HAPLOTYPE_MAP=" + HAPLOTYPE_MAP_FOR_CRAMS)
"HAPLOTYPE_MAP=" + haplotypeMap.getAbsolutePath())
);

final List<String> argsCram = new ArrayList<>(Arrays.asList("INPUT=" + cram1.getAbsolutePath(),
"INPUT=" + cram2.getAbsolutePath(),
"OUTPUT=" + metricsCram.getAbsolutePath(),
"LOD_THRESHOLD=" + -2.0,
"HAPLOTYPE_MAP=" + HAPLOTYPE_MAP_FOR_CRAMS,
"HAPLOTYPE_MAP=" + haplotypeMap.getAbsolutePath(),
"R=" + referenceForCrams)
);

Expand Down Expand Up @@ -416,10 +443,10 @@ public Object[][] bamFilesSMs() {

@Test(dataProvider = "bamFilesSMs")
public void testCrossCheckSMs(final File file1, final File file2, final int expectedRetVal, final int numberOfSamples) throws IOException {
File metrics = File.createTempFile("Fingerprinting", "NA1291.SM.crosscheck_metrics");
final File metrics = File.createTempFile("Fingerprinting", "NA1291.SM.crosscheck_metrics");
metrics.deleteOnExit();

File matrix = File.createTempFile("Fingerprinting", "NA1291.SM.matrix");
final File matrix = File.createTempFile("Fingerprinting", "NA1291.SM.matrix");
matrix.deleteOnExit();

final String[] args = new String[]{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
@HD VN:1.5
@SQ SN:1 LN:1001 M5:af506819d4b9357827ce128b6c83a816 UR:file:reference.shifted.for.crams.fasta
@SQ SN:3 LN:2001 M5:bfecd72141fd7c10b33f04958736baef UR:file:reference.shifted.for.crams.fasta
@SQ SN:4 LN:1001 M5:ac7d9a3274b2f1fa8eac3f01b3ff9b43 UR:file:reference.shifted.for.crams.fasta
@SQ SN:5 LN:1001 M5:468698a81368c9d42bd876a2dae5c330 UR:file:reference.shifted.for.crams.fasta
#CHROMOSOME POSITION NAME MAJOR_ALLELE MINOR_ALLELE MAF ANCHOR_SNP PANELS
1 251 rs7555566 A G 0.223794
3 251 rs17272796 C T 0.623026
4 251 rs6834736 C G 0.512884
5 251 rs2862058 A G 0.349127
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
@HD VN:1.4 GO:none SO:coordinate
@SQ SN:1 LN:249250621 AS:GRCh37 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta M5:1b22b98cdeb4a9304cb5d48026a85128 SP:Homo Sapiens
@SQ SN:2 LN:243199373 AS:GRCh37 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta M5:a0d9851da00400dec1098a9255ac712e SP:Homo Sapiens
@SQ SN:3 LN:198022430 AS:GRCh37 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta M5:fdfd811849cc2fadebc929bb925902e5 SP:Homo Sapiens
@SQ SN:4 LN:191154276 AS:GRCh37 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta M5:23dccd106897542ad87d2765d28a19a1 SP:Homo Sapiens
@SQ SN:5 LN:180915260 AS:GRCh37 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta M5:0740173db9ffd264d728f32784845cd7 SP:Homo Sapiens
#CHROMOSOME POSITION NAME MAJOR_ALLELE MINOR_ALLELE MAF ANCHOR_SNP PANELS
1 14804874 rs7555566 A G 0.223794
3 17077268 rs17272796 C T 0.623026
4 57194525 rs6834736 C G 0.512884
5 156355375 rs2862058 A G 0.349127

0 comments on commit 0495b01

Please sign in to comment.