diff --git a/CHANGES.md b/CHANGES.md index f843bca..bb0f70c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,7 @@ - Writing out proper SV type for Dragen CNV (#76) - Adding support for depth of coverage annotation (#73) +- Merge multiple input VCFs in annotate-svs with clustering (#75) ## v0.26 diff --git a/pom.xml b/pom.xml index ca3d41d..966976b 100644 --- a/pom.xml +++ b/pom.xml @@ -24,6 +24,8 @@ 1.4.197 2.24.1 0.41 + 1.0.0 + 0.6.1 diff --git a/varfish-annotator-cli/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateSvsArgs.java b/varfish-annotator-cli/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateSvsArgs.java index f78b82a..20658b1 100644 --- a/varfish-annotator-cli/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateSvsArgs.java +++ b/varfish-annotator-cli/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateSvsArgs.java @@ -118,6 +118,16 @@ public final class AnnotateSvsArgs { "Annotate CNV with coverage and mapping quality from maelstrom-core coverage VCF file") private List coverageVcfs = new ArrayList<>(); + @Parameter( + names = "--merge-overlap", + description = "Reciprocal overlap to require for merging (default: 0.75)") + private double mergeOverlap = 0.75; + + @Parameter( + names = "--merge-bnd-radius", + description = "Merge BNDs within the given radius (default: 50)") + private int mergeBndRadius = 50; + public String getRefseqSerPath() { return refseqSerPath; } @@ -198,6 +208,14 @@ public List getCoverageVcfs() { return coverageVcfs; } + public double getMergeOverlap() { + return mergeOverlap; + } + + public int getMergeBndRadius() { + return mergeBndRadius; + } + @Override public String toString() { return "AnnotateSvsArgs{" @@ -259,6 +277,10 @@ public String toString() { + '\'' + ", coverageVcfs=" + coverageVcfs + + ", mergeOverlap=" + + mergeOverlap + + ", mergeBndRadius=" + + mergeBndRadius + '}'; } } diff --git 
a/varfish-annotator-cli/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateSvsVcf.java b/varfish-annotator-cli/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateSvsVcf.java index 6d70c47..1b71316 100644 --- a/varfish-annotator-cli/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateSvsVcf.java +++ b/varfish-annotator-cli/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateSvsVcf.java @@ -7,6 +7,9 @@ import com.github.bihealth.varfish_annotator.data.GenomeVersion; import com.github.bihealth.varfish_annotator.db.DbInfoWriterHelper; import com.github.bihealth.varfish_annotator.utils.*; +import com.google.code.externalsorting.csv.CSVRecordBuffer; +import com.google.code.externalsorting.csv.CsvExternalSort; +import com.google.code.externalsorting.csv.CsvSortOptions; import com.google.common.base.Joiner; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -34,12 +37,17 @@ import htsjdk.variant.variantcontext.VariantContextBuilder; import htsjdk.variant.vcf.VCFFileReader; import java.io.*; +import java.nio.charset.Charset; import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.Paths; import java.sql.*; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVRecord; /** Implementation of the annotate-svs command. 
*/ public final class AnnotateSvsVcf { @@ -114,6 +122,15 @@ public void run() { } } + Path tmpDir = null; + try { + tmpDir = Files.createTempDirectory("varfish-annotator"); + } catch (IOException e) { + System.err.println("Could not create temporary directory"); + System.exit(1); + } + final Path tmpGtsPath = Paths.get(tmpDir.toString(), "tmp.gts.tsv"); + try (Connection conn = DriverManager.getConnection( "jdbc:h2:" @@ -123,11 +140,12 @@ public void run() { "sa", ""); VCFFileReader reader = new VCFFileReader(new File(args.getInputVcf())); - OutputStream gtsStream = Files.newOutputStream(Paths.get(args.getOutputGts())); OutputStream featureEffectsStream = Files.newOutputStream(Paths.get(args.getOutputFeatureEffects())); OutputStream dbInfoStream = Files.newOutputStream(Paths.get(args.getOutputDbInfos())); - Writer gtWriter = GzipUtil.maybeOpenGzipOutputStream(gtsStream, args.getOutputGts()); + OutputStream tmpGtsStream = Files.newOutputStream(tmpGtsPath); + Writer tmpGtsWriter = + GzipUtil.maybeOpenGzipOutputStream(tmpGtsStream, tmpGtsPath.toString()); Writer featureEffectsWriter = GzipUtil.maybeOpenGzipOutputStream( featureEffectsStream, args.getOutputFeatureEffects()); @@ -161,8 +179,13 @@ public void run() { refseqJvData, ensemblJvData, callerSupport, - gtWriter, + tmpGtsWriter, featureEffectsWriter); + + // Finalize genotypes and write out sorted + tmpGtsWriter.close(); + writeSortedGts(tmpGtsPath); + new DbInfoWriterHelper() .writeDbInfos(conn, dbInfoBufWriter, args.getRelease(), AnnotateVcf.class); } catch (SQLException e) { @@ -192,6 +215,52 @@ public void run() { } } + /** Finalize and write out sorted files. 
+   *
+   * <p>Sorts the temporary genotypes TSV file in batches using {@link VarFishGtsTsvComparator},
+   * then merges the sorted batches (header first) into the file at {@code args.getOutputGts()},
+   * opened through {@code GzipUtil.maybeOpenGzipOutputStream}.
+   *
+   * @param tmpGtsPath path to the unsorted temporary genotypes TSV file with one header line
+   * @throws IOException if a batch file cannot be read or the output file cannot be written
+   */
+  private void writeSortedGts(Path tmpGtsPath) throws IOException {
+    // Configuration for sorting
+    final boolean hasChrom2Columns =
+        !args.getOptOutFeatures().contains(GtRecordBuilder.FEATURE_CHROM2_COLUMNS);
+    // NOTE(review): the platform default charset is used for reading the temporary file; confirm
+    // that it matches the charset used when the temporary file was written.
+    final CsvSortOptions sortOptions =
+        new CsvSortOptions.Builder(
+                new VarFishGtsTsvComparator(hasChrom2Columns),
+                CsvExternalSort.DEFAULTMAXTEMPFILES,
+                CsvExternalSort.estimateAvailableMemory())
+            .charset(Charset.defaultCharset())
+            .distinct(false)
+            .numHeader(1)
+            .skipHeader(false)
+            .format(
+                CSVFormat.DEFAULT
+                    .builder()
+                    .setDelimiter('\t')
+                    .setIgnoreSurroundingSpaces(true)
+                    .setQuote((Character) null)
+                    .build())
+            .build();
+
+    // Sort genotypes file and write final file to output.
+    // The null argument is presumably "use the library's default temporary directory" -- TODO
+    // confirm against the CsvExternalSort documentation.
+    final ArrayList<CSVRecord> gtHeader = new ArrayList<>();
+    final List<File> gtSortInBatch =
+        CsvExternalSort.sortInBatch(tmpGtsPath.toFile(), null, sortOptions, gtHeader);
+
+    // Readers over the sorted batch files; tracked here so that every one of them gets closed
+    // even when opening a later batch or the merge itself fails.
+    final List<Reader> batchReaders = new ArrayList<>();
+    try (OutputStream gtsStream = Files.newOutputStream(Paths.get(args.getOutputGts()));
+        Writer gtsWriter = GzipUtil.maybeOpenGzipOutputStream(gtsStream, args.getOutputGts());
+        BufferedWriter bufWriter = new BufferedWriter(gtsWriter)) {
+      final List<CSVRecordBuffer> bfbs = new ArrayList<>();
+      for (File f : gtSortInBatch) {
+        final BufferedReader fbr =
+            new BufferedReader(
+                new InputStreamReader(new FileInputStream(f), sortOptions.getCharset()));
+        batchReaders.add(fbr);
+        bfbs.add(new CSVRecordBuffer(new CSVParser(fbr, sortOptions.getFormat())));
+      }
+
+      CsvExternalSort.mergeSortedFiles(bufWriter, sortOptions, bfbs, gtHeader);
+    } catch (ClassNotFoundException e) {
+      throw new RuntimeException("Problem with external file sort", e);
+    } finally {
+      for (Reader batchReader : batchReaders) {
+        try {
+          // Closing an already closed reader has no effect, so this is safe after the merge.
+          batchReader.close();
+        } catch (IOException ignored) {
+          // Best-effort cleanup of read-only readers; do not mask the primary outcome.
+        }
+      }
+    }
+  }
+
   private void checkWriteOutBndMates() {
     if (!ImmutableList.of("true", "false", "auto").contains(args.getWriteBndMates())) {
       System.err.println("Unsupported feature in --opt-out: " + args.getWriteBndMates());
diff --git a/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf37Chr1Test.java
b/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf37Chr1Test.java index 03276b5..15b51f1 100644 --- a/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf37Chr1Test.java +++ b/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf37Chr1Test.java @@ -160,9 +160,9 @@ void testWithSingleton(boolean gzipOutput) throws IOException { "release\tchromosome\tchromosome_no\tbin\tchromosome2\tchromosome_no2\tbin2\tpe_orientation\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tnum_hom_alt\tnum_hom_ref\tnum_het\tnum_hemi_alt\tnum_hemi_ref\tgenotype\n" + "GRCh37\t1\t1\t599\t1\t1\t599\t3to5\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"ft\"\"\":{\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + "GRCh37\t1\t1\t604\t1\t1\t604\t5to3\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"ft\"\"\":{\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":80,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + + 
"GRCh37\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t0\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n" + "GRCh37\t1\t1\t780\t1\t1\t780\t3to3\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"ft\"\"\":{\"\"\"MIN_PE_AAF\"\"\",\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":123,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7}}\n" - + "GRCh37\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n" - + "GRCh37\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t0\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n"; + + 
"GRCh37\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n"; final String expectedFeatureEffects = "case_id\tset_id\tsv_uuid\trefseq_gene_id\trefseq_transcript_id\trefseq_transcript_coding\trefseq_effect\tensembl_gene_id\tensembl_transcript_id\tensembl_transcript_coding\tensembl_effect\n"; runTest( @@ -195,9 +195,9 @@ void testWithTrio(boolean gzipOutput) throws IOException { "release\tchromosome\tchromosome_no\tbin\tchromosome2\tchromosome_no2\tbin2\tpe_orientation\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tnum_hom_alt\tnum_hom_ref\tnum_het\tnum_hemi_alt\tnum_hemi_ref\tgenotype\n" + 
"GRCh37\t1\t1\t599\t1\t1\t599\t3to5\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t2\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"ft\"\"\":{\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12891\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\"},\"\"\"gq\"\"\":127,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":11,\"\"\"pev\"\"\":4,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"gq\"\"\":57,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":19,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + "GRCh37\t1\t1\t604\t1\t1\t604\t5to3\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t2\t1\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"ft\"\"\":{\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":80,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12891\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"gq\"\"\":24,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":12,\"\"\"pev\"\"\":1,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"gq\"\"\":39,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + + 
"GRCh37\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"unaffectedCarriers\"\"\":0}\t0\t3\t0\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gq\"\"\":93,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\",\"\"\"MAX_PE_COUNT\"\"\",\"\"\"MAX_SR_AAF\"\"\",\"\"\"MAX_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n" + "GRCh37\t1\t1\t780\t1\t1\t780\t3to3\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t3\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"ft\"\"\":{\"\"\"MIN_PE_AAF\"\"\",\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":123,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7},\"\"\"NA12891\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":43,\"\"\"srv\"\"\":8},\"\"\"NA12892\"\"\":{\"\"\"gq\"\"\":157,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":56,\"\"\"srv\"\"\":8}}\n" - + 
"GRCh37\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t2\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gq\"\"\":93,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\",\"\"\"MAX_PE_COUNT\"\"\",\"\"\"MAX_SR_AAF\"\"\",\"\"\"MAX_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n" - + "GRCh37\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"unaffectedCarriers\"\"\":0}\t0\t3\t0\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gq\"\"\":93,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\",\"\"\"MAX_PE_COUNT\"\"\",\"\"\"MAX_SR_AAF\"\"\",\"\"\"MAX_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n"; + + 
"GRCh37\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t2\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gq\"\"\":93,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\",\"\"\"MAX_PE_COUNT\"\"\",\"\"\"MAX_SR_AAF\"\"\",\"\"\"MAX_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n"; final String expectedFeatureEffects = "case_id\tset_id\tsv_uuid\trefseq_gene_id\trefseq_transcript_id\trefseq_transcript_coding\trefseq_effect\tensembl_gene_id\tensembl_transcript_id\tensembl_transcript_coding\tensembl_effect\n"; runTest( @@ -322,9 +322,9 @@ void testWithSingletonOptOutChrom2() throws IOException { "release\tchromosome\tchromosome_no\tbin\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tnum_hom_alt\tnum_hom_ref\tnum_het\tnum_hemi_alt\tnum_hemi_ref\tgenotype\n" + "GRCh37\t1\t1\t599\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"ft\"\"\":{\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + 
"GRCh37\t1\t1\t604\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"ft\"\"\":{\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":80,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + + "GRCh37\t1\t1\t654\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t0\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n" + "GRCh37\t1\t1\t780\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"ft\"\"\":{\"\"\"MIN_PE_AAF\"\"\",\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":123,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7}}\n" - + "GRCh37\t14\t14\t1299\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n" - + 
"GRCh37\t1\t1\t654\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t0\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n"; + + "GRCh37\t14\t14\t1299\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n"; final String expectedFeatureEffects = "case_id\tset_id\tsv_uuid\trefseq_gene_id\trefseq_transcript_id\trefseq_transcript_coding\trefseq_effect\tensembl_gene_id\tensembl_transcript_id\tensembl_transcript_coding\tensembl_effect\n"; runTest( @@ -356,9 +356,9 @@ void testWithSingletonOptOutDbCounts() throws IOException { "release\tchromosome\tchromosome_no\tbin\tchromosome2\tchromosome_no2\tbin2\tpe_orientation\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tgenotype\n" + "GRCh37\t1\t1\t599\t1\t1\t599\t3to5\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{}\t{\"\"\"HG00102\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"ft\"\"\":{\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + 
"GRCh37\t1\t1\t604\t1\t1\t604\t5to3\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{}\t{\"\"\"HG00102\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"ft\"\"\":{\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":80,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + + "GRCh37\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{}\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n" + "GRCh37\t1\t1\t780\t1\t1\t780\t3to3\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{}\t{\"\"\"HG00102\"\"\":{\"\"\"ft\"\"\":{\"\"\"MIN_PE_AAF\"\"\",\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":123,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7}}\n" - + "GRCh37\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{}\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n" - + "GRCh37\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{}\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n"; + + 
"GRCh37\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{}\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n"; final String expectedFeatureEffects = "case_id\tset_id\tsv_uuid\trefseq_gene_id\trefseq_transcript_id\trefseq_transcript_coding\trefseq_effect\tensembl_gene_id\tensembl_transcript_id\tensembl_transcript_coding\tensembl_effect\n"; runTest( @@ -388,8 +388,8 @@ void testWithSingletonBnd() throws IOException { + "GRCh37\tthousand_genomes\tv5b.20130502\n"; final String expectedGts = "release\tchromosome\tchromosome_no\tbin\tchromosome2\tchromosome_no2\tbin2\tpe_orientation\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tnum_hom_alt\tnum_hom_ref\tnum_het\tnum_hemi_alt\tnum_hemi_ref\tgenotype\n" - + "GRCh37\t2\t2\t585\t1\t1\t585\t3to5\t10265\t10265\t-248\t248\t-248\t248\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.1\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":249239891,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":23,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":10,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" - + "GRCh37\t1\t1\t2486\t2\t2\t2486\t5to3\t249239891\t249239891\t-248\t248\t-248\t248\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.1\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"2\"\"\",\"\"\"pos2\"\"\":10265,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t0\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":23,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":10,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n"; + + 
"GRCh37\t1\t1\t2486\t2\t2\t2486\t5to3\t249239891\t249239891\t-248\t248\t-248\t248\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.1\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"2\"\"\",\"\"\"pos2\"\"\":10265,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t0\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":23,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":10,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + + "GRCh37\t2\t2\t585\t1\t1\t585\t3to5\t10265\t10265\t-248\t248\t-248\t248\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.1\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":249239891,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":23,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":10,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n"; final String expectedFeatureEffects = "case_id\tset_id\tsv_uuid\trefseq_gene_id\trefseq_transcript_id\trefseq_transcript_coding\trefseq_effect\tensembl_gene_id\tensembl_transcript_id\tensembl_transcript_coding\tensembl_effect\n"; runTest( diff --git a/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf38Chr1Test.java b/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf38Chr1Test.java index 4394cef..d787499 100644 --- a/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf38Chr1Test.java +++ b/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf38Chr1Test.java @@ -119,9 +119,9 @@ void testWithSingleton(boolean gzipOutput) throws IOException { 
"release\tchromosome\tchromosome_no\tbin\tchromosome2\tchromosome_no2\tbin2\tpe_orientation\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tnum_hom_alt\tnum_hom_ref\tnum_het\tnum_hemi_alt\tnum_hemi_ref\tgenotype\n" + "GRCh38\t1\t1\t599\t1\t1\t599\t3to5\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"ft\"\"\":{\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + "GRCh38\t1\t1\t604\t1\t1\t604\t5to3\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"ft\"\"\":{\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":80,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + + "GRCh38\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t0\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n" + 
"GRCh38\t1\t1\t780\t1\t1\t780\t3to3\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"ft\"\"\":{\"\"\"MIN_PE_AAF\"\"\",\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":123,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7}}\n" - + "GRCh38\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n" - + "GRCh38\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t0\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n"; + + "GRCh38\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n"; final String expectedFeatureEffects = 
"case_id\tset_id\tsv_uuid\trefseq_gene_id\trefseq_transcript_id\trefseq_transcript_coding\trefseq_effect\tensembl_gene_id\tensembl_transcript_id\tensembl_transcript_coding\tensembl_effect\n"; runTest( @@ -148,9 +148,9 @@ void testWithTrio(boolean gzipOutput) throws IOException { "release\tchromosome\tchromosome_no\tbin\tchromosome2\tchromosome_no2\tbin2\tpe_orientation\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tnum_hom_alt\tnum_hom_ref\tnum_het\tnum_hemi_alt\tnum_hemi_ref\tgenotype\n" + "GRCh38\t1\t1\t599\t1\t1\t599\t3to5\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t2\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"ft\"\"\":{\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12891\"\"\":{\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\"},\"\"\"gq\"\"\":127,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":11,\"\"\"pev\"\"\":4,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gq\"\"\":57,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":19,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + 
"GRCh38\t1\t1\t604\t1\t1\t604\t5to3\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t2\t1\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"ft\"\"\":{\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":80,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12891\"\"\":{\"\"\"gq\"\"\":24,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":12,\"\"\"pev\"\"\":1,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gq\"\"\":39,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n" + + "GRCh38\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"unaffectedCarriers\"\"\":0}\t0\t3\t0\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gq\"\"\":93,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\",\"\"\"MAX_PE_COUNT\"\"\",\"\"\"MAX_SR_AAF\"\"\",\"\"\"MAX_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n" + 
"GRCh38\t1\t1\t780\t1\t1\t780\t3to3\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t3\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"ft\"\"\":{\"\"\"MIN_PE_AAF\"\"\",\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":123,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7},\"\"\"NA12891\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":43,\"\"\"srv\"\"\":8},\"\"\"NA12892\"\"\":{\"\"\"gq\"\"\":157,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":56,\"\"\"srv\"\"\":8}}\n" - + "GRCh38\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t2\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gq\"\"\":93,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\",\"\"\"MAX_PE_COUNT\"\"\",\"\"\"MAX_SR_AAF\"\"\",\"\"\"MAX_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n" - + 
"GRCh38\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"unaffectedCarriers\"\"\":0}\t0\t3\t0\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gq\"\"\":93,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\",\"\"\"MAX_PE_COUNT\"\"\",\"\"\"MAX_SR_AAF\"\"\",\"\"\"MAX_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n"; + + "GRCh38\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t2\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gq\"\"\":93,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\",\"\"\"MAX_PE_COUNT\"\"\",\"\"\"MAX_SR_AAF\"\"\",\"\"\"MAX_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n"; final String expectedFeatureEffects = "case_id\tset_id\tsv_uuid\trefseq_gene_id\trefseq_transcript_id\trefseq_transcript_coding\trefseq_effect\tensembl_gene_id\tensembl_transcript_id\tensembl_transcript_coding\tensembl_effect\n"; 
runTest( diff --git a/varfish-annotator-cli/src/test/resources/input/real-world-37/NA-12878WGS_dragen.sv.gts.tsv b/varfish-annotator-cli/src/test/resources/input/real-world-37/NA-12878WGS_dragen.sv.gts.tsv index 2cbbc98..768ffa9 100644 --- a/varfish-annotator-cli/src/test/resources/input/real-world-37/NA-12878WGS_dragen.sv.gts.tsv +++ b/varfish-annotator-cli/src/test/resources/input/real-world-37/NA-12878WGS_dragen.sv.gts.tsv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9d197e6bcc4e1a222f2d82a619a52de0d8ce4b4da01be26a505bac0884fe0966 +oid sha256:32d08b75c80364cea7756822c78914fc8aabe1b61646066f7db82d3f2f80af32 size 2621930 diff --git a/varfish-annotator-cli/src/test/resources/input/real-world-37/bwa.delly2.Case_1_index-N1-DNA1-WGS1.gts.tsv b/varfish-annotator-cli/src/test/resources/input/real-world-37/bwa.delly2.Case_1_index-N1-DNA1-WGS1.gts.tsv index 13904ea..5bca673 100644 --- a/varfish-annotator-cli/src/test/resources/input/real-world-37/bwa.delly2.Case_1_index-N1-DNA1-WGS1.gts.tsv +++ b/varfish-annotator-cli/src/test/resources/input/real-world-37/bwa.delly2.Case_1_index-N1-DNA1-WGS1.gts.tsv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:32592801e92a602524cb86074a0f5f45a03cad1d2336a87bb02419ed946f995c +oid sha256:16c8e4c6ceb253e30a26e9b670efb4db6590f691de36a1575774b741b7277bdd size 19498392 diff --git a/varfish-annotator-core/pom.xml b/varfish-annotator-core/pom.xml index 005e100..af6dc2e 100644 --- a/varfish-annotator-core/pom.xml +++ b/varfish-annotator-core/pom.xml @@ -25,6 +25,18 @@ ${htsjdk.version} + + com.lodborg + interval-tree + ${intervaltree.version} + + + + com.google.code.externalsortinginjava + externalsortinginjava + ${externalsortinginjava.version} + + de.charite.compbio jannovar-core diff --git a/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/SampleGenotype.java 
package com.github.bihealth.varfish_annotator.annotate_svs;

import com.google.common.collect.ComparisonChain;
import java.util.Comparator;
import org.apache.commons.csv.CSVRecord;

/**
 * Orders the records of a VarFish genotype TSV ({@code .gts.tsv}) file.
 *
 * <p>Records are compared, in this order, by
 *
 * <ol>
 *   <li>column 0 as a string,
 *   <li>column 2 as an integer,
 *   <li>the start column as an integer, and
 *   <li>the end column as an integer.
 * </ol>
 *
 * <p>The start and end columns sit at indexes 4 and 5, shifted right by four positions when the
 * file carries the additional second-chromosome columns. With the header used by this project's
 * test expectations ({@code release, chromosome, chromosome_no, bin, chromosome2, chromosome_no2,
 * bin2, pe_orientation, start, end, ...}) this means: release, chromosome_no, start, end.
 * NOTE(review): the column layout of files <em>without</em> the second-chromosome columns is not
 * visible from this class; confirm that start/end really are at indexes 4 and 5 there.
 *
 * <p>Header lines must not be passed to this comparator, as the numeric columns are parsed with
 * {@link Integer#parseInt(String)}.
 */
public class VarFishGtsTsvComparator implements Comparator<CSVRecord> {

  /** Index of the column compared first (as string). */
  private static final int COL_RELEASE = 0;

  /** Index of the numeric chromosome column. */
  private static final int COL_CHROMOSOME_NO = 2;

  /** Index of the start column when no second-chromosome columns are present. */
  private static final int COL_START = 4;

  /** Index of the end column when no second-chromosome columns are present. */
  private static final int COL_END = 5;

  /** Number of positions start/end move to the right when second-chromosome columns exist. */
  private static final int CHROM2_COLUMN_SHIFT = 4;

  /** Whether the sorted file contains the additional second-chromosome columns. */
  private final boolean hasChrom2Columns;

  /**
   * Construct a new comparator.
   *
   * @param hasChrom2Columns {@code true} if the records contain the additional second-chromosome
   *     columns, which moves the start and end columns four positions to the right
   */
  public VarFishGtsTsvComparator(boolean hasChrom2Columns) {
    this.hasChrom2Columns = hasChrom2Columns;
  }

  /**
   * Compare two records by release, numeric chromosome, start, and end.
   *
   * @param lhs left-hand side record
   * @param rhs right-hand side record
   * @return negative, zero, or positive value if {@code lhs} sorts before, equal to, or after
   *     {@code rhs}
   * @throws NumberFormatException if one of the numeric columns cannot be parsed as an integer
   */
  @Override
  public int compare(CSVRecord lhs, CSVRecord rhs) {
    final int shift = hasChrom2Columns ? CHROM2_COLUMN_SHIFT : 0;
    final int startCol = COL_START + shift;
    final int endCol = COL_END + shift;
    // All columns are read and parsed eagerly (as before), so malformed numeric values are
    // reported even when an earlier key already decides the ordering.
    return ComparisonChain.start()
        .compare(lhs.get(COL_RELEASE), rhs.get(COL_RELEASE))
        .compare(
            Integer.parseInt(lhs.get(COL_CHROMOSOME_NO)),
            Integer.parseInt(rhs.get(COL_CHROMOSOME_NO)))
        .compare(Integer.parseInt(lhs.get(startCol)), Integer.parseInt(rhs.get(startCol)))
        .compare(Integer.parseInt(lhs.get(endCol)), Integer.parseInt(rhs.get(endCol)))
        .result();
  }
}