diff --git a/CHANGES.md b/CHANGES.md
index f843bca..bb0f70c 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -4,6 +4,7 @@
- Writing out proper SV type for Dragen CNV (#76)
- Adding support for depth of coverage annotation (#73)
+- Merge multiple input VCFs in annotate-svs with clustering (#75)
## v0.26
diff --git a/pom.xml b/pom.xml
index ca3d41d..966976b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -24,6 +24,8 @@
1.4.197
2.24.1
0.41
+ 1.0.0
+ 0.6.1
diff --git a/varfish-annotator-cli/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateSvsArgs.java b/varfish-annotator-cli/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateSvsArgs.java
index f78b82a..20658b1 100644
--- a/varfish-annotator-cli/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateSvsArgs.java
+++ b/varfish-annotator-cli/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateSvsArgs.java
@@ -118,6 +118,16 @@ public final class AnnotateSvsArgs {
"Annotate CNV with coverage and mapping quality from maelstrom-core coverage VCF file")
private List coverageVcfs = new ArrayList<>();
+ /** Reciprocal overlap required for merging, set via {@code --merge-overlap}; defaults to 0.75. */
+ @Parameter(
+ names = "--merge-overlap",
+ description = "Reciprocal overlap to require for merging (default: 0.75)")
+ private double mergeOverlap = 0.75;
+
+ /**
+  * Radius within which BNDs are merged, set via {@code --merge-bnd-radius}; defaults to 50.
+  * NOTE(review): the unit is presumably base pairs -- confirm against the merging code.
+  */
+ @Parameter(
+ names = "--merge-bnd-radius",
+ description = "Merge BNDs within the given radius (default: 50)")
+ private int mergeBndRadius = 50;
+
public String getRefseqSerPath() {
return refseqSerPath;
}
@@ -198,6 +208,14 @@ public List getCoverageVcfs() {
return coverageVcfs;
}
+ /** @return the value of {@code --merge-overlap}, the reciprocal overlap to require for merging */
+ public double getMergeOverlap() {
+ return mergeOverlap;
+ }
+
+ /** @return the value of {@code --merge-bnd-radius}, the radius within which BNDs are merged */
+ public int getMergeBndRadius() {
+ return mergeBndRadius;
+ }
+
@Override
public String toString() {
return "AnnotateSvsArgs{"
@@ -259,6 +277,10 @@ public String toString() {
+ '\''
+ ", coverageVcfs="
+ coverageVcfs
+ + ", mergeOverlap="
+ + mergeOverlap
+ + ", mergeBndRadius="
+ + mergeBndRadius
+ '}';
}
}
diff --git a/varfish-annotator-cli/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateSvsVcf.java b/varfish-annotator-cli/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateSvsVcf.java
index 6d70c47..1b71316 100644
--- a/varfish-annotator-cli/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateSvsVcf.java
+++ b/varfish-annotator-cli/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateSvsVcf.java
@@ -7,6 +7,9 @@
import com.github.bihealth.varfish_annotator.data.GenomeVersion;
import com.github.bihealth.varfish_annotator.db.DbInfoWriterHelper;
import com.github.bihealth.varfish_annotator.utils.*;
+import com.google.code.externalsorting.csv.CSVRecordBuffer;
+import com.google.code.externalsorting.csv.CsvExternalSort;
+import com.google.code.externalsorting.csv.CsvSortOptions;
import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
@@ -34,12 +37,17 @@
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.vcf.VCFFileReader;
import java.io.*;
+import java.nio.charset.Charset;
import java.nio.file.Files;
+import java.nio.file.Path;
import java.nio.file.Paths;
import java.sql.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVParser;
+import org.apache.commons.csv.CSVRecord;
/** Implementation of the annotate-svs command. */
public final class AnnotateSvsVcf {
@@ -114,6 +122,15 @@ public void run() {
}
}
+ Path tmpDir = null;
+ try {
+ tmpDir = Files.createTempDirectory("varfish-annotator");
+ } catch (IOException e) {
+ System.err.println("Could not create temporary directory");
+ System.exit(1);
+ }
+ final Path tmpGtsPath = Paths.get(tmpDir.toString(), "tmp.gts.tsv");
+
try (Connection conn =
DriverManager.getConnection(
"jdbc:h2:"
@@ -123,11 +140,12 @@ public void run() {
"sa",
"");
VCFFileReader reader = new VCFFileReader(new File(args.getInputVcf()));
- OutputStream gtsStream = Files.newOutputStream(Paths.get(args.getOutputGts()));
OutputStream featureEffectsStream =
Files.newOutputStream(Paths.get(args.getOutputFeatureEffects()));
OutputStream dbInfoStream = Files.newOutputStream(Paths.get(args.getOutputDbInfos()));
- Writer gtWriter = GzipUtil.maybeOpenGzipOutputStream(gtsStream, args.getOutputGts());
+ OutputStream tmpGtsStream = Files.newOutputStream(tmpGtsPath);
+ Writer tmpGtsWriter =
+ GzipUtil.maybeOpenGzipOutputStream(tmpGtsStream, tmpGtsPath.toString());
Writer featureEffectsWriter =
GzipUtil.maybeOpenGzipOutputStream(
featureEffectsStream, args.getOutputFeatureEffects());
@@ -161,8 +179,13 @@ public void run() {
refseqJvData,
ensemblJvData,
callerSupport,
- gtWriter,
+ tmpGtsWriter,
featureEffectsWriter);
+
+ // Finalize genotypes and write out sorted
+ tmpGtsWriter.close();
+ writeSortedGts(tmpGtsPath);
+
new DbInfoWriterHelper()
.writeDbInfos(conn, dbInfoBufWriter, args.getRelease(), AnnotateVcf.class);
} catch (SQLException e) {
@@ -192,6 +215,52 @@ public void run() {
}
}
+ /**
+  * Sort the temporary genotypes file and write the sorted result to the genotypes output.
+  *
+  * <p>The tab-separated file at {@code tmpGtsPath} (one header line) is passed to {@code
+  * CsvExternalSort.sortInBatch}, using {@code VarFishGtsTsvComparator} for ordering. The
+  * resulting batch files are then merged into the file named by {@code args.getOutputGts()}.
+  *
+  * @param tmpGtsPath path to the unsorted temporary genotypes TSV file
+  * @throws IOException if reading the temporary/batch files or writing the output fails
+  * @throws RuntimeException if the external sort raises a {@code ClassNotFoundException}
+  */
+ private void writeSortedGts(Path tmpGtsPath) throws IOException {
+   // The comparator needs to know whether the chrom2 columns were written (not opted out).
+   final boolean hasChrom2Columns =
+       !args.getOptOutFeatures().contains(GtRecordBuilder.FEATURE_CHROM2_COLUMNS);
+   final CsvSortOptions sortOptions =
+       new CsvSortOptions.Builder(
+               new VarFishGtsTsvComparator(hasChrom2Columns),
+               CsvExternalSort.DEFAULTMAXTEMPFILES,
+               CsvExternalSort.estimateAvailableMemory())
+           .charset(Charset.defaultCharset())
+           .distinct(false)
+           .numHeader(1)
+           .skipHeader(false)
+           .format(
+               CSVFormat.DEFAULT
+                   .builder()
+                   .setDelimiter('\t')
+                   .setIgnoreSurroundingSpaces(true)
+                   .setQuote((Character) null)
+                   .build())
+           .build();
+
+   // Sort genotypes file into batches; the header record(s) are collected into gtHeader.
+   final ArrayList<CSVRecord> gtHeader = new ArrayList<>();
+   final List<File> gtSortInBatch =
+       CsvExternalSort.sortInBatch(tmpGtsPath.toFile(), null, sortOptions, gtHeader);
+
+   // Track every opened batch reader so all of them are released even when opening a later
+   // batch or the merge itself fails.
+   final List<CSVRecordBuffer> bfbs = new ArrayList<>();
+   try (OutputStream gtsStream = Files.newOutputStream(Paths.get(args.getOutputGts()));
+       Writer gtsWriter = GzipUtil.maybeOpenGzipOutputStream(gtsStream, args.getOutputGts());
+       BufferedWriter bufWriter = new BufferedWriter(gtsWriter)) {
+     for (File f : gtSortInBatch) {
+       bfbs.add(openRecordBuffer(f, sortOptions));
+     }
+
+     CsvExternalSort.mergeSortedFiles(bufWriter, sortOptions, bfbs, gtHeader);
+   } catch (ClassNotFoundException e) {
+     throw new RuntimeException("Problem with external file sort", e);
+   } finally {
+     for (CSVRecordBuffer bfb : bfbs) {
+       try {
+         bfb.close();
+       } catch (IOException ignored) {
+         // Best-effort cleanup of read-only batch readers; a failure here must not mask the
+         // outcome (or exception) of the merge.
+       }
+     }
+   }
+ }
+
+ /** Open one sorted batch file as a record buffer, closing its stream if setup fails. */
+ private static CSVRecordBuffer openRecordBuffer(File f, CsvSortOptions sortOptions)
+     throws IOException, ClassNotFoundException {
+   final InputStream in = new FileInputStream(f);
+   boolean opened = false;
+   try {
+     final BufferedReader fbr =
+         new BufferedReader(new InputStreamReader(in, sortOptions.getCharset()));
+     final CSVRecordBuffer bfb =
+         new CSVRecordBuffer(new CSVParser(fbr, sortOptions.getFormat()));
+     opened = true;
+     return bfb;
+   } finally {
+     if (!opened) {
+       try {
+         in.close();
+       } catch (IOException ignored) {
+         // Keep the original failure; the stream is being discarded anyway.
+       }
+     }
+   }
+ }
+
private void checkWriteOutBndMates() {
if (!ImmutableList.of("true", "false", "auto").contains(args.getWriteBndMates())) {
System.err.println("Unsupported feature in --opt-out: " + args.getWriteBndMates());
diff --git a/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf37Chr1Test.java b/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf37Chr1Test.java
index 03276b5..15b51f1 100644
--- a/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf37Chr1Test.java
+++ b/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf37Chr1Test.java
@@ -160,9 +160,9 @@ void testWithSingleton(boolean gzipOutput) throws IOException {
"release\tchromosome\tchromosome_no\tbin\tchromosome2\tchromosome_no2\tbin2\tpe_orientation\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tnum_hom_alt\tnum_hom_ref\tnum_het\tnum_hemi_alt\tnum_hemi_ref\tgenotype\n"
+ "GRCh37\t1\t1\t599\t1\t1\t599\t3to5\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"ft\"\"\":{\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n"
+ "GRCh37\t1\t1\t604\t1\t1\t604\t5to3\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"ft\"\"\":{\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":80,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n"
+ + "GRCh37\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t0\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n"
+ "GRCh37\t1\t1\t780\t1\t1\t780\t3to3\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"ft\"\"\":{\"\"\"MIN_PE_AAF\"\"\",\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":123,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7}}\n"
- + "GRCh37\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n"
- + "GRCh37\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t0\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n";
+ + "GRCh37\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n";
final String expectedFeatureEffects =
"case_id\tset_id\tsv_uuid\trefseq_gene_id\trefseq_transcript_id\trefseq_transcript_coding\trefseq_effect\tensembl_gene_id\tensembl_transcript_id\tensembl_transcript_coding\tensembl_effect\n";
runTest(
@@ -195,9 +195,9 @@ void testWithTrio(boolean gzipOutput) throws IOException {
"release\tchromosome\tchromosome_no\tbin\tchromosome2\tchromosome_no2\tbin2\tpe_orientation\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tnum_hom_alt\tnum_hom_ref\tnum_het\tnum_hemi_alt\tnum_hemi_ref\tgenotype\n"
+ "GRCh37\t1\t1\t599\t1\t1\t599\t3to5\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t2\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"ft\"\"\":{\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12891\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\"},\"\"\"gq\"\"\":127,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":11,\"\"\"pev\"\"\":4,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"gq\"\"\":57,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":19,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n"
+ "GRCh37\t1\t1\t604\t1\t1\t604\t5to3\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t2\t1\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"ft\"\"\":{\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":80,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12891\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"gq\"\"\":24,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":12,\"\"\"pev\"\"\":1,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"gq\"\"\":39,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n"
+ + "GRCh37\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"unaffectedCarriers\"\"\":0}\t0\t3\t0\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gq\"\"\":93,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\",\"\"\"MAX_PE_COUNT\"\"\",\"\"\"MAX_SR_AAF\"\"\",\"\"\"MAX_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n"
+ "GRCh37\t1\t1\t780\t1\t1\t780\t3to3\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t3\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"ft\"\"\":{\"\"\"MIN_PE_AAF\"\"\",\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":123,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7},\"\"\"NA12891\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":43,\"\"\"srv\"\"\":8},\"\"\"NA12892\"\"\":{\"\"\"gq\"\"\":157,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":56,\"\"\"srv\"\"\":8}}\n"
- + "GRCh37\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t2\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gq\"\"\":93,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\",\"\"\"MAX_PE_COUNT\"\"\",\"\"\"MAX_SR_AAF\"\"\",\"\"\"MAX_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n"
- + "GRCh37\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"unaffectedCarriers\"\"\":0}\t0\t3\t0\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gq\"\"\":93,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\",\"\"\"MAX_PE_COUNT\"\"\",\"\"\"MAX_SR_AAF\"\"\",\"\"\"MAX_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n";
+ + "GRCh37\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t2\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gq\"\"\":93,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\",\"\"\"MAX_PE_COUNT\"\"\",\"\"\"MAX_SR_AAF\"\"\",\"\"\"MAX_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n";
final String expectedFeatureEffects =
"case_id\tset_id\tsv_uuid\trefseq_gene_id\trefseq_transcript_id\trefseq_transcript_coding\trefseq_effect\tensembl_gene_id\tensembl_transcript_id\tensembl_transcript_coding\tensembl_effect\n";
runTest(
@@ -322,9 +322,9 @@ void testWithSingletonOptOutChrom2() throws IOException {
"release\tchromosome\tchromosome_no\tbin\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tnum_hom_alt\tnum_hom_ref\tnum_het\tnum_hemi_alt\tnum_hemi_ref\tgenotype\n"
+ "GRCh37\t1\t1\t599\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"ft\"\"\":{\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n"
+ "GRCh37\t1\t1\t604\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"ft\"\"\":{\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":80,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n"
+ + "GRCh37\t1\t1\t654\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t0\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n"
+ "GRCh37\t1\t1\t780\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"ft\"\"\":{\"\"\"MIN_PE_AAF\"\"\",\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":123,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7}}\n"
- + "GRCh37\t14\t14\t1299\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n"
- + "GRCh37\t1\t1\t654\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t0\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n";
+ + "GRCh37\t14\t14\t1299\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n";
final String expectedFeatureEffects =
"case_id\tset_id\tsv_uuid\trefseq_gene_id\trefseq_transcript_id\trefseq_transcript_coding\trefseq_effect\tensembl_gene_id\tensembl_transcript_id\tensembl_transcript_coding\tensembl_effect\n";
runTest(
@@ -356,9 +356,9 @@ void testWithSingletonOptOutDbCounts() throws IOException {
"release\tchromosome\tchromosome_no\tbin\tchromosome2\tchromosome_no2\tbin2\tpe_orientation\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tgenotype\n"
+ "GRCh37\t1\t1\t599\t1\t1\t599\t3to5\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{}\t{\"\"\"HG00102\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"ft\"\"\":{\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n"
+ "GRCh37\t1\t1\t604\t1\t1\t604\t5to3\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{}\t{\"\"\"HG00102\"\"\":{\"\"\"amq\"\"\":40,\"\"\"anc\"\"\":1.0,\"\"\"ft\"\"\":{\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":80,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n"
+ + "GRCh37\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{}\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n"
+ "GRCh37\t1\t1\t780\t1\t1\t780\t3to3\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{}\t{\"\"\"HG00102\"\"\":{\"\"\"ft\"\"\":{\"\"\"MIN_PE_AAF\"\"\",\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":123,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7}}\n"
- + "GRCh37\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{}\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n"
- + "GRCh37\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{}\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n";
+ + "GRCh37\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{}\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n";
final String expectedFeatureEffects =
"case_id\tset_id\tsv_uuid\trefseq_gene_id\trefseq_transcript_id\trefseq_transcript_coding\trefseq_effect\tensembl_gene_id\tensembl_transcript_id\tensembl_transcript_coding\tensembl_effect\n";
runTest(
@@ -388,8 +388,8 @@ void testWithSingletonBnd() throws IOException {
+ "GRCh37\tthousand_genomes\tv5b.20130502\n";
final String expectedGts =
"release\tchromosome\tchromosome_no\tbin\tchromosome2\tchromosome_no2\tbin2\tpe_orientation\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tnum_hom_alt\tnum_hom_ref\tnum_het\tnum_hemi_alt\tnum_hemi_ref\tgenotype\n"
- + "GRCh37\t2\t2\t585\t1\t1\t585\t3to5\t10265\t10265\t-248\t248\t-248\t248\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.1\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":249239891,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":23,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":10,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n"
- + "GRCh37\t1\t1\t2486\t2\t2\t2486\t5to3\t249239891\t249239891\t-248\t248\t-248\t248\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.1\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"2\"\"\",\"\"\"pos2\"\"\":10265,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t0\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":23,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":10,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n";
+ + "GRCh37\t1\t1\t2486\t2\t2\t2486\t5to3\t249239891\t249239891\t-248\t248\t-248\t248\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.1\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"2\"\"\",\"\"\"pos2\"\"\":10265,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t0\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":23,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":10,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n"
+ + "GRCh37\t2\t2\t585\t1\t1\t585\t3to5\t10265\t10265\t-248\t248\t-248\t248\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.1\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":249239891,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":23,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":10,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n";
final String expectedFeatureEffects =
"case_id\tset_id\tsv_uuid\trefseq_gene_id\trefseq_transcript_id\trefseq_transcript_coding\trefseq_effect\tensembl_gene_id\tensembl_transcript_id\tensembl_transcript_coding\tensembl_effect\n";
runTest(
diff --git a/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf38Chr1Test.java b/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf38Chr1Test.java
index 4394cef..d787499 100644
--- a/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf38Chr1Test.java
+++ b/varfish-annotator-cli/src/test/java/com/github/bihealth/varfish_annotator/annotate_svs/AnnotateDellyVcf38Chr1Test.java
@@ -119,9 +119,9 @@ void testWithSingleton(boolean gzipOutput) throws IOException {
"release\tchromosome\tchromosome_no\tbin\tchromosome2\tchromosome_no2\tbin2\tpe_orientation\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tnum_hom_alt\tnum_hom_ref\tnum_het\tnum_hemi_alt\tnum_hemi_ref\tgenotype\n"
+ "GRCh38\t1\t1\t599\t1\t1\t599\t3to5\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"ft\"\"\":{\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n"
+ "GRCh38\t1\t1\t604\t1\t1\t604\t5to3\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"ft\"\"\":{\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":80,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n"
+ + "GRCh38\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t0\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n"
+ "GRCh38\t1\t1\t780\t1\t1\t780\t3to3\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"ft\"\"\":{\"\"\"MIN_PE_AAF\"\"\",\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":123,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7}}\n"
- + "GRCh38\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n"
- + "GRCh38\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t0\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n";
+ + "GRCh38\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t1\t0\t0\t{\"\"\"HG00102\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21}}\n";
final String expectedFeatureEffects =
"case_id\tset_id\tsv_uuid\trefseq_gene_id\trefseq_transcript_id\trefseq_transcript_coding\trefseq_effect\tensembl_gene_id\tensembl_transcript_id\tensembl_transcript_coding\tensembl_effect\n";
runTest(
@@ -148,9 +148,9 @@ void testWithTrio(boolean gzipOutput) throws IOException {
"release\tchromosome\tchromosome_no\tbin\tchromosome2\tchromosome_no2\tbin2\tpe_orientation\tstart\tend\tstart_ci_left\tstart_ci_right\tend_ci_left\tend_ci_right\tcase_id\tset_id\tsv_uuid\tcaller\tsv_type\tsv_sub_type\tinfo\tnum_hom_alt\tnum_hom_ref\tnum_het\tnum_hemi_alt\tnum_hemi_ref\tgenotype\n"
+ "GRCh38\t1\t1\t599\t1\t1\t599\t3to5\t1866283\t1867170\t-56\t56\t-56\t56\t.\t.\t00000000-0000-0000-0000-000000000000\tEMBL.DELLYv0.8.5\tDEL\tDEL\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t2\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"ft\"\"\":{\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":22,\"\"\"pev\"\"\":6,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12891\"\"\":{\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\"},\"\"\"gq\"\"\":127,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":11,\"\"\"pev\"\"\":4,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gq\"\"\":57,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":19,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n"
+ "GRCh38\t1\t1\t604\t1\t1\t604\t5to3\t2583294\t2583895\t-624\t624\t-624\t624\t.\t.\t00000000-0000-0000-0000-000000000001\tEMBL.DELLYv0.8.5\tDUP\tDUP\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t2\t1\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"ft\"\"\":{\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":80,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":3,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12891\"\"\":{\"\"\"gq\"\"\":24,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":12,\"\"\"pev\"\"\":1,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"gq\"\"\":39,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":13,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":0,\"\"\"srv\"\"\":0}}\n"
+ + "GRCh38\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"unaffectedCarriers\"\"\":0}\t0\t3\t0\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gq\"\"\":93,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\",\"\"\"MAX_PE_COUNT\"\"\",\"\"\"MAX_SR_AAF\"\"\",\"\"\"MAX_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n"
+ "GRCh38\t1\t1\t780\t1\t1\t780\t3to3\t25613682\t25614267\t-6\t6\t-6\t6\t.\t.\t00000000-0000-0000-0000-000000000002\tEMBL.DELLYv0.8.5\tINV\tINV\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"unaffectedCarriers\"\"\":0}\t0\t0\t3\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"ft\"\"\":{\"\"\"MIN_PE_AAF\"\"\",\"\"\"MIN_PE_COUNT\"\"\",\"\"\"MIN_SR_AAF\"\"\",\"\"\"MIN_SR_COUNT\"\"\"},\"\"\"gq\"\"\":123,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":46,\"\"\"srv\"\"\":7},\"\"\"NA12891\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":43,\"\"\"srv\"\"\":8},\"\"\"NA12892\"\"\":{\"\"\"gq\"\"\":157,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":0,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":56,\"\"\"srv\"\"\":8}}\n"
- + "GRCh38\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t2\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gq\"\"\":93,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\",\"\"\"MAX_PE_COUNT\"\"\",\"\"\"MAX_SR_AAF\"\"\",\"\"\"MAX_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n"
- + "GRCh38\t1\t1\t654\t14\t14\t654\t3to3\t9121445\t9121445\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000004\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"14\"\"\",\"\"\"pos2\"\"\":93713177,\"\"\"unaffectedCarriers\"\"\":0}\t0\t3\t0\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gq\"\"\":93,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\",\"\"\"MAX_PE_COUNT\"\"\",\"\"\"MAX_SR_AAF\"\"\",\"\"\"MAX_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n";
+ + "GRCh38\t14\t14\t1299\t1\t1\t1299\t3to3\t93713177\t93713177\t-3\t3\t-3\t3\t.\t.\t00000000-0000-0000-0000-000000000003\tEMBL.DELLYv0.8.5\tBND\tBND\t{\"\"\"affectedCarriers\"\"\":0,\"\"\"backgroundCarriers\"\"\":0,\"\"\"chr2\"\"\":\"\"\"1\"\"\",\"\"\"pos2\"\"\":9121445,\"\"\"unaffectedCarriers\"\"\":0}\t0\t1\t2\t0\t0\t{\"\"\"NA12878\"\"\":{\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":50,\"\"\"pev\"\"\":17,\"\"\"src\"\"\":54,\"\"\"srv\"\"\":21},\"\"\"NA12891\"\"\":{\"\"\"gq\"\"\":93,\"\"\"gt\"\"\":\"\"\"0/0\"\"\",\"\"\"pec\"\"\":31,\"\"\"pev\"\"\":0,\"\"\"src\"\"\":31,\"\"\"srv\"\"\":0},\"\"\"NA12892\"\"\":{\"\"\"ft\"\"\":{\"\"\"MAX_PE_AAF\"\"\",\"\"\"MAX_PE_COUNT\"\"\",\"\"\"MAX_SR_AAF\"\"\",\"\"\"MAX_SR_COUNT\"\"\"},\"\"\"gq\"\"\":10000,\"\"\"gt\"\"\":\"\"\"0/1\"\"\",\"\"\"pec\"\"\":39,\"\"\"pev\"\"\":15,\"\"\"src\"\"\":37,\"\"\"srv\"\"\":18}}\n";
final String expectedFeatureEffects =
"case_id\tset_id\tsv_uuid\trefseq_gene_id\trefseq_transcript_id\trefseq_transcript_coding\trefseq_effect\tensembl_gene_id\tensembl_transcript_id\tensembl_transcript_coding\tensembl_effect\n";
runTest(
diff --git a/varfish-annotator-cli/src/test/resources/input/real-world-37/NA-12878WGS_dragen.sv.gts.tsv b/varfish-annotator-cli/src/test/resources/input/real-world-37/NA-12878WGS_dragen.sv.gts.tsv
index 2cbbc98..768ffa9 100644
--- a/varfish-annotator-cli/src/test/resources/input/real-world-37/NA-12878WGS_dragen.sv.gts.tsv
+++ b/varfish-annotator-cli/src/test/resources/input/real-world-37/NA-12878WGS_dragen.sv.gts.tsv
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:9d197e6bcc4e1a222f2d82a619a52de0d8ce4b4da01be26a505bac0884fe0966
+oid sha256:32d08b75c80364cea7756822c78914fc8aabe1b61646066f7db82d3f2f80af32
size 2621930
diff --git a/varfish-annotator-cli/src/test/resources/input/real-world-37/bwa.delly2.Case_1_index-N1-DNA1-WGS1.gts.tsv b/varfish-annotator-cli/src/test/resources/input/real-world-37/bwa.delly2.Case_1_index-N1-DNA1-WGS1.gts.tsv
index 13904ea..5bca673 100644
--- a/varfish-annotator-cli/src/test/resources/input/real-world-37/bwa.delly2.Case_1_index-N1-DNA1-WGS1.gts.tsv
+++ b/varfish-annotator-cli/src/test/resources/input/real-world-37/bwa.delly2.Case_1_index-N1-DNA1-WGS1.gts.tsv
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:32592801e92a602524cb86074a0f5f45a03cad1d2336a87bb02419ed946f995c
+oid sha256:16c8e4c6ceb253e30a26e9b670efb4db6590f691de36a1575774b741b7277bdd
size 19498392
diff --git a/varfish-annotator-core/pom.xml b/varfish-annotator-core/pom.xml
index 005e100..af6dc2e 100644
--- a/varfish-annotator-core/pom.xml
+++ b/varfish-annotator-core/pom.xml
@@ -25,6 +25,18 @@
${htsjdk.version}
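+    <dependency>
+      <groupId>com.lodborg</groupId>
+      <artifactId>interval-tree</artifactId>
+      <version>${intervaltree.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.code.externalsortinginjava</groupId>
+      <artifactId>externalsortinginjava</artifactId>
+      <version>${externalsortinginjava.version}</version>
+    </dependency>
+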
de.charite.compbio
jannovar-core
diff --git a/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/SampleGenotype.java b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/SampleGenotype.java
index 2f7ea56..aee98df 100644
--- a/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/SampleGenotype.java
+++ b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/SampleGenotype.java
@@ -5,10 +5,7 @@
import com.google.common.base.Joiner;
import com.google.common.base.Objects;
import com.google.common.collect.ImmutableList;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
+import java.util.*;
import java.util.stream.Collectors;
public final class SampleGenotype {
diff --git a/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/VarFishGtsTsvComparator.java b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/VarFishGtsTsvComparator.java
new file mode 100644
index 0000000..254f365
--- /dev/null
+++ b/varfish-annotator-core/src/main/java/com/github/bihealth/varfish_annotator/annotate_svs/VarFishGtsTsvComparator.java
@@ -0,0 +1,71 @@
+package com.github.bihealth.varfish_annotator.annotate_svs;
+
+import com.google.common.collect.ComparisonChain;
+import java.util.Comparator;
+import org.apache.commons.csv.CSVRecord;
+
+/**
+ * Orders rows of a VarFish genotypes TSV by column 0 (as text) and then numerically by column 2,
+ * the start column, and the end column.
+ *
+ * <p>In the layout with second-chromosome columns ({@code release, chromosome, chromosome_no, bin,
+ * chromosome2, chromosome_no2, bin2, pe_orientation, start, end, ...}) this sorts by release,
+ * chromosome number, start, and end. NOTE(review): the layout without those columns is presumably
+ * {@code release, chromosome, chromosome_no, bin, start, end, ...}; confirm against the writer.
+ */
+public class VarFishGtsTsvComparator implements Comparator<CSVRecord> {
+
+  /** Index of the column compared first, as a string. */
+  private static final int RELEASE_COLUMN = 0;
+
+  /** Index of the column compared second, as an integer. */
+  private static final int CHROMOSOME_NO_COLUMN = 2;
+
+  /** Index of the start column when the second-chromosome columns are absent. */
+  private static final int START_COLUMN = 4;
+
+  /** Index of the end column when the second-chromosome columns are absent. */
+  private static final int END_COLUMN = 5;
+
+  /** Number of positions start/end shift right when the second-chromosome columns exist. */
+  private static final int CHROM2_COLUMN_SHIFT = 4;
+
+  /** Whether the compared records carry the second-chromosome columns. */
+  private final boolean hasChrom2Columns;
+
+  /**
+   * Creates a comparator for one of the two column layouts.
+   *
+   * @param hasChrom2Columns {@code true} if start and end are read from indices 8 and 9 instead of
+   *     4 and 5
+   */
+  public VarFishGtsTsvComparator(boolean hasChrom2Columns) {
+    this.hasChrom2Columns = hasChrom2Columns;
+  }
+
+  /**
+   * Compares two records; the first differing column decides the result.
+   *
+   * <p>All numeric columns are parsed on every call, whichever column ends up deciding.
+   *
+   * @param lhs left-hand record
+   * @param rhs right-hand record
+   * @return a negative, zero, or positive value as {@code lhs} sorts before, with, or after rhs
+   * @throws NumberFormatException if a numeric column does not hold an integer
+   */
+  @Override
+  public int compare(CSVRecord lhs, CSVRecord rhs) {
+    final int shift = hasChrom2Columns ? CHROM2_COLUMN_SHIFT : 0;
+    return ComparisonChain.start()
+        .compare(lhs.get(RELEASE_COLUMN), rhs.get(RELEASE_COLUMN))
+        .compare(intAt(lhs, CHROMOSOME_NO_COLUMN), intAt(rhs, CHROMOSOME_NO_COLUMN))
+        .compare(intAt(lhs, START_COLUMN + shift), intAt(rhs, START_COLUMN + shift))
+        .compare(intAt(lhs, END_COLUMN + shift), intAt(rhs, END_COLUMN + shift))
+        .result();
+  }
+
+  /** Parses the value at {@code index} of {@code row} as an integer. */
+  private static int intAt(CSVRecord row, int index) {
+    return Integer.parseInt(row.get(index));
+  }
+}