Skip to content

Commit

Permalink
gCNV vcf cleanup (#6352)
Browse files Browse the repository at this point in the history
Only emit called alleles as ALT alleles
Copy the QS score to the QUAL field
  • Loading branch information
ldgauthier authored Jan 15, 2020
1 parent c4f18e2 commit ab73d32
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@ VariantContext composeVariantContext(final IntegerCopyNumberSegment segment) {
int copyNumberCall = segment.getCallIntegerCopyNumberState().getCopyNumber();

final VariantContextBuilder variantContextBuilder = new VariantContextBuilder();
variantContextBuilder.alleles(ALL_ALLELES);
variantContextBuilder.chr(contig);
variantContextBuilder.start(start);
variantContextBuilder.stop(end);
Expand All @@ -155,8 +154,14 @@ VariantContext composeVariantContext(final IntegerCopyNumberSegment segment) {
genotypeBuilder.attribute(QSE, FastMath.round(segment.getQualityEnd()));
final Genotype genotype = genotypeBuilder.make();

final List<Allele> vcAlleles = new ArrayList<>(Collections.singletonList(REF_ALLELE));
if (!allele.equals(REF_ALLELE)) {
vcAlleles.add(allele);
}
variantContextBuilder.alleles(vcAlleles);
variantContextBuilder.attribute(VCFConstants.END_KEY, end);
variantContextBuilder.genotypes(genotype);
variantContextBuilder.log10PError(segment.getQualitySomeCalled()/-10.0);
return variantContextBuilder.make();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ public void testVariantComposition(final int refAutosomalCopyNumber,
Assert.assertEquals(var.getContig(), segment.getContig());
Assert.assertEquals(var.getStart(), segment.getStart());
Assert.assertEquals(var.getEnd(), segment.getEnd());
Assert.assertEquals(var.getAlleles(), GermlineCNVSegmentVariantComposer.ALL_ALLELES);

final Genotype gt = var.getGenotype(IntegerCopyNumberSegmentCollectionUnitTest.EXPECTED_SAMPLE_NAME);

Expand All @@ -62,6 +61,9 @@ public void testVariantComposition(final int refAutosomalCopyNumber,
expectedAllele = GermlineCNVSegmentVariantComposer.REF_ALLELE;
}
Assert.assertEquals(actualAllele, expectedAllele);
Assert.assertTrue(var.getAlleles().size() == (expectedAllele.equals(GermlineCNVSegmentVariantComposer.REF_ALLELE) ? 1 : 2));
Assert.assertTrue(var.getAlleles().contains(Allele.REF_N));
Assert.assertTrue(var.getAlleles().contains(expectedAllele));

/* assert correctness of quality metrics */
Assert.assertEquals(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,14 @@
##FORMAT=<ID=QSE,Number=1,Type=Integer,Description="Complementary Phred-scaled probability that the segment end position is a genuine copy-number changepoint">
##FORMAT=<ID=QSS,Number=1,Type=Integer,Description="Complementary Phred-scaled probability that the segment start position is a genuine copy-number changepoint">
##INFO=<ID=END,Number=1,Type=Integer,Description="End coordinate of the variant">
##contig=<ID=1,length=986037>
##contig=<ID=2,length=1914216>
##contig=<ID=3,length=3194368>
##contig=<ID=X,length=2139367>
##contig=<ID=Y,length=2089367>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE_000
1 68993 CNV_1_68993_986037 N <DEL>,<DUP> . . END=986037 GT:CN:NP:QA:QS:QSE:QSS 0:2:102:32:3077:91:159
2 38716 CNV_2_38716_1914216 N <DEL>,<DUP> . . END=1914216 GT:CN:NP:QA:QS:QSE:QSS 0:2:102:41:3077:103:102
3 238181 CNV_3_238181_3194368 N <DEL>,<DUP> . . END=3194368 GT:CN:NP:QA:QS:QSE:QSS 0:2:102:41:3077:66:105
X 197963 CNV_X_197963_2139367 N <DEL>,<DUP> . . END=2139367 GT:CN:NP:QA:QS:QSE:QSS 0:2:102:56:3077:133:119
Y 147963 CNV_Y_147963_2089367 N <DEL>,<DUP> . . END=2089367 GT:CN:NP:QA:QS:QSE:QSS 0:0:102:81:3077:526:1151
1 68993 CNV_1_68993_986037 N . 3076.53 . END=986037 GT:CN:NP:QA:QS:QSE:QSS 0:2:102:32:3077:91:159
2 38716 CNV_2_38716_1914216 N . 3076.53 . END=1914216 GT:CN:NP:QA:QS:QSE:QSS 0:2:102:41:3077:103:102
3 238181 CNV_3_238181_3194368 N . 3076.53 . END=3194368 GT:CN:NP:QA:QS:QSE:QSS 0:2:102:41:3077:66:105
X 197963 CNV_X_197963_2139367 N . 3076.53 . END=2139367 GT:CN:NP:QA:QS:QSE:QSS 0:2:102:56:3077:133:119
Y 147963 CNV_Y_147963_2089367 N . 3076.53 . END=2089367 GT:CN:NP:QA:QS:QSE:QSS 0:0:102:81:3077:526:1151
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,22 @@
##FORMAT=<ID=QSE,Number=1,Type=Integer,Description="Complementary Phred-scaled probability that the segment end position is a genuine copy-number changepoint">
##FORMAT=<ID=QSS,Number=1,Type=Integer,Description="Complementary Phred-scaled probability that the segment start position is a genuine copy-number changepoint">
##INFO=<ID=END,Number=1,Type=Integer,Description="End coordinate of the variant">
##contig=<ID=1,length=986037>
##contig=<ID=2,length=1914216>
##contig=<ID=3,length=3194368>
##contig=<ID=X,length=2139367>
##contig=<ID=Y,length=2089367>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE_001
1 68993 CNV_1_68993_986037 N <DEL>,<DUP> . . END=986037 GT:CN:NP:QA:QS:QSE:QSS 0:2:102:68:3077:178:173
2 38716 CNV_2_38716_230141 N <DEL>,<DUP> . . END=230141 GT:CN:NP:QA:QS:QSE:QSS 0:2:5:125:594:184:125
2 230925 CNV_2_230925_231288 N <DEL>,<DUP> . . END=231288 GT:CN:NP:QA:QS:QSE:QSS 1:0:1:669:669:669:669
2 233003 CNV_2_233003_234369 N <DEL>,<DUP> . . END=234369 GT:CN:NP:QA:QS:QSE:QSS 2:4:2:35:49:64:35
2 242700 CNV_2_242700_1914216 N <DEL>,<DUP> . . END=1914216 GT:CN:NP:QA:QS:QSE:QSS 0:2:94:60:3077:115:60
3 238181 CNV_3_238181_3194368 N <DEL>,<DUP> . . END=3194368 GT:CN:NP:QA:QS:QSE:QSS 0:2:102:61:3077:133:113
X 197963 CNV_X_197963_221961 N <DEL>,<DUP> . . END=221961 GT:CN:NP:QA:QS:QSE:QSS 0:1:8:137:1200:292:137
X 223929 CNV_X_223929_224644 N <DEL>,<DUP> . . END=224644 GT:CN:NP:QA:QS:QSE:QSS 2:3:2:72:134:69:105
X 227988 CNV_X_227988_228391 N <DEL>,<DUP> . . END=228391 GT:CN:NP:QA:QS:QSE:QSS 2:4:1:19:19:19:19
X 229335 CNV_X_229335_229688 N <DEL>,<DUP> . . END=229688 GT:CN:NP:QA:QS:QSE:QSS 0:1:1:57:57:57:57
X 230719 CNV_X_230719_230984 N <DEL>,<DUP> . . END=230984 GT:CN:NP:QA:QS:QSE:QSS 2:2:1:114:114:114:114
X 294570 CNV_X_294570_2139367 N <DEL>,<DUP> . . END=2139367 GT:CN:NP:QA:QS:QSE:QSS 0:1:89:115:3077:203:145
Y 147963 CNV_Y_147963_2089367 N <DEL>,<DUP> . . END=2089367 GT:CN:NP:QA:QS:QSE:QSS 0:1:102:119:3077:241:216
1 68993 CNV_1_68993_986037 N . 3076.53 . END=986037 GT:CN:NP:QA:QS:QSE:QSS 0:2:102:68:3077:178:173
2 38716 CNV_2_38716_230141 N . 594.27 . END=230141 GT:CN:NP:QA:QS:QSE:QSS 0:2:5:125:594:184:125
2 230925 CNV_2_230925_231288 N <DEL> 669.46 . END=231288 GT:CN:NP:QA:QS:QSE:QSS 1:0:1:669:669:669:669
2 233003 CNV_2_233003_234369 N <DUP> 48.79 . END=234369 GT:CN:NP:QA:QS:QSE:QSS 1:4:2:35:49:64:35
2 242700 CNV_2_242700_1914216 N . 3076.53 . END=1914216 GT:CN:NP:QA:QS:QSE:QSS 0:2:94:60:3077:115:60
3 238181 CNV_3_238181_3194368 N . 3076.53 . END=3194368 GT:CN:NP:QA:QS:QSE:QSS 0:2:102:61:3077:133:113
X 197963 CNV_X_197963_221961 N . 1199.68 . END=221961 GT:CN:NP:QA:QS:QSE:QSS 0:1:8:137:1200:292:137
X 223929 CNV_X_223929_224644 N <DUP> 133.65 . END=224644 GT:CN:NP:QA:QS:QSE:QSS 1:3:2:72:134:69:105
X 227988 CNV_X_227988_228391 N <DUP> 19 . END=228391 GT:CN:NP:QA:QS:QSE:QSS 1:4:1:19:19:19:19
X 229335 CNV_X_229335_229688 N . 56.74 . END=229688 GT:CN:NP:QA:QS:QSE:QSS 0:1:1:57:57:57:57
X 230719 CNV_X_230719_230984 N <DUP> 114.10 . END=230984 GT:CN:NP:QA:QS:QSE:QSS 1:2:1:114:114:114:114
X 294570 CNV_X_294570_2139367 N . 3076.53 . END=2139367 GT:CN:NP:QA:QS:QSE:QSS 0:1:89:115:3077:203:145
Y 147963 CNV_Y_147963_2089367 N . 3076.53 . END=2089367 GT:CN:NP:QA:QS:QSE:QSS 0:1:102:119:3077:241:216
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,16 @@
##FORMAT=<ID=QSE,Number=1,Type=Integer,Description="Complementary Phred-scaled probability that the segment end position is a genuine copy-number changepoint">
##FORMAT=<ID=QSS,Number=1,Type=Integer,Description="Complementary Phred-scaled probability that the segment start position is a genuine copy-number changepoint">
##INFO=<ID=END,Number=1,Type=Integer,Description="End coordinate of the variant">
##contig=<ID=1,length=986037>
##contig=<ID=2,length=1914216>
##contig=<ID=3,length=3194368>
##contig=<ID=X,length=2139367>
##contig=<ID=Y,length=2089367>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE_002
1 68993 CNV_1_68993_986037 N <DEL>,<DUP> . . END=986037 GT:CN:NP:QA:QS:QSE:QSS 0:2:102:56:3077:163:146
2 38716 CNV_2_38716_1914216 N <DEL>,<DUP> . . END=1914216 GT:CN:NP:QA:QS:QSE:QSS 0:2:102:30:3077:165:164
3 238181 CNV_3_238181_1414736 N <DEL>,<DUP> . . END=1414736 GT:CN:NP:QA:QS:QSE:QSS 0:2:42:54:3077:129:88
3 1415190 CNV_3_1415190_1415854 N <DEL>,<DUP> . . END=1415854 GT:CN:NP:QA:QS:QSE:QSS 1:0:2:593:1174:596:654
3 1418591 CNV_3_1418591_3194368 N <DEL>,<DUP> . . END=3194368 GT:CN:NP:QA:QS:QSE:QSS 0:2:58:74:3077:102:121
X 197963 CNV_X_197963_2139367 N <DEL>,<DUP> . . END=2139367 GT:CN:NP:QA:QS:QSE:QSS 0:2:102:44:3077:77:151
Y 147963 CNV_Y_147963_2089367 N <DEL>,<DUP> . . END=2089367 GT:CN:NP:QA:QS:QSE:QSS 0:0:102:243:3077:902:733
1 68993 CNV_1_68993_986037 N . 3076.53 . END=986037 GT:CN:NP:QA:QS:QSE:QSS 0:2:102:56:3077:163:146
2 38716 CNV_2_38716_1914216 N . 3076.53 . END=1914216 GT:CN:NP:QA:QS:QSE:QSS 0:2:102:30:3077:165:164
3 238181 CNV_3_238181_1414736 N . 3076.53 . END=1414736 GT:CN:NP:QA:QS:QSE:QSS 0:2:42:54:3077:129:88
3 1415190 CNV_3_1415190_1415854 N <DEL> 1173.84 . END=1415854 GT:CN:NP:QA:QS:QSE:QSS 1:0:2:593:1174:596:654
3 1418591 CNV_3_1418591_3194368 N . 3076.53 . END=3194368 GT:CN:NP:QA:QS:QSE:QSS 0:2:58:74:3077:102:121
X 197963 CNV_X_197963_2139367 N . 3076.53 . END=2139367 GT:CN:NP:QA:QS:QSE:QSS 0:2:102:44:3077:77:151
Y 147963 CNV_Y_147963_2089367 N . 3076.53 . END=2089367 GT:CN:NP:QA:QS:QSE:QSS 0:0:102:243:3077:902:733

0 comments on commit ab73d32

Please sign in to comment.