Skip to content

Commit

Permalink
Fix ReblockGVCF NPE on no-calls
Browse files Browse the repository at this point in the history
Copy AS annotations too
Put RAW_MQandDP back to int tuple so GDB doesn't duck it up
Fix MQ bug, upgrade from experimental to beta
  • Loading branch information
ldgauthier committed Apr 24, 2019
1 parent bd8ad14 commit e3dadcd
Show file tree
Hide file tree
Showing 15 changed files with 605 additions and 86 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,21 @@
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMRecord;
import htsjdk.variant.variantcontext.Allele;
import org.apache.commons.lang.StringUtils;
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.*;
import org.broadinstitute.hellbender.utils.pairhmm.PairHMM;
import org.broadinstitute.hellbender.utils.read.AlignmentUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;

import java.util.ArrayList;
import java.util.List;
import javax.sound.sampled.Line;
import java.util.*;

public final class AnnotationUtils {
public static final String ALLELE_SPECIFIC_SPLIT_DELIM = "\\|"; //String.split takes a regex, so we need to escape the pipe
public static final String ALLELE_SPECIFIC_PRINT_DELIM = "|";
public static final String ALLELE_SPECIFIC_REDUCED_DELIM = ",";

private AnnotationUtils(){}

public static final String LIST_DELIMITER = ",";
Expand All @@ -39,4 +45,48 @@ public static String encodeStringList( final List<String> stringList) {
return StringUtils.join(stringList, ",");
}

/**
* Helper function to convert a List of Strings to a comma-separated String
* @param somethingList the ArrayList with String data
* @return a comma-separated String
*/
public static String encodeAnyASList( final List<?> somethingList) {
return StringUtils.join(somethingList, ALLELE_SPECIFIC_PRINT_DELIM).replaceAll("\\[|\\]", ""); //Who actually wants brackets at the ends of their string? Who???
}

/**
* Helper function to determine if an annotation is allele-specific
* @param annotation the annotation to be tested
* @return true if the annotation is expected to have values per-allele
*/
public static boolean isAlleleSpecific(final InfoFieldAnnotation annotation) {
if (annotation instanceof AS_RankSumTest) {
return true;
}
if (annotation instanceof AS_StrandBiasTest) {
return true;
}
if (annotation instanceof AS_RMSMappingQuality) {
return true;
}
if (annotation instanceof AS_StandardAnnotation) {
return true;
}
return false;
}

/**
* Handles all the Java and htsjdk parsing shenanigans
* @param rawDataString should not have surrounding brackets
* @return
*/
public static List<String> getAlleleLengthListOfString(String rawDataString) {
if (rawDataString == null) {
return Collections.emptyList();
}
if (rawDataString.startsWith("[")) {
rawDataString = rawDataString.substring(1, rawDataString.length() - 1).replaceAll("\\s", "");
}
return Arrays.asList(rawDataString.split(ALLELE_SPECIFIC_SPLIT_DELIM));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.logging.OneShotLogger;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;

import java.io.File;
Expand Down Expand Up @@ -49,7 +50,7 @@
@DocumentedFeature(groupName=HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Likelihood-based test for the consanguinity among samples (InbreedingCoeff)")
public final class InbreedingCoeff extends PedigreeAnnotation implements StandardAnnotation {

private static final Logger logger = LogManager.getLogger(InbreedingCoeff.class);
private static final OneShotLogger logger = new OneShotLogger(InbreedingCoeff.class);
private static final int MIN_SAMPLES = 10;
private static final boolean ROUND_GENOTYPE_COUNTS = false;

Expand All @@ -72,13 +73,14 @@ public Map<String, Object> annotate(final ReferenceContext ref,
Utils.nonNull(vc);
final GenotypesContext genotypes = getFounderGenotypes(vc);
if (genotypes == null || genotypes.size() < MIN_SAMPLES || !vc.isVariant()) {
logger.warn("InbreedingCoeff will not be calculated; at least " + MIN_SAMPLES + " samples must have called genotypes");
return Collections.emptyMap();
}
final Pair<Integer, Double> sampleCountCoeff = calculateIC(vc, genotypes);
final int sampleCount = sampleCountCoeff.getLeft();
final double F = sampleCountCoeff.getRight();
if (sampleCount < MIN_SAMPLES) {
logger.warn("Annotation will not be calculated, must provide at least " + MIN_SAMPLES + " samples");
logger.warn("InbreedingCoeff will not be calculated for at least one position; at least " + MIN_SAMPLES + " samples must have called genotypes");
return Collections.emptyMap();
}
return Collections.singletonMap(getKeyNames().get(0), String.format("%.4f", F));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,7 @@
*/
public abstract class AS_RankSumTest extends RankSumTest implements ReducibleAnnotation {
private static final Logger logger = LogManager.getLogger(AS_RankSumTest.class);
public static final String SPLIT_DELIM = "\\|"; //String.split takes a regex, so we need to escape the pipe
public static final String PRINT_DELIM = "|";
public static final String RAW_DELIM = ",";
public static final String REDUCED_DELIM = ",";

@Override
public Map<String, Object> annotate(final ReferenceContext ref,
Expand Down Expand Up @@ -136,7 +133,7 @@ protected String makeRawAnnotationString(final List<Allele> vcAlleles, final Map
for (int i = 0; i< vcAlleles.size(); i++) {
if (!vcAlleles.get(i).isReference()) {
if (i != 0) { //strings will always start with a printDelim because we won't have values for the reference allele, but keep this for consistency with other annotations
annotationString += PRINT_DELIM;
annotationString += AnnotationUtils.ALLELE_SPECIFIC_PRINT_DELIM;
}
final Double alleleValue = perAlleleValues.get(vcAlleles.get(i));
//can be null if there are no ref reads
Expand Down Expand Up @@ -253,7 +250,7 @@ protected void parseRawDataString(final ReducibleAnnotationData<Histogram> myDat
}
//TODO handle misformatted annotation field more gracefully
//rawDataPerAllele is a per-sample list of the rank sum statistic for each allele
final String[] rawDataPerAllele = rawDataNoBrackets.split(SPLIT_DELIM);
final String[] rawDataPerAllele = rawDataNoBrackets.split(AnnotationUtils.ALLELE_SPECIFIC_SPLIT_DELIM);
for (int i=0; i<rawDataPerAllele.length; i++) {
final String alleleData = rawDataPerAllele[i];
final Histogram alleleList = perAlleleValues.get(myData.getAlleles().get(i));
Expand Down Expand Up @@ -289,7 +286,7 @@ private String makeReducedAnnotationString(final VariantContext vc, final Map<Al
String annotationString = "";
for (final Allele a : vc.getAlternateAlleles()) {
if (!annotationString.isEmpty()) {
annotationString += REDUCED_DELIM;
annotationString += AnnotationUtils.ALLELE_SPECIFIC_REDUCED_DELIM;
}
if (!perAltRankSumResults.containsKey(a)) {
logger.warn("ERROR: VC allele not found in annotation alleles -- maybe there was trimming?");
Expand All @@ -305,7 +302,7 @@ protected String makeCombinedAnnotationString(final List<Allele> vcAlleles, fina
for (int i = 0; i< vcAlleles.size(); i++) {
if (!vcAlleles.get(i).isReference()) {
if (i != 0) { //strings will always start with a printDelim because we won't have values for the reference allele, but keep this for consistency with other annotations
annotationString += PRINT_DELIM;
annotationString += AnnotationUtils.ALLELE_SPECIFIC_PRINT_DELIM;
}
final Histogram alleleValue = perAlleleValues.get(vcAlleles.get(i));
//can be null if there are no ref reads
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils;
import org.broadinstitute.hellbender.tools.walkers.annotator.StrandBiasTest;
import org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.hellbender.utils.pileup.PileupElement;
Expand Down Expand Up @@ -165,32 +166,25 @@ public Map<String, Object> finalizeRawData(final VariantContext vc, final Varia
}

protected void parseRawDataString(ReducibleAnnotationData<List<Integer>> myData) {
String rawDataString = myData.getRawData();
if (rawDataString.startsWith("[")) {
rawDataString = rawDataString.substring(1,rawDataString.length()-1);
List<String> values = AnnotationUtils.getAlleleLengthListOfString(myData.getRawData());
if (values.size() != myData.getAlleles().size()) {
throw new IllegalStateException("Number of alleles and number of allele-specific entries do not match. " +
"Allele-specific annotations should have an entry for each allele including the reference.");
}
String[] rawDataPerAllele;
String[] rawListEntriesAsStringVector;

Map<Allele, List<Integer>> perAlleleValues = new HashMap<>();
//Initialize maps
for (Allele current : myData.getAlleles()) {
perAlleleValues.put(current, new LinkedList<Integer>());
}
//rawDataPerAllele is the list of values for each allele (each of variable length)
rawDataPerAllele = rawDataString.split(SPLIT_DELIM);
for (int i=0; i<rawDataPerAllele.length; i++) {
String alleleData = rawDataPerAllele[i];
if (!alleleData.isEmpty()) {
List<Integer> alleleList = perAlleleValues.get(myData.getAlleles().get(i));
rawListEntriesAsStringVector = alleleData.split(",");
//Read counts will only ever be integers
for (String s : rawListEntriesAsStringVector) {
if (!s.isEmpty()) {
alleleList.add(Integer.parseInt(s.trim()));
}
for (int i = 0; i < values.size(); i++) {
List<Integer> perAlleleList = new ArrayList<>();
String[] rawListEntriesAsStringVector = values.get(i).split(",");
//Read counts will only ever be integers
for (String s : rawListEntriesAsStringVector) {
if (!s.isEmpty()) {
perAlleleList.add(Integer.parseInt(s.trim()));
}
}
perAlleleValues.put(myData.getAlleles().get(i), perAlleleList);
}

myData.setAttributeMap(perAlleleValues);
}

Expand Down
Loading

0 comments on commit e3dadcd

Please sign in to comment.