Skip to content

Commit

Permalink
Removing unnecessary flow based argument and option (#8342)
Browse files Browse the repository at this point in the history
  • Loading branch information
ilyasoifer authored May 25, 2023
1 parent 0ff9b91 commit 947fbce
Show file tree
Hide file tree
Showing 7 changed files with 6 additions and 142 deletions.
Original file line number Diff line number Diff line change
@@ -1,14 +1,8 @@
package org.broadinstitute.hellbender.tools;

import org.apache.commons.lang3.ArrayUtils;
import org.broadinstitute.barclay.argparser.Advanced;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.Hidden;
import org.broadinstitute.hellbender.cmdline.ReadFilterArgumentDefinitions;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.*;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.cmdline.ModeArgumentUtils;
import org.broadinstitute.hellbender.utils.read.FlowBasedRead;

import java.io.Serializable;

Expand All @@ -28,8 +22,6 @@ public class FlowBasedArgumentCollection implements Serializable {
public static final String PROB_SF_LONG_NAME = "flow-probability-scaling-factor";
public static final String RETAIN_MAX_N_PROBS_BASE_LONG_NAME = "flow-retain-max-n-probs-base-format";
public static final String FLOW_ORDER_CYCLE_LENGTH_LONG_NAME = "flow-order-cycle-length";
public static final String NUM_UNCERTAIN_FLOWS_LONG_NAME = "flow-number-of-uncertain-flows-to-clip";
public static final String FIRST_UNCERTAIN_FLOW_LONG_NAME = "flow-nucleotide-of-first-uncertain-flow";
public static final String FLOW_MATRIX_MODS_LONG_NAME = "flow-matrix-mods";
public static final String FLOW_KEEP_BOUNDARY_FLOWS_LONG_NAME = "keep-boundary-flows";

Expand All @@ -47,8 +39,6 @@ public class FlowBasedArgumentCollection implements Serializable {
private static final boolean DEFAULT_RETAIN_MAX_N_PROBS = false;
private static final int DEFAULT_PROB_SCALING_FACTOR = 10;
private static final int DEFAULT_FLOW_ORDER_CYCLE_LENGTH = 4;
private static final int DEFAULT_NUM_UNCERTAIN_FLOWS = 0;
private static final String DEFAULT_FIRST_UNCERTAIN_FLOW = "T";
private static final boolean DEFAULT_FLOW_USE_T0_TAG = false;

@Advanced
Expand Down Expand Up @@ -104,16 +94,6 @@ public class FlowBasedArgumentCollection implements Serializable {
@Argument(fullName = FLOW_ORDER_CYCLE_LENGTH_LONG_NAME, doc = "Length of flow order cycle", optional=true)
public int flowOrderCycleLength = DEFAULT_FLOW_ORDER_CYCLE_LENGTH;

@Advanced
@Hidden
@Argument(fullName = NUM_UNCERTAIN_FLOWS_LONG_NAME, doc = "Number of uncertain flows to trim on the 5' end of the read", optional=true)
public int flowNumUncertainFlows = DEFAULT_NUM_UNCERTAIN_FLOWS;

@Advanced
@Hidden
@Argument(fullName = FIRST_UNCERTAIN_FLOW_LONG_NAME, doc = "Nucleotide that is being read in the first uncertain (5') flow", optional=true)
public String flowFirstUncertainFlowBase = DEFAULT_FIRST_UNCERTAIN_FLOW;

@Advanced
@Argument(fullName=FLOW_MATRIX_MODS_LONG_NAME, doc="Modifications instructions to the read flow matrix. " +
"Format is src,dst{,src,dst}+. Example: 10,12,11,12 - these instructions will copy element 10 into 11 and 12", optional = true)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,11 @@
import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState;
import org.broadinstitute.hellbender.utils.logging.OneShotLogger;
import org.broadinstitute.hellbender.utils.pileup.PileupBasedAlleles;
import org.broadinstitute.hellbender.utils.pileup.ReadPileup;
import org.broadinstitute.hellbender.utils.read.*;
import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAligner;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.hellbender.utils.read.FlowBasedRead;
import org.broadinstitute.hellbender.tools.FlowBasedArgumentCollection;

import java.io.File;
Expand Down Expand Up @@ -130,7 +128,6 @@ public static void finalizeRegion(final AssemblyRegion region,
final boolean correctOverlappingBaseQualities,
final boolean softClipLowQualityEnds,
final boolean overrideSoftclipFragmentCheck,
final FlowBasedArgumentCollection fbargs,
final boolean trackHardclippedReads) {
if ( region.isFinalized() ) {
return;
Expand All @@ -142,12 +139,9 @@ public static void finalizeRegion(final AssemblyRegion region,
final List<GATKRead> hardClippedReadsToUse = new ArrayList<>();

for (final GATKRead originalRead : region.getReads()) {
// TODO unclipping soft clips may introduce bases that aren't in the extended region if the unclipped bases
// TODO include a deletion w.r.t. the reference. We must remove kmers that occur before the reference haplotype start
GATKRead readTemp = FlowBasedReadUtils.isFlow(originalRead) ? FlowBasedRead.hardClipUncertainBases(originalRead, readsHeader, fbargs):originalRead;
readTemp = dontUseSoftClippedBases || ! ( overrideSoftclipFragmentCheck || ReadUtils.hasWellDefinedFragmentSize(readTemp)) ?
ReadClipper.hardClipSoftClippedBases(readTemp) : revertSoftClippedBases(readTemp);

GATKRead readTemp = dontUseSoftClippedBases || ! ( overrideSoftclipFragmentCheck || ReadUtils.hasWellDefinedFragmentSize(originalRead)) ?
ReadClipper.hardClipSoftClippedBases(originalRead) : revertSoftClippedBases(originalRead);

final GATKRead read = (softClipLowQualityEnds ? ReadClipper.softClipLowQualEnds(readTemp, minTailQualityToUse) :
ReadClipper.hardClipLowQualEnds(readTemp, minTailQualityToUse));
Expand Down Expand Up @@ -351,7 +345,6 @@ public static AssemblyResultSet assembleReads(final AssemblyRegion region,
correctOverlappingBaseQualities,
argumentCollection.softClipLowQualityEnds,
argumentCollection.overrideSoftclipFragmentCheck,
fbargs,
true);


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.samtools.util.Tuple;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
Expand Down Expand Up @@ -60,7 +59,6 @@

/**
* The core engine for the HaplotypeCaller that does all of the actual work of the tool.
*
* Usage:
* -Pass the HaplotypeCaller args into the constructor, which will initialize the HC engine completely.
* -Get the appropriate VCF or GVCF writer (depending on our arguments) from {@link #makeVCFWriter}
Expand Down Expand Up @@ -985,7 +983,6 @@ protected List<VariantContext> referenceModelForNoVariation(final AssemblyRegion
! hcArgs.doNotCorrectOverlappingBaseQualities,
hcArgs.softClipLowQualityEnds,
hcArgs.overrideSoftclipFragmentCheck,
hcArgs.fbargs,
hcArgs.pileupDetectionArgs.usePileupDetection);
}

Expand Down Expand Up @@ -1043,7 +1040,7 @@ protected Set<GATKRead> filterNonPassingReads( final AssemblyRegion activeRegion
!ReadFilterLibrary.MATE_ON_SAME_CONTIG_OR_NO_MAPPED_MATE.test(rec) ||
(hcArgs.keepRG != null && !rec.getReadGroup().equals(hcArgs.keepRG)) ) {
if (HaplotypeCallerGenotypingDebugger.isEnabled()) {
HaplotypeCallerGenotypingDebugger.println("Filtered before assembly the read: " + rec.toString());
HaplotypeCallerGenotypingDebugger.println("Filtered before assembly the read: " + rec);
}
readsToRemove.add(rec);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.util.Locatable;
import htsjdk.samtools.util.Tuple;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
Expand Down Expand Up @@ -93,7 +92,7 @@ public final class Mutect2Engine implements AssemblyRegionEvaluator, AutoCloseab
public static final int HUGE_FRAGMENT_LENGTH = 1_000_000;
public static final int MIN_TAIL_QUALITY = 9;

private M2ArgumentCollection MTAC;
final private M2ArgumentCollection MTAC;
private SAMFileHeader header;
private final int minCallableDepth;
public static final String CALLABLE_SITES_NAME = "callable";
Expand Down Expand Up @@ -588,7 +587,6 @@ private List<VariantContext> referenceModelForNoVariation(final AssemblyRegion r
false,
false,
false,
MTAC.fbargs,
MTAC.pileupDetectionArgs.usePileupDetection); //take off soft clips and low Q tails before we calculate likelihoods


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -998,76 +998,6 @@ private double[] phredToProb(final int [] kq) {
return result;
}

/**
* Hard clips uncertain flows (currently four first flows read that often generate noisy base calls
* @param inputRead GATKREad
* @param flowOrder flow order string from the SAM header
* @param fbargs arguments
* @return read with flowNumUncertainFlows trimmed
*/
public static GATKRead hardClipUncertainBases(final GATKRead inputRead, final String flowOrder,
final FlowBasedArgumentCollection fbargs ){
Utils.nonNull(fbargs);
Utils.validateArg(fbargs.flowFirstUncertainFlowBase.length()==1, "First uncertain flow base should be of length 1");
ReadClipper clipper = new ReadClipper(inputRead);
final String adjustedFlowOrder = adjustFlowOrderToUncertainFlow(flowOrder, fbargs.flowFirstUncertainFlowBase.charAt(0), fbargs.flowOrderCycleLength);
if (inputRead.isReverseStrand()) {
final int nUncertain = nUncertainBases(inputRead, adjustedFlowOrder, fbargs.flowNumUncertainFlows, false);
clipper.addOp(new ClippingOp(inputRead.getLength()-nUncertain, inputRead.getLength()-1));
}
else {
final int nUncertain = nUncertainBases(inputRead, adjustedFlowOrder, fbargs.flowNumUncertainFlows, true);
clipper.addOp(new ClippingOp(0, nUncertain-1));
}

return clipper.clipRead(ClippingRepresentation.HARDCLIP_BASES);

}

/**
* Hard clips uncertain flows (currently four first flows read that often generate noisy base calls
* @param inputRead GATKREad
* @param samHeader sam file header (to extract to flow order from
* @param fbargs arguments
* @return read with flowNumUncertainFlows trimmed
*/
public static GATKRead hardClipUncertainBases(final GATKRead inputRead, final SAMFileHeader samHeader,
final FlowBasedArgumentCollection fbargs ){
Utils.nonNull(fbargs);
String flowOrder = samHeader.getReadGroup(inputRead.getReadGroup()).getFlowOrder();
if (flowOrder==null){
throw new GATKException("Unable to trim uncertain bases without flow order information");
}
flowOrder = flowOrder.substring(0,fbargs.flowOrderCycleLength);
return hardClipUncertainBases(inputRead, flowOrder, fbargs);
}

private static String adjustFlowOrderToUncertainFlow(final String flowOrder,
final char firstUncertainFlowBase,
final int flowOrderLength){
String result = flowOrder + flowOrder;
final int adjustedStartPos = result.indexOf(firstUncertainFlowBase);
return result.substring(adjustedStartPos, adjustedStartPos+flowOrderLength);
}

//find how many bases are output from uncertain flows
private static int nUncertainBases(final GATKRead inputRead, final String flowOrder,
final int nUncertainFlows, final boolean isForward){
byte [] bases;
if (isForward){
bases = inputRead.getBases();
} else {
bases = ReadUtils.getBasesReverseComplement(inputRead).getBytes();
}

final int[] key = FlowBasedKeyCodec.baseArrayToKey(bases, flowOrder);
final int nTrimFlows = Math.min(nUncertainFlows, key.length);
int result = 0;
for (int i = 0 ; i < nTrimFlows; i++){
result += key[i];
}
return result;
}

public static void setMinimalReadLength(int minimalReadLength) {
FlowBasedRead.minimalReadLength = minimalReadLength;
Expand All @@ -1088,7 +1018,7 @@ private void readVestigialFlowMatrixFromTI(final String _flowOrder) {
flowMatrix = new double[maxHmer+1][key.length];
for (int i = 0 ; i < maxHmer+1; i++) {
for (int j = 0 ; j < key.length; j++ ){
flowMatrix[i][j] = fbargs.fillingValue;;
flowMatrix[i][j] = fbargs.fillingValue;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import org.apache.commons.lang3.tuple.Pair;
import org.broadinstitute.hellbender.GATKBaseTest;
import org.broadinstitute.gatk.nativebindings.smithwaterman.SWParameters;
import org.broadinstitute.hellbender.GATKBaseTest;
import org.broadinstitute.hellbender.engine.AssemblyRegion;
import org.broadinstitute.hellbender.testutils.VariantContextTestUtils;
import org.broadinstitute.hellbender.utils.BaseUtils;
Expand Down Expand Up @@ -72,7 +71,7 @@ public void testfinalizeRegion() {
SampleList sampleList = SampleList.singletonSampleList("tumor");
Byte minbq = 9;
// NOTE: this test MUST be run with correctOverlappingBaseQualities enabled otherwise this test can succeed even with unsafe code
AssemblyBasedCallerUtils.finalizeRegion(activeRegion, false, false, minbq, header, sampleList, true, false, false, null, false);
AssemblyBasedCallerUtils.finalizeRegion(activeRegion, false, false, minbq, header, sampleList, true, false, false, false);

// make sure that the original reads are not changed due to finalizeRegion()
Assert.assertTrue(reads.get(0).convertToSAMRecord(header).equals(orgRead0));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,14 @@
import org.broadinstitute.hellbender.GATKBaseTest;
import org.broadinstitute.hellbender.testutils.IntegrationTestSpec;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.broadinstitute.hellbender.tools.FlowBasedArgumentCollection;

import java.io.File;
import java.io.FileWriter;
import java.nio.file.FileSystems;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class FlowBasedReadUnitTest extends GATKBaseTest {

Expand Down Expand Up @@ -122,16 +119,6 @@ void testBAMFormatParsingWithT0() throws Exception{
}


@Test (dataProvider = "makeReads")
public void testUncertainFlowTrimming(final GATKRead read, int nTrim, String uncertainFlowBase, final byte[] output, final int start, final int end) {
FlowBasedArgumentCollection fbargs = new FlowBasedArgumentCollection();
fbargs.flowNumUncertainFlows = nTrim;
fbargs.flowFirstUncertainFlowBase = uncertainFlowBase;
GATKRead trimmedRead = FlowBasedRead.hardClipUncertainBases(read, "TAGC", fbargs);
Assert.assertEquals(trimmedRead.getBases(), output);
Assert.assertEquals(trimmedRead.getStart(), start);
Assert.assertEquals(trimmedRead.getEnd(), end);
}

private GATKRead makeRead(final byte[] bases, final boolean isReverse) {

Expand All @@ -145,26 +132,6 @@ private GATKRead makeRead(final byte[] bases, final boolean isReverse) {
return read;
}

@DataProvider(name="makeReads")
public Object[][] makeMultipleReads(){
List<Object[]> tests = new ArrayList<>();
tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, false), 4, "T", new byte[] {'G','A'}, 104, 105});
tests.add(new Object[]{makeRead(new byte[]{'A','C','C','G','A','T'}, false), 4, "T", new byte[] {'G','A','T'}, 103, 105});
tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, true), 4, "T", new byte[] {'T','A','G','C'},100,103});
tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, false), 1, "T", new byte[] {'A','G','C','G','A'}, 101, 105});
tests.add(new Object[]{makeRead(new byte[]{'A','C','C','G','A','T'}, false), 2, "T", new byte[] {'C','C','G','A','T'}, 101, 105});
tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, true), 10, "T", new byte[] {},0,0});

tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, false), 4, "A", new byte[] {'A','G','C','G','A'}, 101, 105});
tests.add(new Object[]{makeRead(new byte[]{'A','C','C','G','A','T'}, false), 4, "A", new byte[] {'G','A','T'}, 103, 105});
tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, true), 4, "A", new byte[] {'T','A','G','C','G'},100,104});
tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, false), 1, "A", new byte[] {'T','A','G','C','G','A'}, 100, 105});
tests.add(new Object[]{makeRead(new byte[]{'A','C','C','G','A','T'}, false), 2, "A", new byte[] {'C','C','G','A','T'}, 101, 105});
tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, true), 10, "A", new byte[] {'T','A','G'},100,102});

return tests.toArray(new Object[][]{});
}

@Test
public void testFlowBasedReadConstructorEnforcesMatrix() {

Expand Down

0 comments on commit 947fbce

Please sign in to comment.