Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Removing unnecessary argument and option #8342

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,14 +1,8 @@
package org.broadinstitute.hellbender.tools;

import org.apache.commons.lang3.ArrayUtils;
import org.broadinstitute.barclay.argparser.Advanced;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.Hidden;
import org.broadinstitute.hellbender.cmdline.ReadFilterArgumentDefinitions;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.*;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.cmdline.ModeArgumentUtils;
import org.broadinstitute.hellbender.utils.read.FlowBasedRead;

import java.io.Serializable;

Expand All @@ -28,8 +22,6 @@ public class FlowBasedArgumentCollection implements Serializable {
public static final String PROB_SF_LONG_NAME = "flow-probability-scaling-factor";
public static final String RETAIN_MAX_N_PROBS_BASE_LONG_NAME = "flow-retain-max-n-probs-base-format";
public static final String FLOW_ORDER_CYCLE_LENGTH_LONG_NAME = "flow-order-cycle-length";
public static final String NUM_UNCERTAIN_FLOWS_LONG_NAME = "flow-number-of-uncertain-flows-to-clip";
public static final String FIRST_UNCERTAIN_FLOW_LONG_NAME = "flow-nucleotide-of-first-uncertain-flow";
public static final String FLOW_MATRIX_MODS_LONG_NAME = "flow-matrix-mods";
public static final String FLOW_KEEP_BOUNDARY_FLOWS_LONG_NAME = "keep-boundary-flows";

Expand All @@ -47,8 +39,6 @@ public class FlowBasedArgumentCollection implements Serializable {
private static final boolean DEFAULT_RETAIN_MAX_N_PROBS = false;
private static final int DEFAULT_PROB_SCALING_FACTOR = 10;
private static final int DEFAULT_FLOW_ORDER_CYCLE_LENGTH = 4;
private static final int DEFAULT_NUM_UNCERTAIN_FLOWS = 0;
private static final String DEFAULT_FIRST_UNCERTAIN_FLOW = "T";
private static final boolean DEFAULT_FLOW_USE_T0_TAG = false;

@Advanced
Expand Down Expand Up @@ -104,16 +94,6 @@ public class FlowBasedArgumentCollection implements Serializable {
@Argument(fullName = FLOW_ORDER_CYCLE_LENGTH_LONG_NAME, doc = "Length of flow order cycle", optional=true)
public int flowOrderCycleLength = DEFAULT_FLOW_ORDER_CYCLE_LENGTH;

@Advanced
@Hidden
@Argument(fullName = NUM_UNCERTAIN_FLOWS_LONG_NAME, doc = "Number of uncertain flows to trim on the 5' end of the read", optional=true)
public int flowNumUncertainFlows = DEFAULT_NUM_UNCERTAIN_FLOWS;

@Advanced
@Hidden
@Argument(fullName = FIRST_UNCERTAIN_FLOW_LONG_NAME, doc = "Nucleotide that is being read in the first uncertain (5') flow", optional=true)
public String flowFirstUncertainFlowBase = DEFAULT_FIRST_UNCERTAIN_FLOW;

@Advanced
@Argument(fullName=FLOW_MATRIX_MODS_LONG_NAME, doc="Modifications instructions to the read flow matrix. " +
"Format is src,dst{,src,dst}+. Example: 10,12,11,12 - these instructions will copy element 10 into 11 and 12", optional = true)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,11 @@
import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState;
import org.broadinstitute.hellbender.utils.logging.OneShotLogger;
import org.broadinstitute.hellbender.utils.pileup.PileupBasedAlleles;
import org.broadinstitute.hellbender.utils.pileup.ReadPileup;
import org.broadinstitute.hellbender.utils.read.*;
import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAligner;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.hellbender.utils.read.FlowBasedRead;
import org.broadinstitute.hellbender.tools.FlowBasedArgumentCollection;

import java.io.File;
Expand Down Expand Up @@ -130,7 +128,6 @@ public static void finalizeRegion(final AssemblyRegion region,
final boolean correctOverlappingBaseQualities,
final boolean softClipLowQualityEnds,
final boolean overrideSoftclipFragmentCheck,
final FlowBasedArgumentCollection fbargs,
final boolean trackHardclippedReads) {
if ( region.isFinalized() ) {
return;
Expand All @@ -142,12 +139,9 @@ public static void finalizeRegion(final AssemblyRegion region,
final List<GATKRead> hardClippedReadsToUse = new ArrayList<>();

for (final GATKRead originalRead : region.getReads()) {
// TODO unclipping soft clips may introduce bases that aren't in the extended region if the unclipped bases
// TODO include a deletion w.r.t. the reference. We must remove kmers that occur before the reference haplotype start
GATKRead readTemp = FlowBasedReadUtils.isFlow(originalRead) ? FlowBasedRead.hardClipUncertainBases(originalRead, readsHeader, fbargs):originalRead;
readTemp = dontUseSoftClippedBases || ! ( overrideSoftclipFragmentCheck || ReadUtils.hasWellDefinedFragmentSize(readTemp)) ?
ReadClipper.hardClipSoftClippedBases(readTemp) : revertSoftClippedBases(readTemp);

GATKRead readTemp = dontUseSoftClippedBases || ! ( overrideSoftclipFragmentCheck || ReadUtils.hasWellDefinedFragmentSize(originalRead)) ?
ReadClipper.hardClipSoftClippedBases(originalRead) : revertSoftClippedBases(originalRead);

final GATKRead read = (softClipLowQualityEnds ? ReadClipper.softClipLowQualEnds(readTemp, minTailQualityToUse) :
ReadClipper.hardClipLowQualEnds(readTemp, minTailQualityToUse));
Expand Down Expand Up @@ -351,7 +345,6 @@ public static AssemblyResultSet assembleReads(final AssemblyRegion region,
correctOverlappingBaseQualities,
argumentCollection.softClipLowQualityEnds,
argumentCollection.overrideSoftclipFragmentCheck,
fbargs,
true);


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.samtools.util.Tuple;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
Expand Down Expand Up @@ -60,7 +59,6 @@

/**
* The core engine for the HaplotypeCaller that does all of the actual work of the tool.
*
* Usage:
* -Pass the HaplotypeCaller args into the constructor, which will initialize the HC engine completely.
* -Get the appropriate VCF or GVCF writer (depending on our arguments) from {@link #makeVCFWriter}
Expand Down Expand Up @@ -985,7 +983,6 @@ protected List<VariantContext> referenceModelForNoVariation(final AssemblyRegion
! hcArgs.doNotCorrectOverlappingBaseQualities,
hcArgs.softClipLowQualityEnds,
hcArgs.overrideSoftclipFragmentCheck,
hcArgs.fbargs,
hcArgs.pileupDetectionArgs.usePileupDetection);
}

Expand Down Expand Up @@ -1043,7 +1040,7 @@ protected Set<GATKRead> filterNonPassingReads( final AssemblyRegion activeRegion
!ReadFilterLibrary.MATE_ON_SAME_CONTIG_OR_NO_MAPPED_MATE.test(rec) ||
(hcArgs.keepRG != null && !rec.getReadGroup().equals(hcArgs.keepRG)) ) {
if (HaplotypeCallerGenotypingDebugger.isEnabled()) {
HaplotypeCallerGenotypingDebugger.println("Filtered before assembly the read: " + rec.toString());
HaplotypeCallerGenotypingDebugger.println("Filtered before assembly the read: " + rec);
}
readsToRemove.add(rec);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.util.Locatable;
import htsjdk.samtools.util.Tuple;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
Expand Down Expand Up @@ -93,7 +92,7 @@ public final class Mutect2Engine implements AssemblyRegionEvaluator, AutoCloseab
public static final int HUGE_FRAGMENT_LENGTH = 1_000_000;
public static final int MIN_TAIL_QUALITY = 9;

private M2ArgumentCollection MTAC;
final private M2ArgumentCollection MTAC;
private SAMFileHeader header;
private final int minCallableDepth;
public static final String CALLABLE_SITES_NAME = "callable";
Expand Down Expand Up @@ -588,7 +587,6 @@ private List<VariantContext> referenceModelForNoVariation(final AssemblyRegion r
false,
false,
false,
MTAC.fbargs,
MTAC.pileupDetectionArgs.usePileupDetection); //take off soft clips and low Q tails before we calculate likelihoods


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -998,76 +998,6 @@ private double[] phredToProb(final int [] kq) {
return result;
}

/**
* Hard clips uncertain flows (currently four first flows read that often generate noisy base calls
* @param inputRead GATKREad
* @param flowOrder flow order string from the SAM header
* @param fbargs arguments
* @return read with flowNumUncertainFlows trimmed
*/
public static GATKRead hardClipUncertainBases(final GATKRead inputRead, final String flowOrder,
final FlowBasedArgumentCollection fbargs ){
Utils.nonNull(fbargs);
Utils.validateArg(fbargs.flowFirstUncertainFlowBase.length()==1, "First uncertain flow base should be of length 1");
ReadClipper clipper = new ReadClipper(inputRead);
final String adjustedFlowOrder = adjustFlowOrderToUncertainFlow(flowOrder, fbargs.flowFirstUncertainFlowBase.charAt(0), fbargs.flowOrderCycleLength);
if (inputRead.isReverseStrand()) {
final int nUncertain = nUncertainBases(inputRead, adjustedFlowOrder, fbargs.flowNumUncertainFlows, false);
clipper.addOp(new ClippingOp(inputRead.getLength()-nUncertain, inputRead.getLength()-1));
}
else {
final int nUncertain = nUncertainBases(inputRead, adjustedFlowOrder, fbargs.flowNumUncertainFlows, true);
clipper.addOp(new ClippingOp(0, nUncertain-1));
}

return clipper.clipRead(ClippingRepresentation.HARDCLIP_BASES);

}

/**
* Hard clips uncertain flows (currently four first flows read that often generate noisy base calls
* @param inputRead GATKREad
* @param samHeader sam file header (to extract to flow order from
* @param fbargs arguments
* @return read with flowNumUncertainFlows trimmed
*/
public static GATKRead hardClipUncertainBases(final GATKRead inputRead, final SAMFileHeader samHeader,
final FlowBasedArgumentCollection fbargs ){
Utils.nonNull(fbargs);
String flowOrder = samHeader.getReadGroup(inputRead.getReadGroup()).getFlowOrder();
if (flowOrder==null){
throw new GATKException("Unable to trim uncertain bases without flow order information");
}
flowOrder = flowOrder.substring(0,fbargs.flowOrderCycleLength);
return hardClipUncertainBases(inputRead, flowOrder, fbargs);
}

private static String adjustFlowOrderToUncertainFlow(final String flowOrder,
final char firstUncertainFlowBase,
final int flowOrderLength){
String result = flowOrder + flowOrder;
final int adjustedStartPos = result.indexOf(firstUncertainFlowBase);
return result.substring(adjustedStartPos, adjustedStartPos+flowOrderLength);
}

//find how many bases are output from uncertain flows
private static int nUncertainBases(final GATKRead inputRead, final String flowOrder,
final int nUncertainFlows, final boolean isForward){
byte [] bases;
if (isForward){
bases = inputRead.getBases();
} else {
bases = ReadUtils.getBasesReverseComplement(inputRead).getBytes();
}

final int[] key = FlowBasedKeyCodec.baseArrayToKey(bases, flowOrder);
final int nTrimFlows = Math.min(nUncertainFlows, key.length);
int result = 0;
for (int i = 0 ; i < nTrimFlows; i++){
result += key[i];
}
return result;
}

public static void setMinimalReadLength(int minimalReadLength) {
FlowBasedRead.minimalReadLength = minimalReadLength;
Expand All @@ -1088,7 +1018,7 @@ private void readVestigialFlowMatrixFromTI(final String _flowOrder) {
flowMatrix = new double[maxHmer+1][key.length];
for (int i = 0 ; i < maxHmer+1; i++) {
for (int j = 0 ; j < key.length; j++ ){
flowMatrix[i][j] = fbargs.fillingValue;;
flowMatrix[i][j] = fbargs.fillingValue;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import org.apache.commons.lang3.tuple.Pair;
import org.broadinstitute.hellbender.GATKBaseTest;
import org.broadinstitute.gatk.nativebindings.smithwaterman.SWParameters;
import org.broadinstitute.hellbender.GATKBaseTest;
import org.broadinstitute.hellbender.engine.AssemblyRegion;
import org.broadinstitute.hellbender.testutils.VariantContextTestUtils;
import org.broadinstitute.hellbender.utils.BaseUtils;
Expand Down Expand Up @@ -72,7 +71,7 @@ public void testfinalizeRegion() {
SampleList sampleList = SampleList.singletonSampleList("tumor");
Byte minbq = 9;
// NOTE: this test MUST be run with correctOverlappingBaseQualities enabled otherwise this test can succeed even with unsafe code
AssemblyBasedCallerUtils.finalizeRegion(activeRegion, false, false, minbq, header, sampleList, true, false, false, null, false);
AssemblyBasedCallerUtils.finalizeRegion(activeRegion, false, false, minbq, header, sampleList, true, false, false, false);

// make sure that the original reads are not changed due to finalizeRegion()
Assert.assertTrue(reads.get(0).convertToSAMRecord(header).equals(orgRead0));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,14 @@
import org.broadinstitute.hellbender.GATKBaseTest;
import org.broadinstitute.hellbender.testutils.IntegrationTestSpec;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.broadinstitute.hellbender.tools.FlowBasedArgumentCollection;

import java.io.File;
import java.io.FileWriter;
import java.nio.file.FileSystems;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class FlowBasedReadUnitTest extends GATKBaseTest {

Expand Down Expand Up @@ -122,16 +119,6 @@ void testBAMFormatParsingWithT0() throws Exception{
}


@Test (dataProvider = "makeReads")
public void testUncertainFlowTrimming(final GATKRead read, int nTrim, String uncertainFlowBase, final byte[] output, final int start, final int end) {
FlowBasedArgumentCollection fbargs = new FlowBasedArgumentCollection();
fbargs.flowNumUncertainFlows = nTrim;
fbargs.flowFirstUncertainFlowBase = uncertainFlowBase;
GATKRead trimmedRead = FlowBasedRead.hardClipUncertainBases(read, "TAGC", fbargs);
Assert.assertEquals(trimmedRead.getBases(), output);
Assert.assertEquals(trimmedRead.getStart(), start);
Assert.assertEquals(trimmedRead.getEnd(), end);
}

private GATKRead makeRead(final byte[] bases, final boolean isReverse) {

Expand All @@ -145,26 +132,6 @@ private GATKRead makeRead(final byte[] bases, final boolean isReverse) {
return read;
}

@DataProvider(name="makeReads")
public Object[][] makeMultipleReads(){
List<Object[]> tests = new ArrayList<>();
tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, false), 4, "T", new byte[] {'G','A'}, 104, 105});
tests.add(new Object[]{makeRead(new byte[]{'A','C','C','G','A','T'}, false), 4, "T", new byte[] {'G','A','T'}, 103, 105});
tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, true), 4, "T", new byte[] {'T','A','G','C'},100,103});
tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, false), 1, "T", new byte[] {'A','G','C','G','A'}, 101, 105});
tests.add(new Object[]{makeRead(new byte[]{'A','C','C','G','A','T'}, false), 2, "T", new byte[] {'C','C','G','A','T'}, 101, 105});
tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, true), 10, "T", new byte[] {},0,0});

tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, false), 4, "A", new byte[] {'A','G','C','G','A'}, 101, 105});
tests.add(new Object[]{makeRead(new byte[]{'A','C','C','G','A','T'}, false), 4, "A", new byte[] {'G','A','T'}, 103, 105});
tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, true), 4, "A", new byte[] {'T','A','G','C','G'},100,104});
tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, false), 1, "A", new byte[] {'T','A','G','C','G','A'}, 100, 105});
tests.add(new Object[]{makeRead(new byte[]{'A','C','C','G','A','T'}, false), 2, "A", new byte[] {'C','C','G','A','T'}, 101, 105});
tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, true), 10, "A", new byte[] {'T','A','G'},100,102});

return tests.toArray(new Object[][]{});
}

@Test
public void testFlowBasedReadConstructorEnforcesMatrix() {

Expand Down