From f40627169798bd003c94ef226defb9cb4c0cbfa8 Mon Sep 17 00:00:00 2001 From: Ilya Soifer Date: Wed, 24 May 2023 22:35:09 +0300 Subject: [PATCH] Removing unnecessary argument and option --- .../tools/FlowBasedArgumentCollection.java | 20 ------ .../AssemblyBasedCallerUtils.java | 11 +-- .../HaplotypeCallerEngine.java | 5 +- .../tools/walkers/mutect/Mutect2Engine.java | 4 +- .../hellbender/utils/read/FlowBasedRead.java | 72 +------------------ .../AssemblyBasedCallerUtilsUnitTest.java | 3 +- .../utils/read/FlowBasedReadUnitTest.java | 33 --------- 7 files changed, 6 insertions(+), 142 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/FlowBasedArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/FlowBasedArgumentCollection.java index 3f3a2d9a5bc..3fa41b46a04 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/FlowBasedArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/FlowBasedArgumentCollection.java @@ -1,14 +1,8 @@ package org.broadinstitute.hellbender.tools; -import org.apache.commons.lang3.ArrayUtils; import org.broadinstitute.barclay.argparser.Advanced; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.Hidden; -import org.broadinstitute.hellbender.cmdline.ReadFilterArgumentDefinitions; -import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.*; -import org.broadinstitute.hellbender.utils.Utils; -import org.broadinstitute.hellbender.cmdline.ModeArgumentUtils; -import org.broadinstitute.hellbender.utils.read.FlowBasedRead; import java.io.Serializable; @@ -28,8 +22,6 @@ public class FlowBasedArgumentCollection implements Serializable { public static final String PROB_SF_LONG_NAME = "flow-probability-scaling-factor"; public static final String RETAIN_MAX_N_PROBS_BASE_LONG_NAME = "flow-retain-max-n-probs-base-format"; public static final String FLOW_ORDER_CYCLE_LENGTH_LONG_NAME = "flow-order-cycle-length"; - public static final String NUM_UNCERTAIN_FLOWS_LONG_NAME = "flow-number-of-uncertain-flows-to-clip"; - public static final String FIRST_UNCERTAIN_FLOW_LONG_NAME = "flow-nucleotide-of-first-uncertain-flow"; public static final String FLOW_MATRIX_MODS_LONG_NAME = "flow-matrix-mods"; public static final String FLOW_KEEP_BOUNDARY_FLOWS_LONG_NAME = "keep-boundary-flows"; @@ -47,8 +39,6 @@ public class FlowBasedArgumentCollection implements Serializable { private static final boolean DEFAULT_RETAIN_MAX_N_PROBS = false; private static final int DEFAULT_PROB_SCALING_FACTOR = 10; private static final int DEFAULT_FLOW_ORDER_CYCLE_LENGTH = 4; - private static final int DEFAULT_NUM_UNCERTAIN_FLOWS = 0; - private static final String DEFAULT_FIRST_UNCERTAIN_FLOW = "T"; private static final boolean DEFAULT_FLOW_USE_T0_TAG = false; @Advanced @@ -104,16 +94,6 @@ public class FlowBasedArgumentCollection implements Serializable { @Argument(fullName = FLOW_ORDER_CYCLE_LENGTH_LONG_NAME, doc = "Length of flow order cycle", optional=true) public int flowOrderCycleLength = DEFAULT_FLOW_ORDER_CYCLE_LENGTH; - @Advanced - @Hidden - @Argument(fullName = NUM_UNCERTAIN_FLOWS_LONG_NAME, doc = "Number of uncertain flows to trim on the 5' end of the read", optional=true) - public int flowNumUncertainFlows = DEFAULT_NUM_UNCERTAIN_FLOWS; - - @Advanced - @Hidden - @Argument(fullName = FIRST_UNCERTAIN_FLOW_LONG_NAME, doc = "Nucleotide that is being read in the first uncertain (5') flow", optional=true) - public String flowFirstUncertainFlowBase = DEFAULT_FIRST_UNCERTAIN_FLOW; - @Advanced @Argument(fullName=FLOW_MATRIX_MODS_LONG_NAME, doc="Modifications instructions to the read flow matrix. " + "Format is src,dst{,src,dst}+. Example: 10,12,11,12 - these instructions will copy element 10 into 11 and 12", optional = true) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java index 2ff402a0bc2..5c488464bb1 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java @@ -35,13 +35,11 @@ import org.broadinstitute.hellbender.utils.io.IOUtils; import org.broadinstitute.hellbender.utils.locusiterator.LocusIteratorByState; import org.broadinstitute.hellbender.utils.logging.OneShotLogger; -import org.broadinstitute.hellbender.utils.pileup.PileupBasedAlleles; import org.broadinstitute.hellbender.utils.pileup.ReadPileup; import org.broadinstitute.hellbender.utils.read.*; import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAligner; import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; -import org.broadinstitute.hellbender.utils.read.FlowBasedRead; import org.broadinstitute.hellbender.tools.FlowBasedArgumentCollection; import java.io.File; @@ -130,7 +128,6 @@ public static void finalizeRegion(final AssemblyRegion region, final boolean correctOverlappingBaseQualities, final boolean softClipLowQualityEnds, final boolean overrideSoftclipFragmentCheck, - final FlowBasedArgumentCollection fbargs, final boolean trackHardclippedReads) { if ( region.isFinalized() ) { return; @@ -142,12 +139,9 @@ public static void finalizeRegion(final AssemblyRegion region, final List hardClippedReadsToUse = new ArrayList<>(); for (final GATKRead originalRead : region.getReads()) { - // TODO unclipping soft clips may introduce bases that aren't in the extended region if the unclipped bases // TODO include a deletion w.r.t. the reference. We must remove kmers that occur before the reference haplotype start - GATKRead readTemp = FlowBasedReadUtils.isFlow(originalRead) ? FlowBasedRead.hardClipUncertainBases(originalRead, readsHeader, fbargs):originalRead; - readTemp = dontUseSoftClippedBases || ! ( overrideSoftclipFragmentCheck || ReadUtils.hasWellDefinedFragmentSize(readTemp)) ? - ReadClipper.hardClipSoftClippedBases(readTemp) : revertSoftClippedBases(readTemp); - + GATKRead readTemp = dontUseSoftClippedBases || ! ( overrideSoftclipFragmentCheck || ReadUtils.hasWellDefinedFragmentSize(originalRead)) ? + ReadClipper.hardClipSoftClippedBases(originalRead) : revertSoftClippedBases(originalRead); final GATKRead read = (softClipLowQualityEnds ? ReadClipper.softClipLowQualEnds(readTemp, minTailQualityToUse) : ReadClipper.hardClipLowQualEnds(readTemp, minTailQualityToUse)); @@ -351,7 +345,6 @@ public static AssemblyResultSet assembleReads(final AssemblyRegion region, correctOverlappingBaseQualities, argumentCollection.softClipLowQualityEnds, argumentCollection.overrideSoftclipFragmentCheck, - fbargs, true); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java index df55b7b1477..d96fffb3875 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java @@ -4,7 +4,6 @@ import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.reference.ReferenceSequenceFile; import htsjdk.samtools.util.RuntimeIOException; -import htsjdk.samtools.util.Tuple; import htsjdk.variant.variantcontext.*; import htsjdk.variant.variantcontext.writer.Options; import htsjdk.variant.variantcontext.writer.VariantContextWriter; @@ -60,7 +59,6 @@ /** * The core engine for the HaplotypeCaller that does all of the actual work of the tool. - * * Usage: * -Pass the HaplotypeCaller args into the constructor, which will initialize the HC engine completely. * -Get the appropriate VCF or GVCF writer (depending on our arguments) from {@link #makeVCFWriter} @@ -985,7 +983,6 @@ protected List referenceModelForNoVariation(final AssemblyRegion ! hcArgs.doNotCorrectOverlappingBaseQualities, hcArgs.softClipLowQualityEnds, hcArgs.overrideSoftclipFragmentCheck, - hcArgs.fbargs, hcArgs.pileupDetectionArgs.usePileupDetection); } @@ -1043,7 +1040,7 @@ protected Set filterNonPassingReads( final AssemblyRegion activeRegion !ReadFilterLibrary.MATE_ON_SAME_CONTIG_OR_NO_MAPPED_MATE.test(rec) || (hcArgs.keepRG != null && !rec.getReadGroup().equals(hcArgs.keepRG)) ) { if (HaplotypeCallerGenotypingDebugger.isEnabled()) { - HaplotypeCallerGenotypingDebugger.println("Filtered before assembly the read: " + rec.toString()); + HaplotypeCallerGenotypingDebugger.println("Filtered before assembly the read: " + rec); } readsToRemove.add(rec); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java index ce13d6a8950..c71f83df315 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java @@ -2,7 +2,6 @@ import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.util.Locatable; -import htsjdk.samtools.util.Tuple; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; @@ -93,7 +92,7 @@ public final class Mutect2Engine implements AssemblyRegionEvaluator, AutoCloseab public static final int HUGE_FRAGMENT_LENGTH = 1_000_000; public static final int MIN_TAIL_QUALITY = 9; - private M2ArgumentCollection MTAC; + final private M2ArgumentCollection MTAC; private SAMFileHeader header; private final int minCallableDepth; public static final String CALLABLE_SITES_NAME = "callable"; @@ -588,7 +587,6 @@ private List referenceModelForNoVariation(final AssemblyRegion r false, false, false, - MTAC.fbargs, MTAC.pileupDetectionArgs.usePileupDetection); //take off soft clips and low Q tails before we calculate likelihoods diff --git a/src/main/java/org/broadinstitute/hellbender/utils/read/FlowBasedRead.java b/src/main/java/org/broadinstitute/hellbender/utils/read/FlowBasedRead.java index f5030a3f655..39102aecc55 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/read/FlowBasedRead.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/read/FlowBasedRead.java @@ -998,76 +998,6 @@ private double[] phredToProb(final int [] kq) { return result; } - /** - * Hard clips uncertain flows (currently four first flows read that often generate noisy base calls - * @param inputRead GATKREad - * @param flowOrder flow order string from the SAM header - * @param fbargs arguments - * @return read with flowNumUncertainFlows trimmed - */ - public static GATKRead hardClipUncertainBases(final GATKRead inputRead, final String flowOrder, - final FlowBasedArgumentCollection fbargs ){ - Utils.nonNull(fbargs); - Utils.validateArg(fbargs.flowFirstUncertainFlowBase.length()==1, "First uncertain flow base should be of length 1"); - ReadClipper clipper = new ReadClipper(inputRead); - final String adjustedFlowOrder = adjustFlowOrderToUncertainFlow(flowOrder, fbargs.flowFirstUncertainFlowBase.charAt(0), fbargs.flowOrderCycleLength); - if (inputRead.isReverseStrand()) { - final int nUncertain = nUncertainBases(inputRead, adjustedFlowOrder, fbargs.flowNumUncertainFlows, false); - clipper.addOp(new ClippingOp(inputRead.getLength()-nUncertain, inputRead.getLength()-1)); - } - else { - final int nUncertain = nUncertainBases(inputRead, adjustedFlowOrder, fbargs.flowNumUncertainFlows, true); - clipper.addOp(new ClippingOp(0, nUncertain-1)); - } - - return clipper.clipRead(ClippingRepresentation.HARDCLIP_BASES); - - } - - /** - * Hard clips uncertain flows (currently four first flows read that often generate noisy base calls - * @param inputRead GATKREad - * @param samHeader sam file header (to extract to flow order from - * @param fbargs arguments - * @return read with flowNumUncertainFlows trimmed - */ - public static GATKRead hardClipUncertainBases(final GATKRead inputRead, final SAMFileHeader samHeader, - final FlowBasedArgumentCollection fbargs ){ - Utils.nonNull(fbargs); - String flowOrder = samHeader.getReadGroup(inputRead.getReadGroup()).getFlowOrder(); - if (flowOrder==null){ - throw new GATKException("Unable to trim uncertain bases without flow order information"); - } - flowOrder = flowOrder.substring(0,fbargs.flowOrderCycleLength); - return hardClipUncertainBases(inputRead, flowOrder, fbargs); - } - - private static String adjustFlowOrderToUncertainFlow(final String flowOrder, - final char firstUncertainFlowBase, - final int flowOrderLength){ - String result = flowOrder + flowOrder; - final int adjustedStartPos = result.indexOf(firstUncertainFlowBase); - return result.substring(adjustedStartPos, adjustedStartPos+flowOrderLength); - } - - //find how many bases are output from uncertain flows - private static int nUncertainBases(final GATKRead inputRead, final String flowOrder, - final int nUncertainFlows, final boolean isForward){ - byte [] bases; - if (isForward){ - bases = inputRead.getBases(); - } else { - bases = ReadUtils.getBasesReverseComplement(inputRead).getBytes(); - } - - final int[] key = FlowBasedKeyCodec.baseArrayToKey(bases, flowOrder); - final int nTrimFlows = Math.min(nUncertainFlows, key.length); - int result = 0; - for (int i = 0 ; i < nTrimFlows; i++){ - result += key[i]; - } - return result; - } public static void setMinimalReadLength(int minimalReadLength) { FlowBasedRead.minimalReadLength = minimalReadLength; @@ -1088,7 +1018,7 @@ private void readVestigialFlowMatrixFromTI(final String _flowOrder) { flowMatrix = new double[maxHmer+1][key.length]; for (int i = 0 ; i < maxHmer+1; i++) { for (int j = 0 ; j < key.length; j++ ){ - flowMatrix[i][j] = fbargs.fillingValue;; + flowMatrix[i][j] = fbargs.fillingValue; } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtilsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtilsUnitTest.java index 0e443bae84f..fae99f3a587 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtilsUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtilsUnitTest.java @@ -8,7 +8,6 @@ import org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.gatk.nativebindings.smithwaterman.SWParameters; -import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.engine.AssemblyRegion; import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; import org.broadinstitute.hellbender.utils.BaseUtils; @@ -72,7 +71,7 @@ public void testfinalizeRegion() { SampleList sampleList = SampleList.singletonSampleList("tumor"); Byte minbq = 9; // NOTE: this test MUST be run with correctOverlappingBaseQualities enabled otherwise this test can succeed even with unsafe code - AssemblyBasedCallerUtils.finalizeRegion(activeRegion, false, false, minbq, header, sampleList, true, false, false, null, false); + AssemblyBasedCallerUtils.finalizeRegion(activeRegion, false, false, minbq, header, sampleList, true, false, false, false); // make sure that the original reads are not changed due to finalizeRegion() Assert.assertTrue(reads.get(0).convertToSAMRecord(header).equals(orgRead0)); diff --git a/src/test/java/org/broadinstitute/hellbender/utils/read/FlowBasedReadUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/read/FlowBasedReadUnitTest.java index 69503f49d2f..62fa3949ee0 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/read/FlowBasedReadUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/read/FlowBasedReadUnitTest.java @@ -6,7 +6,6 @@ import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.testutils.IntegrationTestSpec; import org.testng.Assert; -import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import org.broadinstitute.hellbender.tools.FlowBasedArgumentCollection; @@ -14,9 +13,7 @@ import java.io.FileWriter; import java.nio.file.FileSystems; import java.nio.file.Path; -import java.util.ArrayList; import java.util.Iterator; -import java.util.List; public class FlowBasedReadUnitTest extends GATKBaseTest { @@ -122,16 +119,6 @@ void testBAMFormatParsingWithT0() throws Exception{ } - @Test (dataProvider = "makeReads") - public void testUncertainFlowTrimming(final GATKRead read, int nTrim, String uncertainFlowBase, final byte[] output, final int start, final int end) { - FlowBasedArgumentCollection fbargs = new FlowBasedArgumentCollection(); - fbargs.flowNumUncertainFlows = nTrim; - fbargs.flowFirstUncertainFlowBase = uncertainFlowBase; - GATKRead trimmedRead = FlowBasedRead.hardClipUncertainBases(read, "TAGC", fbargs); - Assert.assertEquals(trimmedRead.getBases(), output); - Assert.assertEquals(trimmedRead.getStart(), start); - Assert.assertEquals(trimmedRead.getEnd(), end); - } private GATKRead makeRead(final byte[] bases, final boolean isReverse) { @@ -145,26 +132,6 @@ private GATKRead makeRead(final byte[] bases, final boolean isReverse) { return read; } - @DataProvider(name="makeReads") - public Object[][] makeMultipleReads(){ - List tests = new ArrayList<>(); - tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, false), 4, "T", new byte[] {'G','A'}, 104, 105}); - tests.add(new Object[]{makeRead(new byte[]{'A','C','C','G','A','T'}, false), 4, "T", new byte[] {'G','A','T'}, 103, 105}); - tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, true), 4, "T", new byte[] {'T','A','G','C'},100,103}); - tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, false), 1, "T", new byte[] {'A','G','C','G','A'}, 101, 105}); - tests.add(new Object[]{makeRead(new byte[]{'A','C','C','G','A','T'}, false), 2, "T", new byte[] {'C','C','G','A','T'}, 101, 105}); - tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, true), 10, "T", new byte[] {},0,0}); - - tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, false), 4, "A", new byte[] {'A','G','C','G','A'}, 101, 105}); - tests.add(new Object[]{makeRead(new byte[]{'A','C','C','G','A','T'}, false), 4, "A", new byte[] {'G','A','T'}, 103, 105}); - tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, true), 4, "A", new byte[] {'T','A','G','C','G'},100,104}); - tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, false), 1, "A", new byte[] {'T','A','G','C','G','A'}, 100, 105}); - tests.add(new Object[]{makeRead(new byte[]{'A','C','C','G','A','T'}, false), 2, "A", new byte[] {'C','C','G','A','T'}, 101, 105}); - tests.add(new Object[]{makeRead(new byte[]{'T','A','G','C','G','A'}, true), 10, "A", new byte[] {'T','A','G'},100,102}); - - return tests.toArray(new Object[][]{}); - } - @Test public void testFlowBasedReadConstructorEnforcesMatrix() {