From 097ee71a60d84007784a6154e97763c359388368 Mon Sep 17 00:00:00 2001 From: Ilya Soifer Date: Thu, 1 Dec 2022 22:15:46 +0200 Subject: [PATCH] Make code determine the collapsing automatically --- ...AssemblyBasedCallerArgumentCollection.java | 3 ++- .../AssemblyBasedCallerUtils.java | 23 +++++++++++++++++-- .../HaplotypeCallerArgumentCollection.java | 2 +- .../utils/read/FlowBasedReadUtils.java | 2 +- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerArgumentCollection.java index 85cf6aac7d6..b9dc7357424 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerArgumentCollection.java @@ -337,7 +337,8 @@ public SWParameters getReadToHaplotypeSWParameters() { @Hidden @Advanced - @Argument(fullName=FLOW_ASSEMBLY_COLLAPSE_HMER_SIZE_LONG_NAME, doc="Collapse reference regions with >Nhmer during assembly, normal value when used is 12", optional = true) + @Argument(fullName=FLOW_ASSEMBLY_COLLAPSE_HMER_SIZE_LONG_NAME, doc="Collapse reference regions with >Nhmer during assembly, normal value when used is 12, " + + "-1 means - determine automatically from mc tag in the reads, 0 - disable", optional = true) public int flowAssemblyCollapseHKerSize = 0; @Advanced diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java index fd72d9d5890..f791f8d0c70 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerUtils.java @@ -4,6 +4,7 @@ import htsjdk.samtools.Cigar; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMFileWriter; +import htsjdk.samtools.SAMReadGroupRecord; import htsjdk.samtools.reference.ReferenceSequenceFile; import htsjdk.samtools.util.Locatable; import htsjdk.variant.variantcontext.*; @@ -52,6 +53,7 @@ public final class AssemblyBasedCallerUtils { public static final int REFERENCE_PADDING_FOR_ASSEMBLY = 500; + public static final int DETERMINE_COLLAPSE_THRESHOLD = -1; public static final int NUM_HAPLOTYPES_TO_INJECT_FORCE_CALLING_ALLELES_INTO = 5; public static final String SUPPORTED_ALLELES_TAG="XA"; public static final String CALLABLE_REGION_TAG = "CR"; @@ -363,8 +365,12 @@ public static AssemblyResultSet assembleReads(final AssemblyRegion region, final SWParameters haplotypeToReferenceSWParameters = argumentCollection.getHaplotypeToReferenceSWParameters(); // establish reference mapper, if needed - final LongHomopolymerHaplotypeCollapsingEngine haplotypeCollapsing = (argumentCollection.flowAssemblyCollapseHKerSize > 0 && LongHomopolymerHaplotypeCollapsingEngine.needsCollapsing(refHaplotype.getBases(), argumentCollection.flowAssemblyCollapseHKerSize, logger)) - ? new LongHomopolymerHaplotypeCollapsingEngine(argumentCollection.flowAssemblyCollapseHKerSize, argumentCollection.flowAssemblyCollapsePartialMode, fullReferenceWithPadding, + int collapseHmerSize = argumentCollection.flowAssemblyCollapseHKerSize; + if (collapseHmerSize == DETERMINE_COLLAPSE_THRESHOLD){ + collapseHmerSize = AssemblyBasedCallerUtils.determineFlowAssemblyColapseHmer(header); + } + final LongHomopolymerHaplotypeCollapsingEngine haplotypeCollapsing = ( collapseHmerSize > 0 && LongHomopolymerHaplotypeCollapsingEngine.needsCollapsing(refHaplotype.getBases(), collapseHmerSize, logger)) + ? new LongHomopolymerHaplotypeCollapsingEngine(collapseHmerSize, argumentCollection.flowAssemblyCollapsePartialMode, fullReferenceWithPadding, paddedReferenceLoc, logger, argumentCollection.assemblerArgs.debugAssembly, aligner, argumentCollection.getHaplotypeToReferenceSWParameters()) : null; if ( haplotypeCollapsing != null ) { @@ -411,6 +417,18 @@ public static AssemblyResultSet assembleReads(final AssemblyRegion region, } } + private static int determineFlowAssemblyColapseHmer(SAMFileHeader readsHeader) { + int result = 0; + List rgr = readsHeader.getReadGroups(); + for (SAMReadGroupRecord rg : rgr) { + FlowBasedReadUtils.ReadGroupInfo rgi = new FlowBasedReadUtils.ReadGroupInfo(rg); + if (rgi.maxClass >= result) { + result = rgi.maxClass; + } + } + return result; + } + /** * Handle pileup detected alternate alleles. */ @@ -1217,4 +1235,5 @@ private static GATKRead revertSoftClippedBases(GATKRead inputRead){ result.setAttribute(ReferenceConfidenceModel.ORIGINAL_SOFTCLIP_END_TAG, softEnd); return result; } + } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerArgumentCollection.java index ca09a661a61..d3f6c794472 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerArgumentCollection.java @@ -264,7 +264,7 @@ public enum FlowMode { MIN_BASE_QUALITY_SCORE_SHORT_NAME, "0", FILTER_ALLELES, "true", FILTER_ALLELES_SOR_THRESHOLD, "3", - FLOW_ASSEMBLY_COLLAPSE_HMER_SIZE_LONG_NAME, "20", + FLOW_ASSEMBLY_COLLAPSE_HMER_SIZE_LONG_NAME, String.valueOf(AssemblyBasedCallerUtils.DETERMINE_COLLAPSE_THRESHOLD), OVERRIDE_FRAGMENT_SOFTCLIP_CHECK_LONG_NAME, "true", FlowBasedAlignmentArgumentCollection.FLOW_LIKELIHOOD_PARALLEL_THREADS_LONG_NAME, "2", FlowBasedAlignmentArgumentCollection.FLOW_LIKELIHOOD_OPTIMIZED_COMP_LONG_NAME, "true", diff --git a/src/main/java/org/broadinstitute/hellbender/utils/read/FlowBasedReadUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/read/FlowBasedReadUtils.java index de2a5b94e31..63bb6410540 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/read/FlowBasedReadUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/read/FlowBasedReadUtils.java @@ -29,7 +29,7 @@ static public class ReadGroupInfo { private String reversedFlowOrder = null; - ReadGroupInfo(final SAMReadGroupRecord readGroup) { + public ReadGroupInfo(final SAMReadGroupRecord readGroup) { Utils.nonNull(readGroup); this.flowOrder = readGroup.getFlowOrder();