Skip to content

Commit

Permalink
Make code determine the collapsing automatically
Browse files Browse the repository at this point in the history
  • Loading branch information
ilyasoifer committed Dec 1, 2022
1 parent 5f615cc commit acd85de
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,8 @@ public SWParameters getReadToHaplotypeSWParameters() {

@Hidden
@Advanced
@Argument(fullName=FLOW_ASSEMBLY_COLLAPSE_HMER_SIZE_LONG_NAME, doc="Collapse reference regions with >Nhmer during assembly, normal value when used is 12", optional = true)
@Argument(fullName=FLOW_ASSEMBLY_COLLAPSE_HMER_SIZE_LONG_NAME, doc="Collapse reference regions with >Nhmer during assembly, normal value when used is 12, " +
"-1 means - determine automatically from mc tag in the reads, 0 - disable", optional = true)
public int flowAssemblyCollapseHKerSize = 0;

@Advanced
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import htsjdk.samtools.Cigar;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.util.Locatable;
import htsjdk.variant.variantcontext.*;
Expand Down Expand Up @@ -52,6 +53,7 @@
public final class AssemblyBasedCallerUtils {

public static final int REFERENCE_PADDING_FOR_ASSEMBLY = 500;
public static final int DETERMINE_COLLAPSE_THRESHOLD = -1;
public static final int NUM_HAPLOTYPES_TO_INJECT_FORCE_CALLING_ALLELES_INTO = 5;
public static final String SUPPORTED_ALLELES_TAG="XA";
public static final String CALLABLE_REGION_TAG = "CR";
Expand Down Expand Up @@ -363,8 +365,12 @@ public static AssemblyResultSet assembleReads(final AssemblyRegion region,
final SWParameters haplotypeToReferenceSWParameters = argumentCollection.getHaplotypeToReferenceSWParameters();

// establish reference mapper, if needed
final LongHomopolymerHaplotypeCollapsingEngine haplotypeCollapsing = (argumentCollection.flowAssemblyCollapseHKerSize > 0 && LongHomopolymerHaplotypeCollapsingEngine.needsCollapsing(refHaplotype.getBases(), argumentCollection.flowAssemblyCollapseHKerSize, logger))
? new LongHomopolymerHaplotypeCollapsingEngine(argumentCollection.flowAssemblyCollapseHKerSize, argumentCollection.flowAssemblyCollapsePartialMode, fullReferenceWithPadding,
int collapseHmerSize = argumentCollection.flowAssemblyCollapseHKerSize;
if (collapseHmerSize == DETERMINE_COLLAPSE_THRESHOLD){
collapseHmerSize = AssemblyBasedCallerUtils.determineFlowAssemblyColapseHmer(header);
}
final LongHomopolymerHaplotypeCollapsingEngine haplotypeCollapsing = ( collapseHmerSize > 0 && LongHomopolymerHaplotypeCollapsingEngine.needsCollapsing(refHaplotype.getBases(), collapseHmerSize, logger))
? new LongHomopolymerHaplotypeCollapsingEngine(collapseHmerSize, argumentCollection.flowAssemblyCollapsePartialMode, fullReferenceWithPadding,
paddedReferenceLoc, logger, argumentCollection.assemblerArgs.debugAssembly, aligner, argumentCollection.getHaplotypeToReferenceSWParameters())
: null;
if ( haplotypeCollapsing != null ) {
Expand Down Expand Up @@ -411,6 +417,18 @@ public static AssemblyResultSet assembleReads(final AssemblyRegion region,
}
}

/**
 * Derives the homopolymer collapse size from the read groups declared in a reads header.
 *
 * A {@link FlowBasedReadUtils.ReadGroupInfo} is built for every read group in the header and
 * the largest {@code maxClass} among them is returned. The running maximum starts at 0, so the
 * result is never negative and is 0 when the header declares no read groups.
 *
 * @param readsHeader header whose read groups are inspected
 * @return the largest {@code maxClass} over all read groups, or 0 if there is none above zero
 */
private static int determineFlowAssemblyColapseHmer(SAMFileHeader readsHeader) {
    int largestMaxClass = 0;
    for (final SAMReadGroupRecord readGroup : readsHeader.getReadGroups()) {
        final int groupMaxClass = new FlowBasedReadUtils.ReadGroupInfo(readGroup).maxClass;
        // Keep the running maximum; equivalent to replacing it whenever the new value is >= the current one.
        largestMaxClass = Math.max(largestMaxClass, groupMaxClass);
    }
    return largestMaxClass;
}

/**
* Handle pileup detected alternate alleles.
*/
Expand Down Expand Up @@ -1217,4 +1235,5 @@ private static GATKRead revertSoftClippedBases(GATKRead inputRead){
result.setAttribute(ReferenceConfidenceModel.ORIGINAL_SOFTCLIP_END_TAG, softEnd);
return result;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ public enum FlowMode {
MIN_BASE_QUALITY_SCORE_SHORT_NAME, "0",
FILTER_ALLELES, "true",
FILTER_ALLELES_SOR_THRESHOLD, "3",
FLOW_ASSEMBLY_COLLAPSE_HMER_SIZE_LONG_NAME, "20",
FLOW_ASSEMBLY_COLLAPSE_HMER_SIZE_LONG_NAME, String.valueOf(AssemblyBasedCallerUtils.DETERMINE_COLLAPSE_THRESHOLD),
OVERRIDE_FRAGMENT_SOFTCLIP_CHECK_LONG_NAME, "true",
FlowBasedAlignmentArgumentCollection.FLOW_LIKELIHOOD_PARALLEL_THREADS_LONG_NAME, "2",
FlowBasedAlignmentArgumentCollection.FLOW_LIKELIHOOD_OPTIMIZED_COMP_LONG_NAME, "true",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ static public class ReadGroupInfo {

private String reversedFlowOrder = null;

ReadGroupInfo(final SAMReadGroupRecord readGroup) {
public ReadGroupInfo(final SAMReadGroupRecord readGroup) {

Utils.nonNull(readGroup);
this.flowOrder = readGroup.getFlowOrder();
Expand Down

0 comments on commit acd85de

Please sign in to comment.