Skip to content

Commit

Permalink
Responded to review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesemery committed Mar 17, 2023
1 parent 8d3d123 commit a690509
Show file tree
Hide file tree
Showing 24 changed files with 350 additions and 13,531 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,7 @@ private AssemblyRegion loadNextAssemblyRegion() {


final ActivityProfileState profile = evaluator.isActive(pileup, pileupRefContext, pileupFeatureContext);
//TODO pull out the qual score for later
logger.debug(() -> profile.toString());
// if (profile.isActiveProb() > 0.001) System.out.println(profile.toString());
activityProfile.add(profile);

if (pendingAlignmentData!=null) {
Expand Down Expand Up @@ -186,8 +184,6 @@ private AssemblyRegion loadNextAssemblyRegion() {
fillNextAssemblyRegionWithReads(nextRegion);
// fillnextessemblyregion; check you are on correct chr; if alignment data is not in the assembly region then pop it
fillNextAssemblyRegionWithPileupData(nextRegion);
//TODO this is a hack
// fillNextAssemblyRegionWith(activityProfile.)
}

return nextRegion;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,7 @@ private static List<Allele> getAllelesNotPresentInAssembly(VariantContext givenV
ReferenceConfidenceVariantContextMerger.remapAlleles(assembledVC, longerRef));
final Set<Allele> givenAlleleSet = new HashSet<>(longerRef.length() == givenVCRefLength ? givenVC.getAlternateAlleles() :
ReferenceConfidenceVariantContextMerger.remapAlleles(givenVC, longerRef));
//TODO ADD A TEST FOR THIS CHANGE! IT WAS A RARE BUG BEFORE
//We must filter out the Ref alleles here to protect against edge cases and undexpected behavior from the pileupcalling code
unassembledGivenAlleles = givenAlleleSet.stream().filter(a -> !assembledAlleleSet.contains(a)).filter(a -> !a.isReference()).collect(Collectors.toList());
}
return unassembledGivenAlleles;
Expand Down Expand Up @@ -930,7 +930,7 @@ public static Map<Allele, List<Haplotype>> createAlleleMapper(final VariantConte
for (final Haplotype h : haplotypes) {

// Partially determined haplotypes know at what position they are determined, only determined position haps should be considered for genotyping
if (h instanceof PartiallyDeterminedHaplotype && ((PartiallyDeterminedHaplotype) h).getDeterminedPosition() != loc) {
if (h.isPartiallyDetermined() && ((PartiallyDeterminedHaplotype) h).getDeterminedPosition() != loc) {
continue;
}
final List<VariantContext> spanningEvents = h.getEventMap().getOverlappingEvents(loc);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public final class AssemblyResultSet {
.thenComparing(vc -> vc.getAlternateAllele(0));
private final Map<Integer,AssemblyResult> assemblyResultByKmerSize;
private final Set<Haplotype> haplotypes;
private Set<Haplotype> originalAssemblyHaps = new HashSet<>();
private Set<Haplotype> originalAssemblyHaps = new LinkedHashSet<>();
private final Map<Haplotype,AssemblyResult> assemblyResultByHaplotype;
private AssemblyRegion regionForGenotyping;
private byte[] fullReferenceWithPadding;
Expand Down Expand Up @@ -594,7 +594,9 @@ public void replaceAllHaplotypes(Set<Haplotype> list) {
}
}

//TODO this is REALLY BAD get rid of it for future code:
// For PDHMM use: Remove a haplotype from this AssemblyResultSet and update all of the various references in the
// object to that haplotype to be current.
// WARNING: Deleting haplotypes in this manner is highly dangerous and will likely lead to lost variants
public void removeHapltotype(final Haplotype hap) {

haplotypes.remove(hap);
Expand All @@ -604,7 +606,6 @@ public void removeHapltotype(final Haplotype hap) {
discovered.remove(hap);
assemblyResultByKmerSize.get(kmerSize).setDiscoveredHaplotypes(discovered);
}
assemblyResultByKmerSize.keySet().forEach(kmer -> assemblyResultByKmerSize.get(kmer).getDiscoveredHaplotypes());
}

public void setPartiallyDeterminedMode() {
Expand All @@ -615,8 +616,10 @@ public boolean isPartiallyDeterminedList() {
return isPartiallyDeterminedList;
}

// For PDHMM use: store original assembly haplotypes if we are injecting artificial haplotypes later
// WARNING: This is likely not set in every case, use with caution
public void storeAssemblyHaplotypes() {
originalAssemblyHaps = new HashSet<>(haplotypes);
originalAssemblyHaps = new LinkedHashSet<>(haplotypes);
}

public boolean hasOverwrittenHaps() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.*;
import org.broadinstitute.hellbender.utils.haplotype.Haplotype;
import org.broadinstitute.hellbender.utils.haplotype.PartiallyDeterminedHaplotype;
import org.broadinstitute.hellbender.utils.read.*;
import org.broadinstitute.hellbender.utils.haplotype.FlowBasedHaplotype;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,9 @@
import org.broadinstitute.hellbender.tools.walkers.annotator.Annotation;
import org.broadinstitute.hellbender.tools.walkers.annotator.HaplotypeFilteringAnnotation;
import org.broadinstitute.hellbender.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAssignmentMethod;
import org.broadinstitute.hellbender.transformers.DRAGENMappingQualityReadTransformer;
import org.broadinstitute.hellbender.transformers.ReadTransformer;
import org.broadinstitute.hellbender.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.hellbender.cmdline.ModeArgumentUtils;

import java.util.Collection;
import java.util.List;
Expand Down Expand Up @@ -172,21 +170,21 @@ public List<ReadFilter> getDefaultReadFilters() {
*/
@Override
protected String[] customCommandLineValidation() {
if ((hcArgs.dragenMode || hcArgs.dragen378Mode) && hcArgs.flowMode != HaplotypeCallerArgumentCollection.FlowMode.NONE ) {
if ((hcArgs.isDragenGATKMode()) && hcArgs.isFlowBasedCallingMode()) {
throw new UserException("dragen mode and flow mode can't be both specified");
}

if (hcArgs.dragenMode || hcArgs.dragen378Mode) {
if (hcArgs.isDragenGATKMode()) {
final GATKReadFilterPluginDescriptor readFilterPlugin =
getCommandLineParser().getPluginDescriptor(GATKReadFilterPluginDescriptor.class);
Optional<ReadFilter> filterOptional = readFilterPlugin.getResolvedInstances().stream().filter(rf -> rf instanceof MappingQualityReadFilter).findFirst();
filterOptional.ifPresent(readFilter -> ((MappingQualityReadFilter) readFilter).minMappingQualityScore = 1);
ModeArgumentUtils.setArgValues(
getCommandLineParser(),
hcArgs.dragen378Mode? hcArgs.getDragenVersion2NameValuePairs() : hcArgs.getDragenNameValuePairs(),
hcArgs.dragen378Mode? hcArgs.getDragenVersion378NameValuePairs() : hcArgs.getDragenVersion3412NameValuePairs(),
hcArgs.dragen378Mode? HaplotypeCallerArgumentCollection.DRAGEN_378_GATK_MODE_LONG_NAME : HaplotypeCallerArgumentCollection.DRAGEN_GATK_MODE_LONG_NAME);
}
if (hcArgs.flowMode != HaplotypeCallerArgumentCollection.FlowMode.NONE) {
if (hcArgs.isFlowBasedCallingMode()) {
ModeArgumentUtils.setArgValues(
getCommandLineParser(),
hcArgs.flowMode.getNameValuePairs(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,33 +154,42 @@ protected ReadThreadingAssemblerArgumentCollection getReadThreadingAssemblerArgu
*/
@Argument(fullName = DRAGEN_GATK_MODE_LONG_NAME, optional = true, doc="Single argument for enabling the bulk of DRAGEN-GATK features. NOTE: THIS WILL OVERWRITE PROVIDED ARGUMENT CHECK TOOL INFO TO SEE WHICH ARGUMENTS ARE SET).", mutex = {DRAGEN_378_GATK_MODE_LONG_NAME})
public Boolean dragenMode = false;

/**
* DRAGEN-GATK version 2: This includes PDHMM and Columnwise detection (with hopes to add Joint Detection and new STRE as well in the future)
*/
@Advanced
@Argument(fullName = DRAGEN_378_GATK_MODE_LONG_NAME, optional = true, doc="Single argument for enabling the bulk of DRAGEN-GATK features including new developments for concordance against DRAGEN 3.7.8. NOTE: THIS WILL OVERWRITE PROVIDED ARGUMENT CHECK TOOL INFO TO SEE WHICH ARGUMENTS ARE SET).", mutex = {DRAGEN_GATK_MODE_LONG_NAME})
public Boolean dragen378Mode = false;

@Advanced
@Argument(fullName = FLOW_GATK_MODE_LONG_NAME, optional = true, doc="Single argument for enabling the bulk of Flow Based features. NOTE: THIS WILL OVERWRITE PROVIDED ARGUMENT CHECK TOOL INFO TO SEE WHICH ARGUMENTS ARE SET).")
public FlowMode flowMode = FlowMode.NONE;

@Advanced
@Argument(fullName = APPLY_BQD_LONG_NAME, doc = "If enabled this argument will apply the DRAGEN-GATK BaseQualityDropout model to the genotyping model for filtering sites due to Linked Error mode.", optional = true)
public boolean applyBQD = false;

@Advanced
@Argument(fullName = APPLY_FRD_LONG_NAME, doc = "If enabled this argument will apply the DRAGEN-GATK ForeignReadDetection model to the genotyping model for filtering sites.", optional = true)
public boolean applyFRD = false;

@Advanced
@Argument(fullName = DISABLE_SPANNING_EVENT_GENOTYPING_LONG_NAME, doc = "If enabled this argument will disable inclusion of the '*' spanning event when genotyping events that overlap deletions", optional = true)
public boolean disableSpanningEventGenotyping = false;

@Advanced
@Argument(fullName = TRANSFORM_DRAGEN_MAPPING_QUALITY_LONG_NAME, doc = "If enabled this argument will map DRAGEN aligner aligned reads with mapping quality <=250 to scale up to MQ 50", optional = true)
public boolean transformDRAGENMapQ = false;
//TODO NOTE TO THE REVIEWER, THIS ARGUMENT IS INSUFFICIENT BOTH THIS AND --minimum-mapping-quality must be set, unfortunatley

//TODO NOTE TO THE REVIEWER, THIS ARGUMENT IS INSUFFICIENT BOTH THIS AND --minimum-mapping-quality must be set, unfortunately
//TODO they can't be unified since the toolDefaultReadFilters get instantiated before this field gets populated, and we can't
//TODO pull the threshold from that filter since it might or might not exist by the time we go to filter for threading, really
//TODO we should unify on the readFilter version of this check i think but perhaps they are seperate for athropological historical reasons and it is thus culturally protected?
//TODO we should unify on the readFilter version of this check i think but perhaps they are separate for anthropological historical reasons and it is thus culturally protected?
@Advanced
@Argument(fullName = MAPPING_QUALITY_THRESHOLD_FOR_GENOTYPING_LONG_NAME, doc = "Control the threshold for discounting reads from the genotyper due to mapping quality after the active region detection and assembly steps but before genotyping. NOTE: this is in contrast to the --"+ ReadFilterArgumentDefinitions.MINIMUM_MAPPING_QUALITY_NAME+" argument which filters reads from all parts of the HaplotypeCaller. If you would like to call genotypes with a different threshold both arguments must be set.", optional = true)
public int mappingQualityThreshold = HaplotypeCallerEngine.DEFAULT_READ_QUALITY_FILTER_THRESHOLD;

@Advanced
@Argument(fullName = MAX_EFFECTIVE_DEPTH_ADJUSTMENT_FOR_FRD_LONG_NAME, doc = "Set the maximum depth to modify FRD adjustment to in the event of high depth sites (0 to disable)", optional = false)
public int maxEffectiveDepthAdjustment = 0;
Expand Down Expand Up @@ -232,7 +241,7 @@ protected ReadThreadingAssemblerArgumentCollection getReadThreadingAssemblerArgu
@Argument(fullName= USE_FILTERED_READS_FOR_ANNOTATIONS_LONG_NAME, doc = "Use the contamination-filtered read maps for the purposes of annotating variants", optional=true)
public boolean useFilteredReadMapForAnnotations = false;

public String[] getDragenNameValuePairs() {
public String[] getDragenVersion3412NameValuePairs() {
return new String[]{
APPLY_BQD_LONG_NAME, "true",
APPLY_FRD_LONG_NAME, "true",
Expand All @@ -251,7 +260,15 @@ public String[] getDragenNameValuePairs() {
};
}

public String[] getDragenVersion2NameValuePairs() {
/**
* Extra arguments required for DRAGEN-GATK to be Functionally-Equivalent to DRAGEN v3.7.8. These are a super-set
* of the arguments above required for DRAGEN v3.4.12 Functional-Equivalence.
* The notable differences in the two versions are as follows:
* - v3.7.8 is run with Pileup-Calling enabled which is equivalent to DRAGEN "Joint Detection" in function
* - v3.7.8 adds heuristics to the Pileup-Caller to filter out false-positives. These heuristics may also filter out false positives from the assembly graphs.
* - v3.7.8 is run with the PartiallyDetermined HMM (or "PDHMM") in place of the normal HMM for assigning allele likelihoods
*/
public String[] getDragenVersion378NameValuePairs() {
return new String[]{
APPLY_BQD_LONG_NAME, "true",
APPLY_FRD_LONG_NAME, "true",
Expand All @@ -273,7 +290,7 @@ public String[] getDragenVersion2NameValuePairs() {
PileupDetectionArgumentCollection.PILEUP_DETECTION_ABSOLUTE_ALT_DEPTH, "0",
PileupDetectionArgumentCollection.PILEUP_DETECTION_BAD_READ_RATIO_LONG_NAME, "0.40",
PileupDetectionArgumentCollection.PILEUP_DETECTION_ENABLE_INDELS, "true",
PileupDetectionArgumentCollection.PILEUP_DETECTION_ACTIVE_REGION_LOD_THRESHOLD_LONG_NAME, "3.0",
PileupDetectionArgumentCollection.PILEUP_DETECTION_ACTIVE_REGION_PHRED_THRESHOLD_LONG_NAME, "3.0",
PileupDetectionArgumentCollection.GENERATE_PARTIALLY_DETERMINED_HAPLOTYPES_LONG_NAME, "true"
};
}
Expand All @@ -284,6 +301,17 @@ public String[] getDragenVersion2NameValuePairs() {
@Argument(fullName = STEPWISE_FITLERING_ARGUMENT, doc = "If enabled, this will create a FlowBasedAligner to use for filtering haplotypes before using another likelihoods engine for scoring.")
public boolean stepwiseFiltering = false;

// Should we make any alterations to the pipeline for DRAGEN-GATK
boolean isDragenGATKMode() {
return dragenMode || dragen378Mode;
}

// Should we make any alterations to the pipeline for FlowBasedCalling
boolean isFlowBasedCallingMode() {
return flowMode != FlowMode.NONE;
}


/**
* the different flow modes, in terms of their parameters and their values
*
Expand Down
Loading

0 comments on commit a690509

Please sign in to comment.