Skip to content

Commit

Permalink
fixed an issue where it might output variants out of order across ass…
Browse files Browse the repository at this point in the history
…embly regions
  • Loading branch information
jamesemery committed Aug 2, 2021
1 parent fe90587 commit 9366975
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ public final class HaplotypeCallerEngine implements AssemblyRegionEvaluator {
private Optional<HaplotypeBAMWriter> haplotypeBAMWriter;
// writes Variants from assembly graph
private Optional<VariantContextWriter> assembledEventMapVcfOutputWriter;
private Optional<PriorityQueue<VariantContext>> assembledEventMapVariants;

private Set<String> sampleSet;
private SampleList samplesList;
Expand Down Expand Up @@ -248,6 +249,8 @@ private void initialize(boolean createBamOutIndex, final boolean createBamOutMD5
false,
Options.DO_NOT_WRITE_GENOTYPES, Options.INDEX_ON_THE_FLY)
: null);
assembledEventMapVariants = Optional.ofNullable(hcArgs.assemblerArgs.debugAssemblyVariantsOut != null ?
new PriorityQueue<>(200, new VariantContextComparator(readsHeader.getSequenceDictionary())) : null);
assembledEventMapVcfOutputWriter.ifPresent(writer -> writeHeader(writer, readsHeader.getSequenceDictionary(), new HashSet<>()));
likelihoodCalculationEngine = AssemblyBasedCallerUtils.createLikelihoodCalculationEngine(hcArgs.likelihoodArgs, !hcArgs.softClipLowQualityEnds);
}
Expand Down Expand Up @@ -581,8 +584,7 @@ public List<VariantContext> callRegion(final AssemblyRegion region, final Featur

// run the local assembler, getting back a collection of information on how we should proceed
final AssemblyResultSet untrimmedAssemblyResult = AssemblyBasedCallerUtils.assembleReads(region, givenAlleles, hcArgs, readsHeader, samplesList, logger, referenceReader, assemblyEngine, aligner, !hcArgs.doNotCorrectOverlappingBaseQualities);
assembledEventMapVcfOutputWriter.ifPresent(writer ->
untrimmedAssemblyResult.getVariationEvents(hcArgs.maxMnpDistance).forEach(writer::add));
ReadThreadingAssembler.addAssembledVariantsToEventMapOutput(untrimmedAssemblyResult, assembledEventMapVariants, hcArgs.maxMnpDistance, assembledEventMapVcfOutputWriter);

if (assemblyDebugOutStream != null) {
try {
Expand Down Expand Up @@ -787,7 +789,7 @@ public void shutdown() {
if ( haplotypeBAMWriter.isPresent() ) {
haplotypeBAMWriter.get().close();
}
assembledEventMapVcfOutputWriter.ifPresent(VariantContextWriter::close);
assembledEventMapVcfOutputWriter.ifPresent(writer -> {assembledEventMapVariants.get().forEach(writer::add); writer.close();});
if ( referenceReader != null){
try {
referenceReader.close();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.gatk.nativebindings.smithwaterman.SWOverhangStrategy;
Expand Down Expand Up @@ -870,4 +872,14 @@ public void setArtificialHaplotypeRecoveryMode(boolean disableUncoveredJunctionT
recoverHaplotypesFromEdgesNotCoveredInJunctionTrees = false;
}
}

// Debug output writer for the event map that ensures events are correctly sorted (even if there are cross region overlaps)
public static void addAssembledVariantsToEventMapOutput(final AssemblyResultSet untrimmedAssemblyResult, final Optional<PriorityQueue<VariantContext>> assembledEventMapVariants, final int maxMnpDistance, final Optional<VariantContextWriter> assembledEventMapVcfOutputWriter) {
assembledEventMapVariants.ifPresent(queue ->
untrimmedAssemblyResult.getVariationEvents(maxMnpDistance).forEach(event -> {
if (queue.size() >= 300) {
queue.stream().limit(200).forEachOrdered(vc -> assembledEventMapVcfOutputWriter.get().add(vc));
}
queue.add(event);}));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
import htsjdk.samtools.util.Locatable;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextComparator;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.SortingVariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFHeader;
Expand Down Expand Up @@ -104,6 +106,7 @@ public final class Mutect2Engine implements AssemblyRegionEvaluator {
private SomaticGenotypingEngine genotypingEngine;
private Optional<HaplotypeBAMWriter> haplotypeBAMWriter;
private Optional<VariantContextWriter> assembledEventMapVcfOutputWriter;
private Optional<PriorityQueue<VariantContext>> assembledEventMapVariants;
private VariantAnnotatorEngine annotationEngine;
private final SmithWatermanAligner aligner;
private final AssemblyRegionTrimmer trimmer;
Expand Down Expand Up @@ -163,6 +166,8 @@ public Mutect2Engine(final M2ArgumentCollection MTAC, AssemblyRegionArgumentColl
false,
Options.DO_NOT_WRITE_GENOTYPES, Options.INDEX_ON_THE_FLY)
: null);
assembledEventMapVariants = Optional.ofNullable(MTAC.assemblerArgs.debugAssemblyVariantsOut != null ?
new PriorityQueue<>(200, new VariantContextComparator(header.getSequenceDictionary())) : null);
assembledEventMapVcfOutputWriter.ifPresent(writer -> {VCFHeader head = new VCFHeader(); head.getSequenceDictionary(); writer.writeHeader(head);});
}

Expand Down Expand Up @@ -242,8 +247,7 @@ public List<VariantContext> callRegion(final AssemblyRegion originalAssemblyRegi
.filter(vc -> MTAC.forceCallFiltered || vc.isNotFiltered()).collect(Collectors.toList());

final AssemblyResultSet untrimmedAssemblyResult = AssemblyBasedCallerUtils.assembleReads(originalAssemblyRegion, givenAlleles, MTAC, header, samplesList, logger, referenceReader, assemblyEngine, aligner, false);
assembledEventMapVcfOutputWriter.ifPresent(writer ->
untrimmedAssemblyResult.getVariationEvents(MTAC.maxMnpDistance).forEach(writer::add));
ReadThreadingAssembler.addAssembledVariantsToEventMapOutput(untrimmedAssemblyResult, assembledEventMapVariants, MTAC.maxMnpDistance, assembledEventMapVcfOutputWriter);

final SortedSet<VariantContext> allVariationEvents = untrimmedAssemblyResult.getVariationEvents(MTAC.maxMnpDistance);
final AssemblyRegionTrimmer.Result trimmingResult = trimmer.trim(originalAssemblyRegion, allVariationEvents, referenceContext);
Expand Down Expand Up @@ -369,7 +373,7 @@ public void shutdown() {
likelihoodCalculationEngine.close();
aligner.close();
haplotypeBAMWriter.ifPresent(HaplotypeBAMWriter::close);
assembledEventMapVcfOutputWriter.ifPresent(VariantContextWriter::close);
assembledEventMapVcfOutputWriter.ifPresent(writer -> {assembledEventMapVariants.get().forEach(writer::add); writer.close();});
referenceReader.close();
}

Expand Down

0 comments on commit 9366975

Please sign in to comment.