Skip to content

Commit

Permalink
Added a debug option to output a side-vcf with variants detected by a…
Browse files Browse the repository at this point in the history
…ssembly for HaplotypeCaller and Mutect (#7384)
  • Loading branch information
jamesemery committed Oct 29, 2021
1 parent b8669c9 commit 29d5649
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ public final class HaplotypeCallerEngine implements AssemblyRegionEvaluator {

// writes Haplotypes to a bam file when the -bamout option is specified
private Optional<HaplotypeBAMWriter> haplotypeBAMWriter;
// writes Variants from assembly graph
private Optional<VariantContextWriter> assembledEventMapVcfOutputWriter;
private Optional<PriorityQueue<VariantContext>> assembledEventMapVariants;

private Set<String> sampleSet;
private SampleList samplesList;
Expand Down Expand Up @@ -239,6 +242,16 @@ private void initialize(boolean createBamOutIndex, final boolean createBamOutMD5

haplotypeBAMWriter = AssemblyBasedCallerUtils.createBamWriter(hcArgs, createBamOutIndex, createBamOutMD5, readsHeader);
assemblyEngine = hcArgs.createReadThreadingAssembler();
assembledEventMapVcfOutputWriter = Optional.ofNullable(hcArgs.assemblerArgs.debugAssemblyVariantsOut != null ?
GATKVariantContextUtils.createVCFWriter(
new GATKPath(hcArgs.assemblerArgs.debugAssemblyVariantsOut).toPath(),
readsHeader.getSequenceDictionary(),
false,
Options.DO_NOT_WRITE_GENOTYPES, Options.INDEX_ON_THE_FLY)
: null);
assembledEventMapVariants = Optional.ofNullable(hcArgs.assemblerArgs.debugAssemblyVariantsOut != null ?
new PriorityQueue<>(200, new VariantContextComparator(readsHeader.getSequenceDictionary())) : null);
assembledEventMapVcfOutputWriter.ifPresent(writer -> writeHeader(writer, readsHeader.getSequenceDictionary(), new HashSet<>()));
likelihoodCalculationEngine = AssemblyBasedCallerUtils.createLikelihoodCalculationEngine(hcArgs.likelihoodArgs, !hcArgs.softClipLowQualityEnds);
}

Expand Down Expand Up @@ -571,6 +584,7 @@ public List<VariantContext> callRegion(final AssemblyRegion region, final Featur

// run the local assembler, getting back a collection of information on how we should proceed
final AssemblyResultSet untrimmedAssemblyResult = AssemblyBasedCallerUtils.assembleReads(region, givenAlleles, hcArgs, readsHeader, samplesList, logger, referenceReader, assemblyEngine, aligner, !hcArgs.doNotCorrectOverlappingBaseQualities);
ReadThreadingAssembler.addAssembledVariantsToEventMapOutput(untrimmedAssemblyResult, assembledEventMapVariants, hcArgs.maxMnpDistance, assembledEventMapVcfOutputWriter);

if (assemblyDebugOutStream != null) {
try {
Expand Down Expand Up @@ -776,6 +790,7 @@ public void shutdown() {
if ( haplotypeBAMWriter.isPresent() ) {
haplotypeBAMWriter.get().close();
}
assembledEventMapVcfOutputWriter.ifPresent(writer -> {assembledEventMapVariants.get().forEach(writer::add); writer.close();});
if ( referenceReader != null){
try {
referenceReader.close();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import org.broadinstitute.barclay.argparser.Advanced;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.Hidden;
import org.broadinstitute.hellbender.engine.spark.AssemblyRegionArgumentCollection;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
import org.broadinstitute.hellbender.utils.MathUtils;

Expand Down Expand Up @@ -168,7 +167,9 @@ public abstract class ReadThreadingAssemblerArgumentCollection implements Serial
@Hidden
@Argument(fullName="debug-graph-transformations", doc="Write DOT formatted graph files out of the assembler for only this graph size", optional = true)
public boolean debugGraphTransformations = false;

@Hidden
@Argument(fullName = "debug-assembly-variants-out", doc="Write a VCF file containing all of the variants from the event map (pre-filter) for each active region", optional = true)
public String debugAssemblyVariantsOut = null;
/**
* This argument is meant for debugging and is not immediately useful for normal analysis use.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.gatk.nativebindings.smithwaterman.SWOverhangStrategy;
Expand Down Expand Up @@ -806,6 +808,10 @@ public void setGraphWriter(File graphOutputPath) {
this.graphOutputPath = graphOutputPath;
}

public void setEventMapOut(File graphOutputPath) {
this.graphOutputPath = graphOutputPath;
}

public byte getMinBaseQualityToUseInAssembly() {
return minBaseQualityToUseInAssembly;
}
Expand Down Expand Up @@ -875,4 +881,14 @@ public void setArtificialHaplotypeRecoveryMode(boolean disableUncoveredJunctionT
recoverHaplotypesFromEdgesNotCoveredInJunctionTrees = false;
}
}

// Debug output writer for the event map that ensures events are correctly sorted (even if there are cross region overlaps)
public static void addAssembledVariantsToEventMapOutput(final AssemblyResultSet untrimmedAssemblyResult, final Optional<PriorityQueue<VariantContext>> assembledEventMapVariants, final int maxMnpDistance, final Optional<VariantContextWriter> assembledEventMapVcfOutputWriter) {
assembledEventMapVariants.ifPresent(queue ->
untrimmedAssemblyResult.getVariationEvents(maxMnpDistance).forEach(event -> {
if (queue.size() >= 300) {
queue.stream().limit(200).forEachOrdered(vc -> assembledEventMapVcfOutputWriter.get().add(vc));
}
queue.add(event);}));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import htsjdk.samtools.util.Locatable;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextComparator;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFHeader;
Expand Down Expand Up @@ -50,6 +52,7 @@
import org.broadinstitute.hellbender.utils.smithwaterman.SmithWatermanAligner;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;

import java.io.File;
import java.nio.file.Path;
Expand Down Expand Up @@ -101,6 +104,8 @@ public final class Mutect2Engine implements AssemblyRegionEvaluator {
private ReadLikelihoodCalculationEngine likelihoodCalculationEngine;
private SomaticGenotypingEngine genotypingEngine;
private Optional<HaplotypeBAMWriter> haplotypeBAMWriter;
private Optional<VariantContextWriter> assembledEventMapVcfOutputWriter;
private Optional<PriorityQueue<VariantContext>> assembledEventMapVariants;
private VariantAnnotatorEngine annotationEngine;
private final SmithWatermanAligner aligner;
private final AssemblyRegionTrimmer trimmer;
Expand Down Expand Up @@ -153,6 +158,16 @@ public Mutect2Engine(final M2ArgumentCollection MTAC, AssemblyRegionArgumentColl
referenceConfidenceModel = new SomaticReferenceConfidenceModel(samplesList, header, 0, MTAC.minAF); //TODO: do something classier with the indel size arg
final List<String> tumorSamples = ReadUtils.getSamplesFromHeader(header).stream().filter(this::isTumorSample).collect(Collectors.toList());
f1R2CountsCollector = MTAC.f1r2TarGz == null ? Optional.empty() : Optional.of(new F1R2CountsCollector(MTAC.f1r2Args, header, MTAC.f1r2TarGz, tumorSamples));
assembledEventMapVcfOutputWriter = Optional.ofNullable(MTAC.assemblerArgs.debugAssemblyVariantsOut != null ?
GATKVariantContextUtils.createVCFWriter(
new GATKPath(MTAC.assemblerArgs.debugAssemblyVariantsOut).toPath(),
header.getSequenceDictionary(),
false,
Options.DO_NOT_WRITE_GENOTYPES, Options.INDEX_ON_THE_FLY)
: null);
assembledEventMapVariants = Optional.ofNullable(MTAC.assemblerArgs.debugAssemblyVariantsOut != null ?
new PriorityQueue<>(200, new VariantContextComparator(header.getSequenceDictionary())) : null);
assembledEventMapVcfOutputWriter.ifPresent(writer -> {VCFHeader head = new VCFHeader(); head.getSequenceDictionary(); writer.writeHeader(head);});
}

//default M2 read filters. Cheap ones come first in order to fail fast.
Expand Down Expand Up @@ -231,6 +246,7 @@ public List<VariantContext> callRegion(final AssemblyRegion originalAssemblyRegi
.filter(vc -> MTAC.forceCallFiltered || vc.isNotFiltered()).collect(Collectors.toList());

final AssemblyResultSet untrimmedAssemblyResult = AssemblyBasedCallerUtils.assembleReads(originalAssemblyRegion, givenAlleles, MTAC, header, samplesList, logger, referenceReader, assemblyEngine, aligner, false);
ReadThreadingAssembler.addAssembledVariantsToEventMapOutput(untrimmedAssemblyResult, assembledEventMapVariants, MTAC.maxMnpDistance, assembledEventMapVcfOutputWriter);

final SortedSet<VariantContext> allVariationEvents = untrimmedAssemblyResult.getVariationEvents(MTAC.maxMnpDistance);
final AssemblyRegionTrimmer.Result trimmingResult = trimmer.trim(originalAssemblyRegion, allVariationEvents, referenceContext);
Expand Down Expand Up @@ -355,7 +371,8 @@ public void writeExtraOutputs(final File statsTable) {
public void shutdown() {
likelihoodCalculationEngine.close();
aligner.close();
haplotypeBAMWriter.ifPresent(writer -> writer.close());
haplotypeBAMWriter.ifPresent(HaplotypeBAMWriter::close);
assembledEventMapVcfOutputWriter.ifPresent(writer -> {assembledEventMapVariants.get().forEach(writer::add); writer.close();});
referenceReader.close();
}

Expand Down

0 comments on commit 29d5649

Please sign in to comment.