Skip to content

Commit

Permalink
Extracted a cache of Genotypers indexed by ploidy (#8598)
Browse files Browse the repository at this point in the history
* Introduced a new class MulitPloidyGenotyperCache which keeps track of multiple GenotypingEngines that have different ploidy values.  
* minor refactoring in related classes
* removed an unused type parameter from GenotypingEngine
  • Loading branch information
lbergelson committed Dec 8, 2023
1 parent d8e2b89 commit a40ba3e
Show file tree
Hide file tree
Showing 13 changed files with 253 additions and 146 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public class GenotypeGVCFsEngine
private VariantAnnotatorEngine annotationEngine = null;

//the genotyping engine
private GenotypingEngine<?> forceOutputGenotypingEngine = null;
private GenotypingEngine forceOutputGenotypingEngine = null;
private MinimalGenotypingEngine genotypingEngine = null;

// the INFO field annotation key names to remove
Expand Down Expand Up @@ -303,7 +303,7 @@ private VariantContext callSomaticGenotypes(final VariantContext vc, double tlod
final VariantContextBuilder builder = new VariantContextBuilder(vc);
final VariantContext regenotypedVC = builder.genotypes(newGenotypes).make();

final int maxAltAlleles = genotypingEngine.getConfiguration().genotypeArgs.maxAlternateAlleles;
final int maxAltAlleles = genotypingEngine.getGenotypeArgs().maxAlternateAlleles;
List<Allele> allelesToKeep;

//we need to make sure all alleles pass the tlodThreshold
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ public GenotypeCalculationArgumentCollection clone() {
* Using this argument instructs the genotyper to annotate (in the INFO field) the number of alternate alleles that were originally discovered at the site.
*/
@Argument(fullName = "annotate-with-num-discovered-alleles", doc = "If provided, we will annotate records with the number of alternate alleles that were discovered (but not necessarily genotyped) at a given site", optional = true)
public boolean ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = false;
public boolean annotateNumberOfAllelesDiscovered = false;

/**
* The expected heterozygosity value used to compute prior probability that a locus is non-reference.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,13 @@
/**
* Base class for genotyper engines.
*/
public abstract class GenotypingEngine<Config extends StandardCallerArgumentCollection> {
public abstract class GenotypingEngine {

private final static int TOO_LONG_PL = 100000;

protected final AlleleFrequencyCalculator alleleFrequencyCalculator;

protected final Config configuration;
private final StandardCallerArgumentCollection configuration;

protected VariantAnnotatorEngine annotationEngine;

Expand Down Expand Up @@ -63,7 +63,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
*
* @throws IllegalArgumentException if any of {@code samples}, {@code configuration} is {@code null} or if {@code samples} is empty.
*/
protected GenotypingEngine(final Config configuration,
protected GenotypingEngine(final StandardCallerArgumentCollection configuration,
final SampleList samples,
final boolean doAlleleSpecificCalcs) {
this.configuration = Utils.nonNull(configuration, "the configuration cannot be null");
Expand All @@ -89,12 +89,16 @@ public void setLogger(final Logger logger) {

public Set<VCFInfoHeaderLine> getAppropriateVCFInfoHeaders() {
final Set<VCFInfoHeaderLine> headerInfo = new LinkedHashSet<>();
if ( configuration.genotypeArgs.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED ) {
if ( getGenotypeArgs().annotateNumberOfAllelesDiscovered) {
headerInfo.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.NUMBER_OF_DISCOVERED_ALLELES_KEY));
}
return headerInfo;
}

public GenotypeCalculationArgumentCollection getGenotypeArgs() {
return getConfiguration().genotypeArgs;
}

/**
* Changes the annotation engine for this genotyping-engine.
*
Expand All @@ -109,7 +113,7 @@ public void setAnnotationEngine(final VariantAnnotatorEngine annotationEngine) {
*
* @return never {@code null}.
*/
public Config getConfiguration() {
protected StandardCallerArgumentCollection getConfiguration() {
return configuration;
}

Expand All @@ -131,8 +135,8 @@ public VariantContext calculateGenotypes(final VariantContext vc, final Genotype
return null;
}

final int defaultPloidy = configuration.genotypeArgs.samplePloidy;
final int maxAltAlleles = configuration.genotypeArgs.maxAlternateAlleles;
final int defaultPloidy = getGenotypeArgs().samplePloidy;
final int maxAltAlleles = getGenotypeArgs().maxAlternateAlleles;

VariantContext reducedVC = vc;
if (maxAltAlleles < vc.getAlternateAlleles().size()) {
Expand All @@ -156,7 +160,7 @@ public VariantContext calculateGenotypes(final VariantContext vc, final Genotype

// note the math.abs is necessary because -10 * 0.0 => -0.0 which isn't nice
final double log10Confidence =
!outputAlternativeAlleles.siteIsMonomorphic || configuration.annotateAllSitesWithPLs
!outputAlternativeAlleles.siteIsMonomorphic || getConfiguration().annotateAllSitesWithPLs
? AFresult.log10ProbOnlyRefAlleleExists() + 0.0 : AFresult.log10ProbVariantPresent() + 0.0;

// Add 0.0 removes -0.0 occurrences.
Expand Down Expand Up @@ -188,11 +192,11 @@ && noAllelesOrFirstAlleleIsNotNonRef(outputAlternativeAlleles.alleles) && forced
// create the genotypes
//TODO: omit subsetting if output alleles is not a proper subset of vc.getAlleles
final GenotypesContext genotypes = outputAlleles.size() == 1 ? GATKVariantContextUtils.subsetToRefOnly(vc, defaultPloidy) :
AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), outputAlleles, gpc, configuration.genotypeArgs.genotypeAssignmentMethod);
AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), outputAlleles, gpc, getGenotypeArgs().genotypeAssignmentMethod);

if (configuration.genotypeArgs.usePosteriorProbabilitiesToCalculateQual && hasPosteriors(genotypes)) {
if (getGenotypeArgs().usePosteriorProbabilitiesToCalculateQual && hasPosteriors(genotypes)) {
final double log10NoVariantPosterior = phredNoVariantPosteriorProbability(outputAlleles, genotypes) * -.1;
final double qualUpdate = !outputAlternativeAlleles.siteIsMonomorphic || configuration.annotateAllSitesWithPLs
final double qualUpdate = !outputAlternativeAlleles.siteIsMonomorphic || getConfiguration().annotateAllSitesWithPLs
? log10NoVariantPosterior + 0.0 : MathUtils.log10OneMinusPow10(log10NoVariantPosterior) + 0.0;
if (!Double.isNaN(qualUpdate)) {
builder.log10PError(qualUpdate);
Expand Down Expand Up @@ -260,30 +264,23 @@ static boolean noAllelesOrFirstAlleleIsNotNonRef(List<Allele> altAlleles) {
protected abstract String callSourceString();

/**
* Holds information about the alternative allele subsetting based on supporting evidence, genotyping and
* output modes.
*/
private static class OutputAlleleSubset {
private final List<Allele> alleles;
private final boolean siteIsMonomorphic;
private final List<Integer> mleCounts;

private OutputAlleleSubset(final List<Allele> alleles, final List<Integer> mleCounts, final boolean siteIsMonomorphic) {
* Holds information about the alternative allele subsetting based on supporting evidence, genotyping and
* output modes.
*/
private record OutputAlleleSubset(List<Allele> alleles, List<Integer> mleCounts, boolean siteIsMonomorphic) {
private OutputAlleleSubset {
Utils.nonNull(alleles, "alleles");
Utils.nonNull(mleCounts, "mleCounts");
this.siteIsMonomorphic = siteIsMonomorphic;
this.alleles = alleles;
this.mleCounts = mleCounts;
}

private List<Allele> outputAlleles(final Allele referenceAllele) {
return Stream.concat(Stream.of(referenceAllele), alleles.stream()).collect(Collectors.toList());
}
private List<Allele> outputAlleles(final Allele referenceAllele) {
return Stream.concat(Stream.of(referenceAllele), alleles.stream()).collect(Collectors.toList());
}

public List<Integer> alternativeAlleleMLECounts() {
return mleCounts;
public List<Integer> alternativeAlleleMLECounts() {
return mleCounts;
}
}
}


/**
Expand All @@ -308,7 +305,7 @@ private OutputAlleleSubset calculateOutputAlleleSubset(final AFCalculationResult
// we want to keep the NON_REF symbolic allele but only in the absence of a non-symbolic allele, e.g.
// if we combined a ref / NON_REF gVCF with a ref / alt gVCF
final boolean isNonRefWhichIsLoneAltAllele = alternativeAlleleCount == 1 && allele.equals(Allele.NON_REF_ALLELE);
final boolean isPlausible = afCalculationResult.passesThreshold(allele, configuration.genotypeArgs.standardConfidenceForCalling);
final boolean isPlausible = afCalculationResult.passesThreshold(allele, getGenotypeArgs().standardConfidenceForCalling);

//it's possible that the upstream deletion that spanned this site was not emitted, mooting the symbolic spanning deletion allele
final boolean isSpuriousSpanningDeletion = GATKVCFConstants.isSpanningDeletion(allele) && !isVcCoveredByDeletion(vc);
Expand Down Expand Up @@ -415,16 +412,16 @@ protected final boolean cannotBeGenotyped(final VariantContext vc) {
* This does not necessarily emit calls for all sites in a region (see {@link OutputMode}).
*/
protected boolean emitAllActiveSites() {
return configuration.outputMode == OutputMode.EMIT_ALL_ACTIVE_SITES;
return getConfiguration().outputMode == OutputMode.EMIT_ALL_ACTIVE_SITES;
}

protected final boolean passesEmitThreshold(final double conf, final boolean bestGuessIsRef) {
return (configuration.outputMode == OutputMode.EMIT_ALL_CONFIDENT_SITES || !bestGuessIsRef) &&
return (getConfiguration().outputMode == OutputMode.EMIT_ALL_CONFIDENT_SITES || !bestGuessIsRef) &&
passesCallThreshold(conf);
}

protected final boolean passesCallThreshold(final double conf) {
return conf >= configuration.genotypeArgs.standardConfidenceForCalling;
return conf >= getGenotypeArgs().standardConfidenceForCalling;
}

protected Map<String,Object> composeCallAttributes(final VariantContext vc, final List<Integer> alleleCountsofMLE,
Expand Down Expand Up @@ -457,7 +454,7 @@ protected Map<String,Object> composeCallAttributes(final VariantContext vc, fina
attributes.put(GATKVCFConstants.AS_QUAL_KEY, perAlleleQuals.stream().mapToInt(q -> Math.round(q)).boxed().collect(Collectors.toList()));
}

if ( configuration.genotypeArgs.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED ) {
if ( getGenotypeArgs().annotateNumberOfAllelesDiscovered) {
attributes.put(GATKVCFConstants.NUMBER_OF_DISCOVERED_ALLELES_KEY, vc.getAlternateAlleles().size());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
* used only by the HaplotypeCaller for its isActive() determination. Should not be used for
* any other purpose!
*/
public final class MinimalGenotypingEngine extends GenotypingEngine<StandardCallerArgumentCollection> {
public final class MinimalGenotypingEngine extends GenotypingEngine {

private final DragstrParams dragstrParams;

Expand Down Expand Up @@ -61,7 +61,7 @@ public MinimalGenotypingEngine(final StandardCallerArgumentCollection configurat

@Override
protected boolean forceKeepAllele(final Allele allele) {
return configuration.annotateAllSitesWithPLs;
return getConfiguration().annotateAllSitesWithPLs;
}

@Override
Expand All @@ -71,8 +71,8 @@ protected String callSourceString() {

@Override
public VariantContext calculateGenotypes(final VariantContext vc) {
if (dragstrParams == null || getConfiguration().genotypeArgs.dontUseDragstrPriors || !GATKVariantContextUtils.containsInlineIndel(vc) || referenceContext == null) {
final GenotypePriorCalculator gpc = GenotypePriorCalculator.assumingHW(configuration.genotypeArgs);
if (dragstrParams == null || getGenotypeArgs().dontUseDragstrPriors || !GATKVariantContextUtils.containsInlineIndel(vc) || referenceContext == null) {
final GenotypePriorCalculator gpc = GenotypePriorCalculator.assumingHW(getGenotypeArgs());
return calculateGenotypes(vc, gpc, Collections.emptyList());
} else {

Expand All @@ -90,7 +90,7 @@ public VariantContext calculateGenotypes(final VariantContext vc) {
final int period = analyzer.period(startOffset);
final int repeats = analyzer.repeatLength(startOffset);

final GenotypePriorCalculator gpc = GenotypePriorCalculator.givenDragstrParams(dragstrParams, period, repeats, Math.log10(getConfiguration().genotypeArgs.snpHeterozygosity), 2.0);
final GenotypePriorCalculator gpc = GenotypePriorCalculator.givenDragstrParams(dragstrParams, period, repeats, Math.log10(getGenotypeArgs().snpHeterozygosity), 2.0);
return calculateGenotypes(vc, gpc, Collections.emptyList());
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import htsjdk.variant.variantcontext.VariantContext;
import org.apache.commons.collections4.map.DefaultedMap;
import org.apache.commons.lang3.SerializationUtils;
import org.broadinstitute.barclay.argparser.Advanced;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.ArgumentCollection;
Expand All @@ -17,7 +18,7 @@
/**
* This is pulled out so that every caller isn't exposed to the arguments from every other caller.
*/
public class StandardCallerArgumentCollection implements Serializable {
public final class StandardCallerArgumentCollection implements Serializable {
private static final long serialVersionUID = 1L;

/**
Expand All @@ -28,7 +29,7 @@ public class StandardCallerArgumentCollection implements Serializable {
public void copyStandardCallerArgsFrom( final StandardCallerArgumentCollection other ) {
Utils.nonNull(other);

this.genotypeArgs = other.genotypeArgs.clone();
this.genotypeArgs = SerializationUtils.clone(other.genotypeArgs);
this.CONTAMINATION_FRACTION = other.CONTAMINATION_FRACTION;
this.CONTAMINATION_FRACTION_FILE = other.CONTAMINATION_FRACTION_FILE != null ? new File(other.CONTAMINATION_FRACTION_FILE.getAbsolutePath()) : null;
if ( other.sampleContamination != null ) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public class AlleleFilteringHC extends AlleleFiltering {
public AlleleFilteringHC(HaplotypeCallerArgumentCollection _hcargs, OutputStreamWriter assemblyDebugStream, HaplotypeCallerGenotypingEngine _genotypingEngine){
super(_hcargs, assemblyDebugStream);
genotypingEngine = _genotypingEngine;
GenotypeCalculationArgumentCollection config = genotypingEngine.getConfiguration().genotypeArgs;
GenotypeCalculationArgumentCollection config = genotypingEngine.getGenotypeArgs();
afCalc = AlleleFrequencyCalculator.makeCalculator(config);
}

Expand Down
Loading

0 comments on commit a40ba3e

Please sign in to comment.