Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mutect2 germline resource can have split multiallelic format #8837

Merged
merged 5 commits into from
Jun 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -404,9 +404,7 @@ private <E> Optional<E> getForNormal(final Supplier<E> supplier) {
private static Map<String, Object> getNegativeLogPopulationAFAnnotation(List<VariantContext> germlineResourceVariants,
final List<Allele> allAlleles,
final double afOfAllelesNotInGermlineResource) {
final Optional<VariantContext> germlineVC = germlineResourceVariants.isEmpty() ? Optional.empty()
: Optional.of(germlineResourceVariants.get(0)); // assume only one VC per site
final double[] populationAlleleFrequencies = getGermlineAltAlleleFrequencies(allAlleles, germlineVC, afOfAllelesNotInGermlineResource);
final double[] populationAlleleFrequencies = getGermlineAltAlleleFrequencies(allAlleles, germlineResourceVariants, afOfAllelesNotInGermlineResource);

return ImmutableMap.of(GATKVCFConstants.POPULATION_AF_KEY, MathUtils.applyToArray(populationAlleleFrequencies, x -> - Math.log10(x)));
}
Expand All @@ -416,27 +414,35 @@ private static Map<String, Object> getNegativeLogPopulationAFAnnotation(List<Var
* @param allAlleles Every emitted allele, with the reference allele first. Only alt alleles are annotated, but we
* * need the ref allele in case the germline resource has a more or less parsimonious representation
* * For example, eg ref = A, alt = C; germline ref = AT, germline alt = CT
* @param germlineVC Germline resource variant context from which AF INFO field is drawn
* @param germlineVCs Germline resource variant contexts from which AF INFO field is drawn
* @param afOfAllelesNotInGermlineResource Default value of population AF annotation
* @return
*/
@VisibleForTesting
static double[] getGermlineAltAlleleFrequencies(final List<Allele> allAlleles, final Optional<VariantContext> germlineVC, final double afOfAllelesNotInGermlineResource) {
static double[] getGermlineAltAlleleFrequencies(final List<Allele> allAlleles, final List<VariantContext> germlineVCs, final double afOfAllelesNotInGermlineResource) {
Utils.validateArg(!allAlleles.isEmpty(), "allAlleles are empty -- there is not even a reference allele.");
if (germlineVC.isPresent()) {
if (! germlineVC.get().hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)) {
logger.warn("Germline resource variant at " + germlineVC.get().getContig() + ":" + germlineVC.get().getStart() +" missing AF attribute");
return Doubles.toArray(Collections.nCopies(allAlleles.size() - 1, afOfAllelesNotInGermlineResource));
final Map<Allele, Double> alleleFrequencies = new HashMap<>();
allAlleles.forEach(a -> alleleFrequencies.put(a, afOfAllelesNotInGermlineResource)); // initialize everything to the default

// look through every germline resource variant context at this locus and fill in the AFs
for (final VariantContext germlineVC : germlineVCs) {
if (! germlineVC.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)) {
logger.warn("Germline resource variant at " + germlineVC.getContig() + ":" + germlineVC.getStart() +" missing AF attribute");
}

List<OptionalInt> germlineIndices = GATKVariantContextUtils.alleleIndices(allAlleles, germlineVC.getAlleles());
final List<Double> germlineAltAFs = Mutect2Engine.getAttributeAsDoubleList(germlineVC, VCFConstants.ALLELE_FREQUENCY_KEY, afOfAllelesNotInGermlineResource);

if (germlineAltAFs.size() == (germlineVC.getNAlleles() - 1)) { // skip VCs with a bad AF field that got parsed as a wrong-length list
for (int alleleIndex = 1; alleleIndex < allAlleles.size(); alleleIndex++) { // start at 1 to skip the reference, which doesn't have an AF annotation
final Allele allele = allAlleles.get(alleleIndex);
// note the -1 since germlineAltAFs do not include ref
germlineIndices.get(alleleIndex).ifPresent(germlineIndex -> alleleFrequencies.put(allele, germlineAltAFs.get(germlineIndex - 1)));
}
}
List<OptionalInt> germlineIndices = GATKVariantContextUtils.alleleIndices(allAlleles, germlineVC.get().getAlleles());
final List<Double> germlineAltAFs = Mutect2Engine.getAttributeAsDoubleList(germlineVC.get(), VCFConstants.ALLELE_FREQUENCY_KEY, afOfAllelesNotInGermlineResource);

return germlineIndices.stream().skip(1) // skip the reference allele
.mapToDouble(idx -> idx.isPresent() ? germlineAltAFs.get(idx.getAsInt() - 1) : afOfAllelesNotInGermlineResource) // note the -1 since germlineAltAFs do not include ref
.toArray();
} else {
return Doubles.toArray(Collections.nCopies(allAlleles.size() - 1, afOfAllelesNotInGermlineResource));
}

return allAlleles.stream().skip(1).mapToDouble(alleleFrequencies::get).toArray(); // skip the reference allele
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import javax.ws.rs.core.Variant;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
Expand Down Expand Up @@ -53,8 +55,23 @@ public void testGetGermlineAltAlleleFrequencies(final List<Allele> calledAlleles
final int stop = start + length - 1;
final VariantContext vc1 = new VariantContextBuilder("SOURCE", "1", start, stop, germlineAlleles)
.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, germlineAltAFs).make();
final double[] result = SomaticGenotypingEngine.getGermlineAltAlleleFrequencies(calledAlleles, Optional.of(vc1), DEFAULT_AF);
final double[] result = SomaticGenotypingEngine.getGermlineAltAlleleFrequencies(calledAlleles, List.of(vc1), DEFAULT_AF);
Assert.assertEquals(result, expected, 1.0e-10);

// multiallelic -- test splitting into multiple VCs
if (germlineAlleles.size() > 2) {
final Allele ref = germlineAlleles.get(0);
final List<VariantContext> germlineVCs = new ArrayList<>();
for (int n = 1; n < germlineAlleles.size(); n++) {
final Allele alt = germlineAlleles.get(n);
final VariantContext splitVC = new VariantContextBuilder("SOURCE", "1", start, stop, List.of(ref, alt))
.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, new double[]{germlineAltAFs[n-1]}).make();
germlineVCs.add(splitVC);
}

final double[] splitResult = SomaticGenotypingEngine.getGermlineAltAlleleFrequencies(calledAlleles, germlineVCs, DEFAULT_AF);
Assert.assertEquals(splitResult, expected, 1.0e-10);
}
}

@DataProvider(name = "missingAFData")
Expand All @@ -69,7 +86,21 @@ Object[][] missingAFData() {

@Test(dataProvider = "missingAFData")
public void testGetGermlineAltAlleleFrequenciesWithMissingAF(final VariantContext vc) {
final double[] result = SomaticGenotypingEngine.getGermlineAltAlleleFrequencies(vc.getAlleles(), Optional.of(vc), DEFAULT_AF);
davidbenjamin marked this conversation as resolved.
Show resolved Hide resolved
final double[] result = SomaticGenotypingEngine.getGermlineAltAlleleFrequencies(vc.getAlleles(), List.of(vc), DEFAULT_AF);
Assert.assertEquals(result, new double[] {DEFAULT_AF}, 1.0e-10);
}

// test getting alt allele frequencies when each alt has its own VCF line and its own VariantContext
@Test
public void testGetGermlineAltAlleleFrequenciesFromSplitAllelesFormat() {
final double af1 = 0.1;
final double af2 = 0.2;
final VariantContext vc1 = new VariantContextBuilder("SOURCE", "1", 1, 1, Arrays.asList(Allele.REF_A, Allele.ALT_C))
.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, af1).make();
final VariantContext vc2 = new VariantContextBuilder("SOURCE", "1", 1, 1, Arrays.asList(Allele.REF_A, Allele.ALT_T))
.attribute(VCFConstants.ALLELE_FREQUENCY_KEY, af2).make();
final List<Allele> alleles = List.of(Allele.REF_A, Allele.ALT_C, Allele.ALT_T);
final double[] result = SomaticGenotypingEngine.getGermlineAltAlleleFrequencies(alleles, List.of(vc1, vc2), DEFAULT_AF);
Assert.assertEquals(result, new double[] {af1, af2}, 1.0e-10);
}
}
Loading