From f628b0dadc18934108f2c06d84cc4d25876eaec9 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Thu, 16 May 2024 02:09:39 -0400 Subject: [PATCH 1/5] allow M2 germline resource with split multiallelics --- .../mutect/SomaticGenotypingEngine.java | 38 ++++++++++--------- .../SomaticGenotypingEngineUnitTest.java | 4 +- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java index 5324df8f59a..407b60bab67 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java @@ -404,9 +404,7 @@ private Optional getForNormal(final Supplier supplier) { private static Map getNegativeLogPopulationAFAnnotation(List germlineResourceVariants, final List allAlleles, final double afOfAllelesNotInGermlineResource) { - final Optional germlineVC = germlineResourceVariants.isEmpty() ? Optional.empty() - : Optional.of(germlineResourceVariants.get(0)); // assume only one VC per site - final double[] populationAlleleFrequencies = getGermlineAltAlleleFrequencies(allAlleles, germlineVC, afOfAllelesNotInGermlineResource); + final double[] populationAlleleFrequencies = getGermlineAltAlleleFrequencies(allAlleles, germlineResourceVariants, afOfAllelesNotInGermlineResource); return ImmutableMap.of(GATKVCFConstants.POPULATION_AF_KEY, MathUtils.applyToArray(populationAlleleFrequencies, x -> - Math.log10(x))); } @@ -416,27 +414,33 @@ private static Map getNegativeLogPopulationAFAnnotation(List allAlleles, final Optional germlineVC, final double afOfAllelesNotInGermlineResource) { + static double[] getGermlineAltAlleleFrequencies(final List allAlleles, final List germlineVCs, final double afOfAllelesNotInGermlineResource) { Utils.validateArg(!allAlleles.isEmpty(), "allAlleles are empty -- there is not even a reference allele."); - if (germlineVC.isPresent()) { - if (! germlineVC.get().hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)) { - logger.warn("Germline resource variant at " + germlineVC.get().getContig() + ":" + germlineVC.get().getStart() +" missing AF attribute"); - return Doubles.toArray(Collections.nCopies(allAlleles.size() - 1, afOfAllelesNotInGermlineResource)); + final Map alleleFrequencies = new HashMap<>(); + allAlleles.forEach(a -> alleleFrequencies.put(a, afOfAllelesNotInGermlineResource)); // initialize everything to the default + + // look through every germline resource variant context at this locus and fill in the AFs + for (final VariantContext germlineVC : germlineVCs) { + if (! germlineVC.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)) { + logger.warn("Germline resource variant at " + germlineVC.getContig() + ":" + germlineVC.getStart() +" missing AF attribute"); + } + + List germlineIndices = GATKVariantContextUtils.alleleIndices(allAlleles, germlineVC.getAlleles()); + final List germlineAltAFs = Mutect2Engine.getAttributeAsDoubleList(germlineVC, VCFConstants.ALLELE_FREQUENCY_KEY, afOfAllelesNotInGermlineResource); + + for (int alleleIndex = 0; alleleIndex < allAlleles.size(); alleleIndex++) { + final Allele allele = allAlleles.get(alleleIndex); + // note the -1 since germlineAltAFs do not include ref + germlineIndices.get(alleleIndex).ifPresent(germlineIndex -> alleleFrequencies.put(allele, germlineAltAFs.get(germlineIndex - 1))); } - List germlineIndices = GATKVariantContextUtils.alleleIndices(allAlleles, germlineVC.get().getAlleles()); - final List germlineAltAFs = Mutect2Engine.getAttributeAsDoubleList(germlineVC.get(), VCFConstants.ALLELE_FREQUENCY_KEY, afOfAllelesNotInGermlineResource); - - return germlineIndices.stream().skip(1) // skip the reference allele - .mapToDouble(idx -> idx.isPresent() ? germlineAltAFs.get(idx.getAsInt() - 1) : afOfAllelesNotInGermlineResource) // note the -1 since germlineAltAFs do not include ref - .toArray(); - } else { - return Doubles.toArray(Collections.nCopies(allAlleles.size() - 1, afOfAllelesNotInGermlineResource)); } + + return allAlleles.stream().skip(1).mapToDouble(alleleFrequencies::get).toArray(); // skip the reference allele } /** diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngineUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngineUnitTest.java index 9a6dd2b54eb..6a47d3e0ed1 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngineUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngineUnitTest.java @@ -53,7 +53,7 @@ public void testGetGermlineAltAlleleFrequencies(final List calledAlleles final int stop = start + length - 1; final VariantContext vc1 = new VariantContextBuilder("SOURCE", "1", start, stop, germlineAlleles) .attribute(VCFConstants.ALLELE_FREQUENCY_KEY, germlineAltAFs).make(); - final double[] result = SomaticGenotypingEngine.getGermlineAltAlleleFrequencies(calledAlleles, Optional.of(vc1), DEFAULT_AF); + final double[] result = SomaticGenotypingEngine.getGermlineAltAlleleFrequencies(calledAlleles, List.of(vc1), DEFAULT_AF); Assert.assertEquals(result, expected, 1.0e-10); } @@ -69,7 +69,7 @@ Object[][] missingAFData() { @Test(dataProvider = "missingAFData") public void testGetGermlineAltAlleleFrequenciesWithMissingAF(final VariantContext vc) { - final double[] result = SomaticGenotypingEngine.getGermlineAltAlleleFrequencies(vc.getAlleles(), Optional.of(vc), DEFAULT_AF); + final double[] result = SomaticGenotypingEngine.getGermlineAltAlleleFrequencies(vc.getAlleles(), List.of(vc), DEFAULT_AF); Assert.assertEquals(result, new double[] {DEFAULT_AF}, 1.0e-10); } } \ No newline at end of file From 4adec1002d4eddba6726fba2f5851c2a7fd9c01d Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Thu, 16 May 2024 02:18:14 -0400 Subject: [PATCH 2/5] unit test --- .../mutect/SomaticGenotypingEngineUnitTest.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngineUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngineUnitTest.java index 6a47d3e0ed1..09b90610f44 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngineUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngineUnitTest.java @@ -11,6 +11,7 @@ import org.testng.annotations.Test; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Optional; @@ -55,6 +56,21 @@ public void testGetGermlineAltAlleleFrequencies(final List calledAlleles .attribute(VCFConstants.ALLELE_FREQUENCY_KEY, germlineAltAFs).make(); final double[] result = SomaticGenotypingEngine.getGermlineAltAlleleFrequencies(calledAlleles, List.of(vc1), DEFAULT_AF); Assert.assertEquals(result, expected, 1.0e-10); + + // multiallelic -- test splitting into multiple VCs + if (germlineAlleles.size() > 2) { + final Allele ref = germlineAlleles.get(0); + final List germlineVCs = new ArrayList<>(); + for (int n = 1; n < germlineAlleles.size(); n++) { + final Allele alt = germlineAlleles.get(n); + final VariantContext splitVC = new VariantContextBuilder("SOURCE", "1", start, stop, List.of(ref, alt)) + .attribute(VCFConstants.ALLELE_FREQUENCY_KEY, new double[]{germlineAltAFs[n-1]}).make(); + germlineVCs.add(splitVC); + } + + final double[] splitResult = SomaticGenotypingEngine.getGermlineAltAlleleFrequencies(calledAlleles, germlineVCs, DEFAULT_AF); + Assert.assertEquals(splitResult, expected, 1.0e-10); + } } @DataProvider(name = "missingAFData") From ccbbd13e4334d63648506c351a18ac14a3b21873 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Thu, 16 May 2024 09:42:29 -0400 Subject: [PATCH 3/5] whoops --- .../tools/walkers/mutect/SomaticGenotypingEngine.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java index 407b60bab67..60048933599 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java @@ -433,7 +433,7 @@ static double[] getGermlineAltAlleleFrequencies(final List allAlleles, f List germlineIndices = GATKVariantContextUtils.alleleIndices(allAlleles, germlineVC.getAlleles()); final List germlineAltAFs = Mutect2Engine.getAttributeAsDoubleList(germlineVC, VCFConstants.ALLELE_FREQUENCY_KEY, afOfAllelesNotInGermlineResource); - for (int alleleIndex = 0; alleleIndex < allAlleles.size(); alleleIndex++) { + for (int alleleIndex = 1; alleleIndex < allAlleles.size(); alleleIndex++) { // start at 1 to skip the reference, which doesn't have an AF annotation final Allele allele = allAlleles.get(alleleIndex); // note the -1 since germlineAltAFs do not include ref germlineIndices.get(alleleIndex).ifPresent(germlineIndex -> alleleFrequencies.put(allele, germlineAltAFs.get(germlineIndex - 1))); From bb14662a5e4084faecc49150321df115873193f6 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Thu, 16 May 2024 13:31:17 -0400 Subject: [PATCH 4/5] Edge case --- .../tools/walkers/mutect/SomaticGenotypingEngine.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java index 60048933599..f5f56a2483e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java @@ -433,10 +433,12 @@ static double[] getGermlineAltAlleleFrequencies(final List allAlleles, f List germlineIndices = GATKVariantContextUtils.alleleIndices(allAlleles, germlineVC.getAlleles()); final List germlineAltAFs = Mutect2Engine.getAttributeAsDoubleList(germlineVC, VCFConstants.ALLELE_FREQUENCY_KEY, afOfAllelesNotInGermlineResource); - for (int alleleIndex = 1; alleleIndex < allAlleles.size(); alleleIndex++) { // start at 1 to skip the reference, which doesn't have an AF annotation - final Allele allele = allAlleles.get(alleleIndex); - // note the -1 since germlineAltAFs do not include ref - germlineIndices.get(alleleIndex).ifPresent(germlineIndex -> alleleFrequencies.put(allele, germlineAltAFs.get(germlineIndex - 1))); + if (germlineAltAFs.size() == (germlineVC.getNAlleles() - 1)) { // skip VCs with a bad AF field that got parsed as a wrong-length list + for (int alleleIndex = 1; alleleIndex < allAlleles.size(); alleleIndex++) { // start at 1 to skip the reference, which doesn't have an AF annotation + final Allele allele = allAlleles.get(alleleIndex); + // note the -1 since germlineAltAFs do not include ref + germlineIndices.get(alleleIndex).ifPresent(germlineIndex -> alleleFrequencies.put(allele, germlineAltAFs.get(germlineIndex - 1))); + } } } From 22d221b19cd239767fae0bf2674eeca7d47eae44 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Mon, 3 Jun 2024 13:18:03 -0400 Subject: [PATCH 5/5] added test --- .../mutect/SomaticGenotypingEngineUnitTest.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngineUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngineUnitTest.java index 09b90610f44..c99fe8be850 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngineUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngineUnitTest.java @@ -10,6 +10,7 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import javax.ws.rs.core.Variant; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; @@ -88,4 +89,18 @@ public void testGetGermlineAltAlleleFrequenciesWithMissingAF(final VariantContex final double[] result = SomaticGenotypingEngine.getGermlineAltAlleleFrequencies(vc.getAlleles(), List.of(vc), DEFAULT_AF); Assert.assertEquals(result, new double[] {DEFAULT_AF}, 1.0e-10); } + + // test getting alt allele frequencies when each alt has its own VCF line and its own VariantContext + @Test + public void testGetGermlineAltAlleleFrequenciesFromSplitAllelesFormat() { + final double af1 = 0.1; + final double af2 = 0.2; + final VariantContext vc1 = new VariantContextBuilder("SOURCE", "1", 1, 1, Arrays.asList(Allele.REF_A, Allele.ALT_C)) + .attribute(VCFConstants.ALLELE_FREQUENCY_KEY, af1).make(); + final VariantContext vc2 = new VariantContextBuilder("SOURCE", "1", 1, 1, Arrays.asList(Allele.REF_A, Allele.ALT_T)) + .attribute(VCFConstants.ALLELE_FREQUENCY_KEY, af2).make(); + final List alleles = List.of(Allele.REF_A, Allele.ALT_C, Allele.ALT_T); + final double[] result = SomaticGenotypingEngine.getGermlineAltAlleleFrequencies(alleles, List.of(vc1, vc2), DEFAULT_AF); + Assert.assertEquals(result, new double[] {af1, af2}, 1.0e-10); + } } \ No newline at end of file