Skip to content

Commit

Permalink
Haploid plus diploid runs, but there's an AD validation issue that co…
Browse files Browse the repository at this point in the history
…uld be Gnarly

or GDB
  • Loading branch information
ldgauthier committed Apr 1, 2022
1 parent 67f5682 commit 4300329
Show file tree
Hide file tree
Showing 7 changed files with 55,888 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -318,41 +318,44 @@ protected GenotypesContext iterateOnGenotypes(final VariantContext vc, final Lis
int newPLsize = -1;
final int maximumAlleleCount = inputAllelesWithNonRef.size();
final int numConcreteAlts = maximumAlleleCount - 2; //-1 for NON_REF and -1 for ref
if (maximumAlleleCount <= maxAllelesToOutput) {
newPLsize = likelihoodSizeCache[numConcreteAlts + 1]; //cache is indexed by #alleles with ref; don't count NON_REF
} else {
newPLsize = GenotypeLikelihoods.numLikelihoods(numConcreteAlts + 1, ASSUMED_PLOIDY);
}



for ( final Genotype g : vc.getGenotypes() ) {
final String name = g.getSampleName();
if(g.getPloidy() != ASSUMED_PLOIDY && !isGDBnoCall(g)) {
/*if(g.getPloidy() != ASSUMED_PLOIDY && !isGDBnoCall(g)) {
throw new UserException.BadInput("This tool assumes diploid genotypes, but sample " + name + " has ploidy "
+ g.getPloidy() + " at position " + vc.getContig() + ":" + vc.getStart() + ".");
}
}*/
final Genotype calledGT;
final GenotypeBuilder genotypeBuilder = new GenotypeBuilder(g);
genotypeBuilder.name(name);
if (isGDBnoCall(g) || g.getAllele(0).equals(Allele.NON_REF_ALLELE) || g.getAllele(1).equals(Allele.NON_REF_ALLELE)) {
genotypeBuilder.alleles(GATKVariantContextUtils.noCallAlleles(ASSUMED_PLOIDY));
if (isGDBnoCall(g) || g.getAlleles().contains(Allele.NON_REF_ALLELE)) {
genotypeBuilder.alleles(GATKVariantContextUtils.noCallAlleles(g.getPloidy()));
}
else if (nonRefReturned) {
if (g.hasAD()) {
final int[] AD = trimADs(g, targetAlleles.size());
genotypeBuilder.AD(AD);
}
else if (g.countAllele(Allele.NON_REF_ALLELE) > 0) {
genotypeBuilder.alleles(GATKVariantContextUtils.noCallAlleles(ASSUMED_PLOIDY)).noGQ();
genotypeBuilder.alleles(GATKVariantContextUtils.noCallAlleles(g.getPloidy())).noGQ();
}
}
if (g.hasPL()) {
final int[] PLs = trimPLs(g, newPLsize);
//lookup PL size from cache if ploidy matches and cache has our number of alts
if (maximumAlleleCount <= maxAllelesToOutput && g.getPloidy() == ASSUMED_PLOIDY) {
newPLsize = likelihoodSizeCache[numConcreteAlts + 1]; //cache is indexed by #alleles with ref; don't count NON_REF
//otherwise calculate size on the fly
} else {
newPLsize = GenotypeLikelihoods.numLikelihoods(numConcreteAlts + 1, g.getPloidy());
}
final int[] PLs = trimPLs(g, newPLsize); //get rid of <NON_REF> values
genotypeBuilder.PL(PLs);
genotypeBuilder.GQ(MathUtils.secondSmallestMinusSmallest(PLs, 0));
//If GenomicsDB returns no-call genotypes like CombineGVCFs (depending on the GenomicsDBExportConfiguration),
// then we need to actually find the GT from PLs
makeGenotypeCall(genotypeBuilder, GenotypeLikelihoods.fromPLs(PLs).getAsVector(), targetAlleles);
makeGenotypeCall(g, genotypeBuilder, GenotypeLikelihoods.fromPLs(PLs).getAsVector(), targetAlleles);
}
final Map<String, Object> attrs = new HashMap<>(g.getExtendedAttributes());
attrs.remove(GATKVCFConstants.MIN_DP_FORMAT_KEY);
Expand All @@ -365,7 +368,7 @@ else if (g.countAllele(Allele.NON_REF_ALLELE) > 0) {
}

//running total for AC values
for (int i = 0; i < ASSUMED_PLOIDY; i++) {
for (int i = 0; i < calledGT.getPloidy(); i++) {
final Allele a = calledGT.getAllele(i);
final int count = targetAlleleCounts.getOrDefault(a, 0);
if (!a.equals(Allele.NO_CALL)) {
Expand All @@ -386,27 +389,28 @@ else if (g.countAllele(Allele.NON_REF_ALLELE) > 0) {
* For a genotype with likelihoods that has a no-call GT, determine the most likely genotype from PLs and set it
* We use a GenotypeLikelihoodCalculator to convert from the best PL index to the indexes of the alleles for that
* genotype so we can set the GenotypeBuilder with the alleles
* @param g Genotype use to make the gb GenotypeBuilder
* @param gb GenotypeBuilder to modify and pass back
* @param genotypeLikelihoods PLs to use to call genotype; count should agree with number of alleles in allelesToUse
* @param allelesToUse alleles in the parent VariantContext (with ref), because GenotypeBuilder needs the allele String rather than index
*/
@VisibleForTesting
protected void makeGenotypeCall(final GenotypeBuilder gb,
protected void makeGenotypeCall(final Genotype g, final GenotypeBuilder gb,
final double[] genotypeLikelihoods,
final List<Allele> allelesToUse) {
final int maxAllelesToOutput = maxAltAllelesToOutput + 1; //+1 for ref

if ( genotypeLikelihoods == null || !GATKVariantContextUtils.isInformative(genotypeLikelihoods) ) {
gb.alleles(GATKVariantContextUtils.noCallAlleles(ASSUMED_PLOIDY)).noGQ();
gb.alleles(GATKVariantContextUtils.noCallAlleles(g.getAlleles().size())).noGQ();
} else {
final int maxLikelihoodIndex = MathUtils.maxElementIndex(genotypeLikelihoods);

GenotypeLikelihoodCalculator glCalc;
if ( allelesToUse.size() <= maxAllelesToOutput ) {
if ( allelesToUse.size() <= maxAllelesToOutput && g.getPloidy() == ASSUMED_PLOIDY) {
glCalc = glcCache.get(allelesToUse.size());
} else {
final GenotypeLikelihoodCalculators GLCprovider = new GenotypeLikelihoodCalculators();
glCalc = GLCprovider.getInstance(ASSUMED_PLOIDY, allelesToUse.size());
glCalc = GLCprovider.getInstance(g.getPloidy(), allelesToUse.size());
}

final GenotypeAlleleCounts alleleCounts = glCalc.genotypeAlleleCountsAt(maxLikelihoodIndex);
Expand All @@ -425,7 +429,8 @@ protected void makeGenotypeCall(final GenotypeBuilder gb,
* @return true if this is a genotype that should be represented as a ploidy-aware, spec compliant no-call
*/
private static boolean isGDBnoCall(final Genotype g) {
return g.getPloidy() == 1 && (g.getAllele(0).isReference() || g.getAllele(0).isNoCall());
//return g.getPloidy() == 1 && (g.getAllele(0).isReference() || g.getAllele(0).isNoCall());
return !g.hasPL() && !g.hasAD() && g.isNoCall();
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package org.broadinstitute.hellbender.tools.walkers;

import com.google.errorprone.annotations.Var;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFHeader;
Expand Down Expand Up @@ -53,6 +56,9 @@ public void assertThatExpectedOutputUpdateToggleIsDisabled() {
@DataProvider(name="VCFdata")
public Object[][] getVCFdata() {
return new Object[][]{
//chrX haploid sample plus diploid sample
{new File[]{getTestFile("NA12891.chrX.haploid.rb.g.vcf"), getTestFile("NA12892.chrX.diploid.rb.g.vcf")},
getTestFile("lotsOfAltsNoPLs.vcf"), null, Arrays.asList(new SimpleInterval("chrX", 1000000, 5000000)), Arrays.asList("--merge-input-intervals", "--only-output-calls-starting-in-intervals"), b38_reference_20_21},

//8 ALT alleles -- no PLs
{new File[]{getTestFile("sample6.vcf"), getTestFile("sample7.vcf"), getTestFile("sample8.vcf"), getTestFile("sample9.vcf")},
Expand Down Expand Up @@ -218,4 +224,26 @@ public void testOnEmptyAnnotations() {
Assert.assertEquals(sors.get(1), VCFConstants.MISSING_VALUE_v4);
}

@Test
public void testHaploidInput() {
final File haploidGVCF = new File(getToolTestDataDir(), "haploid.mini.g.vcf");
final File output = createTempFile("GnarlyGenotyper", ".vcf");
final ArgumentsBuilder args = new ArgumentsBuilder();
args.addReference(new File(b37_reference_20_21))
.add("V", haploidGVCF)
.addOutput(output)
.add(StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false");
runCommandLine(args);

final Pair<VCFHeader, List<VariantContext>> outputData = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath());
Assert.assertEquals(outputData.getRight().size(), 1);
final VariantContext vc = outputData.getRight().get(0);
Assert.assertEquals(vc.getGenotypes().size(), 1);
final Genotype g = vc.getGenotype(0);
Assert.assertEquals(g.getPloidy(), 1);
Assert.assertEquals(g.getAlleles().get(0), Allele.ALT_A);
Assert.assertTrue(g.hasPL());
Assert.assertEquals(g.getPL().length, 2);
Assert.assertEquals(g.getPL(), new int[]{1169, 0});
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,12 @@ public void testGenotypeCallForLotsOfAlts() {
final VariantContext vc = VariantContextTestUtils.makeVC("test", Arrays.asList(Aref, oneInserted, twoInserted, threeInserted, fourRepeats, fiveRepeats), g1, g2);

final GenotypeBuilder builder1 = new GenotypeBuilder(g1);
engine.makeGenotypeCall(builder1, GenotypeLikelihoods.fromPLs(sample1pls).getAsVector(), Arrays.asList(Aref, oneInserted, twoInserted, threeInserted, fourRepeats, fiveRepeats));
engine.makeGenotypeCall(g1, builder1, GenotypeLikelihoods.fromPLs(sample1pls).getAsVector(), Arrays.asList(Aref, oneInserted, twoInserted, threeInserted, fourRepeats, fiveRepeats));
final List<Allele> calledAlleles1 = builder1.make().getAlleles();
Assert.assertTrue(calledAlleles1.size() == 2 && calledAlleles1.contains(oneInserted) && calledAlleles1.contains(twoInserted));

final GenotypeBuilder builder2 = new GenotypeBuilder(g2);
engine.makeGenotypeCall(builder2, GenotypeLikelihoods.fromPLs(sample2pls).getAsVector(), Arrays.asList(Aref, oneInserted, twoInserted, threeInserted, fourRepeats, fiveRepeats));
engine.makeGenotypeCall(g2, builder2, GenotypeLikelihoods.fromPLs(sample2pls).getAsVector(), Arrays.asList(Aref, oneInserted, twoInserted, threeInserted, fourRepeats, fiveRepeats));
final List<Allele> calledAlleles2 = builder2.make().getAlleles();
Assert.assertTrue(calledAlleles2.size() == 2 && calledAlleles2.contains(Aref) && calledAlleles2.contains(oneInserted));
}
Expand Down
Loading

0 comments on commit 4300329

Please sign in to comment.