Skip to content

Commit

Permalink
Fix automatic zeroing of array end/non-ref data because sometimes the…
Browse files Browse the repository at this point in the history
…re's no

non-ref
Reenable EM in multi-allelic QUAL calculation
  • Loading branch information
ldgauthier committed Feb 11, 2022
1 parent b42568f commit 483ec41
Show file tree
Hide file tree
Showing 22 changed files with 8,203 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ private AlleleSubsettingUtils() {} // prevent instantiation
* @param allelesToKeep the subset of alleles to use with the new Genotypes
* @param assignmentMethod assignment strategy for the (subsetted) PLs
* @param depth the original variant DP or 0 if there was no DP
* @param suppressUninformativePLs force the output of a PL array even if there is no data
* @param forceEmitUninformativePLs force the output of a PL array even if there is no data
* @return a new non-null GenotypesContext
*/
public static GenotypesContext subsetAlleles(final GenotypesContext originalGs, final int defaultPloidy,
Expand All @@ -55,7 +55,7 @@ public static GenotypesContext subsetAlleles(final GenotypesContext originalGs,
final GenotypePriorCalculator gpc,
final GenotypeAssignmentMethod assignmentMethod,
final int depth,
final boolean suppressUninformativePLs) {
final boolean forceEmitUninformativePLs) {
Utils.nonNull(originalGs, "original GenotypesContext must not be null");
Utils.nonNull(allelesToKeep, "allelesToKeep is null");
Utils.nonEmpty(allelesToKeep, "must keep at least one allele");
Expand Down Expand Up @@ -94,7 +94,7 @@ public static GenotypesContext subsetAlleles(final GenotypesContext originalGs,
newLog10GQ = -0.1*g.getGQ();
}

final boolean useNewLikelihoods = !suppressUninformativePLs ||
final boolean useNewLikelihoods = forceEmitUninformativePLs ||
(newLikelihoods != null && (depth != 0 || GATKVariantContextUtils.isInformative(newLikelihoods)));

final GenotypeBuilder gb = new GenotypeBuilder(g);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ public VariantContext calculateGenotypes(final VariantContext vc, final Genotype
if (maxAltAlleles < vc.getAlternateAlleles().size()) {
final List<Allele> allelesToKeep = AlleleSubsettingUtils.calculateMostLikelyAlleles(vc, defaultPloidy, maxAltAlleles);
final GenotypesContext reducedGenotypes = allelesToKeep.size() == 1 ? GATKVariantContextUtils.subsetToRefOnly(vc, defaultPloidy) :
AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), allelesToKeep, gpc, GenotypeAssignmentMethod.SET_TO_NO_CALL, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), false);
AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), allelesToKeep, gpc, GenotypeAssignmentMethod.SET_TO_NO_CALL, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), true);
reducedVC = new VariantContextBuilder(vc).alleles(allelesToKeep).genotypes(reducedGenotypes).make();
}

Expand Down Expand Up @@ -181,7 +181,7 @@ && noAllelesOrFirstAlleleIsNotNonRef(outputAlternativeAlleles.alleles) && givenA
// create the genotypes
//TODO: omit subsetting if output alleles is not a proper subset of vc.getAlleles
final GenotypesContext genotypes = outputAlleles.size() == 1 ? GATKVariantContextUtils.subsetToRefOnly(vc, defaultPloidy) :
AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), outputAlleles, gpc, configuration.genotypeArgs.genotypeAssignmentMethod, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), false);
AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(), defaultPloidy, vc.getAlleles(), outputAlleles, gpc, configuration.genotypeArgs.genotypeAssignmentMethod, vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), true);

if (configuration.genotypeArgs.usePosteriorProbabilitiesToCalculateQual && hasPosteriors(genotypes)) {
final double log10NoVariantPosterior = phredNoVariantPosteriorProbability(outputAlleles, genotypes) * -.1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ private static double[] log10NormalizedGenotypePosteriors(final Genotype g, fina
final double[] log10Likelihoods;
if (g.hasLikelihoods()) {
log10Likelihoods = g.getLikelihoods().getAsVector();
} else if (g.isHomRef()) {
} else if ( g.isHomRef() || g.isNoCall()) {
if (g.getPloidy() != 2) {
throw new IllegalStateException("Likelihoods are required to calculate posteriors for hom-refs with ploidy != 2, " +
"but were not found for genotype " + g + " with ploidy " + g.getPloidy());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,7 @@ protected GenotypeBuilder changeCallToHomRefVersusNonRef(final VariantContext lo
genotype.getPloidy(), lowQualVariant.getAlleles(), Arrays.asList(inputRefAllele, bestAlt),
null, GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, //BEST_MATCH to avoid no-calling low qual genotypes
lowQualVariant.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0),
false); //emitEmptyPLs = true to make sure we always subset
true); //forceEmitUniformativePLs = true to make sure we always have PLs
final Genotype subsetG = context.get(0);
gb = new GenotypeBuilder(subsetG).noAttributes(); //remove attributes because hom ref blocks shouldn't have posteriors
//subsetting may strip GQ and PLs for low qual genotypes
Expand Down Expand Up @@ -569,7 +569,7 @@ VariantContext cleanUpHighQualityVariant(final VariantContext variant) {
newAlleleSetUntrimmed.removeAll(allelesToDrop);
final GenotypesContext gc = AlleleSubsettingUtils.subsetAlleles(variant.getGenotypes(), genotype.getPloidy(), variant.getAlleles(),
newAlleleSetUntrimmed, null, GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN,
variant.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), true);
variant.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), true); //forceEmit = true so we always have PLs
if (gc.get(0).isHomRef() || !gc.get(0).hasGQ() || gc.get(0).getAlleles().contains(Allele.NO_CALL)) { //could be low quality or no-call after subsetting
if (dropLowQuals) {
return null;
Expand Down Expand Up @@ -781,7 +781,7 @@ private static void addQualAnnotations(final Map<String, Object> destination, fi
final GenotypesContext gc = AlleleSubsettingUtils.subsetAlleles(updatedAllelesVC.getGenotypes(),
updatedAllelesGenotype.getPloidy(), updatedAllelesVC.getAlleles(), Arrays.asList(updatedAllelesVC.getReference(), alt), null,
GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, 0, true);
//assignment method doens't really matter as long as we don't zero out PLs; don't need depth to get PLs for quals
//assignment method doesn't really matter as long as we don't zero out PLs; don't need depth to get PLs for quals

final Genotype subsettedGenotype = gc.get(0);
final int[] likelihoods = getGenotypePosteriorsOtherwiseLikelihoods(subsettedGenotype, posteriorsKey);
Expand Down Expand Up @@ -862,8 +862,10 @@ private static void copyInfoAnnotations(final Map<String, Object> destinationAtt
final List<String> subsetList;
if (alleleSpecificValues.size() > 0) {
subsetList = AlleleSubsettingUtils.remapRLengthList(alleleSpecificValues, relevantIndices, "");
//zero out non-ref value, just in case
subsetList.set(subsetList.size()-1,((AlleleSpecificAnnotation)annotation).getEmptyRawValue());
if (sourceVC.getAlleles().get(relevantIndices[relevantIndices.length - 1]).equals(Allele.NON_REF_ALLELE)) {
//zero out non-ref value, just in case
subsetList.set(subsetList.size() - 1, ((AlleleSpecificAnnotation) annotation).getEmptyRawValue());
}
} else {
subsetList = Collections.nCopies(relevantIndices.length, "");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1033,7 +1033,7 @@ private VariantContext subsetRecord(final VariantContext vc, final boolean prese
newGC = sub.getNAlleles() == vc.getNAlleles() ? subGenotypesWithOldAlleles :
AlleleSubsettingUtils.subsetAlleles(subGenotypesWithOldAlleles, 0, vc.getAlleles(),
sub.getAlleles(), null, GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES,
vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), false);
vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), true);
} else {
newGC = sub.getGenotypes();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ public static GenotypeCounts computeDiploidGenotypeCounts(final VariantContext v
* @return true if we have enough info for AF calculation
*/
public static boolean genotypeIsUsableForAFCalculation(Genotype g) {
return g.hasLikelihoods() || (g.isHomRef() && g.hasGQ() && 2 == g.getPloidy());
return g.hasLikelihoods() || g.hasGQ() || g.getAlleles().stream().anyMatch(a -> a.isCalled() && a.isNonReference() && !a.isSymbolic());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1687,7 +1687,7 @@ public static List<VariantContext> splitVariantContextToBiallelics(final Variant
genotypeAssignmentMethodUsed != GenotypeAssignmentMethod.SET_TO_NO_CALL)
AlleleSubsettingUtils.addInfoFieldAnnotations(vc, builder, keepOriginalChrCounts);

builder.genotypes(AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(),2,vc.getAlleles(), alleles, null, genotypeAssignmentMethodUsed,vc.getAttributeAsInt("DP",0), true));
builder.genotypes(AlleleSubsettingUtils.subsetAlleles(vc.getGenotypes(),2,vc.getAlleles(), alleles, null, genotypeAssignmentMethodUsed,vc.getAttributeAsInt("DP",0), false));
final VariantContext trimmed = trimAlleles(builder.make(), trimLeft, true);
biallelics.add(trimmed);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,12 @@ public Object[][] gvcfsToGenotype() {
{getTestFile( "combined.single.sample.pipeline.gatk3.vcf"),
getTestFile( "expected/includeLowQualSites.vcf"),
Arrays.asList( " --" + GenotypeGVCFs.ALL_SITES_LONG_NAME + " -L 20:10,012,730-10,012,740"),
b37_reference_20_21}
b37_reference_20_21},

//23 highly multi-allelic sites across 54 1000G exomes to test allele subsetting and QUAL calculation
{getTestFile("multiallelicQualRegression.vcf "),
getTestFile("multiallelicQualRegression.expected.vcf"),
NO_EXTRA_ARGS, hg38Reference}
};
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ public void testUpdatePLsAndADData(final VariantContext originalVC,
AlleleSubsettingUtils.subsetAlleles(oldGs, 0, originalVC.getAlleles(),
selectedVCwithGTs.getAlleles(), null,
GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES,
originalVC.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), true);
originalVC.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), false);

Assert.assertEquals(actual.size(), expectedGenotypes.size());
for ( final Genotype expected : expectedGenotypes ) {
Expand Down Expand Up @@ -327,7 +327,7 @@ public void testUninformativePLsAreKeptWhenDepthIsNotZero(){
final List<Allele> alleles = Arrays.asList(Aref);
final Genotype uniformativePL = new GenotypeBuilder("sample", alleles).PL(new int[] {0}).make();
final GenotypesContext result = AlleleSubsettingUtils.subsetAlleles(GenotypesContext.create(uniformativePL), 2,
alleles, alleles, null, GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, 10, true);
alleles, alleles, null, GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, 10, false);
final Genotype genotype = result.get(0);
Assert.assertTrue(genotype.hasPL());
Assert.assertEquals(genotype.getPL(), new int[]{0});
Expand Down Expand Up @@ -412,7 +412,7 @@ public void testAlleleReorderingBehavior() {

final GenotypesContext newGs = AlleleSubsettingUtils.subsetAlleles(GenotypesContext.create(g5),
2, threeAlleles, threeAllelesSorted, null,
GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, 10, true);
GenotypeAssignmentMethod.DO_NOT_ASSIGN_GENOTYPES, 10, false);

Assert.assertEquals(newGs.get(0).getPL(), new int[] {50, 20, 0, 40, 10, 30});
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,34 @@ public class ReblockGVCFIntegrationTest extends CommandLineProgramTest {
private static final String hg38_reference_20_21 = largeFileTestDir + "Homo_sapiens_assembly38.20.21.fasta";
private static final String b37_reference_20_21 = largeFileTestDir + "human_g1k_v37.20.21.fasta";

@Test //covers inputs with "MQ" annotation
public void testJustOneSample() throws Exception {
@Test
public void testWithExactComparison() throws Exception {
//covers inputs with old format "MQ" annotation
final IntegrationTestSpec spec = new IntegrationTestSpec(
"-L chr20:69771 -O %s -R " + hg38_reference_20_21 +
" -V " + getToolTestDataDir() + "gvcfForReblocking.g.vcf -rgq-threshold 19" +
" --" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE + " false",
Arrays.asList(getToolTestDataDir() + "testJustOneSample.expected.g.vcf"));
spec.executeTest("testJustOneSample", this);
spec.executeTest("testWithExactComparison", this);


//Broad production arguments on WGS data
final IntegrationTestSpec spec2 = new IntegrationTestSpec(
"-O %s -R " + hg38Reference +
" -V " + getToolTestDataDir() + "prodWgsInput.g.vcf " +
" -do-qual-approx --floor-blocks -GQB 20 -GQB 30 -GQB 40 " +
" --" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE + " false",
Arrays.asList(getToolTestDataDir() + "prodWgsOutput.g.vcf"));
spec2.executeTest("testWithExactComparison", this);

//Exome data with AS annotations and zero DP regression test
final IntegrationTestSpec spec3 = new IntegrationTestSpec(
"-O %s -R " + hg38Reference +
" -V " + getToolTestDataDir() + "prodWesInput.g.vcf " +
" -do-qual-approx --floor-blocks -GQB 20 -GQB 30 -GQB 40 " +
" --" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE + " false",
Arrays.asList(getToolTestDataDir() + "prodWesOutput.g.vcf"));
spec3.executeTest("testWithExactComparison", this);
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public class ReblockGVCFUnitTest extends CommandLineProgramTest {
@Test
public void testCleanUpHighQualityVariant() {
final ReblockGVCF reblocker = new ReblockGVCF();
//We need an annotation engine for cleanUpHighQualityVariant()
//We need an annotation engine for cleanUpHighQualityVariant(), but this is just a dummy; annotations won't initialize properly without runCommandLine()
reblocker.createAnnotationEngine();
//...and a vcfwriter
reblocker.vcfWriter = new ReblockingGVCFWriter(new GVCFWriterUnitTest.MockWriter(), Arrays.asList(20, 100), true, null, new ReblockingOptions());
Expand Down Expand Up @@ -381,7 +381,7 @@ public void testAnnotationSubsetting() {
origAttributes.put(GATKVCFConstants.AS_RAW_RMS_MAPPING_QUALITY_KEY, "123769.00|46800.00|0.00|0.00");
originalBuilder.attributes(origAttributes);
final VariantContext originalVC = originalBuilder.make();
final Genotype newG = VariantContextTestUtils.makeG("sample1", LONG_REF, LONG_SNP, 41, 0, 37, 200, 100, 200, 400, 600, 800, 1200);
final Genotype newG = VariantContextTestUtils.makeG("sample1", LONG_REF, LONG_SNP, 41, 0, 37, 200, 100, 200);
final VariantContext regenotypedVC = makeDeletionVC("", Arrays.asList(LONG_REF, LONG_SNP, Allele.NON_REF_ALLELE), LONG_REF.length(), newG);

final Map<String, Object> subsetAnnotations = ReblockGVCF.subsetAnnotationsIfNecessary(annotationEngine, true, null, originalVC, regenotypedVC);
Expand Down
Loading

0 comments on commit 483ec41

Please sign in to comment.