Skip to content

Commit

Permalink
Fix automatic zeroing of array end/non-ref data because sometimes the…
Browse files Browse the repository at this point in the history
…re's no

non-ref
Reenable EM in multi-allelic QUAL calculation
  • Loading branch information
ldgauthier committed Feb 11, 2022
1 parent b42568f commit 6f61408
Show file tree
Hide file tree
Showing 16 changed files with 8,192 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ private static double[] log10NormalizedGenotypePosteriors(final Genotype g, fina
final double[] log10Likelihoods;
if (g.hasLikelihoods()) {
log10Likelihoods = g.getLikelihoods().getAsVector();
} else if (g.isHomRef()) {
} else if ( g.isHomRef() || g.isNoCall()) {
if (g.getPloidy() != 2) {
throw new IllegalStateException("Likelihoods are required to calculate posteriors for hom-refs with ploidy != 2, " +
"but were not found for genotype " + g + " with ploidy " + g.getPloidy());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -569,7 +569,7 @@ VariantContext cleanUpHighQualityVariant(final VariantContext variant) {
newAlleleSetUntrimmed.removeAll(allelesToDrop);
final GenotypesContext gc = AlleleSubsettingUtils.subsetAlleles(variant.getGenotypes(), genotype.getPloidy(), variant.getAlleles(),
newAlleleSetUntrimmed, null, GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN,
variant.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), true);
variant.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0), false);
if (gc.get(0).isHomRef() || !gc.get(0).hasGQ() || gc.get(0).getAlleles().contains(Allele.NO_CALL)) { //could be low quality or no-call after subsetting
if (dropLowQuals) {
return null;
Expand Down Expand Up @@ -780,8 +780,8 @@ private static void addQualAnnotations(final Map<String, Object> destination, fi
//TODO: this isn't going to work for DRAGEN's genotype posteriors
final GenotypesContext gc = AlleleSubsettingUtils.subsetAlleles(updatedAllelesVC.getGenotypes(),
updatedAllelesGenotype.getPloidy(), updatedAllelesVC.getAlleles(), Arrays.asList(updatedAllelesVC.getReference(), alt), null,
GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, 0, true);
//assignment method doens't really matter as long as we don't zero out PLs; don't need depth to get PLs for quals
GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, 0, false);
//assignment method doesn't really matter as long as we don't zero out PLs; don't need depth to get PLs for quals

final Genotype subsettedGenotype = gc.get(0);
final int[] likelihoods = getGenotypePosteriorsOtherwiseLikelihoods(subsettedGenotype, posteriorsKey);
Expand Down Expand Up @@ -862,8 +862,10 @@ private static void copyInfoAnnotations(final Map<String, Object> destinationAtt
final List<String> subsetList;
if (alleleSpecificValues.size() > 0) {
subsetList = AlleleSubsettingUtils.remapRLengthList(alleleSpecificValues, relevantIndices, "");
//zero out non-ref value, just in case
subsetList.set(subsetList.size()-1,((AlleleSpecificAnnotation)annotation).getEmptyRawValue());
if (sourceVC.getAlleles().get(relevantIndices[relevantIndices.length - 1]).equals(Allele.NON_REF_ALLELE)) {
//zero out non-ref value, just in case
subsetList.set(subsetList.size() - 1, ((AlleleSpecificAnnotation) annotation).getEmptyRawValue());
}
} else {
subsetList = Collections.nCopies(relevantIndices.length, "");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ public static GenotypeCounts computeDiploidGenotypeCounts(final VariantContext v
* @return true if we have enough info for AF calculation
*/
public static boolean genotypeIsUsableForAFCalculation(Genotype g) {
return g.hasLikelihoods() || (g.isHomRef() && g.hasGQ() && 2 == g.getPloidy());
return g.hasLikelihoods() || g.hasGQ() || g.getAlleles().stream().anyMatch(a -> a.isCalled() && a.isNonReference() && !a.isSymbolic());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,12 @@ public Object[][] gvcfsToGenotype() {
{getTestFile( "combined.single.sample.pipeline.gatk3.vcf"),
getTestFile( "expected/includeLowQualSites.vcf"),
Arrays.asList( " --" + GenotypeGVCFs.ALL_SITES_LONG_NAME + " -L 20:10,012,730-10,012,740"),
b37_reference_20_21}
b37_reference_20_21},

//23 highly multi-allelic sites across 54 1000G exomes to test allele subsetting and QUAL calculation
{getTestFile("multiallelicQualRegression.vcf "),
getTestFile("multiallelicQualRegression.expected.vcf"),
NO_EXTRA_ARGS, hg38Reference}
};
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,34 @@ public class ReblockGVCFIntegrationTest extends CommandLineProgramTest {
private static final String hg38_reference_20_21 = largeFileTestDir + "Homo_sapiens_assembly38.20.21.fasta";
private static final String b37_reference_20_21 = largeFileTestDir + "human_g1k_v37.20.21.fasta";

@Test //covers inputs with "MQ" annotation
public void testJustOneSample() throws Exception {
@Test
public void testWithExactComparison() throws Exception {
//covers inputs with old format "MQ" annotation
final IntegrationTestSpec spec = new IntegrationTestSpec(
"-L chr20:69771 -O %s -R " + hg38_reference_20_21 +
" -V " + getToolTestDataDir() + "gvcfForReblocking.g.vcf -rgq-threshold 19" +
" --" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE + " false",
Arrays.asList(getToolTestDataDir() + "testJustOneSample.expected.g.vcf"));
spec.executeTest("testJustOneSample", this);
spec.executeTest("testWithExactComparison", this);


//Broad production arguments on WGS data
final IntegrationTestSpec spec2 = new IntegrationTestSpec(
"-O %s -R " + hg38Reference +
" -V " + getToolTestDataDir() + "prodWgsInput.g.vcf " +
" -do-qual-approx --floor-blocks -GQB 20 -GQB 30 -GQB 40 " +
" --" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE + " false",
Arrays.asList(getToolTestDataDir() + "prodWgsOutput.g.vcf"));
spec2.executeTest("testWithExactComparison", this);

//Exome data with AS annotations and zero DP regression test
final IntegrationTestSpec spec3 = new IntegrationTestSpec(
"-O %s -R " + hg38Reference +
" -V " + getToolTestDataDir() + "prodWesInput.g.vcf " +
" -do-qual-approx --floor-blocks -GQB 20 -GQB 30 -GQB 40 " +
" --" + StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE + " false",
Arrays.asList(getToolTestDataDir() + "prodWesOutput.g.vcf"));
spec3.executeTest("testWithExactComparison", this);
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public class ReblockGVCFUnitTest extends CommandLineProgramTest {
@Test
public void testCleanUpHighQualityVariant() {
final ReblockGVCF reblocker = new ReblockGVCF();
//We need an annotation engine for cleanUpHighQualityVariant()
//We need an annotation engine for cleanUpHighQualityVariant(), but this is just a dummy; annotations won't initialize properly without runCommandLine()
reblocker.createAnnotationEngine();
//...and a vcfwriter
reblocker.vcfWriter = new ReblockingGVCFWriter(new GVCFWriterUnitTest.MockWriter(), Arrays.asList(20, 100), true, null, new ReblockingOptions());
Expand Down Expand Up @@ -381,7 +381,7 @@ public void testAnnotationSubsetting() {
origAttributes.put(GATKVCFConstants.AS_RAW_RMS_MAPPING_QUALITY_KEY, "123769.00|46800.00|0.00|0.00");
originalBuilder.attributes(origAttributes);
final VariantContext originalVC = originalBuilder.make();
final Genotype newG = VariantContextTestUtils.makeG("sample1", LONG_REF, LONG_SNP, 41, 0, 37, 200, 100, 200, 400, 600, 800, 1200);
final Genotype newG = VariantContextTestUtils.makeG("sample1", LONG_REF, LONG_SNP, 41, 0, 37, 200, 100, 200);
final VariantContext regenotypedVC = makeDeletionVC("", Arrays.asList(LONG_REF, LONG_SNP, Allele.NON_REF_ALLELE), LONG_REF.length(), newG);

final Map<String, Object> subsetAnnotations = ReblockGVCF.subsetAnnotationsIfNecessary(annotationEngine, true, null, originalVC, regenotypedVC);
Expand Down
Loading

0 comments on commit 6f61408

Please sign in to comment.