From 5b8736715724d6ba7994613acbc37e1bf187f538 Mon Sep 17 00:00:00 2001 From: Nalini Ganapati <35076948+nalinigans@users.noreply.github.com> Date: Tue, 4 Jan 2022 10:56:58 -0500 Subject: [PATCH] Fix bug which caused --max_alternate_alleles to be ignored when using GenomicsDB (#7576) * Fix a bug where --max_alternate_alleles was being ignored instead of being passed to GenomicsDB * Cleanup some variable names which shouldn't have been uppercase --- .../tools/genomicsdb/GenomicsDBOptions.java | 16 ++++++++---- .../tools/walkers/GenotypeGVCFs.java | 5 ++-- .../tools/walkers/GenotypeGVCFsEngine.java | 2 +- ...GenotypeCalculationArgumentCollection.java | 18 ++++++------- .../walkers/genotyper/GenotypingEngine.java | 6 ++--- .../gnarlyGenotyper/GnarlyGenotyper.java | 7 ++--- .../haplotypecaller/HaplotypeCaller.java | 2 +- .../HaplotypeCallerEngine.java | 4 +-- .../HaplotypeCallerGenotypingEngine.java | 2 +- .../walkers/variantutils/ReblockGVCF.java | 2 +- .../walkers/GenotypeGVCFsIntegrationTest.java | 26 ++++++++++++++++--- 11 files changed, 58 insertions(+), 32 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBOptions.java b/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBOptions.java index aa6aeae9079..57c92cf0585 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBOptions.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBOptions.java @@ -25,18 +25,24 @@ public GenomicsDBOptions(final Path reference) { } public GenomicsDBOptions(final Path reference, GenomicsDBArgumentCollection genomicsdbArgs) { - this(reference, genomicsdbArgs, new GenotypeCalculationArgumentCollection(), false); + this(reference, genomicsdbArgs, new GenotypeCalculationArgumentCollection()); } public GenomicsDBOptions(final Path reference, final GenomicsDBArgumentCollection genomicsdbArgs, - final GenotypeCalculationArgumentCollection genotypeCalcArgs, final boolean forceCallGenotypes) { + final GenotypeCalculationArgumentCollection genotypeCalcArgs) { this.reference = reference; - this.callGenotypes = genomicsdbArgs.callGenotypes || forceCallGenotypes; - this.maxDiploidAltAllelesThatCanBeGenotyped = genomicsdbArgs.maxDiploidAltAllelesThatCanBeGenotyped; - this.maxGenotypeCount = genotypeCalcArgs.MAX_GENOTYPE_COUNT; + this.callGenotypes = genomicsdbArgs.callGenotypes; this.useBCFCodec = genomicsdbArgs.useBCFCodec; this.sharedPosixFSOptimizations = genomicsdbArgs.sharedPosixFSOptimizations; this.useGcsHdfsConnector = genomicsdbArgs.useGcsHdfsConnector; + if (genotypeCalcArgs != null) { + this.maxDiploidAltAllelesThatCanBeGenotyped = genotypeCalcArgs.maxAlternateAlleles; + this.maxGenotypeCount = genotypeCalcArgs.maxGenotypeCount; + } else { + // Some defaults + this.maxDiploidAltAllelesThatCanBeGenotyped = genomicsdbArgs.maxDiploidAltAllelesThatCanBeGenotyped; + this.maxGenotypeCount = GenotypeCalculationArgumentCollection.DEFAULT_MAX_GENOTYPE_COUNT; + } } public Path getReference() { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFs.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFs.java index eb87d041913..98c03b75179 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFs.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFs.java @@ -208,9 +208,10 @@ public boolean requiresReference() { @Override protected GenomicsDBOptions getGenomicsDBOptions() { - //extract called genotypes so hom refs with no PLs aren't ambiguous if (genomicsDBOptions == null) { - genomicsDBOptions = new GenomicsDBOptions(referenceArguments.getReferencePath(), genomicsdbArgs, genotypeArgs, true); + //extract called genotypes so hom refs with no PLs aren't ambiguous + genomicsdbArgs.callGenotypes = true; + genomicsDBOptions = new GenomicsDBOptions(referenceArguments.getReferencePath(), genomicsdbArgs, genotypeArgs); } return genomicsDBOptions; } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsEngine.java index c6270834ebb..fe599aa7da1 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/GenotypeGVCFsEngine.java @@ -300,7 +300,7 @@ private VariantContext callSomaticGenotypes(final VariantContext vc, double tlod final VariantContextBuilder builder = new VariantContextBuilder(vc); final VariantContext regenotypedVC = builder.genotypes(newGenotypes).make(); - final int maxAltAlleles = genotypingEngine.getConfiguration().genotypeArgs.MAX_ALTERNATE_ALLELES; + final int maxAltAlleles = genotypingEngine.getConfiguration().genotypeArgs.maxAlternateAlleles; List allelesToKeep; //we need to make sure all alleles pass the tlodThreshold diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeCalculationArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeCalculationArgumentCollection.java index 44b0fb41a31..e688ac27457 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeCalculationArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypeCalculationArgumentCollection.java @@ -5,7 +5,6 @@ import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.hellbender.engine.FeatureInput; import org.broadinstitute.hellbender.exceptions.GATKException; -import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.variant.HomoSapiensConstants; import java.io.Serializable; @@ -127,7 +126,7 @@ public GenotypeCalculationArgumentCollection clone() { * Note that the default was changed from 10.0 to 30.0 in version 4.1.0.0 to accompany the switch to use the the new quality score by default. */ @Argument(fullName = CALL_CONFIDENCE_LONG_NAME, shortName = CALL_CONFIDENCE_SHORT_NAME, doc = "The minimum phred-scaled confidence threshold at which variants should be called", optional = true) - public double STANDARD_CONFIDENCE_FOR_CALLING = DEFAULT_STANDARD_CONFIDENCE_FOR_CALLING; + public double standardConfidenceForCalling = DEFAULT_STANDARD_CONFIDENCE_FOR_CALLING; /** * If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES), @@ -135,11 +134,11 @@ public GenotypeCalculationArgumentCollection clone() { * scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend * that you not play around with this parameter. * - * See also {@link #MAX_GENOTYPE_COUNT}. + * See also {@link #maxGenotypeCount}. */ @Advanced @Argument(fullName = MAX_ALTERNATE_ALLELES_LONG_NAME, doc = "Maximum number of alternate alleles to genotype", optional = true) - public int MAX_ALTERNATE_ALLELES = DEFAULT_MAX_ALTERNATE_ALLELES; + public int maxAlternateAlleles = DEFAULT_MAX_ALTERNATE_ALLELES; /** * If there are more than this number of genotypes at a locus presented to the genotyper, then only this many genotypes will be used. @@ -152,17 +151,18 @@ public GenotypeCalculationArgumentCollection clone() { * Unless there is a good reason to change the default value, we highly recommend that you not play around with this parameter. * * The maximum number of alternative alleles used in the genotyping step will be the lesser of the two: - * 1. the largest number of alt alleles, given ploidy, that yields a genotype count no higher than {@link #MAX_GENOTYPE_COUNT} - * 2. the value of {@link #MAX_ALTERNATE_ALLELES} + * 1. the largest number of alt alleles, given ploidy, that yields a genotype count no higher than {@link #maxGenotypeCount} + * 2. the value of {@link #maxAlternateAlleles} * - * See also {@link #MAX_ALTERNATE_ALLELES}. + * See also {@link #maxAlternateAlleles}. */ @Advanced @Argument(fullName = MAX_GENOTYPE_COUNT_LONG_NAME, doc = "Maximum number of genotypes to consider at any site", optional = true) - public int MAX_GENOTYPE_COUNT = DEFAULT_MAX_GENOTYPE_COUNT; + public int maxGenotypeCount = DEFAULT_MAX_GENOTYPE_COUNT; /** - * Sample ploidy - equivalent to number of chromosomes per pool. In pooled experiments this should be = # of samples in pool * individual sample ploidy + * Sample ploidy - equivalent to number of chromoso + * mes per pool. In pooled experiments this should be = # of samples in pool * individual sample ploidy */ @Argument(shortName = SAMPLE_PLOIDY_SHORT_NAME, fullName = SAMPLE_PLOIDY_LONG_NAME, doc="Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy).", optional=true) public int samplePloidy = HomoSapiensConstants.DEFAULT_PLOIDY; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java index af0ee663543..bd306a0b3e7 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java @@ -127,7 +127,7 @@ public VariantContext calculateGenotypes(final VariantContext vc, final Genotype } final int defaultPloidy = configuration.genotypeArgs.samplePloidy; - final int maxAltAlleles = configuration.genotypeArgs.MAX_ALTERNATE_ALLELES; + final int maxAltAlleles = configuration.genotypeArgs.maxAlternateAlleles; VariantContext reducedVC = vc; if (maxAltAlleles < vc.getAlternateAlleles().size()) { @@ -305,7 +305,7 @@ private OutputAlleleSubset calculateOutputAlleleSubset(final AFCalculationResult // we want to keep the NON_REF symbolic allele but only in the absence of a non-symbolic allele, e.g. // if we combined a ref / NON_REF gVCF with a ref / alt gVCF final boolean isNonRefWhichIsLoneAltAllele = alternativeAlleleCount == 1 && allele.equals(Allele.NON_REF_ALLELE); - final boolean isPlausible = afCalculationResult.passesThreshold(allele, configuration.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING); + final boolean isPlausible = afCalculationResult.passesThreshold(allele, configuration.genotypeArgs.standardConfidenceForCalling); //it's possible that the upstream deletion that spanned this site was not emitted, mooting the symbolic spanning deletion allele final boolean isSpuriousSpanningDeletion = GATKVCFConstants.isSpanningDeletion(allele) && !isVcCoveredByDeletion(vc); @@ -414,7 +414,7 @@ protected final boolean passesEmitThreshold(final double conf, final boolean bes } protected final boolean passesCallThreshold(final double conf) { - return conf >= configuration.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING; + return conf >= configuration.genotypeArgs.standardConfidenceForCalling; } protected Map composeCallAttributes(final VariantContext vc, final List alleleCountsofMLE, diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyper.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyper.java index cd4917c178f..e30a44a6ddd 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyper.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/gnarlyGenotyper/GnarlyGenotyper.java @@ -173,8 +173,9 @@ protected List transformTraversalIntervals(final List args = new ArrayList(); + args.add("--"+GenotypeCalculationArgumentCollection.MAX_ALTERNATE_ALLELES_LONG_NAME); + args.add("2"); // Too small max_alternate_alleles arg to GenomicsDB, should fail + try { + File output = runGenotypeGVCFS(genomicsDBUri, expected, args, reference); + Assert.fail("Expected exception not thrown"); + } catch (IllegalStateException e) { + // Pass + } + + args.clear(); + args.add("--"+GenotypeCalculationArgumentCollection.MAX_ALTERNATE_ALLELES_LONG_NAME); + args.add("8"); + runGenotypeGVCFSAndAssertSomething(genomicsDBUri, expected, args, VariantContextTestUtils::assertVariantContextsHaveSameGenotypes, reference); // The default option with GenomicsDB input uses VCFCodec for decoding, test BCFCodec explicitly - final List args = new ArrayList(); args.add("--"+GenomicsDBArgumentCollection.USE_BCF_CODEC_LONG_NAME); runGenotypeGVCFSAndAssertSomething(genomicsDBUri, expected, args, VariantContextTestUtils::assertVariantContextsHaveSameGenotypes, reference); } @@ -362,9 +374,8 @@ private void runGenotypeGVCFSAndAssertSomething(File input, File expected, List< ); } - private void runGenotypeGVCFSAndAssertSomething(String input, File expected, List additionalArguments, BiConsumer assertion, String reference) throws IOException { + private File runGenotypeGVCFS(String input, File expected, List additionalArguments, String reference) { final File output = UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ? expected : createTempFile("genotypegvcf", ".vcf"); - final ArgumentsBuilder args = new ArgumentsBuilder(); args.addReference(new File(reference)) .add("V", input) @@ -377,6 +388,13 @@ private void runGenotypeGVCFSAndAssertSomething(String input, File expected, Lis Utils.resetRandomGenerator(); runCommandLine(args); + return output; + } + + private void runGenotypeGVCFSAndAssertSomething(String input, File expected, List additionalArguments, BiConsumer assertion, String reference) throws IOException { + final File output = runGenotypeGVCFS(input, expected, additionalArguments, reference); + Assert.assertTrue(output.exists()); + if (! UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS) { final List expectedVC = VariantContextTestUtils.getVariantContexts(expected); final List actualVC = VariantContextTestUtils.getVariantContexts(output);