From 8ee86e7c147399277a57f74bf751c66b3bda157f Mon Sep 17 00:00:00 2001 From: jamesemery Date: Tue, 12 Mar 2024 09:55:44 -0400 Subject: [PATCH] Fix to long deletions that overhang into the assembly window causing exceptions in HaplotypeCaller (#8731) --- .../HaplotypeCallerEngine.java | 1 + .../HaplotypeCallerIntegrationTest.java | 3 +- ...ode_givenAlleles_ExtremeLengthDeletion.vcf | 29 ++++++++++++++++++ ...givenAlleles_ExtremeLengthDeletion.vcf.idx | Bin 0 -> 359 bytes 4 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGenotypeGivenAllelesMode_givenAlleles_ExtremeLengthDeletion.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGenotypeGivenAllelesMode_givenAlleles_ExtremeLengthDeletion.vcf.idx diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java index 9e09eeba8a9..36918f1fb91 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java @@ -758,6 +758,7 @@ public List callRegion(final AssemblyRegion region, final Featur final List givenAlleles = features.getValues(hcArgs.alleles).stream() .filter(vc -> hcArgs.forceCallFiltered || vc.isNotFiltered()) .flatMap(vc -> GATKVariantContextUtils.splitVariantContextToEvents(vc, false, GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, false).stream()) + .filter(event -> event.getStart() >= region.getSpan().getStart()) // filter out events that do not start within the region. This approach works because events that begin upstream of the calling window cannot be called by this region calling code in the first place.
.collect(Collectors.toList()); if( givenAlleles.isEmpty() && region.size() == 0 ) { diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index cddb87c5dac..1e059b444b5 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -671,7 +671,8 @@ public void testFloorGVCFBlocks(final String inputFileName, final String referen public Object[][] getForceCallingInputs() { return new Object[][] { {NA12878_20_21_WGS_bam, new File(TEST_FILES_DIR, "testGenotypeGivenAllelesMode_givenAlleles.vcf"), "20:10000000-10010000"}, - {NA12878_20_21_WGS_bam, new File(toolsTestDir, "mutect/gga_mode.vcf"), "20:9998500-10010000"} + {NA12878_20_21_WGS_bam, new File(toolsTestDir, "mutect/gga_mode.vcf"), "20:9998500-10010000"}, + {NA12878_20_21_WGS_bam, new File(TEST_FILES_DIR, "testGenotypeGivenAllelesMode_givenAlleles_ExtremeLengthDeletion.vcf"), "20:9998500-10010000"} // This is designed to test https://github.com/broadinstitute/gatk/issues/8675, which stemmed from an edge case in the force calling logic where a deletion allele that is longer than the assembly window padding spans into the assembly window. This tests that we do not see an exception in this case. 
}; } diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGenotypeGivenAllelesMode_givenAlleles_ExtremeLengthDeletion.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGenotypeGivenAllelesMode_givenAlleles_ExtremeLengthDeletion.vcf new file mode 100644 index 00000000000..e36b2052864 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGenotypeGivenAllelesMode_givenAlleles_ExtremeLengthDeletion.vcf @@ -0,0 +1,29 @@ +##fileformat=VCFv4.2 +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 +20 10000694 . GAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAA A . . . GT 0|1 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGenotypeGivenAllelesMode_givenAlleles_ExtremeLengthDeletion.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGenotypeGivenAllelesMode_givenAlleles_ExtremeLengthDeletion.vcf.idx new file mode 100644 index 0000000000000000000000000000000000000000..f469a4b8452bb36aea9d4360945adfaccb3d7ab2 GIT binary patch literal 359 zcmZ8cO-sW-5M4iR;?