From 0b185790933ba238ebae85f9eeca29aa1b041a04 Mon Sep 17 00:00:00 2001 From: Louis Bergelson Date: Mon, 11 Dec 2023 16:56:19 -0500 Subject: [PATCH] Support for custom ploidy regions in HaplotypeCaller (#8609) For having variable ploidy in different regions, like making haploid calls outside the PAR on chrX or chrY, there is now a --ploidy-regions flag. The -ploidy flag sets the default ploidy to use everywhere, and --ploidy-regions should be a .bed or .interval_list with "name" column containing the desired ploidy to use in that region when genotyping. Note that variants near the boundary may not have the matching ploidy since the ploidy used will be determined using the following precedence: * ploidy given in --ploidy-regions for all intervals overlapping the active region when calling your variant with ties broken by using largest ploidy); note ploidy interval may only overlap the active region and determine the ploidy of your variant even if the end coordinate written for your variant lies outside the given region * ploidy given via global -ploidy flag * ploidy determined by the default global built-in constant for humans (2). --------- Co-authored-by: Ty Kay Co-authored-by: rickymagner <81349869+rickymagner@users.noreply.github.com> --- .../formats/records/SimpleCount.java | 14 +- .../haplotypecaller/HaplotypeCaller.java | 15 + .../HaplotypeCallerArgumentCollection.java | 17 +- .../HaplotypeCallerEngine.java | 161 +++++++++-- .../RampedHaplotypeCallerEngine.java | 8 +- .../HaplotypeCallerIntegrationTest.java | 119 ++++++++ .../expected.testPloidyRegions.vcf | 273 ++++++++++++++++++ .../testMismatchPloidyRegions.bed | 3 + .../testNegativePloidyRegions.bed | 3 + .../testNonIntegerPloidyRegions.bed | 3 + .../haplotypecaller/testPloidyRegions.bed | 4 + 11 files changed, 598 insertions(+), 22 deletions(-) create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testPloidyRegions.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testMismatchPloidyRegions.bed create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testNegativePloidyRegions.bed create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testNonIntegerPloidyRegions.bed create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testPloidyRegions.bed diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/SimpleCount.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/SimpleCount.java index 5d6521850b5..bdf70dc5ccf 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/SimpleCount.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/SimpleCount.java @@ -2,6 +2,8 @@ import htsjdk.samtools.util.Locatable; import htsjdk.tribble.Feature; +import htsjdk.tribble.NamedFeature; +import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.param.ParamUtils; @@ -19,7 +21,17 @@ public class SimpleCount implements Locatable, Feature { public SimpleCount(final SimpleInterval interval, final int count) { this.interval = Utils.nonNull(interval); - this.count = ParamUtils.isPositiveOrZero(count, "Can't construct SimpleCount with negative count."); + this.count = ParamUtils.isPositiveOrZero(count, "Can't construct SimpleCount with negative count at " + interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd() + "."); + } + + public SimpleCount(final NamedFeature namedFeature) { + try { + int count = Integer.parseInt(namedFeature.getName()); + this.interval = new SimpleInterval(namedFeature); + this.count = ParamUtils.isPositiveOrZero(count, "Can't construct SimpleCount with negative count at " + namedFeature.getContig() + ":" + namedFeature.getStart() + "-" + namedFeature.getEnd() + "."); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("Error parsing name into integer for feature at " + namedFeature.getContig() + ":" + namedFeature.getStart() + "-" + namedFeature.getEnd() + "."); + } } @Override diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCaller.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCaller.java index f504d400b13..557e7f0add9 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCaller.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCaller.java @@ -127,6 +127,21 @@ * argument. Note however that very high ploidies (such as are encountered in large pooled experiments) may cause * performance challenges including excessive slowness. We are working on resolving these limitations.

* + *

For having variable ploidy in different regions, like making haploid calls outside the PAR on chrX or chrY, + * see the --ploidy-regions flag. The -ploidy flag sets the default ploidy to use everywhere, and --ploidy-regions + * should be a .bed or .interval_list with "name" column containing the desired ploidy to use in that region + * when genotyping. Note that variants near the boundary may not have the matching ploidy since the ploidy used will + * be determined using the following precedence:

+ *
    + *
  1. ploidy given in --ploidy-regions for all intervals overlapping the active region when calling your variant + * (with ties broken by using largest ploidy); note ploidy interval may only overlap the active region and determine + * the ploidy of your variant even if the end coordinate written for your variant lies outside the given region;
  2. + *
  3. ploidy given via global -ploidy flag;
  4. + *
  5. ploidy determined by the default global built-in constant for humans (2).
  6. + *
+ * + *

Coordinates for the PAR for CRCh38 can be found here.

+ * *

Additional Notes

*