From d8e2b89c30bbdbb61a96e074db0f598b08f34053 Mon Sep 17 00:00:00 2001 From: Ty Kay Date: Wed, 2 Aug 2023 09:42:40 -0400 Subject: [PATCH 1/5] creates multiple enginges to handle different ploidies Finish changes to HaplotypeCaller to add ploidy-regions flag allowing variable ploidy Change IntervalFileFeature interface to NamedFeature for htsjdk dependency Address comments for draft PR Make a few changes and refactors based on comments Add some tests and small tweaks --- .../formats/records/SimpleCount.java | 14 +- .../haplotypecaller/HaplotypeCaller.java | 15 + .../HaplotypeCallerArgumentCollection.java | 25 +- .../HaplotypeCallerEngine.java | 160 ++++++++-- .../RampedHaplotypeCallerEngine.java | 8 +- .../HaplotypeCallerIntegrationTest.java | 117 ++++++++ .../expected.testPloidyRegions.vcf | 273 ++++++++++++++++++ .../testMismatchPloidyRegions.bed | 3 + .../testNegativePloidyRegions.bed | 3 + .../testNonIntegerPloidyRegions.bed | 3 + .../haplotypecaller/testPloidyRegions.bed | 4 + 11 files changed, 603 insertions(+), 22 deletions(-) create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/expected.testPloidyRegions.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testMismatchPloidyRegions.bed create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testNegativePloidyRegions.bed create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testNonIntegerPloidyRegions.bed create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testPloidyRegions.bed diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/SimpleCount.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/SimpleCount.java index 5d6521850b5..bdf70dc5ccf 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/SimpleCount.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/formats/records/SimpleCount.java @@ -2,6 +2,8 @@ import htsjdk.samtools.util.Locatable; import htsjdk.tribble.Feature; +import htsjdk.tribble.NamedFeature; +import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.param.ParamUtils; @@ -19,7 +21,17 @@ public class SimpleCount implements Locatable, Feature { public SimpleCount(final SimpleInterval interval, final int count) { this.interval = Utils.nonNull(interval); - this.count = ParamUtils.isPositiveOrZero(count, "Can't construct SimpleCount with negative count."); + this.count = ParamUtils.isPositiveOrZero(count, "Can't construct SimpleCount with negative count at " + interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd() + "."); + } + + public SimpleCount(final NamedFeature namedFeature) { + try { + int count = Integer.parseInt(namedFeature.getName()); + this.interval = new SimpleInterval(namedFeature); + this.count = ParamUtils.isPositiveOrZero(count, "Can't construct SimpleCount with negative count at " + namedFeature.getContig() + ":" + namedFeature.getStart() + "-" + namedFeature.getEnd() + "."); + } catch (NumberFormatException e) { + throw new IllegalArgumentException("Error parsing name into integer for feature at " + namedFeature.getContig() + ":" + namedFeature.getStart() + "-" + namedFeature.getEnd() + "."); + } } @Override diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCaller.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCaller.java index f504d400b13..557e7f0add9 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCaller.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCaller.java @@ -127,6 +127,21 @@ * argument. Note however that very high ploidies (such as are encountered in large pooled experiments) may cause * performance challenges including excessive slowness. We are working on resolving these limitations.

* + *

For having variable ploidy in different regions, like making haploid calls outside the PAR on chrX or chrY, + * see the --ploidy-regions flag. The -ploidy flag sets the default ploidy to use everywhere, and --ploidy-regions + * should be a .bed or .interval_list with "name" column containing the desired ploidy to use in that region + * when genotyping. Note that variants near the boundary may not have the matching ploidy since the ploidy used will + * be determined using the following precedence:

+ *
    + *
  1. ploidy given in --ploidy-regions for all intervals overlapping the active region when calling your variant + * (with ties broken by using largest ploidy); note ploidy interval may only overlap the active region and determine + * the ploidy of your variant even if the end coordinate written for your variant lies outside the given region;
  2. + *
  3. ploidy given via global -ploidy flag;
  4. + *
  5. ploidy determined by the default global built-in constant for humans (2).
  6. + *
+ * + *

Coordinates for the PAR for CRCh38 can be found here.

+ * *

Additional Notes

*