Skip to content

Commit

Permalink
creates multiple enginges to handle different ploidies
Browse files Browse the repository at this point in the history
Finish changes to HaplotypeCaller to add ploidy-regions flag allowing variable ploidy

Change IntervalFileFeature interface to NamedFeature for htsjdk dependency

Address comments for draft PR

Make a few changes and refactors based on comments

Add some tests and small tweaks
  • Loading branch information
Ty Kay authored and lbergelson committed Dec 8, 2023
1 parent e37b344 commit d8e2b89
Show file tree
Hide file tree
Showing 11 changed files with 603 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import htsjdk.samtools.util.Locatable;
import htsjdk.tribble.Feature;
import htsjdk.tribble.NamedFeature;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.param.ParamUtils;
Expand All @@ -19,7 +21,17 @@ public class SimpleCount implements Locatable, Feature {
public SimpleCount(final SimpleInterval interval,
final int count) {
this.interval = Utils.nonNull(interval);
this.count = ParamUtils.isPositiveOrZero(count, "Can't construct SimpleCount with negative count.");
this.count = ParamUtils.isPositiveOrZero(count, "Can't construct SimpleCount with negative count at " + interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd() + ".");
}

public SimpleCount(final NamedFeature namedFeature) {
try {
int count = Integer.parseInt(namedFeature.getName());
this.interval = new SimpleInterval(namedFeature);
this.count = ParamUtils.isPositiveOrZero(count, "Can't construct SimpleCount with negative count at " + namedFeature.getContig() + ":" + namedFeature.getStart() + "-" + namedFeature.getEnd() + ".");
} catch (NumberFormatException e) {
throw new IllegalArgumentException("Error parsing name into integer for feature at " + namedFeature.getContig() + ":" + namedFeature.getStart() + "-" + namedFeature.getEnd() + ".");
}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,21 @@
* argument. Note however that very high ploidies (such as are encountered in large pooled experiments) may cause
* performance challenges including excessive slowness. We are working on resolving these limitations.</p>
*
* <p>For having variable ploidy in different regions, like making haploid calls outside the PAR on chrX or chrY,
* see the --ploidy-regions flag. The -ploidy flag sets the default ploidy to use everywhere, and --ploidy-regions
* should be a .bed or .interval_list with "name" column containing the desired ploidy to use in that region
* when genotyping. Note that variants near the boundary may not have the matching ploidy since the ploidy used will
* be determined using the following precedence: </p>
* <ol>
* <li>ploidy given in --ploidy-regions for all intervals overlapping the active region when calling your variant
* (with ties broken by using largest ploidy); note ploidy interval may only overlap the active region and determine
* the ploidy of your variant even if the end coordinate written for your variant lies outside the given region;</li>
* <li>ploidy given via global -ploidy flag;</li>
* <li>ploidy determined by the default global built-in constant for humans (2).</li>
* </ol>
*
* <p>Coordinates for the PAR for CRCh38 can be found <a href='http://useast.ensembl.org/info/genome/genebuild/human_PARS.html'>here</a>.</p>
*
* <h3>Additional Notes</h3>
* <ul>
* <li>When working with PCR-free data, be sure to set `-pcr_indel_model NONE` (see argument below).</li>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package org.broadinstitute.hellbender.tools.walkers.haplotypecaller;

import htsjdk.tribble.NamedFeature;
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.arrow.util.VisibleForTesting;
import org.apache.commons.lang3.ArrayUtils;
import org.broadinstitute.barclay.argparser.Advanced;
import org.broadinstitute.barclay.argparser.Argument;
Expand All @@ -9,6 +11,7 @@
import org.broadinstitute.hellbender.cmdline.ReadFilterArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.argumentcollections.DbsnpArgumentCollection;
import org.broadinstitute.hellbender.engine.FeatureDataSource;
import org.broadinstitute.hellbender.engine.FeatureInput;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.engine.spark.AssemblyRegionArgumentCollection;
Expand All @@ -23,9 +26,11 @@
/**
* Set of arguments for the {@link HaplotypeCallerEngine}
*/
public class HaplotypeCallerArgumentCollection extends AssemblyBasedCallerArgumentCollection implements Serializable{
public class HaplotypeCallerArgumentCollection extends AssemblyBasedCallerArgumentCollection implements Serializable {
private static final long serialVersionUID = 1L;

public static final String PLOIDY_REGIONS_NAME = "ploidy-regions";

public static final String GQ_BAND_LONG_NAME = "gvcf-gq-bands";
public static final String GQ_BAND_SHORT_NAME = "GQB";
public static final String DO_NOT_CORRECT_OVERLAPPING_BASE_QUALITIES_LONG_NAME = "do-not-correct-overlapping-quality";
Expand Down Expand Up @@ -61,6 +66,9 @@ protected ReadThreadingAssemblerArgumentCollection getReadThreadingAssemblerArgu
return new HaplotypeCallerReadThreadingAssemblerArgumentCollection();
}

@Argument(fullName = PLOIDY_REGIONS_NAME, shortName = PLOIDY_REGIONS_NAME, doc = "Interval file with column specifying desired ploidy for genotyping models. Overrides default ploidy and user-provided --ploidy argument in specific regions.", optional = true)
public FeatureDataSource<NamedFeature> ploidyRegions = null;

/**
* You can use this argument to specify that HC should process a single sample out of a multisample BAM file. This
* is especially useful if your samples are all in the same file but you need to run them individually through HC
Expand Down Expand Up @@ -312,6 +320,21 @@ boolean isFlowBasedCallingMode() {
return flowMode != FlowMode.NONE;
}

// Default constructor
public HaplotypeCallerArgumentCollection() {
super();
}

// Private constructor for use in copyWithNewPloidy
// Allows you to make new instances of hcArgs with identical fields except for genotyper ploidy
private HaplotypeCallerArgumentCollection(int ploidy) {
this.standardArgs.genotypeArgs.samplePloidy = ploidy;
}

// Copy method used to create new hcArgs with same fields except input ploidy model
public HaplotypeCallerArgumentCollection copyWithNewPloidy(int ploidy) {
return new HaplotypeCallerArgumentCollection(ploidy);
}

/**
* the different flow modes, in terms of their parameters and their values
Expand Down
Loading

0 comments on commit d8e2b89

Please sign in to comment.