Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Passing implementation of custom ploidy regions in HaplotypeCaller #8609

Merged
merged 5 commits into from
Dec 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import htsjdk.samtools.util.Locatable;
import htsjdk.tribble.Feature;
import htsjdk.tribble.NamedFeature;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.param.ParamUtils;
Expand All @@ -19,7 +21,17 @@ public class SimpleCount implements Locatable, Feature {
public SimpleCount(final SimpleInterval interval,
final int count) {
this.interval = Utils.nonNull(interval);
this.count = ParamUtils.isPositiveOrZero(count, "Can't construct SimpleCount with negative count.");
this.count = ParamUtils.isPositiveOrZero(count, "Can't construct SimpleCount with negative count at " + interval.getContig() + ":" + interval.getStart() + "-" + interval.getEnd() + ".");
}

public SimpleCount(final NamedFeature namedFeature) {
try {
int count = Integer.parseInt(namedFeature.getName());
this.interval = new SimpleInterval(namedFeature);
this.count = ParamUtils.isPositiveOrZero(count, "Can't construct SimpleCount with negative count at " + namedFeature.getContig() + ":" + namedFeature.getStart() + "-" + namedFeature.getEnd() + ".");
} catch (NumberFormatException e) {
throw new IllegalArgumentException("Error parsing name into integer for feature at " + namedFeature.getContig() + ":" + namedFeature.getStart() + "-" + namedFeature.getEnd() + ".");
}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,21 @@
* argument. Note however that very high ploidies (such as are encountered in large pooled experiments) may cause
* performance challenges including excessive slowness. We are working on resolving these limitations.</p>
*
* <p>For having variable ploidy in different regions, like making haploid calls outside the PAR on chrX or chrY,
* see the --ploidy-regions flag. The -ploidy flag sets the default ploidy to use everywhere, and --ploidy-regions
* should be a .bed or .interval_list with "name" column containing the desired ploidy to use in that region
* when genotyping. Note that variants near the boundary may not have the matching ploidy since the ploidy used will
* be determined using the following precedence: </p>
* <ol>
* <li>ploidy given in --ploidy-regions for all intervals overlapping the active region when calling your variant
* (with ties broken by using largest ploidy); note ploidy interval may only overlap the active region and determine
* the ploidy of your variant even if the end coordinate written for your variant lies outside the given region;</li>
* <li>ploidy given via global -ploidy flag;</li>
* <li>ploidy determined by the default global built-in constant for humans (2).</li>
* </ol>
*
* <p>Coordinates for the PAR for CRCh38 can be found <a href='http://useast.ensembl.org/info/genome/genebuild/human_PARS.html'>here</a>.</p>
*
* <h3>Additional Notes</h3>
* <ul>
* <li>When working with PCR-free data, be sure to set `-pcr_indel_model NONE` (see argument below).</li>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
package org.broadinstitute.hellbender.tools.walkers.haplotypecaller;

import htsjdk.tribble.NamedFeature;
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.arrow.util.VisibleForTesting;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.SerializationUtils;
import org.broadinstitute.barclay.argparser.Advanced;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.ArgumentCollection;
import org.broadinstitute.barclay.argparser.Hidden;
import org.broadinstitute.hellbender.cmdline.ReadFilterArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.argumentcollections.DbsnpArgumentCollection;
import org.broadinstitute.hellbender.engine.FeatureDataSource;
import org.broadinstitute.hellbender.engine.FeatureInput;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.engine.spark.AssemblyRegionArgumentCollection;
Expand All @@ -23,9 +27,11 @@
/**
* Set of arguments for the {@link HaplotypeCallerEngine}
*/
public class HaplotypeCallerArgumentCollection extends AssemblyBasedCallerArgumentCollection implements Serializable{
public class HaplotypeCallerArgumentCollection extends AssemblyBasedCallerArgumentCollection implements Serializable {
private static final long serialVersionUID = 1L;

public static final String PLOIDY_REGIONS_NAME = "ploidy-regions";

public static final String GQ_BAND_LONG_NAME = "gvcf-gq-bands";
public static final String GQ_BAND_SHORT_NAME = "GQB";
public static final String DO_NOT_CORRECT_OVERLAPPING_BASE_QUALITIES_LONG_NAME = "do-not-correct-overlapping-quality";
Expand Down Expand Up @@ -61,6 +67,9 @@ protected ReadThreadingAssemblerArgumentCollection getReadThreadingAssemblerArgu
return new HaplotypeCallerReadThreadingAssemblerArgumentCollection();
}

@Argument(fullName = PLOIDY_REGIONS_NAME, shortName = PLOIDY_REGIONS_NAME, doc = "Interval file with column specifying desired ploidy for genotyping models. Overrides default ploidy and user-provided --ploidy argument in specific regions.", optional = true)
public FeatureInput<NamedFeature> ploidyRegions = null;

/**
* You can use this argument to specify that HC should process a single sample out of a multisample BAM file. This
* is especially useful if your samples are all in the same file but you need to run them individually through HC
Expand Down Expand Up @@ -312,6 +321,12 @@ boolean isFlowBasedCallingMode() {
return flowMode != FlowMode.NONE;
}

// Copy method used to create new hcArgs with same fields except input ploidy model
public HaplotypeCallerArgumentCollection copyWithNewPloidy(int ploidy) {
HaplotypeCallerArgumentCollection newArgsWithNewPloidy = SerializationUtils.clone(this);
newArgsWithNewPloidy.standardArgs.genotypeArgs.samplePloidy = ploidy;
return newArgsWithNewPloidy;
}

/**
* the different flow modes, in terms of their parameters and their values
Expand Down
Loading
Loading