Skip to content

Commit

Permalink
Adding TransmittedSingleton annotation and adding quality threshold a… (
Browse files Browse the repository at this point in the history
#8329)

* Adding TransmittedSingleton annotation and adding quality threshold arguments to PossibleDenovo Annotation
  • Loading branch information
meganshand committed May 25, 2023
1 parent 947fbce commit e3f30ff
Show file tree
Hide file tree
Showing 8 changed files with 563 additions and 113 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* A common interface for handling annotations that require pedigree file information either in the form of explicitly
* selected founderIDs or in the form of an imported pedigreeFile.
*
* In order to use the the behavior, simply extend Pedigree annotation and access its constructors, then call
* In order to use the behavior, simply extend Pedigree annotation and access its constructors, then call
* getFounderGenotypes() to extract only the genotypes corresponding to requested founder samples or that appear as founders
* in the provided pedigree file. If no founderIDs or pedigreeFiles are present, then it defaults to returning all genotypes.
*
Expand Down Expand Up @@ -103,6 +103,26 @@ void validateArguments(Collection<String> founderIds, GATKPath pedigreeFile) {
}
}

/**
* Warning generator for when a pedigree file is required and founderIDs cannot be used for this annotation.
* @param founderIds
* @param pedigreeFile
*/
protected String validateArgumentsWhenPedigreeRequired(Collection<String> founderIds, GATKPath pedigreeFile) {
if (pedigreeFile == null) {
if ((founderIds != null && !founderIds.isEmpty())) {
return "PossibleDenovo annotation will not be calculated, must provide a valid PED file (-ped). Founder-id arguments cannot be used for this annotation";
} else {
return "PossibleDenovo Annotation will not be calculated, must provide a valid PED file (-ped) from the command line.";
}
} else {
if ((founderIds != null && !founderIds.isEmpty())) {
return "PossibleDenovo annotation does not take founder-id arguments, trio information will be extracted only from the provided PED file";
}
}
return null;
}

/**
* This is a getter for the pedigree file, which could be null.
*
Expand All @@ -111,4 +131,19 @@ void validateArguments(Collection<String> founderIds, GATKPath pedigreeFile) {
public @Nullable GATKPath getPedigreeFile() {
return pedigreeFile;
}

/**
* Helper function to check if the variant context has GQs for the trio
* @param vc variant context
* @param trio trio to check for GQs in the variant context
*/
protected static boolean contextHasTrioGQs(final VariantContext vc, final Trio trio) {
final String mom = trio.getMaternalID();
final String dad = trio.getPaternalID();
final String kid = trio.getChildID();

return (!mom.isEmpty() && vc.hasGenotype(mom) && vc.getGenotype(mom).hasGQ())
&& (!dad.isEmpty() && vc.hasGenotype(dad) && vc.getGenotype(dad).hasGQ())
&& (!kid.isEmpty() && vc.hasGenotype(kid) && vc.getGenotype(kid).hasGQ());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.engine.ReferenceContext;
Expand Down Expand Up @@ -35,15 +36,30 @@
*/
@DocumentedFeature(groupName=HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Existence of a de novo mutation in at least one of the given families (hiConfDeNovo, loConfDeNovo)")
public final class PossibleDeNovo extends PedigreeAnnotation implements InfoFieldAnnotation {
public static final String DENOVO_PARENT_GQ_THRESHOLD = "denovo-parent-gq-threshold";
public static final String DENOVO_DEPTH_THRESHOLD = "denovo-depth-threshold";
protected final Logger warning = LogManager.getLogger(this.getClass());
private final MendelianViolation mendelianViolation;
private Set<Trio> trios;

@Argument(fullName = DENOVO_PARENT_GQ_THRESHOLD, doc = "Minimum genotype quality for parents to be considered for de novo calculation (separate from GQ thershold for full trio).", optional = true)
public int parentGQThreshold = hi_GQ_threshold;

@Argument(fullName = DENOVO_DEPTH_THRESHOLD, doc = "Minimum depth (DP) for all trio members to be considered for de novo calculation.", optional = true)
public int depthThreshold = 0;

@VisibleForTesting
public PossibleDeNovo(final Set<Trio> trios, final double minGenotypeQualityP) {
public PossibleDeNovo(final Set<Trio> trios, final double minGenotypeQualityP, final int parentGQThreshold, final int depthThreshold) {
super((Set<String>) null);
this.trios = Collections.unmodifiableSet(new LinkedHashSet<>(trios));
mendelianViolation = new MendelianViolation(minGenotypeQualityP);
this.parentGQThreshold = parentGQThreshold;
this.depthThreshold = depthThreshold;
}

@VisibleForTesting
public PossibleDeNovo(final Set<Trio> trios, final double minGenotypeQualityP) {
this(trios, minGenotypeQualityP, hi_GQ_threshold, 0);
}

public PossibleDeNovo(final GATKPath pedigreeFile){
Expand All @@ -58,19 +74,13 @@ public PossibleDeNovo(){

@Override
void validateArguments(Collection<String> founderIds, GATKPath pedigreeFile) {
if (pedigreeFile == null) {
if ((founderIds != null && !founderIds.isEmpty())) {
warning.warn("PossibleDenovo annotation will not be calculated, must provide a valid PED file (-ped). Founder-id arguments cannot be used for this annotation");
} else {
warning.warn("PossibleDenovo Annotation will not be calculated, must provide a valid PED file (-ped) from the command line.");
}
} else {
if ((founderIds != null && !founderIds.isEmpty())) {
warning.warn("PossibleDenovo annotation does not take founder-id arguments, trio information will be extracted only from the provided PED file");
}
String warningString = validateArgumentsWhenPedigreeRequired(founderIds, pedigreeFile);
if (warningString != null) {
warning.warn(warningString);
}
}


// Static thresholds for the denovo calculation
public final static double DEFAULT_MIN_GENOTYPE_QUALITY_P = 0; // TODO should this be exposed as a command line argument?
private static final int hi_GQ_threshold = 20; //WARNING - If you change this value, update the description in GATKVCFHeaderLines
Expand Down Expand Up @@ -105,15 +115,20 @@ public Map<String, Object> annotate(final ReferenceContext ref,
final List<String> lowConfDeNovoChildren = new ArrayList<>();
for (final Trio trio : trioSet) {
if (vc.isBiallelic() &&
PossibleDeNovo.contextHasTrioGQs(vc, trio) &&
contextHasTrioGQs(vc, trio) &&
mendelianViolation.isViolation(trio.getMother(), trio.getFather(), trio.getChild(), vc) &&
mendelianViolation.getParentsRefRefChildHet() > 0) {

final int childGQ = vc.getGenotype(trio.getChildID()).getGQ();
final int momGQ = vc.getGenotype(trio.getMaternalID()).getGQ();
final int dadGQ = vc.getGenotype(trio.getPaternalID()).getGQ();

if (childGQ >= hi_GQ_threshold && momGQ >= hi_GQ_threshold && dadGQ >= hi_GQ_threshold) {
final int childDP = vc.getGenotype(trio.getChildID()).getDP();
final int momDP = vc.getGenotype(trio.getMaternalID()).getDP();
final int dadDP = vc.getGenotype(trio.getPaternalID()).getDP();

if (childGQ >= hi_GQ_threshold && momGQ >= parentGQThreshold && dadGQ >= parentGQThreshold &&
childDP >= depthThreshold && momDP >= depthThreshold && dadDP >= depthThreshold) {
highConfDeNovoChildren.add(trio.getChildID());
} else if (childGQ >= lo_GQ_threshold && momGQ > 0 && dadGQ > 0) {
lowConfDeNovoChildren.add(trio.getChildID());
Expand All @@ -135,14 +150,4 @@ public Map<String, Object> annotate(final ReferenceContext ref,
return attributeMap;
}

private static boolean contextHasTrioGQs(final VariantContext vc, final Trio trio) {
final String mom = trio.getMaternalID();
final String dad = trio.getPaternalID();
final String kid = trio.getChildID();

return (!mom.isEmpty() && vc.hasGenotype(mom) && vc.getGenotype(mom).hasGQ())
&& (!dad.isEmpty() && vc.hasGenotype(dad) && vc.getGenotype(dad).hasGQ())
&& (!kid.isEmpty() && vc.hasGenotype(kid) && vc.getGenotype(kid).hasGQ());
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
package org.broadinstitute.hellbender.tools.walkers.annotator;

import com.google.common.annotations.VisibleForTesting;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFConstants;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.barclay.argparser.ExperimentalFeature;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.logging.OneShotLogger;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.samples.Trio;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;

import java.util.*;


/**
* Existence of a transmitted or non-transmitted singleton in at least one of the given families
*
* <p>This annotation uses the genotype information from individuals in family trios to identify transmitted and non-transmitted singletons and the sample(s) in which they occur.
* Transmitted singletons occur at sites in a cohort where the allele count is two and these two alleles occur in one parent and the child of a trio. A non-transmitted singleton
* are sites with an allele count of one and this one allele occurs in a parent, but not the child of a trio. In both cases the other parent must have a high quality hom ref call.
*
* <h3>Caveats</h3>
* <ul>
* <li>The calculation assumes that the organism is diploid.</li>
* <li>This annotation requires a valid pedigree file.</li>
* <li>This annotation is only valid for trios, not quads or other more complicated family structures.</li>
* <li>Only reports possible singletons for families where each of the three samples has high GQ (>20) and high depth (>20)</li>
* <li>Only reports possible singletons at sites with a Call Rate greater than 90% (meaning less than 10% of the samples at the given site were no-calls)</li>
* </ul>
*/
@DocumentedFeature(groupName= HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Existence of a transmitted (or non-transmitted) singleton in at least one of the given families (transmittedSingleton, nonTransmittedSingleton)")
public final class TransmittedSingleton extends PedigreeAnnotation implements InfoFieldAnnotation {
protected final Logger warning = LogManager.getLogger(this.getClass());
private final OneShotLogger oneShotLogger = new OneShotLogger(this.getClass());
private Set<Trio> trios;
private final int HI_GQ_THRESHOLD = 20;
private final int HI_DP_THRESHOLD = 20;
private final double CALL_RATE_THRESHOLD = 0.90;

@VisibleForTesting
public TransmittedSingleton(final Set<Trio> trios) {
super((Set<String>) null);
this.trios = Collections.unmodifiableSet(new LinkedHashSet<>(trios));
}

public TransmittedSingleton(final GATKPath pedigreeFile){
super(pedigreeFile);
}

public TransmittedSingleton(){
super((Set<String>) null);
}

private Set<Trio> initializeAndGetTrios() {
if (trios == null) {
trios = getTrios();
}
final long numOfTrios = trios.stream().map(trio -> trio.getMother().getFamilyID()).count();
if (numOfTrios == 0) {
oneShotLogger.warn("Submitted pedigree has no trios. TransmittedSingleton annotation will not be calculated.");
} else if (numOfTrios != trios.stream().map(trio -> trio.getMother().getFamilyID()).distinct().count()) {
oneShotLogger.warn("Submitted pedigree has non-trio families. TransmittedSingleton annotation is only valid for trios. Non-trio families (such as quads) will be ignored.");
}
return trios;
}

@Override
void validateArguments(Collection<String> founderIds, GATKPath pedigreeFile) {
String warningString = validateArgumentsWhenPedigreeRequired(founderIds, pedigreeFile);
if (warningString != null) {
warning.warn(warningString);
}
}

@Override
public List<String> getKeyNames() {
return Arrays.asList(GATKVCFConstants.TRANSMITTED_SINGLETON, GATKVCFConstants.NON_TRANSMITTED_SINGLETON);
}
@Override
public Map<String, Object> annotate(final ReferenceContext ref,
final VariantContext vc,
final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
Utils.nonNull(vc);
Set<Trio> trioSet = initializeAndGetTrios();
if (!vc.isBiallelic() || trioSet.isEmpty()) {
return Collections.emptyMap();
}
long highQualCalls = vc.getGenotypes().stream().filter(gt -> gt.getGQ() > HI_GQ_THRESHOLD).count();
if ((double) highQualCalls / vc.getNSamples() < CALL_RATE_THRESHOLD) {
return Collections.emptyMap();
}
final List<String> transmittedSingletonParent = new ArrayList<>();
final List<String> nonTransmittedSingletonParent = new ArrayList<>();
for (final Trio trio : trioSet) {
if (vc.isBiallelic() &&
contextHasTrioGQs(vc, trio)) {

final boolean oneParentHasAllele = (vc.getGenotype(trio.getMaternalID()).isHet() && vc.getGenotype(trio.getPaternalID()).isHomRef()) || (vc.getGenotype(trio.getMaternalID()).isHomRef() && vc.getGenotype(trio.getPaternalID()).isHet());
final String matchingParentId = vc.getGenotype(trio.getMaternalID()).isHet() ? trio.getMaternalID() : trio.getPaternalID();

final boolean momIsHighGQ = vc.getGenotype(trio.getMaternalID()).getGQ() >= HI_GQ_THRESHOLD;
final boolean dadIsHighGQ = vc.getGenotype(trio.getPaternalID()).getGQ() >= HI_GQ_THRESHOLD;

final boolean childIsHighGQHet = vc.getGenotype(trio.getChildID()).isHet() && vc.getGenotype(trio.getChildID()).getGQ() >= HI_GQ_THRESHOLD;
final boolean childIsHighGQHomRef = vc.getGenotype(trio.getChildID()).isHomRef() && vc.getGenotype(trio.getChildID()).getGQ() >= HI_GQ_THRESHOLD;

final boolean childIsHighDepth = vc.getGenotype(trio.getChildID()).getDP() >= HI_DP_THRESHOLD;
final boolean momIsHighDepth = vc.getGenotype(trio.getChildID()).getDP() >= HI_DP_THRESHOLD;
final boolean dadIsHighDepth = vc.getGenotype(trio.getChildID()).getDP() >= HI_DP_THRESHOLD;

//TODO: This only works for trios (not quads or other more complicated family structures that would make the AC>2)
if (childIsHighDepth && momIsHighDepth && dadIsHighDepth &&
vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0) == 2 &&
childIsHighGQHet && oneParentHasAllele && momIsHighGQ && dadIsHighGQ) {
transmittedSingletonParent.add(matchingParentId);
}
//TODO: This only works for trios (not quads or other more complicated family structures that would make the AC>1)
if (childIsHighDepth && momIsHighDepth && dadIsHighDepth &&
vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0) == 1 &&
childIsHighGQHomRef && momIsHighGQ && dadIsHighGQ) {
nonTransmittedSingletonParent.add(matchingParentId);
}
}
}

final Map<String, Object> attributeMap = new LinkedHashMap<>(1);
if (!transmittedSingletonParent.isEmpty()) {
attributeMap.put(GATKVCFConstants.TRANSMITTED_SINGLETON, transmittedSingletonParent);
}
if (!nonTransmittedSingletonParent.isEmpty()) {
attributeMap.put(GATKVCFConstants.NON_TRANSMITTED_SINGLETON, nonTransmittedSingletonParent);
}
return attributeMap;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ public final class GATKVCFConstants {
public static final String GQ_STDEV_KEY = "GQ_STDDEV";
public static final String HAPLOTYPE_SCORE_KEY = "HaplotypeScore";
public static final String HI_CONF_DENOVO_KEY = "hiConfDeNovo";
public static final String TRANSMITTED_SINGLETON = "transmittedSingleton";
public static final String NON_TRANSMITTED_SINGLETON = "nonTransmittedSingleton";
public static final String INTERVAL_GC_CONTENT_KEY = "IGC";
public static final String INBREEDING_COEFFICIENT_KEY = "InbreedingCoeff";
public static final String AS_INBREEDING_COEFFICIENT_KEY = "AS_InbreedingCoeff";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,8 @@ public static VCFFormatHeaderLine getEquivalentFormatHeaderLine(final String inf
addInfoLine(new VCFInfoHeaderLine(AS_VQS_SENS_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.String, "For each alt allele, the calibration sensitivity threshold of being a true variant versus being false under the trained gaussian mixture model"));
addInfoLine(new VCFInfoHeaderLine(AS_YNG_STATUS_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.String, "For each alt allele, the yay/nay/grey status (yay are known good alleles, nay are known false positives, grey are unknown)"));

addInfoLine(new VCFInfoHeaderLine(TRANSMITTED_SINGLETON, 1, VCFHeaderLineType.String, "Possible transmitted singleton (site with AC=2 from parent and child). Parent ID is listed."));
addInfoLine(new VCFInfoHeaderLine(NON_TRANSMITTED_SINGLETON, 1, VCFHeaderLineType.String, "Possible non transmitted singleton (site with AC=1 in just one parent). Parent ID is listed."));
addInfoLine(new VCFInfoHeaderLine(HI_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "High confidence possible de novo mutation (GQ >= 20 for all trio members)=[comma-delimited list of child samples]"));
addInfoLine(new VCFInfoHeaderLine(LO_CONF_DENOVO_KEY, 1, VCFHeaderLineType.String, "Low confidence possible de novo mutation (GQ >= 10 for child, GQ > 0 for parents)=[comma-delimited list of child samples]"));
addInfoLine(new VCFInfoHeaderLine(QUAL_BY_DEPTH_KEY, 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -693,4 +693,17 @@ public void testConfigFileControlsAnnotationPackages() throws IOException {
runToolInNewJVM("TestAnnotationsTool", args);
Assert.assertEquals(Files.readAllLines(output.toPath()), Collections.singletonList(TestAnnotation.class.getSimpleName()));
}

@Test
public void testPossibleDeNovoAnnotationArguments() {
CommandLineParser clp = new CommandLineArgumentParser(
new Object(),
Collections.singletonList(new GATKAnnotationPluginDescriptor( Collections.singletonList(new PossibleDeNovo()), null)),
Collections.emptySet());
String[] args = {"--" + PossibleDeNovo.DENOVO_DEPTH_THRESHOLD,"20","--" + PossibleDeNovo.DENOVO_PARENT_GQ_THRESHOLD,"40"};
clp.parseArguments(nullMessageStream, args);
PossibleDeNovo annot = (PossibleDeNovo) instantiateAnnotations(clp).get(0);
Assert.assertEquals(annot.parentGQThreshold, 40);
Assert.assertEquals(annot.depthThreshold, 20);
}
}
Loading

0 comments on commit e3f30ff

Please sign in to comment.