Skip to content

Commit

Permalink
Added an argument mode manager for group arguments and a demonstratio…
Browse files Browse the repository at this point in the history
…n of how it might be used in HaplotypeCaller --dragen-mode (#7745)
  • Loading branch information
jamesemery committed May 10, 2022
1 parent a3141f7 commit 7614ab6
Show file tree
Hide file tree
Showing 6 changed files with 163 additions and 49 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
package org.broadinstitute.hellbender.cmdline;

import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.barclay.argparser.CommandLineArgumentParser;
import org.broadinstitute.barclay.argparser.CommandLineParser;
import org.broadinstitute.barclay.argparser.NamedArgumentDefinition;
import org.broadinstitute.hellbender.exceptions.GATKException;

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.util.*;

/**
* This class is a static helper for implementing 'mode arguments' by tools. A mode argument is defined as
* an argument which controls the setting of multiple (other) arguments to preset values, thereby resulting
* in the tool being used in a specific 'mode'. In this sense, a mode is simply a macro of sorts. A mode
* argument might be binary (in which case the associated argument and their values will be set when it is true)
* or be an enum (in which case a different set of arguments and value be set according to the selected value).
*
* The detection of the mode (i.e. the implementation of the mode argument) is the tool's responsibility. Following
* that, a public method of this class, setArgValues, can be used to install the associated argument values
* in the command parser, which will propagate them into the associate tools instance fields.
*
* The setting of the arguments is done such that argument already specified on the command line are not
* overwritten. This allows for mode refinement - i.e. the setting of the mode while turing some of its arguments
* to specific values.
*
* NOTE: This utility relies on the argument engine having already been executed. Consequently, the best place in the
* standard GATKTool would be in the #customCommandLineArgumentValidation() or at a similar stage since that gets
* executed after Barclay parsing is complete but before the tool has done anything with the filled arguments.
*/

public final class ModeArgumentUtils {

protected static final Logger logger = LogManager.getLogger(ModeArgumentUtils.class);

/**
* Set a group of arguments (and associated fields) to the specified values - thereby implementing a mode
*
* The method does not overwrite arguments already set on the command line (as indicated by the parser)
* Additionally, the method emits a warning message indicating which argument were set by the mode and
* to which values.
*
* NOTE: This does not validate that the specified mode was actually set, that responsibility is left to tool authors.
*
* @param parser - commandline parser associated with the tool. This should be an instance of CommandLineArgumentParser
* @param argValues - an array of arguments and values to potentially set. The order of elements in the array
* is arg0,value0,arg1,value1 ...
* @param modeName - the name of the mode being set. This is used in textual message, such as the warning
* issued to notify the user of the changed argument and not for validation that the argument was set.
*/
public static void setArgValues(final CommandLineParser parser, final String[] argValues, final String modeName) {
final Map<String, String> modifiedArgs = new LinkedHashMap<>();

for ( int i = 0 ; i < argValues.length ; i += 2 ) {
if ( !hasBeenSet(parser, argValues[i]) ) {
String parserMessage = setValue(parser, argValues[i], argValues[i+1]);

if ( StringUtils.isEmpty(parserMessage) ) {
modifiedArgs.put(argValues[i], argValues[i + 1]);
} else {
modifiedArgs.put(argValues[i], argValues[i + 1] + " (" + parserMessage + ")");
}
} else {
logger.info("parameter not set by the '" + modeName + "' argument mode, as it was already set on the command line: " + argValues[i]);
}
}

logModeNotice(modifiedArgs, modeName);
}

private static boolean hasBeenSet(final CommandLineParser parser, final String alias) {

if ( parser instanceof CommandLineArgumentParser ) {
NamedArgumentDefinition namedArg = ((CommandLineArgumentParser)parser).getNamedArgumentDefinitionByAlias(alias);

return (namedArg != null) ? namedArg.getHasBeenSet() : false;
} else {
throw new IllegalArgumentException("command line parser is not CommandLineArgumentParser");
}
}

private static String setValue(final CommandLineParser parser, final String alias, final String value) {
if ( parser instanceof CommandLineArgumentParser ) {
NamedArgumentDefinition namedArg = ((CommandLineArgumentParser)parser).getNamedArgumentDefinitionByAlias(alias);
if ( namedArg == null ) {
throw new IllegalArgumentException("alias not found: " + alias);
}

PrintStream ps = new PrintStream(new ByteArrayOutputStream());
List<String> values = Arrays.asList(value);
namedArg.setArgumentValues((CommandLineArgumentParser)parser, ps, values);
return ps.toString();
} else {
throw new IllegalArgumentException("command line parser is not CommandLineArgumentParser");
}
}

private static void logModeNotice(final Map<String, String> modifiedArgs, final String modeName) {
logger.warn("*************************************************************************");
logger.warn(String.format("* %-69s *", "--" + modeName + " was enabled"));
logger.warn("* The following arguments have had their inputs overwritten: *");
modifiedArgs.forEach((name, value) -> {
logger.warn(String.format("* %-69s *", "--" + name + " " + value));
});
logger.warn("* *");
logger.warn("* If you would like to run this mode with different inputs for any *");
logger.warn("* of the above arguments please manually construct the command or *");
logger.warn("* add your specific inputs after the mode argument. This mode *");
logger.warn("* will not override inputs explicitly provided. *");
logger.warn("*************************************************************************");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@ public final class GenotypeCalculationArgumentCollection implements Serializable
public static final String MAX_GENOTYPE_COUNT_LONG_NAME = "max-genotype-count";
public static final String SAMPLE_PLOIDY_SHORT_NAME = "ploidy";
public static final String SAMPLE_PLOIDY_LONG_NAME = "sample-ploidy";
public static final String GENOTYPE_ASSIGNMENT_METHOD_LONG_NAME = "genotype-assignment-method";
public static final String USE_POSTERIORS_TO_CALCULATE_QUAL_LONG_NAME = "use-posteriors-to-calculate-qual";

public static final double DEFAULT_STANDARD_CONFIDENCE_FOR_CALLING = 30.0;
public static final int DEFAULT_MAX_ALTERNATE_ALLELES = 6;
public static final int DEFAULT_MAX_GENOTYPE_COUNT = 1024;

@Argument(fullName="use-posteriors-to-calculate-qual", shortName="gp-qual", optional = true, doc = "if available, use the genotype posterior probabilities to calculate the site QUAL")
@Argument(fullName= USE_POSTERIORS_TO_CALCULATE_QUAL_LONG_NAME, shortName="gp-qual", optional = true, doc = "if available, use the genotype posterior probabilities to calculate the site QUAL")
public boolean usePosteriorProbabilitiesToCalculateQual = false;

/**
Expand Down Expand Up @@ -188,6 +190,6 @@ public GenotypeCalculationArgumentCollection clone() {
@Argument(fullName= NUM_REF_SAMPLES_LONG_NAME,doc="Number of hom-ref genotypes to infer at sites not present in a panel",optional=true)
public int numRefIfMissing = 0;

@Argument(fullName= "genotype-assignment-method", shortName = "gam", doc = "How we assign genotypes", optional = true)
@Argument(fullName= GENOTYPE_ASSIGNMENT_METHOD_LONG_NAME, shortName = "gam", doc = "How we assign genotypes", optional = true)
public GenotypeAssignmentMethod genotypeAssignmentMethod = GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN;
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ public abstract class AssemblyBasedCallerArgumentCollection {
* See documentation at {@link SmithWatermanAlignmentConstants#ALIGNMENT_TO_BEST_HAPLOTYPE_SW_PARAMETERS}.
*/
private static final SWParameters DEFAULT_READ_TO_HAPLOTYPE_SMITH_WATERMAN_PARAMETERS = SmithWatermanAlignmentConstants.ALIGNMENT_TO_BEST_HAPLOTYPE_SW_PARAMETERS;
public static final String SOFT_CLIP_LOW_QUALITY_ENDS_LONG_NAME = "soft-clip-low-quality-ends";

public ReadThreadingAssembler createReadThreadingAssembler() {
final ReadThreadingAssembler assemblyEngine = assemblerArgs.makeReadThreadingAssembler();
Expand Down Expand Up @@ -171,7 +172,7 @@ public ReadThreadingAssembler createReadThreadingAssembler() {
public boolean forceCallFiltered = false;

@Advanced
@Argument(fullName = "soft-clip-low-quality-ends", doc = "If enabled will preserve low-quality read ends as softclips (used for DRAGEN-GATK BQD genotyper model)", optional = true)
@Argument(fullName = SOFT_CLIP_LOW_QUALITY_ENDS_LONG_NAME, doc = "If enabled will preserve low-quality read ends as softclips (used for DRAGEN-GATK BQD genotyper model)", optional = true)
public boolean softClipLowQualityEnds = false;

@Advanced
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.GATKPlugin.GATKReadFilterPluginDescriptor;
import org.broadinstitute.hellbender.cmdline.ModeArgumentUtils;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.argumentcollections.ReferenceInputArgumentCollection;
import org.broadinstitute.hellbender.cmdline.programgroups.ShortVariantDiscoveryProgramGroup;
Expand Down Expand Up @@ -160,14 +161,18 @@ public List<ReadFilter> getDefaultReadFilters() {
}

/**
* This is being used to set the mapping quality filter when in dragen mode... there are problems here...
* This is being used to set the mapping quality filter when in dragen mode. This is also where make alterations to the input arguments based on DragenMode.
*/
protected String[] customCommandLineValidation() {
if (hcArgs.dragenMode) {
final GATKReadFilterPluginDescriptor readFilterPlugin =
getCommandLineParser().getPluginDescriptor(GATKReadFilterPluginDescriptor.class);
Optional<ReadFilter> filterOptional = readFilterPlugin.getResolvedInstances().stream().filter(rf -> rf instanceof MappingQualityReadFilter).findFirst();
filterOptional.ifPresent(readFilter -> ((MappingQualityReadFilter) readFilter).minMappingQualityScore = 1);
ModeArgumentUtils.setArgValues(
getCommandLineParser(),
hcArgs.getDragenNameValuePairs(),
HaplotypeCallerArgumentCollection.DRAGEN_GATK_MODE_LONG_NAME);
}
return null;
}
Expand Down Expand Up @@ -221,44 +226,6 @@ public void onTraversalStart() {
logger.warn("*************************************************************************");
}

if (hcArgs.dragenMode) {
logger.warn("*************************************************************************");
logger.warn("* DRAGEN-GATK mode enabled *");
logger.warn("* The following arguments have had their inputs overwritten: *");
logger.warn("* --apply-frd *");
logger.warn("* --apply-bqd *");
logger.warn("* --transform-dragen-mapping-quality *");
logger.warn("* --soft-clip-low-quality-ends *");
logger.warn("* --mapping-quality-threshold-for-genotyping 1 *");
logger.warn("* --minimum-mapping-quality 1 *");
logger.warn("* --allele-informative-reads-overlap-margin 1 *");
logger.warn("* --disable-cap-base-qualities-to-map-quality *");
logger.warn("* --enable-dynamic-read-disqualification-for-genotyping *");
logger.warn("* --expected-mismatch-rate-for-read-disqualification 0.03 *");
logger.warn("* --genotype-assignment-method USE_POSTERIOR_PROBABILITIES *");
logger.warn("* --padding-around-indels 150 *");
logger.warn("* --standard-min-confidence-threshold-for-calling 3.0 *");
logger.warn("* --use-posteriors-to-calculate-qual *");
logger.warn("* --allele-informative-reads-overlap-margin 1 *");
logger.warn("* *");
logger.warn("* If you would like to run DRAGEN-GATK with different inputs for any *");
logger.warn("* of the above arguments please manually construct the command. *");
logger.warn("*************************************************************************");
hcArgs.applyBQD = true;
hcArgs.applyFRD = true;
hcArgs.transformDRAGENMapQ = true;
hcArgs.softClipLowQualityEnds = true;
hcArgs.mappingQualityThreshold = 1;
hcArgs.informativeReadOverlapMargin = 1;
hcArgs.likelihoodArgs.disableCapReadQualitiesToMapQ = true;
hcArgs.likelihoodArgs.enableDynamicReadDisqualification = true;
hcArgs.likelihoodArgs.expectedErrorRatePerBase = 0.03;
hcArgs.standardArgs.genotypeArgs.genotypeAssignmentMethod = GenotypeAssignmentMethod.USE_POSTERIOR_PROBABILITIES;
hcArgs.standardArgs.genotypeArgs.standardConfidenceForCalling = 3.0;
hcArgs.standardArgs.genotypeArgs.usePosteriorProbabilitiesToCalculateQual = true;
assemblyRegionArgs.indelPaddingForGenotyping = 150;
}

final VariantAnnotatorEngine variantAnnotatorEngine = new VariantAnnotatorEngine(makeVariantAnnotations(),
hcArgs.dbsnp.dbsnp, hcArgs.comps, hcArgs.emitReferenceConfidence != ReferenceConfidenceMode.NONE, false);
hcEngine = new HaplotypeCallerEngine(hcArgs, assemblyRegionArgs, createOutputBamIndex, createOutputBamMD5, getHeaderForReads(), getReferenceReader(referenceArguments), variantAnnotatorEngine);
Expand Down
Loading

0 comments on commit 7614ab6

Please sign in to comment.