Skip to content

Commit

Permalink
experimental version of argument mode manager that is more generalize…
Browse files Browse the repository at this point in the history
…d demonstrated on the HaplotypeCaller dragen mode
  • Loading branch information
jamesemery committed Mar 30, 2022
1 parent a85c25f commit 9fbc2e8
Show file tree
Hide file tree
Showing 6 changed files with 157 additions and 49 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package org.broadinstitute.hellbender.cmdline;

import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.barclay.argparser.CommandLineArgumentParser;
import org.broadinstitute.barclay.argparser.CommandLineParser;
import org.broadinstitute.barclay.argparser.NamedArgumentDefinition;
import org.broadinstitute.hellbender.exceptions.GATKException;

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.util.*;

/**
* This class is a static helper for implementing 'mode arguments' by tools. A mode argument is defined as
* an argument which controls the setting of multiple (other) arguments to preset values, thereby resulting
* in the tool being used in a specific 'mode'. In this sense, a mode is simply a macro of sorts. A mode
* argument might be binary (in which case the associated argument and their values will be set when it is true)
* or be an enum (in which case a different set of arguments and value be set according to the selected value).
*
* The detection of the mode (i.e. the implementation of the mode argument) is the tool's responsibility. Following
* that, a public method of this class, setArgValues, can be used to install the associated argument values
* in the command parser, which will propagate them into the associate tools instance fields.
*
* The setting of the arguments is done such that argument already specified on the command line are not
* overwritten. This allows for mode refinement - i.e. the setting of the mode while turing some of its arguments
* to specific values.
*/

public class ModeArgumentUtils {

protected static final Logger logger = LogManager.getLogger(ModeArgumentUtils.class);

/**
* set a group of arguments (and associated fields) to the specified values - thereby implementing a mode
*
* The method does not overwrite arguments already set on the command line (as indicated by the parser)
* Additionally, the method emits a warning message indicating which argument were set by the mode and
* to which values.
*
* @param parser - commandline parser associated with the tool. This should be an instance of CommandLineArgumentParser
* @param argValues - an array of arguments and values to potentially set. The order of elements in the array
* is arg0,value0,arg1,value1 ...
* @param modeName - the name of the mode being set. This is used in textual message, such as the warning
* issued to notify the user of the changed argument
*/
public static void setArgValues(final CommandLineParser parser, final String[] argValues, final String modeName) {
final Map<String, String> modifiedArgs = new LinkedHashMap<>();

for ( int i = 0 ; i < argValues.length ; i += 2 ) {
if ( !hasBeenSet(parser, argValues[i]) ) {
String parserMessage = setValue(parser, argValues[i], argValues[i+1]);

if ( StringUtils.isEmpty(parserMessage) ) {
modifiedArgs.put(argValues[i], argValues[i + 1]);
} else {
modifiedArgs.put(argValues[i], argValues[i + 1] + " (" + parserMessage + ")");
}
} else {
logger.info("parameter not set by this mode, as it was already set on the command line: " + argValues[i]);
}
}

logModeNotice(modifiedArgs, modeName);
}

private static boolean hasBeenSet(final CommandLineParser parser, final String alias) {

if ( parser instanceof CommandLineArgumentParser ) {
NamedArgumentDefinition namedArg = ((CommandLineArgumentParser)parser).getNamedArgumentDefinitionByAlias(alias);

return (namedArg != null) ? namedArg.getHasBeenSet() : false;
} else {
throw new IllegalArgumentException("command line parser is not CommandLineArgumentParser");
}
}

private static String setValue(final CommandLineParser parser, final String alias, final String value) {
if ( parser instanceof CommandLineArgumentParser ) {
NamedArgumentDefinition namedArg = ((CommandLineArgumentParser)parser).getNamedArgumentDefinitionByAlias(alias);
if ( namedArg == null ) {
throw new IllegalArgumentException("alias not found: " + alias);
}

PrintStream ps = new PrintStream(new ByteArrayOutputStream());
List<String> values = Arrays.asList(value);
namedArg.setArgumentValues((CommandLineArgumentParser)parser, ps, values);
return ps.toString();
} else {
throw new IllegalArgumentException("command line parser is not CommandLineArgumentParser");
}
}

private static void logModeNotice(final Map<String, String> modifiedArgs, final String modeName) {
logger.warn("*************************************************************************");
logger.warn(String.format("* %-69s *", "--" + modeName + " was enabled"));
logger.warn("* The following arguments have had their inputs overwritten: *");
modifiedArgs.forEach((name, value) -> {
logger.warn(String.format("* %-69s *", "--" + name + " " + value));
});
logger.warn("* *");
logger.warn("* If you would like to run this mode with different inputs for any *");
logger.warn("* of the above arguments please manually construct the command or *");
logger.warn("* add your specific inputs after the mode argument. This mode *");
logger.warn("* will not override inputs explicitly provided. *");
logger.warn("*************************************************************************");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@ public final class GenotypeCalculationArgumentCollection implements Serializable
public static final String MAX_GENOTYPE_COUNT_LONG_NAME = "max-genotype-count";
public static final String SAMPLE_PLOIDY_SHORT_NAME = "ploidy";
public static final String SAMPLE_PLOIDY_LONG_NAME = "sample-ploidy";
public static final String GENOTYPE_ASSIGNMENT_METHOD_LONG_NAME = "genotype-assignment-method";
public static final String USE_POSTERIORS_TO_CALCULATE_QUAL_LONG_NAME = "use-posteriors-to-calculate-qual";

public static final double DEFAULT_STANDARD_CONFIDENCE_FOR_CALLING = 30.0;
public static final int DEFAULT_MAX_ALTERNATE_ALLELES = 6;
public static final int DEFAULT_MAX_GENOTYPE_COUNT = 1024;

@Argument(fullName="use-posteriors-to-calculate-qual", shortName="gp-qual", optional = true, doc = "if available, use the genotype posterior probabilities to calculate the site QUAL")
@Argument(fullName= USE_POSTERIORS_TO_CALCULATE_QUAL_LONG_NAME, shortName="gp-qual", optional = true, doc = "if available, use the genotype posterior probabilities to calculate the site QUAL")
public boolean usePosteriorProbabilitiesToCalculateQual = false;

/**
Expand Down Expand Up @@ -188,6 +190,6 @@ public GenotypeCalculationArgumentCollection clone() {
@Argument(fullName= NUM_REF_SAMPLES_LONG_NAME,doc="Number of hom-ref genotypes to infer at sites not present in a panel",optional=true)
public int numRefIfMissing = 0;

@Argument(fullName= "genotype-assignment-method", shortName = "gam", doc = "How we assign genotypes", optional = true)
@Argument(fullName= GENOTYPE_ASSIGNMENT_METHOD_LONG_NAME, shortName = "gam", doc = "How we assign genotypes", optional = true)
public GenotypeAssignmentMethod genotypeAssignmentMethod = GenotypeAssignmentMethod.USE_PLS_TO_ASSIGN;
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ public abstract class AssemblyBasedCallerArgumentCollection {
private static final SWParameters DEFAULT_HAPLOTYPE_TO_REFERENCE_SMITH_WATERMAN_PARAMETERS = SmithWatermanAlignmentConstants.NEW_SW_PARAMETERS;
/** See documentation at {@link SmithWatermanAlignmentConstants#ALIGNMENT_TO_BEST_HAPLOTYPE_SW_PARAMETERS}. */
private static final SWParameters DEFAULT_READ_TO_HAPLOTYPE_SMITH_WATERMAN_PARAMETERS = SmithWatermanAlignmentConstants.ALIGNMENT_TO_BEST_HAPLOTYPE_SW_PARAMETERS;
public static final String SOFT_CLIP_LOW_QUALITY_ENDS_LONG_NAME = "soft-clip-low-quality-ends";

public ReadThreadingAssembler createReadThreadingAssembler() {
final ReadThreadingAssembler assemblyEngine = assemblerArgs.makeReadThreadingAssembler();
Expand Down Expand Up @@ -161,7 +162,7 @@ public ReadThreadingAssembler createReadThreadingAssembler() {
public boolean forceCallFiltered = false;

@Advanced
@Argument(fullName = "soft-clip-low-quality-ends", doc = "If enabled will preserve low-quality read ends as softclips (used for DRAGEN-GATK BQD genotyper model)", optional = true)
@Argument(fullName = SOFT_CLIP_LOW_QUALITY_ENDS_LONG_NAME, doc = "If enabled will preserve low-quality read ends as softclips (used for DRAGEN-GATK BQD genotyper model)", optional = true)
public boolean softClipLowQualityEnds = false;

@Advanced
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.GATKPlugin.GATKReadFilterPluginDescriptor;
import org.broadinstitute.hellbender.cmdline.ModeArgumentUtils;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.argumentcollections.ReferenceInputArgumentCollection;
import org.broadinstitute.hellbender.cmdline.programgroups.ShortVariantDiscoveryProgramGroup;
Expand Down Expand Up @@ -160,14 +161,18 @@ public List<ReadFilter> getDefaultReadFilters() {
}

/**
* This is being used to set the mapping quality filter when in dragen mode... there are problems here...
* This is being used to set the mapping quality filter when in dragen mode. This is also where make alterations to the input arguments based on DragenMode.
*/
protected String[] customCommandLineValidation() {
if (hcArgs.dragenMode) {
final GATKReadFilterPluginDescriptor readFilterPlugin =
getCommandLineParser().getPluginDescriptor(GATKReadFilterPluginDescriptor.class);
Optional<ReadFilter> filterOptional = readFilterPlugin.getResolvedInstances().stream().filter(rf -> rf instanceof MappingQualityReadFilter).findFirst();
filterOptional.ifPresent(readFilter -> ((MappingQualityReadFilter) readFilter).minMappingQualityScore = 1);
ModeArgumentUtils.setArgValues(
getCommandLineParser(),
hcArgs.getDragenNameValuePairs(),
HaplotypeCallerArgumentCollection.DRAGEN_GATK_MODE_LONG_NAME);
}
return null;
}
Expand Down Expand Up @@ -216,44 +221,6 @@ public void onTraversalStart() {
logger.warn("*************************************************************************");
}

if (hcArgs.dragenMode) {
logger.warn("*************************************************************************");
logger.warn("* DRAGEN-GATK mode enabled *");
logger.warn("* The following arguments have had their inputs overwritten: *");
logger.warn("* --apply-frd *");
logger.warn("* --apply-bqd *");
logger.warn("* --transform-dragen-mapping-quality *");
logger.warn("* --soft-clip-low-quality-ends *");
logger.warn("* --mapping-quality-threshold-for-genotyping 1 *");
logger.warn("* --minimum-mapping-quality 1 *");
logger.warn("* --allele-informative-reads-overlap-margin 1 *");
logger.warn("* --disable-cap-base-qualities-to-map-quality *");
logger.warn("* --enable-dynamic-read-disqualification-for-genotyping *");
logger.warn("* --expected-mismatch-rate-for-read-disqualification 0.03 *");
logger.warn("* --genotype-assignment-method USE_POSTERIOR_PROBABILITIES *");
logger.warn("* --padding-around-indels 150 *");
logger.warn("* --standard-min-confidence-threshold-for-calling 3.0 *");
logger.warn("* --use-posteriors-to-calculate-qual *");
logger.warn("* --allele-informative-reads-overlap-margin 1 *");
logger.warn("* *");
logger.warn("* If you would like to run DRAGEN-GATK with different inputs for any *");
logger.warn("* of the above arguments please manually construct the command. *");
logger.warn("*************************************************************************");
hcArgs.applyBQD = true;
hcArgs.applyFRD = true;
hcArgs.transformDRAGENMapQ = true;
hcArgs.softClipLowQualityEnds = true;
hcArgs.mappingQualityThreshold = 1;
hcArgs.informativeReadOverlapMargin = 1;
hcArgs.likelihoodArgs.disableCapReadQualitiesToMapQ = true;
hcArgs.likelihoodArgs.enableDynamicReadDisqualification = true;
hcArgs.likelihoodArgs.expectedErrorRatePerBase = 0.03;
hcArgs.standardArgs.genotypeArgs.genotypeAssignmentMethod = GenotypeAssignmentMethod.USE_POSTERIOR_PROBABILITIES;
hcArgs.standardArgs.genotypeArgs.standardConfidenceForCalling = 3.0;
hcArgs.standardArgs.genotypeArgs.usePosteriorProbabilitiesToCalculateQual = true;
assemblyRegionArgs.indelPaddingForGenotyping = 150;
}

final VariantAnnotatorEngine variantAnnotatorEngine = new VariantAnnotatorEngine(makeVariantAnnotations(),
hcArgs.dbsnp.dbsnp, hcArgs.comps, hcArgs.emitReferenceConfidence != ReferenceConfidenceMode.NONE, false);
hcEngine = new HaplotypeCallerEngine(hcArgs, assemblyRegionArgs, createOutputBamIndex, createOutputBamMD5, getHeaderForReads(), getReferenceReader(referenceArguments), variantAnnotatorEngine);
Expand Down
Loading

0 comments on commit 9fbc2e8

Please sign in to comment.