diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCaller.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCaller.java index 8d6ffb8fcf7..301e2d7e275 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCaller.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCaller.java @@ -5,6 +5,7 @@ import org.broadinstitute.barclay.argparser.ArgumentCollection; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; +import org.broadinstitute.barclay.argparser.CommandLineArgumentParser; import org.broadinstitute.hellbender.cmdline.CommandLineProgram; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; import org.broadinstitute.hellbender.cmdline.argumentcollections.IntervalArgumentCollection; @@ -76,6 +77,9 @@ * diverges the second time we suggest checking if input count or karyotype values or other inputs are abnormal * (an example of abnormality is a count file containing mostly zeros).

* + *

More details about the model and inference procedure can be found in the white paper + * https://github.com/broadinstitute/gatk/blob/master/docs/CNV/germline-cnv-caller-model.pdf

+ * *

Python environment setup

* *

The computation done by this tool, aside from input data parsing and validation, is performed outside of the Java @@ -121,9 +125,12 @@ *

CASE mode:
*

The tool will be run in CASE mode using the argument {@code run-mode CASE}. The path to a previously * obtained model directory must be provided via the {@code model} argument in this mode. The modeled intervals are - * then specified by a file contained in the model directory, all interval-related arguments are ignored in this - * mode, and all model intervals must be present in all of the input count files. The tool output in CASE mode - * is only the "-calls" subdirectory and is organized similarly to that in COHORT mode.

+ * then specified by a file contained in the model directory, and all model intervals must be present in all of the + * input count files. All interval-related arguments (e.g. {@code interval-psi-scale}) are redundant in this mode + * and will trigger an exception if provided. However, an advanced user can adjust various sample-related + * (e.g. {@code sample-psi-scale}) and global (e.g. {@code p-alt}) arguments for custom applications of the tool. + * Inference-related arguments (e.g. {@code min-training-epochs}) can be adjusted as well. The tool output in CASE + * mode is only the "-calls" subdirectory and is organized similarly to that in COHORT mode.

* *

Note that at the moment, this tool does not automatically verify the compatibility of the provided parametrization * with the provided count files. Model compatibility may be assessed a posteriori by inspecting the magnitude of @@ -359,8 +366,9 @@ protected Object doWork() { } private void validateArguments() { - germlineCallingArgumentCollection.validate(); - germlineDenoisingModelArgumentCollection.validate(); + final CommandLineArgumentParser clpParser = (CommandLineArgumentParser) getCommandLineParser(); + germlineCallingArgumentCollection.validate(clpParser, runMode); + germlineDenoisingModelArgumentCollection.validate(clpParser, runMode); germlineCNVHybridADVIArgumentCollection.validate(); Utils.validateArg(inputReadCountPaths.size() == new HashSet<>(inputReadCountPaths).size(), diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineCallingArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineCallingArgumentCollection.java index cffad07d934..61932bfa55d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineCallingArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineCallingArgumentCollection.java @@ -1,7 +1,10 @@ package org.broadinstitute.hellbender.tools.copynumber.arguments; +import com.google.common.collect.ImmutableList; import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.CommandLineArgumentParser; import org.broadinstitute.hellbender.tools.copynumber.GermlineCNVCaller; +import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.param.ParamUtils; import java.io.Serializable; @@ -21,6 +24,11 @@ public final class GermlineCallingArgumentCollection implements Serializable { public static final String CLASS_COHERENCE_LENGTH_LONG_NAME = "class-coherence-length"; public static final String 
MAX_COPY_NUMBER_LONG_NAME = "max-copy-number"; + // these model parameters will be extracted from provided model in CASE mode + private static final List HIDDEN_ARGS_CASE_MODE = ImmutableList.of( + P_ACTIVE_LONG_NAME, + CLASS_COHERENCE_LENGTH_LONG_NAME); + @Argument( doc = "Total prior probability of alternative copy-number states (the reference copy-number " + "is set to the contig integer ploidy)", @@ -78,7 +86,11 @@ public List generatePythonArguments(final GermlineCNVCaller.RunMode runM return arguments; } - public void validate() { + public void validate(final CommandLineArgumentParser clpParser, final GermlineCNVCaller.RunMode runMode) { + if (runMode == GermlineCNVCaller.RunMode.CASE) + HIDDEN_ARGS_CASE_MODE.forEach(a -> Utils.validateArg( + !clpParser.getNamedArgumentDefinitionByAlias(a).getHasBeenSet(), + String.format("Argument '--%s' cannot be set in the CASE mode.", a))); ParamUtils.isPositive(cnvCoherenceLength, String.format("Coherence length of CNV events (%s) must be positive.", CNV_COHERENCE_LENGTH_LONG_NAME)); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineDenoisingModelArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineDenoisingModelArgumentCollection.java index 2c00f60bc14..207dc131e8f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineDenoisingModelArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineDenoisingModelArgumentCollection.java @@ -1,8 +1,12 @@ package org.broadinstitute.hellbender.tools.copynumber.arguments; +import com.google.common.collect.ImmutableList; import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.CommandLineArgumentParser; +import org.broadinstitute.hellbender.engine.filters.ReadFilter; import org.broadinstitute.hellbender.tools.copynumber.GermlineCNVCaller; import 
org.broadinstitute.hellbender.utils.param.ParamUtils; +import org.broadinstitute.hellbender.utils.Utils; import java.io.Serializable; import java.util.ArrayList; @@ -28,6 +32,16 @@ public final class GermlineDenoisingModelArgumentCollection implements Serializa public static final String ENABLE_BIAS_FACTORS_LONG_NAME = "enable-bias-factors"; public static final String ACTIVE_CLASS_PADDING_HYBRID_MODE_LONG_NAME = "active-class-padding-hybrid-mode"; + // these model parameters will be extracted from provided model in CASE mode + private static final List HIDDEN_ARGS_CASE_MODE = ImmutableList.of( + MAX_BIAS_FACTORS_LONG_NAME, + INTERVAL_PSI_SCALE_LONG_NAME, + LOG_MEAN_BIAS_STANDARD_DEVIATION_LONG_NAME, + INIT_ARD_REL_UNEXPLAINED_VARIANCE_LONG_NAME, + ENABLE_BIAS_FACTORS_LONG_NAME, + NUM_GC_BINS_LONG_NAME, + GC_CURVE_STANDARD_DEVIATION_LONG_NAME); + public enum CopyNumberPosteriorExpectationMode { MAP("map"), EXACT("exact"), @@ -168,7 +182,11 @@ public List generatePythonArguments(final GermlineCNVCaller.RunMode runM return arguments; } - public void validate() { + public void validate(final CommandLineArgumentParser clpParser, final GermlineCNVCaller.RunMode runMode) { + if (runMode == GermlineCNVCaller.RunMode.CASE) + HIDDEN_ARGS_CASE_MODE.forEach(a -> Utils.validateArg( + !clpParser.getNamedArgumentDefinitionByAlias(a).getHasBeenSet(), + String.format("Argument '--%s' cannot be set in the CASE mode.", a))); ParamUtils.isPositive(maxBiasFactors, String.format("Maximum number of bias factors (%s) must be positive.", MAX_BIAS_FACTORS_LONG_NAME)); diff --git a/src/main/resources/org/broadinstitute/hellbender/tools/copynumber/case_denoising_calling.py b/src/main/resources/org/broadinstitute/hellbender/tools/copynumber/case_denoising_calling.py index cf81082c349..20c1cc42965 100644 --- a/src/main/resources/org/broadinstitute/hellbender/tools/copynumber/case_denoising_calling.py +++ 
b/src/main/resources/org/broadinstitute/hellbender/tools/copynumber/case_denoising_calling.py @@ -23,6 +23,23 @@ # logging args gcnvkernel.cli_commons.add_logging_args_to_argparse(parser) +hidden_denoising_args = { + "max_bias_factors", + "psi_t_scale", + "log_mean_bias_std", + "init_ard_rel_unexplained_variance", + "enable_bias_factors", + "enable_explicit_gc_bias_modeling", + "disable_bias_factors_in_active_class", + "num_gc_bins", + "gc_curve_sd" +} + +hidden_calling_args = { + "p_active", + "class_coherence_length" +} + # add tool-specific args group = parser.add_argument_group(title="Required arguments") @@ -79,26 +96,13 @@ # Note: we are hiding parameters that are either set by the model or are irrelevant to the case calling task gcnvkernel.DenoisingModelConfig.expose_args( parser, - hide={ - "--max_bias_factors", - "--psi_t_scale", - "--log_mean_bias_std", - "--init_ard_rel_unexplained_variance", - "--enable_bias_factors", - "--enable_explicit_gc_bias_modeling", - "--disable_bias_factors_in_active_class", - "--num_gc_bins", - "--gc_curve_sd", - }) + hide={"--" + arg for arg in hidden_denoising_args}) # add calling config args # Note: we are hiding parameters that are either set by the model or are irrelevant to the case calling task gcnvkernel.CopyNumberCallingConfig.expose_args( parser, - hide={ - '--p_active', - '--class_coherence_length' - }) + hide={"--" + arg for arg in hidden_calling_args}) # override some inference parameters gcnvkernel.HybridInferenceParameters.expose_args(parser) @@ -109,24 +113,16 @@ def update_args_dict_from_saved_model(input_model_path: str, logging.info("Loading denoising model configuration from the provided model...") with open(os.path.join(input_model_path, "denoising_config.json"), 'r') as fp: loaded_denoising_config_dict = json.load(fp) - - # boolean flags - _args_dict['enable_bias_factors'] = \ - loaded_denoising_config_dict['enable_bias_factors'] - _args_dict['enable_explicit_gc_bias_modeling'] = \ - 
loaded_denoising_config_dict['enable_explicit_gc_bias_modeling'] - _args_dict['disable_bias_factors_in_active_class'] = \ - loaded_denoising_config_dict['disable_bias_factors_in_active_class'] - - # bias factor related - _args_dict['max_bias_factors'] = \ - loaded_denoising_config_dict['max_bias_factors'] - - # gc-related - _args_dict['num_gc_bins'] = \ - loaded_denoising_config_dict['num_gc_bins'] - _args_dict['gc_curve_sd'] = \ - loaded_denoising_config_dict['gc_curve_sd'] + with open(os.path.join(input_model_path, "calling_config.json"), 'r') as fp: + loaded_calling_config_dict = json.load(fp) + + # load arguments from the model denoising config that are hidden by the tool + for arg in hidden_denoising_args: + _args_dict[arg] = \ + loaded_denoising_config_dict[arg] + for arg in hidden_calling_args: + _args_dict[arg] = \ + loaded_calling_config_dict[arg] logging.info("- bias factors enabled: " + repr(_args_dict['enable_bias_factors'])) diff --git a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCallerIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCallerIntegrationTest.java index 0a7b73a0f41..e24d145d4c1 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCallerIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/copynumber/GermlineCNVCallerIntegrationTest.java @@ -5,6 +5,7 @@ import org.broadinstitute.hellbender.cmdline.argumentcollections.IntervalArgumentCollection; import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; import org.broadinstitute.hellbender.tools.copynumber.arguments.CopyNumberStandardArgument; +import org.broadinstitute.hellbender.tools.copynumber.arguments.GermlineDenoisingModelArgumentCollection; import org.broadinstitute.hellbender.utils.IntervalMergingRule; import org.testng.annotations.Test; @@ -71,6 +72,22 @@ public void testCaseWithoutModel() { runCommandLine(argsBuilder); } + @Test(groups = 
{"python"}, expectedExceptions = IllegalArgumentException.class) + public void testCaseWithHiddenArguments() { + final ArgumentsBuilder argsBuilder = new ArgumentsBuilder(); + Arrays.stream(TEST_COUNT_FILES, 0, 5).forEach(argsBuilder::addInput); + argsBuilder.add(GermlineCNVCaller.RUN_MODE_LONG_NAME, GermlineCNVCaller.RunMode.CASE.name()) + .add(GermlineCNVCaller.CONTIG_PLOIDY_CALLS_DIRECTORY_LONG_NAME, + CONTIG_PLOIDY_CALLS_OUTPUT_DIR.getAbsolutePath()) + .add(CopyNumberStandardArgument.MODEL_LONG_NAME, + new File(OUTPUT_DIR, "test-germline-cnv-cohort-model").getAbsolutePath()) + .add(StandardArgumentDefinitions.OUTPUT_LONG_NAME, OUTPUT_DIR.getAbsolutePath()) + .add(CopyNumberStandardArgument.OUTPUT_PREFIX_LONG_NAME, "test-germline-cnv-case"); + // add argument that is not applicable in CASE mode + argsBuilder.add(GermlineDenoisingModelArgumentCollection.INTERVAL_PSI_SCALE_LONG_NAME, 0.1); + runCommandLine(argsBuilder); + } + @Test(groups = {"python"}, enabled = false) public void testCohortWithInputModel() { }