From 1d9d10483da95736ff5bcd2ad8f4b981a5749319 Mon Sep 17 00:00:00 2001
From: Andrey Smirnov The tool will be run in CASE mode using the argument {@code run-mode CASE}. The path to a previously
* obtained model directory must be provided via the {@code model} argument in this mode. The modeled intervals are
- * then specified by a file contained in the model directory, all interval-related arguments are ignored in this
- * mode, and all model intervals must be present in all of the input count files. The tool output in CASE mode
- * is only the "-calls" subdirectory and is organized similarly to that in COHORT mode.
Note that at the moment, this tool does not automatically verify the compatibility of the provided parametrization
* with the provided count files. Model compatibility may be assessed a posteriori by inspecting the magnitude of
diff --git a/src/main/resources/org/broadinstitute/hellbender/tools/copynumber/case_denoising_calling.py b/src/main/resources/org/broadinstitute/hellbender/tools/copynumber/case_denoising_calling.py
index cf81082c349..20c1cc42965 100644
--- a/src/main/resources/org/broadinstitute/hellbender/tools/copynumber/case_denoising_calling.py
+++ b/src/main/resources/org/broadinstitute/hellbender/tools/copynumber/case_denoising_calling.py
@@ -23,6 +23,23 @@
# logging args
gcnvkernel.cli_commons.add_logging_args_to_argparse(parser)
+hidden_denoising_args = {
+ "max_bias_factors",
+ "psi_t_scale",
+ "log_mean_bias_std",
+ "init_ard_rel_unexplained_variance",
+ "enable_bias_factors",
+ "enable_explicit_gc_bias_modeling",
+ "disable_bias_factors_in_active_class",
+ "num_gc_bins",
+ "gc_curve_sd"
+}
+
+hidden_calling_args = {
+ "p_active",
+ "class_coherence_length"
+}
+
# add tool-specific args
group = parser.add_argument_group(title="Required arguments")
@@ -79,26 +96,13 @@
# Note: we are hiding parameters that are either set by the model or are irrelevant to the case calling task
gcnvkernel.DenoisingModelConfig.expose_args(
parser,
- hide={
- "--max_bias_factors",
- "--psi_t_scale",
- "--log_mean_bias_std",
- "--init_ard_rel_unexplained_variance",
- "--enable_bias_factors",
- "--enable_explicit_gc_bias_modeling",
- "--disable_bias_factors_in_active_class",
- "--num_gc_bins",
- "--gc_curve_sd",
- })
+ hide={"--" + arg for arg in hidden_denoising_args})
# add calling config args
# Note: we are hiding parameters that are either set by the model or are irrelevant to the case calling task
gcnvkernel.CopyNumberCallingConfig.expose_args(
parser,
- hide={
- '--p_active',
- '--class_coherence_length'
- })
+ hide={"--" + arg for arg in hidden_calling_args})
# override some inference parameters
gcnvkernel.HybridInferenceParameters.expose_args(parser)
@@ -109,24 +113,16 @@ def update_args_dict_from_saved_model(input_model_path: str,
logging.info("Loading denoising model configuration from the provided model...")
with open(os.path.join(input_model_path, "denoising_config.json"), 'r') as fp:
loaded_denoising_config_dict = json.load(fp)
-
- # boolean flags
- _args_dict['enable_bias_factors'] = \
- loaded_denoising_config_dict['enable_bias_factors']
- _args_dict['enable_explicit_gc_bias_modeling'] = \
- loaded_denoising_config_dict['enable_explicit_gc_bias_modeling']
- _args_dict['disable_bias_factors_in_active_class'] = \
- loaded_denoising_config_dict['disable_bias_factors_in_active_class']
-
- # bias factor related
- _args_dict['max_bias_factors'] = \
- loaded_denoising_config_dict['max_bias_factors']
-
- # gc-related
- _args_dict['num_gc_bins'] = \
- loaded_denoising_config_dict['num_gc_bins']
- _args_dict['gc_curve_sd'] = \
- loaded_denoising_config_dict['gc_curve_sd']
+ with open(os.path.join(input_model_path, "calling_config.json"), 'r') as fp:
+ loaded_calling_config_dict = json.load(fp)
+
+ # load arguments from the model denoising and calling configs that are hidden by the tool
+ for arg in hidden_denoising_args:
+ _args_dict[arg] = \
+ loaded_denoising_config_dict[arg]
+ for arg in hidden_calling_args:
+ _args_dict[arg] = \
+ loaded_calling_config_dict[arg]
logging.info("- bias factors enabled: "
+ repr(_args_dict['enable_bias_factors']))
From c8b5266165975a3bd9000aca4ca5b0f1d928f911 Mon Sep 17 00:00:00 2001
From: Andrey Smirnov
More details about the model and inference procedure can be found in the white paper + * https://github.com/broadinstitute/gatk/blob/master/docs/CNV/germline-cnv-caller-model.pdf
+ * *The computation done by this tool, aside from input data parsing and validation, is performed outside of the Java @@ -121,12 +125,12 @@ *
The tool will be run in CASE mode using the argument {@code run-mode CASE}. The path to a previously * obtained model directory must be provided via the {@code model} argument in this mode. The modeled intervals are - * then specified by a file contained in the model directory,and all model intervals must be present in all of the - * input count files. All interval-related arguments (e.g. {@code interval-psi-scale} argument) are ignored in this - * mode, however an advanced user can adjust various sample-related (e.g. {@code sample-psi-scale}) and global - * (e.g. {@code p_alt}) arguments for custom applications of the tool. Inference-related (e.g. - * {@code min_training_epochs}) arguments can be adjusted as well. The tool output in CASE mode is only the "-calls" - * subdirectory and is organized similarly to that in COHORT mode.
+ * then specified by a file contained in the model directory, and all model intervals must be present in all of the + * input count files. All interval-related arguments (e.g. {@code interval-psi-scale}) are redundant in this mode + * and will trigger an exception if provided. However, an advanced user can adjust various sample-related + * (e.g. {@code sample-psi-scale}) and global (e.g. {@code p_alt}) arguments for custom applications of the tool. + * Inference-related arguments (e.g. {@code min_training_epochs}) can be adjusted as well. The tool output in CASE + * mode is only the "-calls" subdirectory and is organized similarly to that in COHORT mode. * *Note that at the moment, this tool does not automatically verify the compatibility of the provided parametrization
* with the provided count files. Model compatibility may be assessed a posteriori by inspecting the magnitude of
@@ -362,8 +366,9 @@ protected Object doWork() {
}
private void validateArguments() {
- germlineCallingArgumentCollection.validate();
- germlineDenoisingModelArgumentCollection.validate();
+ final CommandLineArgumentParser clpParser = (CommandLineArgumentParser) getCommandLineParser();
+ germlineCallingArgumentCollection.validate(clpParser, runMode);
+ germlineDenoisingModelArgumentCollection.validate(clpParser, runMode);
germlineCNVHybridADVIArgumentCollection.validate();
Utils.validateArg(inputReadCountPaths.size() == new HashSet<>(inputReadCountPaths).size(),
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineCallingArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineCallingArgumentCollection.java
index cffad07d934..61932bfa55d 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineCallingArgumentCollection.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/copynumber/arguments/GermlineCallingArgumentCollection.java
@@ -1,7 +1,10 @@
package org.broadinstitute.hellbender.tools.copynumber.arguments;
+import com.google.common.collect.ImmutableList;
import org.broadinstitute.barclay.argparser.Argument;
+import org.broadinstitute.barclay.argparser.CommandLineArgumentParser;
import org.broadinstitute.hellbender.tools.copynumber.GermlineCNVCaller;
+import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.param.ParamUtils;
import java.io.Serializable;
@@ -21,6 +24,11 @@ public final class GermlineCallingArgumentCollection implements Serializable {
public static final String CLASS_COHERENCE_LENGTH_LONG_NAME = "class-coherence-length";
public static final String MAX_COPY_NUMBER_LONG_NAME = "max-copy-number";
+ // these model parameters will be extracted from the provided model in CASE mode
+ private static final List