Skip to content

Commit

Permalink
Item #2 -- "Add a simple direct integration test for the new
Browse files Browse the repository at this point in the history
--floor-blocks HaplotypeCaller arg"
Also clarified beta status for GVCF mode just for Mutect2
  • Loading branch information
ldgauthier committed Aug 2, 2019
1 parent 5bbf188 commit 5fd4d9b
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ public abstract class AssemblyBasedCallerArgumentCollection {

public static final String MIN_BASE_QUALITY_SCORE_LONG_NAME = "min-base-quality-score";
public static final String SMITH_WATERMAN_LONG_NAME = "smith-waterman";
public static final String EMIT_REFERENCE_CONFIDENCE_LONG_NAME = "emit-ref-confidence";

public ReadThreadingAssembler createReadThreadingAssembler() {
final ReadThreadingAssembler assemblyEngine = assemblerArgs.makeReadThreadingAssembler();
Expand Down Expand Up @@ -107,11 +108,12 @@ public ReadThreadingAssembler createReadThreadingAssembler() {
public SmithWatermanAligner.Implementation smithWatermanImplementation = SmithWatermanAligner.Implementation.JAVA;

/**
* (BETA feature) The reference confidence mode makes it possible to emit a per-bp or summarized confidence estimate for a site being strictly homozygous-reference.
* This is similar to the HaplotypeCaller reference confidence/GVCF mode. See https://software.broadinstitute.org/gatk/documentation/article.php?id=4017 for information about GVCFs.
* The reference confidence mode makes it possible to emit a per-bp or summarized confidence estimate for a site being strictly homozygous-reference.
* See https://software.broadinstitute.org/gatk/documentation/article.php?id=4017 for information about GVCFs.
* This is a BETA FEATURE for Mutect2 similar to the HaplotypeCaller reference confidence/GVCF mode.
*/
@Advanced
@Argument(fullName="emit-ref-confidence", shortName="ERC", doc="(BETA feature) Mode for emitting reference confidence scores", optional = true)
@Argument(fullName=EMIT_REFERENCE_CONFIDENCE_LONG_NAME, shortName="ERC", doc="Mode for emitting reference confidence scores (BETA for Mutect2)", optional = true)
public ReferenceConfidenceMode emitReferenceConfidence = ReferenceConfidenceMode.NONE;

protected abstract int getDefaultMaxMnpDistance();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,18 @@ private void validateAndInitializeArgs() {
if ( emitReferenceConfidence() && samplesList.numberOfSamples() != 1 ) {
throw new CommandLineException.BadArgumentValue("--emit-ref-confidence", "Can only be used in single sample mode currently. Use the --sample-name argument to run on a single sample out of a multi-sample BAM file.");
}

if (hcArgs.floorBlocks && !emitReferenceConfidence()) {
throw new UserException(HaplotypeCallerArgumentCollection.OUTPUT_BLOCK_LOWER_BOUNDS + " refers to GVCF blocks," +
" so reference confidence mode (" + AssemblyBasedCallerArgumentCollection.EMIT_REFERENCE_CONFIDENCE_LONG_NAME +
") must be specified.");
}

if (hcArgs.floorBlocks && !emitReferenceConfidence()) {
throw new UserException(HaplotypeCallerArgumentCollection.OUTPUT_BLOCK_LOWER_BOUNDS + " refers to GVCF blocks," +
" so reference confidence mode (" + AssemblyBasedCallerArgumentCollection.EMIT_REFERENCE_CONFIDENCE_LONG_NAME +
") must be specified.");
}
}

private void initializeSamples() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.apache.commons.lang3.tuple.Pair;
import org.broadinstitute.barclay.argparser.CommandLineException;
import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.GATKBaseTest;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.argumentcollections.IntervalArgumentCollection;
import org.broadinstitute.hellbender.engine.AssemblyRegionWalker;
Expand Down Expand Up @@ -371,6 +372,43 @@ else if (!vc.getAlternateAllele(0).equals(Allele.NON_REF_ALLELE)){ //there
}
}

/*
* Test that in VCF mode we're consistent with past GATK4 results
*/
@Test(dataProvider="HaplotypeCallerTestInputs")
public void testFloorGVCFBlocks(final String inputFileName, final String referenceFileName) throws Exception {
Utils.resetRandomGenerator();

final File output = createTempFile("testFloorGVCFBlocks", ".vcf");

final List<String> requestedGqBands = Arrays.asList("10","20","30","40","50","60");

final ArgumentsBuilder args = new ArgumentsBuilder().addInput(new File(inputFileName))
.addReference(new File(referenceFileName))
.addInterval(new SimpleInterval("20:10000000-10100000"))
.addBooleanArgument(StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, false)
.addArgument("pairHMM", "AVX_LOGLESS_CACHING")
.addArgument("floor-blocks")
.addArgument("ERC", "GVCF")
.addOutput(output);
requestedGqBands.forEach(x -> args.addArgument("GQB",x));
runCommandLine(args);

final List<String> allGqBands = new ArrayList(requestedGqBands);
allGqBands.add("99");
allGqBands.add("0");

//The interval here is big, so use a FeatureDataSource to limit memory usage
try (final FeatureDataSource<VariantContext> actualVcs = new FeatureDataSource<>(output)) {
actualVcs.forEach(vc -> {
//sometimes there are calls with alt alleles that are genotyped hom ref and those don't get floored
if (vc.hasAttribute("END") && vc.getGenotype(0).hasGQ()) {
Assert.assertTrue(allGqBands.contains(Integer.toString(vc.getGenotype(0).getGQ())));
}
});
}
}

@Test
public void testGenotypeGivenAllelesMode() throws IOException {
Utils.resetRandomGenerator();
Expand Down

0 comments on commit 5fd4d9b

Please sign in to comment.