Skip to content

Commit

Permalink
ReblockGVCFs cleanup (#8411)
Browse files Browse the repository at this point in the history
* ReblockGVCFs cleanup for dragen inputs

* addressing comments
  • Loading branch information
meganshand committed Jul 14, 2023
1 parent 8b7a3f6 commit 248dd79
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ public final class ReblockGVCF extends MultiVariantWalker {
public static final String RGQ_THRESHOLD_SHORT_NAME = "rgq-threshold";
public static final String TREE_SCORE_THRESHOLD_LONG_NAME = "tree-score-threshold-to-no-call";
public static final String ANNOTATIONS_TO_KEEP_LONG_NAME = "annotations-to-keep";
public static final String ANNOTATIONS_TO_REMOVE_LONG_NAME = "format-annotations-to-remove";
public static final String KEEP_ALL_ALTS_ARG_NAME = "keep-all-alts";
public static final String QUAL_APPROX_LONG_NAME = "do-qual-score-approximation";
public static final String QUAL_APPROX_SHORT_NAME = "do-qual-approx";
Expand Down Expand Up @@ -154,6 +155,10 @@ public final class ReblockGVCF extends MultiVariantWalker {
@Argument(fullName=ANNOTATIONS_TO_KEEP_LONG_NAME, doc="Annotations that are not recognized by GATK to keep, that should be kept in final GVCF at variant sites.", optional = true)
private List<String> annotationsToKeep = new ArrayList<>();

@Advanced
@Argument(fullName=ANNOTATIONS_TO_REMOVE_LONG_NAME, doc="FORMAT level annotations to remove from all genotypes in final GVCF.", optional = true)
private List<String> annotationsToRemove = new ArrayList<>();

@Advanced
@Argument(fullName=QUAL_APPROX_LONG_NAME, shortName=QUAL_APPROX_SHORT_NAME, doc="Add necessary INFO field annotation to perform QUAL approximation downstream; required for GnarlyGenotyper", optional = true)
protected boolean doQualApprox = false;
Expand Down Expand Up @@ -227,6 +232,9 @@ public void onTraversalStart() {
+ ", but the " + GATKVCFConstants.TREE_SCORE + " annotation is not present in the input GVCF.");
}

List<String> missingAnnotationsToRemove = annotationsToRemove.stream().filter(a -> inputHeader.getFormatHeaderLine(a)==null).toList();
missingAnnotationsToRemove.forEach(a -> logger.warn("FORMAT level annotation " + a + ", which was requested to be removed by --" + ANNOTATIONS_TO_REMOVE_LONG_NAME + ", not found in input GVCF header."));

final Set<VCFHeaderLine> inputHeaders = inputHeader.getMetaDataInSortedOrder();

final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeaders);
Expand Down Expand Up @@ -317,7 +325,27 @@ protected void createAnnotationEngine() {
// get VariantContexts from input gVCFs and regenotype
@Override
public void apply(VariantContext variant, ReadsContext reads, ReferenceContext ref, FeatureContext features) {
regenotypeVC(variant);
if (!variant.hasAllele(Allele.NON_REF_ALLELE)) {
throw new UserException("Variant Context at " + variant.getContig() + ":" + variant.getStart() + " does not contain a <NON-REF> allele. This tool is only intended for use with GVCFs.");
}
VariantContext newVC = annotationsToRemove.size() > 0 ? removeVCFFormatAnnotations(variant) : variant;
regenotypeVC(newVC);
}

/**
* Remove format level annotations from genotype in variant context.
*
* @param vc variant context to remove format annotations from
* @return variant context with format annotations removed from genotype
*/
private VariantContext removeVCFFormatAnnotations(final VariantContext vc) {
final Genotype genotype = vc.getGenotype(0);
Map<String, Object> extendedAttributes = genotype.getExtendedAttributes();
for (String annotation : annotationsToRemove) {
extendedAttributes.remove(annotation);
}
final Genotype newGenotype = new GenotypeBuilder(genotype).noAttributes().attributes(extendedAttributes).make();
return new VariantContextBuilder(vc).genotypes(newGenotype).make();
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import org.broadinstitute.hellbender.GATKBaseTest;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.FeatureDataSource;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
import org.broadinstitute.hellbender.testutils.CommandLineProgramTester;
Expand Down Expand Up @@ -575,4 +576,35 @@ public void testFilters() throws IOException {
Assert.assertFalse(filteredRefBlockVC.isFiltered()); // Ref block is unfiltered even though the input RefBlock and low qual variant were both filtered
Assert.assertEquals(filteredRefBlockVC.getGenotype(0).getDP(), 12); // Ref block is combination of filtered variant with depth 22 and filtered ref block with depth 1
}

@Test
public void testRemovingFormatAnnotations() {
final File input = getTestFile("dragen.g.vcf");
final File output = createTempFile("reblockedgvcf", ".vcf");
final String priKey = "PRI";

final ArgumentsBuilder args = new ArgumentsBuilder();
args.addReference(new File(hg38Reference))
.add("V", input)
.add(ReblockGVCF.ANNOTATIONS_TO_REMOVE_LONG_NAME, priKey)
.addOutput(output);
runCommandLine(args);

final List<VariantContext> outVCs = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()).getRight();
for(VariantContext vc : outVCs){
Assert.assertNull(vc.getGenotype(0).getExtendedAttribute(priKey));
}
}

@Test
public void testNonGVCFInput() {
final File output = createTempFile("reblockedgvcf", ".vcf");

final ArgumentsBuilder args = new ArgumentsBuilder();
args.addReference(new File(b37_reference_20_21))
.add("V", "src/test/resources/large/NA12878.HiSeq.WGS.b37_decoy.indel.recalibrated.chr20.vcf")
.addOutput(output);

Assert.assertThrows(GATKException.class, () -> runCommandLine(args));
}
}

0 comments on commit 248dd79

Please sign in to comment.