Skip to content

Commit

Permalink
Remove header lines in ReblockGVCFs when we remove FORMAT annotations (#8895)
Browse files Browse the repository at this point in the history

* remove header lines when removing annotation in Reblocking

* clean up
  • Loading branch information
meganshand committed Jun 26, 2024
1 parent e600f1c commit baa0dd0
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ public final class ReblockGVCF extends MultiVariantWalker {

@Advanced
@Argument(fullName=ANNOTATIONS_TO_REMOVE_LONG_NAME, doc="FORMAT level annotations to remove from all genotypes in final GVCF.", optional = true)
private List<String> annotationsToRemove = new ArrayList<>();
private List<String> formatAnnotationsToRemove = new ArrayList<>();

@Advanced
@Argument(fullName=QUAL_APPROX_LONG_NAME, shortName=QUAL_APPROX_SHORT_NAME, doc="Add necessary INFO field annotation to perform QUAL approximation downstream; required for GnarlyGenotyper", optional = true)
Expand Down Expand Up @@ -238,9 +238,6 @@ public void onTraversalStart() {
+ ", but the " + GATKVCFConstants.TREE_SCORE + " annotation is not present in the input GVCF.");
}

List<String> missingAnnotationsToRemove = annotationsToRemove.stream().filter(a -> inputHeader.getFormatHeaderLine(a)==null).toList();
missingAnnotationsToRemove.forEach(a -> logger.warn("FORMAT level annotation " + a + ", which was requested to be removed by --" + ANNOTATIONS_TO_REMOVE_LONG_NAME + ", not found in input GVCF header."));

final Set<VCFHeaderLine> inputHeaders = inputHeader.getMetaDataInSortedOrder();

final Set<VCFHeaderLine> headerLines = new HashSet<>(inputHeaders);
Expand All @@ -249,6 +246,11 @@ public void onTraversalStart() {
(vcfHeaderLine.getKey().equals("INFO")) && ((VCFInfoHeaderLine)vcfHeaderLine).getID().equals(GATKVCFConstants.RAW_RMS_MAPPING_QUALITY_DEPRECATED) || //remove old (maybe wrong type) and add new with deprecated note
(vcfHeaderLine.getKey().equals("INFO")) && infoFieldAnnotationKeyNamesToRemove.contains(((VCFInfoHeaderLine)vcfHeaderLine).getID()));

List<String> missingAnnotationsToRemove = formatAnnotationsToRemove.stream().filter(a -> inputHeader.getFormatHeaderLine(a)==null).toList();
missingAnnotationsToRemove.forEach(a -> logger.warn("FORMAT level annotation " + a + ", which was requested to be removed by --" + ANNOTATIONS_TO_REMOVE_LONG_NAME + ", not found in input GVCF header."));
headerLines.removeIf(vcfHeaderLine -> vcfHeaderLine.getKey().equals("FORMAT") &&
formatAnnotationsToRemove.contains(((VCFFormatHeaderLine) vcfHeaderLine).getID()));

headerLines.addAll(getDefaultToolVCFHeaderLines());

genotypingEngine = createGenotypingEngine(new IndexedSampleList(getSamplesForVariants()));
Expand Down Expand Up @@ -338,7 +340,7 @@ public void apply(VariantContext variant, ReadsContext reads, ReferenceContext r
if (!variant.hasAllele(Allele.NON_REF_ALLELE)) {
throw new UserException("Variant Context at " + variant.getContig() + ":" + variant.getStart() + " does not contain a <NON-REF> allele. This tool is only intended for use with GVCFs.");
}
VariantContext newVC = annotationsToRemove.size() > 0 ? removeVCFFormatAnnotations(variant) : variant;
VariantContext newVC = formatAnnotationsToRemove.size() > 0 ? removeVCFFormatAnnotations(variant) : variant;
regenotypeVC(newVC);
}

Expand All @@ -356,7 +358,7 @@ private VariantContext removeVCFFormatAnnotations(final VariantContext vc) {
return vc;
}

for (String annotation : annotationsToRemove) {
for (String annotation : formatAnnotationsToRemove) {
extendedAttributes.remove(annotation);
}
final Genotype newGenotype = new GenotypeBuilder(genotype).noAttributes().attributes(extendedAttributes).make();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,9 @@ public void testRemovingFormatAnnotations() {
.addOutput(output);
runCommandLine(args);

final List<VariantContext> outVCs = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()).getRight();
final Pair<VCFHeader, List<VariantContext>> outVCF = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath());
Assert.assertNull(outVCF.getLeft().getFormatHeaderLine(priKey));
final List<VariantContext> outVCs = outVCF.getRight();
for(VariantContext vc : outVCs){
Assert.assertNull(vc.getGenotype(0).getExtendedAttribute(priKey));
}
Expand Down

0 comments on commit baa0dd0

Please sign in to comment.