Skip to content

Commit

Permalink
VariantsToTable: Include all fields when none are specified (#7911)
Browse files Browse the repository at this point in the history
VariantsToTable now outputs all fields declared in the VCF header when no fields are selected. 

Added integration tests to cover this new functionality

Fixes #7677
  • Loading branch information
orlicohen committed Jun 30, 2022
1 parent c40187a commit 6596ea8
Show file tree
Hide file tree
Showing 7 changed files with 373 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,14 @@

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLineCount;
import htsjdk.variant.vcf.*;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.barclay.argparser.Advanced;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import picard.cmdline.programgroups.VariantEvaluationProgramGroup;
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.ReadsContext;
Expand All @@ -38,7 +35,8 @@
* This tool extracts specified fields for each variant in a VCF file to a tab-delimited table, which may be easier
* to work with than a VCF. By default, the tool only extracts PASS or . (unfiltered) variants in the VCF file. Filtered variants may be
* included in the output by adding the --show-filtered flag. The tool can extract both INFO (i.e. site-level) fields and
* FORMAT (i.e. sample-level) fields.
* FORMAT (i.e. sample-level) fields. If the tool is run without specifying any fields, it defaults to including all
* fields declared in the VCF header.
* </p>
*
* <h4>INFO/site-level fields</h4>
Expand Down Expand Up @@ -100,6 +98,12 @@
* 1 65068538 SNP 49,0 35,4
* 1 111146235 SNP 69,1 77,4
* </pre>
* <pre>
* gatk VariantsToTable \
* -V input.vcf \
* -O output.table
* </pre>
* <p>would produce a file that includes all fields declared in the VCF header.</p>
*
* <h3>Notes</h3>
* <ul>
Expand Down Expand Up @@ -212,9 +216,39 @@ public void onTraversalStart() {
inputHeader = getHeaderForVariants();
outputStream = createPrintStream();

// if no fields specified, default to include all fields listed in header into table
if(fieldsToTake.isEmpty() && genotypeFieldsToTake.isEmpty() && asFieldsToTake.isEmpty() && asGenotypeFieldsToTake.isEmpty()){
logger.warn("No fields were specified. All fields declared in the VCF header will be included in the output table.");

// add all mandatory VCF fields (except INFO)
for(VCFHeader.HEADER_FIELDS headerField : VCFHeader.HEADER_FIELDS.values()){
if(!headerField.name().equals(VCFHeader.HEADER_FIELDS.INFO.name())) {
fieldsToTake.add(headerField.name());
}
}

// add all INFO fields present in VCF header
for (final VCFInfoHeaderLine infoLine : inputHeader.getInfoHeaderLines()) {
fieldsToTake.add(infoLine.getID());
}

// add all FORMAT fields present in VCF header
for (final VCFFormatHeaderLine formatLine : inputHeader.getFormatHeaderLines()) {
// ensure GT field listed as first FORMAT field
if(formatLine.getID().equals(VCFConstants.GENOTYPE_KEY)) {
genotypeFieldsToTake.add(0, formatLine.getID());
}
else {
genotypeFieldsToTake.add(formatLine.getID());
}
}
}

// if fields specified, but none are genotype fields, set samples to empty
if (genotypeFieldsToTake.isEmpty() && asGenotypeFieldsToTake.isEmpty()) {
samples = Collections.emptySortedSet();
} else {
}
else {
final Map<String, VCFHeader> vcfHeaders = Collections.singletonMap(getDrivingVariantsFeatureInput().getName(), getHeaderForVariants());
samples = VcfUtils.getSortedSampleSet(vcfHeaders, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);

Expand All @@ -238,6 +272,7 @@ public void onTraversalStart() {
outputStream.println("RecordID\tSample\tVariable\tValue");
} else {
final List<String> fields = new ArrayList<>();

fields.addAll(fieldsToTake);
fields.addAll(asFieldsToTake);
fields.addAll(createGenotypeFields());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.broadinstitute.hellbender.testutils.IntegrationTestSpec;
import org.testng.annotations.Test;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;

Expand Down Expand Up @@ -236,4 +237,44 @@ public void testMoltenOutputWithMultipleAlleles() throws IOException {
spec.setTrimWhiteSpace(false);
spec.executeTest("testMoltenOutputWithMultipleAlleles", this);
}

/**
 * Runs the tool with only the input and output arguments (no field selections) and checks that
 * the produced table matches the stored expected table.
 *
 * NOTE(review): the input file name suggests a VCF without genotype columns -- confirm against the test data.
 *
 * @throws IOException if the produced and expected tables cannot be read for comparison
 */
@Test
public void testNoFieldsSpecifiedNoSamples() throws IOException {
    final File sourceVcf = new File(getToolTestDataDir(), "VCFWithoutGenotypes_dbsnp_138.snippet.vcf");
    final File actualTable = createTempFile("noFieldsSpecifiedOutput", ".table");
    final File expectedTable = new File(getToolTestDataDir(), "expected.noFieldsSpecifiedNoSamples.table");

    // Deliberately pass no field arguments: only the variant input and the output path.
    runCommandLine(new String[] {
            "--variant", sourceVcf.getAbsolutePath(),
            "-O", actualTable.getAbsolutePath()
    });

    IntegrationTestSpec.assertEqualTextFiles(actualTable, expectedTable);
}

/**
 * Runs the tool with only the input and output arguments (no field selections) and checks that
 * the produced table matches the stored expected table.
 *
 * NOTE(review): the input file name suggests a VCF that carries per-sample genotypes -- confirm against the test data.
 *
 * @throws IOException if the produced and expected tables cannot be read for comparison
 */
@Test
public void testNoFieldsSpecifiedWithSamples() throws IOException {
    final File sourceVcf = new File(getToolTestDataDir(), "VCFWithGenotypes_1000G.phase3.snippet.vcf");
    final File actualTable = createTempFile("noFieldsSpecifiedWithSamplesOutput", ".table");
    final File expectedTable = new File(getToolTestDataDir(), "expected.noFieldsSpecifiedWithSamples.table");

    // Deliberately pass no field arguments: only the variant input and the output path.
    runCommandLine(new String[] {
            "--variant", sourceVcf.getAbsolutePath(),
            "-O", actualTable.getAbsolutePath()
    });

    IntegrationTestSpec.assertEqualTextFiles(actualTable, expectedTable);
}

/**
 * Runs the tool with only the input and output arguments (no field selections) and checks that
 * the produced table matches "expected.noFieldsSpecifiedNoSamples.table".
 *
 * NOTE(review): the input file name suggests a VCF whose header declares a FORMAT field while the
 * file itself has no sample columns -- confirm against the test data.
 *
 * @throws IOException if the produced and expected tables cannot be read for comparison
 */
@Test
public void testNoFieldsSpecifiedFormatFieldInHeaderNoSamples() throws IOException {
    final File sourceVcf = new File(getToolTestDataDir(), "VCFWithoutGenotypesWithFormatField_dbsnp_138.snippet.vcf");
    final File actualTable = createTempFile("noFieldsSpecifiedNoSamplesOutput", ".table");
    final File expectedTable = new File(getToolTestDataDir(), "expected.noFieldsSpecifiedNoSamples.table");

    // Deliberately pass no field arguments: only the variant input and the output path.
    runCommandLine(new String[] {
            "--variant", sourceVcf.getAbsolutePath(),
            "-O", actualTable.getAbsolutePath()
    });

    IntegrationTestSpec.assertEqualTextFiles(actualTable, expectedTable);
}

}
Loading

0 comments on commit 6596ea8

Please sign in to comment.