From f20763f8801f1755fc709d88538b5454cdacc431 Mon Sep 17 00:00:00 2001 From: Orli Cohen Date: Fri, 17 Jun 2022 10:40:26 -0400 Subject: [PATCH 1/7] VariantsToTable: setting default to include all fields Fixes #7677 --- .../walkers/variantutils/VariantsToTable.java | 14 +++++++++++++- .../VariantsToTableIntegrationTest.java | 13 +++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java index c8ae76086a4..a92a76bb52e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java @@ -12,7 +12,6 @@ import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; -import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; import picard.cmdline.programgroups.VariantEvaluationProgramGroup; import org.broadinstitute.hellbender.engine.FeatureContext; import org.broadinstitute.hellbender.engine.ReadsContext; @@ -30,6 +29,7 @@ import java.util.function.Function; import static org.broadinstitute.hellbender.utils.Utils.split; +import static org.broadinstitute.hellbender.utils.Utils.stream; /** * Extract fields from a VCF file to a tab-delimited table @@ -135,6 +135,10 @@ public final class VariantsToTable extends VariantWalker { shortName="F", doc="The name of a standard VCF field or an INFO field to include in the output table", optional=true) protected List fieldsToTake = new ArrayList<>(); + //protected List fieldsToTake = VCFHeader.HEADER_FIELDS.name(); + + + /** * Any annotation name in the FORMAT field (e.g., GQ, PL) to include in the output table. @@ -238,6 +242,14 @@ public void onTraversalStart() { outputStream.println("RecordID\tSample\tVariable\tValue"); } else { final List fields = new ArrayList<>(); + + // if no fields specified, default to all mandatory fields + if(fieldsToTake.isEmpty()){ + for(VCFHeader.HEADER_FIELDS header : VCFHeader.HEADER_FIELDS.values()){ + fieldsToTake.add(header.name()); + } + } + fields.addAll(fieldsToTake); fields.addAll(asFieldsToTake); fields.addAll(createGenotypeFields()); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java index b28094cf72e..0b8fcf0dbb8 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java @@ -5,6 +5,7 @@ import org.broadinstitute.hellbender.testutils.IntegrationTestSpec; import org.testng.annotations.Test; +import java.io.File; import java.io.IOException; import java.util.Arrays; @@ -236,4 +237,16 @@ public void testMoltenOutputWithMultipleAlleles() throws IOException { spec.setTrimWhiteSpace(false); spec.executeTest("testMoltenOutputWithMultipleAlleles", this); } + + @Test + public void testNoFieldsSpecified() throws IOException { + final File inputFile = new File(getToolTestDataDir(), "multiallelic.vcf"); + final File outputFile = new File(getToolTestDataDir(), "noFieldsOutput.vcf"); + //createTempFile("noFieldsOutput", ".table"); + + final String[] args = new String[] {"--variant", inputFile.getAbsolutePath(), + "-O", outputFile.getAbsolutePath()}; + runCommandLine(args); + } + } From 3e9d12368afde6e8cf513f105c51904d8502016b Mon Sep 17 00:00:00 2001 From: Orli Cohen Date: Fri, 17 Jun 2022 14:47:11 -0400 Subject: [PATCH 2/7] Output table includes all fields in VCF header when none specified, in progress --- .../walkers/variantutils/VariantsToTable.java | 33 ++++++++++++++----- .../VariantsToTableIntegrationTest.java | 2 +- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java index a92a76bb52e..d8b97ad0124 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java @@ -2,9 +2,7 @@ import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.VariantContext; -import htsjdk.variant.vcf.VCFConstants; -import htsjdk.variant.vcf.VCFHeader; -import htsjdk.variant.vcf.VCFHeaderLineCount; +import htsjdk.variant.vcf.*; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.broadinstitute.barclay.argparser.Advanced; @@ -135,7 +133,6 @@ public final class VariantsToTable extends VariantWalker { shortName="F", doc="The name of a standard VCF field or an INFO field to include in the output table", optional=true) protected List fieldsToTake = new ArrayList<>(); - //protected List fieldsToTake = VCFHeader.HEADER_FIELDS.name(); @@ -218,7 +215,8 @@ public void onTraversalStart() { if (genotypeFieldsToTake.isEmpty() && asGenotypeFieldsToTake.isEmpty()) { samples = Collections.emptySortedSet(); - } else { + } + else { final Map vcfHeaders = Collections.singletonMap(getDrivingVariantsFeatureInput().getName(), getHeaderForVariants()); samples = VcfUtils.getSortedSampleSet(vcfHeaders, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); @@ -243,11 +241,30 @@ public void onTraversalStart() { } else { final List fields = new ArrayList<>(); - // if no fields specified, default to all mandatory fields - if(fieldsToTake.isEmpty()){ + // if no fields specified, include all fields listed in header into table + if(fieldsToTake.isEmpty() && genotypeFieldsToTake.isEmpty() && asFieldsToTake.isEmpty() && asGenotypeFieldsToTake.isEmpty()){ + logger.warn("No fields were specified. All fields will be included in output table."); + + // if + final Map vcfHeaders = Collections.singletonMap(getDrivingVariantsFeatureInput().getName(), getHeaderForVariants()); + samples = VcfUtils.getSortedSampleSet(vcfHeaders, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); + + // add all mandatory fields except INFO for(VCFHeader.HEADER_FIELDS header : VCFHeader.HEADER_FIELDS.values()){ - fieldsToTake.add(header.name()); + if(header.name() != "INFO") + fieldsToTake.add(header.name()); } + + // add all INFO fields present in header + for (final VCFInfoHeaderLine infoLine : inputHeader.getInfoHeaderLines()) { + fieldsToTake.add(infoLine.getID()); + } + + // add all FORMAT fields present in header + for (final VCFFormatHeaderLine formatLine : inputHeader.getFormatHeaderLines()) { + genotypeFieldsToTake.add(formatLine.getID()); + } + } fields.addAll(fieldsToTake); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java index 0b8fcf0dbb8..2a624612c54 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java @@ -240,7 +240,7 @@ public void testMoltenOutputWithMultipleAlleles() throws IOException { @Test public void testNoFieldsSpecified() throws IOException { - final File inputFile = new File(getToolTestDataDir(), "multiallelic.vcf"); + final File inputFile = new File(getToolTestDataDir(), "vcfexample2.vcf"); final File outputFile = new File(getToolTestDataDir(), "noFieldsOutput.vcf"); //createTempFile("noFieldsOutput", ".table"); From dc7ff59b4b4453e3b1ce75bd82a00a04af514e73 Mon Sep 17 00:00:00 2001 From: Orli Cohen Date: Wed, 22 Jun 2022 11:19:54 -0400 Subject: [PATCH 3/7] added default to include all fields in VCF header when no fields specified, integration tests and expected output files --- .../walkers/variantutils/VariantsToTable.java | 59 ++++----- .../VariantsToTableIntegrationTest.java | 15 ++- .../VariantsToTable/1000G.phase3.snippet.vcf | 48 +++++++ .../expected.noFieldsSpecified.table | 4 + ...xpected.noFieldsSpecifiedWithSamples.table | 4 + .../modified_dbsnp_138.snippet.vcf | 117 ++++++++++++++++++ 6 files changed, 215 insertions(+), 32 deletions(-) create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/1000G.phase3.snippet.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/expected.noFieldsSpecified.table create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/expected.noFieldsSpecifiedWithSamples.table create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/modified_dbsnp_138.snippet.vcf diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java index d8b97ad0124..7d924698e54 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java @@ -213,10 +213,37 @@ public void onTraversalStart() { inputHeader = getHeaderForVariants(); outputStream = createPrintStream(); - if (genotypeFieldsToTake.isEmpty() && asGenotypeFieldsToTake.isEmpty()) { - samples = Collections.emptySortedSet(); + // if no fields specified, default to include all fields listed in header into table + if(fieldsToTake.isEmpty() && genotypeFieldsToTake.isEmpty() && asFieldsToTake.isEmpty() && asGenotypeFieldsToTake.isEmpty()){ + logger.warn("No fields were specified. All fields will be included in output table."); + + // add all mandatory VCF fields (except INFO) + for(VCFHeader.HEADER_FIELDS header : VCFHeader.HEADER_FIELDS.values()){ + if(header.name() != "INFO") + fieldsToTake.add(header.name()); + } + + // add all INFO fields present in VCF header + for (final VCFInfoHeaderLine infoLine : inputHeader.getInfoHeaderLines()) { + fieldsToTake.add(infoLine.getID()); + } + + // add all FORMAT fields present in VCF header + for (final VCFFormatHeaderLine formatLine : inputHeader.getFormatHeaderLines()) { + if(formatLine.getID().equals("GT")){ + genotypeFieldsToTake.add(0, formatLine.getID()); + } + else { + genotypeFieldsToTake.add(formatLine.getID()); + } + } } - else { + + // if fields specified, but none are genotype fields, set samples to empty + if (genotypeFieldsToTake.isEmpty() && asGenotypeFieldsToTake.isEmpty() && (!fieldsToTake.isEmpty() || !asFieldsToTake.isEmpty())) { + samples = Collections.emptySortedSet(); + } + else { final Map vcfHeaders = Collections.singletonMap(getDrivingVariantsFeatureInput().getName(), getHeaderForVariants()); samples = VcfUtils.getSortedSampleSet(vcfHeaders, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); @@ -241,32 +268,6 @@ public void onTraversalStart() { } else { final List fields = new ArrayList<>(); - // if no fields specified, include all fields listed in header into table - if(fieldsToTake.isEmpty() && genotypeFieldsToTake.isEmpty() && asFieldsToTake.isEmpty() && asGenotypeFieldsToTake.isEmpty()){ - logger.warn("No fields were specified. All fields will be included in output table."); - - // if - final Map vcfHeaders = Collections.singletonMap(getDrivingVariantsFeatureInput().getName(), getHeaderForVariants()); - samples = VcfUtils.getSortedSampleSet(vcfHeaders, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); - - // add all mandatory fields except INFO - for(VCFHeader.HEADER_FIELDS header : VCFHeader.HEADER_FIELDS.values()){ - if(header.name() != "INFO") - fieldsToTake.add(header.name()); - } - - // add all INFO fields present in header - for (final VCFInfoHeaderLine infoLine : inputHeader.getInfoHeaderLines()) { - fieldsToTake.add(infoLine.getID()); - } - - // add all FORMAT fields present in header - for (final VCFFormatHeaderLine formatLine : inputHeader.getFormatHeaderLines()) { - genotypeFieldsToTake.add(formatLine.getID()); - } - - } - fields.addAll(fieldsToTake); fields.addAll(asFieldsToTake); fields.addAll(createGenotypeFields()); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java index 2a624612c54..2717d666cd2 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java @@ -240,9 +240,18 @@ public void testMoltenOutputWithMultipleAlleles() throws IOException { @Test public void testNoFieldsSpecified() throws IOException { - final File inputFile = new File(getToolTestDataDir(), "vcfexample2.vcf"); - final File outputFile = new File(getToolTestDataDir(), "noFieldsOutput.vcf"); - //createTempFile("noFieldsOutput", ".table"); + final File inputFile = new File(getToolTestDataDir(), "modified_dbsnp_138.snippet.vcf"); + final File outputFile = new File(getToolTestDataDir(), "expected.noFieldsSpecified.table"); + + final String[] args = new String[] {"--variant", inputFile.getAbsolutePath(), + "-O", outputFile.getAbsolutePath()}; + runCommandLine(args); + } + + @Test + public void testNoFieldsSpecifiedWithSamples() throws IOException { + final File inputFile = new File(getToolTestDataDir(), "1000G.phase3.snippet.vcf"); + final File outputFile = new File(getToolTestDataDir(), "expected.noFieldsSpecifiedWithSamples.table"); final String[] args = new String[] {"--variant", inputFile.getAbsolutePath(), "-O", outputFile.getAbsolutePath()}; diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/1000G.phase3.snippet.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/1000G.phase3.snippet.vcf new file mode 100644 index 00000000000..eaf1060e2e7 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/1000G.phase3.snippet.vcf @@ -0,0 +1,48 @@ +##fileformat=VCFv4.2 +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##GATKCommandLine= +##GATKCommandLine= +##GATKVersion=2.5-191-g02f8427 +##HaplotypeCaller="analysis_type=HaplotypeCaller input_file=[/humgen/1kg/processing/production_wgs_final/chr20/ALL.chr20.bam.list] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=[/humgen/1kg/processing/production_wgs_final/chr20/.queue/scatterGather/call.for.1000G-1-sg/temp_0001_of_1000/scatter.intervals] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/humgen/1kg/reference/human_g1k_v37_decoy.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=200 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false performanceLog=null useOriginalQualities=false BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false defaultBaseQualities=-1 validation_strictness=SILENT remove_program_records=false keep_program_records=false unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false version=false out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub graphOutput=null bamOutput=null bam_compression=null disable_bam_indexing=null generate_md5=null simplifyBAM=null bamWriterType=CALLED_HAPLOTYPES dbsnp=(RodBinding name= source=UNBOUND) comp=[] annotation=[ClippingRankSumTest, DepthPerSampleHC] excludeAnnotation=[SpanningDeletions, TandemRepeatAnnotator] heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=10.0 standard_min_confidence_threshold_for_emitting=10.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.05 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null useDebruijnAssembler=false minKmerForDebruijnAssembler=11 onlyUseKmerSizeForDebruijnAssembler=-1 kmerSize=[10, 25] dontIncreaseKmerSizesForCycles=false numPruningSamples=3 maxPathsPerSample=8 dontRecoverDanglingTails=false minPruning=2 gcpHMM=10 includeUmappedReads=false useAllelesTrigger=false useFilteredReadsForAnnotations=false phredScaledGlobalReadMismappingRate=45 maxNumHaplotypesInPopulation=25 mergeVariantsViaLD=false pair_hmm_implementation=LOGLESS_CACHING keepRG=null justDetermineActiveRegions=false dontGenotype=false errorCorrectKmers=false debug=false debugGraphTransformations=false useLowQualityBasesForAssembly=false dontTrimActiveRegions=false dontUseSoftClippedBases=false captureAssemblyFailureBAM=false allowCyclesInKmerGraphToGeneratePaths=false errorCorrectReads=false kmerLengthForReadErrorCorrection=25 minObservationsForKmerToBeSolid=20 activityProfileOut=null activeRegionOut=null activeRegionIn=null activeRegionExtension=null forceActive=false activeRegionMaxSize=null bandPassSigma=null min_mapping_quality_score=20 filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false" +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##contig= +##source=SelectVariants +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097 HG00099 +20 10000054 . CTTTG C 504.42 PASS AC=0;AF=0.00;AN=6;BaseQRankSum=-0.975;ClippingRankSum=-2.925;DP=22;FS=1.899;InbreedingCoeff=0.0592;MQ=59.27;MQ0=0;MQRankSum=-3.212;QD=2.43;ReadPosRankSum=-0.264;VQSLOD=5.10;culprit=FS GT:AD:DP:GQ:PL 0/0:2,0:2:6:0,6,119 0/0:10,0:10:29:0,29,592 0/0:10,0:10:30:0,30,598 +20 10000107 . T C 263.95 PASS AC=0;AF=0.00;AN=6;BaseQRankSum=-0.444;ClippingRankSum=-3.132;DP=25;FS=0.948;InbreedingCoeff=-0.0102;MQ=59.19;MQ0=0;MQRankSum=2.292;POSITIVE_TRAIN_SITE;QD=10.56;ReadPosRankSum=0.055;VQSLOD=7.76;culprit=FS GT:AD:DP:GQ:PL 0/0:5,0:5:15:0,15,387 0/0:13,0:13:42:0,42,786 0/0:7,0:7:24:0,24,548 +20 10000117 . C T 329458.17 PASS AC=1;AF=0.167;AN=6;BaseQRankSum=10.505;ClippingRankSum=-20.658;DP=28;FS=8.305;InbreedingCoeff=0.1727;MQ=59.17;MQ0=0;MQRankSum=2.689;POSITIVE_TRAIN_SITE;QD=25.46;ReadPosRankSum=-4.688;VQSLOD=3.19;culprit=ReadPosRankSum GT:AD:DP:GQ:PL 0/0:5,0:5:15:0,15,189 0/1:8,8:16:99:254,0,231 0/0:7,0:7:21:0,21,271 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/expected.noFieldsSpecified.table b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/expected.noFieldsSpecified.table new file mode 100644 index 00000000000..6a6253c3ca6 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/expected.noFieldsSpecified.table @@ -0,0 +1,4 @@ +CHROM POS ID REF ALT QUAL FILTER AC CAF CLNDSDB COMMON DP DSS HD KGPROD KGPhase1 LSD REF RS RSPOS SAO SSR VC VP WGT dbSNPBuildID +20 10000092 rs183046704 T G -10.0 PASS NA [0.9991,0.0009183] NA 0 NA NA NA true true NA T 183046704 10000092 0 0 SNV 0x050000000001000014000100 1 135 +20 10000107 rs188245245 T C -10.0 PASS NA [0.9982,0.001837] NA 0 NA NA NA true true NA T 188245245 10000107 0 0 SNV 0x050000000001000014000100 1 135 +20 10000117 rs4816203 C T -10.0 PASS NA [0.3682,0.6318] NA 1 NA NA NA true true NA C 4816203 10000117 0 0 SNV 0x05010000000117011e000100 1 111 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/expected.noFieldsSpecifiedWithSamples.table b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/expected.noFieldsSpecifiedWithSamples.table new file mode 100644 index 00000000000..0cc83cb8076 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/expected.noFieldsSpecifiedWithSamples.table @@ -0,0 +1,4 @@ +CHROM POS ID REF ALT QUAL FILTER AC AF AN BaseQRankSum ClippingRankSum DP DS END FS HaplotypeScore InbreedingCoeff MLEAC MLEAF MQ MQ0 MQRankSum NEGATIVE_TRAIN_SITE POSITIVE_TRAIN_SITE QD ReadPosRankSum VQSLOD culprit HG00096.GT HG00096.AD HG00096.DP HG00096.GQ HG00096.PL HG00097.GT HG00097.AD HG00097.DP HG00097.GQ HG00097.PL HG00099.GT HG00099.AD HG00099.DP HG00099.GQ HG00099.PL +20 10000054 . CTTTG C 504.42 PASS 0 0.00 6 -0.975 -2.925 22 NA NA 1.899 NA 0.0592 NA NA 59.27 0 -3.212 NA NA 2.43 -0.264 5.10 FS CTTTG/CTTTG 2,0 2 6 0,6,119 CTTTG/CTTTG 10,0 10 29 0,29,592 CTTTG/CTTTG 10,0 10 30 0,30,598 +20 10000107 . T C 263.95 PASS 0 0.00 6 -0.444 -3.132 25 NA NA 0.948 NA -0.0102 NA NA 59.19 0 2.292 NA true 10.56 0.055 7.76 FS T/T 5,0 5 15 0,15,387 T/T 13,0 13 42 0,42,786 T/T 7,0 7 24 0,24,548 +20 10000117 . C T 329458.1699999999 PASS 1 0.167 6 10.505 -20.658 28 NA NA 8.305 NA 0.1727 NA NA 59.17 0 2.689 NA true 25.46 -4.688 3.19 ReadPosRankSum C/C 5,0 5 15 0,15,189 C/T 8,8 16 99 254,0,231 C/C 7,0 7 21 0,21,271 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/modified_dbsnp_138.snippet.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/modified_dbsnp_138.snippet.vcf new file mode 100644 index 00000000000..628a1d05d1e --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/modified_dbsnp_138.snippet.vcf @@ -0,0 +1,117 @@ +##fileformat=VCFv4.2 +##FILTER= +##GATKCommandLine.SelectVariants= +##GATKCommandLine= +##GATKCommandLine= +##INFO= +##INFO= +##INFO= +##INFO== 1% and for which 2 or more founders contribute to that minor allele frequency."> +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##dbSNP_BUILD_ID=138 +##fileDate=20130806 +##phasing=partial +##source=SelectVariants +##variationPropertyDocumentationUrl=ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf +#CHROM POS ID REF ALT QUAL FILTER INFO +20 10000092 rs183046704 T G . . CAF=[0.9991,0.0009183];COMMON=0;KGPROD;KGPhase1;RS=183046704;RSPOS=10000092;SAO=0;SSR=0;VC=SNV;VP=0x050000000001000014000100;WGT=1;dbSNPBuildID=135 +20 10000107 rs188245245 T C . . CAF=[0.9982,0.001837];COMMON=0;KGPROD;KGPhase1;RS=188245245;RSPOS=10000107;SAO=0;SSR=0;VC=SNV;VP=0x050000000001000014000100;WGT=1;dbSNPBuildID=135 +20 10000117 rs4816203 C T . . CAF=[0.3682,0.6318];COMMON=1;G5;G5A;GNO;KGPROD;KGPhase1;KGPilot123;OTHERKG;RS=4816203;RSPOS=10000117;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=0x05010000000117011e000100;WGT=1;dbSNPBuildID=111 From 0a6d1efcb571aa772f86a3b7e4288ceccc4fd1e2 Mon Sep 17 00:00:00 2001 From: Orli Cohen Date: Mon, 27 Jun 2022 13:02:24 -0400 Subject: [PATCH 4/7] responding to review comments --- .../walkers/variantutils/VariantsToTable.java | 14 +-- .../VariantsToTableIntegrationTest.java | 12 +- .../modified_dbsnp_138.snippet.vcf | 117 ------------------ 3 files changed, 15 insertions(+), 128 deletions(-) delete mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/modified_dbsnp_138.snippet.vcf diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java index 7d924698e54..0a413c7ba69 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java @@ -27,7 +27,6 @@ import java.util.function.Function; import static org.broadinstitute.hellbender.utils.Utils.split; -import static org.broadinstitute.hellbender.utils.Utils.stream; /** * Extract fields from a VCF file to a tab-delimited table @@ -134,9 +133,6 @@ public final class VariantsToTable extends VariantWalker { doc="The name of a standard VCF field or an INFO field to include in the output table", optional=true) protected List fieldsToTake = new ArrayList<>(); - - - /** * Any annotation name in the FORMAT field (e.g., GQ, PL) to include in the output table. * This argument accepts any number of inputs e.g. -GF GQ -GF PL @@ -218,9 +214,10 @@ public void onTraversalStart() { logger.warn("No fields were specified. All fields will be included in output table."); // add all mandatory VCF fields (except INFO) - for(VCFHeader.HEADER_FIELDS header : VCFHeader.HEADER_FIELDS.values()){ - if(header.name() != "INFO") - fieldsToTake.add(header.name()); + for(VCFHeader.HEADER_FIELDS headerField : VCFHeader.HEADER_FIELDS.values()){ + if(!headerField.name().equals(VCFHeader.HEADER_FIELDS.INFO.name())) { + fieldsToTake.add(headerField.name()); + } } // add all INFO fields present in VCF header @@ -230,7 +227,8 @@ public void onTraversalStart() { // add all FORMAT fields present in VCF header for (final VCFFormatHeaderLine formatLine : inputHeader.getFormatHeaderLines()) { - if(formatLine.getID().equals("GT")){ + // ensure GT field listed as first FORMAT field + if(formatLine.getID().equals("GT")) { genotypeFieldsToTake.add(0, formatLine.getID()); } else { diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java index 2717d666cd2..3efd76ce629 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java @@ -240,22 +240,28 @@ public void testMoltenOutputWithMultipleAlleles() throws IOException { @Test public void testNoFieldsSpecified() throws IOException { - final File inputFile = new File(getToolTestDataDir(), "modified_dbsnp_138.snippet.vcf"); - final File outputFile = new File(getToolTestDataDir(), "expected.noFieldsSpecified.table"); + final File inputFile = new File(getToolTestDataDir(), "extraheaderlinesdeleted_dbsnp_138.snippet.vcf"); + final File outputFile = createTempFile(getToolTestDataDir(), "noFieldsSpecifiedOutput.table"); + final File expectedFile = new File(getToolTestDataDir(), "expected.noFieldsSpecified.table"); final String[] args = new String[] {"--variant", inputFile.getAbsolutePath(), "-O", outputFile.getAbsolutePath()}; runCommandLine(args); + + IntegrationTestSpec.assertEqualTextFiles(outputFile, expectedFile); } @Test public void testNoFieldsSpecifiedWithSamples() throws IOException { final File inputFile = new File(getToolTestDataDir(), "1000G.phase3.snippet.vcf"); - final File outputFile = new File(getToolTestDataDir(), "expected.noFieldsSpecifiedWithSamples.table"); + final File outputFile = createTempFile(getToolTestDataDir(), "noFieldsSpecifiedWithSamplesOutput.table"); + final File expectedFile = new File(getToolTestDataDir(), "expected.noFieldsSpecifiedWithSamples.table"); final String[] args = new String[] {"--variant", inputFile.getAbsolutePath(), "-O", outputFile.getAbsolutePath()}; runCommandLine(args); + + IntegrationTestSpec.assertEqualTextFiles(outputFile, expectedFile); } } diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/modified_dbsnp_138.snippet.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/modified_dbsnp_138.snippet.vcf deleted file mode 100644 index 628a1d05d1e..00000000000 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/modified_dbsnp_138.snippet.vcf +++ /dev/null @@ -1,117 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER= -##GATKCommandLine.SelectVariants= -##GATKCommandLine= -##GATKCommandLine= -##INFO= -##INFO= -##INFO= -##INFO== 1% and for which 2 or more founders contribute to that minor allele frequency."> -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##dbSNP_BUILD_ID=138 -##fileDate=20130806 -##phasing=partial -##source=SelectVariants -##variationPropertyDocumentationUrl=ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf -#CHROM POS ID REF ALT QUAL FILTER INFO -20 10000092 rs183046704 T G . . CAF=[0.9991,0.0009183];COMMON=0;KGPROD;KGPhase1;RS=183046704;RSPOS=10000092;SAO=0;SSR=0;VC=SNV;VP=0x050000000001000014000100;WGT=1;dbSNPBuildID=135 -20 10000107 rs188245245 T C . . CAF=[0.9982,0.001837];COMMON=0;KGPROD;KGPhase1;RS=188245245;RSPOS=10000107;SAO=0;SSR=0;VC=SNV;VP=0x050000000001000014000100;WGT=1;dbSNPBuildID=135 -20 10000117 rs4816203 C T . . CAF=[0.3682,0.6318];COMMON=1;G5;G5A;GNO;KGPROD;KGPhase1;KGPilot123;OTHERKG;RS=4816203;RSPOS=10000117;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=0x05010000000117011e000100;WGT=1;dbSNPBuildID=111 From 8a33af3604749a6c944956a2642610d2bf59cf1d Mon Sep 17 00:00:00 2001 From: Orli Cohen Date: Mon, 27 Jun 2022 13:11:22 -0400 Subject: [PATCH 5/7] expected output file --- ...raheaderlinesdeleted_dbsnp_138.snippet.vcf | 117 ++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/extraheaderlinesdeleted_dbsnp_138.snippet.vcf diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/extraheaderlinesdeleted_dbsnp_138.snippet.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/extraheaderlinesdeleted_dbsnp_138.snippet.vcf new file mode 100644 index 00000000000..628a1d05d1e --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/extraheaderlinesdeleted_dbsnp_138.snippet.vcf @@ -0,0 +1,117 @@ +##fileformat=VCFv4.2 +##FILTER= +##GATKCommandLine.SelectVariants= +##GATKCommandLine= +##GATKCommandLine= +##INFO= +##INFO= +##INFO= +##INFO== 1% and for which 2 or more founders contribute to that minor allele frequency."> +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##dbSNP_BUILD_ID=138 +##fileDate=20130806 +##phasing=partial +##source=SelectVariants +##variationPropertyDocumentationUrl=ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf +#CHROM POS ID REF ALT QUAL FILTER INFO +20 10000092 rs183046704 T G . . CAF=[0.9991,0.0009183];COMMON=0;KGPROD;KGPhase1;RS=183046704;RSPOS=10000092;SAO=0;SSR=0;VC=SNV;VP=0x050000000001000014000100;WGT=1;dbSNPBuildID=135 +20 10000107 rs188245245 T C . . CAF=[0.9982,0.001837];COMMON=0;KGPROD;KGPhase1;RS=188245245;RSPOS=10000107;SAO=0;SSR=0;VC=SNV;VP=0x050000000001000014000100;WGT=1;dbSNPBuildID=135 +20 10000117 rs4816203 C T . . CAF=[0.3682,0.6318];COMMON=1;G5;G5A;GNO;KGPROD;KGPhase1;KGPilot123;OTHERKG;RS=4816203;RSPOS=10000117;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=0x05010000000117011e000100;WGT=1;dbSNPBuildID=111 From 9b956ba1c004ac8d8671e8293aa72643e96b5a18 Mon Sep 17 00:00:00 2001 From: Orli Cohen Date: Wed, 29 Jun 2022 11:27:18 -0400 Subject: [PATCH 6/7] responding to review comments & added integration test --- .../walkers/variantutils/VariantsToTable.java | 15 ++- .../VariantsToTableIntegrationTest.java | 21 +++- .../VCFWithGenotypes_1000G.phase3.snippet.vcf | 48 +++++++ ...typesWithFormatField_dbsnp_138.snippet.vcf | 118 ++++++++++++++++++ .../VCFWithoutGenotypes_dbsnp_138.snippet.vcf | 117 +++++++++++++++++ .../expected.noFieldsSpecifiedNoSamples.table | 4 + 6 files changed, 315 insertions(+), 8 deletions(-) create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/VCFWithGenotypes_1000G.phase3.snippet.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/VCFWithoutGenotypesWithFormatField_dbsnp_138.snippet.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/VCFWithoutGenotypes_dbsnp_138.snippet.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/expected.noFieldsSpecifiedNoSamples.table diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java index 0a413c7ba69..83fe33d671d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java @@ -35,7 +35,8 @@ * This tool extracts specified fields for each variant in a VCF file to a tab-delimited table, which may be easier * to work with than a VCF. By default, the tool only extracts PASS or . (unfiltered) variants in the VCF file. Filtered variants may be * included in the output by adding the --show-filtered flag. The tool can extract both INFO (i.e. site-level) fields and - * FORMAT (i.e. sample-level) fields. + * FORMAT (i.e. sample-level) fields. If the tool is run without specifying any fields, it defaults to include all fields + * declared in the VCF header. *

* *

INFO/site-level fields

@@ -97,6 +98,12 @@ * 1 65068538 SNP 49,0 35,4 * 1 111146235 SNP 69,1 77,4 * + *
+ *     gatk VariantsToTable \
+ *     -V input.vcf \
+ *     -O output.table
+ * 
+ *

would produce a file that includes all fields declared in the VCF header.

* *

Notes

*
    @@ -211,7 +218,7 @@ public void onTraversalStart() { // if no fields specified, default to include all fields listed in header into table if(fieldsToTake.isEmpty() && genotypeFieldsToTake.isEmpty() && asFieldsToTake.isEmpty() && asGenotypeFieldsToTake.isEmpty()){ - logger.warn("No fields were specified. All fields will be included in output table."); + logger.warn("No fields were specified. All fields declared in the VCF header will be included in the output table."); // add all mandatory VCF fields (except INFO) for(VCFHeader.HEADER_FIELDS headerField : VCFHeader.HEADER_FIELDS.values()){ @@ -228,7 +235,7 @@ public void onTraversalStart() { // add all FORMAT fields present in VCF header for (final VCFFormatHeaderLine formatLine : inputHeader.getFormatHeaderLines()) { // ensure GT field listed as first FORMAT field - if(formatLine.getID().equals("GT")) { + if(formatLine.getID().equals(VCFConstants.GENOTYPE_KEY)) { genotypeFieldsToTake.add(0, formatLine.getID()); } else { @@ -238,7 +245,7 @@ public void onTraversalStart() { } // if fields specified, but none are genotype fields, set samples to empty - if (genotypeFieldsToTake.isEmpty() && asGenotypeFieldsToTake.isEmpty() && (!fieldsToTake.isEmpty() || !asFieldsToTake.isEmpty())) { + if (genotypeFieldsToTake.isEmpty() && asGenotypeFieldsToTake.isEmpty()) { samples = Collections.emptySortedSet(); } else { diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java index 3efd76ce629..f7426201709 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java @@ -240,8 +240,8 @@ public void testMoltenOutputWithMultipleAlleles() throws IOException { @Test public void testNoFieldsSpecified() throws IOException { - final File inputFile = new File(getToolTestDataDir(), "extraheaderlinesdeleted_dbsnp_138.snippet.vcf"); - final File outputFile = createTempFile(getToolTestDataDir(), "noFieldsSpecifiedOutput.table"); + final File inputFile = new File(getToolTestDataDir(), "VCFWithoutGenotypes_dbsnp_138.snippet.vcf"); + final File outputFile = createTempFile("noFieldsSpecifiedOutput", ".table"); final File expectedFile = new File(getToolTestDataDir(), "expected.noFieldsSpecified.table"); final String[] args = new String[] {"--variant", inputFile.getAbsolutePath(), @@ -253,8 +253,8 @@ public void testNoFieldsSpecified() throws IOException { @Test public void testNoFieldsSpecifiedWithSamples() throws IOException { - final File inputFile = new File(getToolTestDataDir(), "1000G.phase3.snippet.vcf"); - final File outputFile = createTempFile(getToolTestDataDir(), "noFieldsSpecifiedWithSamplesOutput.table"); + final File inputFile = new File(getToolTestDataDir(), "VCFWithGenotypes_1000G.phase3.snippet.vcf"); + final File outputFile = createTempFile("noFieldsSpecifiedWithSamplesOutput", ".table"); final File expectedFile = new File(getToolTestDataDir(), "expected.noFieldsSpecifiedWithSamples.table"); final String[] args = new String[] {"--variant", inputFile.getAbsolutePath(), @@ -264,4 +264,17 @@ public void testNoFieldsSpecifiedWithSamples() throws IOException { IntegrationTestSpec.assertEqualTextFiles(outputFile, expectedFile); } + @Test + public void testNoFieldsSpecifiedFormatFieldInHeaderNoSamples() throws IOException { + final File inputFile = new File(getToolTestDataDir(), "VCFWithoutGenotypesWithFormatField_dbsnp_138.snippet.vcf"); + final File outputFile = createTempFile("noFieldsSpecifiedNoSamplesOutput", ".table"); + final File expectedFile = new File(getToolTestDataDir(), "expected.noFieldsSpecifiedNoSamples.table"); + + final String[] args = new String[] {"--variant", inputFile.getAbsolutePath(), + "-O", outputFile.getAbsolutePath()}; + runCommandLine(args); + + IntegrationTestSpec.assertEqualTextFiles(outputFile, expectedFile); + } + } diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/VCFWithGenotypes_1000G.phase3.snippet.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/VCFWithGenotypes_1000G.phase3.snippet.vcf new file mode 100644 index 00000000000..eaf1060e2e7 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/VCFWithGenotypes_1000G.phase3.snippet.vcf @@ -0,0 +1,48 @@ +##fileformat=VCFv4.2 +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##GATKCommandLine= +##GATKCommandLine= +##GATKVersion=2.5-191-g02f8427 +##HaplotypeCaller="analysis_type=HaplotypeCaller input_file=[/humgen/1kg/processing/production_wgs_final/chr20/ALL.chr20.bam.list] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=[/humgen/1kg/processing/production_wgs_final/chr20/.queue/scatterGather/call.for.1000G-1-sg/temp_0001_of_1000/scatter.intervals] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/humgen/1kg/reference/human_g1k_v37_decoy.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=200 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false performanceLog=null useOriginalQualities=false BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false defaultBaseQualities=-1 validation_strictness=SILENT remove_program_records=false keep_program_records=false unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false version=false out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub graphOutput=null bamOutput=null bam_compression=null disable_bam_indexing=null generate_md5=null simplifyBAM=null bamWriterType=CALLED_HAPLOTYPES dbsnp=(RodBinding name= source=UNBOUND) comp=[] annotation=[ClippingRankSumTest, DepthPerSampleHC] excludeAnnotation=[SpanningDeletions, TandemRepeatAnnotator] heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=10.0 standard_min_confidence_threshold_for_emitting=10.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.05 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null useDebruijnAssembler=false minKmerForDebruijnAssembler=11 onlyUseKmerSizeForDebruijnAssembler=-1 kmerSize=[10, 25] dontIncreaseKmerSizesForCycles=false numPruningSamples=3 maxPathsPerSample=8 dontRecoverDanglingTails=false minPruning=2 gcpHMM=10 includeUmappedReads=false useAllelesTrigger=false useFilteredReadsForAnnotations=false phredScaledGlobalReadMismappingRate=45 maxNumHaplotypesInPopulation=25 mergeVariantsViaLD=false pair_hmm_implementation=LOGLESS_CACHING keepRG=null justDetermineActiveRegions=false dontGenotype=false errorCorrectKmers=false debug=false debugGraphTransformations=false useLowQualityBasesForAssembly=false dontTrimActiveRegions=false dontUseSoftClippedBases=false captureAssemblyFailureBAM=false allowCyclesInKmerGraphToGeneratePaths=false errorCorrectReads=false kmerLengthForReadErrorCorrection=25 minObservationsForKmerToBeSolid=20 activityProfileOut=null activeRegionOut=null activeRegionIn=null activeRegionExtension=null forceActive=false activeRegionMaxSize=null bandPassSigma=null min_mapping_quality_score=20 filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false" +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##contig= +##source=SelectVariants +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097 HG00099 +20 10000054 . CTTTG C 504.42 PASS AC=0;AF=0.00;AN=6;BaseQRankSum=-0.975;ClippingRankSum=-2.925;DP=22;FS=1.899;InbreedingCoeff=0.0592;MQ=59.27;MQ0=0;MQRankSum=-3.212;QD=2.43;ReadPosRankSum=-0.264;VQSLOD=5.10;culprit=FS GT:AD:DP:GQ:PL 0/0:2,0:2:6:0,6,119 0/0:10,0:10:29:0,29,592 0/0:10,0:10:30:0,30,598 +20 10000107 . T C 263.95 PASS AC=0;AF=0.00;AN=6;BaseQRankSum=-0.444;ClippingRankSum=-3.132;DP=25;FS=0.948;InbreedingCoeff=-0.0102;MQ=59.19;MQ0=0;MQRankSum=2.292;POSITIVE_TRAIN_SITE;QD=10.56;ReadPosRankSum=0.055;VQSLOD=7.76;culprit=FS GT:AD:DP:GQ:PL 0/0:5,0:5:15:0,15,387 0/0:13,0:13:42:0,42,786 0/0:7,0:7:24:0,24,548 +20 10000117 . C T 329458.17 PASS AC=1;AF=0.167;AN=6;BaseQRankSum=10.505;ClippingRankSum=-20.658;DP=28;FS=8.305;InbreedingCoeff=0.1727;MQ=59.17;MQ0=0;MQRankSum=2.689;POSITIVE_TRAIN_SITE;QD=25.46;ReadPosRankSum=-4.688;VQSLOD=3.19;culprit=ReadPosRankSum GT:AD:DP:GQ:PL 0/0:5,0:5:15:0,15,189 0/1:8,8:16:99:254,0,231 0/0:7,0:7:21:0,21,271 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/VCFWithoutGenotypesWithFormatField_dbsnp_138.snippet.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/VCFWithoutGenotypesWithFormatField_dbsnp_138.snippet.vcf new file mode 100644 index 00000000000..a595955ce8b --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/VCFWithoutGenotypesWithFormatField_dbsnp_138.snippet.vcf @@ -0,0 +1,118 @@ +##fileformat=VCFv4.2 +##FILTER= +##FORMAT= +##GATKCommandLine.SelectVariants= +##GATKCommandLine= +##GATKCommandLine= +##INFO= +##INFO= +##INFO= +##INFO== 1% and for which 2 or more founders contribute to that minor allele frequency."> +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##dbSNP_BUILD_ID=138 +##fileDate=20130806 +##phasing=partial +##source=SelectVariants +##variationPropertyDocumentationUrl=ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf +#CHROM POS ID REF ALT QUAL FILTER INFO +20 10000092 rs183046704 T G . . CAF=[0.9991,0.0009183];COMMON=0;KGPROD;KGPhase1;RS=183046704;RSPOS=10000092;SAO=0;SSR=0;VC=SNV;VP=0x050000000001000014000100;WGT=1;dbSNPBuildID=135 +20 10000107 rs188245245 T C . . CAF=[0.9982,0.001837];COMMON=0;KGPROD;KGPhase1;RS=188245245;RSPOS=10000107;SAO=0;SSR=0;VC=SNV;VP=0x050000000001000014000100;WGT=1;dbSNPBuildID=135 +20 10000117 rs4816203 C T . . CAF=[0.3682,0.6318];COMMON=1;G5;G5A;GNO;KGPROD;KGPhase1;KGPilot123;OTHERKG;RS=4816203;RSPOS=10000117;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=0x05010000000117011e000100;WGT=1;dbSNPBuildID=111 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/VCFWithoutGenotypes_dbsnp_138.snippet.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/VCFWithoutGenotypes_dbsnp_138.snippet.vcf new file mode 100644 index 00000000000..628a1d05d1e --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/VCFWithoutGenotypes_dbsnp_138.snippet.vcf @@ -0,0 +1,117 @@ +##fileformat=VCFv4.2 +##FILTER= +##GATKCommandLine.SelectVariants= +##GATKCommandLine= +##GATKCommandLine= +##INFO= +##INFO= +##INFO= +##INFO== 1% and for which 2 or more founders contribute to that minor allele frequency."> +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##dbSNP_BUILD_ID=138 +##fileDate=20130806 +##phasing=partial +##source=SelectVariants +##variationPropertyDocumentationUrl=ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf +#CHROM POS ID REF ALT QUAL FILTER INFO +20 10000092 rs183046704 T G . . CAF=[0.9991,0.0009183];COMMON=0;KGPROD;KGPhase1;RS=183046704;RSPOS=10000092;SAO=0;SSR=0;VC=SNV;VP=0x050000000001000014000100;WGT=1;dbSNPBuildID=135 +20 10000107 rs188245245 T C . . CAF=[0.9982,0.001837];COMMON=0;KGPROD;KGPhase1;RS=188245245;RSPOS=10000107;SAO=0;SSR=0;VC=SNV;VP=0x050000000001000014000100;WGT=1;dbSNPBuildID=135 +20 10000117 rs4816203 C T . . CAF=[0.3682,0.6318];COMMON=1;G5;G5A;GNO;KGPROD;KGPhase1;KGPilot123;OTHERKG;RS=4816203;RSPOS=10000117;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=0x05010000000117011e000100;WGT=1;dbSNPBuildID=111 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/expected.noFieldsSpecifiedNoSamples.table b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/expected.noFieldsSpecifiedNoSamples.table new file mode 100644 index 00000000000..6a6253c3ca6 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/expected.noFieldsSpecifiedNoSamples.table @@ -0,0 +1,4 @@ +CHROM POS ID REF ALT QUAL FILTER AC CAF CLNDSDB COMMON DP DSS HD KGPROD KGPhase1 LSD REF RS RSPOS SAO SSR VC VP WGT dbSNPBuildID +20 10000092 rs183046704 T G -10.0 PASS NA [0.9991,0.0009183] NA 0 NA NA NA true true NA T 183046704 10000092 0 0 SNV 0x050000000001000014000100 1 135 +20 10000107 rs188245245 T C -10.0 PASS NA [0.9982,0.001837] NA 0 NA NA NA true true NA T 188245245 10000107 0 0 SNV 0x050000000001000014000100 1 135 +20 10000117 rs4816203 C T -10.0 PASS NA [0.3682,0.6318] NA 1 NA NA NA true true NA C 4816203 10000117 0 0 SNV 0x05010000000117011e000100 1 111 From cb099242df2d6d469f59c422ff21ee92f8724cf4 Mon Sep 17 00:00:00 2001 From: Orli Cohen Date: Wed, 29 Jun 2022 21:31:09 -0400 Subject: [PATCH 7/7] responding to review --- .../walkers/variantutils/VariantsToTable.java | 2 +- .../VariantsToTableIntegrationTest.java | 4 +- .../VariantsToTable/1000G.phase3.snippet.vcf | 48 ------- .../expected.noFieldsSpecified.table | 4 - ...raheaderlinesdeleted_dbsnp_138.snippet.vcf | 117 ------------------ 5 files changed, 3 insertions(+), 172 deletions(-) delete mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/1000G.phase3.snippet.vcf delete mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/expected.noFieldsSpecified.table delete mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/extraheaderlinesdeleted_dbsnp_138.snippet.vcf diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java index 83fe33d671d..db5d4e2e63c 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable.java @@ -246,7 +246,7 @@ public void onTraversalStart() { // if fields specified, but none are genotype fields, set samples to empty if (genotypeFieldsToTake.isEmpty() && asGenotypeFieldsToTake.isEmpty()) { - samples = Collections.emptySortedSet(); + samples = Collections.emptySortedSet(); } else { final Map vcfHeaders = Collections.singletonMap(getDrivingVariantsFeatureInput().getName(), getHeaderForVariants()); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java index f7426201709..10c922a267a 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTableIntegrationTest.java @@ -239,10 +239,10 @@ public void testMoltenOutputWithMultipleAlleles() throws IOException { } @Test - public void testNoFieldsSpecified() throws IOException { + public void testNoFieldsSpecifiedNoSamples() throws IOException { final File inputFile = new File(getToolTestDataDir(), "VCFWithoutGenotypes_dbsnp_138.snippet.vcf"); final File outputFile = createTempFile("noFieldsSpecifiedOutput", ".table"); - final File expectedFile = new File(getToolTestDataDir(), "expected.noFieldsSpecified.table"); + final File expectedFile = new File(getToolTestDataDir(), "expected.noFieldsSpecifiedNoSamples.table"); final String[] args = new String[] {"--variant", inputFile.getAbsolutePath(), "-O", outputFile.getAbsolutePath()}; diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/1000G.phase3.snippet.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/1000G.phase3.snippet.vcf deleted file mode 100644 index eaf1060e2e7..00000000000 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/1000G.phase3.snippet.vcf +++ /dev/null @@ -1,48 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##GATKCommandLine= -##GATKCommandLine= -##GATKVersion=2.5-191-g02f8427 -##HaplotypeCaller="analysis_type=HaplotypeCaller input_file=[/humgen/1kg/processing/production_wgs_final/chr20/ALL.chr20.bam.list] read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=[/humgen/1kg/processing/production_wgs_final/chr20/.queue/scatterGather/call.for.1000G-1-sg/temp_0001_of_1000/scatter.intervals] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/humgen/1kg/reference/human_g1k_v37_decoy.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=200 baq=OFF baqGapOpenPenalty=40.0 fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false performanceLog=null useOriginalQualities=false BQSR=null quantize_quals=0 disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 allow_bqsr_on_reduced_bams_despite_repeated_warnings=false defaultBaseQualities=-1 validation_strictness=SILENT remove_program_records=false keep_program_records=false unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false logging_level=INFO log_to_file=null help=false version=false out=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub no_cmdline_in_header=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub bcf=org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub graphOutput=null bamOutput=null bam_compression=null disable_bam_indexing=null generate_md5=null simplifyBAM=null bamWriterType=CALLED_HAPLOTYPES dbsnp=(RodBinding name= source=UNBOUND) comp=[] annotation=[ClippingRankSumTest, DepthPerSampleHC] excludeAnnotation=[SpanningDeletions, TandemRepeatAnnotator] heterozygosity=0.001 indel_heterozygosity=1.25E-4 genotyping_mode=DISCOVERY output_mode=EMIT_VARIANTS_ONLY standard_min_confidence_threshold_for_calling=10.0 standard_min_confidence_threshold_for_emitting=10.0 alleles=(RodBinding name= source=UNBOUND) max_alternate_alleles=6 input_prior=[] contamination_fraction_to_filter=0.05 contamination_fraction_per_sample_file=null p_nonref_model=EXACT_INDEPENDENT exactcallslog=null useDebruijnAssembler=false minKmerForDebruijnAssembler=11 onlyUseKmerSizeForDebruijnAssembler=-1 kmerSize=[10, 25] dontIncreaseKmerSizesForCycles=false numPruningSamples=3 maxPathsPerSample=8 dontRecoverDanglingTails=false minPruning=2 gcpHMM=10 includeUmappedReads=false useAllelesTrigger=false useFilteredReadsForAnnotations=false phredScaledGlobalReadMismappingRate=45 maxNumHaplotypesInPopulation=25 mergeVariantsViaLD=false pair_hmm_implementation=LOGLESS_CACHING keepRG=null justDetermineActiveRegions=false dontGenotype=false errorCorrectKmers=false debug=false debugGraphTransformations=false useLowQualityBasesForAssembly=false dontTrimActiveRegions=false dontUseSoftClippedBases=false captureAssemblyFailureBAM=false allowCyclesInKmerGraphToGeneratePaths=false errorCorrectReads=false kmerLengthForReadErrorCorrection=25 minObservationsForKmerToBeSolid=20 activityProfileOut=null activeRegionOut=null activeRegionIn=null activeRegionExtension=null forceActive=false activeRegionMaxSize=null bandPassSigma=null min_mapping_quality_score=20 filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false" -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##contig= -##contig= -##source=SelectVariants -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097 HG00099 -20 10000054 . CTTTG C 504.42 PASS AC=0;AF=0.00;AN=6;BaseQRankSum=-0.975;ClippingRankSum=-2.925;DP=22;FS=1.899;InbreedingCoeff=0.0592;MQ=59.27;MQ0=0;MQRankSum=-3.212;QD=2.43;ReadPosRankSum=-0.264;VQSLOD=5.10;culprit=FS GT:AD:DP:GQ:PL 0/0:2,0:2:6:0,6,119 0/0:10,0:10:29:0,29,592 0/0:10,0:10:30:0,30,598 -20 10000107 . T C 263.95 PASS AC=0;AF=0.00;AN=6;BaseQRankSum=-0.444;ClippingRankSum=-3.132;DP=25;FS=0.948;InbreedingCoeff=-0.0102;MQ=59.19;MQ0=0;MQRankSum=2.292;POSITIVE_TRAIN_SITE;QD=10.56;ReadPosRankSum=0.055;VQSLOD=7.76;culprit=FS GT:AD:DP:GQ:PL 0/0:5,0:5:15:0,15,387 0/0:13,0:13:42:0,42,786 0/0:7,0:7:24:0,24,548 -20 10000117 . C T 329458.17 PASS AC=1;AF=0.167;AN=6;BaseQRankSum=10.505;ClippingRankSum=-20.658;DP=28;FS=8.305;InbreedingCoeff=0.1727;MQ=59.17;MQ0=0;MQRankSum=2.689;POSITIVE_TRAIN_SITE;QD=25.46;ReadPosRankSum=-4.688;VQSLOD=3.19;culprit=ReadPosRankSum GT:AD:DP:GQ:PL 0/0:5,0:5:15:0,15,189 0/1:8,8:16:99:254,0,231 0/0:7,0:7:21:0,21,271 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/expected.noFieldsSpecified.table b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/expected.noFieldsSpecified.table deleted file mode 100644 index 6a6253c3ca6..00000000000 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/expected.noFieldsSpecified.table +++ /dev/null @@ -1,4 +0,0 @@ -CHROM POS ID REF ALT QUAL FILTER AC CAF CLNDSDB COMMON DP DSS HD KGPROD KGPhase1 LSD REF RS RSPOS SAO SSR VC VP WGT dbSNPBuildID -20 10000092 rs183046704 T G -10.0 PASS NA [0.9991,0.0009183] NA 0 NA NA NA true true NA T 183046704 10000092 0 0 SNV 0x050000000001000014000100 1 135 -20 10000107 rs188245245 T C -10.0 PASS NA [0.9982,0.001837] NA 0 NA NA NA true true NA T 188245245 10000107 0 0 SNV 0x050000000001000014000100 1 135 -20 10000117 rs4816203 C T -10.0 PASS NA [0.3682,0.6318] NA 1 NA NA NA true true NA C 4816203 10000117 0 0 SNV 0x05010000000117011e000100 1 111 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/extraheaderlinesdeleted_dbsnp_138.snippet.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/extraheaderlinesdeleted_dbsnp_138.snippet.vcf deleted file mode 100644 index 628a1d05d1e..00000000000 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/VariantsToTable/extraheaderlinesdeleted_dbsnp_138.snippet.vcf +++ /dev/null @@ -1,117 +0,0 @@ -##fileformat=VCFv4.2 -##FILTER= -##GATKCommandLine.SelectVariants= -##GATKCommandLine= -##GATKCommandLine= -##INFO= -##INFO= -##INFO= -##INFO== 1% and for which 2 or more founders contribute to that minor allele frequency."> -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##dbSNP_BUILD_ID=138 -##fileDate=20130806 -##phasing=partial -##source=SelectVariants -##variationPropertyDocumentationUrl=ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf -#CHROM POS ID REF ALT QUAL FILTER INFO -20 10000092 rs183046704 T G . . CAF=[0.9991,0.0009183];COMMON=0;KGPROD;KGPhase1;RS=183046704;RSPOS=10000092;SAO=0;SSR=0;VC=SNV;VP=0x050000000001000014000100;WGT=1;dbSNPBuildID=135 -20 10000107 rs188245245 T C . . CAF=[0.9982,0.001837];COMMON=0;KGPROD;KGPhase1;RS=188245245;RSPOS=10000107;SAO=0;SSR=0;VC=SNV;VP=0x050000000001000014000100;WGT=1;dbSNPBuildID=135 -20 10000117 rs4816203 C T . . CAF=[0.3682,0.6318];COMMON=1;G5;G5A;GNO;KGPROD;KGPhase1;KGPilot123;OTHERKG;RS=4816203;RSPOS=10000117;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=0x05010000000117011e000100;WGT=1;dbSNPBuildID=111