From 8fd8e7f84aeba966d938c87ed2ea6d85328a84df Mon Sep 17 00:00:00 2001 From: Louis Bergelson Date: Thu, 16 Nov 2023 14:01:21 -0500 Subject: [PATCH] adding negative test for bad combinations of avoidnio and other args --- .../tools/genomicsdb/GenomicsDBImport.java | 52 +++++++++-------- .../GenomicsDBImportIntegrationTest.java | 57 ++++++++++++++----- .../spark/PileupSparkIntegrationTest.java | 6 -- .../testutils/ArgumentsBuilder.java | 25 ++++++++ .../hellbender/testutils/BaseTest.java | 7 +++ 5 files changed, 104 insertions(+), 43 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java b/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java index 25fdaf24deb..783b520f272 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java @@ -37,7 +37,6 @@ import org.genomicsdb.importer.GenomicsDBImporter; import org.genomicsdb.model.BatchCompletionCallbackFunctionArgument; import org.genomicsdb.model.Coordinates; -import org.genomicsdb.model.GenomicsDBCallsetsMapProto; import org.genomicsdb.model.GenomicsDBImportConfiguration; import org.genomicsdb.model.GenomicsDBVidMapProto; import org.genomicsdb.model.ImportConfig; @@ -50,16 +49,17 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; +import java.util.Arrays; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; +import java.util.Objects; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; -import java.util.Arrays; import java.util.concurrent.CompletionException; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; @@ -68,9 +68,6 @@ import java.util.concurrent.ThreadFactory; import 
java.util.stream.Collectors; -import static org.broadinstitute.hellbender.tools.genomicsdb.GATKGenomicsDBUtils.genomicsDBGetAbsolutePath; -import static org.broadinstitute.hellbender.tools.genomicsdb.GATKGenomicsDBUtils.genomicsDBApppendPaths; - /** * Import single-sample GVCFs into GenomicsDB before joint genotyping. * @@ -219,7 +216,7 @@ public final class GenomicsDBImport extends GATKTool { public static final String MAX_NUM_INTERVALS_TO_IMPORT_IN_PARALLEL = "max-num-intervals-to-import-in-parallel"; public static final String MERGE_CONTIGS_INTO_NUM_PARTITIONS = "merge-contigs-into-num-partitions"; public static final String BYPASS_FEATURE_READER = "bypass-feature-reader"; - public static final String VCF_HEADER = "header"; + public static final String VCF_HEADER_OVERRIDE = "header"; public static final int INTERVAL_LIST_SIZE_WARNING_THRESHOLD = 100; public static final int ARRAY_COLUMN_BOUNDS_START = 0; public static final int ARRAY_COLUMN_BOUNDS_END = 1; @@ -242,7 +239,7 @@ public final class GenomicsDBImport extends GATKTool { "when using the "+INTERVAL_LIST_LONG_NAME+" option. " + "Either this or "+WORKSPACE_ARG_LONG_NAME+" must be specified. " + "Must point to an existing workspace.", - mutex = {WORKSPACE_ARG_LONG_NAME, VCF_HEADER}) + mutex = {WORKSPACE_ARG_LONG_NAME, VCF_HEADER_OVERRIDE}) private String incrementalImportWorkspace; @Argument(fullName = SEGMENT_SIZE_ARG_LONG_NAME, @@ -257,7 +254,7 @@ public final class GenomicsDBImport extends GATKTool { " data for only a single sample. 
Either this or " + SAMPLE_NAME_MAP_LONG_NAME + " must be specified.", optional = true, - mutex = {SAMPLE_NAME_MAP_LONG_NAME}) + mutex = {SAMPLE_NAME_MAP_LONG_NAME, AVOID_NIO}) private List variantPaths; @Argument(fullName = VCF_BUFFER_SIZE_ARG_NAME, @@ -367,7 +364,7 @@ public final class GenomicsDBImport extends GATKTool { optional = true) private boolean sharedPosixFSOptimizations = false; - @Argument(fullName = VCF_HEADER, + @Argument(fullName = VCF_HEADER_OVERRIDE, doc = "Specify a vcf file to use instead of reading and combining headers from the input vcfs", optional = true, mutex ={INCREMENTAL_WORKSPACE_ARG_LONG_NAME} @@ -384,10 +381,12 @@ public final class GenomicsDBImport extends GATKTool { @Argument(fullName = AVOID_NIO, doc = "Do not attempt to open the input vcf file paths in java. This can only be used with " + BYPASS_FEATURE_READER + ". It allows operating on file systems which GenomicsDB understands how to open but GATK does not. This will disable " - + "many of the sanity checks." + + "many of the sanity checks.", + mutex = {StandardArgumentDefinitions.VARIANT_LONG_NAME} ) @Advanced private boolean avoidNio = false; + @Argument(fullName = USE_GCS_HDFS_CONNECTOR, doc = "Use the GCS HDFS Connector instead of the native GCS SDK client with gs:// URLs.", optional = true) @@ -395,7 +394,7 @@ public final class GenomicsDBImport extends GATKTool { //executor service used when vcfInitializerThreads > 1 private ExecutorService inputPreloadExecutorService; - + /** * Get the largest interval per contig that contains the intervals specified on the command line. 
* @param getIntervals intervals to be transformed @@ -457,10 +456,6 @@ public int getDefaultCloudIndexPrefetchBufferSize() { // Path to combined VCF header file to be written by GenomicsDBImporter private String vcfHeaderFile; - // GenomicsDB callset map protobuf structure containing all callset names - // used to write the callset json file on traversal success - private GenomicsDBCallsetsMapProto.CallsetMappingPB callsetMappingPB; - //in-progress batchCount private int batchCount = 1; @@ -517,8 +512,19 @@ private void assertVariantPathsOrSampleNameFileWasSpecified(){ private void assertAvoidNioConditionsAreValid() { if (avoidNio && (!bypassFeatureReader || headerOverride == null) ){ - throw new CommandLineException("If --" +AVOID_NIO + " is set then --" + BYPASS_FEATURE_READER - + " and --" + VCF_HEADER + " must also be specified."); + final List missing = new ArrayList<>(); + if(!bypassFeatureReader){ + missing.add(BYPASS_FEATURE_READER); + } + if(headerOverride == null){ + missing.add(VCF_HEADER_OVERRIDE); + } + final String missingArgs = String.join(" and ", missing); + + // this potentially produces an exception with bad grammar but that's probably ok + throw new CommandLineException.MissingArgument(missingArgs, "If --" +AVOID_NIO + " is set then --" + BYPASS_FEATURE_READER + + " and --" + VCF_HEADER_OVERRIDE + " must also be specified."); + } } @@ -550,7 +556,7 @@ private static void assertIntervalsCoverEntireContigs(GenomicsDBImporter importe */ private void initializeHeaderAndSampleMappings() { // Only one of -V and --sampleNameMapFile may be specified - if (variantPaths != null && variantPaths.size() > 0) { + if (variantPaths != null && !variantPaths.isEmpty()) { // -V was specified final List headers = new ArrayList<>(variantPaths.size()); sampleNameMap = new SampleNameMap(); @@ -669,9 +675,9 @@ private void writeIntervalListToFile() { @Override public void onTraversalStart() { String workspaceDir = overwriteCreateOrCheckWorkspace(); - vidMapJSONFile = 
genomicsDBApppendPaths(workspaceDir, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME); - callsetMapJSONFile = genomicsDBApppendPaths(workspaceDir, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME); - vcfHeaderFile = genomicsDBApppendPaths(workspaceDir, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME); + vidMapJSONFile = GATKGenomicsDBUtils.genomicsDBApppendPaths(workspaceDir, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME); + callsetMapJSONFile = GATKGenomicsDBUtils.genomicsDBApppendPaths(workspaceDir, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME); + vcfHeaderFile = GATKGenomicsDBUtils.genomicsDBApppendPaths(workspaceDir, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME); if (getIntervalsFromExistingWorkspace) { // intervals may be null if merge-contigs-into-num-partitions was used to create the workspace // if so, we need to wait for vid to be generated before writing out the interval list @@ -811,7 +817,7 @@ private List generateIntervalListFromWorkspace() { final int start = Integer.parseInt(partitionInfo[1]); final int end = Integer.parseInt(partitionInfo[2]); return new SimpleInterval(contig, start, end); - }).filter(o -> o != null).collect(Collectors.toList()); + }).filter(Objects::nonNull).collect(Collectors.toList()); } private ImportConfig createImportConfig(final int batchSize) { @@ -1027,7 +1033,7 @@ public CloseableTribbleIterator iterator() throws IOException { * @return The workspace directory */ private String overwriteCreateOrCheckWorkspace() { - String workspaceDir = genomicsDBGetAbsolutePath(workspace); + String workspaceDir = GATKGenomicsDBUtils.genomicsDBGetAbsolutePath(workspace); // From JavaDoc for GATKGenomicsDBUtils.createTileDBWorkspacevid // returnCode = 0 : OK. If overwriteExistingWorkspace is true and the workspace exists, it is deleted first. 
// returnCode = -1 : path was not a directory diff --git a/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java index 17ba65831ea..85bc0e62737 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java @@ -101,7 +101,7 @@ public final class GenomicsDBImportIntegrationTest extends CommandLineProgramTes //This file was obtained from combined.gatk3.7.g.vcf.gz by dropping all the samples private static final String COMBINED_SITES_ONLY = largeFileTestDir + "gvcfs/combined.gatk3.7_sites_only.g.vcf.gz"; private static final String INTERVAL_PICARD_STYLE_EXPECTED = toolsTestDir + "GenomicsDBImport/interval_expected.interval_list"; - private static final String MULTIPLE_NON_ADJACENT_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS_PICARD_STYLE_EXPECTED = + private static final String MULTIPLE_NON_ADJACENT_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS_PICARD_STYLE_EXPECTED = toolsTestDir + "GenomicsDBImport/multiple_non_adjacent_intervals_combine_gvcfs_expected.interval_list"; private static final String MERGED_CONTIGS_INTERVAL_PICARD_STYLE_EXPECTED = toolsTestDir + "GenomicsDBImport/chr20_chr21_merged_contigs_expected.interval_list"; @@ -383,6 +383,35 @@ public void testGenomicsDbImportThrowsOnMnp() throws IOException { } } + @DataProvider + public Object[][] getInvalidArgsForAvoidNio(){ + final ArgumentsBuilder baseArgs = ArgumentsBuilder.create() + .add(GenomicsDBImport.WORKSPACE_ARG_LONG_NAME, createTempFile()) + .addInterval("fake") + .addFlag(GenomicsDBImport.AVOID_NIO); + return new Object[][]{ + {baseArgs, CommandLineException.MissingArgument.class}, //no input + {baseArgs.copy() + .addVCF("fake.vcf"), CommandLineException.class + }, //not allowed with variant, we should have some sort of 
mutex exception... + {baseArgs.copy() + .add(GenomicsDBImport.SAMPLE_NAME_MAP_LONG_NAME, "fake.samplenames"), CommandLineException.MissingArgument.class + }, //missing header + {baseArgs.copy() + .add(GenomicsDBImport.VCF_HEADER_OVERRIDE, "fake.vcf"), CommandLineException.MissingArgument.class + }, //missing input + {baseArgs.copy() + .add(GenomicsDBImport.VCF_HEADER_OVERRIDE, "fake.vcf") + .addVCF("fake.vcf"), CommandLineException.class // can't use with -V + } + }; + } + + @Test(dataProvider = "getInvalidArgsForAvoidNio") + public void testInvalidArgumentCombinationsWithAvoidNio(ArgumentsBuilder args, Class expectedException){ + Assert.assertThrows(expectedException, () -> runCommandLine(args)); + } + private void testGenomicsDBImporterWithGenotypes(final List vcfInputs, final List intervals, final String expectedCombinedVCF, final String referenceFile) throws IOException { @@ -408,7 +437,7 @@ private void testGenomicsDBImporterWithGenotypes(final List vcfInputs, f final boolean testAll, final boolean produceGTField, final boolean sitesOnlyQuery) throws IOException { - testGenomicsDBImporterWithGenotypes(vcfInputs, intervals, expectedCombinedVCF, referenceFile, testAll, produceGTField, + testGenomicsDBImporterWithGenotypes(vcfInputs, intervals, expectedCombinedVCF, referenceFile, testAll, produceGTField, sitesOnlyQuery, false); } @@ -456,11 +485,11 @@ private void testGenomicsDBAgainstCombineGVCFs(final List vcfInputs, fin private void testGenomicsDBAgainstCombineGVCFs(final List vcfInputs, final List intervals, final String referenceFile, final String[] CombineGVCFArgs, - final int numVCFReaderThreadsInImporter, final int chrsToPartitions, + final int numVCFReaderThreadsInImporter, final int chrsToPartitions, final boolean useNativeReader) throws IOException { final String workspace = createTempDir("genomicsdb-tests-").getAbsolutePath() + "/workspace"; - writeToGenomicsDB(vcfInputs, intervals, workspace, 0, false, 0, numVCFReaderThreadsInImporter, false, false, 
false, + writeToGenomicsDB(vcfInputs, intervals, workspace, 0, false, 0, numVCFReaderThreadsInImporter, false, false, false, chrsToPartitions, useNativeReader); checkJSONFilesAreWritten(workspace); for(SimpleInterval currInterval : intervals) { @@ -504,7 +533,7 @@ public void testGenomicsDBAlleleSpecificAnnotationsInTheMiddleOfSpanningDeletion @Test public void testGenomicsDBNoRemapMissingToNonRef() throws IOException { - testGenomicsDBAgainstCombineGVCFs(Arrays.asList(COMBINEGVCFS_TEST_DIR+"NA12878.AS.NON_REF_remap_check.chr20snippet.g.vcf", + testGenomicsDBAgainstCombineGVCFs(Arrays.asList(COMBINEGVCFS_TEST_DIR+"NA12878.AS.NON_REF_remap_check.chr20snippet.g.vcf", COMBINEGVCFS_TEST_DIR+"NA12892.AS.chr20snippet.g.vcf"), new ArrayList(Arrays.asList(new SimpleInterval("20", 10433313, 10700000))), b37_reference_20_21, @@ -671,14 +700,14 @@ private void writeToGenomicsDB(final List vcfInputs, final List vcfInputs, final List intervals, final String workspace, - final int batchSize, final Boolean useBufferSize, final int bufferSizePerSample, int threads, + final int batchSize, final Boolean useBufferSize, final int bufferSizePerSample, int threads, final boolean mergeIntervals, final boolean overwriteWorkspace, final boolean incremental) { - writeToGenomicsDB(vcfInputs, intervals, workspace, batchSize, useBufferSize, bufferSizePerSample, threads, mergeIntervals, + writeToGenomicsDB(vcfInputs, intervals, workspace, batchSize, useBufferSize, bufferSizePerSample, threads, mergeIntervals, overwriteWorkspace, incremental, 0, false); } private void writeToGenomicsDB(final List vcfInputs, final List intervals, final String workspace, - final int batchSize, final Boolean useBufferSize, final int bufferSizePerSample, int threads, + final int batchSize, final Boolean useBufferSize, final int bufferSizePerSample, int threads, final boolean mergeIntervals, final boolean overwriteWorkspace, final boolean incremental, final int chrsToPartitions, final boolean useNativeReader) { final 
ArgumentsBuilder args = new ArgumentsBuilder(); @@ -1013,7 +1042,7 @@ public Object[][] dataForTestExplicitIndicesInSampleNameMapInTheCloud() { final String NA19625_UNCOMPRESSED_WITH_INDEX = GVCFS_WITH_INDICES_BUCKET + "NA19625.g.vcf"; final String NA19625_UNCOMPRESSED_NO_INDEX = GVCFS_WITHOUT_INDICES_BUCKET + "NA19625.g.vcf"; final String NA19625_UNCOMPRESSED_INDEX = GVCF_INDICES_ONLY_BUCKET + "NA19625.g.vcf.idx"; - + return new Object[][] { // All VCFs have explicit indices, samples in order, TABIX index { @@ -1371,7 +1400,7 @@ public void testIncrementalMustHaveExistingWorkspace() { writeToGenomicsDB(LOCAL_GVCFS, INTERVAL, workspace + "workspace2", 0, false, 0, 1, false, false, true); } - private void testIncrementalImport(final int stepSize, final List intervals, final String workspace, + private void testIncrementalImport(final int stepSize, final List intervals, final String workspace, final int batchSize, final boolean produceGTField, final boolean useVCFCodec, final String expected, final int chrsToPartitions, final boolean useNativeReader) throws IOException { testIncrementalImport(stepSize, intervals, workspace, batchSize, produceGTField, useVCFCodec, expected, @@ -1384,7 +1413,7 @@ private void testIncrementalImport(final int stepSize, final List 0 && useNativeReader)); checkJSONFilesAreWritten(workspace); } @@ -1434,7 +1463,7 @@ public void testGenomicsDBIncrementalAndBatchSize1WithNonAdjacentIntervalsNative @Test(expectedExceptions = {UserException.class}, expectedExceptionsMessageRegExp=".*must be block compressed.*") public void testGenomicsDBImportNativeReaderNoCompressedVcf() throws IOException { - testGenomicsDBImporterWithGenotypes(Arrays.asList(NA_12878_PHASED), MULTIPLE_INTERVALS, NA_12878_PHASED, b37_reference_20_21, + testGenomicsDBImporterWithGenotypes(Arrays.asList(NA_12878_PHASED), MULTIPLE_INTERVALS, NA_12878_PHASED, b37_reference_20_21, false, true, false, true); } @@ -1448,14 +1477,14 @@ public void 
testGenomicsDBIncrementalAndBatchSize1WithNonAdjacentIntervalsMergeC @Test public void testGenomicsDBIncrementalAndBatchSize2() throws IOException { final String workspace = createTempDir("genomicsdb-incremental-tests").getAbsolutePath() + "/workspace"; - testIncrementalImport(2, MULTIPLE_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS, workspace, 2, true, false, + testIncrementalImport(2, MULTIPLE_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS, workspace, 2, true, false, COMBINED_WITH_GENOTYPES, 0, false); } @Test public void testGenomicsDBMultipleIncrementalImports() throws IOException { final String workspace = createTempDir("genomicsdb-incremental-tests").getAbsolutePath() + "/workspace"; - testIncrementalImport(1, MULTIPLE_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS, workspace, 2, true, true, + testIncrementalImport(1, MULTIPLE_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS, workspace, 2, true, true, COMBINED_WITH_GENOTYPES, 0, false); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/PileupSparkIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/PileupSparkIntegrationTest.java index 656000c25d9..4af39b776ac 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/spark/PileupSparkIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/PileupSparkIntegrationTest.java @@ -25,12 +25,6 @@ public Object[][] shuffleParameters() { return new Object[][] { { false }, { true } }; } - private File createTempFile() throws IOException { - final File out = IOUtils.createTempFile("out", ".txt"); - out.delete(); - return out; - } - @Test(dataProvider = "shuffle") public void testSimplePileup(boolean useShuffle) throws Exception { final File out = createTempFile(); diff --git a/src/testUtils/java/org/broadinstitute/hellbender/testutils/ArgumentsBuilder.java b/src/testUtils/java/org/broadinstitute/hellbender/testutils/ArgumentsBuilder.java index 289cae67008..f343ec74c55 100644 --- 
a/src/testUtils/java/org/broadinstitute/hellbender/testutils/ArgumentsBuilder.java +++ b/src/testUtils/java/org/broadinstitute/hellbender/testutils/ArgumentsBuilder.java @@ -24,6 +24,15 @@ public final class ArgumentsBuilder { public ArgumentsBuilder(){} + /** + * static factory to allow fluent style creation + * create().add()... + * @return new ArgumentsBuilder + */ + public static ArgumentsBuilder create(){ + return new ArgumentsBuilder(); + } + public ArgumentsBuilder(Object[] args){ for (Object arg: args){ if (arg instanceof String){ @@ -34,6 +43,22 @@ public ArgumentsBuilder(Object[] args){ } } + /** + * Concatenate the arguments from other onto the end of this builder + * @return this builder combined with other + */ + public ArgumentsBuilder concat(ArgumentsBuilder other){ + this.args.addAll(other.args); + return this; + } + + /** + * @return a copy of this builder + */ + public ArgumentsBuilder copy(){ + return ArgumentsBuilder.create().concat(this); + } + /** * Add a string to the arguments list * Strings are processed specially, they are reformatted to match the new unix style arguments diff --git a/src/testUtils/java/org/broadinstitute/hellbender/testutils/BaseTest.java b/src/testUtils/java/org/broadinstitute/hellbender/testutils/BaseTest.java index 7eadf5aa2a1..988a6f8cc3c 100644 --- a/src/testUtils/java/org/broadinstitute/hellbender/testutils/BaseTest.java +++ b/src/testUtils/java/org/broadinstitute/hellbender/testutils/BaseTest.java @@ -294,6 +294,13 @@ public static File createTempFile(final String name, final String extension) { return IOUtils.createTempFile(name, extension); } + /** + * Create a temp file with an arbitrary name and extension + */ + public static File createTempFile(){ + return createTempFile("default", ".tmp"); + } + /** * Creates a temp path that will be deleted on exit after tests are complete. *