From 2ad4a3e5bbbb8a51ddb766a22d4c7f2e15a4c59c Mon Sep 17 00:00:00 2001
From: Louis Bergelson
Date: Fri, 8 Dec 2023 20:09:45 -0500
Subject: [PATCH] Allow GenomicsDBImport to connect to az:// files without
 interference (#8438)

* GATK's lack of support for az:// URIs means that although GenomicsDB can
  natively read them, parts of the java code crash when interacting with them

* Adding --avoid-nio and --header arguments

  These allow disabling all of the java interaction with the az:// links and
  simply passing them through to genomicsdb. This disables some safeguards
  but allows operating on files in azure.

* Update GenomicsDB version to 1.5.1 for improved azure support

* There are no direct tests on azure since we do not yet have any
  infrastructure to generate the necessary tokens; there is a disabled test
  which requires https://github.com/broadinstitute/gatk/issues/8612 before
  we can enable it.

---------

Co-authored-by: Nalini Ganapati
Co-authored-by: Nalini Ganapati
---
 build.gradle                                  |   4 +-
 .../tools/genomicsdb/GenomicsDBImport.java    | 156 ++++++++++++------
 .../GenomicsDBImportIntegrationTest.java      |  96 +++++++++--
 .../spark/PileupSparkIntegrationTest.java     |  14 +-
 .../tools/GenomicsDBImport/azureHeader.vcf    |  95 +++++++++++
 .../GenomicsDBImport/azureSampleNameMap.txt   |   1 +
 .../testutils/ArgumentsBuilder.java           |  25 +++
 .../hellbender/testutils/BaseTest.java        |  33 +++-
 .../testutils/CommandLineProgramTester.java   |   1 +
 9 files changed, 345 insertions(+), 80 deletions(-)
 create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/GenomicsDBImport/azureHeader.vcf
 create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/GenomicsDBImport/azureSampleNameMap.txt

diff --git a/build.gradle b/build.gradle
index 55bb2dca57c..d580ab8bfd0 100644
--- a/build.gradle
+++ b/build.gradle
@@ -63,7 +63,7 @@ final barclayVersion = System.getProperty('barclay.version','5.0.0')
 final sparkVersion = System.getProperty('spark.version', '3.3.1')
 final hadoopVersion = System.getProperty('hadoop.version', '3.3.6')
 final disqVersion = System.getProperty('disq.version','0.3.8')
-final genomicsdbVersion = System.getProperty('genomicsdb.version','1.5.0')
+final genomicsdbVersion = System.getProperty('genomicsdb.version','1.5.1')
 final bigQueryVersion = System.getProperty('bigQuery.version', '2.35.0')
 final bigQueryStorageVersion = System.getProperty('bigQueryStorage.version', '2.47.0')
 final guavaVersion = System.getProperty('guava.version', '32.1.3-jre')
@@ -976,7 +976,7 @@ ossIndexAudit {
     outputFormat = 'DEFAULT' // Optional, other values are: 'DEPENDENCY_GRAPH' prints dependency graph showing direct/transitive dependencies, 'JSON_CYCLONE_DX_1_4' prints a CycloneDX 1.4 SBOM in JSON format.
     showAll = false // if true prints all dependencies. By default is false, meaning only dependencies with vulnerabilities will be printed.
     printBanner = true // if true will print ASCII text banner. By default is true.
-    
+
     // ossIndexAudit can be configured to exclude vulnerabilities from matching
     // excludeVulnerabilityIds = ['39d74cc8-457a-4e57-89ef-a258420138c5'] // list containing ids of vulnerabilities to be ignored
     // excludeCoordinates = ['commons-fileupload:commons-fileupload:1.3'] // list containing coordinate of components which if vulnerable should be ignored
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java b/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java
index bf5b3a61198..fd58587abd4 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.java
@@ -22,6 +22,7 @@
 import org.broadinstitute.barclay.help.DocumentedFeature;
 import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
 import org.broadinstitute.hellbender.cmdline.programgroups.ShortVariantDiscoveryProgramGroup;
+import org.broadinstitute.hellbender.engine.FeatureInput;
 import org.broadinstitute.hellbender.engine.GATKTool;
 import org.broadinstitute.hellbender.exceptions.GATKException;
 import org.broadinstitute.hellbender.exceptions.UserException;
@@ -36,29 +37,29 @@
 import org.genomicsdb.importer.GenomicsDBImporter;
 import org.genomicsdb.model.BatchCompletionCallbackFunctionArgument;
 import org.genomicsdb.model.Coordinates;
-import org.genomicsdb.model.GenomicsDBCallsetsMapProto;
 import org.genomicsdb.model.GenomicsDBImportConfiguration;
 import org.genomicsdb.model.GenomicsDBVidMapProto;
 import org.genomicsdb.model.ImportConfig;
 
 import java.io.File;
 import java.io.IOException;
+import java.io.Serial;
 import java.net.URI;
 import java.net.URISyntaxException;
-import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.NoSuchElementException;
+import java.util.Objects;
 import java.util.Set;
 import java.util.SortedMap;
 import java.util.TreeMap;
-import java.util.Arrays;
 import java.util.concurrent.CompletionException;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
@@ -67,9 +68,6 @@
 import java.util.concurrent.ThreadFactory;
 import java.util.stream.Collectors;
 
-import static org.broadinstitute.hellbender.tools.genomicsdb.GATKGenomicsDBUtils.genomicsDBGetAbsolutePath;
-import static org.broadinstitute.hellbender.tools.genomicsdb.GATKGenomicsDBUtils.genomicsDBApppendPaths;
-
 /**
  * Import single-sample GVCFs into GenomicsDB before joint genotyping.
  *
@@ -146,7 +144,7 @@
  *
  * It is also possible to specify an explicit index for only a subset of the samples:
- * 
+ *
  * <pre>
  *  sample1      sample1.vcf.gz
  *  sample2      sample2.vcf.gz      sample2.vcf.gz.tbi
@@ -218,12 +216,14 @@ public final class GenomicsDBImport extends GATKTool {
     public static final String MAX_NUM_INTERVALS_TO_IMPORT_IN_PARALLEL = "max-num-intervals-to-import-in-parallel";
     public static final String MERGE_CONTIGS_INTO_NUM_PARTITIONS = "merge-contigs-into-num-partitions";
     public static final String BYPASS_FEATURE_READER = "bypass-feature-reader";
+    public static final String VCF_HEADER_OVERRIDE = "header";
     public static final int INTERVAL_LIST_SIZE_WARNING_THRESHOLD = 100;
     public static final int ARRAY_COLUMN_BOUNDS_START = 0;
     public static final int ARRAY_COLUMN_BOUNDS_END = 1;
 
     public static final String SHARED_POSIXFS_OPTIMIZATIONS = GenomicsDBArgumentCollection.SHARED_POSIXFS_OPTIMIZATIONS;
     public static final String USE_GCS_HDFS_CONNECTOR = GenomicsDBArgumentCollection.USE_GCS_HDFS_CONNECTOR;
+    public static final String AVOID_NIO = "avoid-nio";
 
     @Argument(fullName = WORKSPACE_ARG_LONG_NAME,
               doc = "Workspace for GenomicsDB. Can be a POSIX file system absolute or relative path or a HDFS/GCS URL. " +
@@ -239,7 +239,7 @@ public final class GenomicsDBImport extends GATKTool {
                     "when using the "+INTERVAL_LIST_LONG_NAME+" option. " +
                     "Either this or "+WORKSPACE_ARG_LONG_NAME+" must be specified. " +
                     "Must point to an existing workspace.",
-              mutex = {WORKSPACE_ARG_LONG_NAME})
+              mutex = {WORKSPACE_ARG_LONG_NAME, VCF_HEADER_OVERRIDE})
     private String incrementalImportWorkspace;
 
     @Argument(fullName = SEGMENT_SIZE_ARG_LONG_NAME,
@@ -254,7 +254,7 @@ public final class GenomicsDBImport extends GATKTool {
                     " data for only a single sample. Either this or " + SAMPLE_NAME_MAP_LONG_NAME +
                     " must be specified.",
               optional = true,
-              mutex = {SAMPLE_NAME_MAP_LONG_NAME})
+              mutex = {SAMPLE_NAME_MAP_LONG_NAME, AVOID_NIO})
     private List<String> variantPaths;
 
     @Argument(fullName = VCF_BUFFER_SIZE_ARG_NAME,
@@ -364,6 +364,13 @@ public final class GenomicsDBImport extends GATKTool {
             optional = true)
     private boolean sharedPosixFSOptimizations = false;
 
+    @Argument(fullName = VCF_HEADER_OVERRIDE,
+        doc = "Specify a vcf file to use instead of reading and combining headers from the input vcfs",
+        optional = true,
+        mutex = {INCREMENTAL_WORKSPACE_ARG_LONG_NAME}
+    )
+    private FeatureInput<VariantContext> headerOverride = null;
+
     @Argument(fullName = BYPASS_FEATURE_READER,
             doc = "Use htslib to read input VCFs instead of GATK's FeatureReader. This will reduce memory usage and potentially speed up " +
                   "the import. Lower memory requirements may also enable parallelism through " + MAX_NUM_INTERVALS_TO_IMPORT_IN_PARALLEL +
@@ -371,6 +378,15 @@ public final class GenomicsDBImport extends GATKTool {
             optional = true)
     private boolean bypassFeatureReader = false;
 
+    @Argument(fullName = AVOID_NIO,
+                doc = "Do not attempt to open the input vcf file paths in java.  This can only be used with " + BYPASS_FEATURE_READER
+            + ".  It allows operating on file systems which GenomicsDB understands how to open but GATK does not.  This will disable "
+            + "many of the sanity checks.",
+            mutex = {StandardArgumentDefinitions.VARIANT_LONG_NAME}
+    )
+    @Advanced
+    private boolean avoidNio = false;
+
     @Argument(fullName = USE_GCS_HDFS_CONNECTOR,
             doc = "Use the GCS HDFS Connector instead of the native GCS SDK client with gs:// URLs.",
             optional = true)
@@ -440,10 +456,6 @@ public int getDefaultCloudIndexPrefetchBufferSize() {
     // Path to combined VCF header file to be written by GenomicsDBImporter
     private String vcfHeaderFile;
 
-    // GenomicsDB callset map protobuf structure containing all callset names
-    // used to write the callset json file on traversal success
-    private GenomicsDBCallsetsMapProto.CallsetMappingPB callsetMappingPB;
-
     //in-progress batchCount
     private int batchCount = 1;
 
@@ -463,11 +475,14 @@ public void onStartup() {
         initializeWorkspaceAndToolMode();
         assertVariantPathsOrSampleNameFileWasSpecified();
         assertOverwriteWorkspaceAndIncrementalImportMutuallyExclusive();
+        assertAvoidNioConditionsAreValid();
         initializeHeaderAndSampleMappings();
         initializeIntervals();
         super.onStartup();
     }
 
+
+
     private void initializeWorkspaceAndToolMode() {
         if (incrementalImportWorkspace != null && !incrementalImportWorkspace.isEmpty()) {
             doIncrementalImport = true;
@@ -495,6 +510,24 @@ private void assertVariantPathsOrSampleNameFileWasSpecified(){
         }
     }
 
+    private void assertAvoidNioConditionsAreValid() {
+        if (avoidNio && (!bypassFeatureReader || headerOverride == null) ){
+            final List<String> missing = new ArrayList<>();
+            if(!bypassFeatureReader){
+                missing.add(BYPASS_FEATURE_READER);
+            }
+            if(headerOverride == null){
+                missing.add(VCF_HEADER_OVERRIDE);
+            }
+            final String missingArgs = String.join(" and ", missing);
+
+            // this potentially produces an exception with bad grammar but that's probably ok
+            throw new CommandLineException.MissingArgument(missingArgs, "If --" +AVOID_NIO + " is set then --" + BYPASS_FEATURE_READER
+                    + " and --" + VCF_HEADER_OVERRIDE + " must also be specified.");
+
+        }
+    }
+
     private static void assertIntervalsCoverEntireContigs(GenomicsDBImporter importer,
                                                           List<ChromosomeInterval> intervals) {
         GenomicsDBVidMapProto.VidMappingPB vidMapPB = importer.getProtobufVidMapping();
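
As a sketch of the combination this validator accepts (the paths here are hypothetical; the builder factory and argument names are the ones added in this patch):

    final ArgumentsBuilder validAvoidNioArgs = ArgumentsBuilder.create()
            .add(GenomicsDBImport.WORKSPACE_ARG_LONG_NAME, "/tmp/workspace")           // hypothetical workspace
            .add(GenomicsDBImport.SAMPLE_NAME_MAP_LONG_NAME, "azureSampleNameMap.txt") // map with az:// paths
            .add(GenomicsDBImport.VCF_HEADER_OVERRIDE, "azureHeader.vcf")              // required with --avoid-nio
            .addFlag(GenomicsDBImport.BYPASS_FEATURE_READER)                           // required with --avoid-nio
            .addFlag(GenomicsDBImport.AVOID_NIO)
            .addInterval("chr20");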
@@ -523,32 +556,37 @@ private static void assertIntervalsCoverEntireContigs(GenomicsDBImporter importe
      */
     private void initializeHeaderAndSampleMappings() {
         // Only one of -V and --sampleNameMapFile may be specified
-        if (variantPaths != null && variantPaths.size() > 0) {
+        if (variantPaths != null && !variantPaths.isEmpty()) {
             // -V was specified
             final List<VCFHeader> headers = new ArrayList<>(variantPaths.size());
             sampleNameMap = new SampleNameMap();
-            for (final String variantPathString : variantPaths) {
-                final Path variantPath = IOUtils.getPath(variantPathString);
-                if (bypassFeatureReader) {
-                    GATKGenomicsDBUtils.assertVariantFileIsCompressedAndIndexed(variantPath);
-                }
-                final  VCFHeader header = getHeaderFromPath(variantPath, null);
-                Utils.validate(header != null, "Null header was found in " + variantPath + ".");
-                assertGVCFHasOnlyOneSample(variantPathString, header);
-                headers.add(header);
 
-                final String sampleName = header.getGenotypeSamples().get(0);
-                try {
-                    sampleNameMap.addSample(sampleName, new URI(variantPathString));
-                }
-                catch(final URISyntaxException e) {
-                    throw new UserException("Malformed URI "+e.toString(), e);
+            if(headerOverride == null) {
+                for (final String variantPathString : variantPaths) {
+                    final Path variantPath = IOUtils.getPath(variantPathString);
+                    if (bypassFeatureReader) { // avoid-nio can't be set here because it requires headerOverride
+                        GATKGenomicsDBUtils.assertVariantFileIsCompressedAndIndexed(variantPath);
+                    }
+                    final VCFHeader header = getHeaderFromPath(variantPath);
+                    Utils.validate(header != null, "Null header was found in " + variantPath + ".");
+                    assertGVCFHasOnlyOneSample(variantPathString, header);
+                    headers.add(header);
+
+                    final String sampleName = header.getGenotypeSamples().get(0);
+                    try {
+                        sampleNameMap.addSample(sampleName, new URI(variantPathString));
+                    } catch (final URISyntaxException e) {
+                        throw new UserException("Malformed URI " + e.getMessage(), e);
+                    }
                 }
+                mergedHeaderLines = VCFUtils.smartMergeHeaders(headers, true);
+                mergedHeaderSequenceDictionary = new VCFHeader(mergedHeaderLines).getSequenceDictionary();
+            } else {
+                final VCFHeader header = getHeaderFromPath(headerOverride.toPath());
+                mergedHeaderLines = new LinkedHashSet<>(header.getMetaDataInInputOrder());
+                mergedHeaderSequenceDictionary = header.getSequenceDictionary();
             }
-            mergedHeaderLines = VCFUtils.smartMergeHeaders(headers, true);
-            mergedHeaderSequenceDictionary = new VCFHeader(mergedHeaderLines).getSequenceDictionary();
             mergedHeaderLines.addAll(getDefaultToolVCFHeaderLines());
-
         } else if (sampleNameMapFile != null) {
             // --sampleNameMap was specified
 
@@ -556,31 +594,34 @@ private void initializeHeaderAndSampleMappings() {
             //the resulting database will have incorrect sample names
             //see https://github.com/broadinstitute/gatk/issues/3682 for more information
             // The SampleNameMap class guarantees that the samples will be sorted correctly.
-            sampleNameMap = new SampleNameMap(IOUtils.getPath(sampleNameMapFile), bypassFeatureReader);
+            sampleNameMap = new SampleNameMap(IOUtils.getPath(sampleNameMapFile),
+                    bypassFeatureReader && !avoidNio);
 
             final String firstSample = sampleNameMap.getSampleNameToVcfPath().entrySet().iterator().next().getKey();
-            final Path firstVCFPath = sampleNameMap.getVCFForSampleAsPath(firstSample);
-            final Path firstVCFIndexPath = sampleNameMap.getVCFIndexForSampleAsPath(firstSample);
-            final VCFHeader header = getHeaderFromPath(firstVCFPath, firstVCFIndexPath);
 
+            final VCFHeader header;
+            if(headerOverride == null){
+                final Path firstVCFPath = sampleNameMap.getVCFForSampleAsPath(firstSample);
+                header = getHeaderFromPath(firstVCFPath);
+            } else {
+                header = getHeaderFromPath(headerOverride.toPath());
+            }
             //getMetaDataInInputOrder() returns an ImmutableSet - LinkedHashSet is mutable and preserves ordering
-            mergedHeaderLines = new LinkedHashSet(header.getMetaDataInInputOrder());
+            mergedHeaderLines = new LinkedHashSet<>(header.getMetaDataInInputOrder());
             mergedHeaderSequenceDictionary = header.getSequenceDictionary();
             mergedHeaderLines.addAll(getDefaultToolVCFHeaderLines());
-        }
-        else if (getIntervalsFromExistingWorkspace){
+        } else if (getIntervalsFromExistingWorkspace){
             final String vcfHeader = IOUtils.appendPathToDir(workspace, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME);
             IOUtils.assertPathsAreReadable(vcfHeader);
             final String header = GenomicsDBUtils.readEntireFile(vcfHeader);
             try {
                 File tempHeader = IOUtils.createTempFile("tempheader", ".vcf");
-                Files.write(tempHeader.toPath(), header.getBytes(StandardCharsets.UTF_8));
+                Files.writeString(tempHeader.toPath(), header);
                 mergedHeaderSequenceDictionary = VCFFileReader.getSequenceDictionary(tempHeader);
             } catch (final IOException e) {
-                throw new UserException("Unable to create temporary header file to get sequence dictionary");
+                throw new UserException("Unable to create temporary header file to get sequence dictionary", e);
             }
-        }
-        else {
+        } else {
             throw new UserException(StandardArgumentDefinitions.VARIANT_LONG_NAME+" or "+
                     SAMPLE_NAME_MAP_LONG_NAME+" must be specified unless "+
                     INTERVAL_LIST_LONG_NAME+" is specified");
@@ -599,8 +640,12 @@ else if (getIntervalsFromExistingWorkspace){
         }
     }
 
-    private VCFHeader getHeaderFromPath(final Path variantPath, final Path variantIndexPath) {
-        try(final FeatureReader<VariantContext> reader = getReaderFromPath(variantPath, variantIndexPath)) {
+    private VCFHeader getHeaderFromPath(final Path variantPath) {
+        //TODO make this mangling unnecessary
+        final String variantURI = variantPath.toAbsolutePath().toUri().toString();
+        try(final FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(variantURI, null, new VCFCodec(), false,
+                BucketUtils.getPrefetchingWrapper(cloudPrefetchBuffer),
+                BucketUtils.getPrefetchingWrapper(cloudIndexPrefetchBuffer))) {
             return (VCFHeader) reader.getHeader();
         } catch (final IOException e) {
             throw new UserException("Error while reading vcf header from " + variantPath.toUri(), e);
@@ -633,9 +678,9 @@ private void writeIntervalListToFile() {
     @Override
     public void onTraversalStart() {
         String workspaceDir = overwriteCreateOrCheckWorkspace();
-        vidMapJSONFile = genomicsDBApppendPaths(workspaceDir, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME);
-        callsetMapJSONFile = genomicsDBApppendPaths(workspaceDir, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME);
-        vcfHeaderFile = genomicsDBApppendPaths(workspaceDir, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME);
+        vidMapJSONFile = GATKGenomicsDBUtils.genomicsDBApppendPaths(workspaceDir, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME);
+        callsetMapJSONFile = GATKGenomicsDBUtils.genomicsDBApppendPaths(workspaceDir, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME);
+        vcfHeaderFile = GATKGenomicsDBUtils.genomicsDBApppendPaths(workspaceDir, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME);
         if (getIntervalsFromExistingWorkspace) {
             // intervals may be null if merge-contigs-into-num-partitions was used to create the workspace
             // if so, we need to wait for vid to be generated before writing out the interval list
@@ -775,7 +820,7 @@ private List<SimpleInterval> generateIntervalListFromWorkspace() {
             final int start = Integer.parseInt(partitionInfo[1]);
             final int end = Integer.parseInt(partitionInfo[2]);
             return new SimpleInterval(contig, start, end);
-        }).filter(o -> o != null).collect(Collectors.toList());
+        }).filter(Objects::nonNull).collect(Collectors.toList());
     }
 
     private ImportConfig createImportConfig(final int batchSize) {
@@ -785,7 +830,7 @@ private ImportConfig createImportConfig(final int batchSize) {
                 GenomicsDBImportConfiguration.ImportConfiguration.newBuilder();
         importConfigurationBuilder.addAllColumnPartitions(partitions);
         importConfigurationBuilder.setSizePerColumnPartition(vcfBufferSizePerSample);
-        importConfigurationBuilder.setFailIfUpdating(true && !doIncrementalImport);
+        importConfigurationBuilder.setFailIfUpdating(!doIncrementalImport);
         importConfigurationBuilder.setSegmentSize(segmentSize);
         importConfigurationBuilder.setConsolidateTiledbArrayAfterLoad(doConsolidation);
         importConfigurationBuilder.setEnableSharedPosixfsOptimizations(sharedPosixFSOptimizations);
@@ -936,7 +981,7 @@ private FeatureReader<VariantContext> getReaderFromPath(final Path variantPath,
             /* Anonymous FeatureReader subclass that wraps returned iterators to ensure that the GVCFs do not
              * contain MNPs.
              */
-            return new FeatureReader<VariantContext>() {
+            return new FeatureReader<>() {
                 /** Iterator that asserts that variants are not MNPs. */
                 class NoMnpIterator implements CloseableTribbleIterator<VariantContext> {
                     private final CloseableTribbleIterator<VariantContext> inner;
@@ -971,7 +1016,8 @@ public VariantContext next() {
                     return new NoMnpIterator(reader.query(chr, start, end));
                 }
 
-                @Override public CloseableTribbleIterator<VariantContext> iterator() throws IOException {
+                @Override
+                public CloseableTribbleIterator<VariantContext> iterator() throws IOException {
                     return new NoMnpIterator(reader.iterator());
                 }
             };
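
The MNP check itself lives in the iterator methods elided from this hunk; a sketch of what next() presumably does (the helper and message wording are assumptions, not shown in this diff):

    @Override
    public VariantContext next() {
        final VariantContext vc = inner.next();
        // GenomicsDBImport cannot ingest multi-nucleotide polymorphisms
        if (GATKVariantContextUtils.isUnmixedMnpIgnoringNonRef(vc)) {   // assumed helper
            throw new UserException.BadInput(
                    "GenomicsDBImport does not support GVCFs with MNPs; MNP found at "
                            + vc.getContig() + ":" + vc.getStart());
        }
        return vc;
    }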
@@ -990,7 +1036,7 @@ public VariantContext next() {
      * @return  The workspace directory
      */
     private String overwriteCreateOrCheckWorkspace() {
-        String workspaceDir = genomicsDBGetAbsolutePath(workspace);
+        String workspaceDir = GATKGenomicsDBUtils.genomicsDBGetAbsolutePath(workspace);
         // From JavaDoc for GATKGenomicsDBUtils.createTileDBWorkspacevid
         //   returnCode = 0 : OK. If overwriteExistingWorkspace is true and the workspace exists, it is deleted first.
         //   returnCode = -1 : path was not a directory
@@ -1016,7 +1062,7 @@ private String overwriteCreateOrCheckWorkspace() {
     }
 
     static class UnableToCreateGenomicsDBWorkspace extends UserException {
-        private static final long serialVersionUID = 1L;
+        @Serial private static final long serialVersionUID = 1L;
 
         UnableToCreateGenomicsDBWorkspace(final String message){
             super(message);
@@ -1028,7 +1074,7 @@ static class UnableToCreateGenomicsDBWorkspace extends UserException {
      * dictionary (as returned by {@link #getBestAvailableSequenceDictionary})
      * to parse/verify them. Does nothing if no intervals were specified.
      */
-    protected void initializeIntervals() {
+    void initializeIntervals() {
         if (intervalArgumentCollection.intervalsSpecified()) {
             if (getIntervalsFromExistingWorkspace || doIncrementalImport) {
                 logger.warn(INCREMENTAL_WORKSPACE_ARG_LONG_NAME+" was set, so ignoring specified intervals." +
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java
index 17ba65831ea..f816dc3c3a3 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java
@@ -101,7 +101,7 @@ public final class GenomicsDBImportIntegrationTest extends CommandLineProgramTes
     //This file was obtained from combined.gatk3.7.g.vcf.gz by dropping all the samples
     private static final String COMBINED_SITES_ONLY = largeFileTestDir + "gvcfs/combined.gatk3.7_sites_only.g.vcf.gz";
     private static final String INTERVAL_PICARD_STYLE_EXPECTED = toolsTestDir + "GenomicsDBImport/interval_expected.interval_list";
-    private static final String MULTIPLE_NON_ADJACENT_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS_PICARD_STYLE_EXPECTED = 
+    private static final String MULTIPLE_NON_ADJACENT_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS_PICARD_STYLE_EXPECTED =
             toolsTestDir + "GenomicsDBImport/multiple_non_adjacent_intervals_combine_gvcfs_expected.interval_list";
     private static final String MERGED_CONTIGS_INTERVAL_PICARD_STYLE_EXPECTED =
             toolsTestDir + "GenomicsDBImport/chr20_chr21_merged_contigs_expected.interval_list";
@@ -383,6 +383,74 @@ public void testGenomicsDbImportThrowsOnMnp() throws IOException {
         }
     }
 
+    @DataProvider
+    public Object[][] getInvalidArgsForAvoidNio(){
+        final ArgumentsBuilder baseArgs = ArgumentsBuilder.create()
+            .add(GenomicsDBImport.WORKSPACE_ARG_LONG_NAME, createTempFile())
+                .addInterval("fake")
+                .addFlag(GenomicsDBImport.AVOID_NIO);
+        return new Object[][]{
+                {baseArgs, CommandLineException.MissingArgument.class}, //no input
+                {baseArgs.copy()
+                        .addVCF("fake.vcf"), CommandLineException.class
+                }, //not allowed with variant, we should have some sort of mutex exception...
+                {baseArgs.copy()
+                        .add(GenomicsDBImport.SAMPLE_NAME_MAP_LONG_NAME, "fake.samplenames"), CommandLineException.MissingArgument.class
+                }, //missing header
+                {baseArgs.copy()
+                        .add(GenomicsDBImport.VCF_HEADER_OVERRIDE, "fake.vcf"), CommandLineException.MissingArgument.class
+                }, //missing input
+                {baseArgs.copy()
+                        .add(GenomicsDBImport.VCF_HEADER_OVERRIDE, "fake.vcf")
+                        .addVCF("fake.vcf"), CommandLineException.class // can't use with -V
+                }
+         };
+    }
+
+    @Test(dataProvider = "getInvalidArgsForAvoidNio")
+    public void testInvalidArgumentCombinationsWithAvoidNio(ArgumentsBuilder args, Class<? extends Exception> expectedException){
+         Assert.assertThrows(expectedException, () -> runCommandLine(args));
+    }
+
+    /*
+     * this is a test that can be run locally if you enable it and fill in the SAS token with one from
+     * https://app.terra.bio/#workspaces/axin-pipeline-testing-20230927/gatk-azure-testing
+     *
+     * it's basically an example of how to run the tool on azure
+     *
+     * note that the http url for azure files looks like this:
+     *
+     * https://<account>.blob.core.windows.net/<container>/<file>?<sas-token>
+     * the SAS token generally includes the '?'
+     *
+     * to restructure it into an az:// link you move the container into the username position:
+     * az://<container>@<account>.blob.core.windows.net/<file>
+     *
+     */
+    @Test(enabled = false, groups={"cloud","azure"})
+    public void testImportFromAzure(){
+
+        final String SAS_TOKEN="put a sas token in me";
+
+        final String workspace = createTempDir("genomicsdb-tests-").getAbsolutePath() + "/workspace";
+        final String sample = "NA19625";
+        final String azLocation = "az://lzb25a77f5eadb0fa72a2ae7.blob.core.windows.net/sc-62528cd7-3299-4440-8c17-10f458e589d3/NA19625.g.vcf.gz";
+        final String sampleMapText = String.format("%s\t%s\n", sample, azLocation);
+        final File sampleMappingFile = IOUtils.writeTempFile(sampleMapText, "sampleMapping", ".txt");
+
+        final ArgumentsBuilder args = ArgumentsBuilder.create()
+                .add(GenomicsDBImport.WORKSPACE_ARG_LONG_NAME, workspace)
+                .addInterval("chr20")
+                .addFlag(GenomicsDBImport.AVOID_NIO)
+                .add(GenomicsDBImport.SAMPLE_NAME_MAP_LONG_NAME, sampleMappingFile)
+                .addFlag(GenomicsDBImport.BYPASS_FEATURE_READER)
+                .add(GenomicsDBImport.VCF_HEADER_OVERRIDE, GENOMICSDB_TEST_DIR + "azureHeader.vcf");
+        Map<String, String> environment = new HashMap<>(System.getenv());
+        final String sasTokenEnvVariable = "AZURE_STORAGE_SAS_TOKEN";
+        environment.put(sasTokenEnvVariable, SAS_TOKEN);
+        runToolInNewJVM(GenomicsDBImport.class.getSimpleName(), args, environment);
+    }
+
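
Applying the restructuring recipe from the comment above in code, a hypothetical helper could look like this (the name and parsing are illustrative only; the SAS token is supplied separately via the AZURE_STORAGE_SAS_TOKEN environment variable, as in the test):

    /** e.g. https://myaccount.blob.core.windows.net/mycontainer/my.vcf.gz
     *   ->  az://mycontainer@myaccount.blob.core.windows.net/my.vcf.gz */
    private static String httpsToAzUri(final String httpsUrl) {
        final java.net.URI uri = java.net.URI.create(httpsUrl);
        final String host = uri.getHost();                 // myaccount.blob.core.windows.net
        final String path = uri.getPath();                 // /mycontainer/my.vcf.gz
        final int secondSlash = path.indexOf('/', 1);
        final String container = path.substring(1, secondSlash);
        final String blob = path.substring(secondSlash + 1);
        return String.format("az://%s@%s/%s", container, host, blob);
    }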
     private void testGenomicsDBImporterWithGenotypes(final List<String> vcfInputs, final List<SimpleInterval> intervals,
                                                      final String expectedCombinedVCF,
                                                       final String referenceFile) throws IOException {
@@ -408,7 +476,7 @@ private void testGenomicsDBImporterWithGenotypes(final List<String> vcfInputs, f
                                                      final boolean testAll,
                                                      final boolean produceGTField,
                                                      final boolean sitesOnlyQuery) throws IOException {
-         testGenomicsDBImporterWithGenotypes(vcfInputs, intervals, expectedCombinedVCF, referenceFile, testAll, produceGTField, 
+         testGenomicsDBImporterWithGenotypes(vcfInputs, intervals, expectedCombinedVCF, referenceFile, testAll, produceGTField,
                  sitesOnlyQuery, false);
     }
 
@@ -456,11 +524,11 @@ private void testGenomicsDBAgainstCombineGVCFs(final List<String> vcfInputs, fin
 
     private void testGenomicsDBAgainstCombineGVCFs(final List<String> vcfInputs, final List<SimpleInterval> intervals,
                                                    final String referenceFile, final String[] CombineGVCFArgs,
-                                                   final int numVCFReaderThreadsInImporter, final int chrsToPartitions, 
+                                                   final int numVCFReaderThreadsInImporter, final int chrsToPartitions,
                                                    final boolean useNativeReader) throws IOException {
         final String workspace = createTempDir("genomicsdb-tests-").getAbsolutePath() + "/workspace";
 
-        writeToGenomicsDB(vcfInputs, intervals, workspace, 0, false, 0, numVCFReaderThreadsInImporter, false, false, false, 
+        writeToGenomicsDB(vcfInputs, intervals, workspace, 0, false, 0, numVCFReaderThreadsInImporter, false, false, false,
                           chrsToPartitions, useNativeReader);
         checkJSONFilesAreWritten(workspace);
         for(SimpleInterval currInterval : intervals) {
@@ -504,7 +572,7 @@ public void testGenomicsDBAlleleSpecificAnnotationsInTheMiddleOfSpanningDeletion
 
     @Test
     public void testGenomicsDBNoRemapMissingToNonRef() throws IOException {
-        testGenomicsDBAgainstCombineGVCFs(Arrays.asList(COMBINEGVCFS_TEST_DIR+"NA12878.AS.NON_REF_remap_check.chr20snippet.g.vcf", 
+        testGenomicsDBAgainstCombineGVCFs(Arrays.asList(COMBINEGVCFS_TEST_DIR+"NA12878.AS.NON_REF_remap_check.chr20snippet.g.vcf",
                 COMBINEGVCFS_TEST_DIR+"NA12892.AS.chr20snippet.g.vcf"),
                 new ArrayList<SimpleInterval>(Arrays.asList(new SimpleInterval("20", 10433313, 10700000))),
                 b37_reference_20_21,
@@ -671,14 +739,14 @@ private void writeToGenomicsDB(final List<String> vcfInputs, final List<SimpleI
     private void writeToGenomicsDB(final List<String> vcfInputs, final List<SimpleInterval> intervals, final String workspace,
-                                   final int batchSize, final Boolean useBufferSize, final int bufferSizePerSample, int threads, 
+                                   final int batchSize, final Boolean useBufferSize, final int bufferSizePerSample, int threads,
                                    final boolean mergeIntervals, final boolean overwriteWorkspace, final boolean incremental) {
-        writeToGenomicsDB(vcfInputs, intervals, workspace, batchSize, useBufferSize, bufferSizePerSample, threads, mergeIntervals, 
+        writeToGenomicsDB(vcfInputs, intervals, workspace, batchSize, useBufferSize, bufferSizePerSample, threads, mergeIntervals,
                           overwriteWorkspace, incremental, 0, false);
     }
 
     private void writeToGenomicsDB(final List<String> vcfInputs, final List<SimpleInterval> intervals, final String workspace,
-                                   final int batchSize, final Boolean useBufferSize, final int bufferSizePerSample, int threads, 
+                                   final int batchSize, final Boolean useBufferSize, final int bufferSizePerSample, int threads,
                                    final boolean mergeIntervals, final boolean overwriteWorkspace, final boolean incremental,
                                    final int chrsToPartitions, final boolean useNativeReader) {
         final ArgumentsBuilder args = new ArgumentsBuilder();
@@ -1013,7 +1081,7 @@ public Object[][] dataForTestExplicitIndicesInSampleNameMapInTheCloud() {
         final String NA19625_UNCOMPRESSED_WITH_INDEX = GVCFS_WITH_INDICES_BUCKET + "NA19625.g.vcf";
         final String NA19625_UNCOMPRESSED_NO_INDEX = GVCFS_WITHOUT_INDICES_BUCKET + "NA19625.g.vcf";
         final String NA19625_UNCOMPRESSED_INDEX = GVCF_INDICES_ONLY_BUCKET + "NA19625.g.vcf.idx";
-        
+
         return new Object[][] {
                 // All VCFs have explicit indices, samples in order, TABIX index
                 {
@@ -1371,7 +1439,7 @@ public void testIncrementalMustHaveExistingWorkspace() {
         writeToGenomicsDB(LOCAL_GVCFS, INTERVAL, workspace + "workspace2", 0, false, 0, 1, false, false, true);
     }
 
-    private void testIncrementalImport(final int stepSize, final List<SimpleInterval> intervals, final String workspace, 
+    private void testIncrementalImport(final int stepSize, final List<SimpleInterval> intervals, final String workspace,
                                        final int batchSize, final boolean produceGTField, final boolean useVCFCodec, final String expected,
                                        final int chrsToPartitions, final boolean useNativeReader) throws IOException {
         testIncrementalImport(stepSize, intervals, workspace, batchSize, produceGTField, useVCFCodec, expected,
@@ -1384,7 +1452,7 @@ private void testIncrementalImport(final int stepSize, final List 0 && useNativeReader));
             checkJSONFilesAreWritten(workspace);
         }
@@ -1434,7 +1502,7 @@ public void testGenomicsDBIncrementalAndBatchSize1WithNonAdjacentIntervalsNative
 
     @Test(expectedExceptions = {UserException.class}, expectedExceptionsMessageRegExp=".*must be block compressed.*")
     public void testGenomicsDBImportNativeReaderNoCompressedVcf() throws IOException {
-        testGenomicsDBImporterWithGenotypes(Arrays.asList(NA_12878_PHASED), MULTIPLE_INTERVALS, NA_12878_PHASED, b37_reference_20_21, 
+        testGenomicsDBImporterWithGenotypes(Arrays.asList(NA_12878_PHASED), MULTIPLE_INTERVALS, NA_12878_PHASED, b37_reference_20_21,
                 false, true, false, true);
     }
 
@@ -1448,14 +1516,14 @@ public void testGenomicsDBIncrementalAndBatchSize1WithNonAdjacentIntervalsMergeC
     @Test
     public void testGenomicsDBIncrementalAndBatchSize2() throws IOException {
         final String workspace = createTempDir("genomicsdb-incremental-tests").getAbsolutePath() + "/workspace";
-        testIncrementalImport(2, MULTIPLE_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS, workspace, 2, true, false, 
+        testIncrementalImport(2, MULTIPLE_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS, workspace, 2, true, false,
                               COMBINED_WITH_GENOTYPES, 0, false);
     }
 
     @Test
     public void testGenomicsDBMultipleIncrementalImports() throws IOException {
         final String workspace = createTempDir("genomicsdb-incremental-tests").getAbsolutePath() + "/workspace";
-        testIncrementalImport(1, MULTIPLE_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS, workspace, 2, true, true, 
+        testIncrementalImport(1, MULTIPLE_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS, workspace, 2, true, true,
                               COMBINED_WITH_GENOTYPES, 0, false);
     }
 
diff --git a/src/test/java/org/broadinstitute/hellbender/tools/spark/PileupSparkIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/spark/PileupSparkIntegrationTest.java
index 656000c25d9..98995ad319a 100644
--- a/src/test/java/org/broadinstitute/hellbender/tools/spark/PileupSparkIntegrationTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/tools/spark/PileupSparkIntegrationTest.java
@@ -25,15 +25,15 @@ public Object[][] shuffleParameters() {
         return new Object[][] { { false }, { true } };
     }
 
-    private File createTempFile() throws IOException {
+    private File createAndDeleteTempFile() {
         final File out = IOUtils.createTempFile("out", ".txt");
         out.delete();
         return out;
     }
-
+    
     @Test(dataProvider = "shuffle")
     public void testSimplePileup(boolean useShuffle) throws Exception {
-        final File out = createTempFile();
+        final File out = createAndDeleteTempFile();
         final ArgumentsBuilder args = new ArgumentsBuilder();
         args.addRaw("--input");
         args.addRaw(NA12878_20_21_WGS_bam);
@@ -53,7 +53,7 @@ public void testSimplePileup(boolean useShuffle) throws Exception {
 
     @Test(dataProvider = "shuffle")
     public void testVerbosePileup(boolean useShuffle) throws Exception {
-        final File out = createTempFile();
+        final File out = createAndDeleteTempFile();
         final ArgumentsBuilder args = new ArgumentsBuilder();
         args.addRaw("--input");
         args.addRaw(NA12878_20_21_WGS_bam);
@@ -74,7 +74,7 @@ public void testVerbosePileup(boolean useShuffle) throws Exception {
 
     @Test(dataProvider = "shuffle")
     public void testFeaturesPileup(boolean useShuffle) throws Exception {
-        final File out = createTempFile();
+        final File out = createAndDeleteTempFile();
         final ArgumentsBuilder args = new ArgumentsBuilder();
         args.addRaw("--input");
         args.addRaw(NA12878_20_21_WGS_bam);
@@ -95,7 +95,7 @@ public void testFeaturesPileup(boolean useShuffle) throws Exception {
 
     @Test(dataProvider = "shuffle")
     public void testInsertLengthPileup(boolean useShuffle) throws Exception {
-        final File out = createTempFile();
+        final File out = createAndDeleteTempFile();
         final ArgumentsBuilder args = new ArgumentsBuilder();
         args.addRaw("--input");
         args.addRaw(NA12878_20_21_WGS_bam);
@@ -128,7 +128,7 @@ public void testFeaturesPileupHdfs(boolean useShuffle) throws Exception {
             cluster.getFileSystem().copyFromLocalFile(new Path(dbsnp_138_b37_20_21_vcf), vcfPath);
             cluster.getFileSystem().copyFromLocalFile(new Path(dbsnp_138_b37_20_21_vcf + ".idx"), idxPath);
 
-            final File out = createTempFile();
+            final File out = createAndDeleteTempFile();
             final ArgumentsBuilder args = new ArgumentsBuilder();
             args.addRaw("--input");
             args.addRaw(NA12878_20_21_WGS_bam);
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/GenomicsDBImport/azureHeader.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/GenomicsDBImport/azureHeader.vcf
new file mode 100644
index 00000000000..6c0a9fcb965
--- /dev/null
+++ b/src/test/resources/org/broadinstitute/hellbender/tools/GenomicsDBImport/azureHeader.vcf
@@ -0,0 +1,95 @@
+##fileformat=VCFv4.2
+##ALT=
+##FILTER=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##FORMAT=
+##GATKCommandLine.HaplotypeCaller=
+##GVCFBlock0-1=minGQ=0(inclusive),maxGQ=1(exclusive)
+##GVCFBlock1-2=minGQ=1(inclusive),maxGQ=2(exclusive)
+##GVCFBlock10-11=minGQ=10(inclusive),maxGQ=11(exclusive)
+##GVCFBlock11-12=minGQ=11(inclusive),maxGQ=12(exclusive)
+##GVCFBlock12-13=minGQ=12(inclusive),maxGQ=13(exclusive)
+##GVCFBlock13-14=minGQ=13(inclusive),maxGQ=14(exclusive)
+##GVCFBlock14-15=minGQ=14(inclusive),maxGQ=15(exclusive)
+##GVCFBlock15-16=minGQ=15(inclusive),maxGQ=16(exclusive)
+##GVCFBlock16-17=minGQ=16(inclusive),maxGQ=17(exclusive)
+##GVCFBlock17-18=minGQ=17(inclusive),maxGQ=18(exclusive)
+##GVCFBlock18-19=minGQ=18(inclusive),maxGQ=19(exclusive)
+##GVCFBlock19-20=minGQ=19(inclusive),maxGQ=20(exclusive)
+##GVCFBlock2-3=minGQ=2(inclusive),maxGQ=3(exclusive)
+##GVCFBlock20-21=minGQ=20(inclusive),maxGQ=21(exclusive)
+##GVCFBlock21-22=minGQ=21(inclusive),maxGQ=22(exclusive)
+##GVCFBlock22-23=minGQ=22(inclusive),maxGQ=23(exclusive)
+##GVCFBlock23-24=minGQ=23(inclusive),maxGQ=24(exclusive)
+##GVCFBlock24-25=minGQ=24(inclusive),maxGQ=25(exclusive)
+##GVCFBlock25-26=minGQ=25(inclusive),maxGQ=26(exclusive)
+##GVCFBlock26-27=minGQ=26(inclusive),maxGQ=27(exclusive)
+##GVCFBlock27-28=minGQ=27(inclusive),maxGQ=28(exclusive)
+##GVCFBlock28-29=minGQ=28(inclusive),maxGQ=29(exclusive)
+##GVCFBlock29-30=minGQ=29(inclusive),maxGQ=30(exclusive)
+##GVCFBlock3-4=minGQ=3(inclusive),maxGQ=4(exclusive)
+##GVCFBlock30-31=minGQ=30(inclusive),maxGQ=31(exclusive)
+##GVCFBlock31-32=minGQ=31(inclusive),maxGQ=32(exclusive)
+##GVCFBlock32-33=minGQ=32(inclusive),maxGQ=33(exclusive)
+##GVCFBlock33-34=minGQ=33(inclusive),maxGQ=34(exclusive)
+##GVCFBlock34-35=minGQ=34(inclusive),maxGQ=35(exclusive)
+##GVCFBlock35-36=minGQ=35(inclusive),maxGQ=36(exclusive)
+##GVCFBlock36-37=minGQ=36(inclusive),maxGQ=37(exclusive)
+##GVCFBlock37-38=minGQ=37(inclusive),maxGQ=38(exclusive)
+##GVCFBlock38-39=minGQ=38(inclusive),maxGQ=39(exclusive)
+##GVCFBlock39-40=minGQ=39(inclusive),maxGQ=40(exclusive)
+##GVCFBlock4-5=minGQ=4(inclusive),maxGQ=5(exclusive)
+##GVCFBlock40-41=minGQ=40(inclusive),maxGQ=41(exclusive)
+##GVCFBlock41-42=minGQ=41(inclusive),maxGQ=42(exclusive)
+##GVCFBlock42-43=minGQ=42(inclusive),maxGQ=43(exclusive)
+##GVCFBlock43-44=minGQ=43(inclusive),maxGQ=44(exclusive)
+##GVCFBlock44-45=minGQ=44(inclusive),maxGQ=45(exclusive)
+##GVCFBlock45-46=minGQ=45(inclusive),maxGQ=46(exclusive)
+##GVCFBlock46-47=minGQ=46(inclusive),maxGQ=47(exclusive)
+##GVCFBlock47-48=minGQ=47(inclusive),maxGQ=48(exclusive)
+##GVCFBlock48-49=minGQ=48(inclusive),maxGQ=49(exclusive)
+##GVCFBlock49-50=minGQ=49(inclusive),maxGQ=50(exclusive)
+##GVCFBlock5-6=minGQ=5(inclusive),maxGQ=6(exclusive)
+##GVCFBlock50-51=minGQ=50(inclusive),maxGQ=51(exclusive)
+##GVCFBlock51-52=minGQ=51(inclusive),maxGQ=52(exclusive)
+##GVCFBlock52-53=minGQ=52(inclusive),maxGQ=53(exclusive)
+##GVCFBlock53-54=minGQ=53(inclusive),maxGQ=54(exclusive)
+##GVCFBlock54-55=minGQ=54(inclusive),maxGQ=55(exclusive)
+##GVCFBlock55-56=minGQ=55(inclusive),maxGQ=56(exclusive)
+##GVCFBlock56-57=minGQ=56(inclusive),maxGQ=57(exclusive)
+##GVCFBlock57-58=minGQ=57(inclusive),maxGQ=58(exclusive)
+##GVCFBlock58-59=minGQ=58(inclusive),maxGQ=59(exclusive)
+##GVCFBlock59-60=minGQ=59(inclusive),maxGQ=60(exclusive)
+##GVCFBlock6-7=minGQ=6(inclusive),maxGQ=7(exclusive)
+##GVCFBlock60-70=minGQ=60(inclusive),maxGQ=70(exclusive)
+##GVCFBlock7-8=minGQ=7(inclusive),maxGQ=8(exclusive)
+##GVCFBlock70-80=minGQ=70(inclusive),maxGQ=80(exclusive)
+##GVCFBlock8-9=minGQ=8(inclusive),maxGQ=9(exclusive)
+##GVCFBlock80-90=minGQ=80(inclusive),maxGQ=90(exclusive)
+##GVCFBlock9-10=minGQ=9(inclusive),maxGQ=10(exclusive)
+##GVCFBlock90-99=minGQ=90(inclusive),maxGQ=99(exclusive)
+##GVCFBlock99-2147483647=minGQ=99(inclusive),maxGQ=2147483647(exclusive)
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##INFO=
+##contig=
+##contig=
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA19625
diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/GenomicsDBImport/azureSampleNameMap.txt b/src/test/resources/org/broadinstitute/hellbender/tools/GenomicsDBImport/azureSampleNameMap.txt
new file mode 100644
index 00000000000..6d2f0566d63
--- /dev/null
+++ b/src/test/resources/org/broadinstitute/hellbender/tools/GenomicsDBImport/azureSampleNameMap.txt
@@ -0,0 +1 @@
+NA19625	az://sc-62528cd7-3299-4440-8c17-10f458e589d3@lzb25a77f5eadb0fa72a2ae7.blob.core.windows.net/NA19625.g.vcf.gz
\ No newline at end of file
diff --git a/src/testUtils/java/org/broadinstitute/hellbender/testutils/ArgumentsBuilder.java b/src/testUtils/java/org/broadinstitute/hellbender/testutils/ArgumentsBuilder.java
index 289cae67008..f343ec74c55 100644
--- a/src/testUtils/java/org/broadinstitute/hellbender/testutils/ArgumentsBuilder.java
+++ b/src/testUtils/java/org/broadinstitute/hellbender/testutils/ArgumentsBuilder.java
@@ -24,6 +24,15 @@ public final class ArgumentsBuilder {
 
     public ArgumentsBuilder(){}
 
+    /**
+     * static factory to allow fluent style creation
+     * create().add()...
+     * @return new ArgumentsBuilder
+     */
+    public static ArgumentsBuilder create(){
+        return new ArgumentsBuilder();
+    }
+
     public ArgumentsBuilder(Object[] args){
         for (Object arg: args){
             if (arg instanceof String){
@@ -34,6 +43,22 @@ public ArgumentsBuilder(Object[] args){
         }
     }
 
+    /**
+     * Concatenate the arguments from other onto the end of this builder
+     * @return this builder combined with other
+     */
+    public ArgumentsBuilder concat(ArgumentsBuilder other){
+        this.args.addAll(other.args);
+        return this;
+    }
+
+    /**
+     * @return a copy of this builder
+     */
+    public ArgumentsBuilder copy(){
+        return ArgumentsBuilder.create().concat(this);
+    }
+
     /**
      * Add a string to the arguments list
      * Strings are processed specially, they are reformatted to match the new unix style arguments
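
Together, create(), copy(), and concat() enable the fluent test style used above; a usage sketch with placeholder argument names:

    final ArgumentsBuilder base = ArgumentsBuilder.create()
            .add("workspace", "/tmp/ws");                  // hypothetical argument
    final ArgumentsBuilder variantA = base.copy()
            .addVCF("a.vcf");                              // copy() leaves base untouched
    final ArgumentsBuilder merged = base.copy().concat(variantA); // appends variantA's args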
diff --git a/src/testUtils/java/org/broadinstitute/hellbender/testutils/BaseTest.java b/src/testUtils/java/org/broadinstitute/hellbender/testutils/BaseTest.java
index 7eadf5aa2a1..43581e2b330 100644
--- a/src/testUtils/java/org/broadinstitute/hellbender/testutils/BaseTest.java
+++ b/src/testUtils/java/org/broadinstitute/hellbender/testutils/BaseTest.java
@@ -88,6 +88,19 @@ public static void runProcess(final ProcessController processController, final S
      * @param arguments arguments to provide to the tool
      */
     public static void runToolInNewJVM(String toolName, ArgumentsBuilder arguments){
+        runToolInNewJVM(toolName, arguments, System.getenv());
+    }
+
+    /**
+     * Spawn a new jvm with the same classpath as this one and run a gatk CommandLineProgram
+     * This is useful for running tests that require changing static state that is not allowed to change during
+     * a tool run but which needs to be changed to test some condition.
+     *
+     * @param toolName CommandLineProgram to run
+     * @param arguments arguments to provide to the tool
+     * @param environment a map of key-value pairs which will be used to set the System environment
+     */
+    public static void runToolInNewJVM(String toolName, ArgumentsBuilder arguments, Map<String, String> environment){
         final String javaHome = System.getProperty("java.home");
         final String javaBin = javaHome + File.separator + "bin" + File.separator + "java";
         final String classpath = System.getProperty("java.class.path");;
@@ -97,8 +110,8 @@ public static void runToolInNewJVM(String toolName, ArgumentsBuilder arguments){
                 Main.class.getName(),
                 toolName));
         baseCommand.addAll(arguments.getArgsList());
-
-        runProcess(ProcessController.getThreadLocal(), baseCommand.toArray(new String[0]));
+        runProcess(ProcessController.getThreadLocal(), baseCommand.toArray(new String[0]), environment,
+                "Java exited with non-zero value. Command: "+ String.join(" ",baseCommand) + "\n");
     }
 
     @BeforeSuite
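
A sketch of how a test can use the new overload to run a tool with a modified environment (the variable and token value are placeholders):

    final Map<String, String> env = new HashMap<>(System.getenv());
    env.put("AZURE_STORAGE_SAS_TOKEN", "fake-token");    // hypothetical value
    runToolInNewJVM("GenomicsDBImport", ArgumentsBuilder.create(), env);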
@@ -294,6 +307,13 @@ public static File createTempFile(final String name, final String extension) {
         return IOUtils.createTempFile(name, extension);
     }
 
+    /**
+     * Create a temp file with an arbitrary name and extension
+     */
+    public static File createTempFile(){
+        return createTempFile("default", ".tmp");
+    }
+
     /**
      * Creates a temp path that will be deleted on exit after tests are complete.
      *
@@ -346,6 +366,15 @@ public static File createTempDir(final String prefix){
         return IOUtils.createTempDir(prefix);
     }
 
+    /**
+     * Creates an empty temp directory which will be deleted on exit after tests are complete
+     *
+     * @return an empty directory that will be deleted after the program exits
+     */
+    public static File createTempDir(){
+        return createTempDir("tmp");
+    }
+
     /**
      * Log this message so that it shows up inline during output as well as in html reports
      */
diff --git a/src/testUtils/java/org/broadinstitute/hellbender/testutils/CommandLineProgramTester.java b/src/testUtils/java/org/broadinstitute/hellbender/testutils/CommandLineProgramTester.java
index 0fd63c50a0f..509aed3cc0a 100644
--- a/src/testUtils/java/org/broadinstitute/hellbender/testutils/CommandLineProgramTester.java
+++ b/src/testUtils/java/org/broadinstitute/hellbender/testutils/CommandLineProgramTester.java
@@ -9,6 +9,7 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Map;
 
 /**
  * Utility interface for CommandLine Program testing. API users that have their own Main implementation