Skip to content

Commit

Permalink
adding negative test for bad combinations of avoidnio and other args
Browse files Browse the repository at this point in the history
  • Loading branch information
lbergelson committed Nov 16, 2023
1 parent 15516a9 commit 8fd8e7f
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 43 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
import org.genomicsdb.importer.GenomicsDBImporter;
import org.genomicsdb.model.BatchCompletionCallbackFunctionArgument;
import org.genomicsdb.model.Coordinates;
import org.genomicsdb.model.GenomicsDBCallsetsMapProto;
import org.genomicsdb.model.GenomicsDBImportConfiguration;
import org.genomicsdb.model.GenomicsDBVidMapProto;
import org.genomicsdb.model.ImportConfig;
Expand All @@ -50,16 +49,17 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.Arrays;
import java.util.concurrent.CompletionException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
Expand All @@ -68,9 +68,6 @@
import java.util.concurrent.ThreadFactory;
import java.util.stream.Collectors;

import static org.broadinstitute.hellbender.tools.genomicsdb.GATKGenomicsDBUtils.genomicsDBGetAbsolutePath;
import static org.broadinstitute.hellbender.tools.genomicsdb.GATKGenomicsDBUtils.genomicsDBApppendPaths;

/**
* Import single-sample GVCFs into GenomicsDB before joint genotyping.
*
Expand Down Expand Up @@ -219,7 +216,7 @@ public final class GenomicsDBImport extends GATKTool {
public static final String MAX_NUM_INTERVALS_TO_IMPORT_IN_PARALLEL = "max-num-intervals-to-import-in-parallel";
public static final String MERGE_CONTIGS_INTO_NUM_PARTITIONS = "merge-contigs-into-num-partitions";
public static final String BYPASS_FEATURE_READER = "bypass-feature-reader";
public static final String VCF_HEADER = "header";
public static final String VCF_HEADER_OVERRIDE = "header";
public static final int INTERVAL_LIST_SIZE_WARNING_THRESHOLD = 100;
public static final int ARRAY_COLUMN_BOUNDS_START = 0;
public static final int ARRAY_COLUMN_BOUNDS_END = 1;
Expand All @@ -242,7 +239,7 @@ public final class GenomicsDBImport extends GATKTool {
"when using the "+INTERVAL_LIST_LONG_NAME+" option. " +
"Either this or "+WORKSPACE_ARG_LONG_NAME+" must be specified. " +
"Must point to an existing workspace.",
mutex = {WORKSPACE_ARG_LONG_NAME, VCF_HEADER})
mutex = {WORKSPACE_ARG_LONG_NAME, VCF_HEADER_OVERRIDE})
private String incrementalImportWorkspace;

@Argument(fullName = SEGMENT_SIZE_ARG_LONG_NAME,
Expand All @@ -257,7 +254,7 @@ public final class GenomicsDBImport extends GATKTool {
" data for only a single sample. Either this or " + SAMPLE_NAME_MAP_LONG_NAME +
" must be specified.",
optional = true,
mutex = {SAMPLE_NAME_MAP_LONG_NAME})
mutex = {SAMPLE_NAME_MAP_LONG_NAME, AVOID_NIO})
private List<String> variantPaths;

@Argument(fullName = VCF_BUFFER_SIZE_ARG_NAME,
Expand Down Expand Up @@ -367,7 +364,7 @@ public final class GenomicsDBImport extends GATKTool {
optional = true)
private boolean sharedPosixFSOptimizations = false;

@Argument(fullName = VCF_HEADER,
@Argument(fullName = VCF_HEADER_OVERRIDE,
doc = "Specify a vcf file to use instead of reading and combining headers from the input vcfs",
optional = true,
mutex ={INCREMENTAL_WORKSPACE_ARG_LONG_NAME}
Expand All @@ -384,18 +381,20 @@ public final class GenomicsDBImport extends GATKTool {
@Argument(fullName = AVOID_NIO,
doc = "Do not attempt to open the input vcf file paths in java. This can only be used with " + BYPASS_FEATURE_READER
+ ". It allows operating on file systems which GenomicsDB understands how to open but GATK does not. This will disable "
+ "many of the sanity checks."
+ "many of the sanity checks.",
mutex = {StandardArgumentDefinitions.VARIANT_LONG_NAME}
)
@Advanced
private boolean avoidNio = false;

@Argument(fullName = USE_GCS_HDFS_CONNECTOR,
doc = "Use the GCS HDFS Connector instead of the native GCS SDK client with gs:// URLs.",
optional = true)
public boolean useGcsHdfsConnector = false;

//executor service used when vcfInitializerThreads > 1
private ExecutorService inputPreloadExecutorService;

/**
* Get the largest interval per contig that contains the intervals specified on the command line.
* @param getIntervals intervals to be transformed
Expand Down Expand Up @@ -457,10 +456,6 @@ public int getDefaultCloudIndexPrefetchBufferSize() {
// Path to combined VCF header file to be written by GenomicsDBImporter
private String vcfHeaderFile;

// GenomicsDB callset map protobuf structure containing all callset names
// used to write the callset json file on traversal success
private GenomicsDBCallsetsMapProto.CallsetMappingPB callsetMappingPB;

//in-progress batchCount
private int batchCount = 1;

Expand Down Expand Up @@ -517,8 +512,19 @@ private void assertVariantPathsOrSampleNameFileWasSpecified(){

private void assertAvoidNioConditionsAreValid() {
if (avoidNio && (!bypassFeatureReader || headerOverride == null) ){
throw new CommandLineException("If --" +AVOID_NIO + " is set then --" + BYPASS_FEATURE_READER
+ " and --" + VCF_HEADER + " must also be specified.");
final List<String> missing = new ArrayList<>();
if(!bypassFeatureReader){
missing.add(BYPASS_FEATURE_READER);
}
if(headerOverride == null){
missing.add(VCF_HEADER_OVERRIDE);
}
final String missingArgs = String.join(" and ", missing);

// this potentially produces an exception with bad grammar, but that's probably ok
throw new CommandLineException.MissingArgument(missingArgs, "If --" +AVOID_NIO + " is set then --" + BYPASS_FEATURE_READER
+ " and --" + VCF_HEADER_OVERRIDE + " must also be specified.");

}
}

Expand Down Expand Up @@ -550,7 +556,7 @@ private static void assertIntervalsCoverEntireContigs(GenomicsDBImporter importe
*/
private void initializeHeaderAndSampleMappings() {
// Only one of -V and --sampleNameMapFile may be specified
if (variantPaths != null && variantPaths.size() > 0) {
if (variantPaths != null && !variantPaths.isEmpty()) {
// -V was specified
final List<VCFHeader> headers = new ArrayList<>(variantPaths.size());
sampleNameMap = new SampleNameMap();
Expand Down Expand Up @@ -669,9 +675,9 @@ private void writeIntervalListToFile() {
@Override
public void onTraversalStart() {
String workspaceDir = overwriteCreateOrCheckWorkspace();
vidMapJSONFile = genomicsDBApppendPaths(workspaceDir, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME);
callsetMapJSONFile = genomicsDBApppendPaths(workspaceDir, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME);
vcfHeaderFile = genomicsDBApppendPaths(workspaceDir, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME);
vidMapJSONFile = GATKGenomicsDBUtils.genomicsDBApppendPaths(workspaceDir, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME);
callsetMapJSONFile = GATKGenomicsDBUtils.genomicsDBApppendPaths(workspaceDir, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME);
vcfHeaderFile = GATKGenomicsDBUtils.genomicsDBApppendPaths(workspaceDir, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME);
if (getIntervalsFromExistingWorkspace) {
// intervals may be null if merge-contigs-into-num-partitions was used to create the workspace
// if so, we need to wait for vid to be generated before writing out the interval list
Expand Down Expand Up @@ -811,7 +817,7 @@ private List<SimpleInterval> generateIntervalListFromWorkspace() {
final int start = Integer.parseInt(partitionInfo[1]);
final int end = Integer.parseInt(partitionInfo[2]);
return new SimpleInterval(contig, start, end);
}).filter(o -> o != null).collect(Collectors.toList());
}).filter(Objects::nonNull).collect(Collectors.toList());
}

private ImportConfig createImportConfig(final int batchSize) {
Expand Down Expand Up @@ -1027,7 +1033,7 @@ public CloseableTribbleIterator<VariantContext> iterator() throws IOException {
* @return The workspace directory
*/
private String overwriteCreateOrCheckWorkspace() {
String workspaceDir = genomicsDBGetAbsolutePath(workspace);
String workspaceDir = GATKGenomicsDBUtils.genomicsDBGetAbsolutePath(workspace);
// From JavaDoc for GATKGenomicsDBUtils.createTileDBWorkspacevid
// returnCode = 0 : OK. If overwriteExistingWorkspace is true and the workspace exists, it is deleted first.
// returnCode = -1 : path was not a directory
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ public final class GenomicsDBImportIntegrationTest extends CommandLineProgramTes
//This file was obtained from combined.gatk3.7.g.vcf.gz by dropping all the samples
private static final String COMBINED_SITES_ONLY = largeFileTestDir + "gvcfs/combined.gatk3.7_sites_only.g.vcf.gz";
private static final String INTERVAL_PICARD_STYLE_EXPECTED = toolsTestDir + "GenomicsDBImport/interval_expected.interval_list";
private static final String MULTIPLE_NON_ADJACENT_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS_PICARD_STYLE_EXPECTED =
private static final String MULTIPLE_NON_ADJACENT_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS_PICARD_STYLE_EXPECTED =
toolsTestDir + "GenomicsDBImport/multiple_non_adjacent_intervals_combine_gvcfs_expected.interval_list";
private static final String MERGED_CONTIGS_INTERVAL_PICARD_STYLE_EXPECTED =
toolsTestDir + "GenomicsDBImport/chr20_chr21_merged_contigs_expected.interval_list";
Expand Down Expand Up @@ -383,6 +383,35 @@ public void testGenomicsDbImportThrowsOnMnp() throws IOException {
}
}

/**
 * Provides invalid argument combinations involving --avoid-nio, each paired with the
 * exception type expected from command line parsing/validation.
 */
@DataProvider
public Object[][] getInvalidArgsForAvoidNio(){
    // base case: workspace + interval + --avoid-nio flag, with no input source yet
    final ArgumentsBuilder baseArgs = ArgumentsBuilder.create()
            .add(GenomicsDBImport.WORKSPACE_ARG_LONG_NAME, createTempFile())
            .addInterval("fake")
            .addFlag(GenomicsDBImport.AVOID_NIO);
    return new Object[][]{
            {baseArgs, CommandLineException.MissingArgument.class}, //no input
            {baseArgs.copy()
                    .addVCF("fake.vcf"), CommandLineException.class
            }, //not allowed with variant, we should have some sort of mutex exception... (NOTE(review): exact subtype unconfirmed, so only CommandLineException is asserted)
            {baseArgs.copy()
                    .add(GenomicsDBImport.SAMPLE_NAME_MAP_LONG_NAME, "fake.samplenames"), CommandLineException.MissingArgument.class
            }, //missing header
            {baseArgs.copy()
                    .add(GenomicsDBImport.VCF_HEADER_OVERRIDE, "fake.vcf"), CommandLineException.MissingArgument.class
            }, //missing input
            {baseArgs.copy()
                    .add(GenomicsDBImport.VCF_HEADER_OVERRIDE, "fake.vcf")
                    .addVCF("fake.vcf"), CommandLineException.class // can't use with -V
            }
    };
}

/**
 * Verifies that each invalid --avoid-nio argument combination from the data provider
 * fails with the expected exception type rather than proceeding with the import.
 */
@Test(dataProvider = "getInvalidArgsForAvoidNio")
public void testInvalidArgumentCombinationsWithAvoidNio(ArgumentsBuilder args, Class<? extends Exception> expectedException){
    Assert.assertThrows(expectedException, () -> runCommandLine(args));
}

private void testGenomicsDBImporterWithGenotypes(final List<String> vcfInputs, final List<SimpleInterval> intervals,
final String expectedCombinedVCF,
final String referenceFile) throws IOException {
Expand All @@ -408,7 +437,7 @@ private void testGenomicsDBImporterWithGenotypes(final List<String> vcfInputs, f
final boolean testAll,
final boolean produceGTField,
final boolean sitesOnlyQuery) throws IOException {
testGenomicsDBImporterWithGenotypes(vcfInputs, intervals, expectedCombinedVCF, referenceFile, testAll, produceGTField,
testGenomicsDBImporterWithGenotypes(vcfInputs, intervals, expectedCombinedVCF, referenceFile, testAll, produceGTField,
sitesOnlyQuery, false);
}

Expand Down Expand Up @@ -456,11 +485,11 @@ private void testGenomicsDBAgainstCombineGVCFs(final List<String> vcfInputs, fin

private void testGenomicsDBAgainstCombineGVCFs(final List<String> vcfInputs, final List<SimpleInterval> intervals,
final String referenceFile, final String[] CombineGVCFArgs,
final int numVCFReaderThreadsInImporter, final int chrsToPartitions,
final int numVCFReaderThreadsInImporter, final int chrsToPartitions,
final boolean useNativeReader) throws IOException {
final String workspace = createTempDir("genomicsdb-tests-").getAbsolutePath() + "/workspace";

writeToGenomicsDB(vcfInputs, intervals, workspace, 0, false, 0, numVCFReaderThreadsInImporter, false, false, false,
writeToGenomicsDB(vcfInputs, intervals, workspace, 0, false, 0, numVCFReaderThreadsInImporter, false, false, false,
chrsToPartitions, useNativeReader);
checkJSONFilesAreWritten(workspace);
for(SimpleInterval currInterval : intervals) {
Expand Down Expand Up @@ -504,7 +533,7 @@ public void testGenomicsDBAlleleSpecificAnnotationsInTheMiddleOfSpanningDeletion

@Test
public void testGenomicsDBNoRemapMissingToNonRef() throws IOException {
testGenomicsDBAgainstCombineGVCFs(Arrays.asList(COMBINEGVCFS_TEST_DIR+"NA12878.AS.NON_REF_remap_check.chr20snippet.g.vcf",
testGenomicsDBAgainstCombineGVCFs(Arrays.asList(COMBINEGVCFS_TEST_DIR+"NA12878.AS.NON_REF_remap_check.chr20snippet.g.vcf",
COMBINEGVCFS_TEST_DIR+"NA12892.AS.chr20snippet.g.vcf"),
new ArrayList<SimpleInterval>(Arrays.asList(new SimpleInterval("20", 10433313, 10700000))),
b37_reference_20_21,
Expand Down Expand Up @@ -671,14 +700,14 @@ private void writeToGenomicsDB(final List<String> vcfInputs, final List<SimpleIn
}

private void writeToGenomicsDB(final List<String> vcfInputs, final List<SimpleInterval> intervals, final String workspace,
final int batchSize, final Boolean useBufferSize, final int bufferSizePerSample, int threads,
final int batchSize, final Boolean useBufferSize, final int bufferSizePerSample, int threads,
final boolean mergeIntervals, final boolean overwriteWorkspace, final boolean incremental) {
writeToGenomicsDB(vcfInputs, intervals, workspace, batchSize, useBufferSize, bufferSizePerSample, threads, mergeIntervals,
writeToGenomicsDB(vcfInputs, intervals, workspace, batchSize, useBufferSize, bufferSizePerSample, threads, mergeIntervals,
overwriteWorkspace, incremental, 0, false);
}

private void writeToGenomicsDB(final List<String> vcfInputs, final List<SimpleInterval> intervals, final String workspace,
final int batchSize, final Boolean useBufferSize, final int bufferSizePerSample, int threads,
final int batchSize, final Boolean useBufferSize, final int bufferSizePerSample, int threads,
final boolean mergeIntervals, final boolean overwriteWorkspace, final boolean incremental,
final int chrsToPartitions, final boolean useNativeReader) {
final ArgumentsBuilder args = new ArgumentsBuilder();
Expand Down Expand Up @@ -1013,7 +1042,7 @@ public Object[][] dataForTestExplicitIndicesInSampleNameMapInTheCloud() {
final String NA19625_UNCOMPRESSED_WITH_INDEX = GVCFS_WITH_INDICES_BUCKET + "NA19625.g.vcf";
final String NA19625_UNCOMPRESSED_NO_INDEX = GVCFS_WITHOUT_INDICES_BUCKET + "NA19625.g.vcf";
final String NA19625_UNCOMPRESSED_INDEX = GVCF_INDICES_ONLY_BUCKET + "NA19625.g.vcf.idx";

return new Object[][] {
// All VCFs have explicit indices, samples in order, TABIX index
{
Expand Down Expand Up @@ -1371,7 +1400,7 @@ public void testIncrementalMustHaveExistingWorkspace() {
writeToGenomicsDB(LOCAL_GVCFS, INTERVAL, workspace + "workspace2", 0, false, 0, 1, false, false, true);
}

private void testIncrementalImport(final int stepSize, final List<SimpleInterval> intervals, final String workspace,
private void testIncrementalImport(final int stepSize, final List<SimpleInterval> intervals, final String workspace,
final int batchSize, final boolean produceGTField, final boolean useVCFCodec, final String expected,
final int chrsToPartitions, final boolean useNativeReader) throws IOException {
testIncrementalImport(stepSize, intervals, workspace, batchSize, produceGTField, useVCFCodec, expected,
Expand All @@ -1384,7 +1413,7 @@ private void testIncrementalImport(final int stepSize, final List<SimpleInterval
throws IOException {
for(int i=0; i<LOCAL_GVCFS.size(); i+=stepSize) {
int upper = Math.min(i+stepSize, LOCAL_GVCFS.size());
writeToGenomicsDB(LOCAL_GVCFS.subList(i, upper), intervals, workspace, batchSize, false, 0, 1, false, false, i!=0,
writeToGenomicsDB(LOCAL_GVCFS.subList(i, upper), intervals, workspace, batchSize, false, 0, 1, false, false, i!=0,
chrsToPartitions, (i == 0 && useNativeReaderInitial) || (i > 0 && useNativeReader));
checkJSONFilesAreWritten(workspace);
}
Expand Down Expand Up @@ -1434,7 +1463,7 @@ public void testGenomicsDBIncrementalAndBatchSize1WithNonAdjacentIntervalsNative

@Test(expectedExceptions = {UserException.class}, expectedExceptionsMessageRegExp=".*must be block compressed.*")
public void testGenomicsDBImportNativeReaderNoCompressedVcf() throws IOException {
testGenomicsDBImporterWithGenotypes(Arrays.asList(NA_12878_PHASED), MULTIPLE_INTERVALS, NA_12878_PHASED, b37_reference_20_21,
testGenomicsDBImporterWithGenotypes(Arrays.asList(NA_12878_PHASED), MULTIPLE_INTERVALS, NA_12878_PHASED, b37_reference_20_21,

Check warning on line 1466 in src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java

View check run for this annotation

Codecov / codecov/patch

src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java#L1466

Added line #L1466 was not covered by tests
false, true, false, true);
}

Expand All @@ -1448,14 +1477,14 @@ public void testGenomicsDBIncrementalAndBatchSize1WithNonAdjacentIntervalsMergeC
@Test
public void testGenomicsDBIncrementalAndBatchSize2() throws IOException {
final String workspace = createTempDir("genomicsdb-incremental-tests").getAbsolutePath() + "/workspace";
testIncrementalImport(2, MULTIPLE_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS, workspace, 2, true, false,
testIncrementalImport(2, MULTIPLE_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS, workspace, 2, true, false,
COMBINED_WITH_GENOTYPES, 0, false);
}

@Test
public void testGenomicsDBMultipleIncrementalImports() throws IOException {
final String workspace = createTempDir("genomicsdb-incremental-tests").getAbsolutePath() + "/workspace";
testIncrementalImport(1, MULTIPLE_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS, workspace, 2, true, true,
testIncrementalImport(1, MULTIPLE_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS, workspace, 2, true, true,
COMBINED_WITH_GENOTYPES, 0, false);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,6 @@ public Object[][] shuffleParameters() {
return new Object[][] { { false }, { true } };
}

// Returns a File pointing at a unique temp path that does NOT exist on disk:
// the temp file is created to reserve a unique name, then deleted so the tool
// under test can create the output itself.
// NOTE(review): the boolean result of delete() is ignored — assumed to succeed.
private File createTempFile() throws IOException {
    final File out = IOUtils.createTempFile("out", ".txt");
    out.delete();
    return out;
}

@Test(dataProvider = "shuffle")
public void testSimplePileup(boolean useShuffle) throws Exception {
final File out = createTempFile();
Expand Down
Loading

0 comments on commit 8fd8e7f

Please sign in to comment.