GenomicsDB on azure #8438

Merged: 1 commit, Dec 9, 2023

build.gradle (4 changes: 2 additions & 2 deletions)
@@ -63,7 +63,7 @@ final barclayVersion = System.getProperty('barclay.version','5.0.0')
final sparkVersion = System.getProperty('spark.version', '3.3.1')
final hadoopVersion = System.getProperty('hadoop.version', '3.3.6')
final disqVersion = System.getProperty('disq.version','0.3.8')
-final genomicsdbVersion = System.getProperty('genomicsdb.version','1.5.0')
+final genomicsdbVersion = System.getProperty('genomicsdb.version','1.5.1')
final bigQueryVersion = System.getProperty('bigQuery.version', '2.35.0')
final bigQueryStorageVersion = System.getProperty('bigQueryStorage.version', '2.47.0')
final guavaVersion = System.getProperty('guava.version', '32.1.3-jre')
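
These build.gradle lines read each dependency version from a JVM system property with a default fallback, so any version can be overridden at build time (for example by passing -Dgenomicsdb.version=1.5.1 to the build). A minimal Java sketch of the same lookup-with-default pattern (not part of this PR; the class name is hypothetical):

// Demonstrates the System.getProperty(key, default) pattern used in build.gradle above:
// the value comes from -Dgenomicsdb.version=... if supplied, otherwise the default is used.
public class VersionLookupSketch {
    public static void main(final String[] args) {
        final String genomicsdbVersion = System.getProperty("genomicsdb.version", "1.5.1");
        System.out.println("genomicsdb version: " + genomicsdbVersion);
    }
}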
@@ -976,7 +976,7 @@ ossIndexAudit {
outputFormat = 'DEFAULT' // Optional, other values are: 'DEPENDENCY_GRAPH' prints dependency graph showing direct/transitive dependencies, 'JSON_CYCLONE_DX_1_4' prints a CycloneDX 1.4 SBOM in JSON format.
showAll = false // if true prints all dependencies. By default is false, meaning only dependencies with vulnerabilities will be printed.
printBanner = true // if true will print ASCII text banner. By default is true.

// ossIndexAudit can be configured to exclude vulnerabilities from matching
// excludeVulnerabilityIds = ['39d74cc8-457a-4e57-89ef-a258420138c5'] // list containing ids of vulnerabilities to be ignored
// excludeCoordinates = ['commons-fileupload:commons-fileupload:1.3'] // list containing coordinate of components which if vulnerable should be ignored

[One changed file's diff is omitted: large diffs are not rendered by default.]

src/test/java/org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImportIntegrationTest.java
@@ -101,7 +101,7 @@
//This file was obtained from combined.gatk3.7.g.vcf.gz by dropping all the samples
private static final String COMBINED_SITES_ONLY = largeFileTestDir + "gvcfs/combined.gatk3.7_sites_only.g.vcf.gz";
private static final String INTERVAL_PICARD_STYLE_EXPECTED = toolsTestDir + "GenomicsDBImport/interval_expected.interval_list";
-private static final String MULTIPLE_NON_ADJACENT_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS_PICARD_STYLE_EXPECTED =
+private static final String MULTIPLE_NON_ADJACENT_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS_PICARD_STYLE_EXPECTED =
toolsTestDir + "GenomicsDBImport/multiple_non_adjacent_intervals_combine_gvcfs_expected.interval_list";
private static final String MERGED_CONTIGS_INTERVAL_PICARD_STYLE_EXPECTED =
toolsTestDir + "GenomicsDBImport/chr20_chr21_merged_contigs_expected.interval_list";
@@ -383,6 +383,74 @@
}
}

@DataProvider
public Object[][] getInvalidArgsForAvoidNio(){
final ArgumentsBuilder baseArgs = ArgumentsBuilder.create()
.add(GenomicsDBImport.WORKSPACE_ARG_LONG_NAME, createTempFile())
.addInterval("fake")
.addFlag(GenomicsDBImport.AVOID_NIO);
return new Object[][]{
{baseArgs, CommandLineException.MissingArgument.class}, //no input
{baseArgs.copy()
.addVCF("fake.vcf"), CommandLineException.class
}, //not allowed with variant, we should have some sort of mutex exception...
{baseArgs.copy()
.add(GenomicsDBImport.SAMPLE_NAME_MAP_LONG_NAME, "fake.samplenames"), CommandLineException.MissingArgument.class
}, //missing header
{baseArgs.copy()
.add(GenomicsDBImport.VCF_HEADER_OVERRIDE, "fake.vcf"), CommandLineException.MissingArgument.class
}, //missing input
{baseArgs.copy()
.add(GenomicsDBImport.VCF_HEADER_OVERRIDE, "fake.vcf")
.addVCF("fake.vcf"), CommandLineException.class // can't use with -V
}
};
}

@Test(dataProvider = "getInvalidArgsForAvoidNio")
public void testInvalidArgumentCombinationsWithAvoidNio(ArgumentsBuilder args, Class<? extends Exception> expectedException){
Assert.assertThrows(expectedException, () -> runCommandLine(args));
}

/*
 * This is a test that can be run locally if you enable it and fill in the SAS token with one from
 * https://app.terra.bio/#workspaces/axin-pipeline-testing-20230927/gatk-azure-testing
 *
 * It's basically an example of how to run the tool on Azure.
 *
 * Note that the http url for Azure files looks like this:
 *
 *   https://<bucket_name>.blob.core.windows.net/<user_name>/<filepath>?<sas token>
 *
 * The SAS token generally includes the '?'.
 *
 * To restructure that into an az:// link you move the user name:
 *
 *   az://<user_name>@<bucket_name>.blob.core.windows.net/<filepath>
 *
 * (See the sketch after this test for this restructuring.)
 */
@Test(enabled = false, groups={"cloud","azure"})
public void testImportFromAzure(){

final String SAS_TOKEN="put a sas token in me";

[Codecov / codecov/patch — added line #L433 was not covered by tests]

final String workspace = createTempDir("genomicsdb-tests-").getAbsolutePath() + "/workspace";
final String sample = "NA19625";
final String azLocation = "az://lzb25a77f5eadb0fa72a2ae7.blob.core.windows.net/sc-62528cd7-3299-4440-8c17-10f458e589d3/NA19625.g.vcf.gz";
final String sampleMapText = String.format("%s\t%s\n", sample, azLocation);
final File sampleMappingFile = IOUtils.writeTempFile(sampleMapText, "sampleMapping", ".txt");

[Codecov / codecov/patch — added lines #L435 - #L439 were not covered by tests]

final ArgumentsBuilder args = ArgumentsBuilder.create()
.add(GenomicsDBImport.WORKSPACE_ARG_LONG_NAME, workspace)
.addInterval("chr20")
.addFlag(GenomicsDBImport.AVOID_NIO)
.add(GenomicsDBImport.SAMPLE_NAME_MAP_LONG_NAME, sampleMappingFile)
.addFlag(GenomicsDBImport.BYPASS_FEATURE_READER)
.add(GenomicsDBImport.VCF_HEADER_OVERRIDE, GENOMICSDB_TEST_DIR + "azureHeader.vcf");
Map<String, String> environment = new HashMap<>(System.getenv());
final String sasTokenEnvVariable = "AZURE_STORAGE_SAS_TOKEN";
environment.put(sasTokenEnvVariable, SAS_TOKEN);
runToolInNewJVM(GenomicsDBImport.class.getSimpleName(), args, environment);
}

[Codecov / codecov/patch — added lines #L441 - #L452 were not covered by tests]
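
For reference, a minimal standalone sketch of the https-to-az:// restructuring described in the comment above (not part of this PR; the class and method names are hypothetical, and the SAS token in the example is a placeholder):

import java.net.URI;
import java.net.URISyntaxException;

public final class AzureUriSketch {
    // Restructures https://<bucket_name>.blob.core.windows.net/<user_name>/<filepath>?<sas token>
    // into         az://<user_name>@<bucket_name>.blob.core.windows.net/<filepath>
    // Assumes the path has the <user_name>/<filepath> shape shown in the comment above;
    // the query string (the SAS token, '?' included) is simply dropped here.
    public static String toAzUri(final String httpsUrl) throws URISyntaxException {
        final URI uri = new URI(httpsUrl);
        final String host = uri.getHost();              // <bucket_name>.blob.core.windows.net
        final String path = uri.getPath();              // /<user_name>/<filepath>
        final int slash = path.indexOf('/', 1);         // end of the first path segment
        final String userName = path.substring(1, slash);
        final String filePath = path.substring(slash + 1);
        return String.format("az://%s@%s/%s", userName, host, filePath);
    }

    public static void main(final String[] args) throws URISyntaxException {
        // Prints az://sc-62528cd7-3299-4440-8c17-10f458e589d3@lzb25a77f5eadb0fa72a2ae7.blob.core.windows.net/NA19625.g.vcf.gz
        System.out.println(toAzUri(
                "https://lzb25a77f5eadb0fa72a2ae7.blob.core.windows.net"
                + "/sc-62528cd7-3299-4440-8c17-10f458e589d3/NA19625.g.vcf.gz?sv=2021-08-06&sig=REDACTED"));
    }
}

As in the test above, the SAS token is not embedded in the az:// link; it is passed to the tool separately through the AZURE_STORAGE_SAS_TOKEN environment variable.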

private void testGenomicsDBImporterWithGenotypes(final List<String> vcfInputs, final List<SimpleInterval> intervals,
final String expectedCombinedVCF,
final String referenceFile) throws IOException {
@@ -408,7 +476,7 @@
final boolean testAll,
final boolean produceGTField,
final boolean sitesOnlyQuery) throws IOException {
-testGenomicsDBImporterWithGenotypes(vcfInputs, intervals, expectedCombinedVCF, referenceFile, testAll, produceGTField,
+testGenomicsDBImporterWithGenotypes(vcfInputs, intervals, expectedCombinedVCF, referenceFile, testAll, produceGTField,
sitesOnlyQuery, false);
}

@@ -456,11 +524,11 @@

private void testGenomicsDBAgainstCombineGVCFs(final List<String> vcfInputs, final List<SimpleInterval> intervals,
final String referenceFile, final String[] CombineGVCFArgs,
-final int numVCFReaderThreadsInImporter, final int chrsToPartitions,
+final int numVCFReaderThreadsInImporter, final int chrsToPartitions,
final boolean useNativeReader) throws IOException {
final String workspace = createTempDir("genomicsdb-tests-").getAbsolutePath() + "/workspace";

-writeToGenomicsDB(vcfInputs, intervals, workspace, 0, false, 0, numVCFReaderThreadsInImporter, false, false, false,
+writeToGenomicsDB(vcfInputs, intervals, workspace, 0, false, 0, numVCFReaderThreadsInImporter, false, false, false,
chrsToPartitions, useNativeReader);
checkJSONFilesAreWritten(workspace);
for(SimpleInterval currInterval : intervals) {
@@ -504,7 +572,7 @@

@Test
public void testGenomicsDBNoRemapMissingToNonRef() throws IOException {
-testGenomicsDBAgainstCombineGVCFs(Arrays.asList(COMBINEGVCFS_TEST_DIR+"NA12878.AS.NON_REF_remap_check.chr20snippet.g.vcf",
+testGenomicsDBAgainstCombineGVCFs(Arrays.asList(COMBINEGVCFS_TEST_DIR+"NA12878.AS.NON_REF_remap_check.chr20snippet.g.vcf",
COMBINEGVCFS_TEST_DIR+"NA12892.AS.chr20snippet.g.vcf"),
new ArrayList<SimpleInterval>(Arrays.asList(new SimpleInterval("20", 10433313, 10700000))),
b37_reference_20_21,
@@ -671,14 +739,14 @@
}

private void writeToGenomicsDB(final List<String> vcfInputs, final List<SimpleInterval> intervals, final String workspace,
-final int batchSize, final Boolean useBufferSize, final int bufferSizePerSample, int threads,
+final int batchSize, final Boolean useBufferSize, final int bufferSizePerSample, int threads,
final boolean mergeIntervals, final boolean overwriteWorkspace, final boolean incremental) {
-writeToGenomicsDB(vcfInputs, intervals, workspace, batchSize, useBufferSize, bufferSizePerSample, threads, mergeIntervals,
+writeToGenomicsDB(vcfInputs, intervals, workspace, batchSize, useBufferSize, bufferSizePerSample, threads, mergeIntervals,
overwriteWorkspace, incremental, 0, false);
}

private void writeToGenomicsDB(final List<String> vcfInputs, final List<SimpleInterval> intervals, final String workspace,
-final int batchSize, final Boolean useBufferSize, final int bufferSizePerSample, int threads,
+final int batchSize, final Boolean useBufferSize, final int bufferSizePerSample, int threads,
final boolean mergeIntervals, final boolean overwriteWorkspace, final boolean incremental,
final int chrsToPartitions, final boolean useNativeReader) {
final ArgumentsBuilder args = new ArgumentsBuilder();
@@ -1013,7 +1081,7 @@
final String NA19625_UNCOMPRESSED_WITH_INDEX = GVCFS_WITH_INDICES_BUCKET + "NA19625.g.vcf";
final String NA19625_UNCOMPRESSED_NO_INDEX = GVCFS_WITHOUT_INDICES_BUCKET + "NA19625.g.vcf";
final String NA19625_UNCOMPRESSED_INDEX = GVCF_INDICES_ONLY_BUCKET + "NA19625.g.vcf.idx";

return new Object[][] {
// All VCFs have explicit indices, samples in order, TABIX index
{
@@ -1371,7 +1439,7 @@
writeToGenomicsDB(LOCAL_GVCFS, INTERVAL, workspace + "workspace2", 0, false, 0, 1, false, false, true);
}

-private void testIncrementalImport(final int stepSize, final List<SimpleInterval> intervals, final String workspace,
+private void testIncrementalImport(final int stepSize, final List<SimpleInterval> intervals, final String workspace,
final int batchSize, final boolean produceGTField, final boolean useVCFCodec, final String expected,
final int chrsToPartitions, final boolean useNativeReader) throws IOException {
testIncrementalImport(stepSize, intervals, workspace, batchSize, produceGTField, useVCFCodec, expected,
@@ -1384,7 +1452,7 @@
throws IOException {
for(int i=0; i<LOCAL_GVCFS.size(); i+=stepSize) {
int upper = Math.min(i+stepSize, LOCAL_GVCFS.size());
-writeToGenomicsDB(LOCAL_GVCFS.subList(i, upper), intervals, workspace, batchSize, false, 0, 1, false, false, i!=0,
+writeToGenomicsDB(LOCAL_GVCFS.subList(i, upper), intervals, workspace, batchSize, false, 0, 1, false, false, i!=0,
chrsToPartitions, (i == 0 && useNativeReaderInitial) || (i > 0 && useNativeReader));
checkJSONFilesAreWritten(workspace);
}
@@ -1434,7 +1502,7 @@

@Test(expectedExceptions = {UserException.class}, expectedExceptionsMessageRegExp=".*must be block compressed.*")
public void testGenomicsDBImportNativeReaderNoCompressedVcf() throws IOException {
-testGenomicsDBImporterWithGenotypes(Arrays.asList(NA_12878_PHASED), MULTIPLE_INTERVALS, NA_12878_PHASED, b37_reference_20_21,
+testGenomicsDBImporterWithGenotypes(Arrays.asList(NA_12878_PHASED), MULTIPLE_INTERVALS, NA_12878_PHASED, b37_reference_20_21,
false, true, false, true);

[Codecov / codecov/patch — added line #L1505 was not covered by tests]
}

@@ -1448,14 +1516,14 @@
@Test
public void testGenomicsDBIncrementalAndBatchSize2() throws IOException {
final String workspace = createTempDir("genomicsdb-incremental-tests").getAbsolutePath() + "/workspace";
-testIncrementalImport(2, MULTIPLE_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS, workspace, 2, true, false,
+testIncrementalImport(2, MULTIPLE_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS, workspace, 2, true, false,
COMBINED_WITH_GENOTYPES, 0, false);
}

@Test
public void testGenomicsDBMultipleIncrementalImports() throws IOException {
final String workspace = createTempDir("genomicsdb-incremental-tests").getAbsolutePath() + "/workspace";
-testIncrementalImport(1, MULTIPLE_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS, workspace, 2, true, true,
+testIncrementalImport(1, MULTIPLE_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS, workspace, 2, true, true,
COMBINED_WITH_GENOTYPES, 0, false);
}

src/test/java/org/broadinstitute/hellbender/tools/spark/PileupSparkIntegrationTest.java
@@ -25,15 +25,15 @@
return new Object[][] { { false }, { true } };
}

-private File createTempFile() throws IOException {
+private File createAndDeleteTempFile() {
final File out = IOUtils.createTempFile("out", ".txt");
out.delete();
return out;
}

@Test(dataProvider = "shuffle")
public void testSimplePileup(boolean useShuffle) throws Exception {
-final File out = createTempFile();
+final File out = createAndDeleteTempFile();
final ArgumentsBuilder args = new ArgumentsBuilder();
args.addRaw("--input");
args.addRaw(NA12878_20_21_WGS_bam);
@@ -53,7 +53,7 @@

@Test(dataProvider = "shuffle")
public void testVerbosePileup(boolean useShuffle) throws Exception {
-final File out = createTempFile();
+final File out = createAndDeleteTempFile();
final ArgumentsBuilder args = new ArgumentsBuilder();
args.addRaw("--input");
args.addRaw(NA12878_20_21_WGS_bam);
@@ -74,7 +74,7 @@

@Test(dataProvider = "shuffle")
public void testFeaturesPileup(boolean useShuffle) throws Exception {
-final File out = createTempFile();
+final File out = createAndDeleteTempFile();
final ArgumentsBuilder args = new ArgumentsBuilder();
args.addRaw("--input");
args.addRaw(NA12878_20_21_WGS_bam);
@@ -95,7 +95,7 @@

@Test(dataProvider = "shuffle")
public void testInsertLengthPileup(boolean useShuffle) throws Exception {
-final File out = createTempFile();
+final File out = createAndDeleteTempFile();
final ArgumentsBuilder args = new ArgumentsBuilder();
args.addRaw("--input");
args.addRaw(NA12878_20_21_WGS_bam);
@@ -128,7 +128,7 @@
cluster.getFileSystem().copyFromLocalFile(new Path(dbsnp_138_b37_20_21_vcf), vcfPath);
cluster.getFileSystem().copyFromLocalFile(new Path(dbsnp_138_b37_20_21_vcf + ".idx"), idxPath);

-final File out = createTempFile();
+final File out = createAndDeleteTempFile();

[Codecov / codecov/patch — added line #L131 was not covered by tests]
final ArgumentsBuilder args = new ArgumentsBuilder();
args.addRaw("--input");
args.addRaw(NA12878_20_21_WGS_bam);
Expand Down