From 8d1db54eca0ade5c570828cb010dc75b96d0f870 Mon Sep 17 00:00:00 2001 From: Ted Sharpe Date: Tue, 7 Jun 2022 12:26:20 -0400 Subject: [PATCH] tests, doc --- .../hellbender/tools/DumpTabixIndex.java | 203 ++++++++++-------- .../utils/io/FeatureOutputStream.java | 8 +- .../tools/DumpTabixIndexIntegrationTest.java | 25 +++ .../utils/io/FeatureOutputStreamUnitTest.java | 13 +- .../broadinstitute/hellbender/tools/test.tbi | Bin 0 -> 101 bytes .../hellbender/tools/test.tbi.expected.txt | 10 + 6 files changed, 166 insertions(+), 93 deletions(-) create mode 100644 src/test/java/org/broadinstitute/hellbender/tools/DumpTabixIndexIntegrationTest.java create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/test.tbi create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/test.tbi.expected.txt diff --git a/src/main/java/org/broadinstitute/hellbender/tools/DumpTabixIndex.java b/src/main/java/org/broadinstitute/hellbender/tools/DumpTabixIndex.java index 4f4f2513eba..2c8d9896424 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/DumpTabixIndex.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/DumpTabixIndex.java @@ -1,123 +1,146 @@ package org.broadinstitute.hellbender.tools; +import htsjdk.samtools.util.FileExtensions; import org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; -import org.broadinstitute.barclay.argparser.ExperimentalFeature; +import org.broadinstitute.barclay.help.DocumentedFeature; import org.broadinstitute.hellbender.cmdline.CommandLineProgram; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.engine.GATKPath; import org.broadinstitute.hellbender.exceptions.UserException; import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; +import java.io.*; import java.util.ArrayList; import java.util.List; import java.util.zip.GZIPInputStream; @CommandLineProgramProperties( - summary = "Prints a tabix index file as text.", + summary = "Prints a text file describing the contents of the tabix index input file.", oneLineSummary = "Dumps a tabix index file.", + usageExample = "gatk DumpTabixIndex -I tabixIndex.tbi -O output.txt", programGroup = DiagnosticsAndQCProgramGroup.class ) -@ExperimentalFeature +@DocumentedFeature public class DumpTabixIndex extends CommandLineProgram { @Argument( doc = "Tabix index file.", fullName = "tabix-index", shortName = StandardArgumentDefinitions.INPUT_SHORT_NAME) - String tabixIndexFile; + GATKPath tabixIndexFile; + + @Argument( doc = "Output file.", + fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, + shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME) + GATKPath outputFile; @Override protected Object doWork() { - try ( final InputStream is = new GZIPInputStream(new FileInputStream(tabixIndexFile)) ) { - if ( readByte(is) != 'T' || readByte(is) != 'B' || - readByte(is) != 'I' || readByte(is) != 1 ) { - throw new UserException("Incorrect magic number for tabix index"); - } - final int nTigs = readInt(is); - final int format = readInt(is); - final int seqCol = readInt(is); - final int begCol = readInt(is); - final int endCol = readInt(is); - final char meta = (char)readInt(is); - final int skip = readInt(is); - final int namesLen = readInt(is); - System.out.println("#tigs\tformat\tseqCol\tbegCol\tendCol\tmetaChr\tskip"); - System.out.println(nTigs + "\t" + format + "\t" + seqCol + "\t" + begCol + "\t" + - endCol + "\t" + meta + "\t" + skip + "\n"); - final List tigNames = readContigNames(is, nTigs, namesLen); - for ( final String tigName : tigNames ) { - System.out.println(tigName + " binned index:"); - int nBins = readInt(is); - while ( nBins-- > 0 ) { - final int binNo = readInt(is); - int nChunks = readInt(is); - if ( binNo > 37448 ) { - if ( nChunks != 2 ) { - throw new UserException("pseudobin has " + nChunks + " chunks"); - } - final long chunkStart = readLong(is); - final long chunkEnd = readLong(is); - final long nMapped = readLong(is); - final long nUnmapped = readLong(is); - System.out.println(tigName + " summary: mapped=" + nMapped + - "\tplaced=" + nUnmapped + - "\tstart=" + Long.toHexString(chunkStart >>> 16) + - ":" + Long.toHexString(chunkStart & 0xffff) + - "\tend=" + Long.toHexString(chunkEnd >>> 16) + - ":" + Long.toHexString(chunkStart & 0xffff)); - continue; - } - if ( binNo == 0 ) { - System.out.print(binNo + "\t" + tigName + ":0M-512M\t"); - } else if ( binNo <= 8 ) { - final int binStart = (binNo - 1)*64; - System.out.print(binNo + "\t" + tigName + ":" + binStart + "M-" + (binStart+64) + "M\t"); - } else if ( binNo <= 72 ) { - final int binStart = (binNo - 9)*8; - System.out.print(binNo + "\t" + tigName + ":" + binStart + "M-" + (binStart+8) + "M\t"); - } else if ( binNo <= 584 ) { - final int binStart = binNo - 73; - System.out.print(binNo + "\t" + tigName + ":" + binStart + "M-" + (binStart + 1) + "M\t"); - } else if ( binNo <= 4680 ) { - final int binStart = (binNo - 585)*128; - System.out.print(binNo + "\t" + tigName + ":" + binStart + "K-" + (binStart + 128) + "K\t"); - } else { - final int binStart = (binNo - 4681)*16; - System.out.print(binNo + "\t" + tigName + ":" + binStart + "K-" + (binStart + 16) + "K\t"); - } - while ( nChunks-- > 0 ) { - final long chunkStart = readLong(is); - final long chunkEnd = readLong(is); - System.out.print("\t" + Long.toHexString(chunkStart >>> 16) + - ":" + Long.toHexString(chunkStart & 0xffff) + - "->" + Long.toHexString(chunkEnd >>> 16) + - ":" + Long.toHexString(chunkEnd & 0xffff)); + if ( !tabixIndexFile.hasExtension(FileExtensions.TABIX_INDEX) ) { + throw new UserException("Expected a " + FileExtensions.TABIX_INDEX + " file as input."); + } + + final PrintStream writer = new PrintStream(outputFile.getOutputStream()); + try ( final PushbackInputStream is = + new PushbackInputStream(new GZIPInputStream(tabixIndexFile.getInputStream())) ) { + dumpTabixIndex(is, writer); + } catch ( final IOException ioe ) { + throw new UserException("Trouble reading index.", ioe); + } + if ( writer.checkError() ) { + throw new UserException("Trouble writing output."); + } + + return null; + } + + public static void dumpTabixIndex( final PushbackInputStream is, + final PrintStream writer ) throws IOException { + if ( readByte(is) != 'T' || readByte(is) != 'B' || + readByte(is) != 'I' || readByte(is) != 1 ) { + throw new UserException("Incorrect magic number for tabix index"); + } + final int nTigs = readInt(is); + final int format = readInt(is); + final int seqCol = readInt(is); + final int begCol = readInt(is); + final int endCol = readInt(is); + final char meta = (char)readInt(is); + final int skip = readInt(is); + final int namesLen = readInt(is); + writer.println("#tigs\tformat\tseqCol\tbegCol\tendCol\tmetaChr\tskip"); + writer.println(nTigs + "\t" + format + "\t" + seqCol + "\t" + begCol + "\t" + + endCol + "\t" + meta + "\t" + skip + "\n"); + final List tigNames = readContigNames(is, nTigs, namesLen); + for ( final String tigName : tigNames ) { + writer.println(tigName + " binned index:"); + int nBins = readInt(is); + while ( nBins-- > 0 ) { + final int binNo = readInt(is); + int nChunks = readInt(is); + if ( binNo > 37448 ) { + if ( nChunks != 2 ) { + throw new UserException("pseudobin has " + nChunks + " chunks"); } - System.out.println(); + final long chunkStart = readLong(is); + final long chunkEnd = readLong(is); + final long nMapped = readLong(is); + final long nUnmapped = readLong(is); + writer.println(tigName + " summary: mapped=" + nMapped + + "\tplaced=" + nUnmapped + + "\tstart=" + Long.toHexString(chunkStart >>> 16) + + ":" + Long.toHexString(chunkStart & 0xffff) + + "\tend=" + Long.toHexString(chunkEnd >>> 16) + + ":" + Long.toHexString(chunkStart & 0xffff)); + continue; } - int nIntervals = readInt(is); - int nK = 0; - System.out.println(); - System.out.println(tigName + " linear index:"); - while ( nIntervals-- > 0 ) { - final long chunkOffset = readLong(is); - System.out.println(nK + "K\t" + Long.toHexString(chunkOffset >>> 16) + - ":" + Long.toHexString(chunkOffset & 0xffff)); - nK += 16; + if ( binNo == 0 ) { + writer.print(binNo + "\t" + tigName + ":0M-512M\t"); + } else if ( binNo <= 8 ) { + final int binStart = (binNo - 1)*64; + writer.print(binNo + "\t" + tigName + ":" + binStart + "M-" + (binStart+64) + "M\t"); + } else if ( binNo <= 72 ) { + final int binStart = (binNo - 9)*8; + writer.print(binNo + "\t" + tigName + ":" + binStart + "M-" + (binStart+8) + "M\t"); + } else if ( binNo <= 584 ) { + final int binStart = binNo - 73; + writer.print(binNo + "\t" + tigName + ":" + binStart + "M-" + (binStart + 1) + "M\t"); + } else if ( binNo <= 4680 ) { + final int binStart = (binNo - 585)*128; + writer.print(binNo + "\t" + tigName + ":" + binStart + "K-" + (binStart + 128) + "K\t"); + } else { + final int binStart = (binNo - 4681)*16; + writer.print(binNo + "\t" + tigName + ":" + binStart + "K-" + (binStart + 16) + "K\t"); } + while ( nChunks-- > 0 ) { + final long chunkStart = readLong(is); + final long chunkEnd = readLong(is); + writer.print("\t" + Long.toHexString(chunkStart >>> 16) + + ":" + Long.toHexString(chunkStart & 0xffff) + + "->" + Long.toHexString(chunkEnd >>> 16) + + ":" + Long.toHexString(chunkEnd & 0xffff)); + } + writer.println(); } - if ( is.available() > 0 ) { - final long nUnmapped = readLong(is); - System.out.println(nUnmapped + " unplaced reads."); - } - if ( is.available() != 0 ) { - throw new UserException("Unexpected data follows index."); + int nIntervals = readInt(is); + int nK = 0; + writer.println(); + writer.println(tigName + " linear index:"); + while ( nIntervals-- > 0 ) { + final long chunkOffset = readLong(is); + writer.println(nK + "K\t" + Long.toHexString(chunkOffset >>> 16) + + ":" + Long.toHexString(chunkOffset & 0xffff)); + nK += 16; } - } catch ( final IOException ioe ) { - throw new UserException("Can't read " + tabixIndexFile + " as gzip input stream", ioe); } - return null; + int nextByte; + if ( (nextByte = is.read()) != -1 ) { + is.unread(nextByte); + final long nUnmapped = readLong(is); + writer.println(nUnmapped + " unplaced reads."); + } + if ( (nextByte = is.read()) != -1 ) { + throw new UserException("Unexpected data follows index."); + } } public static List readContigNames( final InputStream is, diff --git a/src/main/java/org/broadinstitute/hellbender/utils/io/FeatureOutputStream.java b/src/main/java/org/broadinstitute/hellbender/utils/io/FeatureOutputStream.java index 3d78c98f5b3..b4b0b0fba4d 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/io/FeatureOutputStream.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/io/FeatureOutputStream.java @@ -30,7 +30,13 @@ public class FeatureOutputStream implements FeatureSink { private final Function encoder; private final OutputStream outputStream; - private final LocationAware locationAware; + private final LocationAware locationAware; // same object as outputStream, above + // This is necessary because there is no LocationAwareOutputStream class for + // PositionalOutputStream and BlockCompressedOutputStream to extend. Sadly. + // Do not wrap the BlockCompressedOutputStream in a PositionalOutputStream. The + // PositionalOutputStream just counts bytes output, but that's not a valid virtual file offset + // for a bgzip-compressed file. + private final IndexCreator indexCreator; private final Path featurePath; diff --git a/src/test/java/org/broadinstitute/hellbender/tools/DumpTabixIndexIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/DumpTabixIndexIntegrationTest.java new file mode 100644 index 00000000000..a3f9a9ce02a --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/tools/DumpTabixIndexIntegrationTest.java @@ -0,0 +1,25 @@ +package org.broadinstitute.hellbender.tools; + +import htsjdk.samtools.util.Log; +import org.broadinstitute.hellbender.CommandLineProgramTest; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; +import org.broadinstitute.hellbender.testutils.IntegrationTestSpec; +import org.broadinstitute.hellbender.tools.sv.LocusDepthtoBAF; +import org.testng.annotations.Test; + +import java.io.IOException; +import java.util.Collections; + +public class DumpTabixIndexIntegrationTest extends CommandLineProgramTest { + @Test + public void trivialDumpTabixTest() throws IOException { + final ArgumentsBuilder argsBuilder = new ArgumentsBuilder(); + argsBuilder.add(StandardArgumentDefinitions.INPUT_SHORT_NAME, toolsTestDir + "test.tbi"); + argsBuilder.add(StandardArgumentDefinitions.OUTPUT_SHORT_NAME, "%s"); + final IntegrationTestSpec testSpec = + new IntegrationTestSpec(argsBuilder.getString(), + Collections.singletonList(toolsTestDir + "test.tbi.expected.txt")); + testSpec.executeTest("dump tabix index", this); + } +} diff --git a/src/test/java/org/broadinstitute/hellbender/utils/io/FeatureOutputStreamUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/io/FeatureOutputStreamUnitTest.java index c0c4dc4c009..c3f3ea70af3 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/io/FeatureOutputStreamUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/io/FeatureOutputStreamUnitTest.java @@ -10,9 +10,12 @@ import htsjdk.tribble.readers.AsciiLineReader; import htsjdk.tribble.readers.AsciiLineReaderIterator; import htsjdk.tribble.readers.LineIterator; +import htsjdk.tribble.readers.PositionalBufferedStream; import org.broadinstitute.hellbender.GATKBaseTest; +import org.broadinstitute.hellbender.engine.FeatureDataSource; import org.broadinstitute.hellbender.engine.GATKPath; import org.broadinstitute.hellbender.tools.sv.*; +import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.codecs.BafEvidenceCodec; import org.broadinstitute.hellbender.utils.codecs.DepthEvidenceCodec; import org.broadinstitute.hellbender.utils.codecs.DiscordantPairEvidenceCodec; @@ -94,17 +97,23 @@ public void test(final ArrayList featureList, private void testWithStream(final FeatureOutputStream stream, final ArrayList featureList, final Path outFilePath, final FeatureCodec codec, final String expectedHeader, final boolean indexed) throws IOException { - final Path outIndexPath = Paths.get(outFilePath + FileExtensions.TABIX_INDEX); if (expectedHeader != null) { stream.writeHeader(expectedHeader); } featureList.stream().forEachOrdered(s -> stream.write(s)); stream.close(); if (indexed) { + final Path outIndexPath = Paths.get(outFilePath + FileExtensions.TABIX_INDEX); Assert.assertTrue(Files.exists(outIndexPath), "Index does not exist"); + + // try to use the index a little + final FeatureDataSource dataSource = new FeatureDataSource(outFilePath.toFile()); + dataSource.query(new SimpleInterval("chr1", 1234, 5678)); + dataSource.close(); } final InputStream inputStream = IOUtil.isBlockCompressed(outFilePath) ? - new BlockCompressedInputStream(outFilePath.toFile()) : new FileInputStream(outFilePath.toFile()); + new BlockCompressedInputStream(outFilePath.toFile()) : + new PositionalBufferedStream(new FileInputStream(outFilePath.toFile())); final LineIterator reader = new AsciiLineReaderIterator(AsciiLineReader.from(inputStream)); //Check header diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/test.tbi b/src/test/resources/org/broadinstitute/hellbender/tools/test.tbi new file mode 100644 index 0000000000000000000000000000000000000000..7cc868976a8e8f36a66b8e4810e1fc0784753428 GIT binary patch literal 101 zcmb2|=3rp}f&Xj_PR>jW9t_-tpHfm%8W;r5vK&1XU>K(0cTQsy3wITlqljXI=8P}1 kST45c^{o<{eEz_J0}Kqco3@;71nQSZGf0|&87K+?0Nb(};s5{u literal 0 HcmV?d00001 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/test.tbi.expected.txt b/src/test/resources/org/broadinstitute/hellbender/tools/test.tbi.expected.txt new file mode 100644 index 00000000000..19339f2baa1 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/test.tbi.expected.txt @@ -0,0 +1,10 @@ +#tigs format seqCol begCol endCol metaChr skip +1 0 1 2 2 # 0 + +chr9 binned index: +4681 chr9:0K-16K 0:0->35:0 +chr9 summary: mapped=1 placed=0 start=0:0 end=35:0 + +chr9 linear index: +0K 0:0 +0 unplaced reads.