-
Notifications
You must be signed in to change notification settings - Fork 586
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Created a debug output mode that dumps the exact inputs/outputs of the PairHMM to a file #7660
Changes from all commits
9da0710
48ee29c
ee0e5e4
c124dd0
dc06537
286c365
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,12 @@ | ||
package org.broadinstitute.hellbender.utils.pairhmm; | ||
|
||
import com.google.common.annotations.VisibleForTesting; | ||
import htsjdk.samtools.SAMUtils; | ||
import htsjdk.variant.variantcontext.Allele; | ||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
import org.broadinstitute.gatk.nativebindings.pairhmm.PairHMMNativeArguments; | ||
import org.broadinstitute.hellbender.exceptions.GATKException; | ||
import org.broadinstitute.hellbender.exceptions.UserException; | ||
import org.broadinstitute.hellbender.utils.MathUtils; | ||
import org.broadinstitute.hellbender.utils.Utils; | ||
|
@@ -13,6 +15,8 @@ | |
import org.broadinstitute.hellbender.utils.read.GATKRead; | ||
|
||
import java.io.Closeable; | ||
import java.io.IOException; | ||
import java.io.OutputStreamWriter; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
@@ -31,6 +35,8 @@ public abstract class PairHMM implements Closeable{ | |
protected byte[] previousHaplotypeBases; | ||
protected int hapStartIndex; | ||
|
||
protected OutputStreamWriter debugOutputStream; | ||
|
||
public enum Implementation { | ||
/* Very slow implementation which uses very accurate log10 sum functions. Only meant to be used as a reference test implementation */ | ||
EXACT(args -> { | ||
|
@@ -228,6 +234,7 @@ public void computeLog10Likelihoods(final LikelihoodMatrix<GATKRead, Haplotype> | |
readBases, readQuals, readInsQuals, readDelQuals, overallGCP, isFirstHaplotype, nextAlleleBases); | ||
logLikelihoods.set(a, readIndex, lk); | ||
mLogLikelihoodArray[idx++] = lk; | ||
writeToResultsFileIfApplicable(readBases, readQuals, readInsQuals, readDelQuals, overallGCP, alleleBases, lk); | ||
jamesemery marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
readIndex++; | ||
} | ||
|
@@ -239,6 +246,7 @@ public void computeLog10Likelihoods(final LikelihoodMatrix<GATKRead, Haplotype> | |
} | ||
} | ||
|
||
|
||
/** | ||
* Compute the total probability of read arising from haplotypeBases given base substitution, insertion, and deletion | ||
* probabilities. | ||
|
@@ -350,12 +358,46 @@ public double[] getLogLikelihoodArray() { | |
return mLogLikelihoodArray; | ||
} | ||
|
||
/** | ||
* Attach a debugOutputStream to this HMM instance | ||
*/ | ||
public void setAndInitializeDebugOutputStream(final OutputStreamWriter writer) { | ||
try { | ||
debugOutputStream = writer; | ||
debugOutputStream.write("# hap-bases read-bases read-qual read-ins-qual read-del-qual gcp expected-result"); | ||
} catch (IOException e) { | ||
throw new GATKException("Error writing to specified HMM results output stream", e); | ||
} | ||
} | ||
|
||
/** | ||
* Method to be invoked by implementing HMM engines to output the various hmm inputs/outputs with uniform formatting. | ||
*/ | ||
protected void writeToResultsFileIfApplicable(byte[] readBases, byte[] readQuals, byte[] readInsQuals, byte[] readDelQuals, byte[] overallGCP, byte[] alleleBases, double lk) { | ||
|
||
if (debugOutputStream!= null) { | ||
try { | ||
debugOutputStream.write("\n" + new String(alleleBases) + " " + new String(readBases) + " " + SAMUtils.phredToFastq(readQuals) + " " + SAMUtils.phredToFastq(readInsQuals) + " " + SAMUtils.phredToFastq(readDelQuals) + " " + SAMUtils.phredToFastq(overallGCP) + " " + String.format("%e",lk)); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Confirming that the values in this table will never contain internal whitespace? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think so. If the alleles or the phred-to-fastq methods are returning results with whitespace in them, we have much bigger problems... |
||
} catch (IOException e) { | ||
throw new GATKException("Error writing to specified HMM results output stream", e); | ||
} | ||
} | ||
} | ||
|
||
/** | ||
* Called at the end of the program to close files, print profiling information etc | ||
*/ | ||
@Override | ||
public void close() { | ||
if(doProfiling) | ||
logger.info("Total compute time in PairHMM computeLogLikelihoods() : "+(pairHMMComputeTime*1e-9)); | ||
if(doProfiling) { | ||
logger.info("Total compute time in PairHMM computeLogLikelihoods() : " + (pairHMMComputeTime * 1e-9)); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What's the time unit here? Seconds? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Seconds, I believe, just as it has been in the past for GATK. It's using nanoTime, so converting to seconds means multiplying by 10^-9. |
||
} | ||
if(debugOutputStream != null) { | ||
try { | ||
debugOutputStream.close(); | ||
} catch (IOException e) { | ||
throw new GATKException("Error closing the pairHMM debug output stream", e); | ||
} | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do all of the available HMMs respect this argument, or only some of them? If only some, list the ones that respect it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All of them, as far as I've tested. I have a test for all of them, but the AVX one was producing some different output on Travis...