-
Notifications
You must be signed in to change notification settings - Fork 586
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[BIOIN-1570] Fixed edge case in variant annotation #8810
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -202,7 +202,7 @@ protected void variantType(final VariantContext vc, final LocalContext localCont | |
if (isSpecial(alleles.get(i))){ | ||
continue; | ||
} | ||
if ((localContext.hmerIndelLength.get(i)==null) || (localContext.hmerIndelLength.get(i)==0)){ | ||
if ((localContext.hmerIndelLength==null) || (localContext.hmerIndelLength.get(i)==null) || (localContext.hmerIndelLength.get(i)==0)){ | ||
isHmer=false; | ||
} | ||
} | ||
|
@@ -250,7 +250,11 @@ protected void isHmerIndel(final VariantContext vc, final LocalContext localCont | |
// get byte before and after | ||
final byte before = getReferenceNucleotide(localContext, vc.getStart() - 1); | ||
final byte[] after = getReferenceHmerPlus(localContext, vc.getEnd() + 1, MOTIF_SIZE); | ||
|
||
if (after.length==0){ | ||
logger.warn("Failed to get hmer from reference, isHmerIndel and RightMotif annotations will not be calculated. " + | ||
"Start index: " + vc.getEnd() + 1 + " Reference length: " + localContext.ref.getBases().length); | ||
return; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm looking inside of establishReadGroupFlowOrder() and it looks like it tries to disable annotation if it can't establish what the flow order is for the local context (which is explicitly the case for the user that brought up this issue)... Can you disentangle what is supposed to happen for this annotation class in the event that no-flow order can be established? The annotation requires a flow order in order to determine what kind of indel it is no? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there is a method called isActualFlowOrderRequired in this abstract class that returns false, the annotations that the user enabled all inherit from that abstract class and do not require flow order. The only annotation that does require it is |
||
// build two haplotypes. add byte before and after | ||
final byte[] refHap = buildHaplotype(before, ref.getBases(), after); | ||
final byte[] altHap = buildHaplotype(before, alt.getBases(), after); | ||
|
@@ -338,6 +342,11 @@ protected void getLeftMotif(final VariantContext vc, final LocalContext localCon | |
} | ||
|
||
String motif = getRefMotif(localContext, vc.getStart() - MOTIF_SIZE, MOTIF_SIZE); | ||
if (motif.length() != MOTIF_SIZE){ | ||
logger.warn("Failed to get motif from reference, getLeftMotif annotation will not be calculated. " + | ||
"Start index: " + vc.getStart() + " Reference length: " + localContext.ref.getBases().length); | ||
return; | ||
} | ||
if ( a.length() != refLength ) { | ||
motif = motif.substring(1) + vc.getReference().getBaseString().substring(0, 1); | ||
} | ||
|
@@ -350,8 +359,12 @@ protected void getLeftMotif(final VariantContext vc, final LocalContext localCon | |
protected void getRightMotif(final VariantContext vc, final LocalContext localContext) { | ||
|
||
final int refLength = vc.getReference().length(); | ||
final String motif = getRefMotif(localContext, vc.getStart() + refLength, MOTIF_SIZE); | ||
|
||
String motif = getRefMotif(localContext, vc.getStart() + refLength, MOTIF_SIZE); | ||
if (motif.length() != MOTIF_SIZE){ | ||
logger.warn("Failed to get motif from reference, getRightMotif annotation will not be calculated. " + | ||
"Start index: " + vc.getStart() + refLength + " Reference length: " + localContext.ref.getBases().length); | ||
return; | ||
} | ||
// fill empty entries (non indel alelles) | ||
for ( int i = 0 ; i < localContext.rightMotif.size() ; i++ ) { | ||
if ( localContext.rightMotif.get(i) == null ) { | ||
|
@@ -366,6 +379,11 @@ protected void gcContent(final VariantContext vc, final LocalContext localContex | |
|
||
final int begin = vc.getStart() - (GC_CONTENT_SIZE / 2); | ||
final String seq = getRefMotif(localContext, begin + 1, GC_CONTENT_SIZE); | ||
if ( seq.length() != GC_CONTENT_SIZE ) { | ||
logger.warn("gcContent will not be calculated at position " + vc.getContig() + ":" + vc.getStart() + | ||
" too close to the edge of the reference"); | ||
return; | ||
} | ||
int gcCount = 0; | ||
for ( byte b : seq.getBytes() ) { | ||
if ( b == 'G' || b == 'C' ) { | ||
|
@@ -424,19 +442,24 @@ protected void cycleSkip(final VariantContext vc, final LocalContext localContex | |
localContext.attributes.put(GATKVCFConstants.FLOW_CYCLESKIP_STATUS, css); | ||
} | ||
|
||
// get a single nucleoid from reference | ||
// get a single nucleotid from reference | ||
private byte getReferenceNucleotide(final LocalContext localContext, final int start) { | ||
final int index = start - localContext.ref.getWindow().getStart(); | ||
final byte[] bases = localContext.ref.getBases(); | ||
Utils.validIndex(index, bases.length); | ||
Utils.validIndex(index, bases.length); // do not catch, if here the location is outside of the reference, there is a problem! | ||
return bases[index]; | ||
} | ||
|
||
// get an hmer from reference plus a number of additional bases | ||
private byte[] getReferenceHmerPlus(final LocalContext localContext, final int start, final int additional) { | ||
int index = start - localContext.ref.getWindow().getStart(); | ||
final byte[] bases = localContext.ref.getBases(); | ||
Utils.validIndex(index, bases.length); | ||
try { | ||
Utils.validIndex(index, bases.length); | ||
} catch (IllegalArgumentException e) { | ||
logger.warn("Failed to get hmer from reference. Start index: " + index + " Reference length: " + bases.length); | ||
return new byte[0]; | ||
} | ||
|
||
// get hmer | ||
final StringBuilder sb = new StringBuilder(); | ||
|
@@ -458,8 +481,12 @@ private String getRefMotif(final LocalContext localContext, final int start, fin | |
final byte[] bases = localContext.ref.getBases(); | ||
final int startIndex = start - localContext.ref.getWindow().getStart(); | ||
final int endIndex = startIndex + length; | ||
Utils.validIndex(startIndex, bases.length); | ||
Utils.validIndex(endIndex-1, bases.length); | ||
try { | ||
Utils.validIndex(startIndex, bases.length); | ||
Utils.validIndex(endIndex-1, bases.length); | ||
} catch (IllegalArgumentException e) { | ||
return ""; | ||
} | ||
return new String(Arrays.copyOfRange(bases, startIndex, endIndex)); | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -69,7 +69,25 @@ public Object[][] getTestData() { | |
// ins hmer indel | ||
"TATCT C ATTGACCAA", "CA", | ||
"ins", "1", "2", "A", "ATCTC", "TTGAC", "0.3", "NA", "h-indel" | ||
}, | ||
{ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add to these comments a little more context for the nature of this error and what those are testing? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added comments |
||
// Close to 5' edge | ||
"ATCT C ATTGACCAA", "CA", | ||
"ins", "1", "2", "A", null, "TTGAC", "0.3", "NA", "h-indel" | ||
}, | ||
{ | ||
// close to 3' edge | ||
"TATCT C ATT", "CA", | ||
"ins", "1", "2", "A", "ATCTC", null, null, "NA", "h-indel" | ||
}, | ||
{ | ||
// very close to 3' edge | ||
"TATCT C ", "CA", | ||
"ins", "1", null, null, "ATCTC", null, null, "NA", "non-h-indel" | ||
} | ||
|
||
|
||
|
||
}; | ||
|
||
return testData; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is going to log many many times for every variant. We have the concept of a
OneShotLogger
in GATK that wraps the logging code and skips reporting if this error has already come up before. Wrap these messages in that at the very least so it doesn't inundate the user with millions of error lines.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed. I actually used the logger on purpose (there is a oneshotlogger in this file already) because I thought that I would like the user to see that they are trying to call variants really close to the edge and not get this warning buried in the log. I don't expect to get millions messages like this (there are not that many edges of the contigs in a reasonable genome).
If this is convincing, let me know and I will revert this change.