diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorBase.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorBase.java index acf1c74bf9c..11890925065 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorBase.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorBase.java @@ -34,7 +34,6 @@ * are accumulated as well. */ public abstract class FlowAnnotatorBase implements InfoFieldAnnotation { - private final static Logger logger = LogManager.getLogger(FlowAnnotatorBase.class); protected final OneShotLogger flowMissingOneShotLogger = new OneShotLogger(FlowAnnotatorBase.class); @@ -202,7 +201,7 @@ protected void variantType(final VariantContext vc, final LocalContext localCont if (isSpecial(alleles.get(i))){ continue; } - if ((localContext.hmerIndelLength.get(i)==null) || (localContext.hmerIndelLength.get(i)==0)){ + if ((localContext.hmerIndelLength==null) || (localContext.hmerIndelLength.get(i)==null) || (localContext.hmerIndelLength.get(i)==0)){ isHmer=false; } } @@ -250,7 +249,12 @@ protected void isHmerIndel(final VariantContext vc, final LocalContext localCont // get byte before and after final byte before = getReferenceNucleotide(localContext, vc.getStart() - 1); final byte[] after = getReferenceHmerPlus(localContext, vc.getEnd() + 1, MOTIF_SIZE); - + if (after.length==0){ + flowMissingOneShotLogger.warn("Failed to get hmer from reference, probably because the " + + "variant is very close to the end of the chromosome, isHmerIndel and RightMotif annotations will not be calculated. " + + "Variant position: " + vc.getContig() + ":" + vc.getEnd() + 1); + return; + } // build two haplotypes. add byte before and after final byte[] refHap = buildHaplotype(before, ref.getBases(), after); final byte[] altHap = buildHaplotype(before, alt.getBases(), after); @@ -338,6 +342,12 @@ protected void getLeftMotif(final VariantContext vc, final LocalContext localCon } String motif = getRefMotif(localContext, vc.getStart() - MOTIF_SIZE, MOTIF_SIZE); + if (motif.length() != MOTIF_SIZE){ + flowMissingOneShotLogger.warn("Failed to get motif from reference, probably because the variant is very close to the " + + "start of the chromosome. LeftMotif annotation will not be calculated. " + + "Variant position: " + vc.getContig() + ":" + vc.getStart()); + return; + } if ( a.length() != refLength ) { motif = motif.substring(1) + vc.getReference().getBaseString().substring(0, 1); } @@ -350,8 +360,13 @@ protected void getLeftMotif(final VariantContext vc, final LocalContext localCon protected void getRightMotif(final VariantContext vc, final LocalContext localContext) { final int refLength = vc.getReference().length(); - final String motif = getRefMotif(localContext, vc.getStart() + refLength, MOTIF_SIZE); - + String motif = getRefMotif(localContext, vc.getStart() + refLength, MOTIF_SIZE); + if (motif.length() != MOTIF_SIZE){ + flowMissingOneShotLogger.warn("Failed to get motif from reference, probably because " + + "the variant is close to the end of the chromosome. RightMotif annotation will not be calculated. " + + "Variant position: " + vc.getContig() + ":" + vc.getStart()); + return; + } // fill empty entries (non indel alelles) for ( int i = 0 ; i < localContext.rightMotif.size() ; i++ ) { if ( localContext.rightMotif.get(i) == null ) { @@ -366,6 +381,11 @@ protected void gcContent(final VariantContext vc, final LocalContext localContex final int begin = vc.getStart() - (GC_CONTENT_SIZE / 2); final String seq = getRefMotif(localContext, begin + 1, GC_CONTENT_SIZE); + if ( seq.length() != GC_CONTENT_SIZE ) { + flowMissingOneShotLogger.warn("gcContent will not be calculated at position " + vc.getContig() + ":" + vc.getStart() + + " because the variant is too close to the edge of the chromosome"); + return; + } int gcCount = 0; for ( byte b : seq.getBytes() ) { if ( b == 'G' || b == 'C' ) { @@ -424,11 +444,11 @@ protected void cycleSkip(final VariantContext vc, final LocalContext localContex localContext.attributes.put(GATKVCFConstants.FLOW_CYCLESKIP_STATUS, css); } - // get a single nucleoid from reference + // get a single nucleotid from reference private byte getReferenceNucleotide(final LocalContext localContext, final int start) { final int index = start - localContext.ref.getWindow().getStart(); final byte[] bases = localContext.ref.getBases(); - Utils.validIndex(index, bases.length); + Utils.validIndex(index, bases.length); // do not catch, if here the location is outside of the reference, there is a problem! return bases[index]; } @@ -436,7 +456,13 @@ private byte getReferenceNucleotide(final LocalContext localContext, final int s private byte[] getReferenceHmerPlus(final LocalContext localContext, final int start, final int additional) { int index = start - localContext.ref.getWindow().getStart(); final byte[] bases = localContext.ref.getBases(); - Utils.validIndex(index, bases.length); + try { + Utils.validIndex(index, bases.length); + } catch (IllegalArgumentException e) { + flowMissingOneShotLogger.warn("Failed to get hmer from reference, too close to the edge. " + + "Position: " + localContext.ref.getContig() + ":" + index); + return new byte[0]; + } // get hmer final StringBuilder sb = new StringBuilder(); @@ -458,8 +484,12 @@ private String getRefMotif(final LocalContext localContext, final int start, fin final byte[] bases = localContext.ref.getBases(); final int startIndex = start - localContext.ref.getWindow().getStart(); final int endIndex = startIndex + length; - Utils.validIndex(startIndex, bases.length); - Utils.validIndex(endIndex-1, bases.length); + try { + Utils.validIndex(startIndex, bases.length); + Utils.validIndex(endIndex-1, bases.length); + } catch (IllegalArgumentException e) { + return ""; + } return new String(Arrays.copyOfRange(bases, startIndex, endIndex)); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorUnitTest.java index 7bca9a3bdec..2f927d690c6 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorUnitTest.java @@ -69,7 +69,28 @@ public Object[][] getTestData() { // ins hmer indel "TATCT C ATTGACCAA", "CA", "ins", "1", "2", "A", "ATCTC", "TTGAC", "0.3", "NA", "h-indel" + }, + //The following tests test the edge case of variant so close to the edge of the chromosome + //that some of the annotations can't be calculated + { + // Close to 5' edge of the chromosome, expect not to crash on finding the left motif + "ATCT C ATTGACCAA", "CA", + "ins", "1", "2", "A", null, "TTGAC", "0.3", "NA", "h-indel" + }, + { + // close to 3' edge of the chromosome, should not find left motif or GC content + "TATCT C ATT", "CA", + "ins", "1", "2", "A", "ATCTC", null, null, "NA", "h-indel" + }, + { + // very close to 3' edge of the chromosome, should not find left motif, GC content or if it is + // an hmer indel + "TATCT C ", "CA", + "ins", "1", null, null, "ATCTC", null, null, "NA", "non-h-indel" } + + + }; return testData;