From 1e21c8cd918e2576925809729327e4bac90021c0 Mon Sep 17 00:00:00 2001 From: Ilya Soifer Date: Fri, 3 May 2024 19:45:02 +0300 Subject: [PATCH 1/4] [BIOIN-1570] Fixed edge case in variant annotation when the variant is close to the edge of the reference --- .../annotator/flow/FlowAnnotatorBase.java | 45 +++++++++++++++---- .../annotator/flow/FlowAnnotatorUnitTest.java | 18 ++++++++ 2 files changed, 54 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorBase.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorBase.java index acf1c74bf9c..3997c4cbdc8 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorBase.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorBase.java @@ -202,7 +202,7 @@ protected void variantType(final VariantContext vc, final LocalContext localCont if (isSpecial(alleles.get(i))){ continue; } - if ((localContext.hmerIndelLength.get(i)==null) || (localContext.hmerIndelLength.get(i)==0)){ + if ((localContext.hmerIndelLength==null) || (localContext.hmerIndelLength.get(i)==null) || (localContext.hmerIndelLength.get(i)==0)){ isHmer=false; } } @@ -250,7 +250,11 @@ protected void isHmerIndel(final VariantContext vc, final LocalContext localCont // get byte before and after final byte before = getReferenceNucleotide(localContext, vc.getStart() - 1); final byte[] after = getReferenceHmerPlus(localContext, vc.getEnd() + 1, MOTIF_SIZE); - + if (after.length==0){ + logger.warn("Failed to get hmer from reference, isHmerIndel and RightMotif annotations will not be calculated. " + + "Start index: " + vc.getEnd() + 1 + " Reference length: " + localContext.ref.getBases().length); + return; + } // build two haplotypes. add byte before and after final byte[] refHap = buildHaplotype(before, ref.getBases(), after); final byte[] altHap = buildHaplotype(before, alt.getBases(), after); @@ -338,6 +342,11 @@ protected void getLeftMotif(final VariantContext vc, final LocalContext localCon } String motif = getRefMotif(localContext, vc.getStart() - MOTIF_SIZE, MOTIF_SIZE); + if (motif.length() != MOTIF_SIZE){ + logger.warn("Failed to get motif from reference, getLeftMotif annotation will not be calculated. " + + "Start index: " + vc.getStart() + " Reference length: " + localContext.ref.getBases().length); + return; + } if ( a.length() != refLength ) { motif = motif.substring(1) + vc.getReference().getBaseString().substring(0, 1); } @@ -350,8 +359,12 @@ protected void getLeftMotif(final VariantContext vc, final LocalContext localCon protected void getRightMotif(final VariantContext vc, final LocalContext localContext) { final int refLength = vc.getReference().length(); - final String motif = getRefMotif(localContext, vc.getStart() + refLength, MOTIF_SIZE); - + String motif = getRefMotif(localContext, vc.getStart() + refLength, MOTIF_SIZE); + if (motif.length() != MOTIF_SIZE){ + logger.warn("Failed to get motif from reference, getRightMotif annotation will not be calculated. " + + "Start index: " + vc.getStart() + refLength + " Reference length: " + localContext.ref.getBases().length); + return; + } // fill empty entries (non indel alelles) for ( int i = 0 ; i < localContext.rightMotif.size() ; i++ ) { if ( localContext.rightMotif.get(i) == null ) { @@ -366,6 +379,11 @@ protected void gcContent(final VariantContext vc, final LocalContext localContex final int begin = vc.getStart() - (GC_CONTENT_SIZE / 2); final String seq = getRefMotif(localContext, begin + 1, GC_CONTENT_SIZE); + if ( seq.length() != GC_CONTENT_SIZE ) { + logger.warn("gcContent will not be calculated at position " + vc.getContig() + ":" + vc.getStart() + + " too close to the edge of the reference"); + return; + } int gcCount = 0; for ( byte b : seq.getBytes() ) { if ( b == 'G' || b == 'C' ) { @@ -424,11 +442,11 @@ protected void cycleSkip(final VariantContext vc, final LocalContext localContex localContext.attributes.put(GATKVCFConstants.FLOW_CYCLESKIP_STATUS, css); } - // get a single nucleoid from reference + // get a single nucleotid from reference private byte getReferenceNucleotide(final LocalContext localContext, final int start) { final int index = start - localContext.ref.getWindow().getStart(); final byte[] bases = localContext.ref.getBases(); - Utils.validIndex(index, bases.length); + Utils.validIndex(index, bases.length); // do not catch, if here the location is outside of the reference, there is a problem! return bases[index]; } @@ -436,7 +454,12 @@ private byte getReferenceNucleotide(final LocalContext localContext, final int s private byte[] getReferenceHmerPlus(final LocalContext localContext, final int start, final int additional) { int index = start - localContext.ref.getWindow().getStart(); final byte[] bases = localContext.ref.getBases(); - Utils.validIndex(index, bases.length); + try { + Utils.validIndex(index, bases.length); + } catch (IllegalArgumentException e) { + logger.warn("Failed to get hmer from reference. Start index: " + index + " Reference length: " + bases.length); + return new byte[0]; + } // get hmer final StringBuilder sb = new StringBuilder(); @@ -458,8 +481,12 @@ private String getRefMotif(final LocalContext localContext, final int start, fin final byte[] bases = localContext.ref.getBases(); final int startIndex = start - localContext.ref.getWindow().getStart(); final int endIndex = startIndex + length; - Utils.validIndex(startIndex, bases.length); - Utils.validIndex(endIndex-1, bases.length); + try { + Utils.validIndex(startIndex, bases.length); + Utils.validIndex(endIndex-1, bases.length); + } catch (IllegalArgumentException e) { + return ""; + } return new String(Arrays.copyOfRange(bases, startIndex, endIndex)); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorUnitTest.java index 7bca9a3bdec..93d5783b4c5 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorUnitTest.java @@ -69,7 +69,25 @@ public Object[][] getTestData() { // ins hmer indel "TATCT C ATTGACCAA", "CA", "ins", "1", "2", "A", "ATCTC", "TTGAC", "0.3", "NA", "h-indel" + }, + { + // ins hmer indel + "ATCT C ATTGACCAA", "CA", + "ins", "1", "2", "A", null, "TTGAC", "0.3", "NA", "h-indel" + }, + { + // ins hmer indel + "TATCT C ATT", "CA", + "ins", "1", "2", "A", "ATCTC", null, null, "NA", "h-indel" + }, + { + // ins hmer indel + "TATCT C ", "CA", + "ins", "1", null, null, "ATCTC", null, null, "NA", "non-h-indel" } + + + }; return testData; From 4466f851b0810b7ed8cfe76923b74df1663b3d80 Mon Sep 17 00:00:00 2001 From: Ilya Soifer Date: Fri, 3 May 2024 21:21:05 +0300 Subject: [PATCH 2/4] [BIOIN-1570] Fixed comments --- .../tools/walkers/annotator/flow/FlowAnnotatorUnitTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorUnitTest.java index 93d5783b4c5..41728bf8fca 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorUnitTest.java @@ -71,17 +71,17 @@ public Object[][] getTestData() { "ins", "1", "2", "A", "ATCTC", "TTGAC", "0.3", "NA", "h-indel" }, { - // ins hmer indel + // Close to 5' edge "ATCT C ATTGACCAA", "CA", "ins", "1", "2", "A", null, "TTGAC", "0.3", "NA", "h-indel" }, { - // ins hmer indel + // close to 3' edge "TATCT C ATT", "CA", "ins", "1", "2", "A", "ATCTC", null, null, "NA", "h-indel" }, { - // ins hmer indel + // very close to 3' edge "TATCT C ", "CA", "ins", "1", null, null, "ATCTC", null, null, "NA", "non-h-indel" } From 500460e099cedb5a480143804b0aa160c7f20eda Mon Sep 17 00:00:00 2001 From: Ilya Soifer Date: Wed, 29 May 2024 21:33:11 +0300 Subject: [PATCH 3/4] logger -> OneShotLogger --- .../walkers/annotator/flow/FlowAnnotatorBase.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorBase.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorBase.java index 3997c4cbdc8..b7bb4b6361f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorBase.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorBase.java @@ -34,7 +34,6 @@ * are accumulated as well. */ public abstract class FlowAnnotatorBase implements InfoFieldAnnotation { - private final static Logger logger = LogManager.getLogger(FlowAnnotatorBase.class); protected final OneShotLogger flowMissingOneShotLogger = new OneShotLogger(FlowAnnotatorBase.class); @@ -251,7 +250,7 @@ protected void isHmerIndel(final VariantContext vc, final LocalContext localCont final byte before = getReferenceNucleotide(localContext, vc.getStart() - 1); final byte[] after = getReferenceHmerPlus(localContext, vc.getEnd() + 1, MOTIF_SIZE); if (after.length==0){ - logger.warn("Failed to get hmer from reference, isHmerIndel and RightMotif annotations will not be calculated. " + + flowMissingOneShotLogger.warn("Failed to get hmer from reference, isHmerIndel and RightMotif annotations will not be calculated. " + "Start index: " + vc.getEnd() + 1 + " Reference length: " + localContext.ref.getBases().length); return; } @@ -343,7 +342,7 @@ protected void getLeftMotif(final VariantContext vc, final LocalContext localCon String motif = getRefMotif(localContext, vc.getStart() - MOTIF_SIZE, MOTIF_SIZE); if (motif.length() != MOTIF_SIZE){ - logger.warn("Failed to get motif from reference, getLeftMotif annotation will not be calculated. " + + flowMissingOneShotLogger.warn("Failed to get motif from reference, getLeftMotif annotation will not be calculated. " + "Start index: " + vc.getStart() + " Reference length: " + localContext.ref.getBases().length); return; } @@ -361,7 +360,7 @@ protected void getRightMotif(final VariantContext vc, final LocalContext localCo final int refLength = vc.getReference().length(); String motif = getRefMotif(localContext, vc.getStart() + refLength, MOTIF_SIZE); if (motif.length() != MOTIF_SIZE){ - logger.warn("Failed to get motif from reference, getRightMotif annotation will not be calculated. " + + flowMissingOneShotLogger.warn("Failed to get motif from reference, getRightMotif annotation will not be calculated. " + "Start index: " + vc.getStart() + refLength + " Reference length: " + localContext.ref.getBases().length); return; } @@ -380,7 +379,7 @@ protected void gcContent(final VariantContext vc, final LocalContext localContex final int begin = vc.getStart() - (GC_CONTENT_SIZE / 2); final String seq = getRefMotif(localContext, begin + 1, GC_CONTENT_SIZE); if ( seq.length() != GC_CONTENT_SIZE ) { - logger.warn("gcContent will not be calculated at position " + vc.getContig() + ":" + vc.getStart() + + flowMissingOneShotLogger.warn("gcContent will not be calculated at position " + vc.getContig() + ":" + vc.getStart() + " too close to the edge of the reference"); return; } @@ -457,7 +456,7 @@ private byte[] getReferenceHmerPlus(final LocalContext localContext, final int s try { Utils.validIndex(index, bases.length); } catch (IllegalArgumentException e) { - logger.warn("Failed to get hmer from reference. Start index: " + index + " Reference length: " + bases.length); + flowMissingOneShotLogger.warn("Failed to get hmer from reference. Start index: " + index + " Reference length: " + bases.length); return new byte[0]; } From 9cc86d8d8d9068fb5dd59ba2d9ec47e22b581baf Mon Sep 17 00:00:00 2001 From: Ilya Soifer Date: Fri, 31 May 2024 21:37:55 +0300 Subject: [PATCH 4/4] PR comments --- .../annotator/flow/FlowAnnotatorBase.java | 20 +++++++++++-------- .../annotator/flow/FlowAnnotatorUnitTest.java | 9 ++++++--- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorBase.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorBase.java index b7bb4b6361f..11890925065 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorBase.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorBase.java @@ -250,8 +250,9 @@ protected void isHmerIndel(final VariantContext vc, final LocalContext localCont final byte before = getReferenceNucleotide(localContext, vc.getStart() - 1); final byte[] after = getReferenceHmerPlus(localContext, vc.getEnd() + 1, MOTIF_SIZE); if (after.length==0){ - flowMissingOneShotLogger.warn("Failed to get hmer from reference, isHmerIndel and RightMotif annotations will not be calculated. " + - "Start index: " + vc.getEnd() + 1 + " Reference length: " + localContext.ref.getBases().length); + flowMissingOneShotLogger.warn("Failed to get hmer from reference, probably because the " + + "variant is very close to the end of the chromosome, isHmerIndel and RightMotif annotations will not be calculated. " + + "Variant position: " + vc.getContig() + ":" + vc.getEnd() + 1); return; } // build two haplotypes. add byte before and after @@ -342,8 +343,9 @@ protected void getLeftMotif(final VariantContext vc, final LocalContext localCon String motif = getRefMotif(localContext, vc.getStart() - MOTIF_SIZE, MOTIF_SIZE); if (motif.length() != MOTIF_SIZE){ - flowMissingOneShotLogger.warn("Failed to get motif from reference, getLeftMotif annotation will not be calculated. " + - "Start index: " + vc.getStart() + " Reference length: " + localContext.ref.getBases().length); + flowMissingOneShotLogger.warn("Failed to get motif from reference, probably because the variant is very close to the " + + "start of the chromosome. LeftMotif annotation will not be calculated. " + + "Variant position: " + vc.getContig() + ":" + vc.getStart()); return; } if ( a.length() != refLength ) { @@ -360,8 +362,9 @@ protected void getRightMotif(final VariantContext vc, final LocalContext localCo final int refLength = vc.getReference().length(); String motif = getRefMotif(localContext, vc.getStart() + refLength, MOTIF_SIZE); if (motif.length() != MOTIF_SIZE){ - flowMissingOneShotLogger.warn("Failed to get motif from reference, getRightMotif annotation will not be calculated. " + - "Start index: " + vc.getStart() + refLength + " Reference length: " + localContext.ref.getBases().length); + flowMissingOneShotLogger.warn("Failed to get motif from reference, probably because " + + "the variant is close to the end of the chromosome. RightMotif annotation will not be calculated. " + + "Variant position: " + vc.getContig() + ":" + vc.getStart()); return; } // fill empty entries (non indel alelles) @@ -380,7 +383,7 @@ protected void gcContent(final VariantContext vc, final LocalContext localContex final String seq = getRefMotif(localContext, begin + 1, GC_CONTENT_SIZE); if ( seq.length() != GC_CONTENT_SIZE ) { flowMissingOneShotLogger.warn("gcContent will not be calculated at position " + vc.getContig() + ":" + vc.getStart() + - " too close to the edge of the reference"); + " because the variant is too close to the edge of the chromosome"); return; } int gcCount = 0; @@ -456,7 +459,8 @@ private byte[] getReferenceHmerPlus(final LocalContext localContext, final int s try { Utils.validIndex(index, bases.length); } catch (IllegalArgumentException e) { - flowMissingOneShotLogger.warn("Failed to get hmer from reference. Start index: " + index + " Reference length: " + bases.length); + flowMissingOneShotLogger.warn("Failed to get hmer from reference, too close to the edge. " + + "Position: " + localContext.ref.getContig() + ":" + index); return new byte[0]; } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorUnitTest.java index 41728bf8fca..2f927d690c6 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/annotator/flow/FlowAnnotatorUnitTest.java @@ -70,18 +70,21 @@ public Object[][] getTestData() { "TATCT C ATTGACCAA", "CA", "ins", "1", "2", "A", "ATCTC", "TTGAC", "0.3", "NA", "h-indel" }, + //The following tests test the edge case of variant so close to the edge of the chromosome + //that some of the annotations can't be calculated { - // Close to 5' edge + // Close to 5' edge of the chromosome, expect not to crash on finding the left motif "ATCT C ATTGACCAA", "CA", "ins", "1", "2", "A", null, "TTGAC", "0.3", "NA", "h-indel" }, { - // close to 3' edge + // close to 3' edge of the chromosome, should not find left motif or GC content "TATCT C ATT", "CA", "ins", "1", "2", "A", "ATCTC", null, null, "NA", "h-indel" }, { - // very close to 3' edge + // very close to 3' edge of the chromosome, should not find left motif, GC content or if it is + // an hmer indel "TATCT C ", "CA", "ins", "1", null, null, "ATCTC", null, null, "NA", "non-h-indel" }