From 964de7aa41398c89a946911eee8c233ff2cc9ea9 Mon Sep 17 00:00:00 2001 From: Ted Sharpe Date: Thu, 28 Apr 2022 15:01:01 -0400 Subject: [PATCH] prohibit multi-allelic sites --- .../hellbender/tools/sv/LocusDepthtoBAF.java | 44 +++++++------------ .../tools/walkers/sv/CollectSVEvidence.java | 19 +++----- 2 files changed, 20 insertions(+), 43 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java index 23b4305a8db..3a740c40cd6 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java @@ -72,7 +72,6 @@ public class LocusDepthtoBAF extends MultiFeatureWalker { public static final String BAF_SITES_VCF_LONG_NAME = "baf-sites-vcf"; public static final String SAMPLE_NAMES_NAME = "sample-names"; public static final String BAF_EVIDENCE_FILE_NAME = "baf-evidence-output"; - public static final String BIALLELIC_ONLY_NAME = "biallelic-only"; public static final String MIN_TOTAL_DEPTH_NAME = "min-total-depth"; public static final String MAX_STD_NAME = "max-std"; public static final String MIN_HET_PROBABILITY = "min-het-probability"; @@ -99,11 +98,6 @@ public class LocusDepthtoBAF extends MultiFeatureWalker { shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME ) private GATKPath bafOutputFile; - @Argument( - doc = "Process only bi-allelic SNP sites", - fullName = BIALLELIC_ONLY_NAME, optional = true ) - private boolean biAllelicOnly = true; - @Argument( doc = "Maximum standard deviation across bafs at a locus (otherwise locus will be ignored)", fullName = MAX_STD_NAME, optional = true ) @@ -150,8 +144,8 @@ public void onTraversalStart() { } @Override - public void apply( LocusDepth locusDepth, Object header, - ReadsContext reads, ReferenceContext ref ) { + public void apply( final LocusDepth locusDepth, final Object header, + final ReadsContext reads, final ReferenceContext ref ) { if ( !sameLocus(locusDepth) ) { processBuffer(); } @@ -179,12 +173,12 @@ public Object onTraversalSuccess() { if ( totalDepth < minTotalDepth ) { return BafEvidence.MISSING_VALUE; } - double expectRefAlt = totalDepth / 2.; + final double expectRefAlt = totalDepth / 2.; final double altDepth = locusDepth.getDepth(altIndex); final double refDiff = locusDepth.getDepth(refIndex) - expectRefAlt; final double altDiff = altDepth - expectRefAlt; - double chiSq = (refDiff * refDiff + altDiff * altDiff) / expectRefAlt; - double fitProb = 1. - chiSqDist.cumulativeProbability(chiSq); + final double chiSq = (refDiff * refDiff + altDiff * altDiff) / expectRefAlt; + final double fitProb = 1. - chiSqDist.cumulativeProbability(chiSq); if ( fitProb < minHetProbability ) { return BafEvidence.MISSING_VALUE; } @@ -212,26 +206,18 @@ private void processBuffer() { if ( refIndex > 3 ) { throw new UserException("ref call is not [ACGT] in vcf at " + vc.getContig() + ":" + vc.getStart()); } - final int nAlleles = alleles.size(); - final int[] altIndices = new int[nAlleles - 1]; - for ( int alleleIndex = 1; alleleIndex < nAlleles; ++alleleIndex ) { - final int altIndex = Nucleotide.decode(alleles.get(1).getBases()[0]).ordinal(); - if ( altIndex > 3 ) { - throw new UserException("alt call is not [ACGT] in vcf at " + vc.getContig() + ":" + vc.getStart()); - } - altIndices[alleleIndex - 1] = altIndex; + final int altIndex = Nucleotide.decode(alleles.get(1).getBases()[0]).ordinal(); + if ( altIndex > 3 ) { + throw new UserException("alt call is not [ACGT] in vcf at " + vc.getContig() + ":" + vc.getStart()); } final List beList = new ArrayList<>(sameLocusBuffer.size()); for ( final LocusDepth ld : sameLocusBuffer ) { - for ( final int altIndex : altIndices ) { - double baf = calcBAF(ld, refIndex, altIndex); - if ( baf != BafEvidence.MISSING_VALUE ) { - beList.add(new BafEvidence(ld.getSample(), ld.getContig(), ld.getStart(), baf)); - break; - } + final double baf = calcBAF(ld, refIndex, altIndex); + if ( baf != BafEvidence.MISSING_VALUE ) { + beList.add(new BafEvidence(ld.getSample(), ld.getContig(), ld.getStart(), baf)); } } - int nBafs = beList.size(); + final int nBafs = beList.size(); if ( nBafs == 1 ) { writer.write(new BafEvidence(beList.get(0), .5)); } else { @@ -246,8 +232,8 @@ private void processBuffer() { vals[idx] = beList.get(idx).getValue(); } Arrays.sort(vals); - int midIdx = nBafs / 2; - double median = + final int midIdx = nBafs / 2; + final double median = (nBafs & 1) != 0 ? vals[midIdx] : (vals[midIdx] + vals[midIdx - 1])/2.; final double adj = .5 - median; for ( final BafEvidence bafEvidence : beList ) { @@ -261,7 +247,7 @@ private void processBuffer() { private VariantContext nextSite() { while ( bafSitesIterator.hasNext() ) { final VariantContext vc = bafSitesIterator.next(); - if ( vc.isSNP() && (!biAllelicOnly || vc.isBiallelic()) ) { + if ( vc.isSNP() && vc.isBiallelic() ) { return vc; } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java index c2ab98ae64a..096c4554edb 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java @@ -96,7 +96,6 @@ public class CollectSVEvidence extends ReadWalker { public static final String ALLELE_COUNT_OUTPUT_ARGUMENT_LONG_NAME = "allele-count-file"; public static final String ALLELE_COUNT_INPUT_ARGUMENT_SHORT_NAME = "F"; public static final String ALLELE_COUNT_INPUT_ARGUMENT_LONG_NAME = "allele-count-vcf"; - public static final String BIALLELIC_ONLY_NAME = "biallelic-only"; public static final String SAMPLE_NAME_ARGUMENT_LONG_NAME = "sample-name"; public static final String COMPRESSION_LEVEL_ARGUMENT_LONG_NAME = "compression-level"; @@ -123,11 +122,6 @@ public class CollectSVEvidence extends ReadWalker { public GATKPath alleleCountInputFilename; - @Argument( - doc = "Process only bi-allelic SNP sites for locus depth", - fullName = BIALLELIC_ONLY_NAME, optional = true ) - private boolean biAllelicOnly = true; - @Argument(fullName = "allele-count-min-mapq", doc = "minimum mapping quality for read to be allele-counted", optional = true) @@ -172,7 +166,7 @@ public void onTraversalStart() { if ( alleleCountInputFilename != null && alleleCountOutputFilename != null ) { alleleCounter = new AlleleCounter(sequenceDictionary, sampleName, compressionLevel, alleleCountInputFilename, alleleCountOutputFilename, - biAllelicOnly, minMapQ, minQ); + minMapQ, minQ); } else if ( alleleCountInputFilename != null ) { throw new UserException("Having specified an allele-count-vcf input, " + "you must also supply an allele-count-file for output."); @@ -330,7 +324,7 @@ private void flushSplitCounts(final Predicate flushablePosition, final FeatureSink srWriter) { while (splitCounts.size() > 0 && flushablePosition.test(splitCounts.peek())) { - SplitPos pos = splitCounts.poll(); + final SplitPos pos = splitCounts.poll(); int countAtPos = 1; while (splitCounts.size() > 0 && splitCounts.peek().equals(pos)) { countAtPos++; @@ -341,7 +335,7 @@ private void flushSplitCounts(final Predicate flushablePosition, } } - private SplitPos getSplitPosition(GATKRead read) { + private SplitPos getSplitPosition( final GATKRead read ) { if (read.getCigar().getFirstCigarElement().getOperator() == CigarOperator.M) { final int matchLength = read.getCigar().getCigarElements().stream().filter(e -> e.getOperator().consumesReferenceBases()).mapToInt(CigarElement::getLength).sum(); return new SplitPos(read.getStart() + matchLength, POSITION.RIGHT); @@ -352,7 +346,7 @@ private SplitPos getSplitPosition(GATKRead read) { return new SplitPos(-1, POSITION.MIDDLE); } - private boolean isSoftClipped(final GATKRead read) { + private boolean isSoftClipped( final GATKRead read ) { final CigarOperator firstOperator = read.getCigar().getFirstCigarElement().getOperator(); final CigarOperator lastOperator = read.getCigar().getLastCigarElement().getOperator(); return (firstOperator == CigarOperator.SOFT_CLIP && lastOperator != CigarOperator.SOFT_CLIP) || @@ -564,7 +558,6 @@ final static class AlleleCounter { private final SAMSequenceDictionary dict; private final String sampleName; private final FeatureSink writer; - private final boolean biAllelicOnly; private final int minMapQ; private final int minQ; private final Iterator snpSourceItr; @@ -575,7 +568,6 @@ public AlleleCounter( final SAMSequenceDictionary dict, final int compressionLevel, final GATKPath inputPath, final GATKPath outputPath, - final boolean biAllelicOnly, final int minMapQ, final int minQ ) { this.dict = dict; @@ -594,7 +586,6 @@ public AlleleCounter( final SAMSequenceDictionary dict, } this.writer = codec.makeSink(outputPath, dict, sampleNames, compressionLevel); } - this.biAllelicOnly = biAllelicOnly; this.minMapQ = minMapQ; this.minQ = minQ; final FeatureDataSource snpSource = @@ -704,7 +695,7 @@ private boolean readNextLocus() { return false; } VariantContext snp = snpSourceItr.next(); - while ( !snp.isSNP() || (biAllelicOnly && !snp.isBiallelic()) ) { + while ( !snp.isSNP() || !snp.isBiallelic() ) { if ( !snpSourceItr.hasNext() ) { return false; }