Commit 964de7a
prohibit multi-allelic sites
tedsharpe committed Apr 28, 2022
1 parent 2d9e9a2 commit 964de7a
Showing 2 changed files with 20 additions and 43 deletions.
LocusDepthtoBAF.java

@@ -72,7 +72,6 @@ public class LocusDepthtoBAF extends MultiFeatureWalker<LocusDepth> {
     public static final String BAF_SITES_VCF_LONG_NAME = "baf-sites-vcf";
     public static final String SAMPLE_NAMES_NAME = "sample-names";
     public static final String BAF_EVIDENCE_FILE_NAME = "baf-evidence-output";
-    public static final String BIALLELIC_ONLY_NAME = "biallelic-only";
     public static final String MIN_TOTAL_DEPTH_NAME = "min-total-depth";
     public static final String MAX_STD_NAME = "max-std";
     public static final String MIN_HET_PROBABILITY = "min-het-probability";
@@ -99,11 +98,6 @@ public class LocusDepthtoBAF extends MultiFeatureWalker<LocusDepth> {
               shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME )
     private GATKPath bafOutputFile;
 
-    @Argument(
-            doc = "Process only bi-allelic SNP sites",
-            fullName = BIALLELIC_ONLY_NAME, optional = true )
-    private boolean biAllelicOnly = true;
-
     @Argument(
             doc = "Maximum standard deviation across bafs at a locus (otherwise locus will be ignored)",
             fullName = MAX_STD_NAME, optional = true )
@@ -150,8 +144,8 @@ public void onTraversalStart() {
     }
 
     @Override
-    public void apply( LocusDepth locusDepth, Object header,
-                       ReadsContext reads, ReferenceContext ref ) {
+    public void apply( final LocusDepth locusDepth, final Object header,
+                       final ReadsContext reads, final ReferenceContext ref ) {
         if ( !sameLocus(locusDepth) ) {
             processBuffer();
         }
@@ -179,12 +173,12 @@ public Object onTraversalSuccess() {
         if ( totalDepth < minTotalDepth ) {
             return BafEvidence.MISSING_VALUE;
         }
-        double expectRefAlt = totalDepth / 2.;
+        final double expectRefAlt = totalDepth / 2.;
         final double altDepth = locusDepth.getDepth(altIndex);
         final double refDiff = locusDepth.getDepth(refIndex) - expectRefAlt;
         final double altDiff = altDepth - expectRefAlt;
-        double chiSq = (refDiff * refDiff + altDiff * altDiff) / expectRefAlt;
-        double fitProb = 1. - chiSqDist.cumulativeProbability(chiSq);
+        final double chiSq = (refDiff * refDiff + altDiff * altDiff) / expectRefAlt;
+        final double fitProb = 1. - chiSqDist.cumulativeProbability(chiSq);
         if ( fitProb < minHetProbability ) {
             return BafEvidence.MISSING_VALUE;
         }
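
Read as a unit, the calcBAF logic in this hunk is a two-bin chi-square goodness-of-fit test: under a heterozygous model, ref and alt each carry half the total depth, and sites whose fit probability falls below the threshold are dropped. A minimal standalone sketch, assuming commons-math3, one degree of freedom for chiSqDist, and stand-in threshold values (MIN_TOTAL_DEPTH and MIN_HET_PROBABILITY here are assumptions, not the tool's defaults):

```java
import org.apache.commons.math3.distribution.ChiSquaredDistribution;

public class HetFitSketch {
    private static final int MIN_TOTAL_DEPTH = 10;          // stand-in for min-total-depth
    private static final double MIN_HET_PROBABILITY = 0.5;  // stand-in for min-het-probability
    private static final ChiSquaredDistribution CHI_SQ_DIST = new ChiSquaredDistribution(1.);

    /** Returns the alt fraction, or -1. (a stand-in for BafEvidence.MISSING_VALUE) on failure. */
    public static double calcBAF( final int refDepth, final int altDepth ) {
        final int totalDepth = refDepth + altDepth;
        if ( totalDepth < MIN_TOTAL_DEPTH ) return -1.;
        final double expectRefAlt = totalDepth / 2.;
        final double refDiff = refDepth - expectRefAlt;
        final double altDiff = altDepth - expectRefAlt;
        final double chiSq = (refDiff * refDiff + altDiff * altDiff) / expectRefAlt;
        final double fitProb = 1. - CHI_SQ_DIST.cumulativeProbability(chiSq);
        return fitProb < MIN_HET_PROBABILITY ? -1. : altDepth / (double)totalDepth;
    }

    public static void main( final String[] args ) {
        System.out.println(calcBAF(14, 16));  // ~0.533: consistent with a het site
        System.out.println(calcBAF(28, 2));   // -1.0: far too skewed to be het
    }
}
```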
@@ -212,26 +206,18 @@ private void processBuffer() {
         if ( refIndex > 3 ) {
             throw new UserException("ref call is not [ACGT] in vcf at " + vc.getContig() + ":" + vc.getStart());
         }
-        final int nAlleles = alleles.size();
-        final int[] altIndices = new int[nAlleles - 1];
-        for ( int alleleIndex = 1; alleleIndex < nAlleles; ++alleleIndex ) {
-            final int altIndex = Nucleotide.decode(alleles.get(1).getBases()[0]).ordinal();
-            if ( altIndex > 3 ) {
-                throw new UserException("alt call is not [ACGT] in vcf at " + vc.getContig() + ":" + vc.getStart());
-            }
-            altIndices[alleleIndex - 1] = altIndex;
-        }
+        final int altIndex = Nucleotide.decode(alleles.get(1).getBases()[0]).ordinal();
+        if ( altIndex > 3 ) {
+            throw new UserException("alt call is not [ACGT] in vcf at " + vc.getContig() + ":" + vc.getStart());
+        }
         final List<BafEvidence> beList = new ArrayList<>(sameLocusBuffer.size());
         for ( final LocusDepth ld : sameLocusBuffer ) {
-            for ( final int altIndex : altIndices ) {
-                double baf = calcBAF(ld, refIndex, altIndex);
-                if ( baf != BafEvidence.MISSING_VALUE ) {
-                    beList.add(new BafEvidence(ld.getSample(), ld.getContig(), ld.getStart(), baf));
-                    break;
-                }
-            }
+            final double baf = calcBAF(ld, refIndex, altIndex);
+            if ( baf != BafEvidence.MISSING_VALUE ) {
+                beList.add(new BafEvidence(ld.getSample(), ld.getContig(), ld.getStart(), baf));
+            }
         }
-        int nBafs = beList.size();
+        final int nBafs = beList.size();
         if ( nBafs == 1 ) {
             writer.write(new BafEvidence(beList.get(0), .5));
         } else {
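
The simplified block above decodes the lone ref and alt alleles to 0-3 nucleotide indices via GATK's Nucleotide enum and rejects anything outside [ACGT]. A dependency-free sketch of the same mapping (the switch is an illustration; the real code relies on Nucleotide.decode and ordinal()):

```java
public final class BaseIndexSketch {
    /** Maps A/C/G/T to 0/1/2/3; anything else (e.g. N) yields -1 and should be rejected. */
    static int baseIndex( final byte base ) {
        switch ( base ) {
            case 'A': case 'a': return 0;
            case 'C': case 'c': return 1;
            case 'G': case 'g': return 2;
            case 'T': case 't': return 3;
            default: return -1;
        }
    }

    public static void main( final String[] args ) {
        System.out.println(baseIndex((byte)'G'));  // 2
        System.out.println(baseIndex((byte)'N'));  // -1: not a simple SNP allele
    }
}
```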
@@ -246,8 +232,8 @@ private void processBuffer() {
                 vals[idx] = beList.get(idx).getValue();
             }
             Arrays.sort(vals);
-            int midIdx = nBafs / 2;
-            double median =
+            final int midIdx = nBafs / 2;
+            final double median =
                     (nBafs & 1) != 0 ? vals[midIdx] : (vals[midIdx] + vals[midIdx - 1])/2.;
             final double adj = .5 - median;
             for ( final BafEvidence bafEvidence : beList ) {
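
The surrounding loop recenters all the BAF values observed at a locus so that their median lands at exactly 0.5 before they are written out. A standalone sketch of that median shift (method and variable names are assumptions):

```java
import java.util.Arrays;

public class MedianAdjustSketch {
    /** Shifts the values so their median becomes .5, mirroring the adjustment above. */
    public static double[] recenter( final double[] bafs ) {
        final double[] vals = bafs.clone();
        Arrays.sort(vals);
        final int nBafs = vals.length;
        final int midIdx = nBafs / 2;
        final double median =
                (nBafs & 1) != 0 ? vals[midIdx] : (vals[midIdx] + vals[midIdx - 1]) / 2.;
        final double adj = .5 - median;
        final double[] adjusted = new double[bafs.length];
        for ( int idx = 0; idx != bafs.length; ++idx ) {
            adjusted[idx] = bafs[idx] + adj;
        }
        return adjusted;
    }

    public static void main( final String[] args ) {
        // median of {.40, .45, .55} is .45, so every value shifts up by .05
        System.out.println(Arrays.toString(recenter(new double[] {.40, .45, .55})));
    }
}
```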
@@ -261,7 +247,7 @@ private void processBuffer() {
     private VariantContext nextSite() {
         while ( bafSitesIterator.hasNext() ) {
             final VariantContext vc = bafSitesIterator.next();
-            if ( vc.isSNP() && (!biAllelicOnly || vc.isBiallelic()) ) {
+            if ( vc.isSNP() && vc.isBiallelic() ) {
                 return vc;
             }
         }
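
With the biAllelicOnly flag gone, nextSite now unconditionally skips multi-allelic records. A small hedged demo (synthetic VariantContexts, not from the patch) of what htsjdk's isBiallelic check accepts and rejects:

```java
import java.util.Arrays;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;

public final class BiallelicCheckDemo {
    public static void main( final String[] args ) {
        final Allele ref = Allele.create("A", true);
        // one ref + one alt: a biallelic SNP, still eligible
        final VariantContext biallelic = new VariantContextBuilder("demo", "chr1", 100, 100,
                Arrays.asList(ref, Allele.create("C"))).make();
        // one ref + two alts: a multi-allelic SNP, now always skipped
        final VariantContext triallelic = new VariantContextBuilder("demo", "chr1", 200, 200,
                Arrays.asList(ref, Allele.create("C"), Allele.create("G"))).make();
        System.out.println(biallelic.isSNP() + " " + biallelic.isBiallelic());   // true true
        System.out.println(triallelic.isSNP() + " " + triallelic.isBiallelic()); // true false
    }
}
```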
CollectSVEvidence.java

@@ -96,7 +96,6 @@ public class CollectSVEvidence extends ReadWalker {
     public static final String ALLELE_COUNT_OUTPUT_ARGUMENT_LONG_NAME = "allele-count-file";
     public static final String ALLELE_COUNT_INPUT_ARGUMENT_SHORT_NAME = "F";
     public static final String ALLELE_COUNT_INPUT_ARGUMENT_LONG_NAME = "allele-count-vcf";
-    public static final String BIALLELIC_ONLY_NAME = "biallelic-only";
     public static final String SAMPLE_NAME_ARGUMENT_LONG_NAME = "sample-name";
     public static final String COMPRESSION_LEVEL_ARGUMENT_LONG_NAME = "compression-level";
 
@@ -123,11 +122,6 @@ public class CollectSVEvidence extends ReadWalker {
     public GATKPath alleleCountInputFilename;
 
 
-    @Argument(
-            doc = "Process only bi-allelic SNP sites for locus depth",
-            fullName = BIALLELIC_ONLY_NAME, optional = true )
-    private boolean biAllelicOnly = true;
-
     @Argument(fullName = "allele-count-min-mapq",
             doc = "minimum mapping quality for read to be allele-counted",
             optional = true)
@@ -172,7 +166,7 @@ public void onTraversalStart() {
         if ( alleleCountInputFilename != null && alleleCountOutputFilename != null ) {
             alleleCounter = new AlleleCounter(sequenceDictionary, sampleName, compressionLevel,
                                               alleleCountInputFilename, alleleCountOutputFilename,
-                                              biAllelicOnly, minMapQ, minQ);
+                                              minMapQ, minQ);
         } else if ( alleleCountInputFilename != null ) {
             throw new UserException("Having specified an allele-count-vcf input, " +
                     "you must also supply an allele-count-file for output.");
@@ -330,7 +324,7 @@ private void flushSplitCounts(final Predicate<SplitPos> flushablePosition,
                                   final FeatureSink<SplitReadEvidence> srWriter) {
 
         while (splitCounts.size() > 0 && flushablePosition.test(splitCounts.peek())) {
-            SplitPos pos = splitCounts.poll();
+            final SplitPos pos = splitCounts.poll();
             int countAtPos = 1;
             while (splitCounts.size() > 0 && splitCounts.peek().equals(pos)) {
                 countAtPos++;
@@ -341,7 +335,7 @@
             }
         }
 
-    private SplitPos getSplitPosition(GATKRead read) {
+    private SplitPos getSplitPosition( final GATKRead read ) {
         if (read.getCigar().getFirstCigarElement().getOperator() == CigarOperator.M) {
             final int matchLength = read.getCigar().getCigarElements().stream().filter(e -> e.getOperator().consumesReferenceBases()).mapToInt(CigarElement::getLength).sum();
             return new SplitPos(read.getStart() + matchLength, POSITION.RIGHT);
@@ -352,7 +346,7 @@ private SplitPos getSplitPosition(GATKRead read) {
         return new SplitPos(-1, POSITION.MIDDLE);
     }
 
-    private boolean isSoftClipped(final GATKRead read) {
+    private boolean isSoftClipped( final GATKRead read ) {
         final CigarOperator firstOperator = read.getCigar().getFirstCigarElement().getOperator();
         final CigarOperator lastOperator = read.getCigar().getLastCigarElement().getOperator();
         return (firstOperator == CigarOperator.SOFT_CLIP && lastOperator != CigarOperator.SOFT_CLIP) ||
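
For orientation, these two helpers classify split reads straight from the CIGAR: a read soft-clipped on exactly one end is a split candidate, and whether the leading element is a match or a clip fixes the breakpoint side. A minimal sketch of the same checks on an htsjdk Cigar (the Side enum and the LEFT branch are assumptions standing in for the tool's SplitPos/POSITION logic, which is only partly visible in this diff):

```java
import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.TextCigarCodec;

public final class SplitPosSketch {
    enum Side { LEFT, RIGHT, MIDDLE }

    /** True when exactly one end of the alignment is soft-clipped. */
    static boolean isOneEndSoftClipped( final Cigar cigar ) {
        final boolean first = cigar.getFirstCigarElement().getOperator() == CigarOperator.SOFT_CLIP;
        final boolean last = cigar.getLastCigarElement().getOperator() == CigarOperator.SOFT_CLIP;
        return first != last;
    }

    /** Leading match means the clip sits on the right of the alignment; leading clip, on the left. */
    static Side splitSide( final Cigar cigar ) {
        final CigarOperator first = cigar.getFirstCigarElement().getOperator();
        if ( first == CigarOperator.M ) return Side.RIGHT;
        if ( first == CigarOperator.SOFT_CLIP ) return Side.LEFT;
        return Side.MIDDLE;
    }

    public static void main( final String[] args ) {
        final Cigar cigar = TextCigarCodec.decode("50M20S");
        final int refSpan = cigar.getCigarElements().stream()
                .filter(e -> e.getOperator().consumesReferenceBases())
                .mapToInt(CigarElement::getLength)
                .sum();  // 50: reference bases consumed before the right-side breakpoint
        System.out.println(isOneEndSoftClipped(cigar) + " " + splitSide(cigar) + " " + refSpan);
    }
}
```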
@@ -564,7 +558,6 @@ final static class AlleleCounter {
         private final SAMSequenceDictionary dict;
         private final String sampleName;
         private final FeatureSink<LocusDepth> writer;
-        private final boolean biAllelicOnly;
         private final int minMapQ;
         private final int minQ;
         private final Iterator<VariantContext> snpSourceItr;
@@ -575,7 +568,6 @@ public AlleleCounter( final SAMSequenceDictionary dict,
                               final int compressionLevel,
                               final GATKPath inputPath,
                               final GATKPath outputPath,
-                              final boolean biAllelicOnly,
                               final int minMapQ,
                               final int minQ ) {
             this.dict = dict;
@@ -594,7 +586,6 @@ public AlleleCounter( final SAMSequenceDictionary dict,
                 }
                 this.writer = codec.makeSink(outputPath, dict, sampleNames, compressionLevel);
             }
-            this.biAllelicOnly = biAllelicOnly;
             this.minMapQ = minMapQ;
             this.minQ = minQ;
             final FeatureDataSource<VariantContext> snpSource =
@@ -704,7 +695,7 @@ private boolean readNextLocus() {
                 return false;
             }
             VariantContext snp = snpSourceItr.next();
-            while ( !snp.isSNP() || (biAllelicOnly && !snp.isBiallelic()) ) {
+            while ( !snp.isSNP() || !snp.isBiallelic() ) {
                 if ( !snpSourceItr.hasNext() ) {
                     return false;
                 }
