From 9ae1fd867f652ea3c2c3154ab5be527e743799db Mon Sep 17 00:00:00 2001 From: tedsharpe Date: Thu, 28 Apr 2022 16:25:54 -0400 Subject: [PATCH] LocusDepthtoBAF tool (#7776) * new LocusDepthtoBAF tool * tests * address reviewer comments * prohibit multi-allelic sites --- .../hellbender/tools/sv/BafEvidence.java | 12 + .../hellbender/tools/sv/LocusDepth.java | 44 +-- .../hellbender/tools/sv/LocusDepthtoBAF.java | 256 ++++++++++++++++++ .../tools/walkers/sv/CollectSVEvidence.java | 12 +- .../utils/codecs/LocusDepthBCICodec.java | 13 +- .../utils/codecs/LocusDepthCodec.java | 13 +- .../tools/sv/BafEvidenceUnitTest.java | 11 + .../sv/LocusDepthtoBAFIntegrationTest.java | 78 ++++++ .../tools/sv/LocusDepthtoBAFUnitTest.java | 24 ++ .../sv/PrintSVEvidenceIntegrationTest.java | 8 +- .../walkers/sv/LocusDepthtoBAF/test1.baf.txt | 0 .../walkers/sv/LocusDepthtoBAF/test1.ld.txt | 2 + .../walkers/sv/LocusDepthtoBAF/test1.vcf | 4 + .../walkers/sv/LocusDepthtoBAF/test2.baf.txt | 6 + .../walkers/sv/LocusDepthtoBAF/test2.ld.txt | 6 + .../walkers/sv/LocusDepthtoBAF/test2.vcf | 6 + .../walkers/sv/LocusDepthtoBAF/test2a.vcf | 6 + .../walkers/sv/LocusDepthtoBAF/test3.baf.txt | 2 + .../walkers/sv/LocusDepthtoBAF/test3.ld.txt | 2 + .../walkers/sv/LocusDepthtoBAF/test3.vcf | 4 + 20 files changed, 462 insertions(+), 47 deletions(-) create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java create mode 100644 src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFIntegrationTest.java create mode 100644 src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFUnitTest.java create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.baf.txt create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.ld.txt create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.baf.txt create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.ld.txt create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2a.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.baf.txt create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.ld.txt create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.vcf diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/BafEvidence.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/BafEvidence.java index 730c1dfda6d..050ec1b5003 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/BafEvidence.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/BafEvidence.java @@ -13,17 +13,29 @@ public final class BafEvidence implements SVFeature { private final double value; public final static String BCI_VERSION = "1.0"; + public final static double MISSING_VALUE = -1.; public BafEvidence( final String sample, final String contig, final int position, final double value ) { Utils.nonNull(sample); Utils.nonNull(contig); + Utils.validateArg(position > 0, "starting position must be positive"); + Utils.validateArg(value >= 0. || value == MISSING_VALUE, + "value must be non-negative or missing"); this.sample = sample; this.contig = contig; this.position = position; this.value = value; } + // value-altering constructor + public BafEvidence( final BafEvidence that, final double value ) { + this.sample = that.sample; + this.contig = that.contig; + this.position = that.position; + this.value = value; + } + public String getSample() { return sample; } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepth.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepth.java index 869878741a3..d3ac47ae9f1 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepth.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepth.java @@ -2,6 +2,7 @@ import com.google.common.annotations.VisibleForTesting; import htsjdk.samtools.util.Locatable; +import org.broadinstitute.hellbender.utils.Utils; import java.util.*; @@ -11,25 +12,26 @@ public final class LocusDepth implements SVFeature { private final String contig; private final int position; private final String sample; - private final byte refCall; // index into nucleotideValues - private final int[] depths; + private final int[] depths; // four slots for each call, ACGT + public final static String BCI_VERSION = "1.0"; - public LocusDepth( final Locatable loc, final String sample, final byte refCall ) { - this.contig = loc.getContig(); - this.position = loc.getStart(); - this.sample = sample; - this.refCall = refCall; - this.depths = new int[4]; + public LocusDepth( final Locatable loc, final String sample ) { + this(loc.getContig(), loc.getStart(), sample, 0, 0, 0, 0); } - public LocusDepth( final String contig, final int position, - final String sample, final byte refCall, + public LocusDepth( final String contig, final int position, final String sample, final int aDepth, final int cDepth, final int gDepth, final int tDepth ) { + Utils.nonNull(contig, "contig may not be null"); + Utils.validateArg(position > 0, "starting position must be positive"); + Utils.nonNull(sample, "sample must not be null"); + Utils.validateArg(aDepth >= 0, "depth must be non-negative"); + Utils.validateArg(cDepth >= 0, "depth must be non-negative"); + Utils.validateArg(gDepth >= 0, "depth must be non-negative"); + Utils.validateArg(tDepth >= 0, "depth must be non-negative"); this.contig = contig; this.position = position; this.sample = sample; - this.refCall = refCall; this.depths = new int[4]; depths[0] = aDepth; depths[1] = cDepth; @@ -57,13 +59,11 @@ public int getStart() { } public String getSample() { return sample; } - public char getRefCall() { - return (char)refCall; - } - public int getADepth() { return depths[0]; } - public int getCDepth() { return depths[1]; } - public int getGDepth() { return depths[2]; } - public int getTDepth() { return depths[3]; } + + public int getTotalDepth() { return depths[0] + depths[1] + depths[2] + depths[3]; } + + // idx is index into "ACGT" + public int getDepth( final int idx ) { return depths[idx]; } @Override public LocusDepth extractSamples( final Set sampleNames, final Object header ) { @@ -77,7 +77,7 @@ public boolean equals( final Object obj ) { } public boolean equals( final LocusDepth that ) { - return this.position == that.position && this.refCall == that.refCall && + return this.position == that.position && this.depths[0] == that.depths[0] && this.depths[1] == that.depths[1] && this.depths[2] == that.depths[2] && this.depths[3] == that.depths[3] && Objects.equals(this.contig, that.contig) && @@ -86,12 +86,12 @@ public boolean equals( final LocusDepth that ) { @Override public int hashCode() { - return Objects.hash(contig, position, sample, refCall, Arrays.hashCode(depths)); + return Objects.hash(contig, position, sample, Arrays.hashCode(depths)); } @Override public String toString() { - return contig + "\t" + position + "\t" + sample + "\t" + (char)refCall + "\t" + - depths[0] + "\t" + depths[1] + "\t" + depths[2] + "\t" + depths[3]; + return sample + "@" + contig + ":" + position + + "[" + depths[0] + "," + depths[1] + "," + depths[2] + "," + depths[3] + "]"; } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java b/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java new file mode 100644 index 00000000000..3a740c40cd6 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAF.java @@ -0,0 +1,256 @@ +package org.broadinstitute.hellbender.tools.sv; + +import com.google.common.annotations.VisibleForTesting; +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.util.Locatable; +import htsjdk.tribble.Feature; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.VariantContext; +import org.apache.commons.math3.distribution.ChiSquaredDistribution; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; +import org.broadinstitute.barclay.argparser.ExperimentalFeature; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.cmdline.programgroups.StructuralVariantDiscoveryProgramGroup; +import org.broadinstitute.hellbender.engine.*; +import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.utils.MathUtils.RunningAverage; +import org.broadinstitute.hellbender.utils.Nucleotide; +import org.broadinstitute.hellbender.utils.codecs.FeatureOutputCodec; +import org.broadinstitute.hellbender.utils.codecs.FeatureOutputCodecFinder; +import org.broadinstitute.hellbender.utils.codecs.FeatureSink; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +/** + *

Merges locus-sorted LocusDepth evidence files, and calculates the bi-allelic frequency (baf) + * for each sample and site, and writes these values as a BafEvidence output file.

+ *

Samples at sites that have too few depth counts (controlled by --min-total-depth), and samples + * failing a Pearson's chi square test for goodness of fit to a bi-allelic model are excluded. (I.e., + * only samples that are judged to be heterozygous at a site are included.)

+ *

Across all evaluable samples, sites where the samples have bafs with too much variance + * (controlled by --max-std) are excluded. The median baf value at each site is adjusted to be + * exactly 0.5.

+ * + *

Inputs

+ * + * + * + *

Output

+ * + * + * + *

Usage example

+ * + *
+ *     gatk LocusDepthtoBAF \
+ *       -F file1.ld.txt.gz [-F file2.ld.txt.gz ...] \
+ *       -O merged.baf.bci
+ * 
+ * + * @author Ted Sharpe <tsharpe@broadinstitute.org> + */ +@CommandLineProgramProperties( + summary = "Convert LocusDepth to BafEvidence", + oneLineSummary = "Convert LocusDepth to BafEvidence", + programGroup = StructuralVariantDiscoveryProgramGroup.class +) +@ExperimentalFeature +public class LocusDepthtoBAF extends MultiFeatureWalker { + public static final String LOCUS_DEPTH_FILE_NAME = "locus-depth"; + public static final String BAF_SITES_VCF_LONG_NAME = "baf-sites-vcf"; + public static final String SAMPLE_NAMES_NAME = "sample-names"; + public static final String BAF_EVIDENCE_FILE_NAME = "baf-evidence-output"; + public static final String MIN_TOTAL_DEPTH_NAME = "min-total-depth"; + public static final String MAX_STD_NAME = "max-std"; + public static final String MIN_HET_PROBABILITY = "min-het-probability"; + public static final String COMPRESSION_LEVEL_NAME = "compression-level"; + public static ChiSquaredDistribution chiSqDist = new ChiSquaredDistribution(null, 1.); + + @Argument( + doc = "Locus depth files to process", + fullName = LOCUS_DEPTH_FILE_NAME, + shortName = StandardArgumentDefinitions.FEATURE_SHORT_NAME ) + private List> locusDepthFiles; + + @Argument(fullName = BAF_SITES_VCF_LONG_NAME, + doc = "Input VCF of SNPs marking loci for allele counts") + public GATKPath bafSitesFile; + + @Argument(fullName = SAMPLE_NAMES_NAME, + doc = "Sample names. Necessary only when writing a *.baf.bci output file.", optional = true) + private List sampleNames; + + @Argument( + doc = "BAF output file", + fullName = BAF_EVIDENCE_FILE_NAME, + shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME ) + private GATKPath bafOutputFile; + + @Argument( + doc = "Maximum standard deviation across bafs at a locus (otherwise locus will be ignored)", + fullName = MAX_STD_NAME, optional = true ) + private double maxStdDev = .2; + + @Argument( + doc = "Minimum total depth at a locus (otherwise locus will be ignored)", + fullName = MIN_TOTAL_DEPTH_NAME, optional = true ) + private int minTotalDepth = 10; + + @Argument( + doc = "Minimum probability of site being biallelic and heterozygous", + fullName = MIN_HET_PROBABILITY, optional = true ) + private double minHetProbability = .5; + + @Argument( + doc = "Output compression level", + fullName = COMPRESSION_LEVEL_NAME, + minValue = 0, maxValue = 9, optional = true ) + private int compressionLevel = 4; + + private FeatureDataSource bafSitesSource; + private Iterator bafSitesIterator; + private FeatureSink writer; + private final List sameLocusBuffer = new ArrayList<>(); + + @Override + @SuppressWarnings("unchecked") + public void onTraversalStart() { + super.onTraversalStart(); + bafSitesSource = new FeatureDataSource<>(bafSitesFile.toPath().toString()); + bafSitesIterator = bafSitesSource.iterator(); + final SAMSequenceDictionary dict = getDictionary(); + dict.assertSameDictionary(bafSitesSource.getSequenceDictionary()); + final FeatureOutputCodec> codec = + FeatureOutputCodecFinder.find(bafOutputFile); + if ( !codec.getFeatureType().isAssignableFrom(BafEvidence.class) ) { + throw new UserException("We're intending to write BafEvidence, but the feature type " + + "associated with the output file expects features of type " + + codec.getFeatureType().getSimpleName()); + } + writer = (FeatureSink)codec.makeSink(bafOutputFile, dict, sampleNames, + compressionLevel); + } + + @Override + public void apply( final LocusDepth locusDepth, final Object header, + final ReadsContext reads, final ReferenceContext ref ) { + if ( !sameLocus(locusDepth) ) { + processBuffer(); + } + sameLocusBuffer.add(locusDepth); + } + + @Override + public Object onTraversalSuccess() { + super.onTraversalSuccess(); + if ( !sameLocusBuffer.isEmpty() ) { + processBuffer(); + } + bafSitesSource.close(); + writer.close(); + return null; + } + + /** Performs a Pearson's chi square test for goodness of fit to a biallelic model to determine + * heterozygosity. If this fails, BafEvidence.MISSING_VALUE is returned. If the test succeeds, + * the depth of the alt call as a fraction of the total depth is returned. + */ + @VisibleForTesting double calcBAF( final LocusDepth locusDepth, + final int refIndex, final int altIndex ) { + final int totalDepth = locusDepth.getTotalDepth(); + if ( totalDepth < minTotalDepth ) { + return BafEvidence.MISSING_VALUE; + } + final double expectRefAlt = totalDepth / 2.; + final double altDepth = locusDepth.getDepth(altIndex); + final double refDiff = locusDepth.getDepth(refIndex) - expectRefAlt; + final double altDiff = altDepth - expectRefAlt; + final double chiSq = (refDiff * refDiff + altDiff * altDiff) / expectRefAlt; + final double fitProb = 1. - chiSqDist.cumulativeProbability(chiSq); + if ( fitProb < minHetProbability ) { + return BafEvidence.MISSING_VALUE; + } + return altDepth / totalDepth; + } + + private boolean sameLocus( final Locatable locus ) { + if ( sameLocusBuffer.isEmpty() ) { + return true; + } + final LocusDepth curLocus = sameLocusBuffer.get(0); + return curLocus.getContig().equals(locus.getContig()) && + curLocus.getStart() == locus.getStart(); + } + + private void processBuffer() { + final VariantContext vc = nextSite(); + if ( !sameLocus(vc) ) { + final Locatable loc = sameLocusBuffer.get(0); + throw new UserException("expecting locus " + loc.getContig() + ":" + loc.getStart() + + ", but found locus " + vc.getContig() + ":" + vc.getStart() + " in baf sites vcf"); + } + final List alleles = vc.getAlleles(); + final int refIndex = Nucleotide.decode(alleles.get(0).getBases()[0]).ordinal(); + if ( refIndex > 3 ) { + throw new UserException("ref call is not [ACGT] in vcf at " + vc.getContig() + ":" + vc.getStart()); + } + final int altIndex = Nucleotide.decode(alleles.get(1).getBases()[0]).ordinal(); + if ( altIndex > 3 ) { + throw new UserException("alt call is not [ACGT] in vcf at " + vc.getContig() + ":" + vc.getStart()); + } + final List beList = new ArrayList<>(sameLocusBuffer.size()); + for ( final LocusDepth ld : sameLocusBuffer ) { + final double baf = calcBAF(ld, refIndex, altIndex); + if ( baf != BafEvidence.MISSING_VALUE ) { + beList.add(new BafEvidence(ld.getSample(), ld.getContig(), ld.getStart(), baf)); + } + } + final int nBafs = beList.size(); + if ( nBafs == 1 ) { + writer.write(new BafEvidence(beList.get(0), .5)); + } else { + final RunningAverage ra = new RunningAverage(); + for ( final BafEvidence bafEvidence : beList ) { + ra.add(bafEvidence.getValue()); + } + final double stddev = ra.stddev(); + if ( stddev <= maxStdDev ) { + final double[] vals = new double[nBafs]; + for ( int idx = 0; idx != nBafs; ++idx ) { + vals[idx] = beList.get(idx).getValue(); + } + Arrays.sort(vals); + final int midIdx = nBafs / 2; + final double median = + (nBafs & 1) != 0 ? vals[midIdx] : (vals[midIdx] + vals[midIdx - 1])/2.; + final double adj = .5 - median; + for ( final BafEvidence bafEvidence : beList ) { + writer.write(new BafEvidence(bafEvidence, bafEvidence.getValue()+adj)); + } + } + } + sameLocusBuffer.clear(); + } + + private VariantContext nextSite() { + while ( bafSitesIterator.hasNext() ) { + final VariantContext vc = bafSitesIterator.next(); + if ( vc.isSNP() && vc.isBiallelic() ) { + return vc; + } + } + throw new UserException("baf sites vcf exhausted before locus depth data"); + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java index 1570a9f196e..096c4554edb 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/CollectSVEvidence.java @@ -121,6 +121,7 @@ public class CollectSVEvidence extends ReadWalker { optional = true) public GATKPath alleleCountInputFilename; + @Argument(fullName = "allele-count-min-mapq", doc = "minimum mapping quality for read to be allele-counted", optional = true) @@ -323,7 +324,7 @@ private void flushSplitCounts(final Predicate flushablePosition, final FeatureSink srWriter) { while (splitCounts.size() > 0 && flushablePosition.test(splitCounts.peek())) { - SplitPos pos = splitCounts.poll(); + final SplitPos pos = splitCounts.poll(); int countAtPos = 1; while (splitCounts.size() > 0 && splitCounts.peek().equals(pos)) { countAtPos++; @@ -334,7 +335,7 @@ private void flushSplitCounts(final Predicate flushablePosition, } } - private SplitPos getSplitPosition(GATKRead read) { + private SplitPos getSplitPosition( final GATKRead read ) { if (read.getCigar().getFirstCigarElement().getOperator() == CigarOperator.M) { final int matchLength = read.getCigar().getCigarElements().stream().filter(e -> e.getOperator().consumesReferenceBases()).mapToInt(CigarElement::getLength).sum(); return new SplitPos(read.getStart() + matchLength, POSITION.RIGHT); @@ -345,7 +346,7 @@ private SplitPos getSplitPosition(GATKRead read) { return new SplitPos(-1, POSITION.MIDDLE); } - private boolean isSoftClipped(final GATKRead read) { + private boolean isSoftClipped( final GATKRead read ) { final CigarOperator firstOperator = read.getCigar().getFirstCigarElement().getOperator(); final CigarOperator lastOperator = read.getCigar().getLastCigarElement().getOperator(); return (firstOperator == CigarOperator.SOFT_CLIP && lastOperator != CigarOperator.SOFT_CLIP) || @@ -694,14 +695,13 @@ private boolean readNextLocus() { return false; } VariantContext snp = snpSourceItr.next(); - while ( !snp.isSNP() ) { + while ( !snp.isSNP() || !snp.isBiallelic() ) { if ( !snpSourceItr.hasNext() ) { return false; } snp = snpSourceItr.next(); } - final byte[] refSeq = snp.getReference().getBases(); - final LocusDepth locusDepth = new LocusDepth(snp, sampleName, refSeq[0]); + final LocusDepth locusDepth = new LocusDepth(snp, sampleName); locusDepthQueue.add(locusDepth); return true; } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthBCICodec.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthBCICodec.java index 18c980e7d66..4d69ce73708 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthBCICodec.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthBCICodec.java @@ -30,9 +30,7 @@ public LocusDepth decode( final Reader reader ) throws IOException { } final DataInputStream dis = reader.getStream(); return new LocusDepth(reader.getDictionary().getSequence(dis.readInt()).getSequenceName(), - dis.readInt(), - reader.getSampleNames().get(dis.readInt()), - dis.readByte(), + dis.readInt(), reader.getSampleNames().get(dis.readInt()), dis.readInt(), dis.readInt(), dis.readInt(), dis.readInt()); } @@ -68,11 +66,10 @@ public void encode( final LocusDepth locusDepth, final Writer writer dos.writeInt(writer.getContigIndex(locusDepth.getContig())); dos.writeInt(locusDepth.getStart()); dos.writeInt(writer.getSampleIndex(locusDepth.getSample())); - dos.writeByte(locusDepth.getRefCall()); - dos.writeInt(locusDepth.getADepth()); - dos.writeInt(locusDepth.getCDepth()); - dos.writeInt(locusDepth.getGDepth()); - dos.writeInt(locusDepth.getTDepth()); + dos.writeInt(locusDepth.getDepth(0)); + dos.writeInt(locusDepth.getDepth(1)); + dos.writeInt(locusDepth.getDepth(2)); + dos.writeInt(locusDepth.getDepth(3)); } @Override diff --git a/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthCodec.java b/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthCodec.java index e1b77d34975..87ac356d48e 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthCodec.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/codecs/LocusDepthCodec.java @@ -30,17 +30,16 @@ public LocusDepthCodec() { @Override public LocusDepth decode( final String line ) { final List tokens = splitter.splitToList(line); - if ( tokens.size() != 8 ) { + if ( tokens.size() != 7 ) { throw new IllegalArgumentException("Invalid number of columns: " + tokens.size()); } return new LocusDepth(tokens.get(0), Integer.parseUnsignedInt(tokens.get(1)) + 1, tokens.get(2), - (byte)tokens.get(3).charAt(0), + Integer.parseUnsignedInt(tokens.get(3)), Integer.parseUnsignedInt(tokens.get(4)), Integer.parseUnsignedInt(tokens.get(5)), - Integer.parseUnsignedInt(tokens.get(6)), - Integer.parseUnsignedInt(tokens.get(7))); + Integer.parseUnsignedInt(tokens.get(6))); } @Override public Object readActualHeader( LineIterator reader ) { return null; } @@ -86,8 +85,8 @@ public FeatureSink makeSortMerger( final GATKPath path, public static String encode( final LocusDepth locusDepth ) { return locusDepth.getContig() + "\t" + (locusDepth.getStart() - 1) + "\t" + - locusDepth.getSample() + "\t" + locusDepth.getRefCall() + "\t" + - locusDepth.getADepth() + "\t" + locusDepth.getCDepth() + "\t" + - locusDepth.getGDepth() + "\t" + locusDepth.getTDepth(); + locusDepth.getSample() + "\t" + + locusDepth.getDepth(0) + "\t" + locusDepth.getDepth(1) + "\t" + + locusDepth.getDepth(2) + "\t" + locusDepth.getDepth(3); } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/BafEvidenceUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/BafEvidenceUnitTest.java index 945b2adfd9c..0523f51ef64 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/sv/BafEvidenceUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/BafEvidenceUnitTest.java @@ -73,4 +73,15 @@ public void testBinaryRoundTrip() { } Assert.assertEquals(recoveredBafs, bafs); } + + @Test + public void testValueAlteringConstructor() { + final BafEvidence bafEvidence = new BafEvidence("sample", "contig", 1234, .4); + final BafEvidence newValue = new BafEvidence(bafEvidence, .5); + Assert.assertEquals(newValue.getSample(), bafEvidence.getSample()); + Assert.assertEquals(newValue.getContig(), bafEvidence.getContig()); + Assert.assertEquals(newValue.getStart(), bafEvidence.getStart()); + Assert.assertEquals(bafEvidence.getValue(), .4); + Assert.assertEquals(newValue.getValue(), .5); + } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFIntegrationTest.java new file mode 100644 index 00000000000..c437c0b8830 --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFIntegrationTest.java @@ -0,0 +1,78 @@ +package org.broadinstitute.hellbender.tools.sv; + +import htsjdk.samtools.util.Log; +import org.broadinstitute.hellbender.CommandLineProgramTest; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.testutils.ArgumentsBuilder; +import org.broadinstitute.hellbender.testutils.IntegrationTestSpec; +import org.testng.annotations.Test; + +import java.io.IOException; +import java.util.Collections; + +public class LocusDepthtoBAFIntegrationTest extends CommandLineProgramTest { + public static final String ld2bafTestDir = toolsTestDir + "walkers/sv/LocusDepthtoBAF/"; + + @Override public String getTestedToolName() { return "LocusDepthtoBAF"; } + + @Test + public void testStdDevTooBig() throws IOException { + final ArgumentsBuilder argsBuilder = new ArgumentsBuilder(); + argsBuilder.add(StandardArgumentDefinitions.VERBOSITY_NAME, Log.LogLevel.ERROR.name()); + argsBuilder.add(LocusDepthtoBAF.MIN_HET_PROBABILITY, "0."); + argsBuilder.add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, hg19_chr1_1M_dict); + argsBuilder.add(LocusDepthtoBAF.BAF_SITES_VCF_LONG_NAME, ld2bafTestDir + "test1.vcf"); + argsBuilder.add(StandardArgumentDefinitions.FEATURE_SHORT_NAME, ld2bafTestDir + "test1.ld.txt"); + argsBuilder.add(StandardArgumentDefinitions.OUTPUT_SHORT_NAME, "%s"); + final IntegrationTestSpec testSpec = + new IntegrationTestSpec(argsBuilder.getString(), + Collections.singletonList(ld2bafTestDir + "test1.baf.txt")); + testSpec.setOutputFileExtension("baf.txt"); + testSpec.executeTest("test1: standard deviation too big", this); + } + + @Test + public void testAOK() throws IOException { + final ArgumentsBuilder argsBuilder = new ArgumentsBuilder(); + argsBuilder.add(StandardArgumentDefinitions.VERBOSITY_NAME, Log.LogLevel.ERROR.name()); + argsBuilder.add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, publicTestDir + "hg19micro.dict"); + argsBuilder.add(LocusDepthtoBAF.BAF_SITES_VCF_LONG_NAME, ld2bafTestDir + "test2.vcf"); + argsBuilder.add(StandardArgumentDefinitions.FEATURE_SHORT_NAME, ld2bafTestDir + "test2.ld.txt"); + argsBuilder.add(StandardArgumentDefinitions.OUTPUT_SHORT_NAME, "%s"); + final IntegrationTestSpec testSpec = + new IntegrationTestSpec(argsBuilder.getString(), + Collections.singletonList(ld2bafTestDir + "test2.baf.txt")); + testSpec.setOutputFileExtension("baf.txt"); + testSpec.executeTest("test2: a-ok", this); + } + + @Test + public void testSitesMismatch() throws IOException { + final ArgumentsBuilder argsBuilder = new ArgumentsBuilder(); + argsBuilder.add(StandardArgumentDefinitions.VERBOSITY_NAME, Log.LogLevel.ERROR.name()); + argsBuilder.add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, publicTestDir + "hg19micro.dict"); + argsBuilder.add(LocusDepthtoBAF.BAF_SITES_VCF_LONG_NAME, ld2bafTestDir + "test2a.vcf"); + argsBuilder.add(StandardArgumentDefinitions.FEATURE_SHORT_NAME, ld2bafTestDir + "test2.ld.txt"); + argsBuilder.add(StandardArgumentDefinitions.OUTPUT_SHORT_NAME, "%s"); + final IntegrationTestSpec testSpec = + new IntegrationTestSpec(argsBuilder.getString(), 1, UserException.class); + testSpec.setOutputFileExtension("baf.txt"); + testSpec.executeTest("test2: a-ok", this); + } + + @Test + public void testAdjustMedian() throws IOException { + final ArgumentsBuilder argsBuilder = new ArgumentsBuilder(); + argsBuilder.add(StandardArgumentDefinitions.VERBOSITY_NAME, Log.LogLevel.ERROR.name()); + argsBuilder.add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, hg19_chr1_1M_dict); + argsBuilder.add(LocusDepthtoBAF.BAF_SITES_VCF_LONG_NAME, ld2bafTestDir + "test3.vcf"); + argsBuilder.add(StandardArgumentDefinitions.FEATURE_SHORT_NAME, ld2bafTestDir + "test3.ld.txt"); + argsBuilder.add(StandardArgumentDefinitions.OUTPUT_SHORT_NAME, "%s"); + final IntegrationTestSpec testSpec = + new IntegrationTestSpec(argsBuilder.getString(), + Collections.singletonList(ld2bafTestDir + "test3.baf.txt")); + testSpec.setOutputFileExtension("baf.txt"); + testSpec.executeTest("test3: adjust median", this); + } +} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFUnitTest.java new file mode 100644 index 00000000000..b2c4bcc737d --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/LocusDepthtoBAFUnitTest.java @@ -0,0 +1,24 @@ +package org.broadinstitute.hellbender.tools.sv; + +import org.testng.Assert; +import org.testng.annotations.Test; + +public class LocusDepthtoBAFUnitTest { + @Test + public void testBAFCalc() { + final LocusDepthtoBAF instance = new LocusDepthtoBAF(); + final String tig = "1"; + final int pos = 1; + final int refIndex = 0; + final int altIndex = 1; + final String smpl = "s"; + // too shallow + Assert.assertEquals(instance.calcBAF(new LocusDepth(tig, pos, smpl, 4, 4, 0, 0), refIndex, altIndex), BafEvidence.MISSING_VALUE); + // just deep enough + Assert.assertEquals(instance.calcBAF(new LocusDepth(tig, pos, smpl, 5, 5, 0, 0), refIndex, altIndex), .5); + // too unequal + Assert.assertEquals(instance.calcBAF(new LocusDepth(tig, pos, smpl, 7, 3, 0, 0), refIndex, altIndex), BafEvidence.MISSING_VALUE); + // sufficiently equal + Assert.assertEquals(instance.calcBAF(new LocusDepth(tig, pos, smpl, 6, 4, 0, 0), refIndex, altIndex), .4); + } +} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/sv/PrintSVEvidenceIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/sv/PrintSVEvidenceIntegrationTest.java index 82d387b03ba..0e425c70405 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/sv/PrintSVEvidenceIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/sv/PrintSVEvidenceIntegrationTest.java @@ -161,8 +161,8 @@ public void testDepthUniquenessCriterion( int val1, int val2 ) { public void testLocusDepthViolateUniquenessCriterion() { final DummyFeatureSink sink = new DummyFeatureSink<>(); final LocusDepthSortMerger sortMerger = new LocusDepthSortMerger(dict, sink); - sortMerger.write(new LocusDepth("chr1", 1, "sample", (byte)'A', 1, 0, 0, 0)); - sortMerger.write(new LocusDepth("chr1", 1, "sample", (byte)'A', 0, 1, 0, 0)); + sortMerger.write(new LocusDepth("chr1", 1, "sample", 1, 0, 0, 0)); + sortMerger.write(new LocusDepth("chr1", 1, "sample", 0, 1, 0, 0)); sortMerger.close(); Assert.assertEquals(sink.nRecsWritten, 0); } @@ -171,8 +171,8 @@ public void testLocusDepthViolateUniquenessCriterion() { public void testLocusDepthUniquenessCriterion() { final DummyFeatureSink sink = new DummyFeatureSink<>(); final LocusDepthSortMerger sortMerger = new LocusDepthSortMerger(dict, sink); - sortMerger.write(new LocusDepth("chr1", 1, "sample1", (byte)'A', 1, 0, 0, 0)); - sortMerger.write(new LocusDepth("chr1", 1, "sample2", (byte)'A', 0, 1, 0, 0)); + sortMerger.write(new LocusDepth("chr1", 1, "sample1", 1, 0, 0, 0)); + sortMerger.write(new LocusDepth("chr1", 1, "sample2", 0, 1, 0, 0)); sortMerger.close(); Assert.assertEquals(sink.nRecsWritten, 2); } diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.baf.txt b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.baf.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.ld.txt b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.ld.txt new file mode 100644 index 00000000000..1dcdbc6a01a --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.ld.txt @@ -0,0 +1,2 @@ +1 1000 smpl1 3 7 0 0 +1 1000 smpl2 7 3 0 0 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.vcf new file mode 100644 index 00000000000..43f4c1b5b83 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test1.vcf @@ -0,0 +1,4 @@ +##fileformat=VCFv4.2 +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO +1 1001 snp1 A C . . . diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.baf.txt b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.baf.txt new file mode 100644 index 00000000000..20d6e23b241 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.baf.txt @@ -0,0 +1,6 @@ +1 1000 0.6 smpl1 +1 1000 0.5 smpl2 +1 1000 0.4 smpl3 +2 2000 0.6 smpl1 +2 2000 0.5 smpl2 +2 2000 0.4 smpl3 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.ld.txt b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.ld.txt new file mode 100644 index 00000000000..e9db3347d77 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.ld.txt @@ -0,0 +1,6 @@ +1 1000 smpl1 4 0 0 6 +1 1000 smpl2 5 0 0 5 +1 1000 smpl3 6 0 0 4 +2 2000 smpl1 4 0 0 6 +2 2000 smpl2 5 0 0 5 +2 2000 smpl3 6 0 0 4 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.vcf new file mode 100644 index 00000000000..501d6a72e56 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2.vcf @@ -0,0 +1,6 @@ +##fileformat=VCFv4.2 +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO +1 1001 snp1 A T . . . +2 2001 snp2 A T . . . diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2a.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2a.vcf new file mode 100644 index 00000000000..4e8dead4c1b --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test2a.vcf @@ -0,0 +1,6 @@ +##fileformat=VCFv4.2 +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO +1 1001 snp1 A T . . . +2 9999 snp2 A T . . . diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.baf.txt b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.baf.txt new file mode 100644 index 00000000000..c1009f969fd --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.baf.txt @@ -0,0 +1,2 @@ +1 1000 0.5 smpl1 +1 1000 0.5 smpl2 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.ld.txt b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.ld.txt new file mode 100644 index 00000000000..24d1ae5eb0a --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.ld.txt @@ -0,0 +1,2 @@ +1 1000 smpl1 0 4 0 6 +1 1000 smpl2 0 4 0 6 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.vcf new file mode 100644 index 00000000000..8f657040a1d --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/sv/LocusDepthtoBAF/test3.vcf @@ -0,0 +1,4 @@ +##fileformat=VCFv4.2 +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO +1 1001 snp1 C T . . .