Skip to content

Commit

Permalink
fixed an issue where the logic for suffix ends was incorrect for dang…
Browse files Browse the repository at this point in the history
…ling tails
  • Loading branch information
jamesemery committed Jul 2, 2020
1 parent 39a0d04 commit 1ee14e0
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,7 @@ final int mergeDanglingTail(final DanglingChainMergeHelper danglingTailMergeResu

final int lastRefIndex = danglingTailMergeResult.cigar.getReferenceLength() - 1;
final int matchingSuffix = Math.min(longestSuffixMatch(danglingTailMergeResult.referencePathString, danglingTailMergeResult.danglingPathString, lastRefIndex), lastElement.getLength());
if (matchingSuffix == 0 && matchingSuffix >= minMatchingBasesToDangingEndRecovery ) {
if (minMatchingBasesToDangingEndRecovery >= 0 ? matchingSuffix < minMatchingBasesToDangingEndRecovery : matchingSuffix == 0 ) {
return 0;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,53 @@ public void testDanglingTails(final String refEnd,
}
}

@Test
public void testForkedDanglingEndsWithSuffixCode() {

final int kmerSize = 15;

// construct the haplotypes
final String commonPrefix = "AAAAAAAAAACCCCCCCCCCGGGGGGGGGGTTTTTTTTTT";
final String refEnd = "GCTAGCTAATCGTTAAGCTTTAAC";
final String altEnd1 = "GCTAGCTAA"+ "GGCG";// two mismatches compared to the reference
final String altEnd2 = "GCTAGCTAA"+ "GCCGATGGCT";
final String ref = commonPrefix + refEnd;
final String alt1 = commonPrefix + altEnd1;
final String alt2 = commonPrefix + altEnd2;

// create the graph and populate it
final ReadThreadingGraph rtgraph = new ReadThreadingGraph(kmerSize);
rtgraph.addSequence("ref", ref.getBytes(), true);
final GATKRead read1 = ArtificialReadUtils.createArtificialRead(alt1.getBytes(), Utils.dupBytes((byte) 30, alt1.length()), alt1.length() + "M");
final GATKRead read2 = ArtificialReadUtils.createArtificialRead(alt2.getBytes(), Utils.dupBytes((byte) 30, alt2.length()), alt2.length() + "M");
final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
rtgraph.setMinMatchingBasesToDangingEndRecovery(1);
rtgraph.addRead(read2, header);
rtgraph.addRead(read1, header);
rtgraph.buildGraphIfNecessary();

Assert.assertEquals(rtgraph.getSinks().size(), 3);

// Testing a degenerate case where the wrong "reference" path is selected and assuring that when
for (final MultiDeBruijnVertex altSink : rtgraph.getSinks()) {
if (rtgraph.isReferenceNode(altSink) || !altSink.getSequenceString().equals("GCTAAGCCGATGGCT")) {
continue;
}

// confirm that the SW alignment agrees with our expectations
final ReadThreadingGraph.DanglingChainMergeHelper result = rtgraph.generateCigarAgainstDownwardsReferencePath(altSink,
0, 2, false, SmithWatermanJavaAligner.getInstance());
Assert.assertNotNull(result);
Assert.assertTrue(ReadThreadingGraph.cigarIsOkayToMerge(result.cigar, false, true));

Assert.assertEquals(Math.min(AbstractReadThreadingGraph.longestSuffixMatch(result.referencePathString, result.danglingPathString, result.cigar.getReferenceLength() - 1), result.cigar.getCigarElement(0).getLength()), 0);

// confirm that the tail merging works as expected
final int mergeResult = rtgraph.mergeDanglingTail(result);
Assert.assertTrue(mergeResult == 0);
}
}

@Test
public void testForkedDanglingEnds() {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.broadinstitute.hellbender.cmdline.argumentcollections.IntervalArgumentCollection;
import org.broadinstitute.hellbender.engine.FeatureDataSource;
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
import org.broadinstitute.hellbender.testutils.IntegrationTestSpec;
import org.broadinstitute.hellbender.testutils.VariantContextTestUtils;
import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyBasedCallerArgumentCollection;
Expand Down Expand Up @@ -549,6 +550,42 @@ public Object[][] vcfsForFiltering() {
};
}

@Test
public void testThing() {
Utils.resetRandomGenerator();

//NOTE: AlleleSpecific support in the VCF mode is implemented but bogus for now.
// This test should not be treated as a strict check of correctness.
final File output = createTempFile("testVCFModeIsConsistentWithPastResults", ".vcf");
// final File expected = new File(TEST_FILES_DIR + "expected.testVCFMode.gatk4.alleleSpecific.vcf");

// final String outputPath = UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ? expected.getAbsolutePath() : output.getAbsolutePath();

final String[] args = {
"-R",
"gs://broad-dsp-spec-ops/scratch/andrea/mtb/Mycobacterium_tuberculosis_H37Rv.fasta",
"-I",
"gs://broad-dsp-spec-ops-cromwell-execution/MicrobialGenomePipeline/3625b1d1-38a1-40de-a810-23b347d51b14/call-AlignToRef/MicrobialAlignmentPipeline/87d3ba55-4497-474c-9232-44144c7c3ef4/call-AlignAndMarkDuplicates/D1CLVACXX.1.Solexa-125092.aligned.realigned.bam",
"-O", output.getAbsolutePath(),
"-L", "/Users/emeryj/hellbender/UserLiason/debugM2Graph/Mycobacterium_tuberculosis_H37Rv.intervals",
"--annotation","StrandBiasBySample",
"--num-matching-bases-in-dangling-end-to-recover",
"1",
"--max-reads-per-alignment-start",
"75",
"--max-mnp-distance",
"0"};

runCommandLine(args);

// Test for an exact match against past results
// if ( ! UPDATE_EXACT_MATCH_EXPECTED_OUTPUTS ) {
// IntegrationTestSpec.assertEqualTextFiles(output, expected);
// }

}


@Test(dataProvider = "vcfsForFiltering")
public void testFilterMitochondria(File unfiltered, final double minAlleleFraction, final List<String> intervals, List<Set<String>> expectedFilters, List<List<String>> expectedASFilters) {
final File filteredVcf = createTempFile("filtered", ".vcf");
Expand Down

0 comments on commit 1ee14e0

Please sign in to comment.