Skip to content

Commit

Permalink
Inverted SoftClippedReadFilter to conform to the standard filtering l…
Browse files Browse the repository at this point in the history
…ogic (#8888)
  • Loading branch information
jamesemery committed Jun 28, 2024
1 parent adbb626 commit 92dc4ae
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 66 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,8 @@ private ReadFilterArgumentDefinitions(){}

public static final String KEEP_INTERVAL_NAME = "keep-intervals";

public static final String SOFT_CLIPPED_RATIO_THRESHOLD = "soft-clipped-ratio-threshold";
public static final String SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD = "soft-clipped-leading-trailing-ratio";

public static final String INVERT_SOFT_CLIP_RATIO_FILTER = "invert-soft-clip-ratio-filter";
public static final String SOFT_CLIPPED_RATIO_THRESHOLD = "max-soft-clipped-ratio";
public static final String SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD = "max-soft-clipped-leading-trailing-ratio";

public static final String READ_FILTER_TAG = "read-filter-tag";
public static final String READ_FILTER_TAG_COMP = "read-filter-tag-comp";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,28 +25,21 @@ public final class SoftClippedReadFilter extends ReadFilter {
static final long serialVersionUID = 1L;
private final Logger logger = LogManager.getLogger(this.getClass());

@VisibleForTesting
@Argument(fullName = ReadFilterArgumentDefinitions.INVERT_SOFT_CLIP_RATIO_FILTER,
doc = "Inverts the results from this filter, causing all variants that would pass to fail and visa-versa.",
optional = true
)
boolean doInvertFilter = false;

@VisibleForTesting
@Argument(fullName = ReadFilterArgumentDefinitions.SOFT_CLIPPED_RATIO_THRESHOLD,
doc = "Threshold ratio of soft clipped bases (anywhere in the cigar string) to total bases in read for read to be filtered.",
optional = true,
mutex = { ReadFilterArgumentDefinitions.SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD }
)
Double minimumSoftClippedRatio = null;
Double maximumSoftClippedRatio = null;

@VisibleForTesting
@Argument(fullName = ReadFilterArgumentDefinitions.SOFT_CLIPPED_LEADING_TRAILING_RATIO_THRESHOLD,
doc = "Threshold ratio of soft clipped bases (leading / trailing the cigar string) to total bases in read for read to be filtered.",
optional = true,
mutex = {ReadFilterArgumentDefinitions.SOFT_CLIPPED_RATIO_THRESHOLD}
)
Double minimumLeadingTrailingSoftClippedRatio = null;
Double maximumLeadingTrailingSoftClippedRatio = null;

// Command line parser requires a no-arg constructor
public SoftClippedReadFilter() {}
Expand All @@ -61,15 +54,15 @@ private boolean testMinSoftClippedRatio(final GATKRead read) {
totalLength += element.getLength();
}

final double softClipRatio = ((double)numSoftClippedBases / (double)totalLength);
final double softClipRatio = totalLength != 0 ? ((double)numSoftClippedBases / (double)totalLength) : 0.0;

return softClipRatio > minimumSoftClippedRatio;
return softClipRatio <= maximumSoftClippedRatio;
}

private boolean testMinLeadingTrailingSoftClippedRatio(final GATKRead read) {

if ( read.getCigarElements().size() < 1 ) {
return false;
return true; // NOTE: in this edge case the read passes the filter, because with no cigar elements there can be no leading/trailing soft clips.
}

// Get the index of the last cigar element:
Expand All @@ -90,24 +83,25 @@ private boolean testMinLeadingTrailingSoftClippedRatio(final GATKRead read) {
.sum();

// Calculate the ratio:
final double softClipRatio = ((double)numLeadingTrailingSoftClippedBases / (double)totalLength);
final double softClipRatio = totalLength != 0 ? ((double)numLeadingTrailingSoftClippedBases / (double)totalLength) : 0.0;

return softClipRatio > minimumLeadingTrailingSoftClippedRatio;
return softClipRatio <= maximumLeadingTrailingSoftClippedRatio;
}

@Override
// NOTE: for read filters we always return true if the read passes the filter, and false if it doesn't.
public boolean test(final GATKRead read) {

final boolean result;

// NOTE: Since we have mutex'd the args for the clipping ratios, we only need to see if they
// have been specified. If they have, that's the filter logic we're using.
// If the overall soft-clipped ratio threshold was specified, we use the overall soft-clipped ratio test:
if ( minimumSoftClippedRatio != null ) {
if ( maximumSoftClippedRatio != null ) {
result = testMinSoftClippedRatio(read);
}
// If the leading/trailing soft-clipped ratio threshold was specified, we use the leading/trailing soft-clipped ratio test:
else if ( minimumLeadingTrailingSoftClippedRatio != null ) {
else if ( maximumLeadingTrailingSoftClippedRatio != null ) {
result = testMinLeadingTrailingSoftClippedRatio(read);
}
else {
Expand All @@ -118,10 +112,6 @@ else if ( minimumLeadingTrailingSoftClippedRatio != null ) {
);
}

// Check for if we want to invert our results:
if ( doInvertFilter ) {
return !result;
}
return result;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,10 @@ public void testOverclippedSoftClipRatioFilter(final String cigarString,
final boolean expectedResult) {

final SoftClippedReadFilter filter = new SoftClippedReadFilter();
filter.minimumSoftClippedRatio = clipRatio;
filter.maximumSoftClippedRatio = clipRatio;

final GATKRead read = buildSAMRead(cigarString);
Assert.assertEquals(filter.test(read), expectedResult, cigarString);

filter.doInvertFilter = true;
Assert.assertEquals(filter.test(read), !expectedResult, "Inverted case: " + cigarString);
}

@Test(dataProvider= "SoftClippedLeadingTrailingRatioDataProvider")
Expand All @@ -52,13 +49,10 @@ public void testSoftClippedLeadingTrailingRatioFilter(final String cigarString,
final boolean expectedResult) {

final SoftClippedReadFilter filter = new SoftClippedReadFilter();
filter.minimumLeadingTrailingSoftClippedRatio = clipRatio;
filter.maximumLeadingTrailingSoftClippedRatio = clipRatio;

final GATKRead read = buildSAMRead(cigarString);
Assert.assertEquals(filter.test(read), expectedResult, cigarString);

filter.doInvertFilter = true;
Assert.assertEquals(filter.test(read), !expectedResult, "Inverted case: " + cigarString);
}

@DataProvider(name = "SoftClipRatioDataProvider")
Expand All @@ -67,25 +61,25 @@ public Iterator<Object[]> softClipRatioDataProvider() {

// ---------------------------------------
// Null / trivial cases:
testData.add(new Object[] { "", 0.1, false });
testData.add(new Object[] { "10H", 0.1, false });
testData.add(new Object[] { "", 0.1, true });
testData.add(new Object[] { "10H", 0.1, true });

// ---------------------------------------
// Soft clip ratio test:

testData.add(new Object[] { "1S1M1S17M", 0.2, false }); // 2/20 = .100
testData.add(new Object[] { "1S1M2S17M", 0.2, false }); // 3/21 = .143
testData.add(new Object[] { "1S1M3S17M", 0.2, false }); // 4/22 = .182
testData.add(new Object[] { "1S1M4S17M", 0.2, true }); // 5/23 = .217
testData.add(new Object[] { "1S1M5S17M", 0.2, true }); // 6/24 = .250
testData.add(new Object[] { "1S1M6S17M", 0.2, true }); // 7/25 = .280
testData.add(new Object[] { "1S1M1S17M", 0.2, true }); // 2/20 = .100
testData.add(new Object[] { "1S1M2S17M", 0.2, true }); // 3/21 = .143
testData.add(new Object[] { "1S1M3S17M", 0.2, true }); // 4/22 = .182
testData.add(new Object[] { "1S1M4S17M", 0.2, false }); // 5/23 = .217
testData.add(new Object[] { "1S1M5S17M", 0.2, false }); // 6/24 = .250
testData.add(new Object[] { "1S1M6S17M", 0.2, false }); // 7/25 = .280

// ---------------------------------------
// Soft clip placement:

testData.add(new Object[] { "101S100M", 0.5, true });
testData.add(new Object[] { "100M101S", 0.5, true });
testData.add(new Object[] { "25H20S10M20S10M20S10M20S10M20S10M20S25H", 0.5, true });
testData.add(new Object[] { "101S100M", 0.5, false });
testData.add(new Object[] { "100M101S", 0.5, false });
testData.add(new Object[] { "25H20S10M20S10M20S10M20S10M20S10M20S25H", 0.5, false });

return testData.iterator();
}
Expand All @@ -96,42 +90,42 @@ public Iterator<Object[]> softClippedLeadingTrailingRatioDataProvider() {

// ---------------------------------------
// Null / trivial cases:
testData.add(new Object[] { "", 0.1, false });
testData.add(new Object[] { "10H", 0.1, false });
testData.add(new Object[] { "", 0.1, true });
testData.add(new Object[] { "10H", 0.1, true });

// ---------------------------------------
// Soft clip ratio test:

// Non-leading/-trailing
testData.add(new Object[] { "1S1M1S17M", 0.2, false }); // 2/20 = .100
testData.add(new Object[] { "1S1M2S17M", 0.2, false }); // 3/21 = .143
testData.add(new Object[] { "1S1M3S17M", 0.2, false }); // 4/22 = .182
testData.add(new Object[] { "1S1M4S17M", 0.2, false }); // 5/23 = .217
testData.add(new Object[] { "1S1M5S17M", 0.2, false }); // 6/24 = .250
testData.add(new Object[] { "1S1M6S17M", 0.2, false }); // 7/25 = .280
testData.add(new Object[] { "1S1M1S17M", 0.2, true }); // 2/20 = .100
testData.add(new Object[] { "1S1M2S17M", 0.2, true }); // 3/21 = .143
testData.add(new Object[] { "1S1M3S17M", 0.2, true }); // 4/22 = .182
testData.add(new Object[] { "1S1M4S17M", 0.2, true }); // 5/23 = .217
testData.add(new Object[] { "1S1M5S17M", 0.2, true }); // 6/24 = .250
testData.add(new Object[] { "1S1M6S17M", 0.2, true }); // 7/25 = .280

// Leading:
testData.add(new Object[] { "2S1S1S16M", 0.2, false }); // 2/20 = .100
testData.add(new Object[] { "3S1S1S16M", 0.2, false }); // 3/21 = .143
testData.add(new Object[] { "4S1S1S16M", 0.2, false }); // 4/22 = .182
testData.add(new Object[] { "5S1S1S16M", 0.2, true }); // 5/23 = .217
testData.add(new Object[] { "6S1S1S16M", 0.2, true }); // 6/24 = .250
testData.add(new Object[] { "7S1S1S16M", 0.2, true }); // 7/25 = .280
testData.add(new Object[] { "2S1S1S16M", 0.2, true }); // 2/20 = .100
testData.add(new Object[] { "3S1S1S16M", 0.2, true }); // 3/21 = .143
testData.add(new Object[] { "4S1S1S16M", 0.2, true }); // 4/22 = .182
testData.add(new Object[] { "5S1S1S16M", 0.2, false }); // 5/23 = .217
testData.add(new Object[] { "6S1S1S16M", 0.2, false }); // 6/24 = .250
testData.add(new Object[] { "7S1S1S16M", 0.2, false }); // 7/25 = .280

// Trailing:
testData.add(new Object[] { "1M1S16M2S", 0.2, false }); // 2/20 = .100
testData.add(new Object[] { "1M1S16M3S", 0.2, false }); // 3/21 = .143
testData.add(new Object[] { "1M1S16M4S", 0.2, false }); // 4/22 = .182
testData.add(new Object[] { "1M1S16M5S", 0.2, true }); // 5/23 = .217
testData.add(new Object[] { "1M1S16M6S", 0.2, true }); // 6/24 = .250
testData.add(new Object[] { "1M1S16M7S", 0.2, true }); // 7/25 = .280
testData.add(new Object[] { "1M1S16M2S", 0.2, true }); // 2/20 = .100
testData.add(new Object[] { "1M1S16M3S", 0.2, true }); // 3/21 = .143
testData.add(new Object[] { "1M1S16M4S", 0.2, true }); // 4/22 = .182
testData.add(new Object[] { "1M1S16M5S", 0.2, false }); // 5/23 = .217
testData.add(new Object[] { "1M1S16M6S", 0.2, false }); // 6/24 = .250
testData.add(new Object[] { "1M1S16M7S", 0.2, false }); // 7/25 = .280

// ---------------------------------------
// Soft clip placement:

testData.add(new Object[] { "101S100M", 0.5, true });
testData.add(new Object[] { "100M101S", 0.5, true });
testData.add(new Object[] { "25H20S10M20S10M20S10M20S10M20S10M20S25H", 0.5, false });
testData.add(new Object[] { "101S100M", 0.5, false });
testData.add(new Object[] { "100M101S", 0.5, false });
testData.add(new Object[] { "25H20S10M20S10M20S10M20S10M20S10M20S25H", 0.5, true });

return testData.iterator();
}
Expand Down

0 comments on commit 92dc4ae

Please sign in to comment.