Skip to content

Commit

Permalink
Merge pull request #82 from andersen-lab/amplicon_filter
Browse files Browse the repository at this point in the history
Amplicon filter
  • Loading branch information
gkarthik committed Jan 5, 2021
2 parents 98c2394 + 9c69970 commit 0889a14
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 20 deletions.
22 changes: 11 additions & 11 deletions src/interval_tree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,13 @@ void IntervalTree::insert(ITNode *root, Interval data){
}
}
// update max value of ancestor node
if(root->max < data.low)
root->max = data.low;
if(root->max < data.high)
root->max = data.high;
}


// A utility function to check if given two intervals overlap
bool doOverlap(Interval i1, Interval i2){
// A utility function to check if the 1st interval envelops the second
bool doEnvelop(Interval i1, Interval i2){
if(i1.low <= i2.low && i1.high >= i2.high)
return true;
return false;
Expand All @@ -53,23 +53,23 @@ bool doOverlap(Interval i1, Interval i2){

// The main function that searches an interval i in a given
// Interval Tree.
bool IntervalTree::overlapSearch(ITNode *root, Interval i){
bool IntervalTree::envelopSearch(ITNode *root, Interval i){
// Base Case, tree is empty
//std::cout << root->data->low << ":" << root->data->high << std::endl;
if (root == NULL) return false;

// If given interval overlaps with root
if (doOverlap(*(root->data), i))
if (doEnvelop(*(root->data), i))
return true;

// If left child of root is present and max of left child is
// greater than or equal to given interval, then i may
// overlap with an interval in left subtree
if (root->left != NULL && root->left->max >= i.low)
return overlapSearch(root->left, i);
// be enveloped by an amplicon in left subtree
if (root->left != NULL && root->left->max >= i.high)
return envelopSearch(root->left, i);

// Else interval can only overlap with right subtree
return overlapSearch(root->right, i);
// Else interval can only be enveloped by amplicon in right subtree
return envelopSearch(root->right, i);
}

// A helper function for inorder traversal of the tree
Expand Down
6 changes: 3 additions & 3 deletions src/interval_tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class ITNode{
void setRight(ITNode *node);
*/
public:
ITNode(Interval value): data(new Interval(value)), left(nullptr), right(nullptr), max(value.low) {} // constructor
ITNode(Interval value): data(new Interval(value)), left(nullptr), right(nullptr), max(value.high) {} // constructor
Interval *data; // pointer to node's interval data object
ITNode *left, *right; // pointer to node's left & right child node objects
int max;
Expand All @@ -38,13 +38,13 @@ class IntervalTree{
private:
ITNode *_root;
void insert(ITNode *root, Interval data);
bool overlapSearch(ITNode *root, Interval data);
bool envelopSearch(ITNode *root, Interval data);
void inOrder(ITNode * root);

public:
IntervalTree(); // constructor
void insert(Interval data){ insert(_root, data);}
bool overlapSearch(Interval data){ return overlapSearch(_root, data);}
bool envelopSearch(Interval data){ return envelopSearch(_root, data);}
void inOrder() {inOrder(_root);}
};

Expand Down
4 changes: 2 additions & 2 deletions src/ivar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ void print_trim_usage(){
"Input Options Description\n"
" -i (Required) Sorted bam file, with aligned reads, to trim primers and quality\n"
" -b BED file with primer sequences and positions. If no BED file is specified, only quality trimming will be done.\n"
" -f Primer pair information file containing left and right primer names for the same amplicon separated by a tab\n"
" If provided, reads will be filtered based on their overlap with amplicons prior to trimming\n"
" -f [EXPERIMENTAL] Primer pair information file containing left and right primer names for the same amplicon separated by a tab\n"
" If provided, reads that do not fall within atleat one amplicon will be ignored prior to primer trimming.\n"
" -m Minimum length of read to retain after trimming (Default: 30)\n"
" -q Minimum quality threshold for sliding window to pass (Default: 20)\n"
" -s Width of sliding window (Default: 4)\n"
Expand Down
11 changes: 8 additions & 3 deletions src/trim_primer_quality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ int get_bigger_primer(std::vector<primer> primers){
return max_primer_len;
}

// check if read overlaps with any of the amplicons
// check if read is enveloped by any of the amplicons
bool amplicon_filter(IntervalTree amplicons, bam1_t* r){
Interval fragment_coords = Interval(0, 1);
if(r->core.isize > 0){
Expand All @@ -342,7 +342,7 @@ bool amplicon_filter(IntervalTree amplicons, bam1_t* r){
fragment_coords.high = bam_endpos(r);
}
// debugging
bool amplicon_flag = amplicons.overlapSearch(fragment_coords);
bool amplicon_flag = amplicons.envelopSearch(fragment_coords);
return amplicon_flag;
}

Expand All @@ -363,6 +363,8 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out,
if(!pair_info.empty()){
amplicons = populate_amplicons(pair_info, primers);
}
std::cout << "Amplicons detected: " << std::endl;
amplicons.inOrder();
if(bam.empty()){
std::cout << "Bam file is empty." << std::endl;
return -1;
Expand Down Expand Up @@ -605,7 +607,10 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out,
std::cout << unmapped_counter << " unmapped reads were not written to file." << std::endl;
}
if(amplicon_flag_ctr > 0){
std::cout << amplicon_flag_ctr << " reads were ignored because they did not fall within an amplicon" << std::endl;
std::cout << round_int(amplicon_flag_ctr, mapped)
<< "% (" << amplicon_flag_ctr
<< ") reads were ignored because they did not fall within an amplicon"
<< std::endl;
}
if(failed_frag_size > 0){
std::cout << round_int(failed_frag_size, mapped)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_interval_tree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ int test_itree_overlap(IntervalTree tree, Interval queries[], int num_tests, boo
int result = 0;
for (int i = 0; i < num_tests; i++)
{
result = tree.overlapSearch(queries[i]);
result = tree.envelopSearch(queries[i]);
if (result != expected[i])
{
std::cout << "Interval Tree overlap behavior incorrect for interval " << queries[i].low << ":" << queries[i].high
Expand Down

0 comments on commit 0889a14

Please sign in to comment.