From ee89b38d21290e812ac9542d018dcdac645dde4b Mon Sep 17 00:00:00 2001 From: Alaa Abdel Latif Date: Tue, 1 Dec 2020 13:55:16 -0800 Subject: [PATCH 1/4] log % of reads ignored due to amplicon filter step during trimming --- src/trim_primer_quality.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/trim_primer_quality.cpp b/src/trim_primer_quality.cpp index 7408eac1..9734ffcc 100755 --- a/src/trim_primer_quality.cpp +++ b/src/trim_primer_quality.cpp @@ -363,6 +363,8 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, if(!pair_info.empty()){ amplicons = populate_amplicons(pair_info, primers); } + std::cout << "Amplicons detected: " << std::endl; + amplicons.inOrder(); if(bam.empty()){ std::cout << "Bam file is empty." << std::endl; return -1; @@ -605,7 +607,10 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, std::cout << unmapped_counter << " unmapped reads were not written to file." << std::endl; } if(amplicon_flag_ctr > 0){ - std::cout << amplicon_flag_ctr << " reads were ignored because they did not fall within an amplicon" << std::endl; + std::cout << round_int(amplicon_flag_ctr, mapped) + << "% (" << amplicon_flag_ctr + << ") reads were ignored because they did not fall within an amplicon" + << std::endl; } if(failed_frag_size > 0){ std::cout << round_int(failed_frag_size, mapped) From fb42656a793ff856737aae23aa380e4b8fbb2536 Mon Sep 17 00:00:00 2001 From: Alaa Abdel Latif Date: Mon, 4 Jan 2021 14:45:40 -0800 Subject: [PATCH 2/4] changed description of ivar trim to indicate that amplicon-based filter is in experimental phase of development --- src/ivar.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ivar.cpp b/src/ivar.cpp index 3fbe9412..161f4c02 100755 --- a/src/ivar.cpp +++ b/src/ivar.cpp @@ -65,8 +65,8 @@ void print_trim_usage(){ "Input Options Description\n" " -i (Required) Sorted bam file, with aligned reads, to trim primers and quality\n" " -b BED file with primer sequences and positions. If no BED file is specified, only quality trimming will be done.\n" - " -f Primer pair information file containing left and right primer names for the same amplicon separated by a tab\n" - " If provided, reads will be filtered based on their overlap with amplicons prior to trimming\n" + " -f [EXPERIMENTAL] Primer pair information file containing left and right primer names for the same amplicon separated by a tab\n" + " If provided, reads that do not fall within atleat one amplicon will be ignored prior to primer trimming.\n" " -m Minimum length of read to retain after trimming (Default: 30)\n" " -q Minimum quality threshold for sliding window to pass (Default: 20)\n" " -s Width of sliding window (Default: 4)\n" From 272544deae4b46af3e6cb92dca6ed983f644e711 Mon Sep 17 00:00:00 2001 From: Alaa Abdel Latif Date: Mon, 4 Jan 2021 14:46:59 -0800 Subject: [PATCH 3/4] changed traversal mechanics for envelopSearch (previously named overlapSearch), changed naming conventions accordingly --- src/interval_tree.cpp | 22 +++++++++++----------- src/interval_tree.h | 6 +++--- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/interval_tree.cpp b/src/interval_tree.cpp index 4906338a..1f1098cf 100755 --- a/src/interval_tree.cpp +++ b/src/interval_tree.cpp @@ -38,13 +38,13 @@ void IntervalTree::insert(ITNode *root, Interval data){ } } // update max value of ancestor node - if(root->max < data.low) - root->max = data.low; + if(root->max < data.high) + root->max = data.high; } -// A utility function to check if given two intervals overlap -bool doOverlap(Interval i1, Interval i2){ +// A utility function to check if the 1st interval envelops the second +bool doEnvelop(Interval i1, Interval i2){ if(i1.low <= i2.low && i1.high >= i2.high) return true; return false; @@ -53,23 +53,23 @@ bool doOverlap(Interval i1, Interval i2){ // The main function that searches an interval i in a given // Interval Tree. -bool IntervalTree::overlapSearch(ITNode *root, Interval i){ +bool IntervalTree::envelopSearch(ITNode *root, Interval i){ // Base Case, tree is empty //std::cout << root->data->low << ":" << root->data->high << std::endl; if (root == NULL) return false; // If given interval overlaps with root - if (doOverlap(*(root->data), i)) + if (doEnvelop(*(root->data), i)) return true; // If left child of root is present and max of left child is // greater than or equal to given interval, then i may - // overlap with an interval in left subtree - if (root->left != NULL && root->left->max >= i.low) - return overlapSearch(root->left, i); + // be enveloped by an amplicon in left subtree + if (root->left != NULL && root->left->max >= i.high) + return envelopSearch(root->left, i); - // Else interval can only overlap with right subtree - return overlapSearch(root->right, i); + // Else interval can only be enveloped by amplicon in right subtree + return envelopSearch(root->right, i); } // A helper function for inorder traversal of the tree diff --git a/src/interval_tree.h b/src/interval_tree.h index 224f0c55..63742437 100755 --- a/src/interval_tree.h +++ b/src/interval_tree.h @@ -25,7 +25,7 @@ class ITNode{ void setRight(ITNode *node); */ public: - ITNode(Interval value): data(new Interval(value)), left(nullptr), right(nullptr), max(value.low) {} // constructor + ITNode(Interval value): data(new Interval(value)), left(nullptr), right(nullptr), max(value.high) {} // constructor Interval *data; // pointer to node's interval data object ITNode *left, *right; // pointer to node's left & right child node objects int max; @@ -38,13 +38,13 @@ class IntervalTree{ private: ITNode *_root; void insert(ITNode *root, Interval data); - bool overlapSearch(ITNode *root, Interval data); + bool envelopSearch(ITNode *root, Interval data); void inOrder(ITNode * root); public: IntervalTree(); // constructor void insert(Interval data){ insert(_root, data);} - bool overlapSearch(Interval data){ return overlapSearch(_root, data);} + bool envelopSearch(Interval data){ return envelopSearch(_root, data);} void inOrder() {inOrder(_root);} }; From 9c699707754958bae31184ddbcab97ed4caa4569 Mon Sep 17 00:00:00 2001 From: Alaa Abdel Latif Date: Mon, 4 Jan 2021 14:48:01 -0800 Subject: [PATCH 4/4] changed use of IntervalTree to accomodate changes in method names --- src/trim_primer_quality.cpp | 4 ++-- tests/test_interval_tree.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/trim_primer_quality.cpp b/src/trim_primer_quality.cpp index 9734ffcc..38db3d9d 100755 --- a/src/trim_primer_quality.cpp +++ b/src/trim_primer_quality.cpp @@ -331,7 +331,7 @@ int get_bigger_primer(std::vector primers){ return max_primer_len; } -// check if read overlaps with any of the amplicons +// check if read is enveloped by any of the amplicons bool amplicon_filter(IntervalTree amplicons, bam1_t* r){ Interval fragment_coords = Interval(0, 1); if(r->core.isize > 0){ @@ -342,7 +342,7 @@ bool amplicon_filter(IntervalTree amplicons, bam1_t* r){ fragment_coords.high = bam_endpos(r); } // debugging - bool amplicon_flag = amplicons.overlapSearch(fragment_coords); + bool amplicon_flag = amplicons.envelopSearch(fragment_coords); return amplicon_flag; } diff --git a/tests/test_interval_tree.cpp b/tests/test_interval_tree.cpp index 06214ed6..c9111118 100755 --- a/tests/test_interval_tree.cpp +++ b/tests/test_interval_tree.cpp @@ -7,7 +7,7 @@ int test_itree_overlap(IntervalTree tree, Interval queries[], int num_tests, boo int result = 0; for (int i = 0; i < num_tests; i++) { - result = tree.overlapSearch(queries[i]); + result = tree.envelopSearch(queries[i]); if (result != expected[i]) { std::cout << "Interval Tree overlap behavior incorrect for interval " << queries[i].low << ":" << queries[i].high