Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added optional -x flag for ivar trim. allows trimming of reads that occur at a specified offset position relative to the primer sequence positions #88

Merged
merged 4 commits into from
Feb 5, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Process this file with autoconf to produce a configure script.

AC_PREREQ([2.63])
AC_INIT([ivar], [1.3], [[email protected]])
AC_INIT([ivar], [1.3.1], [[email protected]])
AM_INIT_AUTOMAKE([foreign subdir-objects])
AC_CONFIG_HEADERS([config.h])

Expand Down
12 changes: 9 additions & 3 deletions src/ivar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include "suffix_tree.h"
#include "get_common_variants.h"

const std::string VERSION = "1.3";
const std::string VERSION = "1.3.1";

struct args_t {
std::string bam; // -i
Expand All @@ -37,6 +37,7 @@ struct args_t {
char gap; // -n
bool keep_min_coverage; // -k
std::string primer_pair_file; // -f
int32_t primer_offset; // -x
std::string file_list; // -f
bool write_no_primers_flag; // -e
std::string gff; // -g
Expand Down Expand Up @@ -67,6 +68,7 @@ void print_trim_usage(){
" -b BED file with primer sequences and positions. If no BED file is specified, only quality trimming will be done.\n"
" -f [EXPERIMENTAL] Primer pair information file containing left and right primer names for the same amplicon separated by a tab\n"
" If provided, reads that do not fall within atleat one amplicon will be ignored prior to primer trimming.\n"
" -x Primer position offset (Default: 0). Reads that occur at the specified offset positions relative to primer positions will also be trimmed.\n"
" -m Minimum length of read to retain after trimming (Default: 30)\n"
" -q Minimum quality threshold for sliding window to pass (Default: 20)\n"
" -s Width of sliding window (Default: 4)\n"
Expand Down Expand Up @@ -165,7 +167,7 @@ void print_version_info(){
"\nPlease raise issues and bug reports at https://github.com/andersen-lab/ivar/\n\n";
}

static const char *trim_opt_str = "i:b:f:p:m:q:s:ekh?";
static const char *trim_opt_str = "i:b:f:x:p:m:q:s:ekh?";
static const char *variants_opt_str = "p:t:q:m:r:g:h?";
static const char *consensus_opt_str = "i:p:q:t:m:n:kh?";
static const char *removereads_opt_str = "i:p:t:b:h?";
Expand Down Expand Up @@ -221,6 +223,7 @@ int main(int argc, char* argv[]){
g_args.keep_for_reanalysis = false;
g_args.bed = "";
g_args.primer_pair_file = "";
g_args.primer_offset = 0;
opt = getopt( argc, argv, trim_opt_str);
while( opt != -1 ) {
switch( opt ) {
Expand All @@ -232,6 +235,9 @@ int main(int argc, char* argv[]){
break;
case 'f':
g_args.primer_pair_file = optarg;
break;
case 'x':
g_args.primer_offset = std::stoi(optarg);
break;
case 'p':
g_args.prefix = optarg;
Expand Down Expand Up @@ -264,7 +270,7 @@ int main(int argc, char* argv[]){
return -1;
}
g_args.prefix = get_filename_without_extension(g_args.prefix,".bam");
res = trim_bam_qual_primer(g_args.bam, g_args.bed, g_args.prefix, g_args.region, g_args.min_qual, g_args.sliding_window, cl_cmd.str(), g_args.write_no_primers_flag, g_args.keep_for_reanalysis, g_args.min_length, g_args.primer_pair_file);
res = trim_bam_qual_primer(g_args.bam, g_args.bed, g_args.prefix, g_args.region, g_args.min_qual, g_args.sliding_window, cl_cmd.str(), g_args.write_no_primers_flag, g_args.keep_for_reanalysis, g_args.min_length, g_args.primer_pair_file, g_args.primer_offset);
}
// ivar variants
else if (cmd.compare("variants") == 0){
Expand Down
6 changes: 3 additions & 3 deletions src/primer_bed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ void print_bed_format(){
std::cout << "It requires the following columns delimited by a tab: chrom, chromStart, chromEnd, name, score, strand" << std::endl;
}

std::vector<primer> populate_from_file(std::string path){
std::vector<primer> populate_from_file(std::string path, int32_t offset = 0){
Copy link
Member

@gkarthik gkarthik Feb 5, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that offset will not be needed in most of the function calls, can you please overload this function instead of adding offset to every call?

std::ifstream data(path.c_str());
std::string line;
std::vector<primer> primers;
Expand All @@ -103,7 +103,7 @@ std::vector<primer> populate_from_file(std::string path){
break;
case 1:
if(std::all_of(cell.begin(), cell.end(), ::isdigit)) {
p.set_start(std::stoul(cell));
p.set_start(std::stoul(cell) - offset);
} else {
print_bed_format();
primers.clear();
Expand All @@ -112,7 +112,7 @@ std::vector<primer> populate_from_file(std::string path){
break;
case 2:
if(std::all_of(cell.begin(), cell.end(), ::isdigit)) {
p.set_end(std::stoul(cell)-1); // Bed format - End is not 0 based
p.set_end(std::stoul(cell) - 1 + offset); // Bed format - End is not 0 based
} else {
print_bed_format();
primers.clear();
Expand Down
2 changes: 1 addition & 1 deletion src/primer_bed.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class primer {

};

std::vector<primer> populate_from_file(std::string path);
std::vector<primer> populate_from_file(std::string path, int32_t offset);
std::vector<primer> get_primers(std::vector<primer> p, unsigned int pos);
int get_primer_indice(std::vector<primer> p, std::string name);
int populate_pair_indices(std::vector<primer> &primers, std::string path);
Expand Down
4 changes: 2 additions & 2 deletions src/trim_primer_quality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,12 +346,12 @@ bool amplicon_filter(IntervalTree amplicons, bam1_t* r){
return amplicon_flag;
}

int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, std::string region_, uint8_t min_qual, uint8_t sliding_window, std::string cmd, bool write_no_primer_reads, bool keep_for_reanalysis, int min_length = 30, std::string pair_info = "") {
int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, std::string region_, uint8_t min_qual, uint8_t sliding_window, std::string cmd, bool write_no_primer_reads, bool keep_for_reanalysis, int min_length = 30, std::string pair_info = "", int32_t primer_offset = 0) {
int retval = 0;
std::vector<primer> primers;
int max_primer_len = 0;
if(!bed.empty()){
primers = populate_from_file(bed);
primers = populate_from_file(bed, primer_offset);
if(primers.size() == 0){
std::cout << "Exiting." << std::endl;
return -1;
Expand Down
2 changes: 1 addition & 1 deletion src/trim_primer_quality.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ inline void free_cigar(cigar_ t) { if (t.free_cig) free(t.cigar); }
void add_pg_line_to_header(bam_hdr_t** hdr, char *cmd);


int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, std::string region_, uint8_t min_qual, uint8_t sliding_window, std::string cmd, bool write_no_primer_reads, bool mark_qcfail_flag, int min_length, std::string pair_info);
int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, std::string region_, uint8_t min_qual, uint8_t sliding_window, std::string cmd, bool write_no_primer_reads, bool mark_qcfail_flag, int min_length, std::string pair_info, int32_t primer_offset);
void free_cigar(cigar_ t);
int32_t get_pos_on_query(uint32_t *cigar, uint32_t ncigar, int32_t pos, int32_t ref_start);
int32_t get_pos_on_reference(uint32_t *cigar, uint32_t ncigar, uint32_t pos, uint32_t ref_start);
Expand Down
7 changes: 4 additions & 3 deletions tests/test_amplicon_search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
#include "../src/primer_bed.h"
#include "../src/interval_tree.h"

int test_amplicon_search(std::string bam_file, std::string bed_file, std::string pair_info_file, bool expected[])
int test_amplicon_search(std::string bam_file, std::string bed_file, std::string pair_info_file, int32_t primer_offset, bool expected[])
{
std::vector<primer> primers;
IntervalTree amplicons;
primers = populate_from_file(bed_file);
primers = populate_from_file(bed_file, primer_offset);
std::cout << "Total Number of primers: " << primers.size() << std::endl;
amplicons = populate_amplicons(pair_info_file, primers);
samFile *in = hts_open(bam_file.c_str(), "r");
Expand All @@ -35,13 +35,14 @@ int main(){
int success;
std::string bam = "../data/test_amplicon.sorted.bam";
std::string pair_indice = "../data/pair_info_2.tsv";
int32_t primer_offset = 0;
std::string bed = "../data/test_isize.bed";
std::string prefix = "/tmp/data/trim_amplicon";
std::string bam_out = "/tmp/trim_amplicon.bam";

IntervalTree amplicons;
bool expected[8] = {true, false, true, false, true, false, true, false};
success = test_amplicon_search(bam, bed, pair_indice, expected);
success = test_amplicon_search(bam, bed, pair_indice, primer_offset, expected);
amplicons.inOrder();
return success;
}
3 changes: 2 additions & 1 deletion tests/test_isize_trim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ int test_isize_trim(uint8_t min_qual, uint8_t sliding_window, bool no_write_flag
std::string bam = "../data/test_isize.sorted.bam";
std::string bed = "../data/test_isize.bed";
std::string pair_info = "";
int32_t primer_offset = 0;
std::string prefix = "../data/trim_isize";
std::string bam_out = "../data/trim_isize.bam";

Expand All @@ -29,7 +30,7 @@ int test_isize_trim(uint8_t min_qual, uint8_t sliding_window, bool no_write_flag
std::string cmd = "@PG\tID:ivar-trim\tPN:ivar\tVN:1.0.0\tCL:ivar trim\n";

// Test and check result
res = trim_bam_qual_primer(bam, bed, prefix, region_, min_qual, sliding_window, cmd, no_write_flag, keep_for_reanalysis, min_length, pair_info);
res = trim_bam_qual_primer(bam, bed, prefix, region_, min_qual, sliding_window, cmd, no_write_flag, keep_for_reanalysis, min_length, pair_info, primer_offset);

if (res) {
success = -1;
Expand Down
3 changes: 2 additions & 1 deletion tests/test_primer_bed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ int main(){
int num_tests = 2;
int success = 0;
std::vector<primer>::iterator it;
std::vector<primer> primers = populate_from_file("../data/test.bed");
int32_t primer_offset = 0;
std::vector<primer> primers = populate_from_file("../data/test.bed", primer_offset);
std::string primer_names[] = {"WNV_400_1_LEFT", "WNV_400_1_LEFT_alt", "WNV_400_2_LEFT", "WNV_400_1_RIGHT", "WNV_400_2_RIGHT", "WNV_400_3_LEFT", "WNV_400_2_LEFT_alt", "WNV_400_2_RIGHT_alt"};
int primer_indices[] = {0,1,2,3,4,5,6,7,8};
unsigned int primer_start[] = {8,7, 230, 359,658,569,251,352};
Expand Down
3 changes: 2 additions & 1 deletion tests/test_primer_trim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
int main(){
int success = 0;
std::string bam = "../data/test.unmapped.sorted.bam";
std::vector<primer> primers = populate_from_file("../data/test.bed");
int32_t primer_offset = 0;
std::vector<primer> primers = populate_from_file("../data/test.bed", primer_offset);
int max_primer_len = 0;
max_primer_len = get_bigger_primer(primers);
std::string region_;
Expand Down
3 changes: 2 additions & 1 deletion tests/test_primer_trim_edge_cases.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
int main(){
int success = 0;
std::string bam = "../data/primer_only/primer_edge_cases.bam";
std::vector<primer> primers = populate_from_file("../data/test.bed");
int32_t primer_offset = 0;
std::vector<primer> primers = populate_from_file("../data/test.bed", primer_offset);
int max_primer_len = 0;
max_primer_len = get_bigger_primer(primers);
std::string region_;
Expand Down
3 changes: 2 additions & 1 deletion tests/test_trim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,15 @@ int test_trim(uint8_t min_qual, uint8_t sliding_window, bool no_write_flag, bool
std::string bam = "../data/test.unmapped.sorted.bam";
std::string bed = "../data/test.bed";
std::string pair_info = "";
int32_t primer_offset = 0;
std::string prefix = "/tmp/trim";
std::string bam_out = "/tmp/trim.bam";

std::string region_ = "";
std::string cmd = "@PG\tID:ivar-trim\tPN:ivar\tVN:1.0.0\tCL:ivar trim\n";

// Test and check result
res = trim_bam_qual_primer(bam, bed, prefix, region_, min_qual, sliding_window, cmd, no_write_flag, keep_for_reanalysis, min_length, pair_info);
res = trim_bam_qual_primer(bam, bed, prefix, region_, min_qual, sliding_window, cmd, no_write_flag, keep_for_reanalysis, min_length, pair_info, primer_offset);
if (res) {
success = -1;
std::cerr << testname << " failed: trim_bam_qual_primer() returned " << res << std::endl;
Expand Down
3 changes: 2 additions & 1 deletion tests/test_unpaired_trim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
int main(){
int success = 0;
std::string bam = "../data/test.sim.merged.sorted.bam";
std::vector<primer> primers = populate_from_file("../data/test_merged.bed");
int32_t primer_offset = 0;
std::vector<primer> primers = populate_from_file("../data/test_merged.bed", primer_offset);
int max_primer_len = 0;
max_primer_len = get_bigger_primer(primers);
std::string region_;
Expand Down