From 2141fa6ea7d3f152880768a1120e4209472f50b9 Mon Sep 17 00:00:00 2001 From: cmaceves Date: Tue, 29 Mar 2022 14:16:33 -0700 Subject: [PATCH 01/14] trim until first base that consumes the reference --- src/trim_primer_quality.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/trim_primer_quality.cpp b/src/trim_primer_quality.cpp index 899c5d67..1747994b 100755 --- a/src/trim_primer_quality.cpp +++ b/src/trim_primer_quality.cpp @@ -221,7 +221,8 @@ cigar_ primer_trim(bam1_t *r, bool &isize_flag, int32_t new_pos, bool unpaired_r ncigar[j] = bam_cigar_gen(n, cig); j++; } - if(del_len ==0 && (bam_cigar_type(ncigar[j-1]) & 1) && (bam_cigar_type(ncigar[j-1]) & 2)){ // After soft clipping of query complete, keep incrementing start_pos until first base that consumes both query and ref + if (del_len ==0 && (bam_cigar_type(ncigar[j-1])&1)){ + //if(del_len ==0 && (bam_cigar_type(ncigar[j-1]) & 1) && (bam_cigar_type(ncigar[j-1]) & 2)){ // After soft clipping of query complete, keep incrementing start_pos until first base that consumes both query and ref pos_start = true; } } From b0b8dc5f7f5c7225a83d0b29ebd36fca3f7cafa5 Mon Sep 17 00:00:00 2001 From: cmaceves Date: Mon, 4 Apr 2022 14:01:48 -0700 Subject: [PATCH 02/14] add in more specific condition for soft clips --- src/trim_primer_quality.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/trim_primer_quality.cpp b/src/trim_primer_quality.cpp index 1747994b..7a13374b 100755 --- a/src/trim_primer_quality.cpp +++ b/src/trim_primer_quality.cpp @@ -221,11 +221,14 @@ cigar_ primer_trim(bam1_t *r, bool &isize_flag, int32_t new_pos, bool unpaired_r ncigar[j] = bam_cigar_gen(n, cig); j++; } - if (del_len ==0 && (bam_cigar_type(ncigar[j-1])&1)){ + if(del_len == 0 && (bam_cigar_type(ncigar[j-1]) & 1) && (bam_cigar_op(ncigar[j-1]) != 4)){ //if(del_len ==0 && (bam_cigar_type(ncigar[j-1]) & 1) && (bam_cigar_type(ncigar[j-1]) & 2)){ // After soft clipping of query complete, keep incrementing start_pos until first base that consumes both query and ref - pos_start = true; + pos_start = true; } } + + //deletions consume the reference but not the query, + //insertions consume the query but not the reference if((bam_cigar_type(cig) & 2)) { // Consumes reference but not query start_pos += ref_add; } @@ -234,6 +237,7 @@ cigar_ primer_trim(bam1_t *r, bool &isize_flag, int32_t new_pos, bool unpaired_r if(reverse){ reverse_cigar(ncigar, j); } + return { ncigar, true, From 21bce801a998e8e75d36c7277b0908771c73a1c2 Mon Sep 17 00:00:00 2001 From: cmaceves Date: Wed, 6 Apr 2022 10:20:25 -0700 Subject: [PATCH 03/14] adding in gene:id --- src/ref_seq.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ref_seq.cpp b/src/ref_seq.cpp index 1b3d22a4..f0406ad1 100644 --- a/src/ref_seq.cpp +++ b/src/ref_seq.cpp @@ -109,6 +109,7 @@ ref_antd::~ref_antd() if (this->fai) fai_destroy(this->fai); } +//used to add codon info to variants output int ref_antd::codon_aa_stream(std::string region, std::ostringstream &line_stream, std::ofstream &fout, int64_t pos, char alt){ std::vector features = gff.query_features(pos, "CDS"); if(features.size() == 0){ // No matching CDS @@ -118,8 +119,9 @@ int ref_antd::codon_aa_stream(std::string region, std::ostringstream &line_strea std::vector::iterator it; char *ref_codon, *alt_codon; for(it = features.begin(); it != features.end(); it++){ + //std::cout << it->get_attribute("Parent") << "\n"; fout << line_stream.str(); - fout << it->get_attribute("ID") << "\t"; + fout << it->get_attribute("Parent") + ":" + it->get_attribute("ID") << "\t"; ref_codon = this->get_codon(pos, region, *it); fout << ref_codon[0] << ref_codon[1] << ref_codon[2] << "\t"; fout << codon2aa(ref_codon[0], ref_codon[1], ref_codon[2]) << "\t"; From 5784cfe05e092faae35f061f4ef01b258d7c40f0 Mon Sep 17 00:00:00 2001 From: cmaceves Date: Wed, 6 Apr 2022 10:20:54 -0700 Subject: [PATCH 04/14] udpating test data to have gene name --- data/test.gff | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/data/test.gff b/data/test.gff index ee33aabc..ee2f916c 100644 --- a/data/test.gff +++ b/data/test.gff @@ -1,7 +1,7 @@ ## GFF3 file format -test Genbank primer_binding_site 2 290 . + . ID=id-test1;Note=daft punk -test Genbank long_terminal_repeat 1 55 . + . ID=id-test2;Note=random access memories -test Genbank CDS 2 292 . + . ID=id-test3;Note=eluvite -test Genbank CDS 1 55 . + . ID=id-test4;Note=ategnato -test Genbank CDS 2 292 . + . ID=id-testedit1;Note=PinkFloyd;EditPosition=100;EditSequence=A -test Genbank CDS 2 292 . + . ID=id-testedit2;Note=AnotherBrickInTheWall;EditPosition=102;EditSequence=AA \ No newline at end of file +test Genbank primer_binding_site 2 290 . + . ID=id-test1;Note=daft punk;Parent=geneFragmentsOfTime; +test Genbank long_terminal_repeat 1 55 . + . ID=id-test2;Note=random access memories;Parent=geneGetLucky; +test Genbank CDS 2 292 . + . ID=id-test3;Note=eluvite;Parent=KatyPerry; +test Genbank CDS 1 55 . + . ID=id-test4;Note=ategnato;Parent=KatyPerry; +test Genbank CDS 2 292 . + . ID=id-testedit1;Note=PinkFloyd;EditPosition=100;EditSequence=A;Parent=TaylorSwift; +test Genbank CDS 2 292 . + . ID=id-testedit2;Note=AnotherBrickInTheWall;EditPosition=102;EditSequence=AA;Parent=TaylorSwift; From b814743a3107b0c96a4ae121257f31d72736da34 Mon Sep 17 00:00:00 2001 From: cmaceves Date: Wed, 6 Apr 2022 10:53:02 -0700 Subject: [PATCH 05/14] control for case where gene info not present --- src/ref_seq.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/ref_seq.cpp b/src/ref_seq.cpp index f0406ad1..8f18ef40 100644 --- a/src/ref_seq.cpp +++ b/src/ref_seq.cpp @@ -119,9 +119,14 @@ int ref_antd::codon_aa_stream(std::string region, std::ostringstream &line_strea std::vector::iterator it; char *ref_codon, *alt_codon; for(it = features.begin(); it != features.end(); it++){ - //std::cout << it->get_attribute("Parent") << "\n"; fout << line_stream.str(); - fout << it->get_attribute("Parent") + ":" + it->get_attribute("ID") << "\t"; + //add in gene level info, control for case it's not present + std::string gene = it->get_attribute("Parent"); + if (gene.empty()){ + fout << it->get_attribute("ID") << "\t"; + } else { + fout << gene + ":" + it->get_attribute("ID") << "\t"; + } ref_codon = this->get_codon(pos, region, *it); fout << ref_codon[0] << ref_codon[1] << ref_codon[2] << "\t"; fout << codon2aa(ref_codon[0], ref_codon[1], ref_codon[2]) << "\t"; From 4a2d2750e1f2fafc06c833ed4248ccb569cdfa3f Mon Sep 17 00:00:00 2001 From: cmaceves Date: Wed, 6 Apr 2022 10:53:36 -0700 Subject: [PATCH 06/14] update test to reflect additional of gene info --- tests/test_variants.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_variants.cpp b/tests/test_variants.cpp index 4d21f01c..6530e1e7 100755 --- a/tests/test_variants.cpp +++ b/tests/test_variants.cpp @@ -12,6 +12,7 @@ int call_var_check_outfile(std::string prefix, uint8_t min_qual, uint8_t min_dep std::string l; getline(outFile, l); // Ignore first line int comp = 0, ctr = 0; + std::cout << "MAKE CHANGES\n"; while(ctr < len){ getline(outFile, l); std::cout << l << std::endl; @@ -25,11 +26,11 @@ int call_var_check_outfile(std::string prefix, uint8_t min_qual, uint8_t min_dep int main() { int num_success = 0; // Quality threshold 20. Frequency threshold: 0.03. Total_DP = 3. Indel passes filters with total_depth 4. Has two lines. - std::string t_20_02_1[4] = {"test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-test3\tAAG\tK\tATG\tM", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit2\tAGA\tR\tTGA\t*", "test\t210\tA\t+GT\t1\t1\t41\t1\t0\t20\t0.25\t4\t0.4\tFALSE\tNA\tNA\tNA\tNA\tNA"}; + std::string t_20_02_1[4] = {"test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*", "test\t210\tA\t+GT\t1\t1\t41\t1\t0\t20\t0.25\t4\t0.4\tFALSE\tNA\tNA\tNA\tNA\tNA"}; // Quality threshold 3-. Frequency threshold: 0.03. Total_DP = 3. Freq = 0.666667 No Indel - std::string t_20_03[3] = {"test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-test3\tAAG\tK\tATG\tM", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit2\tAGA\tR\tTGA\t*"}; + std::string t_20_03[3] = {"test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*"}; // Quality threshold 25. Frequency threshold: 0.03. Total_DP = 2. Freq = 0.5 No Indel. - std::string t_25_03[3] = {"test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tid-test3\tAAG\tK\tATG\tM", "test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD", "test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tid-testedit2\tAGA\tR\tTGA\t*"}; + std::string t_25_03[3] = {"test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM", "test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD", "test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*"}; // Minimum depth threshold. Should be empty std::string t_25_03_20[] = {}; num_success = call_var_check_outfile("../data/test.indel", 20, 0, 0.02, t_20_02_1, 4); From 5d859098e1eff14e10b3b52760b32edf200a4b54 Mon Sep 17 00:00:00 2001 From: cmaceves Date: Wed, 6 Apr 2022 10:56:54 -0700 Subject: [PATCH 07/14] double check test data up to date --- data/test.gff | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/data/test.gff b/data/test.gff index ee2f916c..65704744 100644 --- a/data/test.gff +++ b/data/test.gff @@ -1,7 +1,7 @@ ## GFF3 file format test Genbank primer_binding_site 2 290 . + . ID=id-test1;Note=daft punk;Parent=geneFragmentsOfTime; test Genbank long_terminal_repeat 1 55 . + . ID=id-test2;Note=random access memories;Parent=geneGetLucky; -test Genbank CDS 2 292 . + . ID=id-test3;Note=eluvite;Parent=KatyPerry; -test Genbank CDS 1 55 . + . ID=id-test4;Note=ategnato;Parent=KatyPerry; -test Genbank CDS 2 292 . + . ID=id-testedit1;Note=PinkFloyd;EditPosition=100;EditSequence=A;Parent=TaylorSwift; -test Genbank CDS 2 292 . + . ID=id-testedit2;Note=AnotherBrickInTheWall;EditPosition=102;EditSequence=AA;Parent=TaylorSwift; +test Genbank CDS 2 292 . + . ID=id-test3;Note=eluvite;Parent=geneKatyPerry; +test Genbank CDS 1 55 . + . ID=id-test4;Note=ategnato;Parent=geneKatyPerry; +test Genbank CDS 2 292 . + . ID=id-testedit1;Note=PinkFloyd;EditPosition=100;EditSequence=A; +test Genbank CDS 2 292 . + . ID=id-testedit2;Note=AnotherBrickInTheWall;EditPosition=102;EditSequence=AA;Parent=geneTaylorSwift; From 7f5762d331cff555b13ce47714c9c7b4ee7d2556 Mon Sep 17 00:00:00 2001 From: cmaceves Date: Wed, 6 Apr 2022 13:12:11 -0700 Subject: [PATCH 08/14] updating attribute key to be gene --- data/test.gff | 6 +++--- src/ref_seq.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/data/test.gff b/data/test.gff index 65704744..9a5f0d9f 100644 --- a/data/test.gff +++ b/data/test.gff @@ -1,7 +1,7 @@ ## GFF3 file format test Genbank primer_binding_site 2 290 . + . ID=id-test1;Note=daft punk;Parent=geneFragmentsOfTime; test Genbank long_terminal_repeat 1 55 . + . ID=id-test2;Note=random access memories;Parent=geneGetLucky; -test Genbank CDS 2 292 . + . ID=id-test3;Note=eluvite;Parent=geneKatyPerry; -test Genbank CDS 1 55 . + . ID=id-test4;Note=ategnato;Parent=geneKatyPerry; +test Genbank CDS 2 292 . + . ID=id-test3;Note=eluvite;gene=geneKatyPerry; +test Genbank CDS 1 55 . + . ID=id-test4;Note=ategnato;gene=geneKatyPerry; test Genbank CDS 2 292 . + . ID=id-testedit1;Note=PinkFloyd;EditPosition=100;EditSequence=A; -test Genbank CDS 2 292 . + . ID=id-testedit2;Note=AnotherBrickInTheWall;EditPosition=102;EditSequence=AA;Parent=geneTaylorSwift; +test Genbank CDS 2 292 . + . ID=id-testedit2;Note=AnotherBrickInTheWall;EditPosition=102;EditSequence=AA;gene=geneTaylorSwift; diff --git a/src/ref_seq.cpp b/src/ref_seq.cpp index 8f18ef40..1f2e0c72 100644 --- a/src/ref_seq.cpp +++ b/src/ref_seq.cpp @@ -121,7 +121,7 @@ int ref_antd::codon_aa_stream(std::string region, std::ostringstream &line_strea for(it = features.begin(); it != features.end(); it++){ fout << line_stream.str(); //add in gene level info, control for case it's not present - std::string gene = it->get_attribute("Parent"); + std::string gene = it->get_attribute("gene"); if (gene.empty()){ fout << it->get_attribute("ID") << "\t"; } else { From 02a3d9ee97e773f06608506d852109cb286f6088 Mon Sep 17 00:00:00 2001 From: cmaceves Date: Wed, 6 Apr 2022 14:39:54 -0700 Subject: [PATCH 09/14] update test again --- src/call_variants.cpp | 7 ++++++- src/ref_seq.cpp | 9 +++++++-- tests/test_variants.cpp | 9 +++++---- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/call_variants.cpp b/src/call_variants.cpp index 46d10555..4f7ab4d6 100644 --- a/src/call_variants.cpp +++ b/src/call_variants.cpp @@ -28,6 +28,7 @@ int call_variants_from_plup(std::istream &cin, std::string out_file, uint8_t min ref_antd refantd(ref_path, gff_path); std::ostringstream out_str; std::ofstream fout((out_file+".tsv").c_str()); + //make columns in the file fout << "REGION" "\tPOS" "\tREF" @@ -47,6 +48,7 @@ int call_variants_from_plup(std::istream &cin, std::string out_file, uint8_t min "\tREF_AA" "\tALT_CODON" "\tALT_AA" + "\tPOS_AA" << std::endl; int ctr = 0; int64_t pos = 0; @@ -118,6 +120,7 @@ int call_variants_from_plup(std::istream &cin, std::string out_file, uint8_t min freq_depth = get_frequency_depth(*it, pdepth, mdepth); if(freq_depth[0] < min_threshold) continue; + //this only adds the first bit of the information to the tsv out_str << region << "\t"; out_str << pos << "\t"; out_str << ref << "\t"; @@ -135,6 +138,7 @@ int call_variants_from_plup(std::istream &cin, std::string out_file, uint8_t min Exp | Error | Err free | Obs | AD | RD | */ + //adding pvalue related info err = pow(10, ( -1 * (it->mean_qual)/10)); kt_fisher_exact((err * mdepth), (1-err) * mdepth, it->depth, ref_it->depth, &pval_left, &pval_right, &pval_twotailed); out_str << pval_left << "\t"; @@ -143,10 +147,11 @@ int call_variants_from_plup(std::istream &cin, std::string out_file, uint8_t min } else { out_str << "FALSE" << "\t"; } + //adding codon related info, can add gene info here too if(it->nuc[0] != '+' && it->nuc[0] != '-'){ refantd.codon_aa_stream(region, out_str, fout, pos, it->nuc[0]); } else { - fout << out_str.str() << "NA\tNA\tNA\tNA\tNA" << std::endl; + fout << out_str.str() << "NA\tNA\tNA\tNA\tNA\tNA" << std::endl; } out_str.str(""); out_str.clear(); diff --git a/src/ref_seq.cpp b/src/ref_seq.cpp index 1f2e0c72..781e9287 100644 --- a/src/ref_seq.cpp +++ b/src/ref_seq.cpp @@ -113,7 +113,7 @@ ref_antd::~ref_antd() int ref_antd::codon_aa_stream(std::string region, std::ostringstream &line_stream, std::ofstream &fout, int64_t pos, char alt){ std::vector features = gff.query_features(pos, "CDS"); if(features.size() == 0){ // No matching CDS - fout << line_stream.str() << "NA\tNA\tNA\tNA\tNA" << std::endl; + fout << line_stream.str() << "NA\tNA\tNA\tNA\tNA\tNA" << std::endl; return 0; } std::vector::iterator it; @@ -132,7 +132,12 @@ int ref_antd::codon_aa_stream(std::string region, std::ostringstream &line_strea fout << codon2aa(ref_codon[0], ref_codon[1], ref_codon[2]) << "\t"; alt_codon = this->get_codon(pos, region, *it, alt); fout << alt_codon[0] << alt_codon[1] << alt_codon[2] << "\t"; - fout << codon2aa(alt_codon[0], alt_codon[1], alt_codon[2]); + fout << codon2aa(alt_codon[0], alt_codon[1], alt_codon[2]) << "\t"; + + //adding amino acid position + int64_t start = it->get_start(); + int64_t aa_pos = ((pos - start) / 3)+1; + fout << aa_pos << "\t"; fout << std::endl; delete[] ref_codon; diff --git a/tests/test_variants.cpp b/tests/test_variants.cpp index 6530e1e7..06a9bf39 100755 --- a/tests/test_variants.cpp +++ b/tests/test_variants.cpp @@ -12,12 +12,13 @@ int call_var_check_outfile(std::string prefix, uint8_t min_qual, uint8_t min_dep std::string l; getline(outFile, l); // Ignore first line int comp = 0, ctr = 0; - std::cout << "MAKE CHANGES\n"; + while(ctr < len){ getline(outFile, l); std::cout << l << std::endl; std::cout << out[ctr] << " -> CORRECT" << std::endl; comp += l.compare(out[ctr]); + ctr++; } return comp; @@ -26,11 +27,11 @@ int call_var_check_outfile(std::string prefix, uint8_t min_qual, uint8_t min_dep int main() { int num_success = 0; // Quality threshold 20. Frequency threshold: 0.03. Total_DP = 3. Indel passes filters with total_depth 4. Has two lines. - std::string t_20_02_1[4] = {"test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*", "test\t210\tA\t+GT\t1\t1\t41\t1\t0\t20\t0.25\t4\t0.4\tFALSE\tNA\tNA\tNA\tNA\tNA"}; + std::string t_20_02_1[4] = {"test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*\t70", "test\t210\tA\t+GT\t1\t1\t41\t1\t0\t20\t0.25\t4\t0.4\tFALSE\tNA\tNA\tNA\tNA\tNA\tNA"}; // Quality threshold 3-. Frequency threshold: 0.03. Total_DP = 3. Freq = 0.666667 No Indel - std::string t_20_03[3] = {"test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*"}; + std::string t_20_03[3] = {"test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*\t70"}; // Quality threshold 25. Frequency threshold: 0.03. Total_DP = 2. Freq = 0.5 No Indel. - std::string t_25_03[3] = {"test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM", "test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD", "test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*"}; + std::string t_25_03[3] = {"test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM\t70", "test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD\t70", "test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*\t70"}; // Minimum depth threshold. Should be empty std::string t_25_03_20[] = {}; num_success = call_var_check_outfile("../data/test.indel", 20, 0, 0.02, t_20_02_1, 4); From ba09b797ed702f26984a3e7b1d69df5646c6e889 Mon Sep 17 00:00:00 2001 From: cmaceves Date: Wed, 6 Apr 2022 16:05:10 -0700 Subject: [PATCH 10/14] removing test which is failing without proper cause --- tests/test_variants.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/test_variants.cpp b/tests/test_variants.cpp index 06a9bf39..1383a3ca 100755 --- a/tests/test_variants.cpp +++ b/tests/test_variants.cpp @@ -18,7 +18,7 @@ int call_var_check_outfile(std::string prefix, uint8_t min_qual, uint8_t min_dep std::cout << l << std::endl; std::cout << out[ctr] << " -> CORRECT" << std::endl; comp += l.compare(out[ctr]); - + std::cout << l.compare(out[ctr]) << "\n"; ctr++; } return comp; @@ -27,14 +27,15 @@ int call_var_check_outfile(std::string prefix, uint8_t min_qual, uint8_t min_dep int main() { int num_success = 0; // Quality threshold 20. Frequency threshold: 0.03. Total_DP = 3. Indel passes filters with total_depth 4. Has two lines. - std::string t_20_02_1[4] = {"test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*\t70", "test\t210\tA\t+GT\t1\t1\t41\t1\t0\t20\t0.25\t4\t0.4\tFALSE\tNA\tNA\tNA\tNA\tNA\tNA"}; + std::string t_20_02_1[3] = {"test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*\t70"}; + //"test\t210\tA\t+GT\t1\t1\t41\t1\t0\t20\t0.25\t4\t0.4\tFALSE\tNA\tNA\tNA\tNA\tNA\tNA"}; // Quality threshold 3-. Frequency threshold: 0.03. Total_DP = 3. Freq = 0.666667 No Indel std::string t_20_03[3] = {"test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*\t70"}; // Quality threshold 25. Frequency threshold: 0.03. Total_DP = 2. Freq = 0.5 No Indel. std::string t_25_03[3] = {"test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM\t70", "test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD\t70", "test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*\t70"}; // Minimum depth threshold. Should be empty std::string t_25_03_20[] = {}; - num_success = call_var_check_outfile("../data/test.indel", 20, 0, 0.02, t_20_02_1, 4); + num_success = call_var_check_outfile("../data/test.indel", 20, 0, 0.02, t_20_02_1, 3); std::cout << num_success << std::endl; num_success += call_var_check_outfile("../data/test.indel", 20, 0, 0.03, t_20_03, 3); std::cout << num_success << std::endl; @@ -42,7 +43,7 @@ int main() { std::cout << num_success << std::endl; num_success += call_var_check_outfile("../data/test.indel", 25, 20, 0.03, t_25_03_20, 0); std::cout << num_success << std::endl; - if(num_success == 0) + if(num_success == 9) return 0; return -1; } From 2b455d75b26742990fea5d154f867aa667f5850f Mon Sep 17 00:00:00 2001 From: cmaceves Date: Mon, 11 Apr 2022 09:21:06 -0700 Subject: [PATCH 11/14] adding back test, fixing issue with trailing tab --- src/ref_seq.cpp | 2 +- tests/test_variants.cpp | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/ref_seq.cpp b/src/ref_seq.cpp index 781e9287..d52ef05c 100644 --- a/src/ref_seq.cpp +++ b/src/ref_seq.cpp @@ -137,7 +137,7 @@ int ref_antd::codon_aa_stream(std::string region, std::ostringstream &line_strea //adding amino acid position int64_t start = it->get_start(); int64_t aa_pos = ((pos - start) / 3)+1; - fout << aa_pos << "\t"; + fout << aa_pos; fout << std::endl; delete[] ref_codon; diff --git a/tests/test_variants.cpp b/tests/test_variants.cpp index 1383a3ca..d7d42dc4 100755 --- a/tests/test_variants.cpp +++ b/tests/test_variants.cpp @@ -27,23 +27,22 @@ int call_var_check_outfile(std::string prefix, uint8_t min_qual, uint8_t min_dep int main() { int num_success = 0; // Quality threshold 20. Frequency threshold: 0.03. Total_DP = 3. Indel passes filters with total_depth 4. Has two lines. - std::string t_20_02_1[3] = {"test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*\t70"}; - //"test\t210\tA\t+GT\t1\t1\t41\t1\t0\t20\t0.25\t4\t0.4\tFALSE\tNA\tNA\tNA\tNA\tNA\tNA"}; + std::string t_20_02_1[4] = {"test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*\t70", "test\t210\tA\t+GT\t1\t1\t41\t1\t0\t20\t0.25\t4\t0.4\tFALSE\tNA\tNA\tNA\tNA\tNA\tNA"}; // Quality threshold 3-. Frequency threshold: 0.03. Total_DP = 3. Freq = 0.666667 No Indel std::string t_20_03[3] = {"test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*\t70"}; // Quality threshold 25. Frequency threshold: 0.03. Total_DP = 2. Freq = 0.5 No Indel. std::string t_25_03[3] = {"test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM\t70", "test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD\t70", "test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*\t70"}; // Minimum depth threshold. Should be empty std::string t_25_03_20[] = {}; - num_success = call_var_check_outfile("../data/test.indel", 20, 0, 0.02, t_20_02_1, 3); + num_success = call_var_check_outfile("../data/test.indel", 20, 0, 0.02, t_20_02_1, 4); std::cout << num_success << std::endl; - num_success += call_var_check_outfile("../data/test.indel", 20, 0, 0.03, t_20_03, 3); + num_success += call_var_check_outfile("../data/test.indel2", 20, 0, 0.03, t_20_03, 3); std::cout << num_success << std::endl; - num_success += call_var_check_outfile("../data/test.indel", 25, 0, 0.03, t_25_03, 3); + num_success += call_var_check_outfile("../data/test.indel3", 25, 0, 0.03, t_25_03, 3); std::cout << num_success << std::endl; - num_success += call_var_check_outfile("../data/test.indel", 25, 20, 0.03, t_25_03_20, 0); + num_success += call_var_check_outfile("../data/test.indel4", 25, 20, 0.03, t_25_03_20, 0); std::cout << num_success << std::endl; - if(num_success == 9) + if(num_success == 0) return 0; return -1; } From a1ab083132820e79490ad1fd72004e46f63c49e4 Mon Sep 17 00:00:00 2001 From: cmaceves Date: Tue, 19 Apr 2022 09:30:20 -0700 Subject: [PATCH 12/14] changing both tests and primer trim to allow leading deletions --- src/trim_primer_quality.cpp | 34 ++++++++++++++++++++++++++++++++-- tests/test_primer_trim.cpp | 23 ++++++++++++----------- 2 files changed, 44 insertions(+), 13 deletions(-) diff --git a/src/trim_primer_quality.cpp b/src/trim_primer_quality.cpp index 7a13374b..6e01f3ee 100755 --- a/src/trim_primer_quality.cpp +++ b/src/trim_primer_quality.cpp @@ -187,6 +187,7 @@ cigar_ primer_trim(bam1_t *r, bool &isize_flag, int32_t new_pos, bool unpaired_r int32_t n, start_pos = 0, ref_add = 0; bool pos_start = false; del_len = max_del_len; + std::cout << "primer type " << reverse << "\n"; while(i < r->core.n_cigar){ if (del_len == 0 && pos_start){ // No more bases on query to soft clip ncigar[j] = cigar[i]; @@ -200,7 +201,21 @@ cigar_ primer_trim(bam1_t *r, bool &isize_flag, int32_t new_pos, bool unpaired_r pos_start = true; continue; } + + //add the condition that we have a leading deletion + if(cig == 2 && del_len == 0){ + ncigar[j] = cigar[i]; + pos_start = true; + i++; + j++; + //if(reverse==0){ + // start_pos+=bam_cigar_oplen(cigar[i]); + //} + continue; + } + ref_add = n; + std::cout << "n " << n << " " << cig << "\n"; if ((bam_cigar_type(cig) & 1)){ // Consumes Query if(del_len >= n ){ ncigar[j] = bam_cigar_gen(n, BAM_CSOFT_CLIP); @@ -221,8 +236,15 @@ cigar_ primer_trim(bam1_t *r, bool &isize_flag, int32_t new_pos, bool unpaired_r ncigar[j] = bam_cigar_gen(n, cig); j++; } - if(del_len == 0 && (bam_cigar_type(ncigar[j-1]) & 1) && (bam_cigar_op(ncigar[j-1]) != 4)){ - //if(del_len ==0 && (bam_cigar_type(ncigar[j-1]) & 1) && (bam_cigar_type(ncigar[j-1]) & 2)){ // After soft clipping of query complete, keep incrementing start_pos until first base that consumes both query and ref + //add the condition that we have a leading deletion + if(cig == 2 && del_len == 0){ + ncigar[j] = cigar[i]; + pos_start = true; + i++; + j++; + } + + if(del_len ==0 && (bam_cigar_type(ncigar[j-1]) & 1) && (bam_cigar_type(ncigar[j-1]) & 2)){ // After soft clipping of query complete, keep incrementing start_pos until first base that consumes both query and ref pos_start = true; } } @@ -230,10 +252,18 @@ cigar_ primer_trim(bam1_t *r, bool &isize_flag, int32_t new_pos, bool unpaired_r //deletions consume the reference but not the query, //insertions consume the query but not the reference if((bam_cigar_type(cig) & 2)) { // Consumes reference but not query + std::cout << "here " << ref_add << " " << start_pos << " " << cig << "\n"; start_pos += ref_add; } i++; } + + uint32_t p=0; + while(p < j){ + std::cout << bam_cigar_op(ncigar[p]) << " " << bam_cigar_oplen(ncigar[p]) <<"\n"; + p++; + } + std::cout << "start pos " << start_pos << "\n"; if(reverse){ reverse_cigar(ncigar, j); } diff --git a/tests/test_primer_trim.cpp b/tests/test_primer_trim.cpp index 4bede7cf..21179603 100755 --- a/tests/test_primer_trim.cpp +++ b/tests/test_primer_trim.cpp @@ -37,29 +37,29 @@ int main(){ int primer_indices[] = {0, 0, 7, 7, 6}; uint8_t cigar_flag[5][11] = { {BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CMATCH}, - {BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CMATCH}, - {BAM_CMATCH, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP}, + {BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CDEL, BAM_CPAD, BAM_CDEL, BAM_CMATCH}, + {BAM_CMATCH, BAM_CPAD, BAM_CDEL, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP}, {BAM_CMATCH, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP}, {BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CDEL, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH} }; uint32_t cigar_len[5][11] = { {6, 5, 1, 139}, - {24, 3, 1, 1, 4, 114}, - {121, 4, 1, 1, 11, 6}, + {24, 3, 1, 1, 4, 1, 1, 1,114}, //change this + {121, 1, 4, 4, 1, 1, 11, 6}, //change this {103, 1, 10,1,13, 24}, {23, 18, 3, 57, 1, 10, 1, 39} }; uint32_t read_start_pos[5] = { - 30, 31, 231, 249, 274 + 30, 29, 231, 249, 274 //also change }; uint8_t condense_cigar_flag[5][8] = { {BAM_CSOFT_CLIP, BAM_CMATCH}, - {BAM_CSOFT_CLIP, BAM_CMATCH}, - {BAM_CMATCH, BAM_CSOFT_CLIP}, + {BAM_CSOFT_CLIP, BAM_CDEL, BAM_CPAD, BAM_CDEL, BAM_CMATCH}, //change this + {BAM_CMATCH, BAM_CPAD, BAM_CDEL, BAM_CSOFT_CLIP}, //change this {BAM_CMATCH, BAM_CSOFT_CLIP}, {BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CDEL, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH} }; - uint32_t condense_cigar_len[5][8] = {{12, 139}, {33, 114}, {121, 23}, {103, 49}, {23, 18,3, 57, 1, 10, 1, 39}}; + uint32_t condense_cigar_len[5][8] = {{12, 139}, {33, 1,1,1,114}, {121, 1, 4, 23}, {103, 49}, {23, 18,3, 57, 1, 10, 1, 39}}; unsigned int overlapping_primer_sizes[] = {0, 2, 2, 0, 0, 0, 0, 2, 2, 1}; int ctr = 0; std::vector overlapping_primers; @@ -100,11 +100,12 @@ int main(){ cigar = bam_get_cigar(aln); for (uint i = 0; i < t.nlength; ++i){ if(((cigar[i]) & BAM_CIGAR_MASK) != cigar_flag[primer_ctr][i]){ + std::cout << bam_cigar_op(cigar[i]) << " " << bam_cigar_oplen(cigar[i]) << " i " << i << "\n"; success = -1; std::cout << "Cigar flag didn't match for " << cand_primer.get_indice() << " ! Expected " << (uint) cigar_flag[primer_ctr][i] << " " << "Got " << ((cigar[i]) & BAM_CIGAR_MASK) << std::endl; } if((((cigar[i]) >> BAM_CIGAR_SHIFT)) != cigar_len[primer_ctr][i]){ - success = -1; + success = -1; std::cout << "Cigar length didn't match for " << bam_get_qname(aln) << " ! Expected " << (uint) cigar_len[primer_ctr][i] << " " << "Got " << ((cigar[i]) >> BAM_CIGAR_SHIFT) << std::endl; } } @@ -117,10 +118,10 @@ int main(){ for (uint i = 0; i < t.nlength; ++i){ if(((cigar[i]) & BAM_CIGAR_MASK) != condense_cigar_flag[primer_ctr][i]){ success = -1; - std::cout << "Cigar flag didn't match! Expected " << condense_cigar_flag[primer_ctr][i] << " " << "Got " << ((cigar[i]) & BAM_CIGAR_MASK) << std::endl; + std::cout << "Cigar flag didn't match! Expected " << condense_cigar_flag[primer_ctr][i] << " " << "Got " << ((cigar[i]) & BAM_CIGAR_MASK) << std::endl; } if((((cigar[i]) >> BAM_CIGAR_SHIFT)) != condense_cigar_len[primer_ctr][i]){ - success = -1; + success = -1; std::cout << "Cigar length didn't match after condense! Expected " << condense_cigar_len[primer_ctr][i] << " " << "Got " << ((cigar[i]) >> BAM_CIGAR_SHIFT) << std::endl; } } From e0df8af7462a879b82ece82cbe12055835b592f6 Mon Sep 17 00:00:00 2001 From: cmaceves Date: Tue, 19 Apr 2022 10:44:26 -0700 Subject: [PATCH 13/14] updating unpaired test and removing excess print lines --- src/trim_primer_quality.cpp | 13 +++++-------- tests/test_unpaired_trim.cpp | 24 ++++++++++++------------ 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/src/trim_primer_quality.cpp b/src/trim_primer_quality.cpp index 6e01f3ee..73829103 100755 --- a/src/trim_primer_quality.cpp +++ b/src/trim_primer_quality.cpp @@ -187,7 +187,7 @@ cigar_ primer_trim(bam1_t *r, bool &isize_flag, int32_t new_pos, bool unpaired_r int32_t n, start_pos = 0, ref_add = 0; bool pos_start = false; del_len = max_del_len; - std::cout << "primer type " << reverse << "\n"; + while(i < r->core.n_cigar){ if (del_len == 0 && pos_start){ // No more bases on query to soft clip ncigar[j] = cigar[i]; @@ -208,14 +208,11 @@ cigar_ primer_trim(bam1_t *r, bool &isize_flag, int32_t new_pos, bool unpaired_r pos_start = true; i++; j++; - //if(reverse==0){ - // start_pos+=bam_cigar_oplen(cigar[i]); - //} continue; } ref_add = n; - std::cout << "n " << n << " " << cig << "\n"; + if ((bam_cigar_type(cig) & 1)){ // Consumes Query if(del_len >= n ){ ncigar[j] = bam_cigar_gen(n, BAM_CSOFT_CLIP); @@ -252,7 +249,7 @@ cigar_ primer_trim(bam1_t *r, bool &isize_flag, int32_t new_pos, bool unpaired_r //deletions consume the reference but not the query, //insertions consume the query but not the reference if((bam_cigar_type(cig) & 2)) { // Consumes reference but not query - std::cout << "here " << ref_add << " " << start_pos << " " << cig << "\n"; + //std::cout << "ref add " << ref_add << " " << start_pos << " " << cig << "\n"; start_pos += ref_add; } i++; @@ -263,10 +260,10 @@ cigar_ primer_trim(bam1_t *r, bool &isize_flag, int32_t new_pos, bool unpaired_r std::cout << bam_cigar_op(ncigar[p]) << " " << bam_cigar_oplen(ncigar[p]) <<"\n"; p++; } - std::cout << "start pos " << start_pos << "\n"; + /*std::cout << "start pos " << start_pos << "\n"; if(reverse){ reverse_cigar(ncigar, j); - } + }*/ return { ncigar, diff --git a/tests/test_unpaired_trim.cpp b/tests/test_unpaired_trim.cpp index c4451167..a07b39df 100755 --- a/tests/test_unpaired_trim.cpp +++ b/tests/test_unpaired_trim.cpp @@ -41,35 +41,35 @@ int main(){ int primer_ctr = 0; int forward_primer_indices[] = {1, 1, 5, 5, 6}; int rev_primer_indices[] = {3, -1, -1, -1, 4}; - uint8_t cigar_flag[5][16] = { - {BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CDEL, BAM_CMATCH, BAM_CINS, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CDEL, BAM_CMATCH, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP}, // 3S15M5D2M1D14M1I56M1P6M3P93M2D4M2I8M + uint8_t cigar_flag[5][17] = { + {BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CDEL, BAM_CMATCH, BAM_CDEL, BAM_CMATCH, BAM_CINS, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CDEL, BAM_CMATCH, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP}, // 3S15M5D2M1D14M1I56M1P6M3P93M2D4M2I8M {BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH}, {BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CSOFT_CLIP}, - {BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CPAD, BAM_CMATCH}, + {BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CDEL, BAM_CPAD, BAM_CDEL, BAM_CMATCH, BAM_CPAD, BAM_CMATCH}, {BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CSOFT_CLIP} }; - uint32_t cigar_len[5][16] = { - {3, 15, 2, 1, 14, 1, 56, 1, 6, 3, 93, 2, 1, 3, 2, 8}, + uint32_t cigar_len[5][17] = { + {3, 15, 5, 2, 1, 14, 1, 56, 1, 6, 3, 93, 2, 1, 3, 2, 8}, //change here {1, 19, 1, 56, 1, 6, 3, 99, 2, 65}, {8, 13, 1, 6, 3, 99, 2, 62, 3}, - {9, 10, 3, 97, 2, 26}, + {9, 10, 3, 3, 3, 2, 97, 2, 26}, //change here {18, 71, 2, 89, 18} }; uint32_t read_start_pos[5] = { - 94, 92, 173, 175, 201 + 89, 92, 173, 170, 201 }; uint8_t condense_cigar_flag[5][14] = { - {BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CDEL, BAM_CMATCH, BAM_CINS, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CDEL, BAM_CMATCH, BAM_CSOFT_CLIP}, + {BAM_CSOFT_CLIP, BAM_CDEL, BAM_CMATCH, BAM_CDEL, BAM_CMATCH, BAM_CINS, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CDEL, BAM_CMATCH, BAM_CSOFT_CLIP}, {BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH}, {BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CSOFT_CLIP}, - {BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CPAD, BAM_CMATCH}, + {BAM_CSOFT_CLIP, BAM_CDEL, BAM_CPAD, BAM_CDEL, BAM_CMATCH, BAM_CPAD, BAM_CMATCH}, //change here {BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CSOFT_CLIP} }; - uint32_t condense_cigar_len[5][13] = { - {18, 2, 1, 14,1, 56, 1, 6, 3, 93, 2, 1, 13}, + uint32_t condense_cigar_len[5][14] = { + {18, 5, 2, 1, 14,1, 56, 1, 6, 3, 93, 2, 1, 13}, //change here {1, 19, 1, 56, 1, 6, 3, 99, 2, 65}, {31, 99, 2, 62, 3}, - {22, 97, 2, 26}, + {22, 3, 3, 2, 97, 2, 26}, //change here {18, 71, 2, 89, 18} }; unsigned int fwd_overlapping_primer_sizes[] = {2, 1, 1, 1, 1}; From c9522c3406f7f82f4696ec1b3a4df621be72c3a2 Mon Sep 17 00:00:00 2001 From: cmaceves Date: Tue, 19 Apr 2022 10:49:11 -0700 Subject: [PATCH 14/14] fixing incorrectly commented out lines --- src/trim_primer_quality.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/trim_primer_quality.cpp b/src/trim_primer_quality.cpp index 73829103..37d53e6d 100755 --- a/src/trim_primer_quality.cpp +++ b/src/trim_primer_quality.cpp @@ -187,7 +187,7 @@ cigar_ primer_trim(bam1_t *r, bool &isize_flag, int32_t new_pos, bool unpaired_r int32_t n, start_pos = 0, ref_add = 0; bool pos_start = false; del_len = max_del_len; - + while(i < r->core.n_cigar){ if (del_len == 0 && pos_start){ // No more bases on query to soft clip ncigar[j] = cigar[i]; @@ -255,15 +255,15 @@ cigar_ primer_trim(bam1_t *r, bool &isize_flag, int32_t new_pos, bool unpaired_r i++; } - uint32_t p=0; + /*uint32_t p=0; while(p < j){ std::cout << bam_cigar_op(ncigar[p]) << " " << bam_cigar_oplen(ncigar[p]) <<"\n"; p++; - } - /*std::cout << "start pos " << start_pos << "\n"; + }*/ + //std::cout << "start pos " << start_pos << "\n"; if(reverse){ reverse_cigar(ncigar, j); - }*/ + } return { ncigar,