Skip to content

Commit

Permalink
Merge pull request #124 from andersen-lab/issue_120
Browse files Browse the repository at this point in the history
addressing issue #120 fix deletion post primer
  • Loading branch information
cmaceves committed Jul 11, 2022
2 parents 17ceea0 + c9522c3 commit 384ff10
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 24 deletions.
34 changes: 33 additions & 1 deletion src/trim_primer_quality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ cigar_ primer_trim(bam1_t *r, bool &isize_flag, int32_t new_pos, bool unpaired_r
int32_t n, start_pos = 0, ref_add = 0;
bool pos_start = false;
del_len = max_del_len;

while(i < r->core.n_cigar){
if (del_len == 0 && pos_start){ // No more bases on query to soft clip
ncigar[j] = cigar[i];
Expand All @@ -200,7 +201,18 @@ cigar_ primer_trim(bam1_t *r, bool &isize_flag, int32_t new_pos, bool unpaired_r
pos_start = true;
continue;
}

//add the condition that we have a leading deletion
if(cig == 2 && del_len == 0){
ncigar[j] = cigar[i];
pos_start = true;
i++;
j++;
continue;
}

ref_add = n;

if ((bam_cigar_type(cig) & 1)){ // Consumes Query
if(del_len >= n ){
ncigar[j] = bam_cigar_gen(n, BAM_CSOFT_CLIP);
Expand All @@ -221,18 +233,38 @@ cigar_ primer_trim(bam1_t *r, bool &isize_flag, int32_t new_pos, bool unpaired_r
ncigar[j] = bam_cigar_gen(n, cig);
j++;
}
//add the condition that we have a leading deletion
if(cig == 2 && del_len == 0){
ncigar[j] = cigar[i];
pos_start = true;
i++;
j++;
}

if(del_len ==0 && (bam_cigar_type(ncigar[j-1]) & 1) && (bam_cigar_type(ncigar[j-1]) & 2)){ // After soft clipping of query complete, keep incrementing start_pos until first base that consumes both query and ref
pos_start = true;
pos_start = true;
}
}

//deletions consume the reference but not the query,
//insertions consume the query but not the reference
if((bam_cigar_type(cig) & 2)) { // Consumes reference but not query
//std::cout << "ref add " << ref_add << " " << start_pos << " " << cig << "\n";
start_pos += ref_add;
}
i++;
}

/*uint32_t p=0;
while(p < j){
std::cout << bam_cigar_op(ncigar[p]) << " " << bam_cigar_oplen(ncigar[p]) <<"\n";
p++;
}*/
//std::cout << "start pos " << start_pos << "\n";
if(reverse){
reverse_cigar(ncigar, j);
}

return {
ncigar,
true,
Expand Down
23 changes: 12 additions & 11 deletions tests/test_primer_trim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,29 +37,29 @@ int main(){
int primer_indices[] = {0, 0, 7, 7, 6};
uint8_t cigar_flag[5][11] = {
{BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CMATCH},
{BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CMATCH},
{BAM_CMATCH, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP},
{BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CDEL, BAM_CPAD, BAM_CDEL, BAM_CMATCH},
{BAM_CMATCH, BAM_CPAD, BAM_CDEL, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP},
{BAM_CMATCH, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP},
{BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CDEL, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH}
};
uint32_t cigar_len[5][11] = {
{6, 5, 1, 139},
{24, 3, 1, 1, 4, 114},
{121, 4, 1, 1, 11, 6},
{24, 3, 1, 1, 4, 1, 1, 1,114}, //change this
{121, 1, 4, 4, 1, 1, 11, 6}, //change this
{103, 1, 10,1,13, 24},
{23, 18, 3, 57, 1, 10, 1, 39}
};
uint32_t read_start_pos[5] = {
30, 31, 231, 249, 274
30, 29, 231, 249, 274 //also change
};
uint8_t condense_cigar_flag[5][8] = {
{BAM_CSOFT_CLIP, BAM_CMATCH},
{BAM_CSOFT_CLIP, BAM_CMATCH},
{BAM_CMATCH, BAM_CSOFT_CLIP},
{BAM_CSOFT_CLIP, BAM_CDEL, BAM_CPAD, BAM_CDEL, BAM_CMATCH}, //change this
{BAM_CMATCH, BAM_CPAD, BAM_CDEL, BAM_CSOFT_CLIP}, //change this
{BAM_CMATCH, BAM_CSOFT_CLIP},
{BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CDEL, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH}
};
uint32_t condense_cigar_len[5][8] = {{12, 139}, {33, 114}, {121, 23}, {103, 49}, {23, 18,3, 57, 1, 10, 1, 39}};
uint32_t condense_cigar_len[5][8] = {{12, 139}, {33, 1,1,1,114}, {121, 1, 4, 23}, {103, 49}, {23, 18,3, 57, 1, 10, 1, 39}};
unsigned int overlapping_primer_sizes[] = {0, 2, 2, 0, 0, 0, 0, 2, 2, 1};
int ctr = 0;
std::vector<primer> overlapping_primers;
Expand Down Expand Up @@ -100,11 +100,12 @@ int main(){
cigar = bam_get_cigar(aln);
for (uint i = 0; i < t.nlength; ++i){
if(((cigar[i]) & BAM_CIGAR_MASK) != cigar_flag[primer_ctr][i]){
std::cout << bam_cigar_op(cigar[i]) << " " << bam_cigar_oplen(cigar[i]) << " i " << i << "\n";
success = -1;
std::cout << "Cigar flag didn't match for " << cand_primer.get_indice() << " ! Expected " << (uint) cigar_flag[primer_ctr][i] << " " << "Got " << ((cigar[i]) & BAM_CIGAR_MASK) << std::endl;
}
if((((cigar[i]) >> BAM_CIGAR_SHIFT)) != cigar_len[primer_ctr][i]){
success = -1;
success = -1;
std::cout << "Cigar length didn't match for " << bam_get_qname(aln) << " ! Expected " << (uint) cigar_len[primer_ctr][i] << " " << "Got " << ((cigar[i]) >> BAM_CIGAR_SHIFT) << std::endl;
}
}
Expand All @@ -117,10 +118,10 @@ int main(){
for (uint i = 0; i < t.nlength; ++i){
if(((cigar[i]) & BAM_CIGAR_MASK) != condense_cigar_flag[primer_ctr][i]){
success = -1;
std::cout << "Cigar flag didn't match! Expected " << condense_cigar_flag[primer_ctr][i] << " " << "Got " << ((cigar[i]) & BAM_CIGAR_MASK) << std::endl;
std::cout << "Cigar flag didn't match! Expected " << condense_cigar_flag[primer_ctr][i] << " " << "Got " << ((cigar[i]) & BAM_CIGAR_MASK) << std::endl;
}
if((((cigar[i]) >> BAM_CIGAR_SHIFT)) != condense_cigar_len[primer_ctr][i]){
success = -1;
success = -1;
std::cout << "Cigar length didn't match after condense! Expected " << condense_cigar_len[primer_ctr][i] << " " << "Got " << ((cigar[i]) >> BAM_CIGAR_SHIFT) << std::endl;
}
}
Expand Down
24 changes: 12 additions & 12 deletions tests/test_unpaired_trim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,35 +41,35 @@ int main(){
int primer_ctr = 0;
int forward_primer_indices[] = {1, 1, 5, 5, 6};
int rev_primer_indices[] = {3, -1, -1, -1, 4};
uint8_t cigar_flag[5][16] = {
{BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CDEL, BAM_CMATCH, BAM_CINS, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CDEL, BAM_CMATCH, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP}, // 3S15M5D2M1D14M1I56M1P6M3P93M2D4M2I8M
uint8_t cigar_flag[5][17] = {
{BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CDEL, BAM_CMATCH, BAM_CDEL, BAM_CMATCH, BAM_CINS, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CDEL, BAM_CMATCH, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP}, // 3S15M5D2M1D14M1I56M1P6M3P93M2D4M2I8M
{BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH},
{BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CSOFT_CLIP},
{BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CPAD, BAM_CMATCH},
{BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CSOFT_CLIP, BAM_CDEL, BAM_CPAD, BAM_CDEL, BAM_CMATCH, BAM_CPAD, BAM_CMATCH},
{BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CSOFT_CLIP}
};
uint32_t cigar_len[5][16] = {
{3, 15, 2, 1, 14, 1, 56, 1, 6, 3, 93, 2, 1, 3, 2, 8},
uint32_t cigar_len[5][17] = {
{3, 15, 5, 2, 1, 14, 1, 56, 1, 6, 3, 93, 2, 1, 3, 2, 8}, //change here
{1, 19, 1, 56, 1, 6, 3, 99, 2, 65},
{8, 13, 1, 6, 3, 99, 2, 62, 3},
{9, 10, 3, 97, 2, 26},
{9, 10, 3, 3, 3, 2, 97, 2, 26}, //change here
{18, 71, 2, 89, 18}
};
uint32_t read_start_pos[5] = {
94, 92, 173, 175, 201
89, 92, 173, 170, 201
};
uint8_t condense_cigar_flag[5][14] = {
{BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CDEL, BAM_CMATCH, BAM_CINS, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CDEL, BAM_CMATCH, BAM_CSOFT_CLIP},
{BAM_CSOFT_CLIP, BAM_CDEL, BAM_CMATCH, BAM_CDEL, BAM_CMATCH, BAM_CINS, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CDEL, BAM_CMATCH, BAM_CSOFT_CLIP},
{BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CPAD, BAM_CMATCH},
{BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CSOFT_CLIP},
{BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CPAD, BAM_CMATCH},
{BAM_CSOFT_CLIP, BAM_CDEL, BAM_CPAD, BAM_CDEL, BAM_CMATCH, BAM_CPAD, BAM_CMATCH}, //change here
{BAM_CSOFT_CLIP, BAM_CMATCH, BAM_CPAD, BAM_CMATCH, BAM_CSOFT_CLIP}
};
uint32_t condense_cigar_len[5][13] = {
{18, 2, 1, 14,1, 56, 1, 6, 3, 93, 2, 1, 13},
uint32_t condense_cigar_len[5][14] = {
{18, 5, 2, 1, 14,1, 56, 1, 6, 3, 93, 2, 1, 13}, //change here
{1, 19, 1, 56, 1, 6, 3, 99, 2, 65},
{31, 99, 2, 62, 3},
{22, 97, 2, 26},
{22, 3, 3, 2, 97, 2, 26}, //change here
{18, 71, 2, 89, 18}
};
unsigned int fwd_overlapping_primer_sizes[] = {2, 1, 1, 1, 1};
Expand Down

0 comments on commit 384ff10

Please sign in to comment.