Skip to content

Commit

Permalink
Merge pull request #127 from andersen-lab/issue_123
Browse files Browse the repository at this point in the history
Issue 123
  • Loading branch information
cmaceves committed Apr 11, 2022
2 parents 89632e6 + 2b455d7 commit 17ceea0
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 16 deletions.
12 changes: 6 additions & 6 deletions data/test.gff
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
## GFF3 file format
test Genbank primer_binding_site 2 290 . + . ID=id-test1;Note=daft punk
test Genbank long_terminal_repeat 1 55 . + . ID=id-test2;Note=random access memories
test Genbank CDS 2 292 . + . ID=id-test3;Note=eluvite
test Genbank CDS 1 55 . + . ID=id-test4;Note=ategnato
test Genbank CDS 2 292 . + . ID=id-testedit1;Note=PinkFloyd;EditPosition=100;EditSequence=A
test Genbank CDS 2 292 . + . ID=id-testedit2;Note=AnotherBrickInTheWall;EditPosition=102;EditSequence=AA
test Genbank primer_binding_site 2 290 . + . ID=id-test1;Note=daft punk;Parent=geneFragmentsOfTime;
test Genbank long_terminal_repeat 1 55 . + . ID=id-test2;Note=random access memories;Parent=geneGetLucky;
test Genbank CDS 2 292 . + . ID=id-test3;Note=eluvite;gene=geneKatyPerry;
test Genbank CDS 1 55 . + . ID=id-test4;Note=ategnato;gene=geneKatyPerry;
test Genbank CDS 2 292 . + . ID=id-testedit1;Note=PinkFloyd;EditPosition=100;EditSequence=A;
test Genbank CDS 2 292 . + . ID=id-testedit2;Note=AnotherBrickInTheWall;EditPosition=102;EditSequence=AA;gene=geneTaylorSwift;
7 changes: 6 additions & 1 deletion src/call_variants.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ int call_variants_from_plup(std::istream &cin, std::string out_file, uint8_t min
ref_antd refantd(ref_path, gff_path);
std::ostringstream out_str;
std::ofstream fout((out_file+".tsv").c_str());
//make columns in the file
fout << "REGION"
"\tPOS"
"\tREF"
Expand All @@ -47,6 +48,7 @@ int call_variants_from_plup(std::istream &cin, std::string out_file, uint8_t min
"\tREF_AA"
"\tALT_CODON"
"\tALT_AA"
"\tPOS_AA"
<< std::endl;
int ctr = 0;
int64_t pos = 0;
Expand Down Expand Up @@ -118,6 +120,7 @@ int call_variants_from_plup(std::istream &cin, std::string out_file, uint8_t min
freq_depth = get_frequency_depth(*it, pdepth, mdepth);
if(freq_depth[0] < min_threshold)
continue;
//this only adds the first bit of the information to the tsv
out_str << region << "\t";
out_str << pos << "\t";
out_str << ref << "\t";
Expand All @@ -135,6 +138,7 @@ int call_variants_from_plup(std::istream &cin, std::string out_file, uint8_t min
Exp | Error | Err free |
Obs | AD | RD |
*/
//adding pvalue related info
err = pow(10, ( -1 * (it->mean_qual)/10));
kt_fisher_exact((err * mdepth), (1-err) * mdepth, it->depth, ref_it->depth, &pval_left, &pval_right, &pval_twotailed);
out_str << pval_left << "\t";
Expand All @@ -143,10 +147,11 @@ int call_variants_from_plup(std::istream &cin, std::string out_file, uint8_t min
} else {
out_str << "FALSE" << "\t";
}
//adding codon related info, can add gene info here too
if(it->nuc[0] != '+' && it->nuc[0] != '-'){
refantd.codon_aa_stream(region, out_str, fout, pos, it->nuc[0]);
} else {
fout << out_str.str() << "NA\tNA\tNA\tNA\tNA" << std::endl;
fout << out_str.str() << "NA\tNA\tNA\tNA\tNA\tNA" << std::endl;
}
out_str.str("");
out_str.clear();
Expand Down
18 changes: 15 additions & 3 deletions src/ref_seq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,23 +109,35 @@ ref_antd::~ref_antd()
if (this->fai) fai_destroy(this->fai);
}

//used to add codon info to variants output
int ref_antd::codon_aa_stream(std::string region, std::ostringstream &line_stream, std::ofstream &fout, int64_t pos, char alt){
std::vector<gff3_feature> features = gff.query_features(pos, "CDS");
if(features.size() == 0){ // No matching CDS
fout << line_stream.str() << "NA\tNA\tNA\tNA\tNA" << std::endl;
fout << line_stream.str() << "NA\tNA\tNA\tNA\tNA\tNA" << std::endl;
return 0;
}
std::vector<gff3_feature>::iterator it;
char *ref_codon, *alt_codon;
for(it = features.begin(); it != features.end(); it++){
fout << line_stream.str();
fout << it->get_attribute("ID") << "\t";
//add in gene level info, control for case it's not present
std::string gene = it->get_attribute("gene");
if (gene.empty()){
fout << it->get_attribute("ID") << "\t";
} else {
fout << gene + ":" + it->get_attribute("ID") << "\t";
}
ref_codon = this->get_codon(pos, region, *it);
fout << ref_codon[0] << ref_codon[1] << ref_codon[2] << "\t";
fout << codon2aa(ref_codon[0], ref_codon[1], ref_codon[2]) << "\t";
alt_codon = this->get_codon(pos, region, *it, alt);
fout << alt_codon[0] << alt_codon[1] << alt_codon[2] << "\t";
fout << codon2aa(alt_codon[0], alt_codon[1], alt_codon[2]);
fout << codon2aa(alt_codon[0], alt_codon[1], alt_codon[2]) << "\t";

//adding amino acid position
int64_t start = it->get_start();
int64_t aa_pos = ((pos - start) / 3)+1;
fout << aa_pos;
fout << std::endl;

delete[] ref_codon;
Expand Down
14 changes: 8 additions & 6 deletions tests/test_variants.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@ int call_var_check_outfile(std::string prefix, uint8_t min_qual, uint8_t min_dep
std::string l;
getline(outFile, l); // Ignore first line
int comp = 0, ctr = 0;

while(ctr < len){
getline(outFile, l);
std::cout << l << std::endl;
std::cout << out[ctr] << " -> CORRECT" << std::endl;
comp += l.compare(out[ctr]);
std::cout << l.compare(out[ctr]) << "\n";
ctr++;
}
return comp;
Expand All @@ -25,20 +27,20 @@ int call_var_check_outfile(std::string prefix, uint8_t min_qual, uint8_t min_dep
int main() {
int num_success = 0;
// Quality threshold 20. Frequency threshold: 0.03. Total_DP = 3. Indel passes filters with total_depth 4. Has two lines.
std::string t_20_02_1[4] = {"test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-test3\tAAG\tK\tATG\tM", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit2\tAGA\tR\tTGA\t*", "test\t210\tA\t+GT\t1\t1\t41\t1\t0\t20\t0.25\t4\t0.4\tFALSE\tNA\tNA\tNA\tNA\tNA"};
std::string t_20_02_1[4] = {"test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*\t70", "test\t210\tA\t+GT\t1\t1\t41\t1\t0\t20\t0.25\t4\t0.4\tFALSE\tNA\tNA\tNA\tNA\tNA\tNA"};
// Quality threshold 3-. Frequency threshold: 0.03. Total_DP = 3. Freq = 0.666667 No Indel
std::string t_20_03[3] = {"test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-test3\tAAG\tK\tATG\tM", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit2\tAGA\tR\tTGA\t*"};
std::string t_20_03[3] = {"test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*\t70"};
// Quality threshold 25. Frequency threshold: 0.03. Total_DP = 2. Freq = 0.5 No Indel.
std::string t_25_03[3] = {"test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tid-test3\tAAG\tK\tATG\tM", "test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD", "test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tid-testedit2\tAGA\tR\tTGA\t*"};
std::string t_25_03[3] = {"test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM\t70", "test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD\t70", "test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*\t70"};
// Minimum depth threshold. Should be empty
std::string t_25_03_20[] = {};
num_success = call_var_check_outfile("../data/test.indel", 20, 0, 0.02, t_20_02_1, 4);
std::cout << num_success << std::endl;
num_success += call_var_check_outfile("../data/test.indel", 20, 0, 0.03, t_20_03, 3);
num_success += call_var_check_outfile("../data/test.indel2", 20, 0, 0.03, t_20_03, 3);
std::cout << num_success << std::endl;
num_success += call_var_check_outfile("../data/test.indel", 25, 0, 0.03, t_25_03, 3);
num_success += call_var_check_outfile("../data/test.indel3", 25, 0, 0.03, t_25_03, 3);
std::cout << num_success << std::endl;
num_success += call_var_check_outfile("../data/test.indel", 25, 20, 0.03, t_25_03_20, 0);
num_success += call_var_check_outfile("../data/test.indel4", 25, 20, 0.03, t_25_03_20, 0);
std::cout << num_success << std::endl;
if(num_success == 0)
return 0;
Expand Down

0 comments on commit 17ceea0

Please sign in to comment.