Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 123 #127

Merged
merged 9 commits into from
Apr 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions data/test.gff
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
## GFF3 file format
test Genbank primer_binding_site 2 290 . + . ID=id-test1;Note=daft punk
test Genbank long_terminal_repeat 1 55 . + . ID=id-test2;Note=random access memories
test Genbank CDS 2 292 . + . ID=id-test3;Note=eluvite
test Genbank CDS 1 55 . + . ID=id-test4;Note=ategnato
test Genbank CDS 2 292 . + . ID=id-testedit1;Note=PinkFloyd;EditPosition=100;EditSequence=A
test Genbank CDS 2 292 . + . ID=id-testedit2;Note=AnotherBrickInTheWall;EditPosition=102;EditSequence=AA
test Genbank primer_binding_site 2 290 . + . ID=id-test1;Note=daft punk;Parent=geneFragmentsOfTime;
test Genbank long_terminal_repeat 1 55 . + . ID=id-test2;Note=random access memories;Parent=geneGetLucky;
test Genbank CDS 2 292 . + . ID=id-test3;Note=eluvite;gene=geneKatyPerry;
test Genbank CDS 1 55 . + . ID=id-test4;Note=ategnato;gene=geneKatyPerry;
test Genbank CDS 2 292 . + . ID=id-testedit1;Note=PinkFloyd;EditPosition=100;EditSequence=A;
test Genbank CDS 2 292 . + . ID=id-testedit2;Note=AnotherBrickInTheWall;EditPosition=102;EditSequence=AA;gene=geneTaylorSwift;
7 changes: 6 additions & 1 deletion src/call_variants.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ int call_variants_from_plup(std::istream &cin, std::string out_file, uint8_t min
ref_antd refantd(ref_path, gff_path);
std::ostringstream out_str;
std::ofstream fout((out_file+".tsv").c_str());
//make columns in the file
fout << "REGION"
"\tPOS"
"\tREF"
Expand All @@ -47,6 +48,7 @@ int call_variants_from_plup(std::istream &cin, std::string out_file, uint8_t min
"\tREF_AA"
"\tALT_CODON"
"\tALT_AA"
"\tPOS_AA"
<< std::endl;
int ctr = 0;
int64_t pos = 0;
Expand Down Expand Up @@ -118,6 +120,7 @@ int call_variants_from_plup(std::istream &cin, std::string out_file, uint8_t min
freq_depth = get_frequency_depth(*it, pdepth, mdepth);
if(freq_depth[0] < min_threshold)
continue;
//this only adds the first bit of the information to the tsv
out_str << region << "\t";
out_str << pos << "\t";
out_str << ref << "\t";
Expand All @@ -135,6 +138,7 @@ int call_variants_from_plup(std::istream &cin, std::string out_file, uint8_t min
Exp | Error | Err free |
Obs | AD | RD |
*/
//adding pvalue related info
err = pow(10, ( -1 * (it->mean_qual)/10));
kt_fisher_exact((err * mdepth), (1-err) * mdepth, it->depth, ref_it->depth, &pval_left, &pval_right, &pval_twotailed);
out_str << pval_left << "\t";
Expand All @@ -143,10 +147,11 @@ int call_variants_from_plup(std::istream &cin, std::string out_file, uint8_t min
} else {
out_str << "FALSE" << "\t";
}
//adding codon related info, can add gene info here too
if(it->nuc[0] != '+' && it->nuc[0] != '-'){
refantd.codon_aa_stream(region, out_str, fout, pos, it->nuc[0]);
} else {
fout << out_str.str() << "NA\tNA\tNA\tNA\tNA" << std::endl;
fout << out_str.str() << "NA\tNA\tNA\tNA\tNA\tNA" << std::endl;
}
out_str.str("");
out_str.clear();
Expand Down
18 changes: 15 additions & 3 deletions src/ref_seq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,23 +109,35 @@ ref_antd::~ref_antd()
if (this->fai) fai_destroy(this->fai);
}

//used to add codon info to variants output
int ref_antd::codon_aa_stream(std::string region, std::ostringstream &line_stream, std::ofstream &fout, int64_t pos, char alt){
std::vector<gff3_feature> features = gff.query_features(pos, "CDS");
if(features.size() == 0){ // No matching CDS
fout << line_stream.str() << "NA\tNA\tNA\tNA\tNA" << std::endl;
fout << line_stream.str() << "NA\tNA\tNA\tNA\tNA\tNA" << std::endl;
return 0;
}
std::vector<gff3_feature>::iterator it;
char *ref_codon, *alt_codon;
for(it = features.begin(); it != features.end(); it++){
fout << line_stream.str();
fout << it->get_attribute("ID") << "\t";
//add in gene level info, control for case it's not present
std::string gene = it->get_attribute("gene");
if (gene.empty()){
fout << it->get_attribute("ID") << "\t";
} else {
fout << gene + ":" + it->get_attribute("ID") << "\t";
}
ref_codon = this->get_codon(pos, region, *it);
fout << ref_codon[0] << ref_codon[1] << ref_codon[2] << "\t";
fout << codon2aa(ref_codon[0], ref_codon[1], ref_codon[2]) << "\t";
alt_codon = this->get_codon(pos, region, *it, alt);
fout << alt_codon[0] << alt_codon[1] << alt_codon[2] << "\t";
fout << codon2aa(alt_codon[0], alt_codon[1], alt_codon[2]);
fout << codon2aa(alt_codon[0], alt_codon[1], alt_codon[2]) << "\t";

//adding amino acid position
int64_t start = it->get_start();
int64_t aa_pos = ((pos - start) / 3)+1;
fout << aa_pos;
fout << std::endl;

delete[] ref_codon;
Expand Down
14 changes: 8 additions & 6 deletions tests/test_variants.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@ int call_var_check_outfile(std::string prefix, uint8_t min_qual, uint8_t min_dep
std::string l;
getline(outFile, l); // Ignore first line
int comp = 0, ctr = 0;

while(ctr < len){
getline(outFile, l);
std::cout << l << std::endl;
std::cout << out[ctr] << " -> CORRECT" << std::endl;
comp += l.compare(out[ctr]);
std::cout << l.compare(out[ctr]) << "\n";
ctr++;
}
return comp;
Expand All @@ -25,20 +27,20 @@ int call_var_check_outfile(std::string prefix, uint8_t min_qual, uint8_t min_dep
int main() {
int num_success = 0;
// Quality threshold 20. Frequency threshold: 0.03. Total_DP = 3. Indel passes filters with total_depth 4. Has two lines.
std::string t_20_02_1[4] = {"test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-test3\tAAG\tK\tATG\tM", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit2\tAGA\tR\tTGA\t*", "test\t210\tA\t+GT\t1\t1\t41\t1\t0\t20\t0.25\t4\t0.4\tFALSE\tNA\tNA\tNA\tNA\tNA"};
std::string t_20_02_1[4] = {"test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*\t70", "test\t210\tA\t+GT\t1\t1\t41\t1\t0\t20\t0.25\t4\t0.4\tFALSE\tNA\tNA\tNA\tNA\tNA\tNA"};
// Quality threshold 3-. Frequency threshold: 0.03. Total_DP = 3. Freq = 0.666667 No Indel
std::string t_20_03[3] = {"test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-test3\tAAG\tK\tATG\tM", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit2\tAGA\tR\tTGA\t*"};
std::string t_20_03[3] = {"test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD\t70", "test\t210\tA\tT\t1\t1\t41\t2\t1\t40\t0.666667\t3\t0.2\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*\t70"};
// Quality threshold 25. Frequency threshold: 0.03. Total_DP = 2. Freq = 0.5 No Indel.
std::string t_25_03[3] = {"test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tid-test3\tAAG\tK\tATG\tM", "test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD", "test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tid-testedit2\tAGA\tR\tTGA\t*"};
std::string t_25_03[3] = {"test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tgeneKatyPerry:id-test3\tAAG\tK\tATG\tM\t70", "test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tid-testedit1\tGAA\tE\tGAT\tD\t70", "test\t210\tA\tT\t1\t1\t41\t1\t1\t58\t0.5\t2\t0.4\tFALSE\tgeneTaylorSwift:id-testedit2\tAGA\tR\tTGA\t*\t70"};
// Minimum depth threshold. Should be empty
std::string t_25_03_20[] = {};
num_success = call_var_check_outfile("../data/test.indel", 20, 0, 0.02, t_20_02_1, 4);
std::cout << num_success << std::endl;
num_success += call_var_check_outfile("../data/test.indel", 20, 0, 0.03, t_20_03, 3);
num_success += call_var_check_outfile("../data/test.indel2", 20, 0, 0.03, t_20_03, 3);
std::cout << num_success << std::endl;
num_success += call_var_check_outfile("../data/test.indel", 25, 0, 0.03, t_25_03, 3);
num_success += call_var_check_outfile("../data/test.indel3", 25, 0, 0.03, t_25_03, 3);
std::cout << num_success << std::endl;
num_success += call_var_check_outfile("../data/test.indel", 25, 20, 0.03, t_25_03_20, 0);
num_success += call_var_check_outfile("../data/test.indel4", 25, 20, 0.03, t_25_03_20, 0);
std::cout << num_success << std::endl;
if(num_success == 0)
return 0;
Expand Down