Skip to content

Commit

Permalink
countfa v1.0, changed output of shuffler -f
Browse files Browse the repository at this point in the history
  • Loading branch information
bjmt committed Apr 26, 2019
1 parent 484f9c6 commit 172b8bd
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 5 deletions.
5 changes: 5 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
2019-04-26 Benjamin Jean-Marie Tremblay <[email protected]>

* Added countfa v1.0
* shuffler -f now inserts newlines every 80 characters

2019-04-25 Benjamin Jean-Marie Tremblay <[email protected]>

* Added seqgen v1.0
Expand Down
9 changes: 7 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
SRC := countlets.cpp klets.cpp shuffler.cpp shuffle_euler.cpp \
shuffle_linear.cpp shuffle_markov.cpp seqgen.cpp
shuffle_linear.cpp shuffle_markov.cpp seqgen.cpp countfa.cpp
OBJ_COUNTLETS := countlets.o klets.o
OBJ_SHUFFLER := shuffler.o klets.o shuffle_euler.o shuffle_linear.o \
shuffle_markov.o
OBJ_SEQGEN := seqgen.o
OBJ_COUNTFA := countfa.o
CC := g++

all: build install
Expand All @@ -12,6 +13,10 @@ build:
cd src;\
$(CC) --std=c++11 -O2 -Wall -c $(SRC)

countfa:
cd src;\
$(CC) $(OBJ_COUNTFA) -o ../bin/countfa

countlets:
cd src;\
$(CC) $(OBJ_COUNTLETS) -o ../bin/countlets
Expand All @@ -27,7 +32,7 @@ seqgen:
makebin:
mkdir -p bin

install: makebin countlets shuffler seqgen
install: makebin countfa countlets shuffler seqgen

clean:
cd src;\
Expand Down
17 changes: 17 additions & 0 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,30 @@ Installation

The following binaries are created:

bin/countfa
bin/countlets
bin/seqgen
bin/shuffler

Run these without any arguments or with the -h flag to see usage.


countfa
-------

Counts the number of characters per sequence in a fasta file. For each sequence,
the name followed by the character count is written to stdout.

Example usage:

printf ">1\nACAAG\n>2\nGCCCGGTTAT\n" | bin/countfa

>1
5
>2
10


countlets
---------

Expand Down
119 changes: 119 additions & 0 deletions src/countfa.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/*
* Copyright (C) 2019 Benjamin Jean-Marie Tremblay
*
* This file is part of sequenceshuffler.
*
* sequenceshuffler is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* sequenceshuffler is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with sequenceshuffler. If not, see <https://www.gnu.org/licenses/>.
*
*/

#include <unistd.h>

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
using namespace std;

/*
 * Prints the countfa help text (version line, the two invocation forms and
 * the supported options) to stdout.
 */
void usage() {
  printf(
    "countfa v1.0 Copyright (C) 2019 Benjamin Jean-Marie Tremblay \n"
    " \n"
    " Usage: countfa -i [filename] \n"
    " cat [filename] | countfa \n"
    " \n"
    " -i <str> Input filename. File must be fasta-formatted. Alternatively, takes \n"
    " input from a pipe. \n"
    " -h Print usage and exit. \n"
  );
}

/*
 * Reads fasta-formatted text from `input` and writes, for every record, the
 * header line followed by the number of sequence characters, each on its own
 * line, to stdout.
 *
 * Parsing rules:
 *   - A line starting with '>' opens a new record and closes the previous one.
 *   - An empty line closes the current record; following lines are ignored
 *     until the next header.
 *   - Lines seen before the first header are ignored.
 *   - Spaces inside sequence lines are not counted.
 *   - A record with no sequence characters is reported with a count of 0.
 */
void do_countfa(std::istream &input) {

  std::string name, line;
  // Unsigned and as wide as a string length so long sequences cannot overflow.
  std::string::size_type counter{0};

  // Test the stream itself rather than .good(): a final line without a
  // trailing newline sets eofbit while still delivering data, and that line
  // must be counted.
  while (std::getline(input, line)) {

    if (line.empty() || line[0] == '>') {

      // A header or blank line ends the record in progress. Always emit the
      // count (even 0) so the output stays in strict name/count pairs.
      if (!name.empty()) {
        std::cout << name << '\n' << counter << '\n';
        name.clear();
      }
      if (!line.empty()) {
        name = line;
      }
      counter = 0;

    } else if (!name.empty()) {

      line.erase(std::remove(line.begin(), line.end(), ' '), line.end());

      if (line.empty()) {
        // NOTE(review): a line made up solely of spaces discards the record in
        // progress without printing it. Kept as-is; confirm this is intended.
        name.clear();
        counter = 0;
      } else {
        counter += line.length();
      }

    }

  }

  // Emit the last record, which has no following header to close it.
  if (!name.empty()) {
    std::cout << name << '\n' << counter << '\n';
  }

  std::cout << std::flush;

}

/*
 * Entry point for countfa.
 *
 * Options:
 *   -i <file>  read fasta input from <file>
 *   -h         print usage and exit with status 0
 *
 * Without -i, input is read from stdin. If stdin is a terminal (nothing is
 * piped in), usage is printed and the program exits with a failure status.
 */
int main(int argc, char **argv) {

  int opt;
  bool has_file{false};
  std::ifstream seqfile;

  while ((opt = getopt(argc, argv, "i:h")) != -1) {
    switch (opt) {
      case 'i':
        seqfile.open(optarg);
        // A failed open() sets failbit, not badbit, so fail() is the check
        // that actually catches a missing or unreadable file.
        if (seqfile.fail()) {
          std::cerr << "Error: file not found" << std::endl;
          std::exit(EXIT_FAILURE);
        }
        has_file = true;
        break;
      case 'h':
        usage();
        return 0;
      default:
        // getopt() returns '?' for an unknown option or a missing option
        // argument (and reports it on stderr unless opterr is cleared).
        // Stop here rather than silently carrying on with a bad command line.
        usage();
        return EXIT_FAILURE;
    }
  }

  if (!has_file) {
    if (isatty(STDIN_FILENO)) {
      usage();
      std::exit(EXIT_FAILURE);
    }
    do_countfa(std::cin);
  } else {
    do_countfa(seqfile);
  }

  return 0;

}
22 changes: 19 additions & 3 deletions src/shuffler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ void usage() {
" \n"
" -i <str> Input filename. All white space will be removed. Alternatively, can \n"
" take string input from a pipe. \n"
" -o <str> Output filename. Alternatively, prints to stdout. \n"
" -o <str> Output filename. Alternatively, prints to stdout. For fasta input, a\n"
" newline is inserted every 80 characters. \n"
" -k <int> K-let size. Defaults to 1. \n"
" -s <int> RNG seed number. Defaults to time in seconds. \n"
" -m Use the markov shuffling method. Defaults to euler. \n"
Expand Down Expand Up @@ -291,6 +292,7 @@ int main(int argc, char **argv) {
}

for (int i = 0; i < fa_names.size(); ++i) {

vector<char> letters2(fa_seqs[i].begin(), fa_seqs[i].end());
if (k >= letters2.size()) {
cerr << "Error: sequence length must be greater than k" << endl;
Expand All @@ -300,9 +302,23 @@ int main(int argc, char **argv) {
outletters = do_shuffle(letters2, k, gen, false, method_i);

if (has_out) {
outfile << fa_names[i] << "\n" << outletters << "\n";
outfile << fa_names[i] << endl;
for (int j = 0; j < outletters.length(); ++j) {
if (j % 80 == 0 && j != 0) {
outfile << endl;
}
outfile << outletters[j];
}
outfile << endl;
} else {
cout << fa_names[i] << "\n" << outletters << "\n";
cout << fa_names[i] << endl;
for (int j = 0; j < outletters.length(); ++j) {
if (j % 80 == 0 && j != 0) {
cout << endl;
}
cout << outletters[j];
}
cout << endl;
}

}
Expand Down

0 comments on commit 172b8bd

Please sign in to comment.