Skip to content

Commit

Permalink
int->long
Browse files (browse the repository at this point in the history)
  • Loading branch information
bjmt committed May 6, 2019
1 parent 11fb515 commit 14a5f18
Show file tree
Hide file tree
Showing 9 changed files with 75 additions and 74 deletions.
1 change: 1 addition & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
2019-05-04 Benjamin Jean-Marie Tremblay <[email protected]>

* converted a lot of int variables to long
* faster char to int conversion in klets.cpp
* countlets is much faster when providing alphabet (using unordered_map)
* countlets version bumped to 1.3
Expand Down
8 changes: 4 additions & 4 deletions src/countlets.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,15 @@ void usage() {
);
}

unordered_map<string, unsigned int> count_stream(istream &input, vector<string> klets,
unordered_map<string, unsigned long> count_stream(istream &input, vector<string> klets,
unsigned int k) {

char l;

string let;
let.reserve(k + 1);

unordered_map<string, unsigned int> counts;
unordered_map<string, unsigned long> counts;
counts.reserve(klets.size());
for (size_t i = 0; i < klets.size(); ++i) {
counts[klets[i]] = 0;
Expand Down Expand Up @@ -156,7 +156,7 @@ int main(int argc, char **argv) {

/* this version loads the entire sequence into memory */

vector<unsigned int> counts;
vector<unsigned long> counts;
vector<char> letters;
size_t seqlen;
char l;
Expand Down Expand Up @@ -199,7 +199,7 @@ int main(int argc, char **argv) {

/* this version only keeps k+1 characters in memory */

unordered_map<string, unsigned int> counts;
unordered_map<string, unsigned long> counts;

if (alph.length() < 1) {
cerr << "Error: could not parse -a option" << '\n';
Expand Down
16 changes: 8 additions & 8 deletions src/countwin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ void usage() {
);
}

string make_row(string START, string STOP, vector<unsigned int> counts,
string make_row(string START, string STOP, vector<unsigned long> counts,
vector<string> klets, bool nozero) {

string out;
Expand All @@ -63,11 +63,11 @@ string make_row(string START, string STOP, vector<unsigned int> counts,

}

string extract_window(istream &input, unsigned int window) {
string extract_window(istream &input, unsigned long window) {

string out;
char l;
unsigned int counter{0};
unsigned long counter{0};
out.reserve(window);

while (input >> l) {
Expand All @@ -84,9 +84,9 @@ int main(int argc, char **argv) {

int ku{1};
unsigned int k;
unsigned int START{1};
unsigned long START{1};
int opt;
unsigned int window, STOP, step;
unsigned long window, step, STOP;
size_t alphlen;
string alph, seq;
ifstream infile;
Expand All @@ -95,7 +95,7 @@ int main(int argc, char **argv) {
vector<string> klets;
set<unsigned int> lets_set;
vector<char> lets_uniq;
vector<unsigned int> counts;
vector<unsigned long> counts;

while ((opt = getopt(argc, argv, "i:o:a:k:w:s:nh")) != -1) {
switch (opt) {
Expand Down Expand Up @@ -129,13 +129,13 @@ int main(int argc, char **argv) {
break;

case 'w': if (optarg) {
window = atoi(optarg);
window = atol(optarg);
has_win = true;
}
break;

case 's': if (optarg) {
step = atoi(optarg);
step = atol(optarg);
has_step = true;
}
break;
Expand Down
14 changes: 7 additions & 7 deletions src/klets.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ using Clock = chrono::high_resolution_clock;
vector<string> make_klets(vector<char> lets_uniq, unsigned int k) {

size_t alphlen = lets_uniq.size();
unsigned int nlets = pow(alphlen, k);
unsigned int let_i, counter, step;
unsigned long nlets = pow(alphlen, k);
unsigned long let_i, counter, step;
vector<string> klets(nlets, "");

/* perhaps a bit primitive, but it works */
Expand All @@ -49,7 +49,7 @@ vector<string> make_klets(vector<char> lets_uniq, unsigned int k) {

while (counter < nlets) {

for (unsigned int j = 0; j < step; ++j) {
for (unsigned long j = 0; j < step; ++j) {
klets[counter] += lets_uniq[let_i];
++counter;
}
Expand All @@ -67,7 +67,7 @@ vector<string> make_klets(vector<char> lets_uniq, unsigned int k) {

}

vector<unsigned int> count_klets(vector<char> letters, vector<char> lets_uniq,
vector<unsigned long> count_klets(vector<char> letters, vector<char> lets_uniq,
unsigned int k, size_t alphlen) {

/* Scales very well with increasing k, but requires having the entire
Expand All @@ -80,9 +80,9 @@ vector<unsigned int> count_klets(vector<char> letters, vector<char> lets_uniq,
#endif

size_t seqlen = letters.size();
unsigned int nlets = pow(alphlen, k);
unsigned int l, counter;
vector<unsigned int> let_counts(nlets, 0);
unsigned long nlets = pow(alphlen, k);
unsigned long l, counter;
vector<unsigned long> let_counts(nlets, 0);
vector<unsigned int> intletters;
intletters.reserve(seqlen);
unordered_map<char, unsigned int> let2int;
Expand Down
2 changes: 1 addition & 1 deletion src/klets.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

std::vector<std::string> make_klets(std::vector<char> lets_uniq, unsigned int k);

std::vector<unsigned int> count_klets(std::vector<char> letters,
std::vector<unsigned long> count_klets(std::vector<char> letters,
std::vector<char> lets_uniq, unsigned int k, size_t alphlen);

#endif
70 changes: 35 additions & 35 deletions src/shuffle_euler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,19 @@ using namespace std;
using Clock = chrono::high_resolution_clock;
#endif

vector<vector<unsigned int>> make_edgelist(vector<unsigned int> let_counts,
unsigned int nletsm1, size_t alphlen) {
vector<vector<unsigned long>> make_edgelist(vector<unsigned long> let_counts,
unsigned long nletsm1, size_t alphlen) {

/* 1D vector<int> --> 2D vector<vector<int>>
* The first layer elements are vertices, second layer are the edges.
*/

/* TODO: find a cheaper alternative */

vector<vector<unsigned int>> edgelist(nletsm1, vector<unsigned int>(alphlen));
unsigned int counter{0};
vector<vector<unsigned long>> edgelist(nletsm1, vector<unsigned long>(alphlen));
unsigned long counter{0};

for (unsigned int i = 0; i < nletsm1; ++i) {
for (unsigned long i = 0; i < nletsm1; ++i) {

for (size_t j = 0; j < alphlen; ++j) {
edgelist[i][j] = let_counts[counter];
Expand All @@ -60,16 +60,16 @@ vector<vector<unsigned int>> make_edgelist(vector<unsigned int> let_counts,

}

vector<unsigned int> find_euler(vector<vector<unsigned int>> edgelist, unsigned int lasti,
unsigned int nletsm1, default_random_engine gen, size_t alphlen, unsigned int k,
vector<bool> empty_vertices, bool verbose) {
vector<unsigned long> find_euler(vector<vector<unsigned long>> edgelist,
unsigned long lasti, unsigned long nletsm1, default_random_engine gen,
size_t alphlen, unsigned int k, vector<bool> empty_vertices, bool verbose) {

unsigned int u;
unsigned int nletsm2 = pow(alphlen, k - 2);
unsigned int good_v{0}, counter{0};
unsigned long u;
unsigned long nletsm2 = pow(alphlen, k - 2);
unsigned long good_v{0}, counter{0};
vector<bool> vertices(nletsm1, false);
vector<unsigned int> last_letsi(nletsm1, 0);
vector<unsigned int> next_let_i;
vector<unsigned long> last_letsi(nletsm1, 0);
vector<unsigned long> next_let_i;
next_let_i.reserve(nletsm1);

/* The idea is to go through and make sure that every last letter for each
Expand All @@ -83,26 +83,26 @@ vector<unsigned int> find_euler(vector<vector<unsigned int>> edgelist, unsigned
vertices[lasti] = true; /* tree root */

/* I don't think there's a formula for this, so just prepare these beforehand */
for (unsigned int i = 0; i < nletsm1; ++i) {
for (unsigned long i = 0; i < nletsm1; ++i) {
next_let_i.push_back(counter * alphlen);
if (counter == nletsm2 - 1) counter = 0;
else ++counter;
}

for (unsigned int i = 0; i < nletsm1; ++i) {
for (unsigned long i = 0; i < nletsm1; ++i) {
if (empty_vertices[i]) vertices[i] = true; /* ignore unconnected vertices */
else ++good_v;
}

if (verbose) cerr << " Total vertices to travel: " << good_v << endl;

for (unsigned int i = 0; i < nletsm1; ++i) {
for (unsigned long i = 0; i < nletsm1; ++i) {

u = i;

while (!vertices[u]) {
/* pick a random possible edge from the vertex */
discrete_distribution<unsigned int> next_let(edgelist[u].begin(), edgelist[u].end());
discrete_distribution<unsigned long> next_let(edgelist[u].begin(), edgelist[u].end());
last_letsi[u] = next_let(gen);
/* now follow the edge to the next vertex */
if (k == 2)
Expand Down Expand Up @@ -130,20 +130,20 @@ vector<unsigned int> find_euler(vector<vector<unsigned int>> edgelist, unsigned

}

vector<vector<unsigned int>> fill_vertices(vector<vector<unsigned int>> edgelist,
vector<unsigned int> last_letsi, unsigned int nletsm1, size_t alphlen,
unsigned int lasti, default_random_engine gen, vector<bool> empty_vertices) {
vector<vector<unsigned long>> fill_vertices(vector<vector<unsigned long>> edgelist,
vector<unsigned long> last_letsi, unsigned long nletsm1, size_t alphlen,
unsigned long lasti, default_random_engine gen, vector<bool> empty_vertices) {

/* The incoming edgelist is just a set of counts for each letter. This
* will actually create vectors of letter indices based on counts.
*/

/* TODO: find a cheaper alternative */

vector<vector<unsigned int>> edgelist2(nletsm1);
unsigned int b;
vector<vector<unsigned long>> edgelist2(nletsm1);
unsigned long b;

for (unsigned int i = 0; i < nletsm1; ++i) {
for (unsigned long i = 0; i < nletsm1; ++i) {

if (empty_vertices[i]) continue;

Expand All @@ -152,7 +152,7 @@ vector<vector<unsigned int>> fill_vertices(vector<vector<unsigned int>> edgelist
for (size_t j = 0; j < alphlen; ++j) {

b = edgelist[i][j];
for (unsigned int h = 0; h < b; ++h) {
for (unsigned long h = 0; h < b; ++h) {
edgelist2[i].push_back(j);
}

Expand All @@ -169,15 +169,15 @@ vector<vector<unsigned int>> fill_vertices(vector<vector<unsigned int>> edgelist

}

vector<unsigned int> walk_euler(vector<vector<unsigned int>> edgelist,
vector<unsigned long> walk_euler(vector<vector<unsigned long>> edgelist,
size_t seqlen, vector<char> lets_uniq, string firstl) {

size_t alphlen = lets_uniq.size();
size_t nletsm1 = edgelist.size();
unsigned int current{0};
unsigned long current{0};
size_t n = firstl.length();
vector<unsigned int> edgelist_counter(nletsm1, 0);
vector<unsigned int> out_i;
vector<unsigned long> edgelist_counter(nletsm1, 0);
vector<unsigned long> out_i;
out_i.reserve(seqlen);

/* initialize shuffled sequence with starting vertex */
Expand Down Expand Up @@ -219,14 +219,14 @@ string shuffle_euler(vector<char> letters, default_random_engine gen, unsigned i
#endif

size_t seqlen = letters.size();
unsigned int nlets, nletsm1;
unsigned long nlets, nletsm1;
size_t alphlen;
unsigned int lasti{0};
vector<unsigned int> last_letsi, out_i;
vector<unsigned int> let_counts;
unsigned long lasti{0};
vector<unsigned long> last_letsi, out_i;
vector<unsigned long> let_counts;
vector<char> lets_uniq;
set<unsigned int> lets_set;
vector<vector<unsigned int>> edgelist;
vector<vector<unsigned long>> edgelist;
string firstl, out;

/* the first and last letters remain unchanged; these are special vertices
Expand Down Expand Up @@ -275,7 +275,7 @@ string shuffle_euler(vector<char> letters, default_random_engine gen, unsigned i
*/
vector<bool> empty_vertices;
empty_vertices.reserve(nletsm1);
for (unsigned int i = 0; i < nletsm1; ++i) {
for (unsigned long i = 0; i < nletsm1; ++i) {
empty_vertices.push_back(true);
for (size_t j = 0; j < alphlen; ++j) {
if (edgelist[i][j] > 0) {
Expand Down Expand Up @@ -303,7 +303,7 @@ string shuffle_euler(vector<char> letters, default_random_engine gen, unsigned i
#endif

/* delete last edges from edge pool */
vector<vector<unsigned int>> edgelist2;
vector<vector<unsigned long>> edgelist2;
for (size_t i = 0; i < last_letsi.size(); ++i) {
if (i != lasti) --edgelist[i][last_letsi[i]];
}
Expand Down
16 changes: 8 additions & 8 deletions src/shuffle_linear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ string shuffle_linear(vector<char> letters, default_random_engine gen,

/* variables */

unsigned int seqlen1 = letters.size();
unsigned int seqlen2{seqlen1 / k};
unsigned int seqrem{seqlen1 % k};
unsigned int seqremlen{seqlen1 - seqrem};
unsigned long seqlen1 = letters.size();
unsigned long seqlen2{seqlen1 / k};
unsigned long seqrem{seqlen1 % k};
unsigned long seqremlen{seqlen1 - seqrem};

if (verbose) {
cerr << " Times split: " << seqlen2 << endl;
Expand All @@ -42,20 +42,20 @@ string shuffle_linear(vector<char> letters, default_random_engine gen,
string out;
out.reserve(seqlen1);

vector<unsigned int> seqindex;
vector<unsigned long> seqindex;
seqindex.reserve(seqlen2);

/* shuffle index */

for (unsigned int i = 0; i < seqlen2; ++i) {
for (unsigned long i = 0; i < seqlen2; ++i) {
seqindex.push_back(i * k);
}

shuffle(seqindex.begin(), seqindex.end(), gen);

/* build output string from shuffled index */

for (unsigned int i = 0; i < seqlen2; ++i) {
for (unsigned long i = 0; i < seqlen2; ++i) {
for (unsigned int j = 0; j < k; ++j) {
out += letters[seqindex[i] + j];
}
Expand All @@ -64,7 +64,7 @@ string shuffle_linear(vector<char> letters, default_random_engine gen,
/* add leftover letters */

if (seqrem > 0) {
for (unsigned int i = seqremlen; i < seqlen1; ++i) {
for (unsigned long i = seqremlen; i < seqlen1; ++i) {
out += letters[i];
}
}
Expand Down
Loading

0 comments on commit 14a5f18

Please sign in to comment.