-
Notifications
You must be signed in to change notification settings - Fork 0
/
threading2.cpp
executable file
·85 lines (69 loc) · 1.92 KB
/
threading2.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#include <iostream>
#include <vector>
#include <string>
#include <unordered_map>
#include <deque>
#include <set>
#include <algorithm>
#include <cmath>
#include <stdlib.h>
#include <stdio.h>
#include <fstream>
#include <sstream>
#include <chrono>
#include <algorithm>
#include <random>
// to compile
// g++ -std=gnu++11 -O3 -dynamiclib -ftree-vectorize -march=native -mavx bmm_7_haga.cpp -o ./bmm.dylib
// sudo /usr/bin/g++ -std=gnu++11 -Ofast -shared -fPIC -ftree-vectorize -march=native -mavx bmm_6_haga.cpp -o ./bmm.dylib
// icc -std=gnu++11 -O3 -shared -fPIC bmm_5_haga.cpp -o ./bmm.dylib
using namespace std;
/**
 * Scope-based stopwatch.
 *
 * The constructor samples std::chrono::high_resolution_clock; the destructor
 * samples it again and prints the elapsed time in milliseconds to std::cout.
 * Create one as a local variable to time everything up to the end of the
 * enclosing scope.
 */
struct Timer
{
    std::chrono::time_point<std::chrono::high_resolution_clock> start, end;
    std::chrono::duration<float> duration;

    /// Records the starting time point.
    Timer()
        : start(std::chrono::high_resolution_clock::now())
    {
    }

    /// Records the end time point, stores end - start (in seconds, as float)
    /// in `duration`, and reports it in milliseconds.
    ~Timer()
    {
        end = std::chrono::high_resolution_clock::now();
        duration = end - start;

        // duration counts seconds; scale to milliseconds for the report.
        const float elapsed_ms = 1000.0f * duration.count();
        std::cout << "Elapsed: " << elapsed_ms << " ms." << std::endl;
    }
};
// Matrix dimension: main() builds J with N rows of N elements each, and
// calc() visits rows 0..N-1.
int N = 3000;
// Number of columns handed to each calc() call by the inner loop in main().
int step = 100;
// Empty vector that main() copies into J as the starting point of every row.
vector<double> dvec;
// The matrix updated by calc(); accessed as J[row][column].
vector< vector<double> > J;
// Random-number objects. In the visible code only the commented-out
// expression in calc() refers to dist and m_mt; m_randomdevice is not
// referenced anywhere, and m_mt is default-constructed rather than seeded
// from it.
std::random_device m_randomdevice;
std::mt19937 m_mt;
// Uniform distribution over [0.0, 1.0).
std::uniform_real_distribution<double> dist(0.0, 1.0);
/**
 * Adds 1.0 to every element in columns [a, b) of the first N rows of the
 * global matrix J.
 *
 * @param a  First column to update (inclusive). Negative values are treated
 *           as 0.
 * @param b  One past the last column to update (exclusive). Values beyond a
 *           row's length are clamped to that length.
 *
 * Rows are walked in the outer loop and columns in the inner loop so that
 * each row's contiguous storage is traversed sequentially; every element in
 * the range still receives exactly one += 1.0 per call, so the result is the
 * same as visiting column by column.
 *
 * The function only writes to columns [a, b), so calls with disjoint column
 * ranges touch disjoint elements of J.
 */
void calc(int a, int b) {
    const int first_col = std::max(a, 0);
    // Never index past the rows that actually exist in J.
    const int row_count = std::min(N, static_cast<int>(J.size()));

    for (int ii = 0; ii < row_count; ii++) {
        auto& row = J[ii];
        // Clamp per row so a too-large b cannot run off the end of the row.
        const int last_col = std::min(b, static_cast<int>(row.size()));
        for (int jj = first_col; jj < last_col; jj++) {
            // The random scaling below is disabled. NOTE(review): m_mt is a
            // single global engine, so enabling it while calc() is called
            // from several threads would advance it without synchronization.
            row[jj] += 1.0; // * dist(m_mt);
        }
    }
}
/**
 * Benchmark driver.
 *
 * Builds the global N x N matrix J filled with zeros, then runs 500 passes in
 * which the columns of J are split into chunks of `step` columns and each
 * chunk is passed to calc(). The chunk loop carries an OpenMP
 * `parallel for` pragma; it only has an effect when the program is compiled
 * with OpenMP enabled (e.g. -fopenmp for g++), otherwise the loop runs
 * serially. The elapsed time of the 500 passes is printed by the Timer
 * destructor when main returns.
 *
 * @param argc  Unused.
 * @param argv  Unused.
 * @return 0 always.
 */
int main(int argc, char **argv){
    // Build all N rows of N zeros in one step instead of N*N push_back calls.
    // J and dvec are empty globals at this point, so the resulting matrix is
    // the same as appending N zero-filled rows one element at a time.
    J.assign(N, std::vector<double>(N, 0.0));

    // Created after the setup above so that only the update passes are timed.
    Timer timer;
    for(int t = 0; t < 500; t++) {
        // Timings recorded by the original author:
        // 40100 ms without parfor
        // 1869 ms with parfor
        // 47000 ms with parfor (and random function)
        #pragma omp parallel for
        for(int i = 0; i < N; i += step) {
            // Clamp the last chunk so it cannot extend past column N-1 when
            // N is not a multiple of step.
            calc(i, std::min(i + step, N));
        }
    }
    return 0;
}