-
Notifications
You must be signed in to change notification settings - Fork 1
/
dbscan.c
355 lines (287 loc) · 10.5 KB
/
dbscan.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
#include <stdio.h>
#include <stdlib.h>
#include "FileHandling.h"
#include <math.h>
#include <string.h>
#include <time.h>
int main(int argc, char *argv[])
{
unsigned int dim = 0; // Dimensions of Elements
unsigned int n = 0; // n Elements
unsigned int i,j,d; // i counter for n, j counter for Neighborhood Elements, d counter for dimensions
double minDist; // Minimum Distance for a point to be assigned to clusters
unsigned int minPoints; // Minimum points for a cluster to be created
unsigned int CounterNoise=0; // Counter for Noise Elements
unsigned int cluster = 0; // Cluster index
unsigned int h,k; // h counter for Neighborhood Distance Calculation, k counter for files
clock_t start, end; // Variables for counting execution time
char filename[40];
char c;
unsigned int choise = 0;
/* -------------------------------------------------------------------------- */
// Reading Dataset, counting n elements, assigning to X Array
FILE* Dataset;
printf("\n Give the DataSet file:");
scanf("%s", filename);
Dataset = fopen(filename, "r");
if (!Dataset)
{
printf("\n There is something wrong with the Dataset file! \n\n");
return -1;
}
/* -------------------------------------------------------------------------- */
dim = getColumns(Dataset);
n = getRows(Dataset); // Getting the Elements of the Dataset
rewind(Dataset);
printf("\n Elements:%d \n", n-1);
printf("\n Dimensions:%d \n", dim);
n--;
printf("Give the amount of Minimum Points: ");
scanf("%d",&minPoints );
printf("Give the minimum Distance: ");
scanf("%lf",&minDist);
/* -------------------------------------------------------------------------- */
// All the necessary memory allocation
float *X; // Array of Elements
X = (float *)calloc(n*dim, sizeof(float));
unsigned int *Cluster;
Cluster = (int *)calloc(n,sizeof(int));
unsigned int *visited; //Array for knowning which elements are visited and which are not
visited =(int *) calloc(n,sizeof(int));
float *distance; //array for holding distances of element i with each db element
distance =(float *) calloc(n,sizeof(float));
unsigned int *numPoints; //Array for holding the Neighborhood's size of each i element
numPoints =(int *) calloc(n,sizeof(int));
unsigned int *belong; //array for holding boolean value of an element belong to a Neighborhood
belong = (int *)calloc(n,sizeof(int));
unsigned int *nBelong; //array for holding boolean value of an element belong to a Neighborhood
nBelong = (int *)calloc(n,sizeof(int));
unsigned int *Noise; //Flag for points that are Noise
Noise =(int *) calloc(n,sizeof(int));
unsigned int *Core; //Flag for core points, 0 for border, 1 for Core
Core = (int *) calloc(n,sizeof(int));
float *distance2 ; //Array for holding Distances for each core point of a Neighborhood with Each Element of the dataset
distance2 = (float *) calloc(n*n,sizeof(float));
for(i = n; i--;)
{
Core[i] = 0;
Cluster[i] = -1;
visited[i] = 0; //0 for unvisited , 1 for visited
Noise[i] = 0;
}
/* -------------------------------------------------------------------------- */
// Passing elements to Array X[n][dim]
X = getData(Dataset,n,dim,X);
/* -------------------------------------------------------------------------- */
start = clock();
/*-----------------------Starting DBSCAN--------------------------------------*/
for(i = n; i--;)
{
if(visited[i] != 1) //Checking if the Element is unvisited or not to proceed
{
visited[i] = 1; //If it is unvisited then mark it as visited
/* -------------------------------------------------------------------------- */
//Finding the Neighborhood
//Calculating the distance of Xi with each unvisited element
for(j = n; j--;)
{
if(j != i)
{
belong[j] = 0; // Mark temp 0 for current Point, used for reset
distance[j] = 0; //Reset the distance before calculating for current point
for(d = dim; d--;)
{
//Calculating and storing distances of Xi element with the rest of the points
distance[j] += (X[j*dim + d] - X[i*dim + d])*(X[j*dim + d] - X[i*dim + d]);
}
distance[j] = sqrt(distance[j]);
//Calculating the size of the Neighborhood
if(distance[j] <= minDist)
{
//If Current elements belongs to the Neighborhood set temp 1 and Increase size
belong[j] = 1;
numPoints[i]++;
}
}
}
//Raising by one so that the i element to be accounted for
numPoints[i]++;
/* -------------------------------------------------------------------------- */
//Checking if the element is core or noise
//if core change it to Core = 1 else mark it as noise
//
if(numPoints[i] < minPoints)
{
//If Noise then mark it as one,set core to -1 and pass it to array
// C as -1 to first column,then pass the rest of the Element to C
Noise[i] = 1;
Core[i] = -1;
CounterNoise++;
Cluster[i] = -1;
}else
{
//If Found as core set Core to 1 and pass it to Cluster
Core[i] = 1;
//Passing the Core element
Cluster[i] = cluster;
/* -------------------------------------------------------------------------- */
//Passing the rest of Neighborhood to array C
for(j = n; j--;)
{
//If belong[j] = 1 means that the current element belongs to Neighborhood
//and then you pass it to array C with the current Cluster Value to
//it's First Column, then mark is as visited
if(belong[j] == 1 && visited[j] != 1)
{
Cluster[j] = cluster;
visited[j] = 1;
}
}
/* -------------------------------------------------------------------------- */
//Finding the distance for each Element of our initial Neighborhood with each Element of the dataset
//Each [j][h] Combination Holds the distances of Current j element with each h element
for(j = n; j--;)
{
if(belong[j] == 1)
{
for(h = n; h--;)
{
distance2[j*n + h] = 0;
for(d = dim; d--;)
distance2[j*n + h] += (X[h*dim + d] - X[j*dim + d])*(X[h*dim + d] - X[j*dim + d]);
distance2[j*n + h] = sqrt(distance2[j*n + h]);
}
}
}
/* -------------------------------------------------------------------------- */
for(j = n; j--;)
{
//If belong[j] = 1 means that the current element belongs to Neighborhood
if(belong[j] == 1)
{
/* -------------------------------------------------------------------------- */
//Calculating and Storing the Neighborhood of each Initial Neighborhood Element
numPoints[j] = 0;
for(h = n; h--;)
{
//Reseting temp2 for current h element
nBelong[h] = 0;
//Comparing Distance of Current h element with minDist
if(distance2[j*n + h] <= minDist)
{
//if h element is found to belong to j's element Neighborhood
//set nBelong[h] = 1 and increase the Neighborhood's size
nBelong[h] = 1;
numPoints[j]++;
}
}
//Raising by one so that the i element to be accounted for
// numPoints[j]++;
/* -------------------------------------------------------------------------- */
//Checking if the current Neighborhood Element is Core or Border,if it was Noise unmark it
if(numPoints[j] < minPoints)
{
if(Noise[j] == 1)
{
//If it was Marked as noise but currently belongs to someone's
//Neighborhood unmark it and set it directly as border
//If it was Core element it would never been marked as noise before
Core[j] = 0;
Noise[j] = 0;
}else
{
Core[j] = 0;
}
//Change it's first Column to current Cluster
Cluster[j] = cluster;
}else
{
//if it is found as Core pass it's Neighborhood to it's cluster,
//Set Core = 1
Core[j] = 1;
for(h = n; h--;){
//If nBelong[h] = 1 means that element h belongs to j's Neighborhood
if(nBelong[h] == 1)
{
//Check if it is unvisited, if yes proceed to pass it to current Cluster
//Then mark it as visited
if(visited[h] != 1)
{
Cluster[h] = cluster;
visited[h] = 1;
}
}
}
}
}
}
/* -------------------------------------------------------------------------- */
//After a succesfull Cluster Creation/Expansion increase Cluster by 1
cluster++;
}
}
}
/*---------------------------------------------------------------------------*/
//End of DBSCAN
end = clock();
/*---------------------------------------------------------------------------*/
double total_time = ((double) (end - start)) / CLOCKS_PER_SEC;
// for(i = 0; i < n ; i++)
// {
// printf("\n");
// printf("%d :",i+1 );
// printf("Noise: %d ",Noise[i]);
// printf("Core: %d ",Core[i]);
// printf("Visited: %d ",visited[i]);
// printf("Cluster %d ",Cluster[i] );
// for(d = 0; d < dim; d++ )
// printf("%lf ",X[i][d] );
//
// }
CounterNoise = 0;
for(i = n; i--;)
{
if(Noise[i] == 1)
{
CounterNoise++;
}
}
printf("\n");
printf("Clusters : %d \n",cluster );
printf("\n");
printf("CounterNoise: %d\n",CounterNoise );
printf("\n");
printf("\n Time of Algorithm Execution: %lf \n\n",total_time);
printf("\n");
FILE* NoiseFile;
NoiseFile = fopen("FinalNoise.txt","w");
for(j = n; j--;){
if(Noise[j] == 1)
{
for(d = dim; d--;)
fprintf(NoiseFile, "%lf ",X[j*dim + d] );
fprintf(NoiseFile, "\n");
}
}
fclose(NoiseFile);
for(j = 200 ; j >= cluster ; j--)
{
char *fileName;
fileName = calloc(n,sizeof(char));
snprintf(fileName,n*sizeof(char),"C%d.txt",j);
remove(fileName);
free(fileName);
}
/* -------------------------------------------------------------------------- */
free(X);
free(Cluster);
free(visited);
free(Noise);
free(nBelong);
free(belong);
free(numPoints);
free(distance);
free(Core);
free(distance2);
return 0;
}