Skip to content

Tutorial

Panagiotis Petropoulakis edited this page Jan 4, 2021 · 6 revisions
  • Step 1: Build "random" vectors. Header files: item.h and utils.h
// Build n data points of dimension dim. Every component of point i is set to
// i + 5, so the "random" vectors are in fact deterministic placeholders.
list<Item> dataPoints;
errorCode status; // Holds the outcome of the most recent library call

for(int i = 0; i < n; i++){
  vector<double> components;

  // All dim coordinates of this point share the same value
  for(int j = 0; j < dim; j++){
    components.push_back(i + 5);
  }

  // Item's constructor receives status; it is checked right after construction
  dataPoints.push_back(Item(components, status));
  if(status != SUCCESS){
    printError(status);
    return -1; // Same failure code as the error paths of the later steps
  }
}
  • Step 2: Initialize a clustering object. Header file: cluster.h
// Pick the algorithm used in each clustering phase; the supported
// alternatives are listed next to each choice.
initAlgo = "random"; // or "k-means++"
assignAlgo = "lloyd"; // or "range-lsh", "range-hypercube"
updateAlgo = "k-means"; // or "pam-lloyd"
metrice = "euclidean"; // or "cosine"

// The constructor reports problems through status, so check it before use
cluster* myCluster = new cluster(status, numClusters, initAlgo, assignAlgo, updateAlgo, metrice);
if(status != SUCCESS){
  printError(status);
  delete myCluster; // new already allocated the object even though status signals failure
  return -1; // Same failure code as the error paths of the later steps
}
  • Step 3: Fit the model and perform clustering
// Output containers filled by fit()
vector<Item> clusters;     // presumably one representative Item per cluster - confirm in cluster.h
vector<int> clustersSize;  // presumably the number of points in each cluster - confirm in cluster.h

// Run the clustering; the outcome is reported through status
myCluster->fit(clusters, clustersSize, status);
if(status != SUCCESS){
  printError(status);
  delete myCluster; // Release the clustering object before bailing out
  return -1; // Same failure code as the error paths of the later steps
}
  • Step 4: Print centroids. Header file: experiment/experimentHelpers.h
writeClusters(cout, clusters, clustersSize, updateAlgo, numClusters, status);
if(status != SUCCESS){
  printError(status);
  delete myCluster;
  return -1;
}
  • Step 5: Evaluate clustering via silhouette
vector<double> silhouetteArray;
int i;
int silhouetteArraySize;

myCluster->getSilhouette(silhouetteArray, status);
if(status != SUCCESS){
  printError(status);
  delete myCluster;
  return -1;
}

cout << "Cluster:$ Silhouette: [";   
silhouetteArraySize = silhouetteArray.size();

/* Print silhouette results */
for(i = 0; i < silhouetteArraySize; i++){
  cout << silhouetteArray[i];

  if(i != silhouetteArraySize - 1)
    cout << ",";
} // End for

cout << "]\n";
  • Step 6: Compilation. Take a look at the experiments makefile
Clone this wiki locally