Add files via upload #4

Open · wants to merge 1 commit into master
33 changes: 33 additions & 0 deletions dunn_index.py
@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
"""dunn_index.ipynb

Automatically generated by Colaboratory.

Original file is located at
https://colab.research.google.com/drive/16IcXRcP3ke55vTQ52dO2Lxo41ldhYMlw
"""

from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score
import numpy as np

# load the Iris dataset (four feature columns)
data_file = "iris_data_012.txt"
X = np.loadtxt(data_file, delimiter=",", usecols=range(0, 4), dtype=np.float64)

# run K-Means for k = 2..49 and record the Davies-Bouldin score of each
# clustering (lower is better for this index)
z = []
for i in range(2, 50):
    kmeans = KMeans(n_clusters=i, random_state=1).fit(X)
    labels = kmeans.labels_
    z.append(davies_bouldin_score(X, labels))

# pick the first local minimum of the score; z[i] corresponds to k = i + 2
temp = 0
for i in range(1, 47):
    if z[i] < z[i - 1] and z[i] < z[i + 1]:
        temp = i + 2
        break
print(temp)
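
# Note: despite the filename, the script above scores clusterings with the
# Davies-Bouldin index, not the Dunn index. For reference, a minimal sketch
# of the Dunn index itself (higher is better); dunn_index below is
# illustrative only, is not part of the PR, and assumes scipy is available.
from scipy.spatial.distance import cdist

def dunn_index(X, labels):
    # Dunn index = (smallest distance between points in different clusters)
    #            / (largest distance between points in the same cluster)
    clusters = [X[labels == c] for c in np.unique(labels)]
    min_between = min(cdist(a, b).min()
                      for ai, a in enumerate(clusters)
                      for b in clusters[ai + 1:])
    max_within = max(cdist(c, c).max() for c in clusters)
    return min_between / max_within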
34 changes: 34 additions & 0 deletions silhouette.py
@@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
"""silhouette.ipynb

Automatically generated by Colaboratory.

Original file is located at
https://colab.research.google.com/drive/1gDnSfreOsusI924aP4ZaDw6_K-AImb3J
"""

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import numpy as np

print("\nLoading Iris data into memory \n")
data_file = "iris_data_012.txt"
data_x = np.loadtxt(data_file, delimiter=",", usecols=range(0, 4), dtype=np.float64)
data_y = np.loadtxt(data_file, delimiter=",", usecols=[4], dtype=int)

# run K-Means for k = 2..49 and record the mean silhouette score of each
# clustering
z = []
for i in range(2, 50):
    cluster = KMeans(n_clusters=i)
    cluster_labels = cluster.fit_predict(data_x)
    z.append(silhouette_score(data_x, cluster_labels))

# pick the first local minimum of the score; z[i] corresponds to k = i + 2
temp = 0
mini = 1000000
for i in range(1, 47):
    if z[i] < z[i - 1] and z[i] < z[i + 1]:
        temp = z[i]
        mini = i + 2
        break
print(temp, mini)
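
# Note: the mean silhouette score is conventionally maximized, so taking the
# first local minimum above is an unusual criterion; under the standard rule
# the choice collapses to one line (a sketch reusing the z list built above):
best_k = int(np.argmax(z)) + 2  # z[0] corresponds to k = 2
print("k with highest mean silhouette:", best_k)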

155 changes: 155 additions & 0 deletions sompy_.py
@@ -0,0 +1,155 @@
# -*- coding: utf-8 -*-
"""sompy_.ipynb

Automatically generated by Colaboratory.

Original file is located at
https://colab.research.google.com/drive/1k7dWaAq6FcNWEFJQmB6ShmPu2dYCHzQt
"""

# som_iris.py
# SOM for Iris dataset
# Anaconda3 5.2.0 (Python 3.6.5)
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# ==================================================================

import numpy as np
import matplotlib.pyplot as plt
# note: if the matplotlib import fails, try >pip uninstall matplotlib
# and then >pip install matplotlib

def closest_node(data, t, map, m_rows, m_cols):
    # (row, col) of the map node closest to data[t]
    result = (0, 0)
    small_dist = 1.0e20
    for i in range(m_rows):
        for j in range(m_cols):
            ed = euc_dist(map[i][j], data[t])
            if ed < small_dist:
                small_dist = ed
                result = (i, j)
    return result

def euc_dist(v1, v2):
    return np.linalg.norm(v1 - v2)

def manhattan_dist(r1, c1, r2, c2):
    return np.abs(r1 - r2) + np.abs(c1 - c2)

def most_common(lst, n):
    # lst is a list of values 0 .. n-1; returns the most frequent value
    if len(lst) == 0: return -1
    counts = np.zeros(shape=n, dtype=np.int64)
    for i in range(len(lst)):
        counts[lst[i]] += 1
    return np.argmax(counts)
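
# A vectorized equivalent of closest_node (a sketch, not part of the PR):
# one NumPy call replaces the Python-level double loop, which matters
# because the BMU search runs once per training step. The m_rows/m_cols
# parameters are kept only for drop-in signature compatibility.
def closest_node_fast(data, t, map, m_rows, m_cols):
    # distance from data[t] to every node; map has shape (m_rows, m_cols, dim)
    dists = np.linalg.norm(map - data[t], axis=2)
    return np.unravel_index(np.argmin(dists), dists.shape)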

# ==================================================================

def main():
    # 0. get started
    np.random.seed(1)
    Dim = 4  # features per data item; must match the 4 Iris columns
             # (the uploaded file had Dim = 2, which crashes euc_dist)
    Rows = 30; Cols = 30
    RangeMax = Rows + Cols
    LearnMax = 0.5
    StepsMax = 2000

    # 1. load data
    print("\nLoading Iris data into memory \n")
    data_file = "iris_data_012.txt"
    data_x = np.loadtxt(data_file, delimiter=",", usecols=range(0, 4), dtype=np.float64)
    data_y = np.loadtxt(data_file, delimiter=",", usecols=[4], dtype=int)

    # option: normalize data

    # find the number of clusters as the first local minimum of the mean
    # silhouette score over k = 2..49 (same approach as silhouette.py)
    mini = 1000000
    temp = 0
    z = []
    for i in range(2, 50):
        cluster = KMeans(n_clusters=i)
        cluster_labels = cluster.fit_predict(data_x)
        z.append(silhouette_score(data_x, cluster_labels))
    for i in range(1, 47):
        if z[i] < z[i - 1] and z[i] < z[i + 1]:
            temp = z[i]
            mini = i + 2
            break
    print(temp, mini)

    # 2. construct the SOM
    print("Constructing a 30x30 SOM from the iris data")
    map = np.random.random_sample(size=(Rows, Cols, Dim))
    for s in range(StepsMax):
        if s % (StepsMax // 10) == 0: print("step = ", str(s))
        # neighborhood radius and learning rate both decay linearly from
        # RangeMax and LearnMax toward 0 as s approaches StepsMax
        pct_left = 1.0 - ((s * 1.0) / StepsMax)
        curr_range = int(pct_left * RangeMax)
        curr_rate = pct_left * LearnMax

        # pick a random data item, find its best-matching unit (BMU), and
        # pull every node within curr_range (Manhattan distance) toward it
        t = np.random.randint(len(data_x))
        (bmu_row, bmu_col) = closest_node(data_x, t, map, Rows, Cols)
        for i in range(Rows):
            for j in range(Cols):
                if manhattan_dist(bmu_row, bmu_col, i, j) < curr_range:
                    map[i][j] = map[i][j] + curr_rate * (data_x[t] - map[i][j])
    print("SOM construction complete \n")

    # 3. construct U-Matrix
    print("Constructing U-Matrix from SOM")
    u_matrix = np.zeros(shape=(Rows, Cols), dtype=np.float64)
    for i in range(Rows):
        for j in range(Cols):
            v = map[i][j]  # weight vector of the current node
            sum_dists = 0.0; ct = 0

            if i-1 >= 0:  # above
                sum_dists += euc_dist(v, map[i-1][j]); ct += 1
            if i+1 <= Rows-1:  # below
                sum_dists += euc_dist(v, map[i+1][j]); ct += 1
            if j-1 >= 0:  # left
                sum_dists += euc_dist(v, map[i][j-1]); ct += 1
            if j+1 <= Cols-1:  # right
                sum_dists += euc_dist(v, map[i][j+1]); ct += 1

            # average distance to the neighbors that exist
            u_matrix[i][j] = sum_dists / ct
    print("U-Matrix constructed \n")

    # display U-Matrix
    plt.imshow(u_matrix, cmap='gray')  # black = close = clusters
    plt.show()

    # 4. because the data has labels, another possible visualization:
    # associate each data label with a map node
    print("Associating each data label to one map node ")
    mapping = np.empty(shape=(Rows, Cols), dtype=object)
    for i in range(Rows):
        for j in range(Cols):
            mapping[i][j] = []

    for t in range(len(data_x)):
        (m_row, m_col) = closest_node(data_x, t, map, Rows, Cols)
        mapping[m_row][m_col].append(data_y[t])

    label_map = np.zeros(shape=(Rows, Cols), dtype=np.int64)
    for i in range(Rows):
        for j in range(Cols):
            label_map[i][j] = most_common(mapping[i][j], mini)

    plt.imshow(label_map, cmap=plt.cm.get_cmap('terrain_r', mini + 1))
    plt.colorbar()
    plt.show()

# ==================================================================

if __name__ == "__main__":
    main()
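
# Hypothetical usage sketch (not part of the PR): if main() were changed to
# return map and label_map instead of only plotting them, an unseen sample
# could be classified by the label of its best-matching unit:
#
#   map, label_map = main()
#   new_x = np.array([5.1, 3.5, 1.4, 0.2])  # made-up Iris measurements
#   row, col = closest_node(np.array([new_x]), 0, map, 30, 30)
#   print("predicted label:", label_map[row][col])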

#sarthak agarwal