namanvashistha · invincibel · Aug 27, 2019 · Aug 27, 2019 · Sep 22, 2019 · Sep 22, 2019
diff --git a/Untitled0.ipynb b/Untitled0.ipynb
diff --git a/main.ipynb b/main.ipynb
@@ -0,0 +1,199 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "main.ipynb",
+      "provenance": [],
+      "private_outputs": true,
+      "collapsed_sections": [],
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/namanvashistha/minor1/blob/hritik/main.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "CDQ2ISwS95Bu",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# som_iris.py\n",
+        "# SOM demo; despite the Iris naming, main() loads its data from spiral.txt\n",
+        "# Anaconda3 5.2.0 (Python 3.6.5)\n",
+        "\n",
+        "# ==================================================================\n",
+        "\n",
+        "import numpy as np\n",
+        "import matplotlib.pyplot as plt\n",
+        "import pandas as pd\n",
+        "from sklearn.preprocessing import MinMaxScaler\n",
+        "from sklearn.cluster import KMeans\n",
+        "from sklearn.metrics import silhouette_score\n",
+        "# note: if this fails, try >pip uninstall matplotlib\n",
+        "# and then >pip install matplotlib\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "def closest_node(data, t, map, m_rows, m_cols):\n",
+        "  \"\"\"Return the (row, col) of the map node nearest to data[t].\n",
+        "\n",
+        "  Nodes map[i][j] for i in 0..m_rows-1 and j in 0..m_cols-1 are visited\n",
+        "  in row-major order and compared with data[t] via euc_dist. The first\n",
+        "  node with the smallest distance wins. (0, 0) is returned when no node\n",
+        "  is closer than 1.0e20 (including when the grid is empty).\n",
+        "  \"\"\"\n",
+        "  best_pos = (0, 0)\n",
+        "  best_dist = 1.0e20\n",
+        "  grid = ((row, col) for row in range(m_rows) for col in range(m_cols))\n",
+        "  for row, col in grid:\n",
+        "    dist = euc_dist(map[row][col], data[t])\n",
+        "    # strict < keeps the earliest node on ties\n",
+        "    if dist < best_dist:\n",
+        "      best_dist, best_pos = dist, (row, col)\n",
+        "  return best_pos\n",
+        "\n",
+        "def euc_dist(v1, v2):\n",
+        "  \"\"\"Return the Euclidean (L2) distance between vectors v1 and v2.\"\"\"\n",
+        "  delta = v1 - v2\n",
+        "  return np.linalg.norm(delta)\n",
+        "\n",
+        "def manhattan_dist(r1, c1, r2, c2):\n",
+        "  \"\"\"Return the Manhattan distance between grid cells (r1, c1) and (r2, c2).\"\"\"\n",
+        "  row_gap = np.abs(r1 - r2)\n",
+        "  col_gap = np.abs(c1 - c2)\n",
+        "  return row_gap + col_gap\n",
+        "\n",
+        "def most_common(lst, n):\n",
+        "  \"\"\"Return the most frequent label in lst, or -1 when lst is empty.\n",
+        "\n",
+        "  lst holds non-negative integer labels and n is the expected number of\n",
+        "  distinct labels (values 0 .. n-1). Labels >= n are still counted\n",
+        "  instead of raising IndexError. Ties go to the smallest label.\n",
+        "\n",
+        "  Raises ValueError if lst contains a negative label.\n",
+        "  \"\"\"\n",
+        "  if len(lst) == 0: return -1\n",
+        "  # bincount replaces the manual tally; the removed np.int alias is no\n",
+        "  # longer needed and minlength keeps at least n slots as before.\n",
+        "  counts = np.bincount(np.asarray(lst), minlength=max(int(n), 0))\n",
+        "  return np.argmax(counts)\n",
+        "\n",
+        "# ==================================================================\n",
+        "\n",
+        "def main():\n",
+        "  \"\"\"Train a SOM on the 2-D points in spiral.txt and plot the results.\n",
+        "\n",
+        "  Steps: load the points and their integer labels, pick a cluster count\n",
+        "  from KMeans silhouette scores, train a Rows x Cols SOM, show its\n",
+        "  U-Matrix, then show the most common data label of each map node.\n",
+        "  \"\"\"\n",
+        "  # 0. get started\n",
+        "  np.random.seed(1)\n",
+        "  Dim = 2\n",
+        "  Rows = 30; Cols = 30\n",
+        "  RangeMax = Rows + Cols\n",
+        "  LearnMax = 0.5\n",
+        "  StepsMax = 2000\n",
+        "\n",
+        "  # 1. load data\n",
+        "  # NOTE(review): the messages still say Iris although spiral.txt is read\n",
+        "  print(\"\\nLoading Iris data into memory \\n\")\n",
+        "  data_file = \"spiral.txt\"\n",
+        "  data_x = np.loadtxt(data_file, delimiter=\",\", usecols=range(0,2),\n",
+        "    dtype=np.float64)\n",
+        "  # builtin int replaces the removed np.int alias (same dtype)\n",
+        "  data_y = np.loadtxt(data_file, delimiter=\",\", usecols=[2],\n",
+        "    dtype=int)\n",
+        "  # option: normalize data (not applied here)\n",
+        "\n",
+        "  # finding minimum clusters required: silhouette score for 2..49 clusters\n",
+        "  mini = 1000000  # stays at this sentinel if no local minimum is found\n",
+        "  z = []\n",
+        "  for i in range(2,50):\n",
+        "    cluster = KMeans(n_clusters = i)\n",
+        "    cluster_labels = cluster.fit_predict(data_x)\n",
+        "    silhouette_avg = silhouette_score(data_x, cluster_labels)\n",
+        "    z.append(silhouette_avg)\n",
+        "  # z[i] is the score for i+2 clusters; stop at the first local minimum.\n",
+        "  # NOTE(review): silhouette is usually maximized - confirm this heuristic\n",
+        "  for i in range(1, len(z) - 1):\n",
+        "    if z[i]<z[i-1] and z[i]<z[i+1]:\n",
+        "      mini = i+2\n",
+        "      break\n",
+        "  print(\"No. of clusters to be formed: \",mini)\n",
+        "\n",
+        "  # 2. construct the SOM\n",
+        "  print(\"Constructing a 30x30 SOM from the iris data\")\n",
+        "  som = np.random.random_sample(size=(Rows,Cols,Dim))\n",
+        "  for s in range(StepsMax):\n",
+        "    if s % (StepsMax/10) == 0: print(\"step = \", str(s))\n",
+        "    # neighbourhood radius and learning rate both decay linearly to zero\n",
+        "    pct_left = 1.0 - ((s * 1.0) / StepsMax)\n",
+        "    curr_range = int(pct_left * RangeMax)\n",
+        "    curr_rate = pct_left * LearnMax\n",
+        "\n",
+        "    t = np.random.randint(len(data_x))\n",
+        "    (bmu_row, bmu_col) = closest_node(data_x, t, som, Rows, Cols)\n",
+        "    # pull every node within curr_range of the best matching unit\n",
+        "    # towards the sampled point\n",
+        "    for i in range(Rows):\n",
+        "      for j in range(Cols):\n",
+        "        if manhattan_dist(bmu_row, bmu_col, i, j) < curr_range:\n",
+        "          som[i][j] = som[i][j] + curr_rate * (data_x[t] - som[i][j])\n",
+        "  print(\"SOM construction complete \\n\")\n",
+        "\n",
+        "  # 3. construct U-Matrix: mean distance from each node to its\n",
+        "  # up/down/left/right neighbours\n",
+        "  print(\"Constructing U-Matrix from SOM\")\n",
+        "  u_matrix = np.zeros(shape=(Rows,Cols), dtype=np.float64)\n",
+        "  for i in range(Rows):\n",
+        "    for j in range(Cols):\n",
+        "      v = som[i][j]  # a vector\n",
+        "      sum_dists = 0.0; ct = 0\n",
+        "\n",
+        "      if i-1 >= 0:    # above\n",
+        "        sum_dists += euc_dist(v, som[i-1][j]); ct += 1\n",
+        "      if i+1 <= Rows-1:   # below\n",
+        "        sum_dists += euc_dist(v, som[i+1][j]); ct += 1\n",
+        "      if j-1 >= 0:   # left\n",
+        "        sum_dists += euc_dist(v, som[i][j-1]); ct += 1\n",
+        "      if j+1 <= Cols-1:   # right\n",
+        "        sum_dists += euc_dist(v, som[i][j+1]); ct += 1\n",
+        "\n",
+        "      # a 1x1 map has no neighbours; avoid dividing by zero\n",
+        "      u_matrix[i][j] = sum_dists / ct if ct else 0.0\n",
+        "  print(\"U-Matrix constructed \\n\")\n",
+        "\n",
+        "  # display U-Matrix\n",
+        "  plt.imshow(u_matrix, cmap='gray')  # black = close = clusters\n",
+        "  plt.show()\n",
+        "\n",
+        "  # 4. because the data has labels, another possible visualization:\n",
+        "  # associate each data label with a map node\n",
+        "  print(\"Associating each data label to one map node \")\n",
+        "  mapping = np.empty(shape=(Rows,Cols), dtype=object)\n",
+        "  for i in range(Rows):\n",
+        "    for j in range(Cols):\n",
+        "      mapping[i][j] = []\n",
+        "\n",
+        "  for t in range(len(data_x)):\n",
+        "    (m_row, m_col) = closest_node(data_x, t, som, Rows, Cols)\n",
+        "    mapping[m_row][m_col].append(data_y[t])\n",
+        "\n",
+        "  # most_common indexes its count array by label value, so size it to\n",
+        "  # cover the largest label even when mini is smaller\n",
+        "  n_slots = max(mini, int(data_y.max()) + 1)\n",
+        "  label_map = np.zeros(shape=(Rows,Cols), dtype=int)\n",
+        "  for i in range(Rows):\n",
+        "    for j in range(Cols):\n",
+        "      # nodes that attracted no data point get -1\n",
+        "      label_map[i][j] = most_common(mapping[i][j], n_slots)\n",
+        "\n",
+        "  # plt.get_cmap replaces plt.cm.get_cmap, which newer Matplotlib removed\n",
+        "  plt.imshow(label_map, cmap=plt.get_cmap('terrain_r', mini))\n",
+        "  plt.colorbar()\n",
+        "  plt.show()\n",
+        "\n",
+        "# ==================================================================\n",
+        "\n",
+        "# Run the SOM pipeline when this code executes as the top-level module.\n",
+        "if __name__==\"__main__\":\n",
+        "  main()\n"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    }
+  ]
+}