Advertisement
makispaiktis

ML - Lab 7 - kmeans

Oct 22nd, 2022 (edited)
709
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.86 KB | None | 0 0
  1. import numpy as np
  2. import pandas as pd
  3. import matplotlib.pyplot as plt
  4. import sklearn
  5. import math
  6.  
  # Build the toy 2-D data set and show the raw points before any clustering
X = [7, 3, 1, 5, 1, 7, 8, 5]
Y = [1, 4, 5, 8, 3, 8, 2, 9]

# One name per point ("x1", "x2", ...); the names double as the DataFrame index
labels = ["x" + str(n) for n, _ in enumerate(X, start=1)]
kdata = pd.DataFrame(index=labels, data={"X": X, "Y": Y})

plt.figure()
plt.scatter(kdata["X"], kdata["Y"])
# Write each point's name next to its marker
for name, px, py in zip(labels, X, Y):
    plt.text(px, py, name, size=15, zorder=1)
plt.xlabel("X")
plt.ylabel("Y")
plt.title("2-D points before clustering")
plt.show()
  20.  
  21.  
  # Fit k-means with K clusters, seeding the centroids with the first K points
from sklearn.cluster import KMeans

K = 3
# Rows of the first K points, in order, used as the starting centroids
init_centroids = kdata.loc[labels[0:K], :]

# n_init=1: the starting centroids are fixed, so re-running the initialisation
# cannot produce a different result. Stating it explicitly avoids the warning
# older scikit-learn releases emit when an explicit `init` is combined with
# their default of several initialisations (they run only one anyway).
kmeans = KMeans(n_clusters=K, init=init_centroids, n_init=1).fit(kdata)

print("Final Centroids = ")
print(kmeans.cluster_centers_)
print()
print("Each point's label of centroid = ")
print(kmeans.labels_)
print()
  34.  
  35.  
  # Cohesion (within-cluster sum of squares) and separation (between-cluster)
print("K = " + str(K))

# scikit-learn exposes the within-cluster sum of squared distances as inertia_
cohesion = kmeans.inertia_
print("Cohesion = " + str(cohesion))

# Separation = sum over clusters of |C_i| * ||m_i - m||^2, where m is the mean
# of all points and m_i the mean of cluster i. The squared deviation is summed
# directly instead of taking a square root and squaring it again, which only
# added floating-point rounding.
m = kdata.mean()
clusters = kdata.groupby(kmeans.labels_)
sq_dist_to_mean = ((clusters.mean() - m) ** 2).sum(axis=1)
separation = float((clusters.size() * sq_dist_to_mean).sum())

print("Separation = " + str(separation))
print("CSS + BSS = " + str(cohesion + separation))
  49.  
  50.  
  # Plot the points coloured by assigned cluster, plus the final centroids
plt.figure()
plt.scatter(kdata["X"], kdata["Y"], c=kmeans.labels_)

# Write each point's name (its index label) next to its marker
for point in kdata.itertuples():
    plt.text(point.X, point.Y, point.Index, size=15, zorder=1)

# Centroids are drawn as "+" markers, one colour per cluster number
centroid_x, centroid_y = kmeans.cluster_centers_.T
plt.scatter(centroid_x, centroid_y, marker="+", s=169, c=range(K))

plt.xlabel("X")
plt.ylabel("Y")
plt.title("After clustering")
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement