Advertisement
makispaiktis

ML - Lab 8 - Hierarchical Clustering: Dendrograms

Oct 22nd, 2022 (edited)
1,000
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.43 KB | None | 0 0
  1. import pandas as pd
  2. import matplotlib.pyplot as plt
  3. import sklearn
  4.  
  5. # Read and plot the data
  6. X = [2, 8, 0, 7, 6]
  7. Y = [0, 4, 6, 2, 1]
  8. labels = ["x"+str(i) for i in range(1, len(X)+1)]
  9. hdata = pd.DataFrame({"X": X, "Y": Y}, index=labels)
  10. plt.figure()
  11. plt.scatter(hdata.X, hdata.Y)
  12. for i in range(len(hdata.index)):
  13.     plt.text(hdata.loc[labels[i], "X"], hdata.loc[labels[i], "Y"], '%s' % (str(labels[i])), size=15, zorder=1)
  14. plt.title("2-D Points")
  15. plt.xlabel("X")
  16. plt.ylabel("Y")
  17. plt.show()
  18.  
  19.  
  20. # Hierarchical Clustering
  21. from scipy.cluster.hierarchy import dendrogram
  22. from sklearn.cluster import AgglomerativeClustering
  23. import numpy as np
  24.  
  25.  
  26.  
  27. # Single
  28. linkage = "single"
  29. n_clusters = None
  30. clustering = AgglomerativeClustering(n_clusters=n_clusters, linkage=linkage, distance_threshold=0).fit(hdata)
  31. linkage_matrix = np.column_stack([clustering.children_, clustering.distances_, np.ones(len(hdata.index)-1)]).astype(float)
  32. plt.title("Linkage = " + linkage)
  33. print("Linkage Matrix: ")
  34. print(linkage_matrix)
  35. print()
  36. dendrogram(linkage_matrix, labels=labels)
  37. plt.show()
  38.  
  39. # Complete
  40. linkage = "complete"
  41. n_clusters = None
  42. clustering = AgglomerativeClustering(n_clusters=n_clusters, linkage=linkage, distance_threshold=0).fit(hdata)
  43. linkage_matrix = np.column_stack([clustering.children_, clustering.distances_, np.ones(len(hdata.index)-1)]).astype(float)
  44. plt.title("Linkage = " + linkage)
  45. print("Linkage Matrix: ")
  46. print(linkage_matrix)
  47. print()
  48. dendrogram(linkage_matrix, labels=labels)
  49. plt.show()
  50.  
  51.  
  52.  
  53. # Single with 2 clusters - Draw with Different Colors
  54. linkage = "single"
  55. n_clusters = 2
  56. clustering = AgglomerativeClustering(n_clusters=n_clusters, linkage=linkage).fit(hdata)
  57. plt.scatter(hdata.X, hdata.Y, c=clustering.labels_, cmap="bwr")
  58. for i in range(len(hdata.index)):
  59.     plt.text(hdata.loc[labels[i], "X"], hdata.loc[labels[i], "Y"], '%s' % (str(labels[i])), size=15, zorder=1)
  60. plt.title(linkage + " with " + str(n_clusters) + " clusters")
  61. plt.show()
  62.  
  63. # Complete with 2 clusters - Draw with Different Colors
  64. linkage = "complete"
  65. n_clusters = 2
  66. clustering = AgglomerativeClustering(n_clusters=n_clusters, linkage=linkage).fit(hdata)
  67. plt.scatter(hdata.X, hdata.Y, c=clustering.labels_, cmap="bwr")
  68. for i in range(len(hdata.index)):
  69.     plt.text(hdata.loc[labels[i], "X"], hdata.loc[labels[i], "Y"], '%s' % (str(labels[i])), size=15, zorder=1)
  70. plt.title(linkage + " with " + str(n_clusters) + " clusters")
  71. plt.show()
  72.  
  73.  
  74.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement