Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Agglomerative (hierarchical) clustering demo on five labelled 2-D points.

Running the script shows, in order:

1. a labelled scatter plot of the points,
2. a dendrogram for single and then complete linkage (each linkage matrix is
   also printed),
3. a two-cluster colouring of the points for single and then complete linkage.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn  # noqa: F401  (kept from the original script; not referenced directly)
from scipy.cluster.hierarchy import dendrogram
from sklearn.cluster import AgglomerativeClustering

# Read the data: five points labelled x1..x5.
X = [2, 8, 0, 7, 6]
Y = [0, 4, 6, 2, 1]
labels = [f"x{i}" for i in range(1, len(X) + 1)]
hdata = pd.DataFrame({"X": X, "Y": Y}, index=labels)

# Linkage criteria demonstrated, in display order.
LINKAGES = ("single", "complete")


def annotate_points(points):
    """Write each row's index label at its (X, Y) position on the current axes.

    Args:
        points: DataFrame with numeric ``"X"`` and ``"Y"`` columns.
    """
    for name, x, y in zip(points.index, points["X"], points["Y"]):
        plt.text(x, y, str(name), size=15, zorder=1)


def build_linkage_matrix(children, distances, n_samples):
    """Assemble a SciPy-style linkage matrix from a fitted merge tree.

    Args:
        children: ``(n_merges, 2)`` array of merged node ids, as exposed by
            ``AgglomerativeClustering.children_``. Ids below ``n_samples``
            are original points; id ``n_samples + i`` is the cluster created
            by merge ``i``.
        distances: Merge distances, one per row of ``children``.
        n_samples: Number of original observations.

    Returns:
        Float array of shape ``(n_merges, 4)`` with columns
        ``[child_a, child_b, distance, observation_count]``.
    """
    children = np.asarray(children)
    # The fourth column must hold the number of original observations under
    # each merged node, not a constant.
    counts = np.zeros(children.shape[0])
    for merge_idx, merge in enumerate(children):
        counts[merge_idx] = sum(
            1 if child < n_samples else counts[child - n_samples]
            for child in merge
        )
    return np.column_stack([children, distances, counts]).astype(float)


def show_points(points):
    """Display a labelled scatter plot of ``points``."""
    plt.figure()
    plt.scatter(points.X, points.Y)
    annotate_points(points)
    plt.title("2-D Points")
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.show()


def show_dendrogram(points, linkage):
    """Fit the full merge tree for ``linkage``, print its matrix, draw the dendrogram.

    Args:
        points: DataFrame of observations to cluster.
        linkage: Linkage criterion name passed to ``AgglomerativeClustering``.
    """
    # distance_threshold=0 with n_clusters=None makes scikit-learn build the
    # whole tree and populate ``distances_``.
    model = AgglomerativeClustering(
        n_clusters=None, linkage=linkage, distance_threshold=0
    ).fit(points)
    linkage_matrix = build_linkage_matrix(
        model.children_, model.distances_, len(points.index)
    )
    plt.title("Linkage = " + linkage)
    print("Linkage Matrix: ")
    print(linkage_matrix)
    print()
    dendrogram(linkage_matrix, labels=list(points.index))
    plt.show()


def show_flat_clusters(points, linkage, n_clusters=2):
    """Cut the hierarchy into ``n_clusters`` groups and colour the points by group.

    Args:
        points: DataFrame with ``"X"`` and ``"Y"`` columns.
        linkage: Linkage criterion name passed to ``AgglomerativeClustering``.
        n_clusters: Number of flat clusters to extract.
    """
    model = AgglomerativeClustering(n_clusters=n_clusters, linkage=linkage).fit(points)
    plt.scatter(points.X, points.Y, c=model.labels_, cmap="bwr")
    annotate_points(points)
    plt.title(f"{linkage} with {n_clusters} clusters")
    plt.show()


def main():
    """Run the demo: raw points, then dendrograms, then two-cluster plots."""
    show_points(hdata)
    # Hierarchical clustering: full trees first.
    for linkage in LINKAGES:
        show_dendrogram(hdata, linkage)
    # Then the same linkages cut into 2 clusters, drawn with different colours.
    for linkage in LINKAGES:
        show_flat_clusters(hdata, linkage, n_clusters=2)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement