Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Cluster the points in mdata.txt with k-means and DBSCAN, plotting each step.
#
# The script runs top to bottom and opens four figures in turn:
#   1. raw scatter of the X / Y columns
#   2. the same scatter coloured by k-means label
#   3. sorted k-distance curve, used to choose the DBSCAN radius
#   4. the same scatter coloured by DBSCAN label
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
from sklearn.cluster import DBSCAN, KMeans
from sklearn.neighbors import NearestNeighbors

DATA_PATH = "./mdata.txt"
N_CLUSTERS = 2    # number of clusters requested from k-means
MIN_SAMPLES = 10  # DBSCAN min_samples; also the k of the k-distance plot
EPS = 0.4         # DBSCAN radius; presumably read off the k-distance plot

# Read data and plot them
mdata = pd.read_csv(DATA_PATH)
print("mdata summary = ")
print(mdata.describe())
print()

plt.scatter(mdata.X, mdata.Y, marker="o")
plt.title("Data 2-D")
plt.show()

# k-means
# NOTE(review): every estimator below is fitted on *all* columns of mdata,
# while only X and Y are plotted -- confirm the file holds just those two.
# NOTE(review): no random_state is passed, so the label assignment (and hence
# the colours) may differ from run to run.
kmeans = KMeans(n_clusters=N_CLUSTERS).fit(mdata)

plt.scatter(mdata.X, mdata.Y, c=kmeans.labels_, cmap="bwr")
plt.title("With kmeans")
plt.xlabel("X")
plt.ylabel("Y")
plt.show()

# DBSCAN
# Select 'epsilon' according to kNN distance plot.
# The neighbour count is tied to MIN_SAMPLES so that the curve always matches
# the density threshold DBSCAN is run with further down.
nbrs = NearestNeighbors(n_neighbors=MIN_SAMPLES).fit(mdata)
distances, indices = nbrs.kneighbors(mdata)
print("Distances = ")
print(distances)
print()

# For each point take the distance to the furthest of its MIN_SAMPLES
# neighbours (the last column) and sort those values.
# Despite its name, distanceDec is in ascending order.
distanceDec = sorted(distances[:, -1])

plt.plot(distanceDec)
plt.title(f"{MIN_SAMPLES}-NN 'Furthest' Neighbor Distance")
plt.xlabel("Points sorted by distance")
plt.ylabel(f"{MIN_SAMPLES}-NN Distance")
plt.show()

# DBSCAN
clustering = DBSCAN(eps=EPS, min_samples=MIN_SAMPLES).fit(mdata)

plt.scatter(mdata.X, mdata.Y, c=clustering.labels_)
plt.title("With DBSCAN")
plt.xlabel("X")
plt.ylabel("Y")
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement