Advertisement
makispaiktis

ML - Lab 8 - DBSCAN vs kmeans and selection of 'eps'

Oct 23rd, 2022 (edited)
1,158
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.27 KB | None | 0 0
  1. import numpy as np
  2. import pandas as pd
  3. import matplotlib.pyplot as plt
  4. import sklearn
  5.  
  6.  
  7. # Read data and plot them
  8. mdata = pd.read_csv("./mdata.txt")
  9. print("mdata summary = ")
  10. print(mdata.describe())
  11. print()
  12. plt.scatter(mdata.X, mdata.Y, marker="o")
  13. plt.title("Data 2-D")
  14. plt.show()
  15.  
  16.  
  17. # kmean
  18. from sklearn.cluster import KMeans
  19. kmeans = KMeans(n_clusters=2).fit(mdata)
  20. plt.scatter(mdata.X, mdata.Y, c=kmeans.labels_, cmap="bwr")
  21. plt.title("With kmeans")
  22. plt.xlabel("X")
  23. plt.ylabel("Y")
  24. plt.show()
  25.  
  26.  
  27. # DBSCAN
  28.  
  29. # Select 'epsilon' according to kNN distance plot
  30. from sklearn.cluster import KMeans
  31. from sklearn.neighbors import NearestNeighbors
  32. nbrs = NearestNeighbors(n_neighbors=10).fit(mdata)
  33. distances, indices = nbrs.kneighbors(mdata)
  34. print("Distances = ")
  35. print(distances)
  36. print()
  37. # Find the 10 nearest neighbors and find the maximum distance
  38. distanceDec = sorted(distances[:, 9])
  39. plt.plot(distanceDec)
  40. plt.title("10-NN 'Furthest' Neighbor Distance")
  41. plt.xlabel("Points sorted by distance")
  42. plt.ylabel("10-NN Distance")
  43. plt.show()
  44.  
  45. # DBSCAN
  46. from sklearn.cluster import DBSCAN
  47. clustering = DBSCAN(eps=0.4, min_samples=10).fit(mdata)
  48. plt.scatter(mdata.X, mdata.Y, c=clustering.labels_)
  49. plt.title("With DBSCAN")
  50. plt.xlabel("X")
  51. plt.ylabel("Y")
  52. plt.show()
  53.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement