Advertisement
amu2002

Diabetes Classification (k-Nearest Neighbours)

Nov 20th, 2023
39
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.40 KB | None | 0 0
  1.  
  2.  
  3. import pandas as pd
  4. import seaborn as sns
  5.  
  6. df = pd.read_csv('./diabetes.csv')
  7.  
  8. df
  9.  
  10. x = df.drop('Outcome', axis = 1)
  11.  
  12. y = df['Outcome']
  13.  
  14. sns.countplot(x = y)
  15.  
  16. y.value_counts()
  17.  
  18. from sklearn.preprocessing import MinMaxScaler
  19. scaler = MinMaxScaler()
  20. x_scaled = scaler.fit_transform(x)
  21.  
  22. from sklearn.model_selection import train_test_split
  23. x_train,x_test,y_train,y_test = train_test_split(x_scaled,y, random_state = 0, test_size = 0.25)
  24.  
  25. x.shape
  26.  
  27. x_train.shape
  28.  
  29. x_test.shape
  30.  
  31. from sklearn.neighbors import KNeighborsClassifier
  32. knn =KNeighborsClassifier(n_neighbors=5)
  33. knn.fit(x_train, y_train)
  34.  
  35. from sklearn.metrics import accuracy_score, ConfusionMatrixDisplay
  36. from sklearn.metrics import classification_report
  37. y_pred = knn.predict(x_test)
  38.  
  39. ConfusionMatrixDisplay.from_predictions(y_test,y_pred)
  40.  
  41. print(classification_report(y_test, y_pred))
  42.  
  43. import matplotlib.pyplot as plt
  44. import numpy as np
  45. error = []
  46. for k in range(1,41):
  47.   knn = KNeighborsClassifier(n_neighbors=k)
  48.   knn.fit(x_train, y_train)
  49.   pred = knn.predict(x_test)
  50.   error.append(np.mean(pred != y_test))
  51. error
  52.  
  53. plt.figure(figsize = [16,9])
  54. plt.xlabel('Value of K')
  55. plt.ylabel('Error')
  56. plt.grid()
  57. plt.xticks(range(1,41))
  58. plt.plot(range(1,41), error, marker='.')
  59.  
  60. knn = KNeighborsClassifier(n_neighbors=33)
  61. knn.fit(x_train, y_train)
  62.  
  63. y_pred = knn.predict(x_test)
  64. print(classification_report(y_test, y_pred))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement