Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Load the diabetes dataset and separate the predictors from the label.
import pandas as pd
import seaborn as sns

df = pd.read_csv('./diabetes.csv')
df  # notebook-style preview; has no effect when run as a plain script

# 'Outcome' is the label column; every remaining column is a feature.
y = df['Outcome']
x = df.drop(columns='Outcome')

# Inspect the class balance: first as a bar chart, then as raw counts.
sns.countplot(x=y)
y.value_counts()
# Split first, then scale. The scaler's per-column min/max must be learned
# from the training rows only; fitting it on the full dataset lets statistics
# of the held-out rows leak into the features the model is trained on.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    x, y, random_state=0, test_size=0.25
)

scaler = MinMaxScaler()
# Fit on the training split only ...
x_train = scaler.fit_transform(x_train_raw)
# ... and reuse the training min/max for the test split. Test values may fall
# slightly outside [0, 1]; that is expected and harmless for KNN.
x_test = scaler.transform(x_test_raw)

# Kept so that any code expecting the fully scaled matrix still finds it;
# it is produced with the training-fitted scaler.
x_scaled = scaler.transform(x)

# Notebook-style shape checks (no output when run as a plain script).
x.shape
x_train.shape
x_test.shape
# Baseline model: k-nearest neighbours with k = 5, scored on the held-out split.
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    accuracy_score,
    classification_report,
)
from sklearn.neighbors import KNeighborsClassifier

# fit() returns the estimator itself, so construction and training can chain.
knn = KNeighborsClassifier(n_neighbors=5).fit(x_train, y_train)
y_pred = knn.predict(x_test)

# Show the confusion matrix, then per-class precision / recall / F1.
ConfusionMatrixDisplay.from_predictions(y_true=y_test, y_pred=y_pred)
print(classification_report(y_true=y_test, y_pred=y_pred))
# Choose the number of neighbours, then refit and report on the test split.
#
# Each candidate k is scored with 5-fold cross-validation on the training
# split. The test split is never consulted while choosing k, so the final
# report is an unbiased estimate instead of one tuned against the very rows
# it is measured on.
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import cross_val_score

k_values = range(1, 41)

# error[i] is the mean cross-validated misclassification rate for k_values[i].
error = []
for k in k_values:
    candidate = KNeighborsClassifier(n_neighbors=k)
    fold_accuracy = cross_val_score(
        candidate, x_train, y_train, cv=5, scoring='accuracy'
    )
    error.append(1 - fold_accuracy.mean())
error  # notebook-style preview; has no effect when run as a plain script

# Error curve over k.
plt.figure(figsize=[16, 9])
plt.xlabel('Value of K')
plt.ylabel('Error')
plt.grid()
plt.xticks(k_values)
plt.plot(k_values, error, marker='.')

# Derive k from the sweep instead of hard-coding a value read off the plot;
# argmin returns the first minimum, so ties resolve to the smallest k.
best_k = k_values[int(np.argmin(error))]

knn = KNeighborsClassifier(n_neighbors=best_k)
knn.fit(x_train, y_train)
y_pred = knn.predict(x_test)
print(classification_report(y_test, y_pred))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement