Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # %%
- from sklearn.model_selection import train_test_split
- from sklearn.metrics import accuracy_score
- import pandas as pd
- import numpy as np
# %%
# Toy weather dataset: 30 observations, one list entry per observation in
# every column.  'Play' is the Yes/No label; the other four columns are the
# features used further down.
dataset_dict = {
    'Outlook': [
        'sunny', 'sunny', 'overcast', 'rainy', 'rainy', 'rainy', 'overcast', 'sunny', 'sunny', 'rainy',
        'sunny', 'overcast', 'overcast', 'sunny', 'rainy', 'sunny', 'overcast', 'overcast', 'rainy', 'rainy',
        'overcast', 'sunny', 'sunny', 'rainy', 'rainy', 'overcast', 'sunny', 'overcast', 'overcast', 'rainy',
    ],
    'Temperature': [
        85.0, 80.0, 83.0, 70.0, 68.0, 65.0, 64.0, 72.0, 69.0, 75.0,
        75.0, 72.0, 69.0, 81.0, 71.0, 81.0, 74.0, 76.0, 78.0, 82.0,
        67.0, 83.0, 67.0, 79.0, 80.0, 81.0, 75.0, 85.0, 76.0, 79.0,
    ],
    'Humidity': [
        85.0, 90.0, 78.0, 96.0, 80.0, 70.0, 65.0, 95.0, 70.0, 80.0,
        90.0, 75.0, 88.0, 88.0, 92.0, 85.0, 75.0, 92.0, 90.0, 82.0,
        67.0, 98.0, 80.0, 92.0, 90.0, 82.0, 95.0, 95.0, 93.0, 92.0,
    ],
    'Wind': [
        False, True, False, False, False, True, True, False, False, False,
        True, True, False, True, True, False, False, True, False, False,
        True, True, False, False, True, True, False, True, False, False,
    ],
    'Play': [
        'No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes',
        'Yes', 'Yes', 'Yes', 'No', 'No', 'Yes', 'Yes', 'No', 'No', 'Yes',
        'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'No',
    ],
}
# Create the DataFrame (one column per dictionary key, one row per observation)
original_df = pd.DataFrame(dataset_dict)
# Print the DataFrame
print(original_df)
- # %%
- from sklearn.preprocessing import StandardScaler
# %%
# Turn every feature into a number.
# One-hot encode 'Outlook'; the empty prefix and separator make the new
# columns carry the bare category names ('sunny', 'rainy', 'overcast').
df = pd.get_dummies(original_df, columns=['Outlook'], prefix='', prefix_sep='', dtype=int)
# Booleans -> 0/1.
df['Wind'] = df['Wind'].astype(int)
# Label: 'Yes' -> 1, anything else -> 0.
df['Play'] = (df['Play'] == 'Yes').astype(int)
# Fix the column order, with the label last.
df = df[['sunny', 'rainy', 'overcast', 'Temperature', 'Humidity', 'Wind', 'Play']]
# %%
# Separate the features (X) from the 'Play' label (y).
X, y = df.drop(columns='Play'), df['Play']
# shuffle=False keeps the original row order: the first half of the rows
# becomes the training set and the remaining rows the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.5, shuffle=False)
# %%
scaler = StandardScaler()
# Only the float64 columns are standardised; the integer 0/1 columns are
# left untouched.
float_cols = X_train.select_dtypes(include=['float64']).columns
# Assign into explicit copies so the column writes below never target a frame
# pandas still tracks as derived from another one (this is what triggers
# SettingWithCopyWarning on pandas versions without Copy-on-Write).
X_train, X_test = X_train.copy(), X_test.copy()
# Fit the scaler on the training rows only, then reuse the same fitted
# parameters for the test rows.
X_train[float_cols] = scaler.fit_transform(X_train[float_cols])
X_test[float_cols] = scaler.transform(X_test[float_cols])
# %%
# Show features and label side by side for each split, rounded to two
# decimals, each followed by a blank line.
print(pd.concat([X_train, y_train], axis=1).round(2), '\n')
print(pd.concat([X_test, y_test], axis=1).round(2), '\n')
# %%
# Number of neighbours used by the first classifier fitted below.
k = 5
- # %%
- from sklearn.neighbors import KNeighborsClassifier
# %%
# Fit a k-nearest-neighbours classifier on the training split, using `k`
# neighbours and the Euclidean distance.
distance_metric = 'euclidean'
knn_clf = KNeighborsClassifier(n_neighbors=k, metric=distance_metric)
knn_clf.fit(X_train, y_train)
# %%
# Predict the held-out rows and print the true labels next to the
# predictions for a quick visual comparison.
y_pred = knn_clf.predict(X_test)
print("Label :", list(y_test))
print("Prediction :", list(y_pred))
# %%
# Fraction of test rows predicted correctly, reported as a percentage.
accuracy = accuracy_score(y_test, y_pred)
print(f'accuracy: {accuracy*100}%')
# %%
# Accumulators for the sweep over several k values: the true test labels,
# plus one prediction list and one accuracy (in percent) per k.
labels, predictions, accuracies = list(y_test), [], []
# %%
# Refit and evaluate the classifier once per candidate k.
k_list = [3, 5, 7]
for k in k_list:
    # Pass the metric explicitly so the sweep uses the same distance as the
    # single classifier fitted earlier.
    knn_clf = KNeighborsClassifier(n_neighbors=k, metric=distance_metric)
    knn_clf.fit(X_train, y_train)
    y_pred = knn_clf.predict(X_test)
    predictions.append(list(y_pred))
    # Stored as a percentage.
    accuracies.append(accuracy_score(y_test, y_pred) * 100)
# %%
# One row per test sample: the true label followed by one prediction column
# per k value, named 'k = <k>'.
df_predictions = pd.DataFrame({'Label': labels})
for k, pred in zip(k_list, predictions):
    df_predictions[f'k = {k}'] = pred
# %%
# Build the accuracies as one column indexed by 'k = <k>', then transpose so
# the result is a single row with one column per k.
df_accuracies = pd.DataFrame({'Accuracy ': accuracies}, index=[f'k = {k}' for k in k_list]).T
# %%
# Final report: per-sample predictions, then the accuracy for each k.
print(df_predictions)
print(df_accuracies)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement