Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- # coding: utf-8
- # In[16]:
- import pandas as pd
- import numpy as np
- import matplotlib.pyplot as plt
- import seaborn as sns
- import glob
- import os
- import plotly.express as px
- import plotly.offline as py
- from sklearn.model_selection import train_test_split
- from sklearn.svm import SVC
- from sklearn.metrics import confusion_matrix
- from sklearn.metrics import accuracy_score
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import precision_score
- from sklearn.metrics import recall_score
- from sklearn.metrics import roc_curve
- from sklearn.metrics import roc_auc_score
- from sklearn.metrics import auc
- from sklearn.metrics import precision_recall_curve
- from sklearn.model_selection import GridSearchCV
- from sklearn.metrics import roc_auc_score
- import matplotlib.pyplot as plt
- from sklearn.metrics import accuracy_score
- from sklearn import svm
# In[17]:
# Read the heart dataset from its fixed local path; the bare `df`
# expression renders the frame when this runs as a notebook cell.
df = pd.read_csv(filepath_or_buffer='D:/heart.csv')
df
# In[18]:
# Feature legend:
#    1. age
#    2. sex (1 = male; 0 = female)
#    3. chest pain type (4 values)
#    4. resting blood pressure
#    5. serum cholesterol in mg/dl
#    6. fasting blood sugar > 120 mg/dl
#    7. resting electrocardiographic results (values 0, 1, 2)
#    8. maximum heart rate achieved
#    9. exercise induced angina
#   10. oldpeak = ST depression induced by exercise relative to rest
#   11. the slope of the peak exercise ST segment
#   12. number of major vessels (0-3) colored by fluoroscopy
#   13. thal: 3 = normal; 6 = fixed defect; 7 = reversible defect
# Summary statistics for every column.
df.describe()
# In[19]:
# First rows of the table.
df.head()
# In[21]:
# Gender distribution.
# The readable labels are attached to a temporary copy only: overwriting
# df['sex'] in place would leave a string column in the shared frame (which
# breaks later numeric steps such as df.corr()) and would turn every value
# into NaN if this cell were executed a second time.
sex_labels = df['sex'].map({1: 'Male', 0: 'Female'})
px.pie(df.assign(sex=sex_labels), names='sex')
# In[123]:
# Number of missing values in each column.
df.isna().sum()
# In[124]:
# Bar chart of how many rows fall on each age value; the large canvas and
# font size keep the rotated tick labels readable.
plt.figure(figsize=(20, 10))
sns.countplot(data=df, x='age')
plt.xticks(fontsize=30, rotation=90)
plt.yticks(fontsize=30)
# In[24]:
# Age histogram split by target class, with a box plot in the margin.
px.histogram(df, x='age', color='target', marginal="box", hover_data=df.columns)
# In[27]:
# Cholesterol per target class: violin with embedded box and all points shown.
px.violin(df, y="chol", color="target", box=True, points="all", hover_data=df.columns)
# In[28]:
# Same view for age.
px.violin(df, y="age", color="target", box=True, points="all", hover_data=df.columns)
# In[29]:
# Pairwise scatter/histogram grid over the frame.
sns.pairplot(df)
plt.show()
# In[30]:
# Correlation matrix, computed once and reused for both the heatmap and the
# tabular display below. It is restricted to numeric columns because
# DataFrame.corr() raises on non-numeric data in pandas >= 2.0 (for example
# if `sex` has been mapped to string labels).
corr_matrix = df.select_dtypes(include='number').corr()
plt.figure(figsize=(15, 15))
sns.heatmap(corr_matrix)
plt.show()
# In[31]:
corr_matrix
# In[32]:
# Predictors: every column except the label and the features dropped here.
pred = df.drop(columns=["target", "fbs", "chol", "age", "sex", "trestbps"])
target = df['target']
# 80/20 hold-out split. A fixed seed makes every metric below reproducible
# from run to run, and stratifying on the label keeps the class ratio the
# same in the train and test partitions.
X_train, X_test, Y_train, Y_test = train_test_split(
    pred,
    target,
    test_size=0.20,
    random_state=42,
    stratify=target,
)
# In[33]:
X_train.shape
# In[34]:
X_test.shape
- # In[35]:
def training_model(X_train, y_train, X_test, y_test, classifier, **kwargs):
    """Build, fit and score a classifier, then return the fitted model.

    Args:
        X_train: Training features, passed to ``fit`` and ``score``.
        y_train: Training labels.
        X_test: Held-out features, passed to ``score`` only.
        y_test: Held-out labels.
        classifier: Callable (typically an estimator class) invoked as
            ``classifier(**kwargs)`` to create the model. The result must
            provide ``fit(X, y)`` and ``score(X, y)``.
        **kwargs: Forwarded unchanged to ``classifier``.

    Returns:
        The model instance after ``fit`` has been called on the training data.

    Side effects:
        Prints the train and test scores as percentages with three decimals.
    """
    model = classifier(**kwargs)
    model.fit(X_train, y_train)
    train_accuracy = model.score(X_train, y_train)
    test_accuracy = model.score(X_test, y_test)
    print(f"Train accuracy: {train_accuracy:0.3%}")
    print(f"Test accuracy: {test_accuracy:0.3%}")
    return model
# In[36]:
# training_model() already fits the classifier and prints its train/test
# accuracy, so the model is not fitted or scored a second time here.
KNN = training_model(X_train, Y_train, X_test, Y_test, KNeighborsClassifier, n_neighbors=7)
# Hard class predictions for both partitions; the test predictions feed the
# confusion matrix and precision/recall computed further down.
y_pred_test = KNN.predict(X_test)
y_pred_train = KNN.predict(X_train)
# In[37]:
# Confusion matrix of the KNN test predictions, drawn as an annotated heatmap.
matrix = confusion_matrix(Y_test, y_pred_test)
sns.heatmap(data=matrix, annot=True)
# In[38]:
precision, recall = (
    precision_score(Y_test, y_pred_test),
    recall_score(Y_test, y_pred_test),
)
print("Precision: ", precision)
print("Recall is: ", recall)
# Harmonic mean of precision and recall (the F-beta score with beta = 1).
print("fbeta score:", (2 * precision * recall) / (precision + recall))
# In[41]:
# Second predict_proba column is used as the score for the ROC curve.
y_pred_prob = KNN.predict_proba(X_test)[:, 1]
FPR, TPR, threshold = roc_curve(Y_test, y_pred_prob)
# In[55]:
# Linear-kernel SVM; probability=True is required for the predict_proba
# call made when the ROC curve is built.
SVM = svm.SVC(C=5, kernel='linear', probability=True)
SVM.fit(X_train, Y_train)
yhat1_train = SVM.predict(X_train)
yhat1_test = SVM.predict(X_test)
print("test_acc_score", accuracy_score(Y_test, yhat1_test))
print("train_acc_score", accuracy_score(Y_train, yhat1_train))
# In[48]:
# Confusion matrix of the SVM test predictions, drawn as an annotated heatmap.
matrix1 = confusion_matrix(Y_test, yhat1_test)
sns.heatmap(data=matrix1, annot=True)
# In[59]:
precision, recall = (
    precision_score(Y_test, yhat1_test),
    recall_score(Y_test, yhat1_test),
)
print("Precision: ", precision)
print("Recall is: ", recall)
# Harmonic mean of precision and recall (the F-beta score with beta = 1).
print("fbeta score is:", (2 * precision * recall) / (precision + recall))
# In[56]:
# Second predict_proba column is used as the score for the ROC curve.
y_pred_prob1 = SVM.predict_proba(X_test)[:, 1]
FPR1, TPR1, threshold1 = roc_curve(Y_test, y_pred_prob1)
# In[57]:
# The seaborn style sheet is called 'seaborn-v0_8' in newer matplotlib
# releases and plain 'seaborn' in older ones; asking for a name that is not
# installed raises, so use whichever spelling this installation provides
# (and keep the default style if neither is present).
for _style in ('seaborn-v0_8', 'seaborn'):
    if _style in plt.style.available:
        plt.style.use(_style)
        break
# ROC curves of both classifiers on the same axes.
plt.plot(FPR, TPR, linestyle='--', color='green', label='KNN')
plt.plot(FPR1, TPR1, linestyle='--', color='red', label='SVM')
plt.title('ROC curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive rate')
plt.legend(loc='best')
# Save before show(): the figure is written to disk while it is still current.
plt.savefig('ROC', dpi=300)
plt.show()
# In[58]:
# Area under the ROC curve: first the KNN scores, then the SVM scores.
AUC_score1, AUC_score2 = (
    roc_auc_score(Y_test, scores) for scores in (y_pred_prob, y_pred_prob1)
)
print(AUC_score1, AUC_score2, sep="\n")
# In[ ]:
# Hyper-parameter tuning for the SVM with an exhaustive grid search
# (5-fold cross-validation, accuracy as the selection metric).
svm_clf = SVC(kernel='rbf', gamma=0.1, C=1.0)
params = {
    "C": (0.1, 0.5, 1, 2, 5, 10, 20),
    "gamma": (0.001, 0.01, 0.1, 0.25, 0.5, 0.75, 1),
    "kernel": ('linear', 'poly', 'rbf'),
}
svm_cv = GridSearchCV(svm_clf, params, n_jobs=-1, cv=5, verbose=1, scoring="accuracy")
svm_cv.fit(X_train, Y_train)
best_params = svm_cv.best_params_
print(f"Best params: {best_params}")
# With the default refit=True the search has already retrained the winning
# configuration on all of X_train, so that estimator is reused directly.
svm_clf = svm_cv.best_estimator_
# The original cell called print_score(), which is neither defined nor
# imported in this script; report train/test accuracy with accuracy_score
# instead, in the same format as the linear-SVM cell.
print("train_acc_score", accuracy_score(Y_train, svm_clf.predict(X_train)))
print("test_acc_score", accuracy_score(Y_test, svm_clf.predict(X_test)))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement