Advertisement
Radeen10-_

Machine Learning Model

Feb 2nd, 2022
987
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.88 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # coding: utf-8
  3.  
  4. # In[16]:
  5.  
  6.  
  7. import pandas as pd
  8. import numpy as np
  9. import matplotlib.pyplot as plt
  10. import seaborn as sns
  11. import glob
  12. import os
  13.  
  14. import plotly.express as px
  15. import plotly.offline as py
  16. from sklearn.model_selection import train_test_split
  17. from sklearn.svm import SVC
  18. from sklearn.metrics import confusion_matrix
  19. from sklearn.metrics import accuracy_score
  20. from sklearn.neighbors import KNeighborsClassifier
  21. from sklearn.metrics import precision_score
  22. from sklearn.metrics import recall_score
  23. from sklearn.metrics import roc_curve
  24. from sklearn.metrics import roc_auc_score
  25. from sklearn.metrics import auc
  26. from sklearn.metrics import precision_recall_curve
  27. from sklearn.model_selection import GridSearchCV
  28. from sklearn.metrics import roc_auc_score
  29. import matplotlib.pyplot as plt
  30. from sklearn.metrics import accuracy_score
  31. from sklearn import svm
  32.  
  33.  
  34. # In[17]:
  35.  
  36.  
  37. df=pd.read_csv('D:/heart.csv')
  38. df
  39.  
  40.  
  41. # In[18]:
  42.  
  43.  
  44. # 1.    age
  45. # 2.    sex (1 = male; 0= female)
  46. # 3.    chest pain type (4 values)
  47. # 4.    resting blood pressure
  48. # 5.    serum cholestoral in mg/dl
  49. # 6.    fasting blood sugar > 120 mg/dl
  50. # 7.    resting electrocardiographic results (values 0,1,2)
  51. # 8.    maximum heart rate achieved
  52. # 9.    exercise induced angina
  53. # 10.   oldpeak = ST depression induced by exercise relative to rest
  54. # 11.   the slope of the peak exercise ST segment
  55. # 12.   number of major vessels (0-3) colored by flourosopy
  56. # 13.   thal: 3 = normal; 6 = fixed defect; 7 = reversable defect
  57.  
  58. df.describe()
  59.  
  60.  
  61. # In[19]:
  62.  
  63.  
  64. df.head()
  65.  
  66.  
  67. # In[21]:
  68.  
  69.  
  70. # Gender Distribution
  71. df['sex']=df['sex'].map({
  72.     1:'Male',
  73.     0:'Female'
  74. })
  75. px.pie(df, names='sex',
  76. #        color_discrete_sequence=px.colors.sequential.Aggrnyl,
  77. #        hole=.6,
  78.        
  79.        )
  80.  
  81.  
  82. # In[123]:
  83.  
  84.  
  85. df.isnull().sum()
  86.  
  87.  
  88. # In[124]:
  89.  
  90.  
  91. plt.figure(figsize=(20,10))
  92. sns.countplot(x=df['age'],data=df)
  93. plt.xticks(rotation=90,fontsize=30)
  94. plt.yticks(fontsize=30)
  95.  
  96.  
  97. # In[24]:
  98.  
  99.  
  100. px.histogram(df,
  101.              x='age',
  102.              hover_data=df.columns,
  103.              marginal="box",
  104.              color='target')
  105.  
  106.  
  107. # In[27]:
  108.  
  109.  
  110. px.violin(df, y="chol",
  111.            color="target",
  112.            hover_data=df.columns,
  113.             points="all",
  114.            box=True,
  115.           )
  116.  
  117.  
  118. # In[28]:
  119.  
  120.  
  121. px.violin(df, y="age",
  122.            color="target",
  123.            hover_data=df.columns,
  124.             points="all",
  125.            box=True,
  126.           )
  127.  
  128.  
  129. # In[29]:
  130.  
  131.  
  132. sns.pairplot(df)
  133. plt.show()
  134.  
  135.  
  136. # In[30]:
  137.  
  138.  
  139. plt.figure(figsize=(15,15))
  140. sns.heatmap(df.corr()) #corelation
  141. plt.show()
  142.  
  143.  
  144. # In[31]:
  145.  
  146.  
  147. df.corr()
  148.  
  149.  
  150. # In[32]:
  151.  
  152.  
  153.  
  154. pred=df.drop(["target","fbs","chol","age","sex","trestbps"],axis=1)
  155. target=df['target']
  156. X_train,X_test,Y_train,Y_test = train_test_split(pred,target,test_size=0.20)
  157.  
  158.  
  159. # In[33]:
  160.  
  161.  
  162. X_train.shape
  163.  
  164.  
  165. # In[34]:
  166.  
  167.  
  168. X_test.shape
  169.  
  170.  
  171. # In[35]:
  172.  
  173.  
  174. def training_model(X_train, y_train, X_test, y_test, classifier, **kwargs):
  175.     model=classifier(**kwargs)
  176.     model.fit(X_train,y_train)
  177.     train_accuracy=model.score(X_train,y_train)
  178.     test_accuracy=model.score(X_test,y_test)
  179.     print(f"Train accuracy: {train_accuracy:0.3%}")
  180.     print(f"Test accuracy: {test_accuracy:0.3%}")
  181.    
  182.     return model
  183.  
  184.  
  185. # In[36]:
  186.  
  187.  
  188.  
  189. KNN=training_model(X_train, Y_train, X_test, Y_test, KNeighborsClassifier, n_neighbors=7)
  190. KNN.fit(X_train,Y_train)
  191. KNN.score(X_train,Y_train)
  192. y_pred_test=KNN.predict(X_test)
  193. y_pred_train=KNN.predict(X_train)
  194. # print (accuracy_score(Y_test, y_pred_test))
  195.  
  196.  
  197. # In[37]:
  198.  
  199.  
  200.  
  201. matrix=confusion_matrix(Y_test,y_pred_test)
  202. sns.heatmap(matrix,annot=True)
  203.  
  204.  
  205. # In[38]:
  206.  
  207.  
  208.  
  209. precision = precision_score(Y_test, y_pred_test)
  210. recall = recall_score(Y_test, y_pred_test)
  211. print("Precision: ",precision)
  212. print("Recall is: ",recall)
  213. print("fbeta score:",(2*precision*recall)/(precision+recall))
  214.  
  215.  
  216. # In[41]:
  217.  
  218.  
  219.  
  220. y_pred_prob = KNN.predict_proba(X_test)[:,1]
  221. FPR, TPR, threshold = roc_curve(Y_test, y_pred_prob)
  222.  
  223.  
  224.  
  225. # In[55]:
  226.  
  227.  
  228.  
  229. SVM = svm.SVC(kernel='linear',C=5,probability=True)
  230. SVM.fit(X_train, Y_train)
  231. yhat1_test = SVM.predict(X_test)
  232. yhat1_train=SVM.predict(X_train)
  233. print ("test_acc_score",accuracy_score(Y_test, yhat1_test))
  234. print ("train_acc_score",accuracy_score(Y_train, yhat1_train))
  235.  
  236.  
  237. # In[48]:
  238.  
  239.  
  240.  
  241. matrix1=confusion_matrix(Y_test,yhat1_test)
  242. sns.heatmap(matrix1,annot=True)
  243.  
  244.  
  245. # In[59]:
  246.  
  247.  
  248. precision = precision_score(Y_test,yhat1_test)
  249. recall = recall_score(Y_test, yhat1_test)
  250. print("Precision: ",precision)
  251. print("Recall is: ",recall)
  252. print("fbeta score is:",(2*precision*recall)/(precision+recall))
  253.  
  254.  
  255. # In[56]:
  256.  
  257.  
  258. y_pred_prob1 = SVM.predict_proba(X_test)[:,1]
  259. FPR1, TPR1, threshold1 = roc_curve(Y_test, y_pred_prob1)
  260.  
  261.  
  262. # In[57]:
  263.  
  264.  
  265. # matplotlib
  266.  
  267. plt.style.use('seaborn')
  268.  
  269. # plot roc curves
  270. plt.plot(FPR, TPR, linestyle='--',color='green', label='KNN')
  271. plt.plot(FPR1, TPR1, linestyle='--',color='red', label='SVM')
  272.  
  273. # title
  274. plt.title('ROC curve')
  275. # x label
  276. plt.xlabel('False Positive Rate')
  277. # y label
  278. plt.ylabel('True Positive rate')
  279.  
  280. plt.legend(loc='best')
  281. plt.savefig('ROC',dpi=300)
  282. plt.show();
  283.  
  284.  
  285. # In[58]:
  286.  
  287.  
  288.  
  289. AUC_score1 = roc_auc_score(Y_test, y_pred_prob)
  290. AUC_score2 = roc_auc_score(Y_test, y_pred_prob1)
  291.  
  292. print(AUC_score1)
  293. print(AUC_score2)
  294.  
  295.  
  296. # In[ ]:
  297.  
  298.  
  299. # parameter tuning for SVM using gridsearch
  300. svm_clf = SVC(kernel='rbf', gamma=0.1, C=1.0)
  301.  
  302. params = {"C":(0.1, 0.5, 1, 2, 5, 10, 20),
  303.           "gamma":(0.001, 0.01, 0.1, 0.25, 0.5, 0.75, 1),
  304.           "kernel":('linear', 'poly', 'rbf')}
  305.  
  306. svm_cv = GridSearchCV(svm_clf, params, n_jobs=-1, cv=5, verbose=1, scoring="accuracy")
  307. svm_cv.fit(X_train, Y_train)
  308. best_params = svm_cv.best_params_
  309. print(f"Best params: {best_params}")
  310.  
  311. svm_clf = SVC(**best_params)
  312. svm_clf.fit(X_train, Y_train)
  313.  
  314. print_score(svm_clf, X_train, Y_train, X_test, Y_test, train=True)
  315. print_score(svm_clf, X_train, Y_train, X_test, Y_test, train=False)
  316.  
  317.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement