Advertisement
brandblox

Lab_ML(17/02/25)

Feb 17th, 2025
120
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.03 KB | None | 0 0
  1. # %%
  2. import pandas as pd
  3. import numpy as np
  4. df = pd.read_csv("https://raw.githubusercontent.com/codebasics/py/master/ML/9_decision_tree/Exercise/titanic.csv")
  5. df.head()
  6.  
  7.  
  8. # %%
  9. import pandas
  10. from sklearn import model_selection
  11. from sklearn.linear_model import LogisticRegression
  12. url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
  13. names = ['preg','plas','pres','skin','test','mass','pedi','age','class']
  14. dataframe = pandas.read_csv(url,names =names)
  15. dataframe
  16.  
  17. # %%
  18. dataframe.isnull().sum()
  19.  
  20. # %%
  21. df.drop(['PassengerId','Name','SibSp','Parch','Ticket','Cabin','Embarked'],axis = 'columns',inplace =True)
  22. df.head()
  23.  
  24. # %%
  25. from sklearn.preprocessing import LabelEncoder
  26. L_Sex = LabelEncoder()
  27.  
  28. # %%
  29. df['Gender Code']=L_Sex.fit_transform(df['Sex'])
  30. df
  31.  
  32. # %%
  33. df_new = df.drop(['Sex','Survived'],axis='columns')
  34. df_new
  35.  
  36. # %%
  37. df_new.Age = df_new.Age.fillna(df_new.Age.mean())
  38. df_new
  39.  
  40. # %%
  41. target = df['Survived']
  42. target
  43.  
  44. # %%
  45. from sklearn.model_selection import train_test_split
  46. X_train, X_test, y_train, y_test = train_test_split(df_new,target,test_size=0.2)
  47.  
  48. # %%
  49. len(X_train)
  50.  
  51. # %%
  52. len(X_test)
  53.  
  54. # %%
  55. len(y_train)
  56.  
  57. # %%
  58. len(y_test)
  59.  
  60. # %%
  61. from sklearn import tree
  62. model = tree.DecisionTreeClassifier()
  63.  
  64. model.fit(X_train,y_train)
  65.  
  66. # %%
  67. y_predicted =  model.predict(X_test)
  68.  
  69. # %%
  70. from sklearn.metrics import confusion_matrix,classification_report
  71. matrix = confusion_matrix(y_test, y_predicted,labels=[1,0])
  72.  
  73. # %%
  74. print('confusion matrix \n',matrix)
  75.  
  76. # %%
  77. from matplotlib import pyplot as plt
  78. import pandas as pd
  79. import seaborn as sns
  80.  
  81. # %%
  82. def print_confusion_matrix(confusion_matrix, class_name, figsize=(10,7),fontsize=14):
  83.     df_cm=pd.DataFrame(
  84.         confusion_matrix, index=class_name, columns=class_name,
  85.     )
  86.     fig = plt.figure(figsize=figsize)
  87.     try:
  88.         heatmap = sns.heatmap(df_cm, annot=True, fmt='d')
  89.     except ValueError:
  90.         raise ValueError("Confusion matrix values must be integers.")
  91.     heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=0, ha='right', fontsize=fontsize)
  92.     heatmap.xaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(), rotation=45, ha='right', fontsize=fontsize)
  93.     plt.ylabel('Truth')
  94.     plt.xlabel('Prediction')
  95.  
  96. print_confusion_matrix(matrix, ["Survived","Not Survived"])
  97.  
  98. # %%
  99. matrix = classification_report(y_test,y_predicted,labels=[1,0])
  100. print('classification report : \n', matrix)
  101.  
  102. # %%
  103. model.score(X_train,y_train)
  104.  
  105. # %%
  106. model.score(X_test,y_test)
  107.  
  108. # %%
  109. model.score(X_test,y_predicted)
  110.  
  111. # %%
  112. from sklearn import model_selection
  113.  
  114. # %%
  115. kfold = model_selection.KFold(n_splits=10,random_state=4,shuffle=True)
  116. scoring = 'accuracy'
  117. results = model_selection.cross_val_score(model, X_test, y_predicted, cv=kfold, scoring=scoring)
  118. print("Accuracy: %.3f "%(results.mean()))
  119.  
  120. # %%
  121. scoring = 'roc_auc'
  122. results = model_selection.cross_val_score(model, X_test, y_predicted, cv=kfold, scoring=scoring)
  123. print("Accuracy: %.3f(%.3f) "%(results.mean(),results.std()))
  124.  
  125.  
  126.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement