Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# %%
# Load the Titanic passenger table that the rest of the script works on.
import pandas as pd
import numpy as np

TITANIC_CSV_URL = "https://raw.githubusercontent.com/codebasics/py/master/ML/9_decision_tree/Exercise/titanic.csv"
df = pd.read_csv(TITANIC_CSV_URL)
df.head()
# %%
# NOTE(review): this cell loads a second, separate dataset (Pima Indians
# diabetes). Nothing in the visible part of the file reads `dataframe` after
# the null-count cell below -- confirm whether it is still needed.
import pandas
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression

url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
# Column names are supplied explicitly through `names`, so pandas reads the
# first line of the file as data rather than as a header.
names = [
    'preg', 'plas', 'pres', 'skin', 'test',
    'mass', 'pedi', 'age', 'class',
]
dataframe = pandas.read_csv(url, names=names)
dataframe
# %%
# Number of missing values in each column of the diabetes table.
dataframe.isnull().sum()
# %%
# Remove, in place, the columns that are not used as model inputs.
df.drop(
    columns=[
        'PassengerId', 'Name', 'SibSp', 'Parch',
        'Ticket', 'Cabin', 'Embarked',
    ],
    inplace=True,
)
df.head()
# %%
from sklearn.preprocessing import LabelEncoder

L_Sex = LabelEncoder()
# %%
# Encode the textual 'Sex' column as integer codes in a new column.
L_Sex.fit(df['Sex'])
df['Gender Code'] = L_Sex.transform(df['Sex'])
df
# %%
# Feature table: everything except the raw 'Sex' text and the label column.
df_new = df.drop(columns=['Sex', 'Survived'])
df_new
# %%
# Fill missing ages with the mean of the known ages.
mean_age = df_new['Age'].mean()
df_new['Age'] = df_new['Age'].fillna(mean_age)
df_new
# %%
# Label column the classifier is trained to predict.
target = df['Survived']
target
# %%
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. The split is seeded so that the
# metrics computed further down are repeatable between runs; 4 is the same
# seed the KFold cell in this file already uses.
X_train, X_test, y_train, y_test = train_test_split(
    df_new, target, test_size=0.2, random_state=4
)
# %%
len(X_train)
# %%
len(X_test)
# %%
len(y_train)
# %%
len(y_test)
# %%
from sklearn import tree

# Seeded for the same reason as the split: an unseeded tree can resolve
# equally good candidate splits differently on every run.
model = tree.DecisionTreeClassifier(random_state=4)
model.fit(X_train, y_train)
# %%
# Predicted labels for the held-out rows; reused by the metric cells below.
y_predicted = model.predict(X_test)
# %%
from sklearn.metrics import confusion_matrix, classification_report

# labels=[1, 0] fixes the row/column order of the matrix: class 1 first,
# then class 0.
matrix = confusion_matrix(
    y_true=y_test,
    y_pred=y_predicted,
    labels=[1, 0],
)
# %%
print('confusion matrix \n', matrix)
- # %%
- from matplotlib import pyplot as plt
- import pandas as pd
- import seaborn as sns
- # %%
def print_confusion_matrix(confusion_matrix, class_name, figsize=(10, 7), fontsize=14):
    """Draw a confusion matrix as an annotated seaborn heatmap.

    The matrix is wrapped in a DataFrame whose rows and columns are both
    labelled with ``class_name``, plotted on a new matplotlib figure, and the
    axes are titled 'Truth' (y) and 'Prediction' (x).

    Args:
        confusion_matrix: 2-D array-like of integer counts.
        class_name: Sequence of labels used for both the rows and the columns.
        figsize: Figure size forwarded to ``plt.figure``.
        fontsize: Font size applied to the tick labels on both axes.

    Raises:
        ValueError: If the DataFrame cannot be built from the inputs, or if
            seaborn cannot render the values with the integer format ``'d'``.
    """
    df_cm = pd.DataFrame(confusion_matrix, index=class_name, columns=class_name)
    plt.figure(figsize=figsize)
    try:
        heatmap = sns.heatmap(df_cm, annot=True, fmt='d')
    except ValueError as err:
        # Keep the original exception chained so the real cause stays visible.
        raise ValueError("Confusion matrix values must be integers.") from err
    heatmap.yaxis.set_ticklabels(
        heatmap.yaxis.get_ticklabels(), rotation=0, ha='right', fontsize=fontsize
    )
    # Each axis is restyled from its OWN tick labels; the previous code fed the
    # y-axis labels to the x-axis, which only looked right because both axes
    # happen to share `class_name`.
    heatmap.xaxis.set_ticklabels(
        heatmap.xaxis.get_ticklabels(), rotation=45, ha='right', fontsize=fontsize
    )
    plt.ylabel('Truth')
    plt.xlabel('Prediction')
print_confusion_matrix(matrix, ["Survived", "Not Survived"])
# %%
# The text report gets its own name. Rebinding `matrix` to this string would
# overwrite the confusion matrix, so re-running the plotting call above would
# then be handed a string instead of the matrix.
report = classification_report(y_test, y_predicted, labels=[1, 0])
print('classification report : \n', report)
# %%
# Mean accuracy on the rows the tree was fitted on.
model.score(X_train, y_train)
# %%
# Mean accuracy on the held-out rows.
model.score(X_test, y_test)
# %%
# Accuracy computed from the stored predictions. Scoring the model against
# `y_predicted` itself compares the predictions with themselves and is 1.0 by
# construction; the reference labels must be `y_test`.
from sklearn.metrics import accuracy_score

accuracy_score(y_test, y_predicted)
# %%
from sklearn import model_selection
# %%
# 10-fold cross-validation over the full feature table and the TRUE labels.
# Running it on (X_test, y_predicted) would only measure how well a fresh tree
# reproduces the first tree's predictions, on about 20% of the data.
kfold = model_selection.KFold(n_splits=10, random_state=4, shuffle=True)
scoring = 'accuracy'
results = model_selection.cross_val_score(model, df_new, target, cv=kfold, scoring=scoring)
print("Accuracy: %.3f " % (results.mean()))
# %%
# Same folds, scored by area under the ROC curve; mean and standard deviation
# across folds are reported under the correct metric name.
scoring = 'roc_auc'
results = model_selection.cross_val_score(model, df_new, target, cv=kfold, scoring=scoring)
print("ROC AUC: %.3f(%.3f) " % (results.mean(), results.std()))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement