brandblox

Bombastic

Mar 23rd, 2025 (edited)
Python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, accuracy_score, roc_curve, auc, classification_report
from sklearn.datasets import load_diabetes

def load_data():
    # Load the diabetes regression dataset and turn it into a binary classification problem.
    data = load_diabetes()
    df = pd.DataFrame(data.data, columns=data.feature_names)
    df = df.drop(columns=["s1"])  # Drop the "s1" serum feature to trim the feature set
    df['target'] = (data.target > data.target.mean()).astype(int)  # 1 if progression is above the mean, else 0
    print("First 5 rows of the dataset:")
    print(df.head())
    return df

def split_data(df, target_column):
    # Separate features from the target and hold out 20% of the rows for testing.
    X = df.drop(columns=[target_column])
    y = df[target_column]
    return train_test_split(X, y, test_size=0.2, random_state=42)

def scale_features(X_train, X_test):
    # Standardize features; the scaler is fit on the training set only to avoid leakage.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_test_scaled

def train_decision_tree(X_train, y_train):
    # model = LogisticRegression()  # alternative linear baseline
    model = DecisionTreeClassifier()
    model.fit(X_train, y_train)
    return model

def make_predictions(model, X_test):
    return model.predict(X_test)

def display_predictions(X_test, y_pred):
    # Show the first five scaled test rows alongside their predicted labels.
    print("Sample Predictions:")
    sample_df = pd.DataFrame(X_test[:5], columns=[f'Feature_{i}' for i in range(X_test.shape[1])])
    sample_df['Predicted Target'] = y_pred[:5]
    print(sample_df)

def evaluate_model(y_test, y_pred):
    # Report accuracy plus binary-averaged precision, recall, and F1.
    accuracy = accuracy_score(y_test, y_pred)
    precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred, average='binary')
    print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}, F1 Score: {f1:.2f}")
    print("\nClassification Report:\n", classification_report(y_test, y_pred))

def plot_confusion_matrix(y_test, y_pred):
    cm = confusion_matrix(y_test, y_pred)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title("Confusion Matrix")
    plt.show()

def plot_roc_curve(y_test, y_probs):
    # Plot the ROC curve from positive-class probabilities and report the AUC.
    fpr, tpr, _ = roc_curve(y_test, y_probs)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {roc_auc:.2f})')
    plt.plot([0, 1], [0, 1], linestyle='--')  # chance-level diagonal
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("ROC Curve")
    plt.legend()
    plt.show()

# Example usage
if __name__ == "__main__":
    target_column = "target"

    df = load_data()
    X_train, X_test, y_train, y_test = split_data(df, target_column)
    X_train, X_test = scale_features(X_train, X_test)

    model = train_decision_tree(X_train, y_train)
    y_pred = make_predictions(model, X_test)

    display_predictions(X_test, y_pred)
    evaluate_model(y_test, y_pred)
    plot_confusion_matrix(y_test, y_pred)

    y_probs = model.predict_proba(X_test)[:, 1]  # probability of the positive class, used for the ROC curve
    plot_roc_curve(y_test, y_probs)