Advertisement
brandblox

Jumbo

Apr 28th, 2025
157
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.82 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. import seaborn as sns
  5. from sklearn.model_selection import train_test_split
  6. from sklearn.preprocessing import StandardScaler
  7. from sklearn.tree import DecisionTreeClassifier
  8. from sklearn.linear_model import LinearRegression, LogisticRegression
  9. from sklearn.naive_bayes import GaussianNB
  10. from sklearn.neighbors import KNeighborsClassifier
  11. from sklearn.svm import SVC
  12. from sklearn.ensemble import RandomForestClassifier
  13. from sklearn.cluster import KMeans
  14. from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, accuracy_score, roc_curve, auc, classification_report
  15. from sklearn.datasets import load_diabetes
  16.  
  17. def load_data():
  18.     # Load and preprocess the diabetes dataset
  19.     data = load_diabetes()
  20.     df = pd.DataFrame(data.data, columns=data.feature_names)
  21.     df = df.drop(columns=["s1"])  # Dropping one unnecessary column
  22.     df['target'] = (data.target > data.target.mean()).astype(int)  # Convert target to binary
  23.     print("First 5 rows of the dataset:")
  24.     print(df.head())
  25.     return df
  26.  
  27. def split_data(df, target_column):
  28.     # Split the dataset into features and target
  29.     X = df.drop(columns=[target_column])
  30.     y = df[target_column]
  31.     return train_test_split(X, y, test_size=0.2, random_state=42)
  32.  
  33. def scale_features(X_train, X_test):
  34.     # Standardize the features
  35.     scaler = StandardScaler()
  36.     X_train_scaled = scaler.fit_transform(X_train)
  37.     X_test_scaled = scaler.transform(X_test)
  38.     return X_train_scaled, X_test_scaled
  39.  
  40. def select_model(model_type):
  41.     # Select and return the model based on the type
  42.     if model_type == "linear_regression":
  43.         return LinearRegression()
  44.     elif model_type == "logistic_regression":
  45.         return LogisticRegression()
  46.     elif model_type == "decision_tree":
  47.         return DecisionTreeClassifier()
  48.     elif model_type == "random_forest":
  49.         return RandomForestClassifier()
  50.     elif model_type == "knn":
  51.         return KNeighborsClassifier()
  52.     elif model_type == "naive_bayes":
  53.         return GaussianNB()
  54.     elif model_type == "svm":
  55.         return SVC(probability=True)
  56.     elif model_type == "k_means":
  57.         return KMeans(n_clusters=2)  # For binary clustering, modify as needed
  58.  
  59. def train_model(model, X_train, y_train):
  60.     # Train the selected model
  61.     model.fit(X_train, y_train)
  62.  
  63. def make_predictions(model, X_test):
  64.     # Make predictions using the trained model
  65.     return model.predict(X_test)
  66.  
  67. def evaluate_model(model, X_test, y_test, y_pred):
  68.     # Evaluate the model using accuracy, precision, recall, and F1 score
  69.     if hasattr(model, "predict_proba"):  # If model supports probability prediction
  70.         y_probs = model.predict_proba(X_test)[:, 1]  # Get probabilities for ROC Curve
  71.         fpr, tpr, _ = roc_curve(y_test, y_probs)
  72.         roc_auc = auc(fpr, tpr)
  73.         plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {roc_auc:.2f})')
  74.         plt.plot([0, 1], [0, 1], linestyle='--')
  75.         plt.xlabel("False Positive Rate")
  76.         plt.ylabel("True Positive Rate")
  77.         plt.title("ROC Curve")
  78.         plt.legend()
  79.         plt.show()
  80.    
  81.     accuracy = accuracy_score(y_test, y_pred)
  82.     precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred, average='binary')
  83.     print(f"Accuracy: {accuracy:.2f}, Precision: {precision:.2f}, Recall: {recall:.2f}, F1 Score: {f1:.2f}")
  84.     print("\nClassification Report:\n", classification_report(y_test, y_pred))
  85.  
  86. def plot_confusion_matrix(y_test, y_pred):
  87.     cm = confusion_matrix(y_test, y_pred)
  88.     sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
  89.     plt.xlabel("Predicted")
  90.     plt.ylabel("Actual")
  91.     plt.title("Confusion Matrix")
  92.     plt.show()
  93.  
  94. def display_predictions(X_test, y_pred):
  95.     print("Sample Predictions:")
  96.     sample_df = pd.DataFrame(X_test[:5], columns=[f'Feature_{i}' for i in range(X_test.shape[1])])
  97.     sample_df['Predicted Target'] = y_pred[:5]
  98.     print(sample_df)
  99.  
  100. # Example usage
  101. if __name__ == "__main__":
  102.     target_column = "target"  # Column to predict
  103.  
  104.     # Load and prepare the data
  105.     df = load_data()
  106.     X_train, X_test, y_train, y_test = split_data(df, target_column)
  107.     X_train, X_test = scale_features(X_train, X_test)
  108.    
  109.     # Select the model type here (replace with your choice)
  110.     model_type = "decision_tree"  # Can be any model type from the available options
  111.    
  112.     # Choose model
  113.     model = select_model(model_type)
  114.    
  115.     # Train model
  116.     train_model(model, X_train, y_train)
  117.    
  118.     # Make predictions
  119.     y_pred = make_predictions(model, X_test)
  120.    
  121.     # Display sample predictions
  122.     display_predictions(X_test, y_pred)
  123.    
  124.     # Evaluate the model
  125.     evaluate_model(model, X_test, y_test, y_pred)
  126.    
  127.     # Plot confusion matrix
  128.     plot_confusion_matrix(y_test, y_pred)
  129.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement