YaBoiSwayZ

Iris classifier evaluation

Aug 10th, 2023 (edited)
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_validate, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Function to load and preprocess the Iris dataset
def load_and_preprocess_data_optimized():
    print("Loading and preprocessing data...")
    iris_data = load_iris()
    X = iris_data.data
    y = iris_data.target
    # Stratified split keeps the class proportions identical in train and test
    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)
    # Fit the scaler on the training split only, then apply it to both splits,
    # so no test-set statistics leak into training
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, y_train, y_test

# Function to define the classifiers to be used
def get_classifiers_optimized():
    print("Defining classifiers...")
    return {
        "Logistic Regression": LogisticRegression(max_iter=1000, n_jobs=-1),
        "SVC": SVC(),
        "Decision Tree": DecisionTreeClassifier(),
        "Random Forest": RandomForestClassifier(n_jobs=-1),
        "K-Neighbors": KNeighborsClassifier(n_jobs=-1),
        "Naive Bayes": GaussianNB(),
        "Gradient Boosting": GradientBoostingClassifier(),
    }

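# (Added sketch, not in the original paste.) Every classifier above runs with
# default or near-default hyperparameters. If tuning is wanted, a grid-searched
# estimator can be slotted into the dictionary in place of a plain one; a
# minimal example for the SVC, with an illustrative (assumed) parameter grid:
#
#   from sklearn.model_selection import GridSearchCV
#   svc_tuned = GridSearchCV(SVC(), {"C": [0.1, 1, 10], "kernel": ["linear", "rbf"]}, cv=5)
#   # svc_tuned can be used anywhere a plain SVC() is used above
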
# Function to evaluate classifiers using k-fold cross-validation
def evaluate_classifiers_optimized(X_train, X_test, y_train, y_test):
    print("Evaluating classifiers...")
    classifiers = get_classifiers_optimized()
    kfold = KFold(n_splits=10, random_state=42, shuffle=True)
    training_scores = []
    test_scores = []
    for name, clf in classifiers.items():
        print(f"Evaluating {name}...")
        # cross_validate returns per-fold arrays keyed 'test_<metric>' (plus
        # 'fit_time'/'score_time'); strip the prefix and drop the timings
        cv_results = cross_validate(clf, X_train, y_train, cv=kfold,
                                    scoring=['accuracy', 'precision_macro', 'recall_macro', 'f1_macro'],
                                    n_jobs=-1)
        train_scores = {key[len("test_"):]: value for key, value in cv_results.items() if key.startswith("test_")}
        training_scores.append((name, train_scores))
        # Refit on the full training set and score once on the held-out test set
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        test_score = {
            "accuracy": accuracy_score(y_test, y_pred),
            "precision": precision_score(y_test, y_pred, average='macro'),
            "recall": recall_score(y_test, y_pred, average='macro'),
            "f1": f1_score(y_test, y_pred, average='macro'),
        }
        test_scores.append((name, test_score))
    return training_scores, test_scores

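# (Added sketch, not in the original paste.) A small helper to print the mean
# cross-validation scores as text before plotting; it consumes the
# (name, dict-of-per-fold-arrays) pairs returned by evaluate_classifiers_optimized:
def print_cv_summary(training_scores):
    for name, scores in training_scores:
        summary = ", ".join(f"{metric}={np.mean(values):.3f}" for metric, values in scores.items())
        print(f"{name}: {summary}")
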
# Function to plot the evaluation scores
def plot_scores(training_scores, test_scores):
    print("Plotting scores...")
    metrics = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
    x = np.arange(len(metrics))
    # The test-score dicts use the bare metric names ('accuracy', 'precision', ...)
    test_by_name = dict(test_scores)
    for name, train_scores in training_scores:
        plt.figure(figsize=(10, 6))
        train_values = [np.mean(train_scores[metric]) for metric in metrics]
        test_values = [test_by_name[name][metric.split('_')[0]] for metric in metrics]
        plt.bar(x - 0.2, train_values, 0.4, label="Training (10-fold CV mean)")
        plt.bar(x + 0.2, test_values, 0.4, label="Test")
        plt.xticks(x, [metric.split('_')[0] for metric in metrics])
        plt.ylabel("Score")
        plt.title(f'{name} Scores')
        plt.legend()
        plt.show()

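# (Added sketch, not in the original paste.) If the fold-to-fold spread matters,
# the training bars could carry error bars; a drop-in variant of the bar call
# above, using the same per-fold arrays held in train_scores:
#
#   train_err = [np.std(train_scores[metric]) for metric in metrics]
#   plt.bar(x - 0.2, train_values, 0.4, yerr=train_err, capsize=4, label="Training")
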
# Main execution
if __name__ == "__main__":
    X_train, X_test, y_train, y_test = load_and_preprocess_data_optimized()
    training_scores_optimized, test_scores_optimized = evaluate_classifiers_optimized(X_train, X_test, y_train, y_test)
    plot_scores(training_scores_optimized, test_scores_optimized)
    print("Execution complete.")