# Import Libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_validate, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Function to load the Iris dataset, split it, and standardize the features
def load_and_preprocess_data_optimized():
    print("Loading and preprocessing data...")
    iris_data = load_iris()
    X = iris_data.data
    y = iris_data.target
    # Stratified split keeps the class proportions identical in train and test
    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)
    # Fit the scaler on the training data only, then apply it to the test data
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, y_train, y_test
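# With Iris (150 samples, 4 features, 3 classes of 50), the 20% stratified
# split yields 120 training rows and 30 test rows, with the three classes
# equally represented in each split.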
# Function to define the classifiers to be compared
def get_classifiers_optimized():
    print("Defining classifiers...")
    return {
        "Logistic Regression": LogisticRegression(max_iter=1000, n_jobs=-1),
        "SVC": SVC(),
        "Decision Tree": DecisionTreeClassifier(),
        "Random Forest": RandomForestClassifier(n_jobs=-1),
        "K-Neighbors": KNeighborsClassifier(n_jobs=-1),
        "Naive Bayes": GaussianNB(),
        "Gradient Boosting": GradientBoostingClassifier()
    }
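# Note: DecisionTreeClassifier, RandomForestClassifier, and
# GradientBoostingClassifier are stochastic; passing random_state=42 to each
# (an optional tweak, not in the original paste) would make runs reproducible.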
# Function to evaluate classifiers with 10-fold cross-validation on the
# training set, plus a single evaluation on the held-out test set
def evaluate_classifiers_optimized(X_train, X_test, y_train, y_test):
    print("Evaluating classifiers...")
    classifiers = get_classifiers_optimized()
    kfold = KFold(n_splits=10, random_state=42, shuffle=True)
    training_scores = []
    test_scores = []
    for name, clf in classifiers.items():
        print(f"Evaluating {name}...")
        # Cross-validate on the training split; keep only the per-fold scores
        cv_results = cross_validate(clf, X_train, y_train, cv=kfold,
                                    scoring=['accuracy', 'precision_macro', 'recall_macro', 'f1_macro'],
                                    n_jobs=-1)
        train_scores = {key[len("test_"):]: value for key, value in cv_results.items() if key.startswith("test_")}
        training_scores.append((name, train_scores))
        # Refit on the full training split and score on the held-out test set
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        test_score = {
            "accuracy": accuracy_score(y_test, y_pred),
            "precision": precision_score(y_test, y_pred, average='macro'),
            "recall": recall_score(y_test, y_pred, average='macro'),
            "f1": f1_score(y_test, y_pred, average='macro')
        }
        test_scores.append((name, test_score))
    return training_scores, test_scores
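# Each entry in training_scores is a (name, dict) pair mapping
# 'accuracy'/'precision_macro'/'recall_macro'/'f1_macro' to an array of ten
# per-fold scores; each entry in test_scores maps the short metric names
# ('accuracy', 'precision', 'recall', 'f1') to single held-out values.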
# Function to plot training (cross-validated) vs. test scores per classifier
def plot_scores(training_scores, test_scores):
    print("Plotting scores...")
    metrics = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
    x = np.arange(len(metrics))
    test_scores_by_name = dict(test_scores)
    for name, train_scores in training_scores:
        plt.figure(figsize=(10, 6))
        # Mean of the ten fold scores for each metric
        train_values = [np.mean(train_scores[metric]) for metric in metrics]
        # Test-score dicts use the short metric names ('precision', not 'precision_macro')
        test_values = [test_scores_by_name[name][metric.split('_')[0]] for metric in metrics]
        plt.bar(x - 0.2, train_values, 0.4, label="Training")
        plt.bar(x + 0.2, test_values, 0.4, label="Test")
        plt.xticks(x, [metric.split('_')[0] for metric in metrics])
        plt.title(f'{name} Scores')
        plt.legend()
        plt.show()
# Main execution
X_train, X_test, y_train, y_test = load_and_preprocess_data_optimized()
training_scores_optimized, test_scores_optimized = evaluate_classifiers_optimized(X_train, X_test, y_train, y_test)
plot_scores(training_scores_optimized, test_scores_optimized)
print("Execution complete.")