Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import asyncio
import json
import logging
import os

import matplotlib.pyplot as plt
import numpy as np
import psutil
import shap
from joblib import Parallel, delayed, dump, load
from shap import TreeExplainer
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split, cross_validate, KFold, GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
# Emit INFO-and-above records and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def load_config(config_path="config.json"):
    """Load the JSON configuration stored at *config_path*.

    Args:
        config_path: Path of the JSON file to read.

    Returns:
        The parsed JSON document, or an empty dict when the file does not
        exist so that a first run can fall back to default settings.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    try:
        # Explicit encoding: do not depend on the platform default.
        with open(config_path, "r", encoding="utf-8") as config_file:
            return json.load(config_file)
    except FileNotFoundError:
        # No config written yet; callers use .get() with their own defaults.
        return {}
def save_config(config, config_path="config.json"):
    """Write *config* to *config_path* as JSON indented by four spaces.

    Args:
        config: JSON-serialisable object to persist.
        config_path: Destination file; it is overwritten if it exists.
    """
    # Explicit encoding: do not depend on the platform default.
    with open(config_path, "w", encoding="utf-8") as config_file:
        json.dump(config, config_file, indent=4)
# Configuration is read once, at import time.
config = load_config()
# Directory that trained models are written to; a default name is used when
# the config has no "storage_location" entry.
storage_location = config.get("storage_location", "default_storage_directory")
def set_storage_location(new_location):
    """Switch model storage to *new_location* and persist the choice.

    Rebinds the module-level ``storage_location``, stores the value under
    the "storage_location" key of the module-level ``config`` and writes
    ``config`` back out through ``save_config``.

    Args:
        new_location: Directory path to use for stored models.
    """
    global storage_location
    storage_location = new_location
    config["storage_location"] = new_location
    save_config(config)
# NOTE(review): this replaces whatever location was just read from the config
# on every import -- confirm that overriding the saved value is intended.
set_storage_location("new_storage_directory")
# Make sure the storage directory exists before any model is saved.
os.makedirs(storage_location, exist_ok=True)
def load_and_preprocess_data(test_size=0.2, random_state=42):
    """Load the iris dataset and split it into train and test partitions.

    The split is stratified on the target. No scaling or feature selection
    is applied here.

    Args:
        test_size: Fraction of samples held out for testing.
        random_state: Seed passed to ``train_test_split``.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``.

    Raises:
        Exception: Any loading or splitting error is logged and re-raised.
    """
    try:
        iris_data = load_iris()
        X, y = iris_data.data, iris_data.target
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, stratify=y, test_size=test_size, random_state=random_state
        )
        return X_train, X_test, y_train, y_test
    except Exception as e:
        logger.error(f"Error loading and preprocessing data: {e}")
        raise
def create_pipeline(clf, k=2):
    """Wrap *clf* in a scaling and feature-selection pipeline.

    The steps are named 'scaler', 'feature_selection' and 'classifier';
    other code addresses them by these names (for example the
    ``classifier__`` prefix used in hyperparameter grids).

    Args:
        clf: Estimator to use as the final 'classifier' step.
        k: Number of features kept by ``SelectKBest`` (scored with
            ``f_classif``).

    Returns:
        The assembled, unfitted ``Pipeline``.

    Raises:
        Exception: Any construction error is logged and re-raised.
    """
    try:
        return Pipeline([
            ('scaler', StandardScaler()),
            ('feature_selection', SelectKBest(score_func=f_classif, k=k)),
            ('classifier', clf),
        ])
    except Exception as e:
        logger.error(f"Error creating pipeline: {e}")
        raise
def get_classifiers():
    """Return the candidate classifiers keyed by display name.

    Each call builds fresh, unfitted estimators. The display names are also
    used to choose hyperparameter grids and to name saved model files.

    Returns:
        A dict mapping display name to estimator instance.
    """
    return {
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "SVC": SVC(),
        "Decision Tree": DecisionTreeClassifier(),
        "Random Forest": RandomForestClassifier(),
        "K-Neighbors": KNeighborsClassifier(),
        "Naive Bayes": GaussianNB(),
        "Gradient Boosting": GradientBoostingClassifier(),
    }
def hyperparameter_tuning(clf, param_grid, X_train, y_train, cv=5, scoring='accuracy'):
    """Grid-search *param_grid* for *clf* and return the best estimator.

    Args:
        clf: Estimator (or pipeline) to tune.
        param_grid: Grid handed to ``GridSearchCV``.
        X_train: Training features.
        y_train: Training targets.
        cv: Cross-validation setting forwarded to ``GridSearchCV``.
        scoring: Metric used to rank parameter combinations.

    Returns:
        ``best_estimator_`` of the fitted search.

    Raises:
        Exception: Any search error is logged and re-raised.
    """
    try:
        grid_search = GridSearchCV(clf, param_grid, cv=cv, n_jobs=-1, scoring=scoring)
        grid_search.fit(X_train, y_train)
        return grid_search.best_estimator_
    except Exception as e:
        logger.error(f"Error during hyperparameter tuning: {e}")
        raise
def evaluate_classifier(name, clf, X_train, y_train, X_test, y_test, kfold):
    """Cross-validate *clf* on the training data and score it on the test set.

    The estimator passed in is fitted in place on the full training data
    before the test-set prediction.

    Args:
        name: Display name, returned unchanged and used in log messages.
        clf: Estimator or pipeline to evaluate.
        X_train: Training features.
        y_train: Training targets.
        X_test: Held-out features.
        y_test: Held-out targets.
        kfold: Cross-validation splitter passed to ``cross_validate``.

    Returns:
        A tuple ``(name, train_scores, test_score)``: ``train_scores`` is the
        ``cross_validate`` result for accuracy and macro precision, recall
        and F1; ``test_score`` is a dict with keys "accuracy", "precision",
        "recall" and "f1" computed on the test set.

    Raises:
        Exception: Any evaluation error is logged and re-raised.
    """
    try:
        train_scores = cross_validate(
            clf,
            X_train,
            y_train,
            cv=kfold,
            scoring=['accuracy', 'precision_macro', 'recall_macro', 'f1_macro'],
            n_jobs=-1,
        )
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        test_score = {
            "accuracy": accuracy_score(y_test, y_pred),
            "precision": precision_score(y_test, y_pred, average='macro'),
            "recall": recall_score(y_test, y_pred, average='macro'),
            "f1": f1_score(y_test, y_pred, average='macro'),
        }
        return name, train_scores, test_score
    except Exception as e:
        logger.error(f"Error evaluating classifier {name}: {e}")
        raise
def plot_scores(training_scores, test_scores, names=None):
    """Plot cross-validation versus test metrics, one figure per model.

    Args:
        training_scores: Sequence of ``cross_validate``-style dicts holding
            'test_accuracy', 'test_precision_macro', 'test_recall_macro'
            and 'test_f1_macro' entries.
        test_scores: Sequence of dicts with keys 'accuracy', 'precision',
            'recall' and 'f1', aligned with *training_scores*.
        names: Optional sequence of model names used in the figure titles.
            When omitted, models are labelled "Model 1", "Model 2", ...
    """
    metrics = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
    x = np.arange(len(metrics))

    training_scores = list(training_scores)
    test_scores = list(test_scores)
    if names is None:
        # The two score sequences carry no names, so fall back to labels
        # that follow the input order.
        names = [f'Model {index}' for index in range(1, len(training_scores) + 1)]

    for name, train_scores, test_score in zip(names, training_scores, test_scores):
        plt.figure(figsize=(10, 6))
        train_values = [np.mean(train_scores['test_' + metric]) for metric in metrics]
        # Test-score keys drop the '_macro' suffix ('precision_macro' -> 'precision').
        test_values = [test_score[metric.split('_')[0]] for metric in metrics]
        plt.plot(x, train_values, label='Train')
        plt.plot(x, test_values, label='Test')
        plt.xticks(x, metrics)
        plt.ylabel('Score')
        plt.title(f'{name} Scores')
        plt.legend()
        plt.show()
def save_model_locally(model, name):
    """Serialise *model* into the current storage directory.

    The file is written to ``<storage_location>/<name>_model.joblib``.

    Args:
        model: Object to persist with ``joblib.dump``.
        name: Model name used as the file-name prefix.

    Raises:
        Exception: Any error while saving is logged and re-raised.
    """
    try:
        # The storage location can be changed at runtime, so make sure the
        # directory exists at the moment of writing.
        os.makedirs(storage_location, exist_ok=True)
        model_path = os.path.join(storage_location, f'{name}_model.joblib')
        dump(model, model_path)
    except Exception as e:
        logger.error(f"Error saving model locally: {e}")
        raise
def monitor_resources(interval=0.1):
    """Print the current system-wide CPU and memory utilisation.

    Args:
        interval: Seconds over which CPU usage is sampled. A positive value
            is used because a non-blocking first call to
            ``psutil.cpu_percent`` does not yield a meaningful reading.
    """
    # The former ``except ImportError`` guard was dropped: psutil is imported
    # at module level, so a missing package fails at import time, never here.
    print(f"CPU usage: {psutil.cpu_percent(interval=interval)}%")
    print(f"Memory usage: {psutil.virtual_memory().percent}%")
def _build_param_grid(name):
    """Return the hyperparameter grid for the classifier called *name*.

    Only parameters that apply to the classifier are included. Classifiers
    with nothing to tune get an empty grid, for which the grid search
    evaluates the default settings only.
    """
    param_grid = {}
    if name in ('Logistic Regression', 'SVC'):
        param_grid['classifier__C'] = [0.1, 1, 10]
    if name in ('Decision Tree', 'Random Forest'):
        param_grid['classifier__max_depth'] = [3, 5, 7]
    return param_grid


def _explain_with_shap(name, pipeline, X_test, feature_names):
    """Draw a SHAP summary plot for *pipeline* when its classifier is tree-based.

    Explanation is best-effort: a failure is logged as a warning and does not
    abort the run.
    """
    classifier = pipeline.named_steps['classifier']
    tree_types = (DecisionTreeClassifier, RandomForestClassifier, GradientBoostingClassifier)
    if not isinstance(classifier, tree_types):
        # TreeExplainer only handles tree models.
        return
    try:
        # The classifier was trained on scaled, selected features, so the
        # explainer must see the same representation rather than raw X_test.
        X_selected = pipeline[:-1].transform(X_test)
        kept = pipeline.named_steps['feature_selection'].get_support()
        selected_names = [label for label, keep in zip(feature_names, kept) if keep]
        explainer = TreeExplainer(classifier)
        shap_values = explainer.shap_values(X_selected)
        shap.summary_plot(shap_values, X_selected, feature_names=selected_names)
    except Exception as e:
        # NOTE(review): some tree models may be rejected by the installed
        # shap version (e.g. multiclass gradient boosting) -- confirm.
        logger.warning(f"Skipping SHAP explanation for {name}: {e}")


async def async_main():
    """Run the full workflow: tune, evaluate, save, explain and plot.

    Steps, in order: load and split the data, grid-search every classifier
    pipeline, evaluate the tuned pipelines in parallel, save each tuned
    model, draw SHAP summaries for tree-based models, plot the scores and
    print resource usage.

    Raises:
        Exception: Any error outside the best-effort SHAP step is logged and
            re-raised.
    """
    try:
        X_train, X_test, y_train, y_test = load_and_preprocess_data()
        classifiers = get_classifiers()
        kfold = KFold(n_splits=5, shuffle=True, random_state=42)

        tuned_classifiers = {}
        for name, clf in classifiers.items():
            logger.info(f"Tuning hyperparameters for {name}")
            pipeline = create_pipeline(clf)
            param_grid = _build_param_grid(name)
            tuned_classifiers[name] = hyperparameter_tuning(pipeline, param_grid, X_train, y_train)

        results = Parallel(n_jobs=-1)(
            delayed(evaluate_classifier)(name, clf, X_train, y_train, X_test, y_test, kfold)
            for name, clf in tuned_classifiers.items()
        )

        for name, _, _ in results:
            save_model_locally(tuned_classifiers[name], name)

        # Loaded once instead of on every loop iteration.
        feature_names = load_iris().feature_names
        for name, clf in tuned_classifiers.items():
            _explain_with_shap(name, clf, X_test, feature_names)

        plot_scores([result[1] for result in results], [result[2] for result in results])
        monitor_resources()
    except Exception as e:
        logger.error(f"Error in main execution: {e}")
        raise
# Run the workflow only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(async_main())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement