TeamLease GAN
mayankjoin3 | Dec 8th, 2024 | Python
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # Disable GPU so TensorFlow/Keras runs on CPU

input_directory = './GAN'               # Directory containing the input CSV files
output_directory = 'Classwise_Results'  # Directory where the result CSVs are written

# Ensure the output directory exists
os.makedirs(output_directory, exist_ok=True)

import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, fbeta_score, matthews_corrcoef
from sklearn.metrics import jaccard_score, cohen_kappa_score, hamming_loss, zero_one_loss, mean_absolute_error
from sklearn.metrics import mean_squared_error, r2_score, balanced_accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, GradientBoostingClassifier, AdaBoostClassifier, VotingClassifier, StackingClassifier, IsolationForest
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid
from sklearn.svm import SVC, OneClassSVM
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression, SGDClassifier, Perceptron, ElasticNet
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier, BernoulliRBM
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.mixture import GaussianMixture
from sklearn.pipeline import Pipeline
from sklearn.semi_supervised import SelfTrainingClassifier
from xgboost import XGBClassifier
# from lightgbm import LGBMClassifier
# from catboost import CatBoostClassifier
# Keras imports back the deep models sketched or commented out below
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, LSTM, GRU, SimpleRNN
from keras.utils import to_categorical
import traceback
import csv
import warnings
from collections import defaultdict

warnings.filterwarnings("ignore")  # Suppress warnings for cleaner output

# Experiment parameters
k_fold = 5            # Number of folds for cross-validation
dataset_percent = 50  # Percentage of each dataset to use

# Column headers for the overall and per-class result CSVs
csv_columns = [
    'Algorithm', 'Fold', 'Train Time (s)', 'Test Time (s)', 'Accuracy', 'Precision', 'Recall', 'F1 Score',
    'Fbeta Score', 'Matthews Correlation Coefficient', 'Jaccard Score', 'Cohen Kappa Score',
    'Hamming Loss', 'Zero One Loss', 'Mean Absolute Error', 'Mean Squared Error', 'Root Mean Squared Error',
    'Balanced Accuracy', 'R2 Score'
]
class_metrics_columns = [
    'Algorithm', 'Fold', 'Class', 'Accuracy', 'Precision', 'Recall', 'F1 Score',
    'Fbeta Score', 'Matthews Correlation Coefficient', 'Jaccard Score', 'Cohen Kappa Score',
    'Hamming Loss', 'Zero One Loss', 'Mean Absolute Error', 'Mean Squared Error', 'Root Mean Squared Error',
    'Balanced Accuracy', 'R2 Score'
]

# Compute per-class metrics by binarizing each class one-vs-rest
def compute_classwise_metrics(y_true, y_pred):
    class_metrics = defaultdict(dict)
    classes = np.unique(y_true)
    for class_index in classes:
        true_class_name = class_index  # use the raw label as the class name
        # One-vs-rest view: 1 for this class, 0 for everything else
        y_true_class = (y_true == class_index).astype(int)
        y_pred_class = (y_pred == class_index).astype(int)

        # Note: the error-style metrics (MAE, MSE, RMSE, R2) are computed on
        # these binarized labels, so they measure 0/1 disagreement
        class_metrics[true_class_name] = {
            'Accuracy': round(accuracy_score(y_true_class, y_pred_class), 3),
            'Precision': round(precision_score(y_true, y_pred, labels=[class_index], average='weighted', zero_division=1), 3),
            'Recall': round(recall_score(y_true, y_pred, labels=[class_index], average='weighted', zero_division=1), 3),
            'F1 Score': round(f1_score(y_true, y_pred, labels=[class_index], average='weighted', zero_division=1), 3),
            'Fbeta Score': round(fbeta_score(y_true, y_pred, labels=[class_index], beta=1.0, average='weighted', zero_division=1), 3),
            'Matthews Correlation Coefficient': round(matthews_corrcoef(y_true_class, y_pred_class), 3),
            'Jaccard Score': round(jaccard_score(y_true, y_pred, labels=[class_index], average='weighted', zero_division=1), 3),
            'Cohen Kappa Score': round(cohen_kappa_score(y_true_class, y_pred_class), 3),
            'Hamming Loss': round(hamming_loss(y_true_class, y_pred_class), 3),
            'Zero One Loss': round(zero_one_loss(y_true_class, y_pred_class), 3),
            'Mean Absolute Error': round(mean_absolute_error(y_true_class, y_pred_class), 3),
            'Mean Squared Error': round(mean_squared_error(y_true_class, y_pred_class), 3),
            'Root Mean Squared Error': round(np.sqrt(mean_squared_error(y_true_class, y_pred_class)), 3),
            'Balanced Accuracy': round(balanced_accuracy_score(y_true_class, y_pred_class), 3),
            'R2 Score': round(r2_score(y_true_class, y_pred_class), 3),
        }
    return class_metrics
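
# Illustrative call (toy values, not from the script's data): with
# y_true = np.array([0, 1, 1, 0]) and y_pred = np.array([0, 1, 0, 0]),
# compute_classwise_metrics(y_true, y_pred) returns one entry per label,
# e.g. {0: {'Accuracy': 0.75, ...}, 1: {'Accuracy': 0.75, ...}}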

# Compute overall metrics for one fold. Weighted averaging accounts for class
# imbalance; zero_division=1 avoids warnings when a class gets no predictions;
# with beta=1.0 the Fbeta score coincides with F1
def compute_metrics(y_true, y_pred):
    return {
        'Accuracy': accuracy_score(y_true, y_pred),
        'Precision': precision_score(y_true, y_pred, average='weighted', zero_division=1),
        'Recall': recall_score(y_true, y_pred, average='weighted', zero_division=1),
        'F1 Score': f1_score(y_true, y_pred, average='weighted', zero_division=1),
        'Fbeta Score': fbeta_score(y_true, y_pred, beta=1.0, average='weighted', zero_division=1),
        'Matthews Correlation Coefficient': matthews_corrcoef(y_true, y_pred),
        'Jaccard Score': jaccard_score(y_true, y_pred, average='weighted', zero_division=1),
        'Cohen Kappa Score': cohen_kappa_score(y_true, y_pred),
        'Hamming Loss': hamming_loss(y_true, y_pred),
        'Zero One Loss': zero_one_loss(y_true, y_pred),
        'Mean Absolute Error': mean_absolute_error(y_true, y_pred),
        'Mean Squared Error': mean_squared_error(y_true, y_pred),
        'Root Mean Squared Error': np.sqrt(mean_squared_error(y_true, y_pred)),
        'Balanced Accuracy': balanced_accuracy_score(y_true, y_pred),
        'R2 Score': r2_score(y_true, y_pred),
    }
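
# Example (toy values): compute_metrics(np.array([0, 1, 1]), np.array([0, 1, 0]))
# yields accuracy 2/3. Note the error metrics (MAE, MSE, RMSE, R2) treat the
# labels as numbers, so they implicitly assume integer-encoded classes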

# Train one model on a fold, time both phases, and append the overall and
# class-wise metrics to the two result CSVs; any failure is printed with a
# traceback and the model is skipped for that fold
def run_algorithm(algo_name, model, X_train, y_train, X_test, y_test, fold, output_file, class_metrics_file):
    try:
        start_train = time.time()
        model.fit(X_train, y_train)
        train_time = time.time() - start_train

        start_test = time.time()
        y_pred = model.predict(X_test)
        test_time = time.time() - start_test

        metrics = compute_metrics(y_test, y_pred)
        metrics.update({'Train Time (s)': train_time, 'Test Time (s)': test_time})
        class_metrics = compute_classwise_metrics(y_test, y_pred)

        # Append the overall row (metrics missing from the dict default to -1)
        with open(output_file, 'a', newline='') as f:
            writer = csv.writer(f)
            writer.writerow([algo_name, fold] + [metrics.get(m, -1) for m in csv_columns[2:]])

        # Append one row per class
        with open(class_metrics_file, 'a', newline='') as f:
            writer = csv.writer(f)
            for class_label, cm in class_metrics.items():
                writer.writerow([algo_name, fold, class_label] + [cm.get(m, -1) for m in class_metrics_columns[3:]])

        print(f"{algo_name} | Fold: {fold} | Train Time: {train_time:.2f}s | Test Time: {test_time:.2f}s")
    except Exception:
        print(f"Error in {algo_name}: {traceback.format_exc()}")
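
# The commented-out deep models in the algorithm dictionary below reference
# builder functions the original paste never defines. A minimal sketch of one
# of them, under assumptions of my own (64 units, a placeholder default of
# 10 classes): a raw Keras model is not a drop-in sklearn estimator -- here
# fit() expects one-hot targets and predict() returns class probabilities,
# so run_algorithm would need an argmax step (or a wrapper) to use it.
def create_lstm(input_shape, n_classes=10):
    model = Sequential()
    model.add(LSTM(64, input_shape=input_shape))        # sequence encoder
    model.add(Dense(n_classes, activation='softmax'))   # per-class probabilities
    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model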

# Loop through the input CSV files
for input_file in os.listdir(input_directory):
    if input_file.endswith('.csv'):
        input_path = os.path.join(input_directory, input_file)
        output_file = os.path.join(output_directory, input_file.replace('.csv', '_results.csv'))
        class_metrics_file = os.path.join(output_directory, input_file.replace('.csv', '_class_results.csv'))

        # The last column is the label; one-hot encode the feature columns
        df = pd.read_csv(input_path)
        X = pd.get_dummies(df.iloc[:, :-1])
        y = df.iloc[:, -1]
        class_names = y.unique()
        # Optional preprocessing (XGBoost needs integer labels; keep y a
        # Series so the .iloc indexing below still works):
        # y = pd.Series(LabelEncoder().fit_transform(df.iloc[:, -1]))
        # class_names = LabelEncoder().fit(df.iloc[:, -1]).classes_
        # X = MinMaxScaler().fit_transform(X)

        # Keep dataset_percent% of the rows: the second values returned by
        # train_test_split (the "test" side) become the working sample
        if dataset_percent < 100:
            _, X, _, y = train_test_split(X, y, test_size=dataset_percent / 100, stratify=y)

        # Initialize both result CSVs with their header rows
        with open(output_file, 'w', newline='') as f:
            csv.writer(f).writerow(csv_columns)
        with open(class_metrics_file, 'w', newline='') as f:
            csv.writer(f).writerow(class_metrics_columns)

        kf = KFold(n_splits=k_fold, shuffle=True, random_state=42)
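
        # With shuffle=True and a fixed random_state, kf yields the same five
        # train/test splits on every run, so scores stay comparable across
        # algorithms and across re-runs for a given input file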

        # Candidate models. Entries that are not true supervised classifiers
        # are flagged below; their failures are caught inside run_algorithm
        algorithms = {
            'Naive Bayes': GaussianNB(),
            'LDA': LinearDiscriminantAnalysis(),
            'QDA': QuadraticDiscriminantAnalysis(),
            'SVM': SVC(kernel='linear', max_iter=1000),
            'Decision Tree': DecisionTreeClassifier(),
            'SGD Classifier': SGDClassifier(),
            'KNN': KNeighborsClassifier(),
            'ElasticNet': ElasticNet(),  # regressor: its float predictions break the classification metrics
            'Perceptron': Perceptron(),
            'Logistic Regression': LogisticRegression(),
            'Bagging': BaggingClassifier(),
            'K-Means': KMeans(n_clusters=3),  # unsupervised: cluster ids need not match the true labels
            'Nearest Centroid Classifier': NearestCentroid(),
            'XGBoost': XGBClassifier(),
            'AdaBoost': AdaBoostClassifier(),
            # 'RNN': create_rnn((28, 28)),  # builder not defined in this paste
            'RBM + Logistic Regression': Pipeline(steps=[('rbm', BernoulliRBM(n_components=100, learning_rate=0.06, n_iter=10, random_state=42)), ('logistic', LogisticRegression())]),
            # 'Voting Classifier': VotingClassifier(estimators=[('lr', LogisticRegression()), ('rf', RandomForestClassifier()), ('gnb', GaussianNB())], voting='hard'),
            'Random Forest': RandomForestClassifier(n_estimators=10),
            'Gradient Boosting': GradientBoostingClassifier(n_estimators=10),
            # 'Stacking Classifier': StackingClassifier(estimators=[('log_reg', LogisticRegression()), ('knn', KNeighborsClassifier(n_neighbors=3))], final_estimator=LogisticRegression(), n_jobs=-1),
            'MLP Classifier': MLPClassifier(),
            # 'GRU': create_gru((28, 28)),  # builder not defined in this paste
            # 'LSTM': create_lstm((28, 28)),  # a sketch of this builder is defined above
            # 'CNN': create_cnn((28, 28, 1)),  # builder not defined in this paste
            # 'Autoencoder': create_autoencoder((28,)),  # builder not defined in this paste
            # 'LightGBM': LGBMClassifier(),
            # 'CatBoost': CatBoostClassifier(),
            # 'Self-Training': SelfTrainingClassifier(LogisticRegression()),
            'Isolation Forest': IsolationForest(),  # anomaly detector: predicts -1/1 rather than class labels
            # 'One-Class SVM': OneClassSVM(kernel='linear', max_iter=1000),
            # 'Deep Belief Network': "Implement DBN",  # placeholder
            # 'Restricted Boltzmann Machine': "Implement RBM",  # placeholder
            # 'Genetic Algorithm': ga.GeneticAlgorithm(),  # placeholder for a GA-based classifier
            # 'Bayesian Network': BayesianNetwork([('A', 'B'), ('B', 'C')]),  # example Bayesian network
            # 'Fuzzy Logic': "Implement Fuzzy Logic",  # placeholder
            # 'Conditional Random Field (CRF)': "Implement CRF",  # placeholder
        }

        print(f"Shape of X: {X.shape}")
        print(f"Number of samples in y: {len(y)}")

        # Cross-validation loop: rows are selected positionally with .iloc
        for fold, (train_idx, test_idx) in enumerate(kf.split(X), 1):
            X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
            y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

            for algo_name, model in algorithms.items():
                run_algorithm(algo_name, model, X_train, y_train, X_test, y_test, fold, output_file, class_metrics_file)
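
# Optional post-run sketch (not part of the original script): aggregate the
# per-fold rows that were just written into mean scores per algorithm.
# 'output_file' still points at the last dataset processed by the loop above.
# summary = pd.read_csv(output_file).groupby('Algorithm').mean(numeric_only=True)
# print(summary[['Accuracy', 'F1 Score', 'Train Time (s)']])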