# Change this to point at your dataset
input_file = 'CIC_IOT_2023_combined.csv'  # Input dataset

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # Disable GPU

import pandas as pd
import numpy as np
import time
import traceback
import csv
import warnings
from collections import defaultdict
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, fbeta_score, matthews_corrcoef
from sklearn.metrics import jaccard_score, cohen_kappa_score, hamming_loss, zero_one_loss, mean_absolute_error
from sklearn.metrics import mean_squared_error, r2_score, balanced_accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import (RandomForestClassifier, BaggingClassifier, GradientBoostingClassifier,
                              AdaBoostClassifier, VotingClassifier, StackingClassifier, IsolationForest)
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid
from sklearn.svm import SVC, OneClassSVM
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression, SGDClassifier, Perceptron, ElasticNet
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier, BernoulliRBM
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.mixture import GaussianMixture
from sklearn.pipeline import Pipeline
from sklearn.semi_supervised import SelfTrainingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, LSTM, GRU, SimpleRNN
from keras.utils import to_categorical

# Custom imports (optional, for the placeholder algorithms below)
# from pgmpy.models import BayesianNetwork  # For Bayesian Networks
# import geneticalgorithm as ga             # For a Genetic Algorithm-based classifier (hypothetical)
# Add fuzzy logic and CRF imports if you have specific packages (e.g., `python-crfsuite` for CRF)

warnings.filterwarnings("ignore")  # Suppress warnings for cleaner output

# Initialize dataset parameters
k_fold = 5  # Number of cross-validation folds; change as needed
dataset_percent = 100  # Percentage of the dataset to use; change as needed

# Initialize output CSV files and columns
# output_file = 'results.csv'
output_file = input_file.replace('.csv', '_results.csv')
class_metrics_file = input_file.replace('.csv', '_class_results.csv')
csv_columns = ['Algorithm', 'Fold', 'Train Time (s)', 'Test Time (s)', 'Accuracy', 'Precision', 'Recall', 'F1 Score',
               'Fbeta Score', 'Matthews Correlation Coefficient', 'Jaccard Score', 'Cohen Kappa Score',
               'Hamming Loss', 'Zero One Loss', 'Mean Absolute Error', 'Mean Squared Error', 'Root Mean Squared Error',
               'Balanced Accuracy', 'R2 Score']

# Columns for the per-class metrics CSV
class_metrics_columns = ['Algorithm', 'Fold', 'Class', 'Accuracy', 'Precision', 'Recall', 'F1 Score',
               'Fbeta Score', 'Matthews Correlation Coefficient', 'Jaccard Score', 'Cohen Kappa Score',
               'Hamming Loss', 'Zero One Loss', 'Mean Absolute Error', 'Mean Squared Error', 'Root Mean Squared Error',
               'Balanced Accuracy', 'R2 Score']

def compute_classwise_metrics(y_true, y_pred):
    """Score each class one-vs-rest, keyed by its original (pre-encoding) name."""
    class_metrics = defaultdict(dict)
    classes = np.unique(y_true)

    for class_index in classes:
        true_class_name = class_names[class_index]  # class_names is set after label encoding below
        # Binarize: 1 where the sample is (predicted as) this class, 0 otherwise
        y_true_class = (y_true == class_index).astype(int)
        y_pred_class = (y_pred == class_index).astype(int)

        # Calculate metrics for each true class name, rounded to 3 decimal places
        class_metrics[true_class_name]['Accuracy'] = round(accuracy_score(y_true_class, y_pred_class), 3)
        class_metrics[true_class_name]['Precision'] = round(precision_score(y_true, y_pred, labels=[class_index], average='weighted', zero_division=1), 3)
        class_metrics[true_class_name]['Recall'] = round(recall_score(y_true, y_pred, labels=[class_index], average='weighted', zero_division=1), 3)
        class_metrics[true_class_name]['F1 Score'] = round(f1_score(y_true, y_pred, labels=[class_index], average='weighted', zero_division=1), 3)
        class_metrics[true_class_name]['Fbeta Score'] = round(fbeta_score(y_true, y_pred, labels=[class_index], beta=1.0, average='weighted', zero_division=1), 3)
        class_metrics[true_class_name]['Matthews Correlation Coefficient'] = round(matthews_corrcoef(y_true_class, y_pred_class), 3)
        class_metrics[true_class_name]['Jaccard Score'] = round(jaccard_score(y_true, y_pred, labels=[class_index], average='weighted', zero_division=1), 3)
        class_metrics[true_class_name]['Cohen Kappa Score'] = round(cohen_kappa_score(y_true_class, y_pred_class), 3)
        class_metrics[true_class_name]['Hamming Loss'] = round(hamming_loss(y_true_class, y_pred_class), 3)
        class_metrics[true_class_name]['Zero One Loss'] = round(zero_one_loss(y_true_class, y_pred_class), 3)
        class_metrics[true_class_name]['Mean Absolute Error'] = round(mean_absolute_error(y_true_class, y_pred_class), 3)
        class_metrics[true_class_name]['Mean Squared Error'] = round(mean_squared_error(y_true_class, y_pred_class), 3)
        class_metrics[true_class_name]['Root Mean Squared Error'] = round(np.sqrt(class_metrics[true_class_name]['Mean Squared Error']), 3)
        class_metrics[true_class_name]['Balanced Accuracy'] = round(balanced_accuracy_score(y_true_class, y_pred_class), 3)
        class_metrics[true_class_name]['R2 Score'] = round(r2_score(y_true_class, y_pred_class), 3)

    return class_metrics

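# Illustration of the one-vs-rest binarization used above (toy values):
#   y_true = [0, 1, 2, 1], class_index = 1  ->  y_true_class = [0, 1, 0, 1]
# Each class is scored as its own binary problem against all other classes.
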
# Function to handle overall metric calculation
def compute_metrics(y_true, y_pred):
    metrics = {}
    metrics['Accuracy'] = accuracy_score(y_true, y_pred)
    metrics['Precision'] = precision_score(y_true, y_pred, average='weighted', zero_division=1)
    metrics['Recall'] = recall_score(y_true, y_pred, average='weighted', zero_division=1)
    metrics['F1 Score'] = f1_score(y_true, y_pred, average='weighted', zero_division=1)
    metrics['Fbeta Score'] = fbeta_score(y_true, y_pred, beta=1.0, average='weighted', zero_division=1)
    metrics['Matthews Correlation Coefficient'] = matthews_corrcoef(y_true, y_pred)
    metrics['Jaccard Score'] = jaccard_score(y_true, y_pred, average='weighted', zero_division=1)
    metrics['Cohen Kappa Score'] = cohen_kappa_score(y_true, y_pred)
    metrics['Hamming Loss'] = hamming_loss(y_true, y_pred)
    metrics['Zero One Loss'] = zero_one_loss(y_true, y_pred)
    metrics['Mean Absolute Error'] = mean_absolute_error(y_true, y_pred)
    metrics['Mean Squared Error'] = mean_squared_error(y_true, y_pred)
    metrics['Root Mean Squared Error'] = np.sqrt(metrics['Mean Squared Error'])
    metrics['Balanced Accuracy'] = balanced_accuracy_score(y_true, y_pred)
    metrics['R2 Score'] = r2_score(y_true, y_pred)

    return metrics

# Function to handle each algorithm execution
def run_algorithm(algo_name, model, X_train, y_train, X_test, y_test, fold):
    """Run a single algorithm and log its results."""
    try:
        start_train = time.time()
        model.fit(X_train, y_train)
        train_time = time.time() - start_train

        start_test = time.time()
        y_pred = model.predict(X_test)
        test_time = time.time() - start_test

        # Compute metrics
        if algo_name == 'ElasticNet':  # Handle ElasticNet as a regression model
            metrics = {}
            metrics['Mean Absolute Error'] = mean_absolute_error(y_test, y_pred)
            metrics['Mean Squared Error'] = mean_squared_error(y_test, y_pred)
            metrics['Root Mean Squared Error'] = np.sqrt(metrics['Mean Squared Error'])
            metrics['R2 Score'] = r2_score(y_test, y_pred)
            class_metrics = {}  # Continuous predictions have no per-class breakdown
        else:
            # Compute classification metrics, overall and per class
            metrics = compute_metrics(y_test, y_pred)
            class_metrics = compute_classwise_metrics(y_test, y_pred)

        metrics.update({'Train Time (s)': train_time, 'Test Time (s)': test_time})

        # Log overall results to the results CSV
        with open(output_file, 'a', newline='') as f:
            writer = csv.writer(f)
            writer.writerow([algo_name, fold] + [metrics.get(m, -1) for m in csv_columns[2:]])

        # Log per-class metrics to the class-metrics CSV
        with open(class_metrics_file, 'a', newline='') as f:
            writer = csv.writer(f)
            for class_label, cm in class_metrics.items():
                writer.writerow([algo_name, fold, class_label] + [cm.get(m, -1) for m in class_metrics_columns[3:]])

        print(f"{algo_name} | Fold: {fold} | Train Time: {train_time:.2f}s | Test Time: {test_time:.2f}s")

    except Exception:
        print(f"Error in {algo_name}: {traceback.format_exc()}")
        # Log the failure so the algorithm still appears in the results
        with open(output_file, 'a', newline='') as f:
            writer = csv.writer(f)
            writer.writerow([algo_name, fold] + [-1 for _ in csv_columns[2:]])

# Load dataset
df = pd.read_csv(input_file)
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Encode categorical features and the target labels
X = pd.get_dummies(X)
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
class_names = label_encoder.classes_

# Min-Max scaling
scaler = MinMaxScaler()
X = scaler.fit_transform(X)

# Take a stratified subset of the dataset if dataset_percent is less than 100
if dataset_percent < 100:
    _, X, _, y = train_test_split(X, y, test_size=dataset_percent/100, stratify=y, random_state=42)

# Write CSV headers if the files do not exist yet
if not os.path.exists(output_file):
    with open(output_file, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(csv_columns)

if not os.path.exists(class_metrics_file):
    with open(class_metrics_file, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(class_metrics_columns)

# K-Fold Cross Validation
kf = KFold(n_splits=k_fold, shuffle=True, random_state=42)

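# Note: plain KFold ignores class balance, which can matter for the skewed attack
# classes in CIC-IoT-2023. A drop-in alternative (a sketch, not enabled here; the
# loop below would then need kf.split(X, y) instead of kf.split(X)):
#   from sklearn.model_selection import StratifiedKFold
#   kf = StratifiedKFold(n_splits=k_fold, shuffle=True, random_state=42)
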
# Deep learning model factories (CNN, RNN, LSTM, GRU, autoencoder).
# These assume image/sequence-shaped input (e.g. 28x28) and default to a
# 10-class softmax head; pass num_classes to match your dataset before enabling them.
def create_cnn(input_shape, num_classes=10):
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def create_rnn(input_shape, num_classes=10):
    model = Sequential()
    model.add(SimpleRNN(128, input_shape=input_shape, return_sequences=True))
    model.add(Flatten())
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def create_lstm(input_shape, num_classes=10):
    model = Sequential()
    model.add(LSTM(128, input_shape=input_shape, return_sequences=True))
    model.add(Flatten())
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def create_gru(input_shape, num_classes=10):
    model = Sequential()
    model.add(GRU(128, input_shape=input_shape, return_sequences=True))
    model.add(Flatten())
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

def create_autoencoder(input_shape):
    model = Sequential()
    model.add(Dense(128, input_shape=input_shape, activation='relu'))
    model.add(Dense(input_shape[0], activation='sigmoid'))
    model.compile(optimizer='adam', loss='mse')
    return model

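# The factories above expect image/sequence-shaped input, while this script's X is a
# flat tabular array. A minimal sketch (defined but not wired into the benchmark loop)
# of feeding tabular rows to the recurrent models as length-1 sequences; the helper
# name prepare_dl_inputs is illustrative, not part of the original script:
def prepare_dl_inputs(X, y, num_classes):
    """Reshape flat features to (samples, timesteps=1, features) and one-hot the labels."""
    X_seq = X.reshape(-1, 1, X.shape[1])
    y_cat = to_categorical(y, num_classes=num_classes)
    return X_seq, y_cat
# Example usage (illustrative):
#   X_seq, y_cat = prepare_dl_inputs(X, y, len(class_names))
#   model = create_lstm((1, X.shape[1]), num_classes=len(class_names))
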
# List of algorithms
algorithms = {
    'Naive Bayes': GaussianNB(),
    'LDA': LinearDiscriminantAnalysis(),
    'QDA': QuadraticDiscriminantAnalysis(),
#    'SVM': SVC(kernel='linear', max_iter=1000),
    'Decision Tree': DecisionTreeClassifier(),
#    'SGD Classifier': SGDClassifier(),
    'KNN': KNeighborsClassifier(),
#    'ElasticNet': ElasticNet(),
#    'Perceptron': Perceptron(),
    'Logistic Regression': LogisticRegression(),
    'Bagging': BaggingClassifier(),
    'K-Means': KMeans(n_clusters=3),
    'Nearest Centroid Classifier': NearestCentroid(),
    'XGBoost': XGBClassifier(),
    'AdaBoost': AdaBoostClassifier(),
#    'RNN': create_rnn((28, 28)),
    'RBM + Logistic Regression': Pipeline(steps=[('rbm', BernoulliRBM(n_components=100, learning_rate=0.06, n_iter=10, random_state=42)),
                                                 ('logistic', LogisticRegression())]),
    'Voting Classifier': VotingClassifier(estimators=[('lr', LogisticRegression()),
                                                      ('rf', RandomForestClassifier()),
                                                      ('gnb', GaussianNB())], voting='hard'),
    'Random Forest': RandomForestClassifier(n_estimators=10),
#    'Gradient Boosting': GradientBoostingClassifier(n_estimators=10),
    'Stacking Classifier': StackingClassifier(estimators=[('log_reg', LogisticRegression()),
                                                          ('knn', KNeighborsClassifier(n_neighbors=3))],
                                              final_estimator=LogisticRegression(), n_jobs=-1),
#    'MLP Classifier': MLPClassifier(),
#    'GRU': create_gru((28, 28)),
#    'LSTM': create_lstm((28, 28)),
#    'CNN': create_cnn((28, 28, 1)),
#    'Autoencoder': create_autoencoder((28,)),
    'LightGBM': LGBMClassifier(),
    'CatBoost': CatBoostClassifier(verbose=0),  # verbose=0 silences CatBoost's per-iteration log
    'Self-Training': SelfTrainingClassifier(LogisticRegression()),
    'Isolation Forest': IsolationForest(),
#    'One-Class SVM': OneClassSVM(kernel='linear', max_iter=1000),
#    'Deep Belief Network': "Implement DBN",  # Placeholder for DBN
#    'Restricted Boltzmann Machine': "Implement RBM",  # Placeholder for RBM
#    'Genetic Algorithm': ga.GeneticAlgorithm(),  # Placeholder for a Genetic Algorithm-based classifier
#    'Bayesian Network': BayesianNetwork([('A', 'B'), ('B', 'C')]),  # Example Bayesian Network
#    'Fuzzy Logic': "Implement Fuzzy Logic",  # Placeholder for Fuzzy Logic systems
#    'Conditional Random Field (CRF)': "Implement CRF",  # Placeholder for CRF
}

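# Note: K-Means and Isolation Forest are unsupervised, so their predict() outputs
# (arbitrary cluster IDs, or {-1, 1} for Isolation Forest) do not line up with the
# encoded class labels, and the classification metrics logged for them should be read
# with care. A minimal majority-vote mapping from cluster IDs to class labels (a sketch;
# map_clusters_to_classes is a hypothetical helper, not called by the loop below):
def map_clusters_to_classes(cluster_ids, y_true):
    mapping = {}
    for c in np.unique(cluster_ids):
        mask = cluster_ids == c
        mapping[c] = np.bincount(y_true[mask]).argmax()  # most common true class in cluster c
    return np.array([mapping[c] for c in cluster_ids])
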
# Running algorithms in k-fold cross-validation
for fold, (train_idx, test_idx) in enumerate(kf.split(X), 1):
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    for algo_name, model in algorithms.items():
        # if 'CNN' in algo_name or 'RNN' in algo_name or 'LSTM' in algo_name or 'GRU' in algo_name or 'Autoencoder' in algo_name:
        #     # Special handling for deep learning models
        #     X_train_dl = X_train.reshape(-1, 28, 28, 1)
        #     X_test_dl = X_test.reshape(-1, 28, 28, 1)
        #     y_train_dl = to_categorical(y_train, num_classes=10)
        #     y_test_dl = to_categorical(y_test, num_classes=10)
        #     run_algorithm(algo_name, model, X_train_dl, y_train_dl, X_test_dl, y_test_dl, fold)
        # else:
        run_algorithm(algo_name, model, X_train, y_train, X_test, y_test, fold)
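
# After the run, one row per (algorithm, fold) accumulates in the results CSV. A small
# sketch for averaging metrics across folds (commented out so the script's behavior is
# unchanged; the _summary.csv name is illustrative):
#   results = pd.read_csv(output_file)
#   summary = results.groupby('Algorithm').mean(numeric_only=True).round(3)
#   summary.to_csv(output_file.replace('.csv', '_summary.csv'))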
  303.  