Advertisement
mayankjoin3

gwo unsw xai

Nov 19th, 2024
27
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.05 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. from sklearn.model_selection import train_test_split
  4. from sklearn.ensemble import RandomForestClassifier
  5. from sklearn.metrics import accuracy_score, classification_report
  6. import shap
  7. from sklearn.preprocessing import MinMaxScaler
  8. from sklearn.base import clone
  9. from scipy.stats import uniform
  10.  
  11. # Grey Wolf Optimizer Implementation
  12. class GreyWolfOptimizer:
  13.     def __init__(self, fitness_function, n_agents, n_iterations, n_features):
  14.         self.fitness_function = fitness_function
  15.         self.n_agents = n_agents
  16.         self.n_iterations = n_iterations
  17.         self.n_features = n_features
  18.         self.alpha = None
  19.         self.beta = None
  20.         self.delta = None
  21.  
  22.     def initialize_population(self):
  23.         return np.random.randint(0, 2, (self.n_agents, self.n_features))
  24.  
  25.     def optimize(self):
  26.         population = self.initialize_population()
  27.         fitness = np.array([self.fitness_function(ind) for ind in population])
  28.  
  29.         self.alpha = population[np.argmin(fitness)]
  30.         self.beta = population[np.argsort(fitness)[1]]
  31.         self.delta = population[np.argsort(fitness)[2]]
  32.  
  33.         for iteration in range(self.n_iterations):
  34.             a = 2 - iteration * (2 / self.n_iterations)  # Linearly decreasing parameter
  35.  
  36.             for i in range(self.n_agents):
  37.                 for j in range(self.n_features):
  38.                     r1, r2 = np.random.random(), np.random.random()
  39.  
  40.                     # Update positions
  41.                     A1, C1 = 2 * a * r1 - a, 2 * r2
  42.                     D_alpha = abs(C1 * self.alpha[j] - population[i, j])
  43.                     X1 = self.alpha[j] - A1 * D_alpha
  44.  
  45.                     r1, r2 = np.random.random(), np.random.random()
  46.                     A2, C2 = 2 * a * r1 - a, 2 * r2
  47.                     D_beta = abs(C2 * self.beta[j] - population[i, j])
  48.                     X2 = self.beta[j] - A2 * D_beta
  49.  
  50.                     r1, r2 = np.random.random(), np.random.random()
  51.                     A3, C3 = 2 * a * r1 - a, 2 * r2
  52.                     D_delta = abs(C3 * self.delta[j] - population[i, j])
  53.                     X3 = self.delta[j] - A3 * D_delta
  54.  
  55.                     # Final position update
  56.                     population[i, j] = np.clip((X1 + X2 + X3) / 3, 0, 1)
  57.  
  58.             # Discretize population (binary)
  59.             population = (population > 0.5).astype(int)
  60.  
  61.             # Evaluate fitness
  62.             fitness = np.array([self.fitness_function(ind) for ind in population])
  63.  
  64.             # Update alpha, beta, delta
  65.             self.alpha = population[np.argmin(fitness)]
  66.             self.beta = population[np.argsort(fitness)[1]]
  67.             self.delta = population[np.argsort(fitness)[2]]
  68.  
  69.         return self.alpha
  70.  
  71. # Fitness Function for GWO
  72. def fitness_function(features):
  73.     selected_features = np.where(features == 1)[0]
  74.     if len(selected_features) == 0:  # Avoid empty feature subset
  75.         return 1e10
  76.     X_train_sel = X_train.iloc[:, selected_features]
  77.     X_test_sel = X_test.iloc[:, selected_features]
  78.  
  79.     model = clone(clf)  # Clone the base model
  80.     model.fit(X_train_sel, y_train)
  81.     y_pred = model.predict(X_test_sel)
  82.     return 1 - accuracy_score(y_test, y_pred)  # Minimize (1 - accuracy)
  83.  
  84. # Load the UNSW-NB15 dataset
  85. file_path = "UNSW-NB15.csv"  # Replace with your actual file path
  86. data = pd.read_csv(file_path)
  87.  
  88. # Check the dataset structure
  89. print("Dataset Preview:")
  90. print(data.head())
  91.  
  92. # Preprocessing
  93. X = data.drop(columns=['label', 'id'])  # Drop target and ID columns
  94. y = data['label']
  95. X = pd.get_dummies(X, drop_first=True)  # One-hot encode categorical features
  96. scaler = MinMaxScaler()
  97. X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)
  98.  
  99. # Use a 10% subset of the data
  100. X, _, y, _ = train_test_split(X, y, test_size=0.9, random_state=42, stratify=y)
  101.  
  102. # Train-test split
  103. X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
  104.  
  105. # Initialize the base classifier
  106. clf = RandomForestClassifier(n_estimators=50, random_state=42)
  107.  
  108. # Apply GWO for Feature Selection
  109. gwo = GreyWolfOptimizer(fitness_function, n_agents=10, n_iterations=20, n_features=X_train.shape[1])
  110. best_features = gwo.optimize()
  111.  
  112. # Select features
  113. selected_features = np.where(best_features == 1)[0]
  114. X_train_selected = X_train.iloc[:, selected_features]
  115. X_test_selected = X_test.iloc[:, selected_features]
  116.  
  117. # Train the classifier with selected features
  118. clf.fit(X_train_selected, y_train)
  119.  
  120. # Evaluate the model
  121. y_pred = clf.predict(X_test_selected)
  122. print("\nClassification Report with Selected Features:")
  123. print(classification_report(y_test, y_pred))
  124. accuracy = accuracy_score(y_test, y_pred)
  125. print(f"Accuracy with Selected Features: {accuracy:.2f}")
  126.  
  127. # SHAP Analysis
  128. explainer = shap.TreeExplainer(clf)
  129. shap_sample = X_test_selected.sample(100, random_state=42)
  130. shap_values = explainer.shap_values(shap_sample)
  131.  
  132. # SHAP Summary Plot
  133. print("\nSHAP Summary Plot for Selected Features:")
  134. shap.summary_plot(shap_values[1], shap_sample, feature_names=X_train_selected.columns)
  135.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement