# -*- coding: utf-8 -*-
"""CIC_FS_GAN_class.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1g7dDDwvbq0wtXf4dxuqgLDesUbUMnudi
"""

from datetime import datetime
start_time = datetime.now()

# from google.colab import drive
# drive.mount('/content/drive')

import pandas as pd
import numpy as np

data = pd.read_csv('CICIoT2023_part-00094_full_data.csv')
data.head()
data.shape

data = data.dropna()
data.shape

features = list(data.columns[:-1])
print(features)

from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Identify columns with object (string) data type
categorical_cols = data.select_dtypes(include=['object']).columns

# Create a LabelEncoder for each categorical column
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Identify columns to be scaled (excluding categorical columns)
numerical_cols = data.columns.difference(categorical_cols)

# Create a MinMaxScaler
scaler = MinMaxScaler()

# Apply scaling to numerical columns
scaled_data = scaler.fit_transform(data[numerical_cols])

# Combine the scaled numerical data and the encoded categorical data
scaled_df = pd.DataFrame(scaled_data, columns=numerical_cols, index=data.index)
encoded_df = data[categorical_cols]
processed_data = pd.concat([scaled_df, encoded_df], axis=1)

features = list(processed_data.columns[:-1])
X = processed_data[features].values
y = data.iloc[:, -1].values

processed_data.to_csv('cic-2023_pre-processed_wed_cleaned_output.csv', index=False)

Data_fraction = 0.1
MAX_ITER = 10
df = processed_data.sample(frac=Data_fraction, random_state=42)
df.to_csv('cic-2023_pre-processed_wed_cleaned_output_1pc.csv', index=False)

X = df[features].values
y = df.iloc[:, -1].values
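
# Optional sanity check (a sketch, not part of the original notebook): assuming the
# label column was among the string-typed columns encoded above, the fitted
# LabelEncoder can map the integer targets in `y` back to readable class names.
# label_col = data.columns[-1]  # hypothetical: the label is the last column
# if label_col in label_encoders:
#     uniq = np.unique(y).astype(int)
#     print(dict(zip(uniq, label_encoders[label_col].inverse_transform(uniq))))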
import csv
import random
import time

import numpy as np
from joblib import Parallel, delayed
from scipy.optimize import differential_evolution
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
- """**Cuckoo Search**"""
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- class CuckooSearch:
- def __init__(self, X, y, pop_size=20, max_iter=10, pa=0.25, Lambda=1.5):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.pa = pa
- self.Lambda = Lambda
- self.num_features = X.shape[1]
- self.population = self.generate_initial_population()
- self.fitness_values = np.zeros(pop_size)
- self.features = None # Can be set if feature names are available
- def levy_flight(self):
- u = np.random.normal(0, 1, size=1)
- v = np.random.normal(0, 1, size=1)
- step = u / np.power(np.abs(v), 1 / self.Lambda)
- return step
- def fitness_function(self, X_train, X_test, y_train, y_test, solution):
- selected_features = np.where(solution == 1)[0]
- selected_features = selected_features[selected_features < X_train.shape[1]]
- if len(selected_features) == 0:
- return 0
- model = KNeighborsClassifier()
- model.fit(X_train[:, selected_features], y_train)
- y_pred = model.predict(X_test[:, selected_features])
- return accuracy_score(y_test, y_pred)
- def generate_initial_population(self):
- return np.random.randint(2, size=(self.pop_size, self.num_features))
- def get_best_solution(self):
- best_index = np.argmax(self.fitness_values)
- return self.population[best_index], self.fitness_values[best_index]
- def search(self):
- X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.3, random_state=42)
- for i in range(self.pop_size):
- self.fitness_values[i] = self.fitness_function(X_train, X_test, y_train, y_test, self.population[i])
- best_solution, best_fitness = self.get_best_solution()
- for iteration in range(self.max_iter):
- new_population = self.population.copy()
- for i in range(self.pop_size):
- cuckoo = self.population[i] + self.levy_flight()
- cuckoo = np.clip(cuckoo, 0, 1) > np.random.random(self.num_features)
- fitness_cuckoo = self.fitness_function(X_train, X_test, y_train, y_test, cuckoo)
- if fitness_cuckoo > self.fitness_values[i]:
- new_population[i] = cuckoo
- self.fitness_values[i] = fitness_cuckoo
- abandon_indices = np.random.rand(self.pop_size) < self.pa
- new_population[abandon_indices] = self.generate_initial_population()[abandon_indices]
- for i in np.where(abandon_indices)[0]:
- self.fitness_values[i] = self.fitness_function(X_train, X_test, y_train, y_test, new_population[i])
- self.population = new_population
- best_solution, best_fitness = self.get_best_solution()
- print(f"Iteration {iteration + 1}, Best Fitness: {best_fitness}")
- selected_indices = np.where(best_solution == 1)[0]
- selected_features = [features[i] for i in selected_indices]
- return selected_features
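
# Minimal usage sketch (assumed, not from the original notebook); every optimizer class
# below follows the same pattern: construct with (X, y, pop_size, max_iter), call
# search(), and get back a list of selected feature names. Runtime is dominated by the
# pop_size * max_iter KNN fits inside the fitness function.
# cs = CuckooSearch(X, y, pop_size=20, max_iter=MAX_ITER)
# cs_selected = cs.search()
# print(f"Cuckoo Search kept {len(cs_selected)} of {len(features)} features")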
- """**Evolutionary Programming**"""
- import numpy as np
- import time
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- class EvolutionaryProgramming:
- def __init__(self, X, y, pop_size=20, max_iter=10, mutation_rate=0.1):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.mutation_rate = mutation_rate
- self.generation_counter = 0
- def fitness_function(self, solution, X_train, X_test, y_train, y_test):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Return worst fitness if no features are selected
- # Train and test classifier with the selected features
- model = KNeighborsClassifier(n_neighbors=3) # Simplified classifier
- model.fit(X_train[:, selected_features], y_train)
- y_pred = model.predict(X_test[:, selected_features])
- # Minimize the negative accuracy
- return -accuracy_score(y_test, y_pred)
- def mutate(self, solution):
- # Flip bits based on the mutation rate
- mutation = np.random.rand(len(solution)) < self.mutation_rate
- return np.where(mutation, 1 - solution, solution)
- def search(self):
- # Split the data into training and test sets
- X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.3, random_state=42)
- # Initialize the population (random binary solutions)
- population = np.random.randint(0, 2, size=(self.pop_size, self.X.shape[1]))
- # Start time to monitor the timing of each generation
- start_time = time.time()
- # Evolutionary Programming Loop
- for generation in range(self.max_iter):
- # Evaluate fitness of the current population
- fitness_scores = np.array([self.fitness_function(ind, X_train, X_test, y_train, y_test) for ind in population])
- # Track progress
- best_fitness = -np.min(fitness_scores)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- # Select parents (top 50% individuals)
- num_parents = self.pop_size // 2
- sorted_indices = np.argsort(fitness_scores)
- parents = population[sorted_indices[:num_parents]]
- # Mutate offspring
- offspring = np.array([self.mutate(parents[np.random.randint(num_parents)]) for _ in range(self.pop_size)])
- # Combine parents and offspring to form the new population
- population = np.vstack((parents, offspring))
- # End time
- end_time = time.time()
- total_time = end_time - start_time
- print(f"Total time taken for optimization: {total_time:.2f} seconds")
- # Return the best solution found
- best_solution_index = np.argmin(fitness_scores) # Index of the best solution
- best_solution = population[best_solution_index] # The solution itself (binary array)
- # Extract the selected features from the best solution
- selected_indices = np.where(best_solution > 0.5)[0] # Indices of the selected features
- selected_features = [features[i] for i in selected_indices]
- return selected_features
- """**Firefly Optimization**"""
- import numpy as np
- import time
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- class Firefly:
- def __init__(self, X, y, pop_size=20, max_iter=10, alpha=0.2, beta_min=0.2, gamma=1.0):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.alpha = alpha
- self.beta_min = beta_min
- self.gamma = gamma
- self.generation_counter = 0
- def fitness_function(self, solution, X_train, X_test, y_train, y_test):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Return worst fitness if no features are selected
- # Train and test classifier with the selected features
- model = KNeighborsClassifier(n_neighbors=3) # Simplified classifier
- model.fit(X_train[:, selected_features], y_train)
- y_pred = model.predict(X_test[:, selected_features])
- # Minimize the negative accuracy
- return -accuracy_score(y_test, y_pred)
- def move_firefly(self, firefly_i, firefly_j, beta):
- random_factor = self.alpha * (np.random.rand(len(firefly_i)) - 0.5)
- new_position = firefly_i + beta * (firefly_j - firefly_i) + random_factor
- return np.clip(new_position, 0, 1) # Ensure solution is in [0, 1]
- def search(self):
- # Split the data into training and test sets
- X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.3, random_state=42)
- # Initialize the population (random binary solutions)
- population = np.random.rand(self.pop_size, self.X.shape[1])
- # Compute initial fitness for the population
- fitness_scores = np.array([self.fitness_function(ind, X_train, X_test, y_train, y_test) for ind in population])
- # Start time to monitor the timing of each generation
- start_time = time.time()
- # Firefly Optimization Loop
- for generation in range(self.max_iter):
- for i in range(self.pop_size):
- for j in range(self.pop_size):
- if fitness_scores[j] < fitness_scores[i]: # Firefly j is more attractive
- beta = self.beta_min * np.exp(-self.gamma * np.linalg.norm(population[i] - population[j]) ** 2)
- population[i] = self.move_firefly(population[i], population[j], beta)
- # Recalculate fitness after moving
- fitness_scores[i] = self.fitness_function(population[i], X_train, X_test, y_train, y_test)
- # Track progress
- best_fitness = -np.min(fitness_scores)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- # End time
- end_time = time.time()
- total_time = end_time - start_time
- print(f"Total time taken for optimization: {total_time:.2f} seconds")
- # Return the best solution found
- best_solution_index = np.argmin(fitness_scores) # Index of the best solution
- best_solution = population[best_solution_index] # The solution itself (binary array)
- # Extract the selected feature names from the best solution
- selected_indices = np.where(best_solution > 0.5)[0]
- selected_features = [features[i] for i in selected_indices]
- return selected_features
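
# Worked example of the attractiveness term (illustration, not from the original):
# beta = beta_min * exp(-gamma * r**2), where r is the Euclidean distance between two
# fireflies. With beta_min=0.2 and gamma=1.0, fireflies at distance r=1 attract with
# beta = 0.2 * exp(-1) ~= 0.074, while nearly coincident fireflies feel the full
# beta ~= 0.2, so the pull toward brighter neighbors fades quickly with distance.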
- """**Adaptive Bacterial Foraging Optimization**"""
- import numpy as np
- import time
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- class AdaptiveBacterialForaging:
- def __init__(self, X, y, pop_size=20, max_iter=10, C=0.1, elimination_prob=0.1, reproduction_prob=0.5):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.C = C
- self.elimination_prob = elimination_prob
- self.reproduction_prob = reproduction_prob
- self.generation_counter = 0
- def fitness_function(self, solution, X_train, X_test, y_train, y_test):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Return worst fitness if no features selected
- # Train and test classifier with the selected features
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(X_train[:, selected_features], y_train)
- y_pred = model.predict(X_test[:, selected_features])
- # Minimize the negative accuracy
- return -accuracy_score(y_test, y_pred)
- def chemotaxis(self, bacteria, fitness_scores, X_train, X_test, y_train, y_test):
- for i in range(len(bacteria)):
- step = self.C * np.random.randn(bacteria.shape[1])
- new_bacteria = bacteria[i] + step
- new_bacteria = np.clip(new_bacteria, 0, 1) # Ensure solution remains in [0, 1]
- # Calculate fitness for the new solution
- new_fitness = self.fitness_function(new_bacteria, X_train, X_test, y_train, y_test)
- if new_fitness < fitness_scores[i]: # If fitness improves, update bacteria position
- bacteria[i] = new_bacteria
- fitness_scores[i] = new_fitness
- return bacteria, fitness_scores
- def reproduction(self, bacteria, fitness_scores):
- # Sort bacteria by fitness and select the better half
- sorted_indices = np.argsort(fitness_scores)
- bacteria = bacteria[sorted_indices]
- fitness_scores = fitness_scores[sorted_indices]
- # Replace the worst half by cloning the better half
- for i in range(len(bacteria) // 2):
- bacteria[-(i+1)] = bacteria[i]
- fitness_scores[-(i+1)] = fitness_scores[i]
- return bacteria, fitness_scores
- def elimination_dispersal(self, bacteria, fitness_scores, X_train, X_test, y_train, y_test):
- for i in range(len(bacteria)):
- if np.random.rand() < self.elimination_prob:
- # Replace the bacteria with a new random solution
- bacteria[i] = np.random.rand(bacteria.shape[1])
- fitness_scores[i] = self.fitness_function(bacteria[i], X_train, X_test, y_train, y_test)
- return bacteria, fitness_scores
- def search(self):
- # Split the data into training and test sets
- X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.3, random_state=42)
- # Initialize the population (random binary solutions)
- bacteria = np.random.rand(self.pop_size, self.X.shape[1])
- # Compute initial fitness for the population
- fitness_scores = np.array([self.fitness_function(bac, X_train, X_test, y_train, y_test) for bac in bacteria])
- # Start time to monitor the timing of each generation
- start_time = time.time()
- # ABFO Loop
- for generation in range(self.max_iter):
- # Chemotaxis
- bacteria, fitness_scores = self.chemotaxis(bacteria, fitness_scores, X_train, X_test, y_train, y_test)
- # Reproduction
- if np.random.rand() < self.reproduction_prob:
- bacteria, fitness_scores = self.reproduction(bacteria, fitness_scores)
- # Elimination and Dispersal
- bacteria, fitness_scores = self.elimination_dispersal(bacteria, fitness_scores, X_train, X_test, y_train, y_test)
- # Track progress
- best_fitness = -np.min(fitness_scores)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- # End time
- end_time = time.time()
- total_time = end_time - start_time
- print(f"Total time taken for optimization: {total_time:.2f} seconds")
- # Return the names of the best selected features
- best_solution = bacteria[np.argmin(fitness_scores)]
- return [features[i] for i in np.where(best_solution > 0.5)[0]]
"""**Ant Colony Optimization**"""

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import time

class AntColony:
    def __init__(self, X, y, pop_size=20, max_iter=10, alpha=1.0, beta=1.0, decay=0.1):
        self.X = X
        self.y = y
        self.pop_size = pop_size
        self.max_iter = max_iter
        self.alpha = alpha  # Pheromone influence
        self.beta = beta
        self.decay = decay  # Pheromone evaporation rate
        self.features = X.shape[1]

    # Fitness function for feature selection
    def fitness_function(self, solution, X_train, X_test, y_train, y_test):
        selected_features = np.where(solution > 0.5)[0]
        if len(selected_features) == 0:
            return 1  # Return worst fitness if no features selected
        # Train and test classifier with the selected features
        model = KNeighborsClassifier(n_neighbors=3)
        model.fit(X_train[:, selected_features], y_train)
        y_pred = model.predict(X_test[:, selected_features])
        # Minimize the negative accuracy
        return -accuracy_score(y_test, y_pred)

    # Function to initialize the pheromone matrix
    def initialize_pheromone_matrix(self, initial_pheromone=0.1):
        return np.ones(self.features) * initial_pheromone

    # Function to choose features based on pheromone values
    def select_features(self, pheromone):
        probabilities = pheromone ** self.alpha
        probabilities /= np.sum(probabilities)
        return np.random.rand(len(pheromone)) < probabilities

    # Function to update the pheromone matrix
    def update_pheromone(self, pheromone, best_solution):
        pheromone *= (1 - self.decay)  # Evaporation
        pheromone += best_solution     # Reinforce pheromone on the best solution
        return pheromone

    # Ant Colony Optimization for feature selection
    def search(self):
        generation_counter = 0
        # Split the data into training and test sets
        X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.3, random_state=42)
        # Initialize pheromone matrix for all features
        pheromone = self.initialize_pheromone_matrix()
        # Start time to monitor the timing of each generation
        start_time = time.time()
        best_solution = None
        best_fitness = float('inf')

        # ACO Loop
        for generation in range(self.max_iter):
            population = np.zeros((self.pop_size, self.features))
            fitness_scores = np.zeros(self.pop_size)
            # Each ant constructs a solution
            for i in range(self.pop_size):
                # Ant selects features based on the pheromone trail
                solution = self.select_features(pheromone)
                population[i] = solution
                # Calculate fitness for the constructed solution
                fitness_scores[i] = self.fitness_function(solution, X_train, X_test, y_train, y_test)
                # Update the best solution if necessary
                if fitness_scores[i] < best_fitness:
                    best_fitness = fitness_scores[i]
                    best_solution = solution
            # Update the pheromone matrix based on the best solution found so far
            pheromone = self.update_pheromone(pheromone, best_solution)
            # Track progress
            print(f"Generation {generation_counter}: Best fitness = {-best_fitness}")
            generation_counter += 1

        # End time
        end_time = time.time()
        total_time = end_time - start_time
        print(f"Total time taken for optimization: {total_time:.2f} seconds")

        selected_indices = np.where(best_solution > 0.5)[0]
        selected_features = [features[i] for i in selected_indices]
        return selected_features
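
# Illustrative note (an observation, not from the original notebook): with a uniform
# pheromone vector, select_features() normalizes to probabilities summing to 1, so the
# expected subset size per ant starts at exactly 1 feature regardless of dimensionality;
# reinforcement via update_pheromone() then grows subsets around features that keep
# appearing in the best solution.
# pher = np.ones(40) * 0.1                 # hypothetical 40-feature problem
# p = pher ** 1.0
# p /= p.sum()                             # each entry ~0.025
# print((np.random.rand(40) < p).sum())    # typically 0-3 features selected at first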
- """**Artificial Bee Colony Optimization**"""
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class ArtificialBeeColony:
- def __init__(self, X, y, pop_size=20, max_iter=10, limit=5):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.limit = limit
- self.features = X.shape[1]
- # Fitness function for feature selection
- def fitness_function(self, solution, X_train, X_test, y_train, y_test):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Return worst fitness if no features selected
- # Train and test classifier with the selected features
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(X_train[:, selected_features], y_train)
- y_pred = model.predict(X_test[:, selected_features])
- # Minimize the negative accuracy
- return -accuracy_score(y_test, y_pred)
- # ABC Optimization process
- def search(self):
- global generation_counter
- generation_counter = 0
- # Split the data into training and test sets
- X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.3, random_state=42)
- # Initialize the population (random binary solutions)
- population = np.random.rand(self.pop_size, self.features)
- # Initialize fitness scores
- fitness_scores = np.array([self.fitness_function(ind, X_train, X_test, y_train, y_test) for ind in population])
- # Start time to monitor the timing of each generation
- start_time = time.time()
- # ABC Optimization Loop
- for generation in range(self.max_iter):
- # Employed bees search for new solutions
- for i in range(self.pop_size):
- # Choose a random feature to change
- new_solution = population[i].copy()
- random_feature = np.random.randint(0, self.features)
- new_solution[random_feature] = 1 - new_solution[random_feature] # Flip the feature selection
- # Evaluate the new solution
- new_fitness = self.fitness_function(new_solution, X_train, X_test, y_train, y_test)
- # Greedily replace the old solution if the new one is better
- if new_fitness < fitness_scores[i]:
- population[i] = new_solution
- fitness_scores[i] = new_fitness
- # Onlooker bees select solutions based on fitness
- probabilities = 1 / (1 + fitness_scores)
- probabilities /= np.sum(probabilities) # Normalize probabilities
- for i in range(self.pop_size):
- if np.random.rand() < probabilities[i]: # Select this solution
- new_solution = population[i].copy()
- random_feature = np.random.randint(0, self.features)
- new_solution[random_feature] = 1 - new_solution[random_feature] # Flip the feature selection
- # Evaluate the new solution
- new_fitness = self.fitness_function(new_solution, X_train, X_test, y_train, y_test)
- # Greedily replace if the new one is better
- if new_fitness < fitness_scores[i]:
- population[i] = new_solution
- fitness_scores[i] = new_fitness
- # Scout bees search for new solutions if no improvement after limit iterations
- for i in range(self.pop_size):
- if fitness_scores[i] >= self.limit: # Check if it meets the limit
- population[i] = np.random.rand(self.features) # Restart solution
- fitness_scores[i] = self.fitness_function(population[i], X_train, X_test, y_train, y_test)
- # Track progress
- best_fitness = -np.min(fitness_scores)
- print(f"Generation {generation_counter}: Best fitness = {best_fitness}")
- generation_counter += 1
- # End time
- end_time = time.time()
- total_time = end_time - start_time
- print(f"Total time taken for optimization: {total_time:.2f} seconds")
- # Return the best solution found
- best_solution = population[np.argmin(fitness_scores)]
- return [features[i] for i in np.where(best_solution > 0.5)[0]]
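
# Illustrative arithmetic (not from the original notebook): fitness is negative
# accuracy in [-1, 0], so the onlooker weighting 1 / (1 + fitness) rises sharply as
# accuracy approaches 1.0. An accuracy of 0.99 weighs 1 / 0.01 = 100, while 0.50
# weighs only 1 / 0.5 = 2, so onlooker bees concentrate on near-best solutions.
# (A perfect accuracy of 1.0 would divide by zero; the KNN fitness on this
# subsampled data is assumed to stay below that.)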
- """**Sine Cosine Optimization**"""
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class SineCosine:
- def __init__(self, X, y, pop_size=20, max_iter=10, a=2):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.a = a
- self.features = X.shape[1]
- # Fitness function for feature selection
- def fitness_function(self, solution, X_train, X_test, y_train, y_test):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Return worst fitness if no features selected
- # Train and test classifier with the selected features
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(X_train[:, selected_features], y_train)
- y_pred = model.predict(X_test[:, selected_features])
- # Minimize the negative accuracy
- return -accuracy_score(y_test, y_pred)
- # Sine Cosine Optimization process
- def search(self):
- global generation_counter
- generation_counter = 0
- # Split the data into training and test sets
- X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.3, random_state=42)
- # Initialize the population (random binary solutions)
- population = np.random.rand(self.pop_size, self.features)
- # Compute initial fitness for the population
- fitness_scores = np.array([self.fitness_function(ind, X_train, X_test, y_train, y_test) for ind in population])
- # Start time to monitor the timing of each generation
- start_time = time.time()
- # SCO Optimization Loop
- for generation in range(self.max_iter):
- for i in range(self.pop_size):
- # Generate a new solution
- new_solution = np.zeros_like(population[i])
- for j in range(self.features):
- # Calculate the sine and cosine components for the feature
- r1 = np.random.rand()
- r2 = np.random.rand()
- new_solution[j] = (np.sin(r1 * np.pi) * population[i][j] +
- np.cos(r2 * np.pi) * (np.mean(population[:, j]) - population[i][j]))
- # Ensure the solution is in [0, 1]
- new_solution[j] = np.clip(new_solution[j], 0, 1)
- # Evaluate the new solution
- new_fitness = self.fitness_function(new_solution, X_train, X_test, y_train, y_test)
- # Greedily replace the old solution if the new one is better
- if new_fitness < fitness_scores[i]:
- population[i] = new_solution
- fitness_scores[i] = new_fitness
- # Track progress
- best_fitness = -np.min(fitness_scores)
- print(f"Generation {generation_counter}: Best fitness = {best_fitness}")
- generation_counter += 1
- # End time
- end_time = time.time()
- total_time = end_time - start_time
- print(f"Total time taken for optimization: {total_time:.2f} seconds")
- # Return the best solution found
- best_solution_index = np.argmin(fitness_scores) # Index of the best solution
- best_solution = population[best_solution_index] # The solution itself (binary array)
- return [features[i] for i in np.where(best_solution > 0.5)[0]]
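
# Illustrative note (not from the original notebook): for r1, r2 drawn from [0, 1),
# sin(r1 * pi) lies in [0, 1] and cos(r2 * pi) in [-1, 1], so each coordinate of the
# candidate is a damped copy of the current position plus a randomly signed pull
# toward (or push away from) the population mean for that feature, clipped to [0, 1].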
- """**Social Spider Optimization**"""
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class SocialSpider:
- def __init__(self, X, y, pop_size=20, max_iter=10):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.features = X.shape[1]
- # Fitness function for feature selection
- def fitness_function(self, solution, X_train, X_test, y_train, y_test):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Worst fitness if no features selected
- # Train and test classifier with the selected features
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(X_train[:, selected_features], y_train)
- y_pred = model.predict(X_test[:, selected_features])
- return -accuracy_score(y_test, y_pred) # Minimize the negative accuracy
- # Social Spider Optimization process
- def search(self):
- global generation_counter
- generation_counter = 0
- # Split the data into training and test sets
- X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.3, random_state=42)
- # Initialize the population (random binary solutions)
- population = np.random.rand(self.pop_size, self.features)
- # Compute initial fitness for the population
- fitness_scores = np.array([self.fitness_function(ind, X_train, X_test, y_train, y_test) for ind in population])
- # Start time to monitor the timing of each generation
- start_time = time.time()
- # SSO Optimization Loop
- for generation in range(self.max_iter):
- for i in range(self.pop_size):
- spider_fitness = fitness_scores[i]
- best_spider = np.argmin(fitness_scores)
- # Update position based on the best spider
- for j in range(self.features):
- r1 = np.random.rand()
- r2 = np.random.rand()
- if np.random.rand() < 0.5:
- new_value = population[i][j] + r1 * (population[best_spider][j] - population[i][j])
- else:
- new_value = population[i][j] + r2 * (np.mean(population, axis=0)[j] - population[i][j])
- # Ensure the new value is within [0, 1]
- population[i][j] = np.clip(new_value, 0, 1)
- # Evaluate the new solution
- fitness_scores[i] = self.fitness_function(population[i], X_train, X_test, y_train, y_test)
- # Track progress
- best_fitness = -np.min(fitness_scores)
- print(f"Generation {generation_counter}: Best fitness = {best_fitness}")
- generation_counter += 1
- # End time
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- # Return the best solution found
- best_solution_index = np.argmin(fitness_scores) # Index of the best solution
- best_solution = population[best_solution_index] # The solution itself (binary array)
- return [features[i] for i in np.where(best_solution > 0.5)[0]]
- """**Symbiotic Organisms Search Optimization**"""
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class Symbiotic:
- def __init__(self, X, y, pop_size=20, max_iter=10):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.features = X.shape[1]
- # Fitness function for feature selection
- def fitness_function(self, solution, X_train, X_test, y_train, y_test):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Worst fitness if no features selected
- # Train and test classifier with the selected features
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(X_train[:, selected_features], y_train)
- y_pred = model.predict(X_test[:, selected_features])
- return -accuracy_score(y_test, y_pred) # Minimize the negative accuracy
- # Symbiotic Organisms Search Optimization process
- def search(self):
- global generation_counter
- generation_counter = 0
- # Split the data into training and test sets
- X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.3, random_state=42)
- # Initialize the population (random binary solutions)
- population = np.random.rand(self.pop_size, self.features)
- # Compute initial fitness for the population
- fitness_scores = np.array([self.fitness_function(ind, X_train, X_test, y_train, y_test) for ind in population])
- # Start time to monitor the timing of each generation
- start_time = time.time()
- # SOS Optimization Loop
- for generation in range(self.max_iter):
- for i in range(self.pop_size):
- selected_index = np.random.choice(self.pop_size)
- while selected_index == i:
- selected_index = np.random.choice(self.pop_size)
- # Mimic symbiotic behavior: adjust current organism towards a better neighbor
- population[i] += np.random.rand(self.features) * (population[selected_index] - population[i])
- # Clip to ensure values are within bounds
- population[i] = np.clip(population[i], 0, 1)
- # Evaluate the updated solution
- fitness_scores[i] = self.fitness_function(population[i], X_train, X_test, y_train, y_test)
- # Track progress
- best_fitness = -np.min(fitness_scores)
- print(f"Generation {generation_counter}: Best fitness = {best_fitness}")
- generation_counter += 1
- # End time
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- # Return the best solution found
- best_solution_index = np.argmin(fitness_scores) # Index of the best solution
- best_solution = population[best_solution_index] # The solution itself (binary array)
- return [features[i] for i in np.where(best_solution > 0.5)[0]]
"""**Bacterial Foraging Optimization**"""

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import time

class BacterialForaging:
    def __init__(self, X, y, pop_size=20, max_iter=10, num_steps=10, step_size=0.1):
        self.X = X
        self.y = y
        self.pop_size = pop_size
        self.max_iter = max_iter
        self.num_steps = num_steps
        self.step_size = step_size
        self.features = X.shape[1]

    # Fitness function for feature selection
    def fitness_function(self, solution, X_train, X_test, y_train, y_test):
        selected_features = np.where(solution > 0.5)[0]
        if len(selected_features) == 0:
            return 1  # Worst fitness if no features selected
        # Train and test classifier with the selected features
        model = KNeighborsClassifier(n_neighbors=3)
        model.fit(X_train[:, selected_features], y_train)
        y_pred = model.predict(X_test[:, selected_features])
        return -accuracy_score(y_test, y_pred)  # Minimize the negative accuracy

    # Bacterial Foraging Optimization process
    def search(self):
        generation_counter = 0
        # Split the data into training and test sets
        X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.3, random_state=42)
        # Initialize the population (continuous solutions in [0, 1], thresholded at 0.5)
        population = np.random.rand(self.pop_size, self.features)
        # Compute initial fitness for the population
        fitness_scores = np.array([self.fitness_function(ind, X_train, X_test, y_train, y_test) for ind in population])
        # Start time to monitor the timing of each generation
        start_time = time.time()

        # BFO Optimization Loop
        for generation in range(self.max_iter):
            for i in range(self.pop_size):
                for step in range(self.num_steps):
                    # Randomly adjust the bacterium's position
                    previous_position = population[i].copy()
                    population[i] += (np.random.rand(self.features) - 0.5) * self.step_size
                    # Clip to ensure values are within bounds
                    population[i] = np.clip(population[i], 0, 1)
                    new_fitness = self.fitness_function(population[i], X_train, X_test, y_train, y_test)
                    # If the new fitness is better, keep the new position; else revert
                    if new_fitness < fitness_scores[i]:
                        fitness_scores[i] = new_fitness
                    else:
                        population[i] = previous_position  # Revert to the previous position
            # Track progress
            best_fitness = -np.min(fitness_scores)
            print(f"Generation {generation_counter}: Best fitness = {best_fitness}")
            generation_counter += 1

        # End time
        end_time = time.time()
        print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")

        # Return the best solution found
        best_solution_index = np.argmin(fitness_scores)   # Index of the best solution
        best_solution = population[best_solution_index]   # Continuous position vector
        return [features[i] for i in np.where(best_solution > 0.5)[0]]
- """**Bat Optimization**"""
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class Bat:
- def __init__(self, X, y, pop_size=20, max_iter=10, alpha=0.9, gamma=1.0):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.alpha = alpha
- self.gamma = gamma
- self.features = X.shape[1]
- # Fitness function for feature selection
- def fitness_function(self, solution, X_train, X_test, y_train, y_test):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Worst fitness if no features selected
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(X_train[:, selected_features], y_train)
- y_pred = model.predict(X_test[:, selected_features])
- return -accuracy_score(y_test, y_pred) # Minimize the negative accuracy
- # Bat Optimization process
- def search(self):
- global generation_counter
- generation_counter = 0
- # Split the data into training and test sets
- X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.3, random_state=42)
- # Initialize population and velocities
- population = np.random.rand(self.pop_size, self.features)
- velocities = np.zeros_like(population)
- # Compute initial fitness
- fitness_scores = np.array([self.fitness_function(ind, X_train, X_test, y_train, y_test) for ind in population])
- # Start time to monitor the timing of each generation
- start_time = time.time()
- # Bat Optimization Loop
- for generation in range(self.max_iter):
- for i in range(self.pop_size):
- r = np.random.rand()
- if r > 0.5:
- # Adjust the velocity and update the solution
- velocities[i] += (population[np.random.randint(self.pop_size)] - population[i]) * np.random.rand()
- population[i] += velocities[i]
- # Ensure values are within [0, 1]
- population[i] = np.clip(population[i], 0, 1)
- # Calculate the fitness of the new solution
- fitness_scores[i] = self.fitness_function(population[i], X_train, X_test, y_train, y_test)
- # Track progress
- best_fitness = -np.min(fitness_scores)
- print(f"Generation {generation_counter}: Best fitness = {best_fitness}")
- generation_counter += 1
- # End time
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- # Return the best solution found
- best_solution_index = np.argmin(fitness_scores) # Index of the best solution
- best_solution = population[best_solution_index] # The solution itself (binary array)
- return [features[i] for i in np.where(best_solution > 0.5)[0]]
- """**Big Bang Big Crunch**"""
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class BigBangBigCrunch:
- def __init__(self, X, y, pop_size=20, max_iter=10, explosion_rate=0.3):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.explosion_rate = explosion_rate
- self.features = X.shape[1]
- def fitness_function(self, solution, X_train, X_test, y_train, y_test):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Worst fitness if no features are selected
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(X_train[:, selected_features], y_train)
- y_pred = model.predict(X_test[:, selected_features])
- return -accuracy_score(y_test, y_pred) # Minimize the negative accuracy
- def search(self):
- global generation_counter
- generation_counter = 0
- # Split the data into training and test sets
- X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.3, random_state=42)
- # Initialize the population and compute initial fitness
- population = np.random.rand(self.pop_size, self.features)
- fitness_scores = np.array([self.fitness_function(ind, X_train, X_test, y_train, y_test) for ind in population])
- # Start time to monitor the duration of the optimization
- start_time = time.time()
- # Big Bang-Big Crunch Optimization Loop
- for generation in range(self.max_iter):
- best_fitness = -np.min(fitness_scores)
- average_fitness = np.mean(fitness_scores)
- # Big Bang (explosion): Randomly initialize new population
- if np.random.rand() < self.explosion_rate:
- population = np.random.rand(self.pop_size, self.features)
- fitness_scores = np.array([self.fitness_function(ind, X_train, X_test, y_train, y_test) for ind in population])
- # Big Crunch: Move population toward the center of mass based on fitness
- else:
- for i in range(self.pop_size):
- population[i] += (np.random.rand(self.features) - 0.5) * (average_fitness - fitness_scores[i])
- population[i] = np.clip(population[i], 0, 1)
- fitness_scores[i] = self.fitness_function(population[i], X_train, X_test, y_train, y_test)
- print(f"Generation {generation_counter}: Best fitness = {best_fitness}")
- generation_counter += 1
- # End time
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- # Find and return the best solution found
- best_solution_index = np.argmin(fitness_scores)
- best_solution = population[best_solution_index]
- return [features[i] for i in np.where(best_solution > 0.5)[0]]
- """**Biogeography-based Optimization**"""
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class Biogeography:
- def __init__(self, X, y, pop_size=20, max_iter=10, migration_rate=0.3):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.migration_rate = migration_rate
- self.generation_counter = 0
- def fitness_function(self, solution, X_train, X_test, y_train, y_test):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Worst fitness if no features are selected
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(X_train[:, selected_features], y_train)
- y_pred = model.predict(X_test[:, selected_features])
- return -accuracy_score(y_test, y_pred)
- def search(self):
- X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.3, random_state=42)
- population = np.random.rand(self.pop_size, self.X.shape[1])
- fitness_scores = np.array([self.fitness_function(ind, X_train, X_test, y_train, y_test) for ind in population])
- start_time = time.time()
- for generation in range(self.max_iter):
- best_fitness = -np.min(fitness_scores)
- average_fitness = np.mean(fitness_scores)
- for i in range(self.pop_size):
- if np.random.rand() < self.migration_rate:
- # Migrate features from a better solution
- donor_index = np.random.choice(np.flatnonzero(fitness_scores == np.min(fitness_scores)))
- population[i] = population[donor_index] + np.random.normal(0, 0.1, size=self.X.shape[1])
- population[i] = np.clip(population[i], 0, 1)
- fitness_scores[i] = self.fitness_function(population[i], X_train, X_test, y_train, y_test)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(fitness_scores) # Index of the best solution
- best_solution = population[best_solution_index] # The solution itself (binary array)
- # Extract the selected features from the best solution and return them
- return [features[i] for i in np.where(best_solution > 0.5)[0]]
- """**Tug of War Optimization**"""
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class TugOfWar:
- def __init__(self, X, y, pop_size=20, max_iter=10):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.generation_counter = 0
- def fitness_function(self, solution, X_train, X_test, y_train, y_test):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Assign worst fitness if no features are selected
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(X_train[:, selected_features], y_train)
- y_pred = model.predict(X_test[:, selected_features])
- return -accuracy_score(y_test, y_pred)
- def search(self):
- X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.3, random_state=42)
- population = np.random.rand(self.pop_size, self.X.shape[1])
- fitness_scores = np.array([self.fitness_function(ind, X_train, X_test, y_train, y_test) for ind in population])
- start_time = time.time()
- for generation in range(self.max_iter):
- for i in range(self.pop_size):
- if np.random.rand() < 0.5:
- # Random mutation to introduce diversity
- population[i] = np.random.rand(self.X.shape[1])
- else:
- # Update based on the best solution
- best_index = np.argmin(fitness_scores)
- population[i] = population[best_index] + np.random.normal(0, 0.1, size=self.X.shape[1])
- population[i] = np.clip(population[i], 0, 1)
- # Recalculate fitness for the updated individual
- fitness_scores[i] = self.fitness_function(population[i], X_train, X_test, y_train, y_test)
- best_fitness = -np.min(fitness_scores)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(fitness_scores) # Index of the best solution
- best_solution = population[best_solution_index] # The solution itself (binary array)
- # Extract the selected features from the best solution
- return [features[i] for i in np.where(best_solution > 0.5)[0]]
- """**Water Cycle Optimization**"""
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class WaterCycle:
- def __init__(self, X, y, pop_size=20, max_iter=10):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.generation_counter = 0
- def fitness_function(self, solution, X_train, X_test, y_train, y_test):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Assign worst fitness if no features are selected
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(X_train[:, selected_features], y_train)
- y_pred = model.predict(X_test[:, selected_features])
- return -accuracy_score(y_test, y_pred)
- def search(self):
- X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.3, random_state=42)
- population = np.random.rand(self.pop_size, self.X.shape[1])
- fitness_scores = np.array([self.fitness_function(ind, X_train, X_test, y_train, y_test) for ind in population])
- start_time = time.time()
- for generation in range(self.max_iter):
- for i in range(self.pop_size):
- if np.random.rand() < 0.5:
- # Water movement - introduce randomness
- population[i] = np.random.rand(self.X.shape[1])
- else:
- # Move towards the best solution (simulating flow towards an optimal solution)
- best_index = np.argmin(fitness_scores)
- population[i] = population[best_index] + np.random.normal(0, 0.1, size=self.X.shape[1])
- population[i] = np.clip(population[i], 0, 1)
- # Update fitness for the new solution
- fitness_scores[i] = self.fitness_function(population[i], X_train, X_test, y_train, y_test)
- best_fitness = -np.min(fitness_scores)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(fitness_scores) # Index of the best solution
- best_solution = population[best_solution_index] # The solution itself (binary array)
- return [features[i] for i in np.where(best_solution > 0.5)[0]]
"""**Whale Optimization**"""

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import time

class WhaleOptimization:
    def __init__(self, X, y, pop_size=20, max_iter=10):
        self.X = X
        self.y = y
        self.pop_size = pop_size
        self.max_iter = max_iter
        self.generation_counter = 0

    def fitness_function(self, solution, X_train, X_test, y_train, y_test):
        selected_features = np.where(solution > 0.5)[0]
        if len(selected_features) == 0:
            return 1  # Assign worst fitness if no features are selected
        model = KNeighborsClassifier(n_neighbors=3)
        model.fit(X_train[:, selected_features], y_train)
        y_pred = model.predict(X_test[:, selected_features])
        return -accuracy_score(y_test, y_pred)

    def search(self):
        X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.3, random_state=42)
        population = np.random.rand(self.pop_size, self.X.shape[1])
        fitness_scores = np.array([self.fitness_function(ind, X_train, X_test, y_train, y_test) for ind in population])
        start_time = time.time()

        for generation in range(self.max_iter):
            for i in range(self.pop_size):
                best_index = np.argmin(fitness_scores)
                r = np.random.rand()
                A = 2 * np.random.rand() - 1  # Random coefficient in [-1, 1]
                C = 2 * np.random.rand()      # Random coefficient in [0, 2]
                if r < 0.5:
                    # Encircle the best solution
                    population[i] = population[best_index] - A * np.abs(C * population[best_index] - population[i])
                else:
                    # Search relative to the best solution
                    population[i] = population[best_index] + A * np.abs(C * population[best_index] - population[i])
                # Ensure values remain in the range [0, 1]
                population[i] = np.clip(population[i], 0, 1)
                # Update the fitness score for the new solution
                fitness_scores[i] = self.fitness_function(population[i], X_train, X_test, y_train, y_test)
            best_fitness = -np.min(fitness_scores)
            print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
            self.generation_counter += 1

        end_time = time.time()
        print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")

        best_solution_index = np.argmin(fitness_scores)   # Index of the best solution
        best_solution = population[best_solution_index]   # Continuous position vector
        # Extract the selected features from the best solution
        return [features[i] for i in np.where(best_solution > 0.5)[0]]
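
# Loose correspondence note (an interpretation, not from the original notebook): A is
# drawn from [-1, 1] and C from [0, 2], echoing the encircling term
# D = |C * X_best - X| of the standard Whale Optimization Algorithm; with r < 0.5 the
# whale steps relative to the best solution on one side of it, otherwise on the other,
# and clipping keeps every position inside the unit hypercube.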
"""**Whale Swarm Optimization**"""

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import time

class WhaleSwarmOptimization:
    def __init__(self, X, y, pop_size=20, max_iter=10):
        self.X = X
        self.y = y
        self.pop_size = pop_size
        self.max_iter = max_iter
        self.generation_counter = 0

    def fitness_function(self, solution, X_train, X_test, y_train, y_test):
        selected_features = np.where(solution > 0.5)[0]
        if len(selected_features) == 0:
            return 1  # Assign worst fitness if no features are selected
        model = KNeighborsClassifier(n_neighbors=3)
        model.fit(X_train[:, selected_features], y_train)
        y_pred = model.predict(X_test[:, selected_features])
        return -accuracy_score(y_test, y_pred)

    def search(self):
        X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.3, random_state=42)
        population = np.random.rand(self.pop_size, self.X.shape[1])
        fitness_scores = np.array([self.fitness_function(ind, X_train, X_test, y_train, y_test) for ind in population])
        start_time = time.time()

        for generation in range(self.max_iter):
            for i in range(self.pop_size):
                r = np.random.rand()
                if r < 0.5:
                    # Update relative to the best whale
                    best_index = np.argmin(fitness_scores)
                    A = np.random.rand()
                    population[i] = population[best_index] + A * np.abs(population[best_index] - population[i])
                else:
                    # Update relative to the worst whale
                    worst_index = np.argmax(fitness_scores)
                    A = np.random.rand()
                    population[i] = population[worst_index] - A * np.abs(population[worst_index] - population[i])
                # Ensure values remain in the range [0, 1]
                population[i] = np.clip(population[i], 0, 1)
                # Update the fitness score for the new solution
                fitness_scores[i] = self.fitness_function(population[i], X_train, X_test, y_train, y_test)
            best_fitness = -np.min(fitness_scores)
            print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
            self.generation_counter += 1

        end_time = time.time()
        print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")

        best_solution_index = np.argmin(fitness_scores)   # Index of the best solution
        best_solution = population[best_solution_index]   # Continuous position vector
        # Extract the selected features from the best solution
        return [features[i] for i in np.where(best_solution > 0.5)[0]]
"""**Cat Swarm Optimization**"""

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import time

class CatSwarmOptimizer:
    def __init__(self, X, y, pop_size=20, max_iter=10):
        self.X = X
        self.y = y
        self.pop_size = pop_size
        self.max_iter = max_iter
        self.generation_counter = 0
        self.population = np.random.rand(pop_size, X.shape[1])
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    def fitness_function(self, solution):
        selected_features = np.where(solution > 0.5)[0]
        if len(selected_features) == 0:
            return 1  # Penalize solutions with no selected features
        model = KNeighborsClassifier(n_neighbors=3)
        model.fit(self.X_train[:, selected_features], self.y_train)
        y_pred = model.predict(self.X_test[:, selected_features])
        return -accuracy_score(self.y_test, y_pred)

    def search(self):
        fitness_scores = np.array([self.fitness_function(ind) for ind in self.population])
        start_time = time.time()

        for generation in range(self.max_iter):
            for i in range(self.pop_size):
                if np.random.rand() < 0.5:
                    # Exploration (seeking) phase
                    self.population[i] = np.random.rand(self.X.shape[1])
                else:
                    # Exploitation (tracing) phase
                    best_index = np.argmin(fitness_scores)
                    self.population[i] = self.population[best_index] + np.random.normal(0, 0.1, size=self.X.shape[1])
                self.population[i] = np.clip(self.population[i], 0, 1)  # Ensure values stay within [0, 1]
                # Update the fitness score for the new solution
                fitness_scores[i] = self.fitness_function(self.population[i])
            best_fitness = -np.min(fitness_scores)
            print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
            self.generation_counter += 1

        end_time = time.time()
        print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")

        best_solution_index = np.argmin(fitness_scores)        # Index of the best solution
        best_solution = self.population[best_solution_index]  # Continuous position vector
        # Extract the selected features from the best solution
        selected_indices = np.where(best_solution > 0.5)[0]  # Indices of the selected features
        selected_features = [features[i] for i in selected_indices]
        return selected_features
"""**Chicken Swarm Optimization**"""

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import time

class ChickenSwarmOptimizer:
    def __init__(self, X, y, pop_size=20, max_iter=10):
        self.X = X
        self.y = y
        self.pop_size = pop_size
        self.max_iter = max_iter
        self.generation_counter = 0
        self.population = np.random.rand(pop_size, X.shape[1])
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    def fitness_function(self, solution):
        selected_features = np.where(solution > 0.5)[0]
        if len(selected_features) == 0:
            return 1  # Penalize solutions with no selected features
        model = KNeighborsClassifier(n_neighbors=3)
        model.fit(self.X_train[:, selected_features], self.y_train)
        y_pred = model.predict(self.X_test[:, selected_features])
        return -accuracy_score(self.y_test, y_pred)

    def search(self):
        fitness_scores = np.array([self.fitness_function(ind) for ind in self.population])
        start_time = time.time()

        for generation in range(self.max_iter):
            for i in range(self.pop_size):
                if np.random.rand() < 0.5:
                    # Exploration phase
                    self.population[i] = np.random.rand(self.X.shape[1])
                else:
                    # Exploitation phase
                    best_index = np.argmin(fitness_scores)
                    self.population[i] = self.population[best_index] + np.random.normal(0, 0.1, size=self.X.shape[1])
                self.population[i] = np.clip(self.population[i], 0, 1)  # Ensure values stay within [0, 1]
                # Update the fitness score for the new solution
                fitness_scores[i] = self.fitness_function(self.population[i])
            best_fitness = -np.min(fitness_scores)
            print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
            self.generation_counter += 1

        end_time = time.time()
        print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")

        best_solution_index = np.argmin(fitness_scores)        # Index of the best solution
        best_solution = self.population[best_solution_index]  # Continuous position vector
        # Extract the selected features from the best solution
        selected_indices = np.where(best_solution > 0.5)[0]  # Indices of the selected features
        selected_features = [features[i] for i in selected_indices]
        return selected_features
"""**Clonal Selection Optimization**"""

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import time

class ClonalSelectionOptimizer:
    def __init__(self, X, y, pop_size=20, max_iter=10):
        self.X = X
        self.y = y
        self.pop_size = pop_size
        self.max_iter = max_iter
        self.generation_counter = 0
        self.population = np.random.rand(pop_size, X.shape[1])
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    def fitness_function(self, solution):
        selected_features = np.where(solution > 0.5)[0]
        if len(selected_features) == 0:
            return 1  # Penalize solutions with no selected features
        model = KNeighborsClassifier(n_neighbors=3)
        model.fit(self.X_train[:, selected_features], self.y_train)
        y_pred = model.predict(self.X_test[:, selected_features])
        return -accuracy_score(self.y_test, y_pred)

    def search(self):
        fitness_scores = np.array([self.fitness_function(ind) for ind in self.population])
        start_time = time.time()

        for generation in range(self.max_iter):
            best_index = np.argmin(fitness_scores)  # Index of the best solution
            for i in range(self.pop_size):
                # Cloning and mutation
                if i == best_index:
                    # The best solution receives a small random mutation
                    self.population[i] += np.random.normal(0, 0.1, size=self.X.shape[1])
                else:
                    # Other solutions move toward the best one
                    self.population[i] += np.random.rand(self.X.shape[1]) * (self.population[best_index] - self.population[i])
                self.population[i] = np.clip(self.population[i], 0, 1)  # Ensure values stay within [0, 1]
                fitness_scores[i] = self.fitness_function(self.population[i])
            best_fitness = -np.min(fitness_scores)
            print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
            self.generation_counter += 1

        end_time = time.time()
        print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")

        best_solution_index = np.argmin(fitness_scores)        # Index of the best solution
        best_solution = self.population[best_solution_index]  # Continuous position vector
        # Extract the selected features from the best solution
        selected_indices = np.where(best_solution > 0.5)[0]  # Indices of the selected features
        selected_features = [features[i] for i in selected_indices]
        return selected_features
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class CoralReefsOptimizer:
- def __init__(self, X, y, pop_size=20, max_iter=10):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.generation_counter = 0
- self.population = np.random.rand(pop_size, X.shape[1])
- self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)
- def fitness_function(self, solution):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Penalize solutions with no selected features
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(self.X_train[:, selected_features], self.y_train)
- y_pred = model.predict(self.X_test[:, selected_features])
- return -accuracy_score(self.y_test, y_pred)
- def search(self):
- fitness_scores = np.array([self.fitness_function(ind) for ind in self.population])
- start_time = time.time()
- for generation in range(self.max_iter):
- best_index = np.argmin(fitness_scores) # Index of the best solution
- for i in range(self.pop_size):
- # Update positions based on the best coral
- self.population[i] = self.population[best_index] + np.random.normal(0, 0.1, size=self.X.shape[1])
- self.population[i] = np.clip(self.population[i], 0, 1) # Ensure values stay within [0, 1]
- fitness_scores[i] = self.fitness_function(self.population[i])
- best_fitness = -np.min(fitness_scores)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(fitness_scores) # Index of the best solution
- best_solution = self.population[best_solution_index] # The solution itself (binary array)
- # Extract the selected features from the best solution
- selected_indices = np.where(best_solution > 0.5)[0] # Indices of the selected features
- selected_features = [features[i] for i in selected_indices]
- return selected_features
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class FireworkOptimization:
- def __init__(self, X, y, pop_size=20, max_iter=10, explosion_strength=0.2):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.explosion_strength = explosion_strength
- self.generation_counter = 0
- self.population = np.random.rand(pop_size, X.shape[1])
- self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)
- def fitness_function(self, solution):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Penalize solutions with no selected features
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(self.X_train[:, selected_features], self.y_train)
- y_pred = model.predict(self.X_test[:, selected_features])
- return -accuracy_score(self.y_test, y_pred)
- def search(self):
- fitness_scores = np.array([self.fitness_function(ind) for ind in self.population])
- start_time = time.time()
- for generation in range(self.max_iter):
- for i in range(self.pop_size):
- # Spark count scales with solution quality: fitness is the negated accuracy
- # (in [-1, 0]), so better solutions spawn more sparks, up to 2 * pop_size
- num_sparks = int(self.pop_size * (1 - fitness_scores[i]))
- sparks = np.array([self.population[i] + np.random.normal(0, self.explosion_strength, size=self.X.shape[1]) for _ in range(num_sparks)])
- for spark in sparks:
- spark = np.clip(spark, 0, 1) # Ensure sparks stay within [0, 1]
- fitness = self.fitness_function(spark)
- if fitness < fitness_scores[i]:
- self.population[i] = spark
- fitness_scores[i] = fitness
- best_fitness = -np.min(fitness_scores)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(fitness_scores) # Index of the best solution
- best_solution = self.population[best_solution_index] # The solution itself (binary array)
- # Extract the selected features from the best solution
- selected_indices = np.where(best_solution > 0.5)[0] # Indices of the selected features
- selected_features = [features[i] for i in selected_indices]
- return selected_features
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class FlowerPollination:
- def __init__(self, X, y, pop_size=20, max_iter=10, p=0.8):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.p = p
- self.generation_counter = 0
- self.population = np.random.rand(pop_size, X.shape[1])
- self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)
- def fitness_function(self, solution):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Penalize solutions with no selected features
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(self.X_train[:, selected_features], self.y_train)
- y_pred = model.predict(self.X_test[:, selected_features])
- return -accuracy_score(self.y_test, y_pred)
- def search(self):
- fitness_scores = np.array([self.fitness_function(ind) for ind in self.population])
- start_time = time.time()
- for generation in range(self.max_iter):
- for i in range(self.pop_size):
- if np.random.rand() < self.p:
- # Global pollination: a Gaussian-weighted pull toward the current best (the canonical algorithm uses a Levy flight here)
- best_index = np.argmin(fitness_scores)
- self.population[i] += np.random.normal(0, 0.1, size=self.X.shape[1]) * (self.population[best_index] - self.population[i])
- else:
- # Local pollination
- j = np.random.randint(self.pop_size)
- self.population[i] += np.random.normal(0, 0.1, size=self.X.shape[1]) * (self.population[j] - self.population[i])
- self.population[i] = np.clip(self.population[i], 0, 1)
- fitness_scores[i] = self.fitness_function(self.population[i])
- best_fitness = -np.min(fitness_scores)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(fitness_scores) # Index of the best solution
- best_solution = self.population[best_solution_index] # The solution itself (binary array)
- # Extract the selected features from the best solution
- selected_indices = np.where(best_solution > 0.5)[0] # Indices of the selected features
- selected_features = [features[i] for i in selected_indices]
- return selected_features
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class GravitationalSearch:
- def __init__(self, X, y, pop_size=20, max_iter=10):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.generation_counter = 0
- self.population = np.random.rand(pop_size, X.shape[1])
- self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)
- def fitness_function(self, solution):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Penalize solutions with no selected features
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(self.X_train[:, selected_features], self.y_train)
- y_pred = model.predict(self.X_test[:, selected_features])
- return -accuracy_score(self.y_test, y_pred)
- def search(self):
- fitness_scores = np.array([self.fitness_function(ind) for ind in self.population])
- start_time = time.time()
- for generation in range(self.max_iter):
- total_fitness = np.sum(fitness_scores)
- gravitational_force = (fitness_scores / total_fitness).reshape(-1, 1)
- for i in range(self.pop_size):
- for j in range(self.pop_size):
- if fitness_scores[j] < fitness_scores[i]: # Attractive force
- force = gravitational_force[j] / (np.linalg.norm(self.population[i] - self.population[j]) + 1e-12) # epsilon guards against division by zero for coincident agents
- self.population[i] += force * (self.population[j] - self.population[i])
- self.population[i] = np.clip(self.population[i], 0, 1)
- fitness_scores[i] = self.fitness_function(self.population[i])
- best_fitness = -np.min(fitness_scores)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(fitness_scores) # Index of the best solution
- best_solution = self.population[best_solution_index] # The solution itself (binary array)
- # Extract the selected features from the best solution
- selected_indices = np.where(best_solution > 0.5)[0] # Indices of the selected features
- selected_features = [features[i] for i in selected_indices]
- return selected_features
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class GrayWolfOptimization:
- def __init__(self, X, y, features=None, pop_size=20, max_iter=10):
- self.X = X
- self.y = y
- self.features = features
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.generation_counter = 0
- self.population = np.random.rand(pop_size, X.shape[1])
- self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)
- def fitness_function(self, solution):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Penalize solutions with no selected features
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(self.X_train[:, selected_features], self.y_train)
- y_pred = model.predict(self.X_test[:, selected_features])
- return -accuracy_score(self.y_test, y_pred)
- def search(self):
- fitness_scores = np.array([self.fitness_function(ind) for ind in self.population])
- start_time = time.time()
- for generation in range(self.max_iter):
- alpha_index = np.argmin(fitness_scores)
- alpha = self.population[alpha_index]
- a = 2 - generation * (2 / self.max_iter)
- for i in range(self.pop_size):
- for j in range(3): # Simplified update: three successive pulls toward the alpha wolf only (beta and delta are not tracked in this variant)
- r = np.random.rand(self.X.shape[1])
- A = 2 * a * r - a
- C = 2 * np.random.rand(self.X.shape[1])
- D = np.abs(C * alpha - self.population[i])
- self.population[i] = alpha - A * D
- self.population[i] = np.clip(self.population[i], 0, 1)
- fitness_scores[i] = self.fitness_function(self.population[i])
- best_fitness = -np.min(fitness_scores)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(fitness_scores) # Index of the best solution
- best_solution = self.population[best_solution_index] # The solution itself (binary array)
- # Extract the selected features from the best solution
- selected_indices = np.where(best_solution > 0.5)[0] # Indices of the selected features
- # Use the feature names passed to the constructor, falling back to the global list
- feature_names = self.features if self.features is not None else features
- selected_features = [feature_names[i] for i in selected_indices]
- return selected_features
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class GreenHeronsOptimization:
- def __init__(self, X, y, pop_size=20, max_iter=10, p=0.5):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.p = p
- self.generation_counter = 0
- self.population = np.random.rand(pop_size, X.shape[1])
- self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)
- def fitness_function(self, solution):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Penalize solutions with no selected features
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(self.X_train[:, selected_features], self.y_train)
- y_pred = model.predict(self.X_test[:, selected_features])
- return -accuracy_score(self.y_test, y_pred)
- def search(self):
- fitness_scores = np.array([self.fitness_function(ind) for ind in self.population])
- start_time = time.time()
- for generation in range(self.max_iter):
- for i in range(self.pop_size):
- if np.random.rand() < self.p:
- # Update by foraging behavior
- self.population[i] += np.random.normal(0, 0.1, size=self.X.shape[1]) * (np.random.rand() * (self.population.max(axis=0) - self.population[i]))
- else:
- # Update by avoiding predators
- self.population[i] += np.random.normal(0, 0.1, size=self.X.shape[1]) * (np.random.rand() * (self.population.min(axis=0) - self.population[i]))
- self.population[i] = np.clip(self.population[i], 0, 1)
- fitness_scores[i] = self.fitness_function(self.population[i])
- best_fitness = -np.min(fitness_scores)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(fitness_scores) # Index of the best solution
- best_solution = self.population[best_solution_index] # The solution itself (binary array)
- # Extract the selected features from the best solution
- selected_indices = np.where(best_solution > 0.5)[0] # Indices of the selected features
- selected_features = [features[i] for i in selected_indices]
- return selected_features
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class GreyWolfOptimizer:
- def __init__(self, X, y, pop_size=20, max_iter=10):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.generation_counter = 0
- self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)
- self.population = np.random.rand(pop_size, X.shape[1])
- self.fitness_scores = np.array([self.fitness_function(ind) for ind in self.population])
- def fitness_function(self, solution):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Penalize solutions with no selected features
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(self.X_train[:, selected_features], self.y_train)
- y_pred = model.predict(self.X_test[:, selected_features])
- return -accuracy_score(self.y_test, y_pred)
- def search(self):
- start_time = time.time()
- for generation in range(self.max_iter):
- sorted_indices = np.argsort(self.fitness_scores)
- alpha = self.population[sorted_indices[0]]
- beta = self.population[sorted_indices[1]]
- delta = self.population[sorted_indices[2]]
- a = 2 - generation * (2 / self.max_iter)
- for i in range(self.pop_size):
- # Standard GWO update: build one candidate per leading wolf, drawing
- # fresh random vectors each time, then average the three candidates
- r1, r2 = np.random.rand(self.X.shape[1]), np.random.rand(self.X.shape[1])
- X1 = alpha - (2 * a * r1 - a) * np.abs(2 * r2 * alpha - self.population[i])
- r1, r2 = np.random.rand(self.X.shape[1]), np.random.rand(self.X.shape[1])
- X2 = beta - (2 * a * r1 - a) * np.abs(2 * r2 * beta - self.population[i])
- r1, r2 = np.random.rand(self.X.shape[1]), np.random.rand(self.X.shape[1])
- X3 = delta - (2 * a * r1 - a) * np.abs(2 * r2 * delta - self.population[i])
- self.population[i] = np.clip((X1 + X2 + X3) / 3, 0, 1)
- self.fitness_scores[i] = self.fitness_function(self.population[i])
- best_fitness = -np.min(self.fitness_scores)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(self.fitness_scores) # Index of the best solution
- best_solution = self.population[best_solution_index] # The solution itself (binary array)
- # Extract the selected features from the best solution
- selected_indices = np.where(best_solution > 0.5)[0] # Indices of the selected features
- selected_features = [features[i] for i in selected_indices]
- return selected_features
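- # Note: unlike GrayWolfOptimization above, which pulls toward the alpha wolf
- # only, this class follows the standard grey-wolf scheme of averaging
- # candidates guided by the alpha, beta, and delta wolves.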
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class HarmonySearch:
- def __init__(self, X, y, pop_size=20, max_iter=10, harmony_memory_size=10):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.harmony_memory_size = harmony_memory_size
- self.generation_counter = 0
- self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)
- self.harmony_memory = np.random.rand(harmony_memory_size, X.shape[1])
- self.fitness_scores = np.array([self.fitness_function(ind) for ind in self.harmony_memory])
- def fitness_function(self, solution):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Penalize solutions with no selected features
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(self.X_train[:, selected_features], self.y_train)
- y_pred = model.predict(self.X_test[:, selected_features])
- return -accuracy_score(self.y_test, y_pred)
- def search(self):
- start_time = time.time()
- for generation in range(self.max_iter):
- new_harmony = np.zeros(self.X.shape[1])
- for i in range(self.X.shape[1]):
- if np.random.rand() < 0.5: # Choose from harmony memory
- idx = np.random.randint(0, self.harmony_memory_size)
- new_harmony[i] = self.harmony_memory[idx, i]
- else: # Random choice
- new_harmony[i] = np.random.rand()
- new_harmony = np.clip(new_harmony, 0, 1)
- new_fitness = self.fitness_function(new_harmony)
- # Update harmony memory
- if new_fitness < np.max(self.fitness_scores):
- worst_index = np.argmax(self.fitness_scores)
- self.harmony_memory[worst_index] = new_harmony
- self.fitness_scores[worst_index] = new_fitness
- best_fitness = -np.min(self.fitness_scores)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(self.fitness_scores) # Index of the best solution
- best_solution = self.harmony_memory[best_solution_index] # The solution itself (binary array)
- # Extract the selected features from the best solution
- selected_indices = np.where(best_solution > 0.5)[0] # Indices of the selected features
- selected_features = [features[i] for i in selected_indices]
- return selected_features
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class HarrisHawk:
- def __init__(self, X, y, pop_size=20, max_iter=10):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.generation_counter = 0
- self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)
- self.population = np.random.rand(pop_size, X.shape[1])
- self.fitness_scores = np.array([self.fitness_function(ind) for ind in self.population])
- def fitness_function(self, solution):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Penalize solutions with no selected features
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(self.X_train[:, selected_features], self.y_train)
- y_pred = model.predict(self.X_test[:, selected_features])
- return -accuracy_score(self.y_test, y_pred)
- def search(self):
- start_time = time.time()
- for generation in range(self.max_iter):
- for i in range(self.pop_size):
- r = np.random.rand(self.X.shape[1])
- A = np.random.rand()
- new_position = self.population[i] + A * (self.population[np.random.randint(self.pop_size)] - self.population[i]) + r
- new_position = np.clip(new_position, 0, 1)
- new_fitness = self.fitness_function(new_position)
- if new_fitness < self.fitness_scores[i]:
- self.population[i] = new_position
- self.fitness_scores[i] = new_fitness
- best_fitness = -np.min(self.fitness_scores)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(self.fitness_scores) # Index of the best solution
- best_solution = self.population[best_solution_index] # The solution itself (binary array)
- # Extract the selected features from the best solution
- selected_indices = np.where(best_solution > 0.5)[0] # Indices of the selected features
- selected_features = [features[i] for i in selected_indices]
- return selected_features
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class HenryGasSolubility:
- def __init__(self, X, y, pop_size=20, max_iter=10):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.generation_counter = 0
- self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)
- self.population = np.random.rand(pop_size, X.shape[1])
- self.fitness_scores = np.array([self.fitness_function(ind) for ind in self.population])
- def fitness_function(self, solution):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Penalize solutions with no selected features
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(self.X_train[:, selected_features], self.y_train)
- y_pred = model.predict(self.X_test[:, selected_features])
- return -accuracy_score(self.y_test, y_pred)
- def search(self):
- start_time = time.time()
- for generation in range(self.max_iter):
- for i in range(self.pop_size):
- A = np.random.rand() # Random coefficient
- new_position = self.population[i] + A * (self.population[np.random.randint(self.pop_size)] - self.population[i])
- new_position = np.clip(new_position, 0, 1)
- new_fitness = self.fitness_function(new_position)
- if new_fitness < self.fitness_scores[i]:
- self.population[i] = new_position
- self.fitness_scores[i] = new_fitness
- best_fitness = -np.min(self.fitness_scores)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(self.fitness_scores) # Index of the best solution
- best_solution = self.population[best_solution_index] # The solution itself (binary array)
- # Extract the selected features from the best solution
- selected_indices = np.where(best_solution > 0.5)[0] # Indices of the selected features
- selected_features = [features[i] for i in selected_indices]
- return selected_features
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class InvasiveWeed:
- def __init__(self, X, y, pop_size=20, max_iter=10):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.generation_counter = 0
- self.selected_features = None
- self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)
- self.population = np.random.rand(pop_size, X.shape[1])
- self.fitness_scores = np.array([self.fitness_function(ind) for ind in self.population])
- def fitness_function(self, solution):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Penalize solutions with no selected features
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(self.X_train[:, selected_features], self.y_train)
- y_pred = model.predict(self.X_test[:, selected_features])
- return -accuracy_score(self.y_test, y_pred)
- def search(self):
- start_time = time.time()
- for generation in range(self.max_iter):
- for i in range(self.pop_size):
- new_position = self.population[i] + np.random.normal(0, 0.1, size=self.X.shape[1])
- new_position = np.clip(new_position, 0, 1)
- new_fitness = self.fitness_function(new_position)
- if new_fitness < self.fitness_scores[i]:
- self.population[i] = new_position
- self.fitness_scores[i] = new_fitness
- best_fitness = -np.min(self.fitness_scores)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(self.fitness_scores) # Index of the best solution
- best_solution = self.population[best_solution_index] # The solution itself (binary array)
- # Extract the selected features from the best solution
- selected_indices = np.where(best_solution > 0.5)[0] # Indices of the selected features
- selected_features = [features[i] for i in selected_indices]
- return selected_features # Return the names of the selected features
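- # Note: KrillHerd, MothFlame, NonDominatedSortingGeneticOptimization,
- # NuclearReactionOptimization, Pathfinder, QueuingSearch, and Sailfish below
- # all share this same Gaussian-perturbation, accept-if-better search body;
- # only the class names and surrounding comments differ.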
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class KrillHerd:
- def __init__(self, X, y, pop_size=20, max_iter=10):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.generation_counter = 0
- self.selected_features = None
- self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)
- self.population = np.random.rand(pop_size, X.shape[1])
- self.fitness_scores = np.array([self.fitness_function(ind) for ind in self.population])
- def fitness_function(self, solution):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Penalize solutions with no selected features
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(self.X_train[:, selected_features], self.y_train)
- y_pred = model.predict(self.X_test[:, selected_features])
- return -accuracy_score(self.y_test, y_pred)
- def search(self):
- start_time = time.time()
- for generation in range(self.max_iter):
- for i in range(self.pop_size):
- # Create a new position by perturbing the current position
- new_position = self.population[i] + np.random.normal(0, 0.1, size=self.X.shape[1])
- new_position = np.clip(new_position, 0, 1)
- new_fitness = self.fitness_function(new_position)
- # Update the position if the new fitness is better
- if new_fitness < self.fitness_scores[i]:
- self.population[i] = new_position
- self.fitness_scores[i] = new_fitness
- best_fitness = -np.min(self.fitness_scores)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(self.fitness_scores) # Index of the best solution
- best_solution = self.population[best_solution_index] # The solution itself (binary array)
- # Extract the selected features from the best solution
- selected_indices = np.where(best_solution > 0.5)[0] # Indices of the selected features
- selected_features = [features[i] for i in selected_indices]
- return selected_features
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class MothFlame:
- def __init__(self, X, y, pop_size=20, max_iter=10):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.generation_counter = 0
- self.selected_features = None
- self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)
- self.population = np.random.rand(pop_size, X.shape[1])
- self.fitness_scores = np.array([self.fitness_function(ind) for ind in self.population])
- def fitness_function(self, solution):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Penalize solutions with no selected features
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(self.X_train[:, selected_features], self.y_train)
- y_pred = model.predict(self.X_test[:, selected_features])
- return -accuracy_score(self.y_test, y_pred)
- def search(self):
- start_time = time.time()
- for generation in range(self.max_iter):
- for i in range(self.pop_size):
- # Create a new position by perturbing the current position
- new_position = self.population[i] + np.random.normal(0, 0.1, size=self.X.shape[1])
- new_position = np.clip(new_position, 0, 1)
- new_fitness = self.fitness_function(new_position)
- # Update the position if the new fitness is better
- if new_fitness < self.fitness_scores[i]:
- self.population[i] = new_position
- self.fitness_scores[i] = new_fitness
- best_fitness = -np.min(self.fitness_scores)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(self.fitness_scores) # Index of the best solution
- best_solution = self.population[best_solution_index] # The solution itself (binary array)
- # Extract the selected features from the best solution
- selected_indices = np.where(best_solution > 0.5)[0] # Indices of the selected features
- selected_features = [features[i] for i in selected_indices]
- return selected_features
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class NonDominatedSortingGeneticOptimization:
- def __init__(self, X, y, pop_size=20, max_iter=10):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.generation_counter = 0
- self.selected_features = None
- self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)
- self.population = np.random.rand(pop_size, X.shape[1])
- self.fitness_scores = np.array([self.fitness_function(ind) for ind in self.population])
- def fitness_function(self, solution):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Penalize solutions with no selected features
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(self.X_train[:, selected_features], self.y_train)
- y_pred = model.predict(self.X_test[:, selected_features])
- return -accuracy_score(self.y_test, y_pred)
- def search(self):
- start_time = time.time()
- for generation in range(self.max_iter):
- # Genetic operations: Selection, Crossover, Mutation (basic genetic operations)
- for i in range(self.pop_size):
- # Mimic genetic operations without full implementation
- new_position = self.population[i] + np.random.normal(0, 0.1, size=self.X.shape[1])
- new_position = np.clip(new_position, 0, 1)
- new_fitness = self.fitness_function(new_position)
- # Update the population if the new fitness is better
- if new_fitness < self.fitness_scores[i]:
- self.population[i] = new_position
- self.fitness_scores[i] = new_fitness
- best_fitness = -np.min(self.fitness_scores)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(self.fitness_scores) # Index of the best solution
- best_solution = self.population[best_solution_index] # The solution itself (binary array)
- # Extract the selected features from the best solution
- selected_indices = np.where(best_solution > 0.5)[0] # Indices of the selected features
- selected_features = [features[i] for i in selected_indices]
- return selected_features
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class NuclearReactionOptimization:
- def __init__(self, X, y, pop_size=20, max_iter=10):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.generation_counter = 0
- self.selected_features = None
- self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)
- self.population = np.random.rand(pop_size, X.shape[1])
- self.fitness_scores = np.array([self.fitness_function(ind) for ind in self.population])
- def fitness_function(self, solution):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Penalize solutions with no selected features
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(self.X_train[:, selected_features], self.y_train)
- y_pred = model.predict(self.X_test[:, selected_features])
- return -accuracy_score(self.y_test, y_pred)
- def search(self):
- start_time = time.time()
- for generation in range(self.max_iter):
- # Nuclear reaction operations
- for i in range(self.pop_size):
- # Mimic nuclear reaction operations by adding noise to positions
- new_position = self.population[i] + np.random.normal(0, 0.1, size=self.X.shape[1])
- new_position = np.clip(new_position, 0, 1)
- new_fitness = self.fitness_function(new_position)
- # Update the population if the new fitness is better
- if new_fitness < self.fitness_scores[i]:
- self.population[i] = new_position
- self.fitness_scores[i] = new_fitness
- best_fitness = -np.min(self.fitness_scores)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(self.fitness_scores) # Index of the best solution
- best_solution = self.population[best_solution_index] # The solution itself (binary array)
- # Extract the selected features from the best solution
- selected_indices = np.where(best_solution > 0.5)[0] # Indices of the selected features
- selected_features = [features[i] for i in selected_indices]
- return selected_features
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class ParticleSwarm:
- def __init__(self, X, y, pop_size=20, max_iter=10):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.generation_counter = 0
- self.selected_features = None
- # Split the dataset into training and testing sets
- self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)
- # Initialize the population and velocities
- self.population = np.random.rand(pop_size, X.shape[1])
- self.velocities = np.random.rand(pop_size, X.shape[1])
- self.fitness_scores = np.array([self.fitness_function(ind) for ind in self.population])
- self.personal_best = self.population.copy()
- self.personal_best_scores = self.fitness_scores.copy()
- def fitness_function(self, solution):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Penalize solutions with no selected features
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(self.X_train[:, selected_features], self.y_train)
- y_pred = model.predict(self.X_test[:, selected_features])
- return -accuracy_score(self.y_test, y_pred)
- def search(self):
- start_time = time.time()
- for generation in range(self.max_iter):
- for i in range(self.pop_size):
- r1, r2 = np.random.rand(), np.random.rand()
- self.velocities[i] += r1 * (self.personal_best[i] - self.population[i]) + r2 * (self.population[np.argmin(self.fitness_scores)] - self.population[i])
- self.population[i] += self.velocities[i]
- self.population[i] = np.clip(self.population[i], 0, 1)
- # Update fitness score for the current particle
- self.fitness_scores[i] = self.fitness_function(self.population[i])
- # Update personal best if necessary
- if self.fitness_scores[i] < self.personal_best_scores[i]:
- self.personal_best[i] = self.population[i]
- self.personal_best_scores[i] = self.fitness_scores[i]
- best_fitness = -np.min(self.fitness_scores)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(self.fitness_scores) # Index of the best solution
- best_solution = self.population[best_solution_index] # The solution itself (binary array)
- # Extract the selected features from the best solution
- selected_indices = np.where(best_solution > 0.5)[0] # Indices of the selected features
- selected_features = [features[i] for i in selected_indices]
- return selected_features
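- # A hedged note on the update above: the velocity rule uses an implicit
- # inertia weight of 1 and unit cognitive/social coefficients. A canonical
- # PSO step would look like this (the coefficient values are illustrative
- # assumptions, not taken from this notebook):
- # w, c1, c2 = 0.7, 1.5, 1.5
- # gbest = self.population[np.argmin(self.fitness_scores)]
- # self.velocities[i] = (w * self.velocities[i]
- #     + c1 * r1 * (self.personal_best[i] - self.population[i])
- #     + c2 * r2 * (gbest - self.population[i]))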
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class Pathfinder:
- def __init__(self, X, y, pop_size=20, max_iter=10):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.generation_counter = 0
- self.selected_features = None
- # Split the dataset into training and testing sets
- self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)
- # Initialize the population
- self.population = np.random.rand(pop_size, X.shape[1])
- self.fitness_scores = np.array([self.fitness_function(ind) for ind in self.population])
- def fitness_function(self, solution):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Penalize solutions with no selected features
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(self.X_train[:, selected_features], self.y_train)
- y_pred = model.predict(self.X_test[:, selected_features])
- return -accuracy_score(self.y_test, y_pred)
- def search(self):
- start_time = time.time()
- for generation in range(self.max_iter):
- for i in range(self.pop_size):
- new_position = self.population[i] + np.random.normal(0, 0.1, size=self.X.shape[1])
- new_position = np.clip(new_position, 0, 1)
- # Evaluate new position
- new_fitness = self.fitness_function(new_position)
- if new_fitness < self.fitness_scores[i]:
- self.population[i] = new_position
- self.fitness_scores[i] = new_fitness
- best_fitness = -np.min(self.fitness_scores)
- print(f"Generation {self.generation_counter}: Best fitness = {best_fitness}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(self.fitness_scores) # Index of the best solution
- best_solution = self.population[best_solution_index] # The solution itself (binary array)
- # Extract the selected features from the best solution
- selected_indices = np.where(best_solution > 0.5)[0] # Indices of the selected features
- selected_features = [features[i] for i in selected_indices]
- return selected_features
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class QueuingSearch:
- def __init__(self, X, y, pop_size=20, max_iter=10):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.generation_counter = 0
- self.selected_features = []
- def fitness_function(self, solution, X_train, X_test, y_train, y_test):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Return a high penalty for no features selected
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(X_train[:, selected_features], y_train)
- y_pred = model.predict(X_test[:, selected_features])
- return -accuracy_score(y_test, y_pred)
- def search(self):
- X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.3, random_state=42)
- population = np.random.rand(self.pop_size, self.X.shape[1])
- fitness_scores = np.array([self.fitness_function(ind, X_train, X_test, y_train, y_test) for ind in population])
- start_time = time.time()
- for generation in range(self.max_iter):
- for i in range(self.pop_size):
- new_position = population[i] + np.random.normal(0, 0.1, size=self.X.shape[1])
- new_position = np.clip(new_position, 0, 1)
- new_fitness = self.fitness_function(new_position, X_train, X_test, y_train, y_test)
- if new_fitness < fitness_scores[i]:
- population[i] = new_position
- fitness_scores[i] = new_fitness # keep scores in sync so comparisons and the report use fresh values
- print(f"Generation {self.generation_counter}: Best fitness = {-np.min(fitness_scores)}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(fitness_scores) # Index of the best solution
- best_solution = population[best_solution_index] # The solution itself (binary array)
- # Extract the selected features from the best solution
- selected_indices = np.where(best_solution > 0.5)[0] # Indices of the selected features
- selected_features = [features[i] for i in selected_indices]
- return selected_features
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class PlusLMinusR:
- def __init__(self, X, y, pop_size=20, max_iter=10):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.generation_counter = 0
- self.selected_features = []
- def fitness_function(self, solution, X_train, X_test, y_train, y_test):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Return a high penalty for no features selected
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(X_train[:, selected_features], y_train)
- y_pred = model.predict(X_test[:, selected_features])
- return -accuracy_score(y_test, y_pred)
- def search(self):
- X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.3, random_state=42)
- population = np.random.rand(self.pop_size, self.X.shape[1])
- fitness_scores = np.array([self.fitness_function(ind, X_train, X_test, y_train, y_test) for ind in population])
- start_time = time.time()
- for generation in range(self.max_iter):
- for i in range(self.pop_size):
- L = np.mean(population, axis=0)
- R = np.random.rand(self.X.shape[1])
- new_position = L + R * (population[i] - L)
- new_position = np.clip(new_position, 0, 1)
- new_fitness = self.fitness_function(new_position, X_train, X_test, y_train, y_test)
- if new_fitness < fitness_scores[i]:
- population[i] = new_position
- fitness_scores[i] = new_fitness # keep scores in sync with accepted moves
- print(f"Generation {self.generation_counter}: Best fitness = {-np.min(fitness_scores)}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(fitness_scores) # Index of the best solution
- best_solution = population[best_solution_index] # The solution itself (binary array)
- selected_indices = np.where(best_solution > 0.5)[0] # Indices of the selected features
- selected_features = [features[i] for i in selected_indices]
- return selected_features
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- import time
- class Sailfish:
- def __init__(self, X, y, pop_size=20, max_iter=10):
- self.X = X
- self.y = y
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.generation_counter = 0
- self.selected_features = []
- def fitness_function(self, solution, X_train, X_test, y_train, y_test):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Return a high penalty for no features selected
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(X_train[:, selected_features], y_train)
- y_pred = model.predict(X_test[:, selected_features])
- return -accuracy_score(y_test, y_pred)
- def search(self):
- X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.3, random_state=42)
- population = np.random.rand(self.pop_size, self.X.shape[1])
- fitness_scores = np.array([self.fitness_function(ind, X_train, X_test, y_train, y_test) for ind in population])
- start_time = time.time()
- for generation in range(self.max_iter):
- for i in range(self.pop_size):
- new_position = population[i] + np.random.normal(0, 0.1, size=self.X.shape[1])
- new_position = np.clip(new_position, 0, 1)
- new_fitness = self.fitness_function(new_position, X_train, X_test, y_train, y_test)
- if new_fitness < fitness_scores[i]:
- population[i] = new_position
- fitness_scores[i] = new_fitness # keep scores in sync with accepted moves
- print(f"Generation {self.generation_counter}: Best fitness = {-np.min(fitness_scores)}")
- self.generation_counter += 1
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- best_solution_index = np.argmin(fitness_scores) # Index of the best solution
- best_solution = population[best_solution_index] # The solution itself (binary array)
- # Extract the selected features from the best solution
- selected_indices = np.where(best_solution > 0.5)[0] # Indices of the selected features
- selected_features = [features[i] for i in selected_indices]
- return selected_features
- import numpy as np
- import time
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.metrics import accuracy_score
- class ShuffleFrogLeaping:
- def __init__(self, X, y, pop_size=20, max_iter=10, leaping_rate=0.5):
- self.X = X # Dataset features
- self.y = y # Dataset labels
- self.pop_size = pop_size
- self.max_iter = max_iter
- self.leaping_rate = leaping_rate
- self.selected_features = None
- def fitness_function(self, solution, X_train, X_test, y_train, y_test):
- selected_features = np.where(solution > 0.5)[0]
- if len(selected_features) == 0:
- return 1 # Return a high penalty for no features selected
- model = KNeighborsClassifier(n_neighbors=3)
- model.fit(X_train[:, selected_features], y_train)
- y_pred = model.predict(X_test[:, selected_features])
- return -accuracy_score(y_test, y_pred)
- def search(self):
- # Split the dataset into training and testing sets
- X_train, X_test, y_train, y_test = train_test_split(self.X, self.y, test_size=0.3, random_state=42)
- # Initialize population with random values
- population = np.random.rand(self.pop_size, self.X.shape[1])
- fitness_scores = np.array([self.fitness_function(ind, X_train, X_test, y_train, y_test) for ind in population])
- start_time = time.time()
- for iteration in range(self.max_iter):
- # Sort indices based on fitness scores (lower is better)
- sorted_indices = np.argsort(fitness_scores)
- elite_frogs = population[sorted_indices[:self.pop_size // 2]]
- for i in range(self.pop_size // 2):
- if np.random.rand() < self.leaping_rate:
- frog = elite_frogs[i]
- new_frog = frog + np.random.uniform(-0.1, 0.1, size=frog.shape)
- new_frog = np.clip(new_frog, 0, 1)
- population[sorted_indices[i]] = new_frog
- fitness_scores = np.array([self.fitness_function(ind, X_train, X_test, y_train, y_test) for ind in population])
- end_time = time.time()
- print(f"Total time taken for optimization: {end_time - start_time:.2f} seconds")
- # Find the index of the best solution
- best_solution_index = np.argmin(fitness_scores)
- best_solution = population[best_solution_index]
- # Extract the selected features from the best solution
- selected_indices = np.where(best_solution > 0.5)[0]
- selected_features = [features[i] for i in selected_indices]
- return selected_features
- import os
- import pandas as pd
- def feature_selection(X, y, full_data, MAX_ITER, output_dir):
- # Define paths for Data and Summary folders
- data_dir = os.path.join(output_dir, 'Data')
- summary_dir = os.path.join(output_dir, 'Summary')
- print("Welcome")
- # Ensure the output directories exist
- os.makedirs(data_dir, exist_ok=True)
- os.makedirs(summary_dir, exist_ok=True)
- # List of feature selection algorithms to call
- algorithms = [
- CuckooSearch, EvolutionaryProgramming, Firefly, AdaptiveBacterialForaging,
- AntColony, ArtificialBeeColony, SineCosine, SocialSpider, Symbiotic,
- BacterialForaging, Bat, BigBangBigCrunch, Biogeography, TugOfWar,
- WaterCycle, WhaleOptimization, WhaleSwarmOptimization, CatSwarmOptimizer,
- ChickenSwarmOptimizer, ClonalSelectionOptimizer, CoralReefsOptimizer,
- FireworkOptimization, FlowerPollination, GravitationalSearch,
- GrayWolfOptimization, GreenHeronsOptimization, GreyWolfOptimizer,
- HarmonySearch, HarrisHawk, HenryGasSolubility, InvasiveWeed,
- KrillHerd, MothFlame, NonDominatedSortingGeneticOptimization,
- NuclearReactionOptimization, ParticleSwarm, Pathfinder,
- QueuingSearch, PlusLMinusR, Sailfish, ShuffleFrogLeaping
- ]
- # Iterate over each algorithm
- for algorithm in algorithms:
- algorithm_name = algorithm.__name__
- # Instantiate the algorithm class; pass max_iter by keyword so that
- # MAX_ITER is not silently consumed as the pop_size argument
- selector = algorithm(X, y, max_iter=MAX_ITER)
- # Call the search method to get selected features
- print(algorithm_name)
- selected_features = selector.search()
- # Generate summary CSV: Algorithm name, number of selected features, and feature list
- num_selected_features = len(selected_features)
- summary_data = {
- "Algorithm": [algorithm_name],
- "Number of Selected Features": [num_selected_features],
- "Selected Features": [", ".join(map(str, selected_features))] # Ensure features are converted to string
- }
- summary_df = pd.DataFrame(summary_data)
- summary_file_path = os.path.join(summary_dir, f"{algorithm_name}_summary.csv")
- summary_df.to_csv(summary_file_path, index=False)
- # Generate selected data CSV: Full data with selected features and label
- selected_data_df = full_data[selected_features + ['label']].copy()
- selected_data_file_path = os.path.join(data_dir, f"{algorithm_name}_selected_data.csv")
- selected_data_df.to_csv(selected_data_file_path, index=False)
- output_dir = ''
- X.shape
- y.shape
- data.shape
- feature_selection(X, y, data, MAX_ITER, output_dir)
- """# GAN"""
- import numpy as np
- import pandas as pd
- import os
- from tensorflow.keras.models import Sequential, Model
- from tensorflow.keras.layers import Dense, LeakyReLU, Input, Embedding, Concatenate, Flatten
- from tensorflow.keras.optimizers import RMSprop
- # Function to create a basic GAN generator model
- def create_standard_gan_generator(input_dim, output_dim):
- model = Sequential()
- model.add(Dense(256, input_dim=input_dim))
- model.add(LeakyReLU(alpha=0.2))
- model.add(Dense(512))
- model.add(LeakyReLU(alpha=0.2))
- model.add(Dense(output_dim, activation='tanh'))
- return model
- def create_cgan_generator(latent_dim, output_dim, num_classes):
- # Define label input and embedding layer for labels
- label = Input(shape=(1,), name='label_input')
- label_embedding = Embedding(num_classes, latent_dim)(label) # Embed label to match `latent_dim` (input_length is unnecessary and deprecated in newer Keras)
- label_embedding = Flatten()(label_embedding) # Flatten embedding to concatenate
- # Define noise input
- noise = Input(shape=(latent_dim,), name='noise_input')
- # Concatenate noise and label embedding (combined shape: 2 * latent_dim)
- combined_input = Concatenate()([noise, label_embedding])
- # Build generator model with combined input
- x = Dense(256)(combined_input)
- x = LeakyReLU(alpha=0.2)(x)
- x = Dense(512)(x)
- x = LeakyReLU(alpha=0.2)(x)
- generator_output = Dense(output_dim, activation='tanh')(x)
- # Create the model
- model = Model([noise, label], generator_output)
- return model
- # Function to create a Wasserstein GAN (WGAN) generator model
- def create_wgan_generator(input_dim, output_dim):
- model = Sequential()
- model.add(Dense(256, input_dim=input_dim))
- model.add(LeakyReLU(alpha=0.2))
- model.add(Dense(512))
- model.add(LeakyReLU(alpha=0.2))
- model.add(Dense(output_dim, activation='tanh'))
- return model
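- # Note: this WGAN generator is architecturally identical to the standard
- # generator above; a true WGAN differs in its critic and Wasserstein
- # training objective, neither of which is implemented in this notebook.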
- def generate_samples(generator, n_samples, latent_dim, gan_type, num_classes=None, cls=None):
- noise = np.random.normal(0, 1, (n_samples, latent_dim))
- if gan_type == "cGAN" and cls is not None:
- labels = np.full((n_samples, 1), cls)
- generated_samples = generator.predict([noise, labels])
- else:
- generated_samples = generator.predict(noise)
- return generated_samples
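- # Usage sketch (illustrative only; the dimensions are assumptions, and the
- # generator is untrained, so the samples are shape-checked noise, not
- # realistic data):
- # gen = create_standard_gan_generator(input_dim=100, output_dim=46)
- # fake = generate_samples(gen, n_samples=5, latent_dim=100, gan_type="StandardGAN")
- # print(fake.shape) # expected: (5, 46)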
- def generate_data_with_gans(data, output_dir, base_name, latent_dim=100, samples_per_class=1000):
- os.makedirs(output_dir, exist_ok=True)
- classes = np.unique(data['label'])
- num_features = data.shape[1] - 1
- num_classes = len(classes)
- for gan_type in ["StandardGAN", "cGAN", "WGAN"]:
- all_generated_data = []
- for cls in classes:
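- # Note: each generator below is freshly initialized and never trained in
- # this loop; the saved samples are therefore draws from untrained networks.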
- if gan_type == "StandardGAN":
- generator = create_standard_gan_generator(latent_dim, num_features)
- generated_samples = generate_samples(generator, samples_per_class, latent_dim, gan_type)
- elif gan_type == "cGAN":
- generator = create_cgan_generator(latent_dim, num_features, num_classes)
- generated_samples = generate_samples(generator, samples_per_class, latent_dim, gan_type, num_classes, cls)
- elif gan_type == "WGAN":
- generator = create_wgan_generator(latent_dim, num_features)
- generated_samples = generate_samples(generator, samples_per_class, latent_dim, gan_type)
- generated_label = np.full((samples_per_class, 1), cls)
- generated_data = np.hstack((generated_samples, generated_label))
- all_generated_data.append(generated_data)
- all_generated_data = np.vstack(all_generated_data)
- df_generated = pd.DataFrame(all_generated_data, columns=[*data.columns[:-1], 'label'])
- filename = os.path.join(output_dir, f"{base_name}_{gan_type}.csv")
- df_generated.to_csv(filename, index=False)
- print(f"Data for {gan_type} generated and saved successfully as:", filename)
- import os
- import pandas as pd
- # Define the input and output directories
- input_dir = './Data'
- output_dir = './GAN'
- latent_dim = 134
- # Loop through each file in the input directory
- for filename in os.listdir(input_dir):
- if filename.endswith('_selected_data.csv'): # Process only files with the specific suffix
- file_path = os.path.join(input_dir, filename)
- # Extract the base file name (remove "_selected_data.csv")
- base_name = filename.replace('_selected_data.csv', '')
- # Read the data from the CSV file
- data = pd.read_csv(file_path)
- # Generate data with GANs for each file
- print(f"Processing file: {base_name}")
- generate_data_with_gans(data, output_dir, base_name, latent_dim=latent_dim)
- print(f"Finished processing file: {base_name}\n")
- end_time = datetime.now()
- print('Duration: {}'.format(end_time - start_time))