Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import json
from collections import Counter
from datetime import datetime

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.decomposition import PCA
from sklearn.feature_selection import mutual_info_classif, RFE, SelectFromModel, chi2
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
#################################################_______DATA INPUT_______#######################################################
# Fraction of the input rows to keep; it is passed as `frac` to DataFrame.sample.
DATASET_PERCENTAGE = 0.2
DATA_INPUT_FILE = r"UNSW_NB15_testing-set_preprocessed_dataset_r.csv"
# Timestamped output name so repeated runs never overwrite each other.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
OUTPUT_FILE_NAME = f"feature_fusion_result1_{timestamp}.csv"

# NOTE: The sum of the weights must be 1.0
# The values entered here add up to 0.80, so they are rescaled below to honour
# that rule. Rescaling keeps the ratio between the algorithms, hence the
# weighted-voting ranking is unchanged.
weights = {"CuckooSearch": 0.56, "FireworkOptimization": 0.24}
_weight_total = sum(weights.values())
if _weight_total > 0:
    weights = {algo: value / _weight_total for algo, value in weights.items()}

# Features picked by each selection algorithm; the keys must match `weights`.
# NOTE(review): these look like CICIoT2023-style column names while
# DATA_INPUT_FILE points at an UNSW-NB15 file - confirm the CSV really has
# these columns, otherwise every technique that indexes the dataframe fails.
feature_selection_results = {
    "CuckooSearch": [
        "Rate", "Srate", "Drate", "syn_flag_number", "psh_flag_number",
        "ack_flag_number", "ece_flag_number", "cwr_flag_number",
        "TCP", "UDP", "DHCP", "ICMP", "LLC", "AVG", "IAT", "Number",
    ],
    "FireworkOptimization": [
        "Rate", "Srate", "Drate", "syn_flag_number", "rst_flag_number",
        "psh_flag_number", "ece_flag_number", "cwr_flag_number",
        "fin_count", "urg_count", "rst_count", "IRC", "TCP", "UDP",
        "ARP", "ICMP", "IAT", "Number", "Covariance",
    ],
}
#################################################################################################################################
# Read the dataset and keep a seeded random subset of its rows with a fresh
# 0..n-1 index.
df = (
    pd.read_csv(DATA_INPUT_FILE)
    .sample(frac=DATASET_PERCENTAGE, random_state=42)
    .reset_index(drop=True)
)
# Maps each fusion technique name to the list of features it selected.
fusion_results = dict()
# 1: Intersection-Based Fusion
# Keep only the features that every selection algorithm agreed on. The result
# is sorted because iteration order of a set of strings changes between
# interpreter runs, which would make the saved output differ from run to run.
try:
    selected_sets = [set(features) for features in feature_selection_results.values()]
    fusion_results["Intersection-Based Fusion"] = sorted(set.intersection(*selected_sets))
except Exception as e:
    fusion_results["Intersection-Based Fusion"] = []
    print(f"Error in Intersection-Based Fusion: {e}")
# 2: Union-Based Fusion
# Every feature chosen by at least one algorithm. Later techniques index into
# this list, so it is sorted to give them a stable column order (a bare set
# of strings iterates in a different order on every interpreter run).
try:
    selected_sets = [set(features) for features in feature_selection_results.values()]
    fusion_results["Union-Based Fusion"] = sorted(set.union(*selected_sets))
except Exception as e:
    fusion_results["Union-Based Fusion"] = []
    print(f"Error in Union-Based Fusion: {e}")
# 3: Majority Voting Fusion
# Keep the features selected by more than half of the algorithms.
try:
    # dict.fromkeys drops duplicates inside one algorithm's list (keeping
    # order) so that each algorithm casts at most one vote per feature.
    feature_counts = Counter(
        feat
        for selected in feature_selection_results.values()
        for feat in dict.fromkeys(selected)
    )
    # Strict majority of the configured algorithms; with two algorithms this
    # is the same as requiring both votes.
    majority_threshold = len(feature_selection_results) / 2
    fusion_results["Majority Voting Fusion"] = [
        feat for feat, count in feature_counts.items() if count > majority_threshold
    ]
except Exception as e:
    fusion_results["Majority Voting Fusion"] = []
    print(f"Error in Majority Voting Fusion: {e}")
# 4: Weighted Voting Fusion
# Score every union feature by the summed weight of the algorithms that
# selected it and keep as many top-scoring features as the intersection has.
try:
    feature_scores = {
        feat: sum(
            weights[algo]
            for algo, selected in feature_selection_results.items()
            if feat in selected
        )
        for feat in fusion_results["Union-Based Fusion"]
    }
    top_k = len(fusion_results["Intersection-Based Fusion"])
    # Highest score first; equal scores are ordered by name so the cut-off at
    # top_k does not depend on the arbitrary order of the union list.
    ranked_features = sorted(feature_scores, key=lambda feat: (-feature_scores[feat], feat))
    fusion_results["Weighted Voting Fusion"] = ranked_features[:top_k]
except Exception as e:
    fusion_results["Weighted Voting Fusion"] = []
    print(f"Error in Weighted Voting Fusion: {e}")
# 5: Feature Importance-Based Fusion
# Fit a random forest on the union features and keep the most important
# ones, as many as the intersection contains.
# NOTE(review): the forest is not seeded, so the ranking may vary per run.
try:
    union_features = fusion_results["Union-Based Fusion"]
    top_k = len(fusion_results["Intersection-Based Fusion"])
    rf = RandomForestClassifier(n_estimators=100)
    rf.fit(df[union_features], df["label"])
    # argsort is ascending; reverse it to walk from most to least important.
    importance_order = np.argsort(rf.feature_importances_)[::-1]
    fusion_results["Feature Importance-Based Fusion"] = [
        union_features[idx] for idx in importance_order[:top_k]
    ]
except Exception as err:
    fusion_results["Feature Importance-Based Fusion"] = []
    print(f"Error in Feature Importance-Based Fusion: {err}")
# 6: Correlation-Based Fusion
# Keep the union features whose absolute correlation with the label is
# above 0.1. A NaN correlation fails the comparison and is dropped.
try:
    union_features = fusion_results["Union-Based Fusion"]
    target_corr = df[union_features].corrwith(df["label"]).abs()
    fusion_results["Correlation-Based Fusion"] = [
        feat for feat, corr in target_corr.items() if corr > 0.1
    ]
except Exception as err:
    fusion_results["Correlation-Based Fusion"] = []
    print(f"Error in Correlation-Based Fusion: {err}")
# 7: Mutual Information-Based Fusion
# Rank the union features by estimated mutual information with the label
# and keep as many as the intersection contains.
try:
    union_features = fusion_results["Union-Based Fusion"]
    top_k = len(fusion_results["Intersection-Based Fusion"])
    mi_scores = mutual_info_classif(df[union_features], df["label"])
    # Reverse the ascending argsort so the highest scores come first.
    best_first = np.argsort(mi_scores)[::-1][:top_k]
    fusion_results["Mutual Information-Based Fusion"] = [
        union_features[idx] for idx in best_first
    ]
except Exception as err:
    fusion_results["Mutual Information-Based Fusion"] = []
    print(f"Error in Mutual Information-Based Fusion: {err}")
# 8: RFE Fusion
# Recursive feature elimination around a 50-tree random forest, stopping
# at as many features as the intersection contains.
try:
    union_features = fusion_results["Union-Based Fusion"]
    target_size = len(fusion_results["Intersection-Based Fusion"])
    rfe = RFE(RandomForestClassifier(n_estimators=50), n_features_to_select=target_size)
    rfe.fit(df[union_features], df["label"])
    # support_ is a boolean mask aligned with the columns that were fitted.
    kept_positions = np.flatnonzero(rfe.support_)
    fusion_results["RFE Fusion"] = [union_features[idx] for idx in kept_positions]
except Exception as err:
    fusion_results["RFE Fusion"] = []
    print(f"Error in RFE Fusion: {err}")
# 9: Wrapper-Based Fusion
# Fit an XGBoost classifier on the union features and let SelectFromModel
# (default threshold) decide which of them to keep.
try:
    union_features = fusion_results["Union-Based Fusion"]
    xgb = XGBClassifier()
    xgb.fit(df[union_features], df["label"])
    # prefit=True: reuse the model fitted above instead of refitting it.
    wrapper_model = SelectFromModel(xgb, prefit=True)
    support_mask = wrapper_model.get_support()
    fusion_results["Wrapper-Based Fusion"] = [
        union_features[idx] for idx in np.flatnonzero(support_mask)
    ]
except Exception as err:
    fusion_results["Wrapper-Based Fusion"] = []
    print(f"Error in Wrapper-Based Fusion: {err}")
# 10: Stacked Autoencoder Fusion
# Train a single-hidden-layer autoencoder on the union features, then rank
# the INPUT features by how strongly they drive the high-variance latent
# units, keeping as many as the intersection contains.
try:
    union_features = fusion_results["Union-Based Fusion"]
    top_k = len(fusion_results["Intersection-Based Fusion"])
    union_data = df[union_features]
    n_features = union_data.shape[1]

    input_layer = Input(shape=(n_features,))
    # Keep a handle on the encoder layer so its kernel can be read afterwards.
    encoder_layer = Dense(10, activation='relu')
    encoded = encoder_layer(input_layer)
    decoded = Dense(n_features, activation='sigmoid')(encoded)
    autoencoder = Model(input_layer, decoded)
    autoencoder.compile(optimizer='adam', loss='mse')
    autoencoder.fit(union_data, union_data, epochs=10, batch_size=10, verbose=0)

    encoded_model = Model(input_layer, encoded)
    encoded_features = encoded_model.predict(union_data)
    encoded_feature_variance = np.var(encoded_features, axis=0)

    # A latent unit index says nothing about an input column, so latent
    # indices must not be used to index the feature list. Instead project
    # the latent variances back through the encoder kernel, whose shape is
    # (n_features, n_latent_units): each input feature is scored by the sum
    # of |weight| * variance over the latent units it feeds.
    encoder_kernel = encoder_layer.get_weights()[0]
    feature_relevance = np.abs(encoder_kernel) @ encoded_feature_variance
    top_feature_indices = np.argsort(feature_relevance)[::-1][:top_k]
    fusion_results["Stacked Autoencoder Fusion"] = [
        union_features[i] for i in top_feature_indices
    ]
except Exception as e:
    fusion_results["Stacked Autoencoder Fusion"] = []
    print(f"Error in Stacked Autoencoder Fusion: {e}")
# 11: Information Gain-Based Fusion
# Scores the union features with mutual_info_classif against the label and
# keeps the best ones, as many as the intersection contains.
try:
    union_features = fusion_results["Union-Based Fusion"]
    top_k = len(fusion_results["Intersection-Based Fusion"])
    info_gain_scores = mutual_info_classif(df[union_features], df["label"])
    # Descending order of score, truncated to the requested size.
    gain_order = np.argsort(info_gain_scores)[::-1][:top_k]
    fusion_results["Information Gain-Based Fusion"] = [
        union_features[idx] for idx in gain_order
    ]
except Exception as err:
    fusion_results["Information Gain-Based Fusion"] = []
    print(f"Error in Information Gain-Based Fusion: {err}")
# 12: Ensemble-Based Fusion
# Fit a 50-tree extra-trees classifier on the union features and keep the
# most important ones, as many as the intersection contains.
try:
    union_features = fusion_results["Union-Based Fusion"]
    top_k = len(fusion_results["Intersection-Based Fusion"])
    et = ExtraTreesClassifier(n_estimators=50)
    et.fit(df[union_features], df["label"])
    # Reverse the ascending argsort to list the most important first.
    importance_order = np.argsort(et.feature_importances_)[::-1]
    fusion_results["Ensemble-Based Fusion"] = [
        union_features[idx] for idx in importance_order[:top_k]
    ]
except Exception as err:
    fusion_results["Ensemble-Based Fusion"] = []
    print(f"Error in Ensemble-Based Fusion: {err}")
# 13: Chi-Square Fusion
# Rank the union features by their chi-square statistic against the label
# and keep as many as the intersection contains.
try:
    union_features = fusion_results["Union-Based Fusion"]
    top_k = len(fusion_results["Intersection-Based Fusion"])
    # chi2 returns (statistics, p-values); only the statistics are ranked.
    chi_scores = chi2(df[union_features], df["label"])[0]
    chi_order = np.argsort(chi_scores)[::-1][:top_k]
    fusion_results["Chi-Square Fusion"] = [union_features[idx] for idx in chi_order]
except Exception as err:
    fusion_results["Chi-Square Fusion"] = []
    print(f"Error in Chi-Square Fusion: {err}")
# 14: LASSO Fusion
# Fit an L1-penalised logistic regression and keep the union features whose
# coefficient in the first row of coef_ is non-zero.
try:
    union_features = fusion_results["Union-Based Fusion"]
    lasso = LogisticRegression(penalty='l1', solver='liblinear')
    lasso.fit(df[union_features], df["label"])
    surviving_positions = np.flatnonzero(lasso.coef_[0])
    fusion_results["LASSO Fusion"] = [union_features[idx] for idx in surviving_positions]
except Exception as err:
    fusion_results["LASSO Fusion"] = []
    print(f"Error in LASSO Fusion: {err}")
# Print and Save Results - Enhanced Version
# Console report: one entry per technique with its features and their count.
print("\nFeature Fusion Results:\n")
for technique in fusion_results:
    features = fusion_results[technique]
    print(
        f"{technique}:",
        f" Features: {features}",
        f" Number of features: {len(features)}\n",
        sep="\n",
    )
# Build one row per fusion technique. The feature list is stored as a JSON
# array string so it can be parsed back unambiguously from the CSV cell.
fusion_df = pd.DataFrame(
    {
        "Technique": list(fusion_results.keys()),
        "Number_of_features": [len(features) for features in fusion_results.values()],
        "Selected_features": [json.dumps(features) for features in fusion_results.values()],
    },
    # Explicit column order for the output file.
    columns=["Technique", "Number_of_features", "Selected_features"],
)

# Save to CSV; if that fails, dump the table to the console so the run's
# results are not lost.
try:
    fusion_df.to_csv(OUTPUT_FILE_NAME, index=False)
except Exception as save_error:
    print(f"\nError saving file: {save_error}")
    print("\nData that would have been saved:")
    print(fusion_df)
else:
    print(f"Successfully saved results to {OUTPUT_FILE_NAME}")
    # Read the file back as a sanity check. A failure here is only a
    # warning because the file itself has already been written.
    try:
        saved_data = pd.read_csv(OUTPUT_FILE_NAME)
        print("\nVerification of saved file:")
        print(saved_data.head())
        print("\nColumns in saved file:", saved_data.columns.tolist())
    except Exception as verify_error:
        print(f"\nWarning: Could not verify output file - {verify_error}")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement