# Source: Pastebin paste "Fusion with count and list formatted" by mayankjoin3,
# posted Apr 7th, 2025 (Python, 9.86 KB, 344 views, expiry: never).
# Pastebin page chrome (advertisement and sign-up banner) is not part of the script.
import json
from collections import Counter
from datetime import datetime

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.feature_selection import mutual_info_classif, RFE, SelectFromModel, chi2
from sklearn.linear_model import LogisticRegression
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from xgboost import XGBClassifier

  13. #################################################_______DATA INPUT_______#######################################################
  14. DATASET_PERCENTAGE = 0.2
  15. DATA_INPUT_FILE = r"UNSW_NB15_testing-set_preprocessed_dataset_r.csv"
  16. timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
  17. OUTPUT_FILE_NAME = f"feature_fusion_result1_{timestamp}.csv"
  18.  
  19. # NOTE: The sum of the weights must be 1.0
  20. weights = {"CuckooSearch": 0.56, "FireworkOptimization": 0.24}
  21.  
  22. feature_selection_results = {
  23.     "CuckooSearch": ["Rate", "Srate", "Drate", "syn_flag_number", "psh_flag_number",
  24.           "ack_flag_number", "ece_flag_number", "cwr_flag_number",
  25.           "TCP", "UDP", "DHCP", "ICMP", "LLC", "AVG", "IAT", "Number"],
  26.     "FireworkOptimization": ["Rate", "Srate", "Drate", "syn_flag_number", "rst_flag_number",
  27.           "psh_flag_number", "ece_flag_number", "cwr_flag_number",
  28.           "fin_count", "urg_count", "rst_count", "IRC", "TCP", "UDP",
  29.           "ARP", "ICMP", "IAT", "Number", "Covariance"]
  30. }
  31. #################################################################################################################################
  32.  
  33. df = pd.read_csv(DATA_INPUT_FILE)
  34. df = df.sample(frac=DATASET_PERCENTAGE, random_state=42).reset_index(drop=True)
  35. fusion_results = {}
  36.  
  37. # 1: Intersection-Based Fusion
  38. try:
  39.     fusion_results["Intersection-Based Fusion"] = list(set.intersection(*map(set, feature_selection_results.values())))
  40. except Exception as e:
  41.     fusion_results["Intersection-Based Fusion"] = []
  42.     print(f"Error in Intersection-Based Fusion: {e}")
  43.  
  44. # 2: Union-Based Fusion
  45. try:
  46.     fusion_results["Union-Based Fusion"] = list(set.union(*map(set, feature_selection_results.values())))
  47. except Exception as e:
  48.     fusion_results["Union-Based Fusion"] = []
  49.     print(f"Error in Union-Based Fusion: {e}")
  50.  
  51. # 3: Majority Voting Fusion
  52. try:
  53.     feature_counts = Counter(sum(feature_selection_results.values(), []))
  54.     fusion_results["Majority Voting Fusion"] = [feat for feat, count in feature_counts.items() if count >= 2]
  55. except Exception as e:
  56.     fusion_results["Majority Voting Fusion"] = []
  57.     print(f"Error in Majority Voting Fusion: {e}")
  58.  
  59. # 4: Weighted Voting Fusion
  60. try:
  61.     feature_scores = {feat: sum(weights[algo] for algo in feature_selection_results if feat in feature_selection_results[algo])
  62.                       for feat in fusion_results["Union-Based Fusion"]}
  63.     fusion_results["Weighted Voting Fusion"] = sorted(feature_scores, key=feature_scores.get, reverse=True)[:len(fusion_results["Intersection-Based Fusion"])]
  64. except Exception as e:
  65.     fusion_results["Weighted Voting Fusion"] = []
  66.     print(f"Error in Weighted Voting Fusion: {e}")
  67.  
  68. # 5: Feature Importance-Based Fusion
  69. try:
  70.     rf = RandomForestClassifier(n_estimators=100)
  71.     rf.fit(df[fusion_results["Union-Based Fusion"]], df["label"])
  72.     fusion_results["Feature Importance-Based Fusion"] = [fusion_results["Union-Based Fusion"][i] for i in np.argsort(rf.feature_importances_)[::-1][:len(fusion_results["Intersection-Based Fusion"])]]
  73. except Exception as e:
  74.     fusion_results["Feature Importance-Based Fusion"] = []
  75.     print(f"Error in Feature Importance-Based Fusion: {e}")
  76.  
  77. # 6: Correlation-Based Fusion
  78. try:
  79.     target_corr = df[fusion_results["Union-Based Fusion"]].corrwith(df["label"]).abs()
  80.     fusion_results["Correlation-Based Fusion"] = target_corr[target_corr > 0.1].index.tolist()
  81. except Exception as e:
  82.     fusion_results["Correlation-Based Fusion"] = []
  83.     print(f"Error in Correlation-Based Fusion: {e}")
  84.  
  85.  
  86.  
  87. # 7: Mutual Information-Based Fusion
  88. try:
  89.     mi_scores = mutual_info_classif(df[fusion_results["Union-Based Fusion"]], df["label"])
  90.     fusion_results["Mutual Information-Based Fusion"] = [fusion_results["Union-Based Fusion"][i] for i in np.argsort(mi_scores)[::-1][:len(fusion_results["Intersection-Based Fusion"])]]
  91. except Exception as e:
  92.     fusion_results["Mutual Information-Based Fusion"] = []
  93.     print(f"Error in Mutual Information-Based Fusion: {e}")
  94.  
  95. # 8: RFE Fusion
  96. try:
  97.     rfe = RFE(RandomForestClassifier(n_estimators=50), n_features_to_select=len(fusion_results["Intersection-Based Fusion"]))
  98.     rfe.fit(df[fusion_results["Union-Based Fusion"]], df["label"])
  99.     fusion_results["RFE Fusion"] = [feat for feat, keep in zip(fusion_results["Union-Based Fusion"], rfe.support_) if keep]
  100. except Exception as e:
  101.     fusion_results["RFE Fusion"] = []
  102.     print(f"Error in RFE Fusion: {e}")
  103.  
  104. # 9: Wrapper-Based Fusion
  105. try:
  106.     xgb = XGBClassifier()
  107.     xgb.fit(df[fusion_results["Union-Based Fusion"]], df["label"])
  108.     wrapper_model = SelectFromModel(xgb, prefit=True)
  109.     fusion_results["Wrapper-Based Fusion"] = [feat for feat, keep in zip(fusion_results["Union-Based Fusion"], wrapper_model.get_support()) if keep]
  110. except Exception as e:
  111.     fusion_results["Wrapper-Based Fusion"] = []
  112.     print(f"Error in Wrapper-Based Fusion: {e}")
  113.  
  114. # 10: Stacked Autoencoder Fusion
  115. try:
  116.     input_layer = Input(shape=(df[fusion_results["Union-Based Fusion"]].shape[1],))
  117.     encoded = Dense(10, activation='relu')(input_layer)
  118.     decoded = Dense(df[fusion_results["Union-Based Fusion"]].shape[1], activation='sigmoid')(encoded)
  119.     autoencoder = Model(input_layer, decoded)
  120.     autoencoder.compile(optimizer='adam', loss='mse')
  121.     autoencoder.fit(df[fusion_results["Union-Based Fusion"]], df[fusion_results["Union-Based Fusion"]], epochs=10, batch_size=10, verbose=0)
  122.     encoded_model = Model(input_layer, encoded)
  123.     encoded_features = encoded_model.predict(df[fusion_results["Union-Based Fusion"]])
  124.     encoded_feature_variance = np.var(encoded_features, axis=0)
  125.     top_encoded_indices = np.argsort(encoded_feature_variance)[::-1][:min(len(fusion_results["Intersection-Based Fusion"]), len(encoded_feature_variance))]
  126.     fusion_results["Stacked Autoencoder Fusion"] = [fusion_results["Union-Based Fusion"][i % len(fusion_results["Union-Based Fusion"])] for i in top_encoded_indices]
  127. except Exception as e:
  128.     fusion_results["Stacked Autoencoder Fusion"] = []
  129.     print(f"Error in Stacked Autoencoder Fusion: {e}")
  130.  
  131. # 11: Information Gain-Based Fusion
  132. try:
  133.     info_gain_scores = mutual_info_classif(df[fusion_results["Union-Based Fusion"]], df["label"])
  134.     fusion_results["Information Gain-Based Fusion"] = [fusion_results["Union-Based Fusion"][i] for i in np.argsort(info_gain_scores)[::-1][:len(fusion_results["Intersection-Based Fusion"])]]
  135. except Exception as e:
  136.     fusion_results["Information Gain-Based Fusion"] = []
  137.     print(f"Error in Information Gain-Based Fusion: {e}")
  138.  
  139. # 12: Ensemble-Based Fusion
  140. try:
  141.     et = ExtraTreesClassifier(n_estimators=50)
  142.     et.fit(df[fusion_results["Union-Based Fusion"]], df["label"])
  143.     fusion_results["Ensemble-Based Fusion"] = [fusion_results["Union-Based Fusion"][i] for i in np.argsort(et.feature_importances_)[::-1][:len(fusion_results["Intersection-Based Fusion"])]]
  144. except Exception as e:
  145.     fusion_results["Ensemble-Based Fusion"] = []
  146.     print(f"Error in Ensemble-Based Fusion: {e}")
  147.  
  148. # 13: Chi-Square Fusion
  149. try:
  150.     chi_scores, _ = chi2(df[fusion_results["Union-Based Fusion"]], df["label"])
  151.     fusion_results["Chi-Square Fusion"] = [fusion_results["Union-Based Fusion"][i] for i in np.argsort(chi_scores)[::-1][:len(fusion_results["Intersection-Based Fusion"])]]
  152. except Exception as e:
  153.     fusion_results["Chi-Square Fusion"] = []
  154.     print(f"Error in Chi-Square Fusion: {e}")
  155.  
  156. # 14: LASSO Fusion
  157. try:
  158.     lasso = LogisticRegression(penalty='l1', solver='liblinear')
  159.     lasso.fit(df[fusion_results["Union-Based Fusion"]], df["label"])
  160.     fusion_results["LASSO Fusion"] = [feat for feat, coef in zip(fusion_results["Union-Based Fusion"], lasso.coef_[0]) if coef != 0]
  161. except Exception as e:
  162.     fusion_results["LASSO Fusion"] = []
  163.     print(f"Error in LASSO Fusion: {e}")
  164.  
  165. # Print and Save Results - Enhanced Version
  166. print("\nFeature Fusion Results:\n")
  167. for technique, features in fusion_results.items():
  168.     print(f"{technique}:")
  169.     print(f"  Features: {features}")
  170.     print(f"  Number of features: {len(features)}\n")
  171.  
  172. # # Create DataFrame with explicit column order
  173. # fusion_df = pd.DataFrame({
  174.     # "Technique": list(fusion_results.keys()),
  175.     # "Number_of_features": [len(features) for features in fusion_results.values()],
  176.     # "Selected_features": [", ".join(features) for features in fusion_results.values()]
  177. # })
  178.  
  179. import json
  180.  
  181. # Create DataFrame with proper list formatting
  182. fusion_df = pd.DataFrame({
  183.     "Technique": list(fusion_results.keys()),
  184.     "Number_of_features": [len(features) for features in fusion_results.values()],
  185.     "Selected_features": [json.dumps(features) for features in fusion_results.values()]
  186. })
  187.  
  188. # Ensure proper column order
  189. fusion_df = fusion_df[["Technique", "Number_of_features", "Selected_features"]]
  190.  
  191. # Save to CSV with verification
  192. try:
  193.     fusion_df.to_csv(OUTPUT_FILE_NAME, index=False)
  194.     print(f"Successfully saved results to {OUTPUT_FILE_NAME}")
  195.    
  196.     # Verify the saved file
  197.     try:
  198.         saved_data = pd.read_csv(OUTPUT_FILE_NAME)
  199.         print("\nVerification of saved file:")
  200.         print(saved_data.head())
  201.         print("\nColumns in saved file:", saved_data.columns.tolist())
  202.     except Exception as verify_error:
  203.         print(f"\nWarning: Could not verify output file - {verify_error}")
  204.        
  205. except Exception as save_error:
  206.     print(f"\nError saving file: {save_error}")
  207.     print("\nData that would have been saved:")
  208.     print(fusion_df)
# End of paste. (Pastebin page footer: advertisement and "Add Comment" / "Please, Sign In to add comment" prompts.)