Advertisement
print623

CW_task4_Gradient_Boosting_with_RandomizedSearchCV

Dec 20th, 2023 (edited)
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.86 KB | Cybersecurity | 0 0
  1. from sklearn.ensemble import GradientBoostingClassifier
  2. from sklearn.model_selection import RandomizedSearchCV, train_test_split
  3. from sklearn.metrics import make_scorer, f1_score
  4. from sklearn.metrics import accuracy_score
  5. from sklearn.metrics import confusion_matrix
  6. import numpy as np
  7.  
  8. # Assuming x_train, y_train, x_test, y_test are already defined
  9. data = np.load('C:/Users/print15207/MATLAB Drive/Print HVDC/Smartgrid CW/train_dataset.npy',allow_pickle=True)
  10. #The training dataset is loaded from the specified NumPy file.
  11.  
  12. #Extracting Features and Labels:
  13. x = data.item()['feature']
  14. y = data.item()['label']
  15. #The features (x) and labels (y) are extracted from the loaded data.
  16.  
  17. #Splitting the Data into Training and Testing Sets:
  18. x1=x[:4800] #Only classify between class 0 (normal measurement) and class 1 (FDI attack measurement)
  19. y1=y[:4800]
  20. x_train, x_test, y_train, y_test = train_test_split(x1, y1, test_size=0.313, random_state=42)
  21.  
  22. gb_model = GradientBoostingClassifier()
  23.  
  24. # Define the hyperparameter distributions to sample from
  25. param_dist = {
  26.     'n_estimators': np.arange(50, 201, 10),
  27.     'learning_rate': [0.01, 0.1, 0.2, 0.5],
  28.     'max_depth': [3, 4, 5, 6, 7, 8, 9, 10],
  29.     'min_samples_split': [2, 5, 10, 20],
  30.     'min_samples_leaf': [1, 2, 4, 8]
  31. }
  32.  
  33. # Define F1 score as the evaluation metric for hyperparameter tuning
  34. scorer = make_scorer(f1_score)
  35.  
  36. # Perform Randomized Search with Cross Validation
  37. random_search = RandomizedSearchCV(estimator=gb_model, param_distributions=param_dist, scoring=scorer, cv=5, n_iter=50, random_state=42)
  38. random_search.fit(x_train, y_train)
  39.  
  40. # Print the best hyperparameters
  41. print("Best Hyperparameters:", random_search.best_params_)
  42.  
  43. # Evaluate the model with the best hyperparameters on the test set
  44. best_model = random_search.best_estimator_
  45. test_predictions = best_model.predict(x_test)
  46. test_f1_score = f1_score(y_test, test_predictions)
  47. print("F1 Score on Test Set with Best Hyperparameters:", test_f1_score)
  48.  
  49. # Evaluate the model using TPR and FPR
  50. conf_matrix = confusion_matrix(y_test, test_predictions)
  51. print("Confusion Matrix:")
  52. print(conf_matrix)
  53. TN, FP, FN, TP = conf_matrix.ravel()
  54.  
  55. # Calculate TPR and FPR
  56. TPR = TP / (TP + FN)
  57. FPR = FP / (FP + TN)
  58.  
  59. # Print or use the metrics
  60. print("True Positive Rate (TPR):", TPR)
  61. print("False Positive Rate (FPR):", FPR)
  62.  
  63. #Result with elapsed time: 1742 seconds:
  64. #Best Hyperparameters: {'n_estimators': 160, 'min_samples_split': 5, 'min_samples_leaf': 8, 'max_depth': 8, 'learning_rate': 0.5}
  65. #F1 Score on Test Set with Best Hyperparameters: 0.9972640218878249
  66. #Confusion Matrix:
  67. [[770   0]
  68.  [  4 729]]
  69. #True Positive Rate (TPR): 0.9945429740791268
  70. #False Positive Rate (FPR): 0.0
  71.  
  72. # Now, you can use the trained model for predictions on new data.
  73. # For example, if 'new_data' is your new dataset, you can do:
  74. # new_predictions = gb_model.predict(new_data)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement