# CW_task4_Random Forest

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.metrics import make_scorer, f1_score

# Load the dataset
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, so
# this file must come from a trusted source.
DATA_PATH = 'C:/Users/print15207/MATLAB Drive/Print HVDC/Smartgrid CW/train_dataset.npy'
data = np.load(DATA_PATH, allow_pickle=True)

# Extract features and labels.
# Unwrap the stored object once instead of calling .item() for every key
# (presumably a dict with 'feature' and 'label' entries).
dataset = data.item()
x = dataset['feature']
y = dataset['label']

# Only classify between class 0 (normal measurement) and class 1 (FDI attack
# measurement).
# NOTE(review): assumes the first 4800 samples are exactly the class 0 /
# class 1 rows - confirm against the dataset layout.
N_BINARY_SAMPLES = 4800
x1 = x[:N_BINARY_SAMPLES]
y1 = y[:N_BINARY_SAMPLES]

# Split the data into training and testing sets (31.5 % held out for testing;
# fixed random_state so the split is reproducible between runs).
x_train, x_test, y_train, y_test = train_test_split(
    x1, y1, test_size=0.315, random_state=42
)

# Print the shape of training and testing sets
print("Training set sizee:", x_train.shape)
print("Testing set size:", x_test.shape)

from sklearn.ensemble import RandomForestClassifier

# Initialize a Random Forest model (77 trees; fixed random_state so repeated
# runs build the same forest)
rf_model = RandomForestClassifier(n_estimators=77, random_state=42)

# Train the Random Forest model
rf_model.fit(x_train, y_train)

# Make predictions on the test set.
# Predict once and bind both names to the result: the original called
# predict() twice on the same fitted model and the same data, which only
# repeated the work.
rf_test_predict = rf_model.predict(x_test)
test_predict = rf_test_predict

# Evaluate the model
# y_test set is the true value and test_predict set is the predicted value
accuracy = accuracy_score(y_test, test_predict)
print("accuracy on test set: ", accuracy)

# Evaluate the model using TPR and FPR.
# labels=[0, 1] pins the matrix to 2x2 even if y_test and test_predict
# together contain only one class; without it the four-way unpacking below
# fails on a 1x1 matrix.
# NOTE(review): assumes the labels are encoded as 0 (normal) and 1 (FDI
# attack), as described where the data is sliced - confirm.
conf_matrix = confusion_matrix(y_test, test_predict, labels=[0, 1])
print("Confusion Matrix:")
print(conf_matrix)

# Rows are true classes and columns are predicted classes, so flattening
# row by row yields TN, FP, FN, TP.
TN, FP, FN, TP = conf_matrix.ravel()

# Calculate TPR and FPR
TPR = TP / (TP + FN)  # fraction of true class-1 samples predicted as class 1
FPR = FP / (FP + TN)  # fraction of true class-0 samples predicted as class 1

# Print or use the metrics
print("True Positive Rate (TPR):", TPR)
print("False Positive Rate (FPR):", FPR)

# F1 score of the test-set predictions, using f1_score's default arguments
test_f1_score = f1_score(y_test, test_predict)
print("F1 score on test set: ", test_f1_score)

# F1-based scorer object meant as the evaluation metric for hyperparameter
# tuning; nothing in the visible part of the script uses it yet.
scorer = make_scorer(f1_score)

# Result (elapsed time: 5 seconds):
# Training set sizee: (3288, 34)
# Testing set size: (1512, 34)
# accuracy on test set:  0.998015873015873
# Confusion Matrix:
# [[772   3]
#  [  0 737]]
# True Positive Rate (TPR): 1.0
# False Positive Rate (FPR): 0.003870967741935484
# F1 score on test set:  0.997968855788761