# CW_task4_linear_SVM

#1. Importing Libraries:
from sklearn import svm
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import make_scorer, f1_score
#The script imports scikit-learn for the SVM model, the train/test split, and the evaluation metrics, and NumPy for loading the dataset.

#2. Loading Training Data:
#The dataset location is kept in one named constant so it is easy to spot and change.
TRAIN_DATASET_PATH = 'C:/Users/print15207/MATLAB Drive/Print HVDC/Smartgrid CW/train_dataset.npy'
#NOTE(review): allow_pickle=True lets NumPy unpickle arbitrary Python objects,
#so this file must come from a trusted source.
data = np.load(TRAIN_DATASET_PATH, allow_pickle=True)

#3.Extracting Features and Labels:
#.item() unwraps the single object stored in the loaded array; that object is
#indexed by the 'feature' and 'label' keys (presumably a pickled dict - confirm).
dataset = data.item()
x = dataset['feature']
y = dataset['label']

#4.Splitting the Data into Training and Testing Sets:
#Only the first N_BINARY_SAMPLES rows are used. According to the original author
#these contain just class 0 (normal measurement) and class 1 (FDI attack measurement).
N_BINARY_SAMPLES = 4800
x1 = x[:N_BINARY_SAMPLES]
y1 = y[:N_BINARY_SAMPLES]
#A fixed random_state makes the shuffle, and therefore the split, reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x1,
    y1,
    test_size=0.319,
    random_state=42,
)
print("Training set size: ",x_train.shape)
print("Testing set size: ",x_test.shape)

#5.Model Initialization and Training:
#A support vector classifier with a linear kernel and regularisation parameter C=2.
#gamma is deliberately not set: scikit-learn uses it only as the kernel coefficient
#for the 'rbf', 'poly' and 'sigmoid' kernels, so it has no effect on a linear kernel.
#NOTE(review): decision_function_shape is documented as ignored for binary
#classification, so 'ovo' should not matter for a two-class problem - confirm the labels are binary.
model = svm.SVC(C=2, kernel='linear', decision_function_shape='ovo')
#Fit the classifier on the training split only; the test split stays unseen until evaluation.
model.fit(x_train, y_train)

#6.Evaluating the Model:
#6.1 Accuracy
#SVC.score returns the mean accuracy of the predictions on the samples it is given.
train_score = model.score(x_train, y_train)
print("Accuracy on training set: ",train_score)
test_score = model.score(x_test, y_test)
print("Accuracy on test set: ",test_score)

#6.2 TPR and FPR
#Predict the held-out samples once and reuse the predictions for every metric below.
test_predict = model.predict(x_test)
#Rows of the confusion matrix are the true labels (y_test), columns are the predictions.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=test_predict)
print("Confusion Matrix:", conf_matrix, sep="\n")

#For a two-class problem the matrix is laid out as [[TN, FP], [FN, TP]],
#so it can be unpacked row by row (this fails if the matrix is not 2x2).
(TN, FP), (FN, TP) = conf_matrix
#TPR: fraction of actual positives that were detected.
TPR = TP / (FN + TP)
#FPR: fraction of actual negatives that were wrongly flagged as positive.
FPR = FP / (TN + FP)

print("True Positive Rate (TPR):", TPR)
print("False Positive Rate (FPR):", FPR)

#6.3 F1 score
#NOTE(review): scorer wraps f1_score for use as a model-selection metric, but nothing
#in the visible script uses it - confirm whether hyperparameter tuning was intended.
scorer = make_scorer(f1_score)
#With its default settings f1_score reports the binary F1 of the class labelled 1.
test_f1_score = f1_score(y_true=y_test, y_pred=test_predict)
print("F1 score on test set: ",test_f1_score)

#Result with elapsed time: 2 seconds:
#Training set size:  (3268, 34)
#Testing set size:  (1532, 34)
#Accuracy on training set:  0.7971236230110159
#Accuracy on test set:  0.8067885117493473
#Confusion Matrix:
#[[766  19]
# [277 470]]
#True Positive Rate (TPR): 0.6291834002677377
#False Positive Rate (FPR): 0.024203821656050957
#F1 score on test set:  0.7605177993527508