#CW_task4_kernel_SVM

#1. Importing Libraries:
from sklearn import svm
import numpy as np
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import make_scorer, f1_score
#The script imports the required libraries: scikit-learn for the SVM model, data splitting and evaluation metrics, and NumPy for numerical operations and file I/O.

#2. Loading Training Data:
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects.
# The file appears to hold a dict wrapped in an object array (see data.item()
# below), which requires pickling -- only load this file from a trusted source.
data = np.load('C:/Users/print15207/MATLAB Drive/Print HVDC/Smartgrid CW/train_dataset.npy',allow_pickle=True)
#The training dataset is loaded from the specified NumPy file.

#3.Extracting Features and Labels:
# Unwrap the stored object once and reuse it for both lookups.
dataset = data.item()
x = dataset['feature']
y = dataset['label']
#The features (x) and labels (y) are extracted from the loaded data.

#4.Splitting the Data into Training and Validation Sets:
x1 = x[:4800]  #Only classify between class 0 (normal measurement) and class 1 (FDI attack measurement)
y1 = y[:4800]
# random_state is fixed so the split is reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(x1, y1, test_size=0.319, random_state=42)
#The data is split into training and validation sets using train_test_split from scikit-learn.
print("Training set size: ",x_train.shape)
print("Validation set size: ",x_test.shape)

#5.Model Initialization and Training:
# Hyperparameters of the RBF-kernel support vector classifier, kept in one place.
svc_params = {
    'C': 10,
    'kernel': 'rbf',
    'gamma': 10,
    'decision_function_shape': 'ovo',
}
# Build the classifier from the settings above and fit it on the training split.
model = svm.SVC(**svc_params)
model.fit(x_train, y_train)

#6.Evaluating the Model:
#6.1 Accuracy
# model.score() reports the mean accuracy on the given samples and labels.
train_score = model.score(x_train,y_train)
print("Accuracy on training set: ",train_score)
test_score = model.score(x_test,y_test)
print("Accuracy on validation set: ",test_score)

#6.2 TPR and FPR
# Predict on the validation split; y_test holds the true labels.
test_predict = model.predict(x_test)
# Pin the label order to [0, 1] so the matrix is always 2x2. Without it,
# confusion_matrix infers the labels from the data, and the 4-way unpack
# below fails when only one class is present.
conf_matrix = confusion_matrix(y_test, test_predict, labels=[0, 1])
print("Confusion Matrix:")
print(conf_matrix)

# Rows are true labels and columns are predicted labels, so flattening the
# 2x2 matrix with ravel() yields TN, FP, FN, TP in that order.
TN, FP, FN, TP = conf_matrix.ravel()
# TPR: share of class-1 samples detected; FPR: share of class-0 samples flagged.
TPR = TP / (TP + FN)
FPR = FP / (FP + TN)

print("True Positive Rate (TPR):", TPR)
print("False Positive Rate (FPR):", FPR)

#6.3 F1 score
# F1 scorer object (e.g. for GridSearchCV's `scoring` argument); it is not
# used further in this script.
scorer = make_scorer(f1_score)
test_f1_score = f1_score(y_test, test_predict)
print("F1 score on validation set: ",test_f1_score)

#7.Loading and Predicting on test_feature Data:
# Use forward slashes only: a backslash before '/' is an invalid escape
# sequence in a normal string literal and leaves a stray backslash in the path.
# NOTE(review): allow_pickle=True unpickles arbitrary objects; the result is used
# directly as an array here, so it may be unnecessary -- confirm and drop if so.
data2 = np.load('C:/Users/print15207/MATLAB Drive/Print HVDC/Smartgrid CW/test_feature.npy',allow_pickle=True)
print("test_feature size:",data2.shape)
# Classify the unlabeled test features with the trained model.
test_predict2 = model.predict(data2)
print("Predictions on test feature:", test_predict2)

#8.Assessing Predictions on Test Data:
def assess(y_pred, expected_len=1200):
    """Check that *y_pred* is a valid prediction vector.

    The checks run in this order: every entry equals 0 or 1, the array is
    one-dimensional, and it has exactly *expected_len* entries.

    Args:
        y_pred: NumPy array of predicted labels.
        expected_len: Required number of predictions (defaults to 1200).

    Raises:
        AssertionError: If any of the checks fails.
    """
    # Raise explicitly rather than using `assert`, so the checks still run
    # when Python is started with -O (which strips assert statements).
    if not np.all((y_pred == 0) | (y_pred == 1)):
        raise AssertionError("predictions must contain only the labels 0 and 1")
    if len(y_pred.shape) != 1:
        raise AssertionError(
            f"predictions must be 1-D, got shape {y_pred.shape}"
        )
    if y_pred.shape[0] != expected_len:
        raise AssertionError(
            f"expected {expected_len} predictions, got {y_pred.shape[0]}"
        )
# Validate the predictions before saving; assess() raises AssertionError if
# they do not have the expected values, dimensionality and length.
assess(test_predict2)

#9.Saving Predictions to a NumPy File:
# A plain string literal is enough here: the file name has no placeholders.
np.save("group_8.npy", test_predict2)
#The predictions are saved to a NumPy file named "group_8.npy".

#Result with elapsed time: 2 seconds:
#Training set size:  (3268, 34)
#Validation set size:  (1532, 34)
#Accuracy on training set:  1.0
#Accuracy on validation set:  0.9993472584856397
#Confusion Matrix:
#[[785   0]
# [  1 746]]
#True Positive Rate (TPR): 0.998661311914324
#False Positive Rate (FPR): 0.0
#F1 score on validation set:  0.999330207635633
#test_feature size: (1200, 34)
#Predictions on test feature: [0. 0. 1. ... 0. 1. 1.]