Advertisement
mirosh111000

SVM

Dec 11th, 2023
166
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.39 KB | None | 0 0
  1. import numpy as np
  2. import pandas as pd
  3. import seaborn as sns
  4. from sklearn.preprocessing import LabelEncoder
  5. from sklearn.model_selection import train_test_split
  6. from sklearn.preprocessing import StandardScaler
  7. from sklearn.metrics import accuracy_score
  8. import random
  9. import matplotlib.pyplot as plt
  10. from sklearn.preprocessing import OneHotEncoder
  11. from scipy.optimize import fsolve
  12. from sklearn.decomposition import PCA
  13. import seaborn as sns
  14. from sklearn.datasets import make_classification
  15. from sklearn.svm import LinearSVC
  16.  
  17.  
  18. class SVM:
  19.     def __init__(self, etha=0.01, alpha=0.1, epochs=1000, patience=10):
  20.         self._epochs = epochs
  21.         self._etha = etha
  22.         self._alpha = alpha
  23.         self._w = None
  24.         self.history_w = []
  25.         self.train_errors = None
  26.         self.val_errors = None
  27.         self.train_loss = None
  28.         self.val_loss = None
  29.         self.best_err = float('inf')
  30.         self.best_w = None
  31.         self.patience = patience
  32.  
  33.     def bias(self, b):
  34.         ones_column = np.ones((b.shape[0], 1))
  35.         return np.hstack((b, ones_column))
  36.  
  37.     def hinge_margin(self, w, x, y):
  38.         return max(0, 1 - y * np.dot(x, w))
  39.  
  40.     def soft_margin(self, w, x, y, alpha):
  41.         return self.hinge_margin(w, x, y) + alpha * np.dot(w, w)
  42.  
  43.     def fit(self, X_train, Y_train, X_val, Y_val):
  44.         print('\nModel is fitting:\n...')
  45.         X_train = self.bias(X_train)
  46.         X_val = self.bias(X_val)
  47.         w = np.zeros(X_train.shape[1])
  48.         Y_train = np.copy(Y_train)
  49.         new_w = [w.copy()]
  50.         train_errors = []
  51.         val_errors = []
  52.         train_loss = []
  53.         val_loss = []
  54.  
  55.         for epoch in range(self._epochs):
  56.             pom_train = 0
  57.             pom_val = 0
  58.             vtr_train = 0
  59.             vtr_val = 0
  60.            
  61.  
  62.             for i, x in enumerate(X_train):
  63.                 margin = Y_train[i] * np.dot(w, X_train[i])
  64.  
  65.                 if margin >= 1:
  66.                     w = w - self._etha * self._alpha * w / self._epochs
  67.                 else:
  68.                     w = w + self._etha * (Y_train[i] * X_train[i] - self._alpha * w / self._epochs)
  69.                     pom_train += 1
  70.                
  71.                 vtr_train += self.soft_margin(w, X_train[i], Y_train[i], self._alpha)
  72.                 new_w.append(w.copy())
  73.                
  74.  
  75.  
  76.             for i, x in enumerate(X_val):
  77.                 vtr_val += self.soft_margin(w, X_val[i], Y_val[i], self._alpha)
  78.                 pom_val += (Y_val[i] * np.dot(w, X_val[i]) < 1).astype(int)
  79.  
  80.             train_errors.append(pom_train)
  81.             val_errors.append(pom_val)
  82.             train_loss.append(vtr_train)
  83.             val_loss.append(vtr_val)
  84.  
  85.             if pom_train < 1:
  86.                 break
  87.            
  88.             if pom_train < self.best_err:
  89.                 self.best_err = pom_train
  90.                 self.best_w = np.copy(w)
  91.                 last_improvement = epoch
  92.             elif epoch - last_improvement >= self.patience:
  93.                 print(f"Early stopping at epoch {epoch}, no improvement in the last {self.patience} epochs.")
  94.                 break
  95.  
  96.         self._w = self.best_w
  97.         self.history_w = np.array(new_w)
  98.         self.train_errors = train_errors
  99.         self.val_errors = val_errors
  100.         self.train_loss = train_loss
  101.         self.val_loss = val_loss
  102.         print('Model has fitted.\n')
  103.        
  104.     def predict(self, X):
  105.         X_b = self.bias(X)
  106.         y_pred = np.sign(np.dot(X_b, self._w))
  107.         return y_pred
  108.  
  109.  
  110.    
  111. def find_best_alpha(alphas, x_train, y_train, x_val, y_val):
  112.     best_alpha = None
  113.     best_error = float('inf')
  114.  
  115.     for alpha in alphas:
  116.         model = SVM(alpha=alpha)
  117.         model.fit(x_train, y_train, x_val, y_val)
  118.         mean_val_error = np.mean(model.val_errors)
  119.        
  120.         if mean_val_error < best_error:
  121.             best_error = mean_val_error
  122.             best_alpha = alpha
  123.  
  124.     return best_alpha
  125.  
  126.  
  127.  
  128. n_features = 5
  129. X, y = make_classification(
  130.     n_samples=1001,
  131.     n_features=n_features,  
  132.     n_classes=2,    
  133.     n_clusters_per_class=1,  
  134.     class_sep=2,  
  135.     random_state=42  
  136. )
  137.  
  138. df = pd.DataFrame(data=X, columns=[f'F{i+1}' for i in range(n_features)])
  139. df['target'] = y
  140.  
  141. X = df.drop(columns=['target'])
  142. Y = df.target
  143. pca = PCA(n_components=2)
  144. X = pd.DataFrame(pca.fit_transform(X), columns=['F1', 'F2'], index=X.index)
  145.  
  146. sns.pairplot(pd.concat([X, Y], axis=1), hue='target')
  147. plt.show()
  148.  
  149. for class_ in np.unique(Y):
  150.     Y = Y.apply(lambda x: 1.0 if x == class_ else -1.0)
  151.  
  152. x_train, x_rem, y_train, y_rem = train_test_split(X, Y, test_size=0.2, random_state=42)
  153. x_test, x_val, y_test, y_val = train_test_split(x_rem, y_rem, test_size=0.5, random_state=42)
  154.  
  155.  
  156. w_svm = model_svm._w
  157. err_svm = model_svm.train_errors
  158. val_err = model_svm.val_errors
  159. tr_loss = model_svm.train_loss
  160. val_loss = model_svm.val_loss
  161. epochs_svm = range(1, len(err_svm)+1)
  162.  
  163. print("Кількість помилок на кожній епохі")
  164. print(err_svm)  
  165. print("Значення ваг")
  166. print(w_svm)
  167.  
  168. plt.figure(figsize=(8, 6))
  169. for i in np.unique(Y):
  170.     plt.scatter(X.loc[Y == i, X.columns[0]], X.loc[Y == i, X.columns[1]], label=i)
  171. x1_svm = np.array([X.iloc[:, 0].min(), X.iloc[:, 0].max()])
  172. x2_svm = np.array([(-w_svm[0] * x1_svm[0] - w_svm[2]) / w_svm[1], (-w_svm[0] * x1_svm[1] - w_svm[2]) / w_svm[1]])
  173. plt.title('Загальна вибірка')
  174. plt.plot(x1_svm, x2_svm, 'g')
  175. plt.plot(x1_svm, x2_svm+1, 'g--')
  176. plt.plot(x1_svm, x2_svm-1, 'g--')
  177. plt.ylim(X.F2.min()-1, X.F2.max()+1)
  178. plt.legend()
  179. plt.show()
  180.  
  181.  
  182. plt.figure(figsize=(10, 6))
  183. plt.plot(epochs_svm, np.array(tr_loss), label='train')
  184. plt.plot(epochs_svm, np.array(val_loss), label='validation')
  185. plt.legend()
  186. plt.grid()
  187. plt.ylabel('Втрати')
  188. plt.xlabel('Кількість епох')
  189. plt.show()
  190.    
  191.    
  192. accuracy = accuracy_score(y_train, pred_train)
  193. print(f"SVM Train Accuracy: {round(accuracy*100, 2)}%")
  194.  
  195. accuracy = accuracy_score(y_test, pred_test)
  196. print(f"SVM Test Accuracy: {round(accuracy*100, 2)}%")
  197.  
  198. accuracy = accuracy_score(y_val, pred_val)
  199. print(f"SVM Validation Accuracy: {round(accuracy*100, 2)}%")
  200.  
  201. y_wrong = y_test.loc[y_test != pred_test].index
  202. plt.title("Тестова вибірка")
  203. colors = ['blue' if y == -1 else 'orange' if y == 1 else 'black' for y in pred_test]
  204. for i in np.unique(Y):
  205.     plt.scatter(x_test.loc[(Y == i), X.columns[0]], x_test.loc[(Y == i), X.columns[1]], label=i)
  206.  
  207. plt.scatter(x_test.loc[y_wrong, X.columns[0]], x_test.loc[y_wrong, X.columns[1]], c='black', label='error')
  208. plt.plot(x1_svm, x2_svm, 'g')
  209. plt.plot(x1_svm, x2_svm+1, 'g--')
  210. plt.plot(x1_svm, x2_svm-1, 'g--')
  211. plt.ylim(X.F2.min()-1, X.F2.max()+1)
  212. plt.legend()
  213. plt.show()
  214.  
  215.  
  216. y_wrong = y_train.loc[y_train != pred_train].index
  217. plt.title("Тренувальна вибірка")
  218. colors = ['blue' if y == -1 else 'orange' if y == 1 else 'black' for y in pred_test]
  219. for i in np.unique(Y):
  220.     plt.scatter(x_train.loc[(Y == i), X.columns[0]], x_train.loc[(Y == i), X.columns[1]], label=i)
  221.  
  222. plt.scatter(x_train.loc[y_wrong, X.columns[0]], x_train.loc[y_wrong, X.columns[1]], c='black', label='error')
  223. plt.plot(x1_svm, x2_svm, 'g')
  224. plt.plot(x1_svm, x2_svm+1, 'g--')
  225. plt.plot(x1_svm, x2_svm-1, 'g--')
  226. plt.ylim(X.F2.min()-1, X.F2.max()+1)
  227. plt.legend()
  228. plt.show()
  229.  
  230.  
  231.  
  232. model = LinearSVC(max_iter=200, random_state=42)
  233. model.fit(x_train, y_train)
  234.  
  235. plt.figure(figsize=(8, 6))
  236.  
  237. for i in np.unique(Y):
  238.     plt.scatter(X.loc[Y == i, X.columns[0]], X.loc[Y == i, X.columns[1]], label=i)
  239.  
  240. x_min, x_max = X.iloc[:, 0].min() - 1, X.iloc[:, 0].max() + 1
  241. y_min, y_max = X.iloc[:, 1].min() - 1, X.iloc[:, 1].max() + 1
  242.  
  243. xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
  244. Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
  245. Z = Z.reshape(xx.shape)
  246. plt.contourf(xx, yy, Z, alpha=0.4, cmap=plt.cm.coolwarm)
  247. plt.title('Sklearn_SVM')
  248.  
  249. plt.legend()
  250. plt.show()
  251.  
  252.  
  253. pred_test = model.predict(x_test)
  254. pred_train = model.predict(x_train)
  255. pred_val = model.predict(x_val)
  256.  
  257. accuracy = accuracy_score(y_train, pred_train)
  258. print(f"SVM Train Accuracy: {round(accuracy*100, 2)}%")
  259.  
  260. accuracy = accuracy_score(y_test, pred_test)
  261. print(f"SVM Test Accuracy: {round(accuracy*100, 2)}%")
  262.  
  263. accuracy = accuracy_score(y_val, pred_val)
  264. print(f"SVM Validation Accuracy: {round(accuracy*100, 2)}%")
  265.  
  266. y_pred = model.predict(x_test)
  267. accuracy = accuracy_score(y_test, y_pred) * 100
  268.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement