Advertisement
mirosh111000

Курсова робота Мірошниченко

Dec 25th, 2023
133
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 42.71 KB | None | 0 0
  1. from sklearn.datasets import make_classification
  2. import numpy as np
  3. import pandas as pd
  4. import seaborn as sns
  5. from sklearn.preprocessing import LabelEncoder
  6. from sklearn.model_selection import train_test_split
  7. from sklearn.preprocessing import StandardScaler
  8. from sklearn.metrics import accuracy_score, classification_report, precision_score, f1_score, recall_score
  9. import random
  10. import matplotlib.pyplot as plt
  11. from sklearn.preprocessing import OneHotEncoder
  12. from scipy.optimize import fsolve
  13. from sklearn.decomposition import PCA
  14. import seaborn as sns
  15. from sklearn.svm import SVC
  16. from sklearn.linear_model import LogisticRegression
  17.  
  18.  
  19.  
  20.  
  21.  
  22.  
  23. def polynomial_features(X, d):
  24.     n_columns = 0
  25.     for i in range(n_columns + 1, d + 2):
  26.         n_columns += i
  27.  
  28.     x1 = X.iloc[:, 0]
  29.     x2 = X.iloc[:, 1]
  30.     features = pd.DataFrame(columns=[f'F{i + 1}' for i in range(n_columns)])
  31.     for k in range(len(X)):
  32.         feature_row = []
  33.         for i in range(d + 1):
  34.             for j in range(i + 1):
  35.                 feature_row.append((x1[k] ** (i - j)) * (x2[k] ** j))
  36.         features.loc[k] = feature_row
  37.     return features
  38.  
  39. class Logistic_Regression:
  40.     def __init__(self, epochs=1000, learning_rate=0.01, C=1.0, patience=100, L2=False):
  41.         self.epochs = epochs
  42.         self.learning_rate = learning_rate
  43.         self.C = C
  44.         self.patience = patience
  45.         self.t = []
  46.         self.n = []
  47.         self.best_w = None
  48.         self.best_err = float('inf')
  49.         self.last_improvement = 0
  50.         self.x_train = None
  51.         self.x_test = None
  52.         self.L2 = L2
  53.        
  54.        
  55.     def sigmoid(self, M):
  56.         return 1 / (1 + np.exp(-M))
  57.  
  58.     def diff(self, x, w, y):
  59.         M = y * np.dot(x, w.T)
  60.         d = (1 - self.sigmoid(M)) * y * x
  61.         return d
  62.  
  63.     def fit(self, X_train, y_train):
  64.         print('\nModel is fitting:\n...')
  65.         self.x_train = X_train.copy()
  66.         len_w = self.x_train.shape[1] + 1
  67.         lambda_ = 1 / self.C
  68.         w = np.random.rand(len_w)
  69.         ell = len(self.x_train)
  70.         self.x_train['bias'] = np.ones(ell)
  71.  
  72.         for k in range(self.epochs):
  73.             err = 0
  74.             dw = np.random.rand(len_w)
  75.             for i in range(ell):
  76.                 M = y_train.iloc[i] * np.dot(self.x_train.iloc[i], w)
  77.                 if M < 0:
  78.                     err += 1
  79.                 dw -= self.diff(self.x_train.iloc[i].values, w, y_train.iloc[i])
  80.            
  81.             if self.L2 == True:
  82.                 w_reg = np.copy(w)
  83.                 w_reg[-1] = 0
  84.                 dw -= 2 * lambda_ * w_reg
  85.             w -= self.learning_rate * dw / ell
  86.            
  87. #             print(f'epoch: {k} ; w = {w}; err = {err}\n')
  88.  
  89.             self.t.append(k + 1)
  90.             self.n.append(err)
  91.  
  92.             if err < self.best_err:
  93.                 self.best_err = err
  94.                 self.best_w = np.copy(w)
  95.                 self.last_improvement = k
  96.             elif k - self.last_improvement >= self.patience:
  97.                 print(f"Early stopping at epoch {k}, no improvement in the last {self.patience} epochs.")
  98.                 break
  99.  
  100.             if err < 1:
  101.                 break
  102.                
  103.         print('Model has fitted.\n')
  104.  
  105.         return self.best_w
  106.  
  107.     def predict(self, X_test):
  108.        
  109.         self.x_test = X_test.copy()
  110.         ell_test = len(self.x_test)
  111.         self.x_test['bias'] = np.ones(ell_test)
  112.         predictions = np.sign(np.dot(self.x_test.values, self.best_w))
  113.        
  114.         return predictions
  115.  
  116.  
  117.  
  118.  
  119.  
  120. class SVM:
  121.     def __init__(self, etha=0.01, alpha=0.1, epochs=1000, patience=25):
  122.         self._epochs = epochs
  123.         self._etha = etha
  124.         self._alpha = alpha
  125.         self._w = None
  126.         self.history_w = []
  127.         self.train_errors = None
  128. #         self.val_errors = None
  129.         self.train_loss = None
  130. #         self.val_loss = None
  131.         self.best_err = float('inf')
  132.         self.best_w = None
  133.         self.patience = patience
  134.  
  135.     def bias(self, b):
  136.         ones_column = np.ones((b.shape[0], 1))
  137.         return np.hstack((b, ones_column))
  138.  
  139.     def hinge_margin(self, w, x, y):
  140.         return max(0, 1 - y * np.dot(x, w))
  141.  
  142.     def soft_margin(self, w, x, y, alpha):
  143.         return self.hinge_margin(w, x, y) + alpha * np.dot(w, w)
  144.  
  145.     def fit(self, X_train, Y_train):
  146.         print('\nModel is fitting:\n...')
  147.         X_train = self.bias(X_train)
  148. #         X_val = self.bias(X_val)
  149.         w = np.zeros(X_train.shape[1])
  150.         Y_train = np.copy(Y_train)
  151.         new_w = [w.copy()]
  152.         train_errors = []
  153.         val_errors = []
  154.         train_loss = []
  155.         val_loss = []
  156.  
  157.         for epoch in range(self._epochs):
  158.             pom_train = 0
  159.             pom_val = 0
  160.             vtr_train = 0
  161.             vtr_val = 0
  162.            
  163.  
  164.             for i, x in enumerate(X_train):
  165.                 margin = Y_train[i] * np.dot(w, X_train[i])
  166.  
  167.                 if margin >= 1:
  168.                     w = w - self._etha * self._alpha * w / self._epochs
  169. #                     vtr_train += self.soft_margin(w, X_train[i], Y_train[i], self._alpha)
  170.                 else:
  171.                     w = w + self._etha * (Y_train[i] * X_train[i] - self._alpha * w / self._epochs)
  172.                     pom_train += 1
  173.                
  174.                 vtr_train += self.soft_margin(w, X_train[i], Y_train[i], self._alpha)
  175.                 new_w.append(w.copy())
  176.                
  177.  
  178.  
  179. #             for i, x in enumerate(X_val):
  180. #                 vtr_val += self.soft_margin(w, X_val[i], Y_val[i], self._alpha)
  181. #                 pom_val += (Y_val[i] * np.dot(w, X_val[i]) < 1).astype(int)
  182.  
  183.             train_errors.append(pom_train)
  184. #             val_errors.append(pom_val)
  185.             train_loss.append(vtr_train)
  186. #             val_loss.append(vtr_val)
  187.  
  188.             if pom_train < 1:
  189.                 break
  190.            
  191.             if pom_train < self.best_err:
  192.                 self.best_err = pom_train
  193.                 self.best_w = np.copy(w)
  194.                 last_improvement = epoch
  195.             elif epoch - last_improvement >= self.patience:
  196.                 print(f"Early stopping at epoch {epoch}, no improvement in the last {self.patience} epochs.")
  197.                 break
  198.  
  199.         self._w = self.best_w
  200.         self.history_w = np.array(new_w)
  201.         self.train_errors = train_errors
  202. #         self.val_errors = val_errors
  203.         self.train_loss = train_loss
  204. #         self.val_loss = val_loss
  205.         print('Model has fitted.\n')
  206.        
  207.     def predict(self, X):
  208.         X_b = self.bias(X)
  209.         y_pred = np.sign(np.dot(X_b, self._w))
  210.         return y_pred
  211.  
  212.  
  213.  
  214.  
  215.    
  216. print('Лінінйно-роздільна вибірка:\n')
  217.    
  218. # Створіть датасет з трьома лінійно роздільними класами
  219. X, y = make_classification(
  220.     n_samples=300,  # Загальна кількість зразків
  221.     n_features=4,    # Кількість ознак
  222.     n_classes=3,     # Кількість класів
  223.     n_clusters_per_class=1,  # Кількість кластерів для кожного класу
  224.     class_sep=2.0,   # Відстань між центрами кластерів (роздільність)
  225.     random_state=42  # Фіксація генерації випадкових чисел для відтворюваності
  226. )
  227.  
  228. # Створіть DataFrame для зручності роботи з даними
  229. df = pd.DataFrame(data=X, columns=[f'F{i+1}' for i in range(4)])
  230. df['target'] = y
  231.  
  232. X = df.drop(columns=['target'])
  233. Y = df.target
  234. pca = PCA(n_components=2)
  235. X = pd.DataFrame(pca.fit_transform(X), columns=['F1', 'F2'], index=X.index)
  236.  
  237.  
  238.  
  239. sns.pairplot(pd.concat([X, Y], axis=1), hue='target', palette='dark')
  240. plt.show()
  241.  
  242.  
  243.  
  244.  
  245. learning_rate = 0.01
  246. x, y = [], []
  247. x_svm, y_svm = [], []
  248. sk_x, sk_y = [], []
  249. sk_x_svm, sk_y_svm = [], []
  250.  
  251. x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
  252. accuracy_svm, pr_svm = [], []
  253. accuracy_reg, pr_reg = [], []
  254. sk_accuracy_svm, pr_sk_svm  = [], []
  255. sk_accuracy_reg, pr_sk_reg = [], []
  256.  
  257.  
  258.  
  259.  
  260.  
  261. for class_ in np.unique(Y):
  262.     print(f'Відокремлення класу {class_}')
  263.     x_train_v1, x_test_v1, y_train_v1, y_test_v1 = x_train.copy(), x_test.copy(), y_train.copy(), y_test.copy()
  264.    
  265.     y_train_v1 = y_train_v1.apply(lambda x: 1.0 if x == class_ else -1.0)
  266.     y_test_v1 = y_test_v1.apply(lambda x: 1.0 if x == class_ else -1.0)
  267.    
  268.     sns.pairplot(pd.concat([x_train_v1, y_train_v1], axis=1), hue='target', palette='dark')
  269.     plt.show()
  270.    
  271.    
  272.     model_svm = SVM()
  273.     model_svm.fit(x_train_v1, y_train_v1)
  274.     pred_test = model_svm.predict(x_test_v1)
  275.     pred_train = model_svm.predict(x_train_v1)
  276.    
  277.     w_svm = model_svm._w
  278.     err_svm = model_svm.train_errors
  279.     epochs_svm = range(1, len(err_svm)+1)
  280.  
  281.     x1_svm = np.array([X.iloc[:, 0].min(), X.iloc[:, 0].max()])
  282.     x2_svm = np.array([(-w_svm[0] * x1_svm[0] - w_svm[2]) / w_svm[1], (-w_svm[0] * x1_svm[1] - w_svm[2]) / w_svm[1]])
  283.     x_svm.append(x1_svm)
  284.     y_svm.append(x2_svm)
  285.    
  286.    
  287.     plt.figure(figsize=(8, 6))
  288.     plt.title(f'SVM Відокремлення класу {class_}')
  289.     plt.plot(epochs_svm, np.array(err_svm)/len(x_train_v1)*100)
  290.     plt.ylabel('Відсоток помилок на тренувальній вибірці')
  291.     plt.xlabel('Кількість епох')
  292.     plt.show()
  293.    
  294.    
  295.     accuracy_tr = accuracy_score(y_train_v1.loc[y_train_v1 == 1.0], model_svm.predict(x_train_v1.loc[y_train_v1 == 1.0]))
  296.     print(f"SVM Train Recall for class {class_}: {round(accuracy_tr*100, 2)}%")
  297. #     print(classification_report(y_train_v1, model_svm.predict(x_train_v1)))
  298.     report = classification_report(y_train_v1, model_svm.predict(x_train_v1))
  299.     precision_tr = float(report.split('1.0 ')[2].split()[2])
  300. #     print(precision_tr)
  301.    
  302.    
  303.     accuracy_t = accuracy_score(y_test_v1.loc[y_test_v1 == 1.0], model_svm.predict(x_test_v1.loc[y_test_v1 == 1.0]))
  304.     print(f"SVM Test Recall for class {class_}: {round(accuracy_t*100, 2)}%")
  305.     accuracy_svm.append((accuracy_tr, accuracy_t))
  306.    
  307. #     print(classification_report(y_test_v1, model_svm.predict(x_test_v1)))
  308.     report = classification_report(y_test_v1, model_svm.predict(x_test_v1))
  309.     precision_t = float(report.split('1.0 ')[2].split()[2])
  310.     pr_svm.append((precision_tr, precision_t))
  311.    
  312.    
  313.     plt.figure(figsize=(8, 6))
  314.     plt.title(f'SVM Тренувальна вибірка(відокремлення класу {class_})')
  315.     y_wrong = y_train_v1.loc[(y_train_v1 != pred_train)]
  316.     x_wrong = x_train_v1.loc[(y_train_v1 != pred_train)]
  317.     for i in np.unique(y_test_v1):
  318.         plt.scatter(x_train_v1.loc[y_train_v1 == i, x_test_v1.columns[0]], x_train_v1.loc[y_train_v1 == i, x_train_v1.columns[1]], label=i)
  319.     plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black')
  320.     x1_svm = np.array([x_train_v1.iloc[:, 0].min(), x_train_v1.iloc[:, 0].max()])
  321.     x2_svm = np.array([(-w_svm[0] * x1_svm[0] - w_svm[2]) / w_svm[1], (-w_svm[0] * x1_svm[1] - w_svm[2]) / w_svm[1]])
  322.     plt.plot(x1_svm, x2_svm, c='purple')
  323.     plt.plot(x1_svm, x2_svm+1, c='purple', linestyle='--')
  324.     plt.plot(x1_svm, x2_svm-1, c='purple', linestyle='--')
  325.     plt.ylim(X.iloc[:, 1].min()-0.5, X.iloc[:, 1].max()+0.5)
  326.     plt.legend()
  327.     plt.show()
  328.    
  329.    
  330.     plt.figure(figsize=(8, 6))
  331.     plt.title(f'SVM Тестова вибірка(відокремлення класу {class_})')
  332. #     colors = ['violet' if y == -1 else 'orange' if y == 1 else 'black' for y in pred_test]
  333.     y_wrong = y_test_v1.loc[(y_test_v1 != pred_test)]
  334.     x_wrong = x_test_v1.loc[(y_test_v1 != pred_test)]
  335.     for i in np.unique(y_test_v1):
  336.         plt.scatter(x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[0]], x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[1]], label=i)
  337.     plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black')
  338.     x1_svm = np.array([x_test_v1.iloc[:, 0].min(), x_test_v1.iloc[:, 0].max()])
  339.     x2_svm = np.array([(-w_svm[0] * x1_svm[0] - w_svm[2]) / w_svm[1], (-w_svm[0] * x1_svm[1] - w_svm[2]) / w_svm[1]])
  340.     plt.plot(x1_svm, x2_svm, c='purple')
  341.     plt.plot(x1_svm, x2_svm+1, c='purple', linestyle='--')
  342.     plt.plot(x1_svm, x2_svm-1, c='purple', linestyle='--')
  343.     plt.ylim(X.iloc[:, 1].min()-0.5, X.iloc[:, 1].max()+0.5)
  344.     plt.legend()
  345.     plt.show()
  346.    
  347.    
  348.    
  349.    
  350.    
  351.     lr = Logistic_Regression(learning_rate=learning_rate, epochs=10000, L2=False, patience=100)
  352.     w = lr.fit(x_train_v1, y_train_v1)
  353. #     print(w)
  354.     pred_test = lr.predict(x_test_v1)
  355.     pred_train = lr.predict(x_train_v1)
  356.     epochs = lr.t
  357.     epochs_err = lr.n
  358.  
  359.     x1 = [X.iloc[:, 0].min(), X.iloc[:, 0].max()]
  360.     x2 = [(-w[0] * x1[0] - w[2]) / w[1], (-w[0] * x1[1] - w[2]) / w[1]]
  361.     x.append(x1)
  362.     y.append(x2)
  363.    
  364.     accuracy_tr = accuracy_score(y_train_v1.loc[y_train_v1 == 1.0], lr.predict(x_train_v1.loc[y_train_v1 == 1.0]))
  365.     print(f"LOG REG Train Recall for class {class_}: {round(accuracy_tr*100, 2)}%")
  366. #     print(classification_report(y_train_v1, lr.predict(x_train_v1)))
  367.    
  368.     report = classification_report(y_train_v1, lr.predict(x_train_v1))
  369.     precision_tr = float(report.split('1.0 ')[2].split()[2])
  370.    
  371.     accuracy_t = accuracy_score(y_test_v1.loc[y_test_v1 == 1.0], lr.predict(x_test_v1.loc[y_test_v1 == 1.0]))
  372.     print(f"LOG REG Test Recall for class {class_}: {round(accuracy_t*100, 2)}%")
  373.     accuracy_reg.append((accuracy_tr, accuracy_t))
  374. #     print(classification_report(y_test_v1, lr.predict(x_test_v1)))
  375.    
  376.     report = classification_report(y_test_v1, lr.predict(x_test_v1))
  377.     precision_t = float(report.split('1.0 ')[2].split()[2])
  378.     pr_reg.append((precision_tr, precision_t))
  379.  
  380.    
  381.  
  382.     plt.figure(figsize=(8, 6))
  383.     plt.title(f'LOG REG Відокремлення класу {class_}')
  384.     plt.plot(epochs, np.array(epochs_err)/len(x_train)*100)
  385.     plt.ylabel('Відсоток помилок на тренувальній вибірці')
  386.     plt.xlabel('Кількість епох')
  387.     plt.show()
  388.    
  389.    
  390.     plt.figure(figsize=(8, 6))
  391.     plt.title(f'LOG REG Тренувальна вибірка(відокремлення класу {class_})')
  392.     y_wrong = y_train_v1.loc[(y_train_v1 != pred_train)]
  393.     x_wrong = x_train_v1.loc[(y_train_v1 != pred_train)]
  394.     for i in np.unique(y_test_v1):
  395.         plt.scatter(x_train_v1.loc[y_train_v1 == i, x_test_v1.columns[0]], x_train_v1.loc[y_train_v1 == i, x_train_v1.columns[1]], label=i)
  396.     plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black')
  397.     x1 = np.array([x_train_v1.iloc[:, 0].min(), x_train_v1.iloc[:, 0].max()])
  398.     x2 = np.array([(-w[0] * x1[0] - w[2]) / w[1], (-w[0] * x1[1] - w[2]) / w[1]])
  399.     plt.plot(x1, x2, c='purple')
  400.     plt.ylim(X.iloc[:, 1].min()-0.5, X.iloc[:, 1].max()+0.5)
  401.     plt.legend()
  402.     plt.show()
  403.    
  404.    
  405.     plt.figure(figsize=(8, 6))
  406.     plt.title(f'LOG REG Тестова вибірка(відокремлення класу {class_})')
  407.     y_wrong = y_test_v1.loc[(y_test_v1 != pred_test)]
  408.     x_wrong = x_test_v1.loc[(y_test_v1 != pred_test)]
  409.    
  410.     for i in np.unique(y_test_v1):
  411.         plt.scatter(x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[0]], x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[1]], label=i)
  412.     plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black')
  413.     x1 = [x_test_v1.iloc[:, 0].min(), x_test_v1.iloc[:, 0].max()]
  414.     x2 = [(-w[0] * x1[0] - w[2]) / w[1], (-w[0] * x1[1] - w[2]) / w[1]]
  415.     plt.plot(x1, x2, c='purple')
  416.     plt.ylim(X.iloc[:, 1].min()-0.5, X.iloc[:, 1].max()+0.5)
  417.     plt.legend()
  418.     plt.show()
  419.    
  420.    
  421.    
  422.     # Лінійний SVM
  423.     svm_classifier = SVC(kernel='linear', max_iter=1000)
  424.     svm_classifier.fit(x_train_v1, y_train_v1)
  425.  
  426.     logistic_reg = LogisticRegression(max_iter=1000)
  427.     logistic_reg.fit(x_train_v1, y_train_v1)
  428.  
  429.     w_log_reg = logistic_reg.coef_[0]
  430.     b_log_reg = logistic_reg.intercept_[0]
  431.  
  432.     w_svm = svm_classifier.coef_[0]
  433.     b_svm = svm_classifier.intercept_[0]
  434.    
  435.    
  436.     accuracy_tr = accuracy_score(y_train_v1.loc[y_train_v1 == 1.0], svm_classifier.predict(x_train_v1.loc[y_train_v1 == 1.0]))
  437.     print(f"SVM sklearn Train Recall for class {class_}: {round(accuracy_tr*100, 2)}%")
  438. #     print(classification_report(y_train_v1, svm_classifier.predict(x_train_v1)))
  439.    
  440.     report = classification_report(y_train_v1, svm_classifier.predict(x_train_v1))
  441.     precision_tr = float(report.split('1.0 ')[2].split()[2])
  442.    
  443.     accuracy_t = accuracy_score(y_test_v1.loc[y_test_v1 == 1.0], svm_classifier.predict(x_test_v1.loc[y_test_v1 == 1.0]))
  444.     print(f"SVM sklearn Test Recall for class {class_}: {round(accuracy_t*100, 2)}%")
  445.     sk_accuracy_svm.append((accuracy_tr, accuracy_t))
  446. #     print(classification_report(y_test_v1, svm_classifier.predict(x_test_v1)))
  447.    
  448.     report = classification_report(y_test_v1, svm_classifier.predict(x_test_v1))
  449.     precision_t = float(report.split('1.0 ')[2].split()[2])
  450.     pr_sk_svm.append((precision_tr, precision_t))
  451.    
  452.    
  453.    
  454.     accuracy_tr = accuracy_score(y_train_v1.loc[y_train_v1 == 1.0], logistic_reg.predict(x_train_v1.loc[y_train_v1 == 1.0]))
  455.     print(f"LOG REG sklearn Train Recall for class {class_}: {round(accuracy_tr*100, 2)}%")
  456. #     print(classification_report(y_train_v1, logistic_reg.predict(x_train_v1)))
  457.    
  458.     report = classification_report(y_train_v1, logistic_reg.predict(x_train_v1))
  459.     precision_tr = float(report.split('1.0 ')[2].split()[2])
  460.    
  461.     accuracy_t = accuracy_score(y_test_v1.loc[y_test_v1 == 1.0], logistic_reg.predict(x_test_v1.loc[y_test_v1 == 1.0]))
  462.     print(f"LOG REG sklearn Test Recall for class {class_}: {round(accuracy_t*100, 2)}%")
  463.     sk_accuracy_reg.append((accuracy_tr, accuracy_t))
  464. #     print(classification_report(y_test_v1, logistic_reg.predict(x_test_v1)))
  465.    
  466.     report = classification_report(y_test_v1, logistic_reg.predict(x_test_v1))
  467.     precision_t = float(report.split('1.0 ')[2].split()[2])
  468.     pr_sk_reg.append((precision_tr, precision_t))
  469.    
  470.    
  471.     # Візуалізація прямих для лінійної логістичної регресії та лінійного SVM
  472. #     plt.figure(figsize=(8, 6))
  473. #     for i in np.unique(Y):
  474. #         plt.scatter(X.loc[Y == i, X.columns[0]], X.loc[Y == i, X.columns[1]], label=i)
  475.  
  476.     plt.figure(figsize=(8, 6))
  477.     plt.title(f'Sklearn Тренувальна вибірка(відокремлення класу {class_})')
  478.     y_wrong = y_train_v1.loc[(y_train_v1 != logistic_reg.predict(x_train_v1))]
  479.     x_wrong = x_train_v1.loc[(y_train_v1 != logistic_reg.predict(x_train_v1))]
  480.     y_wrong_svm_sk = y_train_v1.loc[(y_train_v1 != svm_classifier.predict(x_train_v1))]
  481.     x_wrong_svm_sk = x_train_v1.loc[(y_train_v1 != svm_classifier.predict(x_train_v1))]
  482.    
  483.     for i in np.unique(y_test_v1):
  484.         plt.scatter(x_train_v1.loc[y_train_v1 == i, x_test_v1.columns[0]], x_train_v1.loc[y_train_v1 == i, x_train_v1.columns[1]], label=i)
  485.     plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black', label='log_reg error')
  486.     plt.scatter(x_wrong_svm_sk.iloc[:, 0], x_wrong_svm_sk.iloc[:, 1], c='r', marker='v', label='svm error', alpha=0.5)
  487.    
  488.     x1_log_reg = np.array([X.iloc[:, 0].min(), X.iloc[:, 0].max()])
  489.     x2_log_reg = (-w_log_reg[0] / w_log_reg[1]) * x1_log_reg - (b_log_reg / w_log_reg[1])
  490.     sk_x.append(x1_log_reg)
  491.     sk_y.append(x2_log_reg)
  492.  
  493.     x1_svm = np.array([X.iloc[:, 0].min(), X.iloc[:, 0].max()])
  494.     x2_svm = (-w_svm[0] / w_svm[1]) * x1_svm - (b_svm / w_svm[1])
  495.     sk_x_svm.append(x1_svm)
  496.     sk_y_svm.append(x2_svm)
  497.    
  498.    
  499.     plt.plot(x1_svm, x2_svm, c='purple', linestyle='-.', label='sklearn svm')
  500.     plt.plot(x1_svm, x2_svm+1, c='purple', linestyle='--')
  501.     plt.plot(x1_svm, x2_svm-1, c='purple', linestyle='--')
  502.    
  503.     plt.plot(x1_log_reg, x2_log_reg, c='purple', label='sklearn log_reg')
  504.    
  505.     plt.ylim(X.iloc[:, 1].min()-0.5, X.iloc[:, 1].max()+0.5)
  506.     plt.legend()
  507.     plt.show()
  508.    
  509.    
  510.  
  511.     plt.figure(figsize=(8, 6))
  512.     plt.title(f'Sklearn Тестова вибірка(відокремлення класу {class_})')
  513.     y_wrong = y_test_v1.loc[(y_test_v1 != logistic_reg.predict(x_test_v1))]
  514.     x_wrong = x_test_v1.loc[(y_test_v1 != logistic_reg.predict(x_test_v1))]
  515.     y_wrong_svm_sk = y_test_v1.loc[(y_test_v1 != svm_classifier.predict(x_test_v1))]
  516.     x_wrong_svm_sk = x_test_v1.loc[(y_test_v1 != svm_classifier.predict(x_test_v1))]
  517.    
  518.     for i in np.unique(y_test_v1):
  519.         plt.scatter(x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[0]], x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[1]], label=i)
  520.     plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black', label='log_reg error')
  521.     plt.scatter(x_wrong_svm_sk.iloc[:, 0], x_wrong_svm_sk.iloc[:, 1], c='r', marker='v', label='svm error', alpha=0.5)
  522.    
  523.     plt.plot(x1_svm, x2_svm, c='purple', linestyle='-.', label='sklearn svm')
  524.     plt.plot(x1_svm, x2_svm+1, c='purple', linestyle='--')
  525.     plt.plot(x1_svm, x2_svm-1, c='purple', linestyle='--')
  526.    
  527.     plt.plot(x1_log_reg, x2_log_reg, c='purple', label='sklearn log_reg')
  528.    
  529.     plt.ylim(X.iloc[:, 1].min()-0.5, X.iloc[:, 1].max()+0.5)
  530.     plt.legend()
  531.     plt.show()
  532.  
  533.  
  534.    
  535.    
  536.  
  537.    
  538. len_t = len(y_test)
  539. len_tr = len(y_train)
  540. accuracy_tr_svm = 0
  541. accuracy_t_svm = 0
  542. accuracy_tr_reg = 0
  543. accuracy_t_reg = 0
  544.  
  545. pr_tr_svm = 0
  546. pr_t_svm = 0
  547. pr_tr_reg = 0
  548. pr_t_reg = 0
  549.  
  550. sk_accuracy_tr_svm = 0
  551. sk_accuracy_t_svm = 0
  552. sk_accuracy_tr_reg = 0
  553. sk_accuracy_t_reg = 0
  554.  
  555. sk_pr_tr_svm = 0
  556. sk_pr_t_svm = 0
  557. sk_pr_tr_reg = 0
  558. sk_pr_t_reg = 0
  559.  
  560.  
  561. for i in np.unique(Y):  
  562.     len_test = (len(y_test.loc[y_test == i]))
  563.     len_train = (len(y_train.loc[y_train == i]))
  564.    
  565.    
  566.     accuracy_tr_svm += len_train / len_tr * (accuracy_svm[i][0]) * 100
  567.     accuracy_t_svm += len_test / len_t * (accuracy_svm[i][1]) * 100
  568.     sk_accuracy_tr_svm += len_train / len_tr * (sk_accuracy_svm[i][0]) * 100
  569.     sk_accuracy_t_svm += len_test / len_t * (sk_accuracy_svm[i][1]) * 100
  570.    
  571.     pr_tr_svm += len_train / len_tr * (pr_svm[i][0]) * 100
  572.     pr_t_svm += len_test / len_t * (pr_svm[i][1]) * 100
  573.     sk_pr_tr_svm += len_train / len_tr * (pr_sk_svm[i][0]) * 100
  574.     sk_pr_t_svm += len_test / len_t * (pr_sk_svm[i][1]) * 100
  575.    
  576.    
  577.    
  578.     accuracy_tr_reg += len_train / len_tr * (accuracy_reg[i][0]) * 100
  579.     accuracy_t_reg += len_test / len_t * (accuracy_reg[i][1]) * 100
  580.     sk_accuracy_tr_reg += len_train / len_tr * (sk_accuracy_reg[i][0]) * 100
  581.     sk_accuracy_t_reg += len_test / len_t * (sk_accuracy_reg[i][1]) * 100
  582.    
  583.     pr_tr_reg += len_train / len_tr * (pr_reg[i][0]) * 100
  584.     pr_t_reg += len_test / len_t * (pr_reg[i][1]) * 100
  585.     sk_pr_tr_reg += len_train / len_tr * (pr_sk_reg[i][0]) * 100
  586.     sk_pr_t_reg += len_test / len_t * (pr_sk_reg[i][1]) * 100
  587.    
  588.    
  589.    
  590.    
  591.    
  592. print(f'SVM Train Recall: {round(accuracy_tr_svm, 2)}%')
  593. print(f'SVM Test Recall: {round(accuracy_t_svm, 2)}%\n')
  594.  
  595. print(f'LOG REG Train Recall: {round(accuracy_tr_reg, 2)}%')
  596. print(f'LOG REG Test Recall: {round(accuracy_t_reg, 2)}%')
  597.  
  598. print(f'\nSVM sklearn Train Recall: {round(sk_accuracy_tr_svm, 2)}%')
  599. print(f'SVM sklearn Test Recall: {round(sk_accuracy_t_svm, 2)}%\n')
  600.  
  601. print(f'LOG REG sklearn Train Recall: {round(sk_accuracy_tr_reg, 2)}%')
  602. print(f'LOG REG sklearn Test Recall: {round(sk_accuracy_t_reg, 2)}%')
  603.  
  604.  
  605.  
  606. print(f'\nSVM Train F1-score: {round(pr_tr_svm, 2)}%')
  607. print(f'SVM Test F1-score: {round(pr_t_svm, 2)}%\n')
  608.  
  609. print(f'LOG REG Train F1-score: {round(pr_tr_reg, 2)}%')
  610. print(f'LOG REG Test F1-score: {round(pr_t_reg, 2)}%')
  611.  
  612. print(f'\nSVM sklearn Train F1-score: {round(sk_pr_tr_svm, 2)}%')
  613. print(f'SVM sklearn Test F1-score: {round(sk_pr_t_svm, 2)}%\n')
  614.  
  615. print(f'LOG REG sklearn Train F1-score: {round(sk_pr_tr_reg, 2)}%')
  616. print(f'LOG REG sklearn Test F1-score: {round(sk_pr_t_reg, 2)}%')
  617.  
  618.  
  619.  
  620.  
  621. x = np.array(x)
  622. y = np.array(y)
  623. x_svm = np.array(x_svm)
  624. y_svm = np.array(y_svm)
  625. sk_x = np.array(sk_x)
  626. sk_y = np.array(sk_y)
  627. sk_x_svm = np.array(sk_x_svm)
  628. sk_y_svm = np.array(sk_y_svm)
  629.  
  630. plt.figure(figsize=(15, 10))
  631. plt.title('Порівняння всіх методів')
  632. for i in np.unique(Y):
  633.     plt.scatter(X.loc[Y == i, X.columns[0]], X.loc[Y == i, X.columns[1]], label=f'class{i}')
  634.  
  635. colors = ['b', 'orange', 'g']
  636. for i in range(len(x)):
  637.     plt.plot(x[i], y[i], c=f'{colors[i]}', label=f'LOG_REG(class{i})')
  638.     plt.plot(x_svm[i], y_svm[i], c=f'{colors[i]}', linestyle='--', label=f'SVM(class{i})')
  639.     plt.plot(sk_x[i], sk_y[i], c=f'{colors[i]}', label=f'LOG_REG_sk(class{i})', linestyle=':', lw=1.5)
  640.     plt.plot(sk_x_svm[i], sk_y_svm[i], c=f'{colors[i]}', linestyle='-.', label=f'SVM_sk(class{i})', lw=1.5)
  641.    
  642.  
  643. plt.ylim(X.iloc[:, 1].min()-0.5, X.iloc[:, 1].max()+0.5)
  644. plt.legend()
  645. plt.show()
  646.  
  647.  
  648.  
  649.  
  650.  
  651.  
  652.  
  653.  
  654. print('Лінінйно-нероздільна вибірка:\n')
  655.  
  656. # Створюємо датасет з трьома лінійно нероздільними класами
  657. X, y = make_classification(
  658.     n_samples=500,  # Загальна кількість зразків
  659.     n_features=4,    # Кількість ознак
  660.     n_classes=3,     # Кількість класів
  661.     n_clusters_per_class=1,  # Кількість кластерів для кожного класу
  662.     class_sep=.7,   # Зменшена роздільність між класами
  663.     random_state=18 # Фіксація генерації випадкових чисел для відтворюваності
  664. )
  665.  
  666. # Створіть DataFrame для зручності роботи з даними
  667. df = pd.DataFrame(data=X, columns=[f'F{i+1}' for i in range(4)])
  668. df['target'] = y
  669.  
  670.  
  671. X = df.drop(columns=['target'])
  672. Y = df.target
  673. pca = PCA(n_components=2)
  674. X = pd.DataFrame(pca.fit_transform(X), columns=['F1', 'F2'], index=X.index)
  675.  
  676. sns.pairplot(pd.concat([X, Y], axis=1), hue='target', palette='dark')
  677. plt.show()
  678.  
  679.  
  680.  
  681.  
  682. learning_rate = 0.01
  683. x, y = [], []
  684. x_svm, y_svm = [], []
  685. sk_x, sk_y = [], []
  686. sk_x_svm, sk_y_svm = [], []
  687.  
  688. x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
  689. accuracy_svm, pr_svm = [], []
  690. accuracy_reg, pr_reg = [], []
  691. sk_accuracy_svm, pr_sk_svm  = [], []
  692. sk_accuracy_reg, pr_sk_reg = [], []
  693.  
  694.  
  695.  
  696.  
  697.  
  698. for class_ in np.unique(Y):
  699.     print(f'Відокремлення класу {class_}')
  700.     x_train_v1, x_test_v1, y_train_v1, y_test_v1 = x_train.copy(), x_test.copy(), y_train.copy(), y_test.copy()
  701.    
  702.     y_train_v1 = y_train_v1.apply(lambda x: 1.0 if x == class_ else -1.0)
  703.     y_test_v1 = y_test_v1.apply(lambda x: 1.0 if x == class_ else -1.0)
  704.    
  705.     sns.pairplot(pd.concat([x_train_v1, y_train_v1], axis=1), hue='target', palette='dark')
  706.     plt.show()
  707.    
  708.    
  709.     model_svm = SVM()
  710.     model_svm.fit(x_train_v1, y_train_v1)
  711.     pred_test = model_svm.predict(x_test_v1)
  712.     pred_train = model_svm.predict(x_train_v1)
  713.    
  714.     w_svm = model_svm._w
  715.     err_svm = model_svm.train_errors
  716.     epochs_svm = range(1, len(err_svm)+1)
  717.  
  718.     x1_svm = np.array([X.iloc[:, 0].min(), X.iloc[:, 0].max()])
  719.     x2_svm = np.array([(-w_svm[0] * x1_svm[0] - w_svm[2]) / w_svm[1], (-w_svm[0] * x1_svm[1] - w_svm[2]) / w_svm[1]])
  720.     x_svm.append(x1_svm)
  721.     y_svm.append(x2_svm)
  722.    
  723.    
  724.     plt.figure(figsize=(8, 6))
  725.     plt.title(f'SVM Відокремлення класу {class_}')
  726.     plt.plot(epochs_svm, np.array(err_svm)/len(x_train_v1)*100)
  727.     plt.ylabel('Відсоток помилок на тренувальній вибірці')
  728.     plt.xlabel('Кількість епох')
  729.     plt.show()
  730.    
  731.    
  732.     accuracy_tr = accuracy_score(y_train_v1.loc[y_train_v1 == 1.0], model_svm.predict(x_train_v1.loc[y_train_v1 == 1.0]))
  733.     print(f"SVM Train Recall for class {class_}: {round(accuracy_tr*100, 2)}%")
  734.     print(classification_report(y_train_v1, model_svm.predict(x_train_v1)))
  735.     report = classification_report(y_train_v1, model_svm.predict(x_train_v1))
  736.     precision_tr = float(report.split('1.0 ')[2].split()[2])
  737.    
  738.    
  739.     accuracy_t = accuracy_score(y_test_v1.loc[y_test_v1 == 1.0], model_svm.predict(x_test_v1.loc[y_test_v1 == 1.0]))
  740.     print(f"SVM Test Recall for class {class_}: {round(accuracy_t*100, 2)}%")
  741.     accuracy_svm.append((accuracy_tr, accuracy_t))
  742.    
  743. #     print(classification_report(y_test_v1, model_svm.predict(x_test_v1)))
  744.     report = classification_report(y_test_v1, model_svm.predict(x_test_v1))
  745.     precision_t = float(report.split('1.0 ')[2].split()[2])
  746.     pr_svm.append((precision_tr, precision_t))
  747.    
  748.    
  749.     plt.figure(figsize=(8, 6))
  750.     plt.title(f'SVM Тренувальна вибірка(відокремлення класу {class_})')
  751.     y_wrong = y_train_v1.loc[(y_train_v1 != pred_train)]
  752.     x_wrong = x_train_v1.loc[(y_train_v1 != pred_train)]
  753.     for i in np.unique(y_test_v1):
  754.         plt.scatter(x_train_v1.loc[y_train_v1 == i, x_test_v1.columns[0]], x_train_v1.loc[y_train_v1 == i, x_train_v1.columns[1]], label=i)
  755.     plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black')
  756.     x1_svm = np.array([x_train_v1.iloc[:, 0].min(), x_train_v1.iloc[:, 0].max()])
  757.     x2_svm = np.array([(-w_svm[0] * x1_svm[0] - w_svm[2]) / w_svm[1], (-w_svm[0] * x1_svm[1] - w_svm[2]) / w_svm[1]])
  758.     plt.plot(x1_svm, x2_svm, c='purple')
  759.     plt.plot(x1_svm, x2_svm+1, c='purple', linestyle='--')
  760.     plt.plot(x1_svm, x2_svm-1, c='purple', linestyle='--')
  761.     plt.ylim(X.iloc[:, 1].min()-0.5, X.iloc[:, 1].max()+0.5)
  762.     plt.legend()
  763.     plt.show()
  764.    
  765.    
  766.     plt.figure(figsize=(8, 6))
  767.     plt.title(f'SVM Тестова вибірка(відокремлення класу {class_})')
  768. #     colors = ['violet' if y == -1 else 'orange' if y == 1 else 'black' for y in pred_test]
  769.     y_wrong = y_test_v1.loc[(y_test_v1 != pred_test)]
  770.     x_wrong = x_test_v1.loc[(y_test_v1 != pred_test)]
  771.     for i in np.unique(y_test_v1):
  772.         plt.scatter(x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[0]], x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[1]], label=i)
  773.     plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black')
  774.     x1_svm = np.array([x_test_v1.iloc[:, 0].min(), x_test_v1.iloc[:, 0].max()])
  775.     x2_svm = np.array([(-w_svm[0] * x1_svm[0] - w_svm[2]) / w_svm[1], (-w_svm[0] * x1_svm[1] - w_svm[2]) / w_svm[1]])
  776.     plt.plot(x1_svm, x2_svm, c='purple')
  777.     plt.plot(x1_svm, x2_svm+1, c='purple', linestyle='--')
  778.     plt.plot(x1_svm, x2_svm-1, c='purple', linestyle='--')
  779.     plt.ylim(X.iloc[:, 1].min()-0.5, X.iloc[:, 1].max()+0.5)
  780.     plt.legend()
  781.     plt.show()
  782.    
  783.    
  784.    
  785.    
  786.    
  787.     lr = Logistic_Regression(learning_rate=learning_rate, epochs=10000, L2=False, patience=100)
  788.     w = lr.fit(x_train_v1, y_train_v1)
  789. #     print(w)
  790.     pred_test = lr.predict(x_test_v1)
  791.     pred_train = lr.predict(x_train_v1)
  792.     epochs = lr.t
  793.     epochs_err = lr.n
  794.  
  795.     x1 = [X.iloc[:, 0].min(), X.iloc[:, 0].max()]
  796.     x2 = [(-w[0] * x1[0] - w[2]) / w[1], (-w[0] * x1[1] - w[2]) / w[1]]
  797.     x.append(x1)
  798.     y.append(x2)
  799.    
  800.     accuracy_tr = accuracy_score(y_train_v1.loc[y_train_v1 == 1.0], lr.predict(x_train_v1.loc[y_train_v1 == 1.0]))
  801.     print(f"LOG REG Train Recall for class {class_}: {round(accuracy_tr*100, 2)}%")
  802. #     print(classification_report(y_train_v1, lr.predict(x_train_v1)))
  803.    
  804.     report = classification_report(y_train_v1, lr.predict(x_train_v1))
  805.     precision_tr = float(report.split('1.0 ')[2].split()[2])
  806.    
  807.     accuracy_t = accuracy_score(y_test_v1.loc[y_test_v1 == 1.0], lr.predict(x_test_v1.loc[y_test_v1 == 1.0]))
  808.     print(f"LOG REG Test Recall for class {class_}: {round(accuracy_t*100, 2)}%")
  809.     accuracy_reg.append((accuracy_tr, accuracy_t))
  810. #     print(classification_report(y_test_v1, lr.predict(x_test_v1)))
  811.    
  812.     report = classification_report(y_test_v1, lr.predict(x_test_v1))
  813.     precision_t = float(report.split('1.0 ')[2].split()[2])
  814.     pr_reg.append((precision_tr, precision_t))
  815.  
  816.    
  817.  
  818.     plt.figure(figsize=(8, 6))
  819.     plt.title(f'LOG REG Відокремлення класу {class_}')
  820.     plt.plot(epochs, np.array(epochs_err)/len(x_train)*100)
  821.     plt.ylabel('Відсоток помилок на тренувальній вибірці')
  822.     plt.xlabel('Кількість епох')
  823.     plt.show()
  824.    
  825.    
  826.     plt.figure(figsize=(8, 6))
  827.     plt.title(f'LOG REG Тренувальна вибірка(відокремлення класу {class_})')
  828.     y_wrong = y_train_v1.loc[(y_train_v1 != pred_train)]
  829.     x_wrong = x_train_v1.loc[(y_train_v1 != pred_train)]
  830.     for i in np.unique(y_test_v1):
  831.         plt.scatter(x_train_v1.loc[y_train_v1 == i, x_test_v1.columns[0]], x_train_v1.loc[y_train_v1 == i, x_train_v1.columns[1]], label=i)
  832.     plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black')
  833.     x1 = np.array([x_train_v1.iloc[:, 0].min(), x_train_v1.iloc[:, 0].max()])
  834.     x2 = np.array([(-w[0] * x1[0] - w[2]) / w[1], (-w[0] * x1[1] - w[2]) / w[1]])
  835.     plt.plot(x1, x2, c='purple')
  836.     plt.ylim(X.iloc[:, 1].min()-0.5, X.iloc[:, 1].max()+0.5)
  837.     plt.legend()
  838.     plt.show()
  839.    
  840.    
  841.     plt.figure(figsize=(8, 6))
  842.     plt.title(f'LOG REG Тестова вибірка(відокремлення класу {class_})')
  843.     y_wrong = y_test_v1.loc[(y_test_v1 != pred_test)]
  844.     x_wrong = x_test_v1.loc[(y_test_v1 != pred_test)]
  845.    
  846.     for i in np.unique(y_test_v1):
  847.         plt.scatter(x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[0]], x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[1]], label=i)
  848.     plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black')
  849.     x1 = [x_test_v1.iloc[:, 0].min(), x_test_v1.iloc[:, 0].max()]
  850.     x2 = [(-w[0] * x1[0] - w[2]) / w[1], (-w[0] * x1[1] - w[2]) / w[1]]
  851.     plt.plot(x1, x2, c='purple')
  852.     plt.ylim(X.iloc[:, 1].min()-0.5, X.iloc[:, 1].max()+0.5)
  853.     plt.legend()
  854.     plt.show()
  855.    
  856.    
  857.    
  858.     # Лінійний SVM
  859.     svm_classifier = SVC(kernel='linear', max_iter=1000)
  860.     svm_classifier.fit(x_train_v1, y_train_v1)
  861.  
  862.     logistic_reg = LogisticRegression(max_iter=1000)
  863.     logistic_reg.fit(x_train_v1, y_train_v1)
  864.  
  865.     w_log_reg = logistic_reg.coef_[0]
  866.     b_log_reg = logistic_reg.intercept_[0]
  867.  
  868.     w_svm = svm_classifier.coef_[0]
  869.     b_svm = svm_classifier.intercept_[0]
  870.    
  871.    
  872.     accuracy_tr = accuracy_score(y_train_v1.loc[y_train_v1 == 1.0], svm_classifier.predict(x_train_v1.loc[y_train_v1 == 1.0]))
  873.     print(f"SVM sklearn Train Recall for class {class_}: {round(accuracy_tr*100, 2)}%")
  874. #     print(classification_report(y_train_v1, svm_classifier.predict(x_train_v1)))
  875.    
  876.     report = classification_report(y_train_v1, svm_classifier.predict(x_train_v1))
  877.     precision_tr = float(report.split('1.0 ')[2].split()[2])
  878.    
  879.     accuracy_t = accuracy_score(y_test_v1.loc[y_test_v1 == 1.0], svm_classifier.predict(x_test_v1.loc[y_test_v1 == 1.0]))
  880.     print(f"SVM sklearn Test Recall for class {class_}: {round(accuracy_t*100, 2)}%")
  881.     sk_accuracy_svm.append((accuracy_tr, accuracy_t))
  882. #     print(classification_report(y_test_v1, svm_classifier.predict(x_test_v1)))
  883.    
  884.     report = classification_report(y_test_v1, svm_classifier.predict(x_test_v1))
  885.     precision_t = float(report.split('1.0 ')[2].split()[2])
  886.     pr_sk_svm.append((precision_tr, precision_t))
  887.    
  888.    
  889.    
  890.     accuracy_tr = accuracy_score(y_train_v1.loc[y_train_v1 == 1.0], logistic_reg.predict(x_train_v1.loc[y_train_v1 == 1.0]))
  891.     print(f"LOG REG sklearn Train Recall for class {class_}: {round(accuracy_tr*100, 2)}%")
  892. #     print(classification_report(y_train_v1, logistic_reg.predict(x_train_v1)))
  893.    
  894.     report = classification_report(y_train_v1, logistic_reg.predict(x_train_v1))
  895.     precision_tr = float(report.split('1.0 ')[2].split()[2])
  896.    
  897.     accuracy_t = accuracy_score(y_test_v1.loc[y_test_v1 == 1.0], logistic_reg.predict(x_test_v1.loc[y_test_v1 == 1.0]))
  898.     print(f"LOG REG sklearn Test Recall for class {class_}: {round(accuracy_t*100, 2)}%")
  899.     sk_accuracy_reg.append((accuracy_tr, accuracy_t))
  900. #     print(classification_report(y_test_v1, logistic_reg.predict(x_test_v1)))
  901.    
  902.     report = classification_report(y_test_v1, logistic_reg.predict(x_test_v1))
  903.     precision_t = float(report.split('1.0 ')[2].split()[2])
  904.     pr_sk_reg.append((precision_tr, precision_t))
  905.    
  906.    
  907.     # Візуалізація прямих для лінійної логістичної регресії та лінійного SVM
  908. #     plt.figure(figsize=(8, 6))
  909. #     for i in np.unique(Y):
  910. #         plt.scatter(X.loc[Y == i, X.columns[0]], X.loc[Y == i, X.columns[1]], label=i)
  911.  
  912.     plt.figure(figsize=(8, 6))
  913.     plt.title(f'Sklearn Тренувальна вибірка(відокремлення класу {class_})')
  914.     y_wrong = y_train_v1.loc[(y_train_v1 != logistic_reg.predict(x_train_v1))]
  915.     x_wrong = x_train_v1.loc[(y_train_v1 != logistic_reg.predict(x_train_v1))]
  916.     y_wrong_svm_sk = y_train_v1.loc[(y_train_v1 != svm_classifier.predict(x_train_v1))]
  917.     x_wrong_svm_sk = x_train_v1.loc[(y_train_v1 != svm_classifier.predict(x_train_v1))]
  918.    
  919.     for i in np.unique(y_test_v1):
  920.         plt.scatter(x_train_v1.loc[y_train_v1 == i, x_test_v1.columns[0]], x_train_v1.loc[y_train_v1 == i, x_train_v1.columns[1]], label=i)
  921.     plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black', label='log_reg error')
  922.     plt.scatter(x_wrong_svm_sk.iloc[:, 0], x_wrong_svm_sk.iloc[:, 1], c='r', marker='v', label='svm error', alpha=0.5)
  923.    
  924.     x1_log_reg = np.array([X.iloc[:, 0].min(), X.iloc[:, 0].max()])
  925.     x2_log_reg = (-w_log_reg[0] / w_log_reg[1]) * x1_log_reg - (b_log_reg / w_log_reg[1])
  926.     sk_x.append(x1_log_reg)
  927.     sk_y.append(x2_log_reg)
  928.  
  929.     x1_svm = np.array([X.iloc[:, 0].min(), X.iloc[:, 0].max()])
  930.     x2_svm = (-w_svm[0] / w_svm[1]) * x1_svm - (b_svm / w_svm[1])
  931.     sk_x_svm.append(x1_svm)
  932.     sk_y_svm.append(x2_svm)
  933.    
  934.    
  935.     plt.plot(x1_svm, x2_svm, c='purple', linestyle='-.', label='sklearn svm')
  936.     plt.plot(x1_svm, x2_svm+1, c='purple', linestyle='--')
  937.     plt.plot(x1_svm, x2_svm-1, c='purple', linestyle='--', label='sklearn log_reg')
  938.    
  939.     plt.plot(x1_log_reg, x2_log_reg, c='purple')
  940.    
  941.     plt.ylim(X.iloc[:, 1].min()-0.5, X.iloc[:, 1].max()+0.5)
  942.     plt.legend()
  943.     plt.show()
  944.    
  945.    
  946.  
  947.     plt.figure(figsize=(8, 6))
  948.     plt.title(f'Sklearn Тестова вибірка(відокремлення класу {class_})')
  949.     y_wrong = y_test_v1.loc[(y_test_v1 != logistic_reg.predict(x_test_v1))]
  950.     x_wrong = x_test_v1.loc[(y_test_v1 != logistic_reg.predict(x_test_v1))]
  951.     y_wrong_svm_sk = y_test_v1.loc[(y_test_v1 != svm_classifier.predict(x_test_v1))]
  952.     x_wrong_svm_sk = x_test_v1.loc[(y_test_v1 != svm_classifier.predict(x_test_v1))]
  953.    
  954.     for i in np.unique(y_test_v1):
  955.         plt.scatter(x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[0]], x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[1]], label=i)
  956.     plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black', label='log_reg error')
  957.     plt.scatter(x_wrong_svm_sk.iloc[:, 0], x_wrong_svm_sk.iloc[:, 1], c='r', marker='v', label='svm error', alpha=0.5)
  958.    
  959.     plt.plot(x1_svm, x2_svm, c='purple', linestyle='-.', label='sklearn svm')
  960.     plt.plot(x1_svm, x2_svm+1, c='purple', linestyle='--')
  961.     plt.plot(x1_svm, x2_svm-1, c='purple', linestyle='--')
  962.    
  963.     plt.plot(x1_log_reg, x2_log_reg, c='purple', label='sklearn log_reg')
  964.    
  965.     plt.ylim(X.iloc[:, 1].min()-0.5, X.iloc[:, 1].max()+0.5)
  966.     plt.legend()
  967.     plt.show()
  968.  
  969.  
  970.    
  971.    
  972.  
  973.    
  974. len_t = len(y_test)
  975. len_tr = len(y_train)
  976. accuracy_tr_svm = 0
  977. accuracy_t_svm = 0
  978. accuracy_tr_reg = 0
  979. accuracy_t_reg = 0
  980.  
  981. pr_tr_svm = 0
  982. pr_t_svm = 0
  983. pr_tr_reg = 0
  984. pr_t_reg = 0
  985.  
  986. sk_accuracy_tr_svm = 0
  987. sk_accuracy_t_svm = 0
  988. sk_accuracy_tr_reg = 0
  989. sk_accuracy_t_reg = 0
  990.  
  991. sk_pr_tr_svm = 0
  992. sk_pr_t_svm = 0
  993. sk_pr_tr_reg = 0
  994. sk_pr_t_reg = 0
  995.  
  996.  
  997. for i in np.unique(Y):  
  998.     len_test = (len(y_test.loc[y_test == i]))
  999.     len_train = (len(y_train.loc[y_train == i]))
  1000.    
  1001.    
  1002.     accuracy_tr_svm += len_train / len_tr * (accuracy_svm[i][0]) * 100
  1003.     accuracy_t_svm += len_test / len_t * (accuracy_svm[i][1]) * 100
  1004.     sk_accuracy_tr_svm += len_train / len_tr * (sk_accuracy_svm[i][0]) * 100
  1005.     sk_accuracy_t_svm += len_test / len_t * (sk_accuracy_svm[i][1]) * 100
  1006.    
  1007.     pr_tr_svm += len_train / len_tr * (pr_svm[i][0]) * 100
  1008.     pr_t_svm += len_test / len_t * (pr_svm[i][1]) * 100
  1009.     sk_pr_tr_svm += len_train / len_tr * (pr_sk_svm[i][0]) * 100
  1010.     sk_pr_t_svm += len_test / len_t * (pr_sk_svm[i][1]) * 100
  1011.    
  1012.    
  1013.    
  1014.     accuracy_tr_reg += len_train / len_tr * (accuracy_reg[i][0]) * 100
  1015.     accuracy_t_reg += len_test / len_t * (accuracy_reg[i][1]) * 100
  1016.     sk_accuracy_tr_reg += len_train / len_tr * (sk_accuracy_reg[i][0]) * 100
  1017.     sk_accuracy_t_reg += len_test / len_t * (sk_accuracy_reg[i][1]) * 100
  1018.    
  1019.     pr_tr_reg += len_train / len_tr * (pr_reg[i][0]) * 100
  1020.     pr_t_reg += len_test / len_t * (pr_reg[i][1]) * 100
  1021.     sk_pr_tr_reg += len_train / len_tr * (pr_sk_reg[i][0]) * 100
  1022.     sk_pr_t_reg += len_test / len_t * (pr_sk_reg[i][1]) * 100
  1023.    
  1024.    
  1025.    
  1026.    
  1027.    
  1028. print(f'SVM Train Recall: {round(accuracy_tr_svm, 2)}%')
  1029. print(f'SVM Test Recall: {round(accuracy_t_svm, 2)}%\n')
  1030.  
  1031. print(f'LOG REG Train Recall: {round(accuracy_tr_reg, 2)}%')
  1032. print(f'LOG REG Test Recall: {round(accuracy_t_reg, 2)}%')
  1033.  
  1034. print(f'\nSVM sklearn Train Recall: {round(sk_accuracy_tr_svm, 2)}%')
  1035. print(f'SVM sklearn Test Recall: {round(sk_accuracy_t_svm, 2)}%\n')
  1036.  
  1037. print(f'LOG REG sklearn Train Recall: {round(sk_accuracy_tr_reg, 2)}%')
  1038. print(f'LOG REG sklearn Test Recall: {round(sk_accuracy_t_reg, 2)}%')
  1039.  
  1040.  
  1041.  
  1042. print(f'\nSVM Train F1-score: {round(pr_tr_svm, 2)}%')
  1043. print(f'SVM Test F1-score: {round(pr_t_svm, 2)}%\n')
  1044.  
  1045. print(f'LOG REG Train F1-score: {round(pr_tr_reg, 2)}%')
  1046. print(f'LOG REG Test F1-score: {round(pr_t_reg, 2)}%')
  1047.  
  1048. print(f'\nSVM sklearn Train F1-score: {round(sk_pr_tr_svm, 2)}%')
  1049. print(f'SVM sklearn Test F1-score: {round(sk_pr_t_svm, 2)}%\n')
  1050.  
  1051. print(f'LOG REG sklearn Train F1-score: {round(sk_pr_tr_reg, 2)}%')
  1052. print(f'LOG REG sklearn Test F1-score: {round(sk_pr_t_reg, 2)}%')
  1053.  
  1054.  
  1055.  
  1056.  
  1057. x = np.array(x)
  1058. y = np.array(y)
  1059. x_svm = np.array(x_svm)
  1060. y_svm = np.array(y_svm)
  1061. sk_x = np.array(sk_x)
  1062. sk_y = np.array(sk_y)
  1063. sk_x_svm = np.array(sk_x_svm)
  1064. sk_y_svm = np.array(sk_y_svm)
  1065.  
  1066. plt.figure(figsize=(15, 10))
  1067. plt.title('Порівняння всіх методів')
  1068. for i in np.unique(Y):
  1069.     plt.scatter(X.loc[Y == i, X.columns[0]], X.loc[Y == i, X.columns[1]], label=f'class{i}')
  1070.  
  1071. colors = ['b', 'orange', 'g']
  1072. for i in range(len(x)):
  1073.     plt.plot(x[i], y[i], c=f'{colors[i]}', label=f'LOG_REG(class{i})')
  1074.     plt.plot(x_svm[i], y_svm[i], c=f'{colors[i]}', linestyle='--', label=f'SVM(class{i})')
  1075.     plt.plot(sk_x[i], sk_y[i], c=f'{colors[i]}', label=f'LOG_REG_sk(class{i})', linestyle=':', lw=1.5)
  1076.     plt.plot(sk_x_svm[i], sk_y_svm[i], c=f'{colors[i]}', linestyle='-.', label=f'SVM_sk(class{i})', lw=1.5)
  1077.    
  1078.  
  1079. plt.ylim(X.iloc[:, 1].min()-0.5, X.iloc[:, 1].max()+0.5)
  1080. plt.legend()
  1081. plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement