import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
def polynomial_features(X, d):
    """Expand the two columns of X into all monomials x1^(i-j) * x2^j of total
    degree <= d; there are (d + 1)(d + 2) / 2 such terms."""
    n_columns = (d + 1) * (d + 2) // 2
    # reset_index so the rows can be addressed positionally below
    x1 = X.iloc[:, 0].reset_index(drop=True)
    x2 = X.iloc[:, 1].reset_index(drop=True)
    features = pd.DataFrame(columns=[f'F{i + 1}' for i in range(n_columns)])
    for k in range(len(X)):
        feature_row = []
        for i in range(d + 1):
            for j in range(i + 1):
                feature_row.append((x1[k] ** (i - j)) * (x2[k] ** j))
        features.loc[k] = feature_row
    return features
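# A minimal smoke test for polynomial_features (illustrative frame, not the lab
# data): for d = 2 the two columns expand into the six monomials
# 1, x1, x2, x1^2, x1*x2, x2^2, in the ordering produced by the loops above.
_poly_demo = pd.DataFrame({'F1': [1.0, 2.0], 'F2': [3.0, 4.0]})
print(polynomial_features(_poly_demo, 2))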
class Logistic_Regression:
    def __init__(self, epochs=1000, learning_rate=0.01, C=1.0, patience=100, L2=False):
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.C = C                    # inverse regularization strength
        self.patience = patience     # epochs without improvement before stopping
        self.t = []                  # epoch numbers
        self.n = []                  # misclassification count per epoch
        self.best_w = None
        self.best_err = float('inf')
        self.last_improvement = 0
        self.x_train = None
        self.x_test = None
        self.L2 = L2

    def sigmoid(self, M):
        return 1 / (1 + np.exp(-M))

    def diff(self, x, w, y):
        # Per-sample gradient of log sigma(y * <w, x>) with respect to w
        M = y * np.dot(x, w.T)
        d = (1 - self.sigmoid(M)) * y * x
        return d

    def fit(self, X_train, y_train):
        print('\nModel is fitting:\n...')
        self.x_train = X_train.copy()
        len_w = self.x_train.shape[1] + 1
        lambda_ = 1 / self.C
        w = np.random.rand(len_w)
        ell = len(self.x_train)
        self.x_train['bias'] = np.ones(ell)
        for k in range(self.epochs):
            err = 0
            dw = np.zeros(len_w)  # gradient accumulator must start at zero
            for i in range(ell):
                M = y_train.iloc[i] * np.dot(self.x_train.iloc[i], w)
                if M < 0:
                    err += 1
                dw -= self.diff(self.x_train.iloc[i].values, w, y_train.iloc[i])
            if self.L2:
                w_reg = np.copy(w)
                w_reg[-1] = 0  # do not penalize the bias term
                dw += 2 * lambda_ * w_reg  # L2 penalty gradient is added to the loss gradient
            w -= self.learning_rate * dw / ell
            self.t.append(k + 1)
            self.n.append(err)
            if err < self.best_err:
                self.best_err = err
                self.best_w = np.copy(w)
                self.last_improvement = k
            elif k - self.last_improvement >= self.patience:
                print(f"Early stopping at epoch {k}, no improvement in the last {self.patience} epochs.")
                break
            if err < 1:
                break
        print('Model has fitted.\n')
        return self.best_w

    def predict(self, X_test):
        self.x_test = X_test.copy()
        ell_test = len(self.x_test)
        self.x_test['bias'] = np.ones(ell_test)
        predictions = np.sign(np.dot(self.x_test.values, self.best_w))
        return predictions
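# Quick smoke test for the class above (a made-up, clearly separable toy frame,
# not the lab data; the expected output is typical behaviour, not guaranteed):
_lr_demo_X = pd.DataFrame({'F1': [-2.0, -1.5, 1.5, 2.0], 'F2': [-1.0, -2.0, 2.0, 1.0]})
_lr_demo_y = pd.Series([-1.0, -1.0, 1.0, 1.0])
_lr_demo = Logistic_Regression(epochs=200, learning_rate=0.1)
_lr_demo.fit(_lr_demo_X, _lr_demo_y)
print(_lr_demo.predict(_lr_demo_X))  # should typically print [-1. -1.  1.  1.]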
class SVM:
    def __init__(self, etha=0.01, alpha=0.1, epochs=1000, patience=25):
        self._epochs = epochs
        self._etha = etha      # learning rate
        self._alpha = alpha    # regularization strength
        self._w = None
        self.history_w = []
        self.train_errors = None
        self.train_loss = None
        self.best_err = float('inf')
        self.best_w = None
        self.patience = patience

    def bias(self, b):
        # Append a constant column so the bias is learned as the last weight
        ones_column = np.ones((b.shape[0], 1))
        return np.hstack((b, ones_column))

    def hinge_margin(self, w, x, y):
        return max(0, 1 - y * np.dot(x, w))

    def soft_margin(self, w, x, y, alpha):
        return self.hinge_margin(w, x, y) + alpha * np.dot(w, w)

    def fit(self, X_train, Y_train):
        print('\nModel is fitting:\n...')
        X_train = self.bias(X_train)
        w = np.zeros(X_train.shape[1])
        Y_train = np.copy(Y_train)
        new_w = [w.copy()]
        train_errors = []
        train_loss = []
        last_improvement = 0
        for epoch in range(self._epochs):
            pom_train = 0  # points with margin < 1 this epoch
            vtr_train = 0  # accumulated soft-margin loss this epoch
            for i, _ in enumerate(X_train):
                margin = Y_train[i] * np.dot(w, X_train[i])
                if margin >= 1:
                    # correct side of the margin: weight decay only
                    w = w - self._etha * self._alpha * w / self._epochs
                else:
                    # inside the margin: hinge-loss step plus weight decay
                    w = w + self._etha * (Y_train[i] * X_train[i] - self._alpha * w / self._epochs)
                    pom_train += 1
                    vtr_train += self.soft_margin(w, X_train[i], Y_train[i], self._alpha)
                new_w.append(w.copy())
            train_errors.append(pom_train)
            train_loss.append(vtr_train)
            # track the best weights before any break, so best_w is always set
            if pom_train < self.best_err:
                self.best_err = pom_train
                self.best_w = np.copy(w)
                last_improvement = epoch
            elif epoch - last_improvement >= self.patience:
                print(f"Early stopping at epoch {epoch}, no improvement in the last {self.patience} epochs.")
                break
            if pom_train < 1:
                break
        self._w = self.best_w
        self.history_w = np.array(new_w)
        self.train_errors = train_errors
        self.train_loss = train_loss
        print('Model has fitted.\n')

    def predict(self, X):
        X_b = self.bias(X)
        y_pred = np.sign(np.dot(X_b, self._w))
        return y_pred
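# Matching smoke test for the SVM class (same kind of made-up toy frame;
# bias() calls np.hstack, which accepts the DataFrame directly):
_svm_demo_X = pd.DataFrame({'F1': [-2.0, -1.5, 1.5, 2.0], 'F2': [-1.0, -2.0, 2.0, 1.0]})
_svm_demo_y = pd.Series([-1.0, -1.0, 1.0, 1.0])
_svm_demo = SVM(epochs=200)
_svm_demo.fit(_svm_demo_X, _svm_demo_y)
print(_svm_demo.predict(_svm_demo_X))  # should typically print [-1. -1.  1.  1.]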
print('Linearly separable dataset:\n')
# Create a dataset with three linearly separable classes
X, y = make_classification(
    n_samples=300,           # total number of samples
    n_features=4,            # number of features
    n_classes=3,             # number of classes
    n_clusters_per_class=1,  # clusters per class
    class_sep=2.0,           # distance between cluster centers (separability)
    random_state=42          # fix the random seed for reproducibility
)
# Build a DataFrame for convenient data handling
df = pd.DataFrame(data=X, columns=[f'F{i+1}' for i in range(4)])
df['target'] = y
X = df.drop(columns=['target'])
Y = df.target
# Project the 4 features to 2-D so the separating lines can be drawn
pca = PCA(n_components=2)
X = pd.DataFrame(pca.fit_transform(X), columns=['F1', 'F2'], index=X.index)
sns.pairplot(pd.concat([X, Y], axis=1), hue='target', palette='dark')
plt.show()
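# Sanity check on the projection above: how much of the 4-feature variance the
# two principal components retain (diagnostic only, no effect on the experiment).
print('PCA explained variance ratio:', pca.explained_variance_ratio_,
      '| total:', pca.explained_variance_ratio_.sum())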
learning_rate = 0.01
# Per-class separating lines (x/y endpoint pairs) for the final comparison plot
x, y = [], []
x_svm, y_svm = [], []
sk_x, sk_y = [], []
sk_x_svm, sk_y_svm = [], []
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
# (train, test) metric tuples per class: recall in accuracy_*, F1 in pr_*
accuracy_svm, pr_svm = [], []
accuracy_reg, pr_reg = [], []
sk_accuracy_svm, pr_sk_svm = [], []
sk_accuracy_reg, pr_sk_reg = [], []
for class_ in np.unique(Y):
    print(f'Separating class {class_}')
    # One-vs-rest relabeling: the current class becomes +1, everything else -1
    x_train_v1, x_test_v1, y_train_v1, y_test_v1 = x_train.copy(), x_test.copy(), y_train.copy(), y_test.copy()
    y_train_v1 = y_train_v1.apply(lambda x: 1.0 if x == class_ else -1.0)
    y_test_v1 = y_test_v1.apply(lambda x: 1.0 if x == class_ else -1.0)
    sns.pairplot(pd.concat([x_train_v1, y_train_v1], axis=1), hue='target', palette='dark')
    plt.show()
    model_svm = SVM()
    model_svm.fit(x_train_v1, y_train_v1)
    pred_test = model_svm.predict(x_test_v1)
    pred_train = model_svm.predict(x_train_v1)
    w_svm = model_svm._w
    err_svm = model_svm.train_errors
    epochs_svm = range(1, len(err_svm) + 1)
    # Separating line w1*x1 + w2*x2 + b = 0, solved for x2 at the data extremes
    x1_svm = np.array([X.iloc[:, 0].min(), X.iloc[:, 0].max()])
    x2_svm = np.array([(-w_svm[0] * x1_svm[0] - w_svm[2]) / w_svm[1], (-w_svm[0] * x1_svm[1] - w_svm[2]) / w_svm[1]])
    x_svm.append(x1_svm)
    y_svm.append(x2_svm)
    plt.figure(figsize=(8, 6))
    plt.title(f'SVM: separating class {class_}')
    plt.plot(epochs_svm, np.array(err_svm) / len(x_train_v1) * 100)
    plt.ylabel('Training-set error rate, %')
    plt.xlabel('Number of epochs')
    plt.show()
    # Accuracy on the positive-class subset is exactly the recall of class 1.0
    accuracy_tr = accuracy_score(y_train_v1.loc[y_train_v1 == 1.0], model_svm.predict(x_train_v1.loc[y_train_v1 == 1.0]))
    print(f"SVM Train Recall for class {class_}: {round(accuracy_tr*100, 2)}%")
    # output_dict=True gives structured access to the per-class F1 of the
    # positive class ('1.0'), instead of fragile string slicing of the report
    report = classification_report(y_train_v1, model_svm.predict(x_train_v1), output_dict=True)
    f1_tr = report['1.0']['f1-score']
    accuracy_t = accuracy_score(y_test_v1.loc[y_test_v1 == 1.0], model_svm.predict(x_test_v1.loc[y_test_v1 == 1.0]))
    print(f"SVM Test Recall for class {class_}: {round(accuracy_t*100, 2)}%")
    accuracy_svm.append((accuracy_tr, accuracy_t))
    report = classification_report(y_test_v1, model_svm.predict(x_test_v1), output_dict=True)
    f1_t = report['1.0']['f1-score']
    pr_svm.append((f1_tr, f1_t))
    plt.figure(figsize=(8, 6))
    plt.title(f'SVM, training set (separating class {class_})')
    y_wrong = y_train_v1.loc[(y_train_v1 != pred_train)]
    x_wrong = x_train_v1.loc[(y_train_v1 != pred_train)]
    for i in np.unique(y_train_v1):
        plt.scatter(x_train_v1.loc[y_train_v1 == i, x_train_v1.columns[0]], x_train_v1.loc[y_train_v1 == i, x_train_v1.columns[1]], label=i)
    plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black')  # misclassified points
    x1_svm = np.array([x_train_v1.iloc[:, 0].min(), x_train_v1.iloc[:, 0].max()])
    x2_svm = np.array([(-w_svm[0] * x1_svm[0] - w_svm[2]) / w_svm[1], (-w_svm[0] * x1_svm[1] - w_svm[2]) / w_svm[1]])
    plt.plot(x1_svm, x2_svm, c='purple')
    plt.plot(x1_svm, x2_svm + 1 / w_svm[1], c='purple', linestyle='--')  # margin lines where w.x + b = +1/-1
    plt.plot(x1_svm, x2_svm - 1 / w_svm[1], c='purple', linestyle='--')
    plt.ylim(X.iloc[:, 1].min() - 0.5, X.iloc[:, 1].max() + 0.5)
    plt.legend()
    plt.show()
    plt.figure(figsize=(8, 6))
    plt.title(f'SVM, test set (separating class {class_})')
    y_wrong = y_test_v1.loc[(y_test_v1 != pred_test)]
    x_wrong = x_test_v1.loc[(y_test_v1 != pred_test)]
    for i in np.unique(y_test_v1):
        plt.scatter(x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[0]], x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[1]], label=i)
    plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black')  # misclassified points
    x1_svm = np.array([x_test_v1.iloc[:, 0].min(), x_test_v1.iloc[:, 0].max()])
    x2_svm = np.array([(-w_svm[0] * x1_svm[0] - w_svm[2]) / w_svm[1], (-w_svm[0] * x1_svm[1] - w_svm[2]) / w_svm[1]])
    plt.plot(x1_svm, x2_svm, c='purple')
    plt.plot(x1_svm, x2_svm + 1 / w_svm[1], c='purple', linestyle='--')
    plt.plot(x1_svm, x2_svm - 1 / w_svm[1], c='purple', linestyle='--')
    plt.ylim(X.iloc[:, 1].min() - 0.5, X.iloc[:, 1].max() + 0.5)
    plt.legend()
    plt.show()
    lr = Logistic_Regression(learning_rate=learning_rate, epochs=10000, L2=False, patience=100)
    w = lr.fit(x_train_v1, y_train_v1)
    pred_test = lr.predict(x_test_v1)
    pred_train = lr.predict(x_train_v1)
    epochs = lr.t
    epochs_err = lr.n
    x1 = [X.iloc[:, 0].min(), X.iloc[:, 0].max()]
    x2 = [(-w[0] * x1[0] - w[2]) / w[1], (-w[0] * x1[1] - w[2]) / w[1]]
    x.append(x1)
    y.append(x2)
    accuracy_tr = accuracy_score(y_train_v1.loc[y_train_v1 == 1.0], lr.predict(x_train_v1.loc[y_train_v1 == 1.0]))
    print(f"LOG REG Train Recall for class {class_}: {round(accuracy_tr*100, 2)}%")
    report = classification_report(y_train_v1, lr.predict(x_train_v1), output_dict=True)
    f1_tr = report['1.0']['f1-score']
    accuracy_t = accuracy_score(y_test_v1.loc[y_test_v1 == 1.0], lr.predict(x_test_v1.loc[y_test_v1 == 1.0]))
    print(f"LOG REG Test Recall for class {class_}: {round(accuracy_t*100, 2)}%")
    accuracy_reg.append((accuracy_tr, accuracy_t))
    report = classification_report(y_test_v1, lr.predict(x_test_v1), output_dict=True)
    f1_t = report['1.0']['f1-score']
    pr_reg.append((f1_tr, f1_t))
    plt.figure(figsize=(8, 6))
    plt.title(f'LOG REG: separating class {class_}')
    plt.plot(epochs, np.array(epochs_err) / len(x_train_v1) * 100)
    plt.ylabel('Training-set error rate, %')
    plt.xlabel('Number of epochs')
    plt.show()
    plt.figure(figsize=(8, 6))
    plt.title(f'LOG REG, training set (separating class {class_})')
    y_wrong = y_train_v1.loc[(y_train_v1 != pred_train)]
    x_wrong = x_train_v1.loc[(y_train_v1 != pred_train)]
    for i in np.unique(y_train_v1):
        plt.scatter(x_train_v1.loc[y_train_v1 == i, x_train_v1.columns[0]], x_train_v1.loc[y_train_v1 == i, x_train_v1.columns[1]], label=i)
    plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black')  # misclassified points
    x1 = np.array([x_train_v1.iloc[:, 0].min(), x_train_v1.iloc[:, 0].max()])
    x2 = np.array([(-w[0] * x1[0] - w[2]) / w[1], (-w[0] * x1[1] - w[2]) / w[1]])
    plt.plot(x1, x2, c='purple')
    plt.ylim(X.iloc[:, 1].min() - 0.5, X.iloc[:, 1].max() + 0.5)
    plt.legend()
    plt.show()
    plt.figure(figsize=(8, 6))
    plt.title(f'LOG REG, test set (separating class {class_})')
    y_wrong = y_test_v1.loc[(y_test_v1 != pred_test)]
    x_wrong = x_test_v1.loc[(y_test_v1 != pred_test)]
    for i in np.unique(y_test_v1):
        plt.scatter(x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[0]], x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[1]], label=i)
    plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black')  # misclassified points
    x1 = [x_test_v1.iloc[:, 0].min(), x_test_v1.iloc[:, 0].max()]
    x2 = [(-w[0] * x1[0] - w[2]) / w[1], (-w[0] * x1[1] - w[2]) / w[1]]
    plt.plot(x1, x2, c='purple')
    plt.ylim(X.iloc[:, 1].min() - 0.5, X.iloc[:, 1].max() + 0.5)
    plt.legend()
    plt.show()
    # Reference implementations: linear SVM and logistic regression from sklearn
    svm_classifier = SVC(kernel='linear', max_iter=1000)
    svm_classifier.fit(x_train_v1, y_train_v1)
    logistic_reg = LogisticRegression(max_iter=1000)
    logistic_reg.fit(x_train_v1, y_train_v1)
    w_log_reg = logistic_reg.coef_[0]
    b_log_reg = logistic_reg.intercept_[0]
    w_svm = svm_classifier.coef_[0]
    b_svm = svm_classifier.intercept_[0]
    accuracy_tr = accuracy_score(y_train_v1.loc[y_train_v1 == 1.0], svm_classifier.predict(x_train_v1.loc[y_train_v1 == 1.0]))
    print(f"SVM sklearn Train Recall for class {class_}: {round(accuracy_tr*100, 2)}%")
    report = classification_report(y_train_v1, svm_classifier.predict(x_train_v1), output_dict=True)
    f1_tr = report['1.0']['f1-score']
    accuracy_t = accuracy_score(y_test_v1.loc[y_test_v1 == 1.0], svm_classifier.predict(x_test_v1.loc[y_test_v1 == 1.0]))
    print(f"SVM sklearn Test Recall for class {class_}: {round(accuracy_t*100, 2)}%")
    sk_accuracy_svm.append((accuracy_tr, accuracy_t))
    report = classification_report(y_test_v1, svm_classifier.predict(x_test_v1), output_dict=True)
    f1_t = report['1.0']['f1-score']
    pr_sk_svm.append((f1_tr, f1_t))
    accuracy_tr = accuracy_score(y_train_v1.loc[y_train_v1 == 1.0], logistic_reg.predict(x_train_v1.loc[y_train_v1 == 1.0]))
    print(f"LOG REG sklearn Train Recall for class {class_}: {round(accuracy_tr*100, 2)}%")
    report = classification_report(y_train_v1, logistic_reg.predict(x_train_v1), output_dict=True)
    f1_tr = report['1.0']['f1-score']
    accuracy_t = accuracy_score(y_test_v1.loc[y_test_v1 == 1.0], logistic_reg.predict(x_test_v1.loc[y_test_v1 == 1.0]))
    print(f"LOG REG sklearn Test Recall for class {class_}: {round(accuracy_t*100, 2)}%")
    sk_accuracy_reg.append((accuracy_tr, accuracy_t))
    report = classification_report(y_test_v1, logistic_reg.predict(x_test_v1), output_dict=True)
    f1_t = report['1.0']['f1-score']
    pr_sk_reg.append((f1_tr, f1_t))
    # Visualize the separating lines for linear logistic regression and linear SVM
    plt.figure(figsize=(8, 6))
    plt.title(f'Sklearn, training set (separating class {class_})')
    y_wrong = y_train_v1.loc[(y_train_v1 != logistic_reg.predict(x_train_v1))]
    x_wrong = x_train_v1.loc[(y_train_v1 != logistic_reg.predict(x_train_v1))]
    y_wrong_svm_sk = y_train_v1.loc[(y_train_v1 != svm_classifier.predict(x_train_v1))]
    x_wrong_svm_sk = x_train_v1.loc[(y_train_v1 != svm_classifier.predict(x_train_v1))]
    for i in np.unique(y_train_v1):
        plt.scatter(x_train_v1.loc[y_train_v1 == i, x_train_v1.columns[0]], x_train_v1.loc[y_train_v1 == i, x_train_v1.columns[1]], label=i)
    plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black', label='log_reg error')
    plt.scatter(x_wrong_svm_sk.iloc[:, 0], x_wrong_svm_sk.iloc[:, 1], c='r', marker='v', label='svm error', alpha=0.5)
    x1_log_reg = np.array([X.iloc[:, 0].min(), X.iloc[:, 0].max()])
    x2_log_reg = (-w_log_reg[0] / w_log_reg[1]) * x1_log_reg - (b_log_reg / w_log_reg[1])
    sk_x.append(x1_log_reg)
    sk_y.append(x2_log_reg)
    x1_svm = np.array([X.iloc[:, 0].min(), X.iloc[:, 0].max()])
    x2_svm = (-w_svm[0] / w_svm[1]) * x1_svm - (b_svm / w_svm[1])
    sk_x_svm.append(x1_svm)
    sk_y_svm.append(x2_svm)
    plt.plot(x1_svm, x2_svm, c='purple', linestyle='-.', label='sklearn svm')
    plt.plot(x1_svm, x2_svm + 1 / w_svm[1], c='purple', linestyle='--')  # margin lines
    plt.plot(x1_svm, x2_svm - 1 / w_svm[1], c='purple', linestyle='--')
    plt.plot(x1_log_reg, x2_log_reg, c='purple', label='sklearn log_reg')
    plt.ylim(X.iloc[:, 1].min() - 0.5, X.iloc[:, 1].max() + 0.5)
    plt.legend()
    plt.show()
    plt.figure(figsize=(8, 6))
    plt.title(f'Sklearn, test set (separating class {class_})')
    y_wrong = y_test_v1.loc[(y_test_v1 != logistic_reg.predict(x_test_v1))]
    x_wrong = x_test_v1.loc[(y_test_v1 != logistic_reg.predict(x_test_v1))]
    y_wrong_svm_sk = y_test_v1.loc[(y_test_v1 != svm_classifier.predict(x_test_v1))]
    x_wrong_svm_sk = x_test_v1.loc[(y_test_v1 != svm_classifier.predict(x_test_v1))]
    for i in np.unique(y_test_v1):
        plt.scatter(x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[0]], x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[1]], label=i)
    plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black', label='log_reg error')
    plt.scatter(x_wrong_svm_sk.iloc[:, 0], x_wrong_svm_sk.iloc[:, 1], c='r', marker='v', label='svm error', alpha=0.5)
    plt.plot(x1_svm, x2_svm, c='purple', linestyle='-.', label='sklearn svm')
    plt.plot(x1_svm, x2_svm + 1 / w_svm[1], c='purple', linestyle='--')
    plt.plot(x1_svm, x2_svm - 1 / w_svm[1], c='purple', linestyle='--')
    plt.plot(x1_log_reg, x2_log_reg, c='purple', label='sklearn log_reg')
    plt.ylim(X.iloc[:, 1].min() - 0.5, X.iloc[:, 1].max() + 0.5)
    plt.legend()
    plt.show()
# Weighted (by class support) averages of the per-class metrics
len_t = len(y_test)
len_tr = len(y_train)
accuracy_tr_svm = accuracy_t_svm = 0
accuracy_tr_reg = accuracy_t_reg = 0
pr_tr_svm = pr_t_svm = 0
pr_tr_reg = pr_t_reg = 0
sk_accuracy_tr_svm = sk_accuracy_t_svm = 0
sk_accuracy_tr_reg = sk_accuracy_t_reg = 0
sk_pr_tr_svm = sk_pr_t_svm = 0
sk_pr_tr_reg = sk_pr_t_reg = 0
for i in np.unique(Y):
    # weight each class's metric by its share of the train/test samples
    len_test = len(y_test.loc[y_test == i])
    len_train = len(y_train.loc[y_train == i])
    accuracy_tr_svm += len_train / len_tr * accuracy_svm[i][0] * 100
    accuracy_t_svm += len_test / len_t * accuracy_svm[i][1] * 100
    sk_accuracy_tr_svm += len_train / len_tr * sk_accuracy_svm[i][0] * 100
    sk_accuracy_t_svm += len_test / len_t * sk_accuracy_svm[i][1] * 100
    pr_tr_svm += len_train / len_tr * pr_svm[i][0] * 100
    pr_t_svm += len_test / len_t * pr_svm[i][1] * 100
    sk_pr_tr_svm += len_train / len_tr * pr_sk_svm[i][0] * 100
    sk_pr_t_svm += len_test / len_t * pr_sk_svm[i][1] * 100
    accuracy_tr_reg += len_train / len_tr * accuracy_reg[i][0] * 100
    accuracy_t_reg += len_test / len_t * accuracy_reg[i][1] * 100
    sk_accuracy_tr_reg += len_train / len_tr * sk_accuracy_reg[i][0] * 100
    sk_accuracy_t_reg += len_test / len_t * sk_accuracy_reg[i][1] * 100
    pr_tr_reg += len_train / len_tr * pr_reg[i][0] * 100
    pr_t_reg += len_test / len_t * pr_reg[i][1] * 100
    sk_pr_tr_reg += len_train / len_tr * pr_sk_reg[i][0] * 100
    sk_pr_t_reg += len_test / len_t * pr_sk_reg[i][1] * 100
print(f'SVM Train Recall: {round(accuracy_tr_svm, 2)}%')
print(f'SVM Test Recall: {round(accuracy_t_svm, 2)}%\n')
print(f'LOG REG Train Recall: {round(accuracy_tr_reg, 2)}%')
print(f'LOG REG Test Recall: {round(accuracy_t_reg, 2)}%')
print(f'\nSVM sklearn Train Recall: {round(sk_accuracy_tr_svm, 2)}%')
print(f'SVM sklearn Test Recall: {round(sk_accuracy_t_svm, 2)}%\n')
print(f'LOG REG sklearn Train Recall: {round(sk_accuracy_tr_reg, 2)}%')
print(f'LOG REG sklearn Test Recall: {round(sk_accuracy_t_reg, 2)}%')
print(f'\nSVM Train F1-score: {round(pr_tr_svm, 2)}%')
print(f'SVM Test F1-score: {round(pr_t_svm, 2)}%\n')
print(f'LOG REG Train F1-score: {round(pr_tr_reg, 2)}%')
print(f'LOG REG Test F1-score: {round(pr_t_reg, 2)}%')
print(f'\nSVM sklearn Train F1-score: {round(sk_pr_tr_svm, 2)}%')
print(f'SVM sklearn Test F1-score: {round(sk_pr_t_svm, 2)}%\n')
print(f'LOG REG sklearn Train F1-score: {round(sk_pr_tr_reg, 2)}%')
print(f'LOG REG sklearn Test F1-score: {round(sk_pr_t_reg, 2)}%')
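# Cross-check for the weighted sums above: they are just support-weighted
# averages, so e.g. the SVM test recall can be recomputed with np.average
# (diagnostic only; uses the (train, test) tuples collected per class).
_w_test = [len(y_test.loc[y_test == i]) / len_t for i in np.unique(Y)]
print('np.average cross-check, SVM Test Recall:',
      round(np.average([m[1] for m in accuracy_svm], weights=_w_test) * 100, 2), '%')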
x = np.array(x)
y = np.array(y)
x_svm = np.array(x_svm)
y_svm = np.array(y_svm)
sk_x = np.array(sk_x)
sk_y = np.array(sk_y)
sk_x_svm = np.array(sk_x_svm)
sk_y_svm = np.array(sk_y_svm)
plt.figure(figsize=(15, 10))
plt.title('Comparison of all methods')
for i in np.unique(Y):
    plt.scatter(X.loc[Y == i, X.columns[0]], X.loc[Y == i, X.columns[1]], label=f'class{i}')
colors = ['b', 'orange', 'g']
for i in range(len(x)):
    plt.plot(x[i], y[i], c=colors[i], label=f'LOG_REG(class{i})')
    plt.plot(x_svm[i], y_svm[i], c=colors[i], linestyle='--', label=f'SVM(class{i})')
    plt.plot(sk_x[i], sk_y[i], c=colors[i], label=f'LOG_REG_sk(class{i})', linestyle=':', lw=1.5)
    plt.plot(sk_x_svm[i], sk_y_svm[i], c=colors[i], linestyle='-.', label=f'SVM_sk(class{i})', lw=1.5)
plt.ylim(X.iloc[:, 1].min() - 0.5, X.iloc[:, 1].max() + 0.5)
plt.legend()
plt.show()
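# The loop above trains one binary separator per class (one-vs-rest). As a
# reference point, sklearn's estimators also handle the 3-class problem
# directly; a minimal cross-check on the same split (the hyperparameters
# below are assumptions, not tuned):
_multi_lr = LogisticRegression(max_iter=1000)
_multi_lr.fit(x_train, y_train)
print('sklearn multiclass LOG REG test accuracy:',
      round(accuracy_score(y_test, _multi_lr.predict(x_test)) * 100, 2), '%')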
print('Linearly non-separable dataset:\n')
# Create a dataset with three linearly non-separable classes
X, y = make_classification(
    n_samples=500,           # total number of samples
    n_features=4,            # number of features
    n_classes=3,             # number of classes
    n_clusters_per_class=1,  # clusters per class
    class_sep=.7,            # reduced separation between classes
    random_state=18          # fix the random seed for reproducibility
)
# Build a DataFrame for convenient data handling
df = pd.DataFrame(data=X, columns=[f'F{i+1}' for i in range(4)])
df['target'] = y
X = df.drop(columns=['target'])
Y = df.target
pca = PCA(n_components=2)
X = pd.DataFrame(pca.fit_transform(X), columns=['F1', 'F2'], index=X.index)
sns.pairplot(pd.concat([X, Y], axis=1), hue='target', palette='dark')
plt.show()
learning_rate = 0.01
x, y = [], []
x_svm, y_svm = [], []
sk_x, sk_y = [], []
sk_x_svm, sk_y_svm = [], []
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
accuracy_svm, pr_svm = [], []
accuracy_reg, pr_reg = [], []
sk_accuracy_svm, pr_sk_svm = [], []
sk_accuracy_reg, pr_sk_reg = [], []
for class_ in np.unique(Y):
    print(f'Separating class {class_}')
    x_train_v1, x_test_v1, y_train_v1, y_test_v1 = x_train.copy(), x_test.copy(), y_train.copy(), y_test.copy()
    y_train_v1 = y_train_v1.apply(lambda x: 1.0 if x == class_ else -1.0)
    y_test_v1 = y_test_v1.apply(lambda x: 1.0 if x == class_ else -1.0)
    sns.pairplot(pd.concat([x_train_v1, y_train_v1], axis=1), hue='target', palette='dark')
    plt.show()
    model_svm = SVM()
    model_svm.fit(x_train_v1, y_train_v1)
    pred_test = model_svm.predict(x_test_v1)
    pred_train = model_svm.predict(x_train_v1)
    w_svm = model_svm._w
    err_svm = model_svm.train_errors
    epochs_svm = range(1, len(err_svm) + 1)
    x1_svm = np.array([X.iloc[:, 0].min(), X.iloc[:, 0].max()])
    x2_svm = np.array([(-w_svm[0] * x1_svm[0] - w_svm[2]) / w_svm[1], (-w_svm[0] * x1_svm[1] - w_svm[2]) / w_svm[1]])
    x_svm.append(x1_svm)
    y_svm.append(x2_svm)
    plt.figure(figsize=(8, 6))
    plt.title(f'SVM: separating class {class_}')
    plt.plot(epochs_svm, np.array(err_svm) / len(x_train_v1) * 100)
    plt.ylabel('Training-set error rate, %')
    plt.xlabel('Number of epochs')
    plt.show()
    accuracy_tr = accuracy_score(y_train_v1.loc[y_train_v1 == 1.0], model_svm.predict(x_train_v1.loc[y_train_v1 == 1.0]))
    print(f"SVM Train Recall for class {class_}: {round(accuracy_tr*100, 2)}%")
    print(classification_report(y_train_v1, model_svm.predict(x_train_v1)))
    report = classification_report(y_train_v1, model_svm.predict(x_train_v1), output_dict=True)
    f1_tr = report['1.0']['f1-score']
    accuracy_t = accuracy_score(y_test_v1.loc[y_test_v1 == 1.0], model_svm.predict(x_test_v1.loc[y_test_v1 == 1.0]))
    print(f"SVM Test Recall for class {class_}: {round(accuracy_t*100, 2)}%")
    accuracy_svm.append((accuracy_tr, accuracy_t))
    report = classification_report(y_test_v1, model_svm.predict(x_test_v1), output_dict=True)
    f1_t = report['1.0']['f1-score']
    pr_svm.append((f1_tr, f1_t))
    plt.figure(figsize=(8, 6))
    plt.title(f'SVM, training set (separating class {class_})')
    y_wrong = y_train_v1.loc[(y_train_v1 != pred_train)]
    x_wrong = x_train_v1.loc[(y_train_v1 != pred_train)]
    for i in np.unique(y_train_v1):
        plt.scatter(x_train_v1.loc[y_train_v1 == i, x_train_v1.columns[0]], x_train_v1.loc[y_train_v1 == i, x_train_v1.columns[1]], label=i)
    plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black')  # misclassified points
    x1_svm = np.array([x_train_v1.iloc[:, 0].min(), x_train_v1.iloc[:, 0].max()])
    x2_svm = np.array([(-w_svm[0] * x1_svm[0] - w_svm[2]) / w_svm[1], (-w_svm[0] * x1_svm[1] - w_svm[2]) / w_svm[1]])
    plt.plot(x1_svm, x2_svm, c='purple')
    plt.plot(x1_svm, x2_svm + 1 / w_svm[1], c='purple', linestyle='--')  # margin lines
    plt.plot(x1_svm, x2_svm - 1 / w_svm[1], c='purple', linestyle='--')
    plt.ylim(X.iloc[:, 1].min() - 0.5, X.iloc[:, 1].max() + 0.5)
    plt.legend()
    plt.show()
    plt.figure(figsize=(8, 6))
    plt.title(f'SVM, test set (separating class {class_})')
    y_wrong = y_test_v1.loc[(y_test_v1 != pred_test)]
    x_wrong = x_test_v1.loc[(y_test_v1 != pred_test)]
    for i in np.unique(y_test_v1):
        plt.scatter(x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[0]], x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[1]], label=i)
    plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black')  # misclassified points
    x1_svm = np.array([x_test_v1.iloc[:, 0].min(), x_test_v1.iloc[:, 0].max()])
    x2_svm = np.array([(-w_svm[0] * x1_svm[0] - w_svm[2]) / w_svm[1], (-w_svm[0] * x1_svm[1] - w_svm[2]) / w_svm[1]])
    plt.plot(x1_svm, x2_svm, c='purple')
    plt.plot(x1_svm, x2_svm + 1 / w_svm[1], c='purple', linestyle='--')
    plt.plot(x1_svm, x2_svm - 1 / w_svm[1], c='purple', linestyle='--')
    plt.ylim(X.iloc[:, 1].min() - 0.5, X.iloc[:, 1].max() + 0.5)
    plt.legend()
    plt.show()
    lr = Logistic_Regression(learning_rate=learning_rate, epochs=10000, L2=False, patience=100)
    w = lr.fit(x_train_v1, y_train_v1)
    pred_test = lr.predict(x_test_v1)
    pred_train = lr.predict(x_train_v1)
    epochs = lr.t
    epochs_err = lr.n
    x1 = [X.iloc[:, 0].min(), X.iloc[:, 0].max()]
    x2 = [(-w[0] * x1[0] - w[2]) / w[1], (-w[0] * x1[1] - w[2]) / w[1]]
    x.append(x1)
    y.append(x2)
    accuracy_tr = accuracy_score(y_train_v1.loc[y_train_v1 == 1.0], lr.predict(x_train_v1.loc[y_train_v1 == 1.0]))
    print(f"LOG REG Train Recall for class {class_}: {round(accuracy_tr*100, 2)}%")
    report = classification_report(y_train_v1, lr.predict(x_train_v1), output_dict=True)
    f1_tr = report['1.0']['f1-score']
    accuracy_t = accuracy_score(y_test_v1.loc[y_test_v1 == 1.0], lr.predict(x_test_v1.loc[y_test_v1 == 1.0]))
    print(f"LOG REG Test Recall for class {class_}: {round(accuracy_t*100, 2)}%")
    accuracy_reg.append((accuracy_tr, accuracy_t))
    report = classification_report(y_test_v1, lr.predict(x_test_v1), output_dict=True)
    f1_t = report['1.0']['f1-score']
    pr_reg.append((f1_tr, f1_t))
    plt.figure(figsize=(8, 6))
    plt.title(f'LOG REG: separating class {class_}')
    plt.plot(epochs, np.array(epochs_err) / len(x_train_v1) * 100)
    plt.ylabel('Training-set error rate, %')
    plt.xlabel('Number of epochs')
    plt.show()
    plt.figure(figsize=(8, 6))
    plt.title(f'LOG REG, training set (separating class {class_})')
    y_wrong = y_train_v1.loc[(y_train_v1 != pred_train)]
    x_wrong = x_train_v1.loc[(y_train_v1 != pred_train)]
    for i in np.unique(y_train_v1):
        plt.scatter(x_train_v1.loc[y_train_v1 == i, x_train_v1.columns[0]], x_train_v1.loc[y_train_v1 == i, x_train_v1.columns[1]], label=i)
    plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black')  # misclassified points
    x1 = np.array([x_train_v1.iloc[:, 0].min(), x_train_v1.iloc[:, 0].max()])
    x2 = np.array([(-w[0] * x1[0] - w[2]) / w[1], (-w[0] * x1[1] - w[2]) / w[1]])
    plt.plot(x1, x2, c='purple')
    plt.ylim(X.iloc[:, 1].min() - 0.5, X.iloc[:, 1].max() + 0.5)
    plt.legend()
    plt.show()
    plt.figure(figsize=(8, 6))
    plt.title(f'LOG REG, test set (separating class {class_})')
    y_wrong = y_test_v1.loc[(y_test_v1 != pred_test)]
    x_wrong = x_test_v1.loc[(y_test_v1 != pred_test)]
    for i in np.unique(y_test_v1):
        plt.scatter(x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[0]], x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[1]], label=i)
    plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black')  # misclassified points
    x1 = [x_test_v1.iloc[:, 0].min(), x_test_v1.iloc[:, 0].max()]
    x2 = [(-w[0] * x1[0] - w[2]) / w[1], (-w[0] * x1[1] - w[2]) / w[1]]
    plt.plot(x1, x2, c='purple')
    plt.ylim(X.iloc[:, 1].min() - 0.5, X.iloc[:, 1].max() + 0.5)
    plt.legend()
    plt.show()
    # Reference implementations: linear SVM and logistic regression from sklearn
    svm_classifier = SVC(kernel='linear', max_iter=1000)
    svm_classifier.fit(x_train_v1, y_train_v1)
    logistic_reg = LogisticRegression(max_iter=1000)
    logistic_reg.fit(x_train_v1, y_train_v1)
    w_log_reg = logistic_reg.coef_[0]
    b_log_reg = logistic_reg.intercept_[0]
    w_svm = svm_classifier.coef_[0]
    b_svm = svm_classifier.intercept_[0]
    accuracy_tr = accuracy_score(y_train_v1.loc[y_train_v1 == 1.0], svm_classifier.predict(x_train_v1.loc[y_train_v1 == 1.0]))
    print(f"SVM sklearn Train Recall for class {class_}: {round(accuracy_tr*100, 2)}%")
    report = classification_report(y_train_v1, svm_classifier.predict(x_train_v1), output_dict=True)
    f1_tr = report['1.0']['f1-score']
    accuracy_t = accuracy_score(y_test_v1.loc[y_test_v1 == 1.0], svm_classifier.predict(x_test_v1.loc[y_test_v1 == 1.0]))
    print(f"SVM sklearn Test Recall for class {class_}: {round(accuracy_t*100, 2)}%")
    sk_accuracy_svm.append((accuracy_tr, accuracy_t))
    report = classification_report(y_test_v1, svm_classifier.predict(x_test_v1), output_dict=True)
    f1_t = report['1.0']['f1-score']
    pr_sk_svm.append((f1_tr, f1_t))
    accuracy_tr = accuracy_score(y_train_v1.loc[y_train_v1 == 1.0], logistic_reg.predict(x_train_v1.loc[y_train_v1 == 1.0]))
    print(f"LOG REG sklearn Train Recall for class {class_}: {round(accuracy_tr*100, 2)}%")
    report = classification_report(y_train_v1, logistic_reg.predict(x_train_v1), output_dict=True)
    f1_tr = report['1.0']['f1-score']
    accuracy_t = accuracy_score(y_test_v1.loc[y_test_v1 == 1.0], logistic_reg.predict(x_test_v1.loc[y_test_v1 == 1.0]))
    print(f"LOG REG sklearn Test Recall for class {class_}: {round(accuracy_t*100, 2)}%")
    sk_accuracy_reg.append((accuracy_tr, accuracy_t))
    report = classification_report(y_test_v1, logistic_reg.predict(x_test_v1), output_dict=True)
    f1_t = report['1.0']['f1-score']
    pr_sk_reg.append((f1_tr, f1_t))
    # Visualize the separating lines for linear logistic regression and linear SVM
    plt.figure(figsize=(8, 6))
    plt.title(f'Sklearn, training set (separating class {class_})')
    y_wrong = y_train_v1.loc[(y_train_v1 != logistic_reg.predict(x_train_v1))]
    x_wrong = x_train_v1.loc[(y_train_v1 != logistic_reg.predict(x_train_v1))]
    y_wrong_svm_sk = y_train_v1.loc[(y_train_v1 != svm_classifier.predict(x_train_v1))]
    x_wrong_svm_sk = x_train_v1.loc[(y_train_v1 != svm_classifier.predict(x_train_v1))]
    for i in np.unique(y_train_v1):
        plt.scatter(x_train_v1.loc[y_train_v1 == i, x_train_v1.columns[0]], x_train_v1.loc[y_train_v1 == i, x_train_v1.columns[1]], label=i)
    plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black', label='log_reg error')
    plt.scatter(x_wrong_svm_sk.iloc[:, 0], x_wrong_svm_sk.iloc[:, 1], c='r', marker='v', label='svm error', alpha=0.5)
    x1_log_reg = np.array([X.iloc[:, 0].min(), X.iloc[:, 0].max()])
    x2_log_reg = (-w_log_reg[0] / w_log_reg[1]) * x1_log_reg - (b_log_reg / w_log_reg[1])
    sk_x.append(x1_log_reg)
    sk_y.append(x2_log_reg)
    x1_svm = np.array([X.iloc[:, 0].min(), X.iloc[:, 0].max()])
    x2_svm = (-w_svm[0] / w_svm[1]) * x1_svm - (b_svm / w_svm[1])
    sk_x_svm.append(x1_svm)
    sk_y_svm.append(x2_svm)
    plt.plot(x1_svm, x2_svm, c='purple', linestyle='-.', label='sklearn svm')
    plt.plot(x1_svm, x2_svm + 1 / w_svm[1], c='purple', linestyle='--')  # margin lines
    plt.plot(x1_svm, x2_svm - 1 / w_svm[1], c='purple', linestyle='--')
    plt.plot(x1_log_reg, x2_log_reg, c='purple', label='sklearn log_reg')
    plt.ylim(X.iloc[:, 1].min() - 0.5, X.iloc[:, 1].max() + 0.5)
    plt.legend()
    plt.show()
    plt.figure(figsize=(8, 6))
    plt.title(f'Sklearn, test set (separating class {class_})')
    y_wrong = y_test_v1.loc[(y_test_v1 != logistic_reg.predict(x_test_v1))]
    x_wrong = x_test_v1.loc[(y_test_v1 != logistic_reg.predict(x_test_v1))]
    y_wrong_svm_sk = y_test_v1.loc[(y_test_v1 != svm_classifier.predict(x_test_v1))]
    x_wrong_svm_sk = x_test_v1.loc[(y_test_v1 != svm_classifier.predict(x_test_v1))]
    for i in np.unique(y_test_v1):
        plt.scatter(x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[0]], x_test_v1.loc[y_test_v1 == i, x_test_v1.columns[1]], label=i)
    plt.scatter(x_wrong.iloc[:, 0], x_wrong.iloc[:, 1], c='black', label='log_reg error')
    plt.scatter(x_wrong_svm_sk.iloc[:, 0], x_wrong_svm_sk.iloc[:, 1], c='r', marker='v', label='svm error', alpha=0.5)
    plt.plot(x1_svm, x2_svm, c='purple', linestyle='-.', label='sklearn svm')
    plt.plot(x1_svm, x2_svm + 1 / w_svm[1], c='purple', linestyle='--')
    plt.plot(x1_svm, x2_svm - 1 / w_svm[1], c='purple', linestyle='--')
    plt.plot(x1_log_reg, x2_log_reg, c='purple', label='sklearn log_reg')
    plt.ylim(X.iloc[:, 1].min() - 0.5, X.iloc[:, 1].max() + 0.5)
    plt.legend()
    plt.show()
len_t = len(y_test)
len_tr = len(y_train)
accuracy_tr_svm = accuracy_t_svm = 0
accuracy_tr_reg = accuracy_t_reg = 0
pr_tr_svm = pr_t_svm = 0
pr_tr_reg = pr_t_reg = 0
sk_accuracy_tr_svm = sk_accuracy_t_svm = 0
sk_accuracy_tr_reg = sk_accuracy_t_reg = 0
sk_pr_tr_svm = sk_pr_t_svm = 0
sk_pr_tr_reg = sk_pr_t_reg = 0
for i in np.unique(Y):
    len_test = len(y_test.loc[y_test == i])
    len_train = len(y_train.loc[y_train == i])
    accuracy_tr_svm += len_train / len_tr * accuracy_svm[i][0] * 100
    accuracy_t_svm += len_test / len_t * accuracy_svm[i][1] * 100
    sk_accuracy_tr_svm += len_train / len_tr * sk_accuracy_svm[i][0] * 100
    sk_accuracy_t_svm += len_test / len_t * sk_accuracy_svm[i][1] * 100
    pr_tr_svm += len_train / len_tr * pr_svm[i][0] * 100
    pr_t_svm += len_test / len_t * pr_svm[i][1] * 100
    sk_pr_tr_svm += len_train / len_tr * pr_sk_svm[i][0] * 100
    sk_pr_t_svm += len_test / len_t * pr_sk_svm[i][1] * 100
    accuracy_tr_reg += len_train / len_tr * accuracy_reg[i][0] * 100
    accuracy_t_reg += len_test / len_t * accuracy_reg[i][1] * 100
    sk_accuracy_tr_reg += len_train / len_tr * sk_accuracy_reg[i][0] * 100
    sk_accuracy_t_reg += len_test / len_t * sk_accuracy_reg[i][1] * 100
    pr_tr_reg += len_train / len_tr * pr_reg[i][0] * 100
    pr_t_reg += len_test / len_t * pr_reg[i][1] * 100
    sk_pr_tr_reg += len_train / len_tr * pr_sk_reg[i][0] * 100
    sk_pr_t_reg += len_test / len_t * pr_sk_reg[i][1] * 100
print(f'SVM Train Recall: {round(accuracy_tr_svm, 2)}%')
print(f'SVM Test Recall: {round(accuracy_t_svm, 2)}%\n')
print(f'LOG REG Train Recall: {round(accuracy_tr_reg, 2)}%')
print(f'LOG REG Test Recall: {round(accuracy_t_reg, 2)}%')
print(f'\nSVM sklearn Train Recall: {round(sk_accuracy_tr_svm, 2)}%')
print(f'SVM sklearn Test Recall: {round(sk_accuracy_t_svm, 2)}%\n')
print(f'LOG REG sklearn Train Recall: {round(sk_accuracy_tr_reg, 2)}%')
print(f'LOG REG sklearn Test Recall: {round(sk_accuracy_t_reg, 2)}%')
print(f'\nSVM Train F1-score: {round(pr_tr_svm, 2)}%')
print(f'SVM Test F1-score: {round(pr_t_svm, 2)}%\n')
print(f'LOG REG Train F1-score: {round(pr_tr_reg, 2)}%')
print(f'LOG REG Test F1-score: {round(pr_t_reg, 2)}%')
print(f'\nSVM sklearn Train F1-score: {round(sk_pr_tr_svm, 2)}%')
print(f'SVM sklearn Test F1-score: {round(sk_pr_t_svm, 2)}%\n')
print(f'LOG REG sklearn Train F1-score: {round(sk_pr_tr_reg, 2)}%')
print(f'LOG REG sklearn Test F1-score: {round(sk_pr_t_reg, 2)}%')
x = np.array(x)
y = np.array(y)
x_svm = np.array(x_svm)
y_svm = np.array(y_svm)
sk_x = np.array(sk_x)
sk_y = np.array(sk_y)
sk_x_svm = np.array(sk_x_svm)
sk_y_svm = np.array(sk_y_svm)
plt.figure(figsize=(15, 10))
plt.title('Comparison of all methods')
for i in np.unique(Y):
    plt.scatter(X.loc[Y == i, X.columns[0]], X.loc[Y == i, X.columns[1]], label=f'class{i}')
colors = ['b', 'orange', 'g']
for i in range(len(x)):
    plt.plot(x[i], y[i], c=colors[i], label=f'LOG_REG(class{i})')
    plt.plot(x_svm[i], y_svm[i], c=colors[i], linestyle='--', label=f'SVM(class{i})')
    plt.plot(sk_x[i], sk_y[i], c=colors[i], label=f'LOG_REG_sk(class{i})', linestyle=':', lw=1.5)
    plt.plot(sk_x_svm[i], sk_y_svm[i], c=colors[i], linestyle='-.', label=f'SVM_sk(class{i})', lw=1.5)
plt.ylim(X.iloc[:, 1].min() - 0.5, X.iloc[:, 1].max() + 0.5)
plt.legend()
plt.show()