import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC

class SVM:
    def __init__(self, etha=0.01, alpha=0.1, epochs=1000, patience=10):
        # etha: learning rate; alpha: L2 regularization strength;
        # patience: epochs without improvement before early stopping.
        self._epochs = epochs
        self._etha = etha
        self._alpha = alpha
        self._w = None
        self.history_w = []
        self.train_errors = None
        self.val_errors = None
        self.train_loss = None
        self.val_loss = None
        self.best_err = float('inf')
        self.best_w = None
        self.patience = patience

    def bias(self, b):
        # Append a column of ones so the bias is learned as the last weight.
        ones_column = np.ones((b.shape[0], 1))
        return np.hstack((b, ones_column))

    def hinge_margin(self, w, x, y):
        return max(0, 1 - y * np.dot(x, w))

    def soft_margin(self, w, x, y, alpha):
        # Hinge loss plus the L2 penalty on the weights.
        return self.hinge_margin(w, x, y) + alpha * np.dot(w, w)

    def fit(self, X_train, Y_train, X_val, Y_val):
        print('\nModel is fitting:\n...')
        X_train = self.bias(X_train)
        X_val = self.bias(X_val)
        w = np.zeros(X_train.shape[1])
        # Positional indexing below requires plain arrays, not pandas Series.
        Y_train = np.asarray(Y_train)
        Y_val = np.asarray(Y_val)
        new_w = [w.copy()]
        train_errors = []
        val_errors = []
        train_loss = []
        val_loss = []
        last_improvement = 0
        for epoch in range(self._epochs):
            pom_train = 0   # misclassified training samples this epoch
            pom_val = 0     # misclassified validation samples this epoch
            vtr_train = 0   # accumulated soft-margin loss (train)
            vtr_val = 0     # accumulated soft-margin loss (validation)
            for i, x in enumerate(X_train):
                margin = Y_train[i] * np.dot(w, X_train[i])
                if margin >= 1:
                    # Correct with margin: only the regularization step.
                    w = w - self._etha * self._alpha * w / self._epochs
                else:
                    # Margin violation: step along the hinge subgradient too.
                    w = w + self._etha * (Y_train[i] * X_train[i] - self._alpha * w / self._epochs)
                    pom_train += 1
                vtr_train += self.soft_margin(w, X_train[i], Y_train[i], self._alpha)
                new_w.append(w.copy())
            for i, x in enumerate(X_val):
                vtr_val += self.soft_margin(w, X_val[i], Y_val[i], self._alpha)
                pom_val += int(Y_val[i] * np.dot(w, X_val[i]) < 1)
            train_errors.append(pom_train)
            val_errors.append(pom_val)
            train_loss.append(vtr_train)
            val_loss.append(vtr_val)
            if pom_train < self.best_err:
                self.best_err = pom_train
                self.best_w = np.copy(w)
                last_improvement = epoch
            elif epoch - last_improvement >= self.patience:
                print(f"Early stopping at epoch {epoch}, no improvement in the last {self.patience} epochs.")
                break
            # Check convergence after saving the best weights, so a run that
            # separates the data on its first epoch still stores them.
            if pom_train < 1:
                break
        self._w = self.best_w
        self.history_w = np.array(new_w)
        self.train_errors = train_errors
        self.val_errors = val_errors
        self.train_loss = train_loss
        self.val_loss = val_loss
        print('Model fitted.\n')

    def predict(self, X):
        X_b = self.bias(X)
        y_pred = np.sign(np.dot(X_b, self._w))
        return y_pred
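
# A minimal smoke test of the class above (illustrative only; not in the
# original paste). Two linearly separable points should converge to zero
# training errors almost immediately. Uncomment to run:
# toy_X = np.array([[0.0, 2.0], [0.0, -2.0]])
# toy_y = np.array([1.0, -1.0])
# toy_model = SVM(etha=0.1, epochs=100)
# toy_model.fit(toy_X, toy_y, toy_X, toy_y)
# print(toy_model.predict(toy_X))  # expected: [ 1. -1.]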

def find_best_alpha(alphas, x_train, y_train, x_val, y_val):
    # Grid search: keep the alpha whose model has the lowest mean
    # per-epoch validation error count.
    best_alpha = None
    best_error = float('inf')
    for alpha in alphas:
        model = SVM(alpha=alpha)
        model.fit(x_train, y_train, x_val, y_val)
        mean_val_error = np.mean(model.val_errors)
        if mean_val_error < best_error:
            best_error = mean_val_error
            best_alpha = alpha
    return best_alpha

n_features = 5
X, y = make_classification(
    n_samples=1001,
    n_features=n_features,
    n_classes=2,
    n_clusters_per_class=1,
    class_sep=2,
    random_state=42
)
df = pd.DataFrame(data=X, columns=[f'F{i+1}' for i in range(n_features)])
df['target'] = y
X = df.drop(columns=['target'])
Y = df.target

# Project the five features onto the first two principal components.
pca = PCA(n_components=2)
X = pd.DataFrame(pca.fit_transform(X), columns=['F1', 'F2'], index=X.index)
sns.pairplot(pd.concat([X, Y], axis=1), hue='target')
plt.show()
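
# Worth checking how much of the variance the 2-D projection retains
# (my addition; the original does not print this):
print("PCA explained variance ratio:", pca.explained_variance_ratio_)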

# Map the {0, 1} targets to {+1, -1} for the hinge loss. The original looped
# over np.unique(Y) and re-applied the mapping, which reduces to sending
# class 0 to +1.0 and class 1 to -1.0.
Y = Y.apply(lambda label: 1.0 if label == 0 else -1.0)

# 80% train, 10% validation, 10% test.
x_train, x_rem, y_train, y_rem = train_test_split(X, Y, test_size=0.2, random_state=42)
x_test, x_val, y_test, y_val = train_test_split(x_rem, y_rem, test_size=0.5, random_state=42)
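
# The paste jumps straight to reading model_svm, so the fitting step is
# missing; a reasonable reconstruction (the alpha grid below is my guess,
# not from the original) is:
alphas = [0.0001, 0.001, 0.01, 0.1]  # hypothetical grid
best_alpha = find_best_alpha(alphas, x_train.values, y_train.values,
                             x_val.values, y_val.values)
model_svm = SVM(alpha=best_alpha)
model_svm.fit(x_train.values, y_train.values, x_val.values, y_val.values)
pred_train = model_svm.predict(x_train.values)
pred_test = model_svm.predict(x_test.values)
pred_val = model_svm.predict(x_val.values)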

w_svm = model_svm._w
err_svm = model_svm.train_errors
val_err = model_svm.val_errors
tr_loss = model_svm.train_loss
val_loss = model_svm.val_loss
epochs_svm = range(1, len(err_svm) + 1)
print("Errors per epoch")
print(err_svm)
print("Weight values")
print(w_svm)

plt.figure(figsize=(8, 6))
for i in np.unique(Y):
    plt.scatter(X.loc[Y == i, X.columns[0]], X.loc[Y == i, X.columns[1]], label=i)
# Decision boundary: w0*x1 + w1*x2 + w2 = 0, solved for x2. The margin lines
# satisfy w.x + b = ±1, i.e. the boundary shifted by ±1/w1 along x2.
x1_svm = np.array([X.iloc[:, 0].min(), X.iloc[:, 0].max()])
x2_svm = np.array([(-w_svm[0] * x1_svm[0] - w_svm[2]) / w_svm[1],
                   (-w_svm[0] * x1_svm[1] - w_svm[2]) / w_svm[1]])
plt.title('Full dataset')
plt.plot(x1_svm, x2_svm, 'g')
plt.plot(x1_svm, x2_svm + 1 / w_svm[1], 'g--')
plt.plot(x1_svm, x2_svm - 1 / w_svm[1], 'g--')
plt.ylim(X.F2.min() - 1, X.F2.max() + 1)
plt.legend()
plt.show()

plt.figure(figsize=(10, 6))
plt.plot(epochs_svm, np.array(tr_loss), label='train')
plt.plot(epochs_svm, np.array(val_loss), label='validation')
plt.legend()
plt.grid()
plt.ylabel('Loss')
plt.xlabel('Epochs')
plt.show()

accuracy = accuracy_score(y_train, pred_train)
print(f"SVM Train Accuracy: {round(accuracy*100, 2)}%")
accuracy = accuracy_score(y_test, pred_test)
print(f"SVM Test Accuracy: {round(accuracy*100, 2)}%")
accuracy = accuracy_score(y_val, pred_val)
print(f"SVM Validation Accuracy: {round(accuracy*100, 2)}%")

# Highlight misclassified test points in black.
y_wrong = y_test.loc[y_test != pred_test].index
plt.title("Test set")
for i in np.unique(Y):
    plt.scatter(x_test.loc[y_test == i, X.columns[0]], x_test.loc[y_test == i, X.columns[1]], label=i)
plt.scatter(x_test.loc[y_wrong, X.columns[0]], x_test.loc[y_wrong, X.columns[1]], c='black', label='error')
plt.plot(x1_svm, x2_svm, 'g')
plt.plot(x1_svm, x2_svm + 1 / w_svm[1], 'g--')
plt.plot(x1_svm, x2_svm - 1 / w_svm[1], 'g--')
plt.ylim(X.F2.min() - 1, X.F2.max() + 1)
plt.legend()
plt.show()

# Same plot for the training set.
y_wrong = y_train.loc[y_train != pred_train].index
plt.title("Training set")
for i in np.unique(Y):
    plt.scatter(x_train.loc[y_train == i, X.columns[0]], x_train.loc[y_train == i, X.columns[1]], label=i)
plt.scatter(x_train.loc[y_wrong, X.columns[0]], x_train.loc[y_wrong, X.columns[1]], c='black', label='error')
plt.plot(x1_svm, x2_svm, 'g')
plt.plot(x1_svm, x2_svm + 1 / w_svm[1], 'g--')
plt.plot(x1_svm, x2_svm - 1 / w_svm[1], 'g--')
plt.ylim(X.F2.min() - 1, X.F2.max() + 1)
plt.legend()
plt.show()

# Reference model: sklearn's LinearSVC on the same split.
model = LinearSVC(max_iter=200, random_state=42)
model.fit(x_train, y_train)

plt.figure(figsize=(8, 6))
for i in np.unique(Y):
    plt.scatter(X.loc[Y == i, X.columns[0]], X.loc[Y == i, X.columns[1]], label=i)
# Predict on a dense grid to shade the two decision regions.
x_min, x_max = X.iloc[:, 0].min() - 1, X.iloc[:, 0].max() + 1
y_min, y_max = X.iloc[:, 1].min() - 1, X.iloc[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, alpha=0.4, cmap=plt.cm.coolwarm)
plt.title('Sklearn_SVM')
plt.legend()
plt.show()

pred_test = model.predict(x_test)
pred_train = model.predict(x_train)
pred_val = model.predict(x_val)
accuracy = accuracy_score(y_train, pred_train)
print(f"LinearSVC Train Accuracy: {round(accuracy*100, 2)}%")
accuracy = accuracy_score(y_test, pred_test)
print(f"LinearSVC Test Accuracy: {round(accuracy*100, 2)}%")
accuracy = accuracy_score(y_val, pred_val)
print(f"LinearSVC Validation Accuracy: {round(accuracy*100, 2)}%")
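
# Side-by-side look at the two solutions (my addition; not in the original).
# LinearSVC exposes the hyperplane as coef_ / intercept_, while the class
# above stores the bias as the last component of _w.
print("custom SVM w:", w_svm)
print("LinearSVC w:", np.append(model.coef_[0], model.intercept_[0]))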