import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import fsolve
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

def polynomial_features(X, d):
    """Expand two features into all monomials x1^(i-j) * x2^j for i = 0..d, j = 0..i."""
    # Number of monomials of total degree <= d in two variables.
    n_columns = (d + 1) * (d + 2) // 2
    # Work on positional arrays so a non-default index cannot break the lookups.
    x1 = X.iloc[:, 0].to_numpy()
    x2 = X.iloc[:, 1].to_numpy()
    rows = []
    for k in range(len(X)):
        feature_row = []
        for i in range(d + 1):
            for j in range(i + 1):
                feature_row.append((x1[k] ** (i - j)) * (x2[k] ** j))
        rows.append(feature_row)
    return pd.DataFrame(rows, columns=[f'F{i + 1}' for i in range(n_columns)])
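
# Illustrative sanity check (not part of the original paste): for d = 2 the map
# should yield (d + 1) * (d + 2) // 2 = 6 monomials in the order
# 1, x1, x2, x1^2, x1*x2, x2^2.
_demo = polynomial_features(pd.DataFrame({'x1': [2.0], 'x2': [3.0]}), 2)
assert _demo.iloc[0].tolist() == [1.0, 2.0, 3.0, 4.0, 6.0, 9.0]
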
class Logistic_Regression:
    def __init__(self, epochs=1000, learning_rate=0.01, C=1.0, patience=100, L2=False):
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.C = C                    # inverse regularization strength (lambda = 1 / C)
        self.patience = patience      # epochs without improvement before early stopping
        self.t = []                   # epoch numbers, for the learning curve
        self.n = []                   # misclassification count per epoch
        self.best_w = None
        self.best_err = float('inf')
        self.last_improvement = 0
        self.x_train = None
        self.x_test = None
        self.L2 = L2

    def sigmoid(self, M):
        # Note: for strongly negative M, np.exp(-M) can overflow and emit a
        # RuntimeWarning; clipping M (e.g. np.clip(M, -500, 500)) is a common safeguard.
        return 1 / (1 + np.exp(-M))

    def diff(self, x, w, y):
        # Negative gradient of the logistic loss log(1 + exp(-M)) for one sample,
        # where M = y * <x, w> is the margin.
        M = y * np.dot(x, w.T)
        return (1 - self.sigmoid(M)) * y * x

    def fit(self, X_train, y_train):
        self.x_train = X_train.copy()
        len_w = self.x_train.shape[1] + 1
        lambda_ = 1 / self.C
        w = np.random.rand(len_w)
        ell = len(self.x_train)
        self.x_train['bias'] = np.ones(ell)  # bias feature; its weight is w[-1]
        for k in range(self.epochs):
            err = 0
            # Gradient accumulator; the original initialized this with random
            # noise, which corrupted every update.
            dw = np.zeros(len_w)
            for i in range(ell):
                M = y_train.iloc[i] * np.dot(self.x_train.iloc[i].values, w)
                if M < 0:
                    err += 1  # negative margin: the sample is misclassified
                dw -= self.diff(self.x_train.iloc[i].values, w, y_train.iloc[i])
            if self.L2:
                w_reg = np.copy(w)
                w_reg[-1] = 0  # do not penalize the bias term
                # += so the penalty gradient 2*lambda*w shrinks the weights
                # (the original subtracted it, which grew them instead).
                dw += 2 * lambda_ * w_reg
            w -= self.learning_rate * dw / ell
            print(f'epoch: {k}; w = {w}; err = {err}')
            self.t.append(k + 1)
            self.n.append(err)
            if err < self.best_err:
                self.best_err = err
                self.best_w = np.copy(w)
                self.last_improvement = k
            elif k - self.last_improvement >= self.patience:
                print(f"Early stopping at epoch {k}: no improvement in the last {self.patience} epochs.")
                break
            if err == 0:  # perfect separation of the training set
                break
        return self.best_w

    def predict(self, X_test):
        self.x_test = X_test.copy()
        self.x_test['bias'] = np.ones(len(self.x_test))
        return np.sign(np.dot(self.x_test.values, self.best_w))


def polynomial_equation(vars, x1, w, d):
    # Decision-function value at (x1, x2); the monomial order matches
    # polynomial_features, and w[-1] is the extra bias weight appended in fit.
    x2 = vars[0]
    features = []
    for i in range(d + 1):
        for j in range(i + 1):
            features.append((x1 ** (i - j)) * (x2 ** j))
    return np.dot(w[:-1], np.array(features)) + w[-1]


def find_x2_given_x1_w(x1, w, d):
    # fsolve returns the single root nearest the initial guess, so boundaries with
    # several x2 values per x1 are only partially traced (see the contour plot below).
    return fsolve(polynomial_equation, [0.0], args=(x1, w, d))[0]


# --- Linearly separable case: two iris species, two features ---
df = pd.read_csv('iris.csv')
df = df.loc[df['species'] != 'virginica', ['sepal_length', 'petal_length', 'species']]
df['species'] = df['species'].apply(lambda x: 1 if x == 'setosa' else -1)
x_train, x_test, y_train, y_test = train_test_split(df[df.columns[:-1]], df['species'], test_size=0.2, random_state=42)

learning_rate = 0.01
lr = Logistic_Regression(learning_rate=learning_rate, epochs=10000, L2=False)
w = lr.fit(x_train, y_train)
pred_test = lr.predict(x_test)
pred_train = lr.predict(x_train)
epochs = lr.t
epochs_err = lr.n
accuracy = accuracy_score(y_train, pred_train)
print(f"Train Accuracy: {accuracy * 100:.2f}%")
accuracy = accuracy_score(y_test, pred_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")
plt.figure(figsize=(8, 6))
for i in np.unique(df['species']):
    plt.scatter(df.loc[df['species'] == i, df.columns[0]], df.loc[df['species'] == i, df.columns[1]], label=i)
plt.xlabel('sepal_length')
plt.ylabel('petal_length')  # the second feature is petal_length, not petal_width
# Decision line: w[0]*x1 + w[1]*x2 + w[2] = 0  =>  x2 = (-w[0]*x1 - w[2]) / w[1]
x1 = [df.iloc[:, 0].min(), df.iloc[:, 0].max()]
x2 = [(-w[0] * x1[0] - w[2]) / w[1], (-w[0] * x1[1] - w[2]) / w[1]]
plt.plot(x1, x2, c='green')
plt.legend()
plt.show()

plt.figure(figsize=(8, 6))
plt.plot(epochs, np.array(epochs_err) / len(x_train) * 100)
plt.ylabel('Training-set error rate, %')
plt.xlabel('Epoch')
plt.show()

# --- Non-linearly separable case: moons with a degree-d polynomial feature map ---
df = pd.read_csv('Moons.csv')
df = df[df.columns[1:]]  # drop the leading index column
df['target'] = df['target'].apply(lambda x: 1 if x == 0 else -1)
X = df.iloc[:, :-1]
Y = df.iloc[:, -1]
d = 7
X_poly = polynomial_features(X, d)
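
# Quick check (illustrative): with d = 7 the expanded design matrix should have
# (d + 1) * (d + 2) // 2 = 36 columns.
print(f"Polynomial design matrix shape: {X_poly.shape}")
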
x_train, x_test, y_train, y_test = train_test_split(X_poly, Y, test_size=0.2, random_state=42)

learning_rate = 0.01
best_w = None
best_err = float('inf')
best_C = None
# Grid search over the regularization strength C (lambda = 1 / C).
# Note: C is selected by test-set error here; a separate validation split
# would avoid leaking the test set into model selection.
for C_val in [0.001, 0.1, 1, 2, 3, 4, 5, 6]:
    lr = Logistic_Regression(learning_rate=learning_rate, C=C_val, epochs=10000, L2=True)
    w = lr.fit(x_train, y_train)
    predictions = lr.predict(x_test)
    pred_train = lr.predict(x_train)
    errors = np.sum(predictions != y_test)
    if errors < best_err:
        best_epochs = lr.t
        best_epochs_err = lr.n
        best_err = errors
        best_w = w
        best_C = C_val
        best_pred = predictions
        best_pred_train = pred_train

accuracy_test = accuracy_score(y_test, best_pred)
accuracy_train = accuracy_score(y_train, best_pred_train)
print(f"Test Accuracy: {accuracy_test * 100:.2f}%")
print(f"Train Accuracy: {accuracy_train * 100:.2f}%")
print(f"Best C: {best_C}")
print(f"Best W: {best_w}")
print(f"Best err: {best_err}")

# Trace the decision boundary by solving f(x1, x2) = 0 for x2 at each x1.
x1 = np.linspace(df.iloc[:, 0].min(), df.iloc[:, 0].max(), 100)
x2 = np.array([find_x2_given_x1_w(v, best_w, d) for v in x1])

plt.figure(figsize=(8, 6))
for i in np.unique(Y):
    plt.scatter(df.loc[Y == i, df.columns[0]], df.loc[Y == i, df.columns[1]], label=i)
plt.plot(x1, x2, c='green')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('Non-linear Classification Example (Moons)')
plt.legend()
plt.show()
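
# Alternative boundary plot (sketch, not in the original paste): fsolve traces one
# root of f(x1, .) per x1, which can miss branches when the moons boundary has
# several x2 values for a single x1. Rasterizing the decision function and drawing
# its zero level set with plt.contour is more robust. The monomial order below
# mirrors polynomial_features, with best_w[-1] as the bias appended in fit.
g1, g2 = np.meshgrid(
    np.linspace(df.iloc[:, 0].min(), df.iloc[:, 0].max(), 200),
    np.linspace(df.iloc[:, 1].min(), df.iloc[:, 1].max(), 200),
)
F = np.full_like(g1, best_w[-1])  # start from the bias term
k = 0
for i in range(d + 1):
    for j in range(i + 1):
        F += best_w[k] * (g1 ** (i - j)) * (g2 ** j)
        k += 1
plt.figure(figsize=(8, 6))
for label in np.unique(Y):
    plt.scatter(df.loc[Y == label, df.columns[0]], df.loc[Y == label, df.columns[1]], label=label)
plt.contour(g1, g2, F, levels=[0], colors='green')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('Moons: zero level set of the decision function')
plt.legend()
plt.show()
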
plt.figure(figsize=(8, 6))
plt.plot(best_epochs, np.array(best_epochs_err) / len(x_train) * 100)
plt.ylabel('Training-set error rate, %')
plt.xlabel('Epoch')
plt.show()