mirosh111000

LinearRegression

Nov 24th, 2023
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy.optimize import fsolve


def polynomial_features(X, d):
    # Expand the two input columns into all monomials x1^(i-j) * x2^j of
    # total degree <= d; there are (d + 1)(d + 2) / 2 of them.
    n_columns = (d + 1) * (d + 2) // 2

    x1 = X.iloc[:, 0].to_numpy()
    x2 = X.iloc[:, 1].to_numpy()
    features = pd.DataFrame(columns=[f'F{i + 1}' for i in range(n_columns)])
    for k in range(len(X)):
        feature_row = []
        for i in range(d + 1):
            for j in range(i + 1):
                feature_row.append((x1[k] ** (i - j)) * (x2[k] ** j))
        features.loc[k] = feature_row
    return features

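# Example (illustrative): for d = 2 a row (x1, x2) expands to the
# (d + 1)(d + 2) / 2 = 6 monomials [1, x1, x2, x1^2, x1*x2, x2^2],
# in the order produced by the nested loops above.
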
class Logistic_Regression:
    def __init__(self, epochs=1000, learning_rate=0.01, C=1.0, patience=100, L2=False):
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.C = C                        # inverse regularization strength (lambda = 1 / C)
        self.patience = patience          # epochs without improvement before early stopping
        self.t = []                       # epoch numbers
        self.n = []                       # misclassification count per epoch
        self.best_w = None
        self.best_err = float('inf')
        self.last_improvement = 0
        self.x_train = None
        self.x_test = None
        self.L2 = L2

    def sigmoid(self, M):
        return 1 / (1 + np.exp(-M))

    def diff(self, x, w, y):
        # Negative gradient of the single-sample logistic loss
        # L(w) = log(1 + exp(-M)) with margin M = y * <w, x>.
        M = y * np.dot(x, w.T)
        d = (1 - self.sigmoid(M)) * y * x
        return d

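    # Derivation (for reference): dL/dM = -exp(-M) / (1 + exp(-M)) = -(1 - sigmoid(M))
    # and dM/dw = y * x, so dL/dw = -(1 - sigmoid(M)) * y * x. `diff` therefore
    # returns -dL/dw, and fit() accumulates dw -= diff(...) to build the loss gradient.
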
    def fit(self, X_train, y_train):
        self.x_train = X_train.copy()
        len_w = self.x_train.shape[1] + 1      # +1 for the bias weight
        lambda_ = 1 / self.C
        w = np.random.rand(len_w)
        ell = len(self.x_train)
        self.x_train['bias'] = np.ones(ell)

        for k in range(self.epochs):
            err = 0
            dw = np.zeros(len_w)               # gradient accumulator for this epoch
            for i in range(ell):
                M = y_train.iloc[i] * np.dot(self.x_train.iloc[i].values, w)
                if M < 0:                      # negative margin: misclassified sample
                    err += 1
                dw -= self.diff(self.x_train.iloc[i].values, w, y_train.iloc[i])

            if self.L2:
                # Gradient of the L2 penalty lambda * ||w||^2, bias excluded.
                w_reg = np.copy(w)
                w_reg[-1] = 0
                dw += 2 * lambda_ * w_reg
            w -= self.learning_rate * dw / ell

            print(f'epoch: {k} ; w = {w}; err = {err}\n')

            self.t.append(k + 1)
            self.n.append(err)

            if err < self.best_err:
                self.best_err = err
                self.best_w = np.copy(w)
                self.last_improvement = k
            elif k - self.last_improvement >= self.patience:
                print(f"Early stopping at epoch {k}, no improvement in the last {self.patience} epochs.")
                break

            if err < 1:                        # training set perfectly separated
                break

        return self.best_w

    def predict(self, X_test):
        self.x_test = X_test.copy()
        ell_test = len(self.x_test)
        self.x_test['bias'] = np.ones(ell_test)
        # Predicted class is the sign of the decision function <w, x>.
        predictions = np.sign(np.dot(self.x_test.values, self.best_w))
        return predictions


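# Sanity check (illustrative sketch, assuming the iris split built below):
# sklearn's LogisticRegression should reach a comparable error rate on the
# same data, which helps validate the hand-rolled gradient above.
#
#   from sklearn.linear_model import LogisticRegression
#   clf = LogisticRegression(C=1.0, max_iter=1000).fit(x_train, y_train)
#   print(f"sklearn test accuracy: {clf.score(x_test, y_test)}")
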
def polynomial_equation(vars, x1, w, d):
    # Decision function f(x1, x2) = <w[:-1], features> + w[-1] for a fixed x1
    # and a candidate x2; the decision boundary is the set where f = 0.
    x2 = vars[0]
    features = []
    for i in range(d + 1):
        for j in range(i + 1):
            features.append((x1 ** (i - j)) * (x2 ** j))
    features = np.array(features)
    f_value = np.dot(w[:-1], features) + w[-1]
    return f_value


def find_x2_given_x1_w(x1, w, d):
    # Numerically solve f(x1, x2) = 0 for x2 to trace the decision boundary.
    initial_guess = [0.0]
    x2 = fsolve(polynomial_equation, initial_guess, args=(x1, w, d))[0]
    return x2

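# Note: fsolve returns a single root near the initial guess, so if the
# polynomial boundary crosses a given x1 at several x2 values (or none),
# only one branch is traced; a contour plot of the decision function is a
# more robust way to draw such a boundary.
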
df = pd.read_csv('iris.csv')
df = df.loc[(df['species'] != 'virginica'), ['sepal_length', 'petal_length', 'species']]
# Map the labels to {+1, -1}, as required by the margin formulation M = y * <w, x>.
df['species'] = df['species'].apply(lambda x: 1 if x == 'setosa' else -1)

x_train, x_test, y_train, y_test = train_test_split(df[df.columns[:-1]], df['species'], test_size=0.2, random_state=42)

learning_rate = 0.01

lr = Logistic_Regression(learning_rate=learning_rate, epochs=10000, L2=False)
w = lr.fit(x_train, y_train)

pred_test = lr.predict(x_test)
pred_train = lr.predict(x_train)
epochs = lr.t
epochs_err = lr.n

accuracy = accuracy_score(y_train, pred_train)
print(f"Train Accuracy: {accuracy*100}%")

accuracy = accuracy_score(y_test, pred_test)
print(f"Test Accuracy: {accuracy*100}%")

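# Setosa is linearly separable from versicolor in these two features, so the
# training error should reach zero and trigger the `err < 1` early exit.
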

plt.figure(figsize=(8, 6))
for i in np.unique(df['species']):
    plt.scatter(df.loc[df['species'] == i, df.columns[0]], df.loc[df['species'] == i, df.columns[1]], label=i)
plt.xlabel('sepal_length')
plt.ylabel('petal_length')
# Linear decision boundary w[0]*x1 + w[1]*x2 + w[2] = 0, solved for x2.
x1 = [df.iloc[:, 0].min(), df.iloc[:, 0].max()]
x2 = [(-w[0] * x1[0] - w[2]) / w[1], (-w[0] * x1[1] - w[2]) / w[1]]
plt.plot(x1, x2, c='green')
plt.legend()
plt.show()

plt.figure(figsize=(8, 6))
plt.plot(epochs, np.array(epochs_err)/len(x_train)*100)
plt.ylabel('Percentage of errors on the training set')
plt.xlabel('Number of epochs')
plt.show()


df = pd.read_csv('Moons.csv')
df = df[df.columns[1:]]                  # drop the saved index column
df['target'] = df['target'].apply(lambda x: 1 if x == 0 else -1)
X = df.iloc[:, :-1]
Y = df.iloc[:, -1]

d = 7
X_poly = polynomial_features(X, d)       # (d + 1)(d + 2) / 2 = 36 feature columns
x_train, x_test, y_train, y_test = train_test_split(X_poly, Y, test_size=0.2, random_state=42)

learning_rate = 0.01

best_w = None
best_err = float('inf')
best_C = None

for C_val in [0.001, 0.1, 1, 2, 3, 4, 5, 6]:
    lr = Logistic_Regression(learning_rate=learning_rate, C=C_val, epochs=10000, L2=True)
    w = lr.fit(x_train, y_train)

    predictions = lr.predict(x_test)
    pred_train = lr.predict(x_train)

    errors = np.sum(predictions != y_test)

    if errors < best_err:
        best_epochs = lr.t
        best_epochs_err = lr.n
        best_err = errors
        best_w = w
        best_C = C_val
        best_pred = predictions
        best_pred_train = pred_train
        accuracy_test = accuracy_score(y_test, best_pred)
        accuracy_train = accuracy_score(y_train, best_pred_train)


print(f"Test Accuracy: {accuracy_test*100}%")
print(f"Train Accuracy: {accuracy_train*100}%")

print(f"Best C: {best_C}")
print(f"Best W: {best_w}")
print(f"Best err: {best_err}")
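
# Caveat: C is chosen here by test-set error, which leaks test information
# into model selection; a held-out validation split or cross-validation
# would be the standard way to pick C.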


x1 = np.linspace(df.iloc[:, 0].min(), df.iloc[:, 0].max(), 100)
x2 = []
for i in range(len(x1)):
    x2.append(find_x2_given_x1_w(x1[i], best_w, d))
x2 = np.array(x2)
plt.figure(figsize=(8, 6))
for i in np.unique(Y):
    plt.scatter(df.loc[Y == i, df.columns[0]], df.loc[Y == i, df.columns[1]], label=i)

plt.plot(x1, x2, c='green')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('Non-linear Classification Example (Moons)')
plt.legend()
plt.show()


plt.figure(figsize=(8, 6))
plt.plot(best_epochs, np.array(best_epochs_err)/len(x_train)*100)
plt.ylabel('Percentage of errors on the training set')
plt.xlabel('Number of epochs')
plt.show()