Advertisement
mirosh111000

Лабораторна робота№4

Oct 19th, 2023
103
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.12 KB | None | 0 0
  1. import numpy as np
  2. import pandas as pd
  3. from sklearn.model_selection import train_test_split
  4. import matplotlib.pyplot as plt
  5. import random
  6. from sklearn.linear_model import LinearRegression
  7. from sklearn.preprocessing import PolynomialFeatures
  8.  
  9.  
  10. def function(x, coefficients):
  11.     y = coefficients[0]
  12.     for i in range(1, len(coefficients)):
  13.         y += coefficients[i] * x ** i
  14.     return y
  15.  
  16. def f(x, coefficients):
  17.     return x.dot(coefficients[1:]) + coefficients[0]
  18.  
  19. def fun_name(coefficients):
  20.     name = f'f(x)={np.round(coefficients[0], 2)}'
  21.     for i in range(1, len(coefficients)):
  22.         if coefficients[i] >= 0:
  23.             name += '+'
  24.         name += f'{np.round(coefficients[i], 2)}*x^{i}'
  25.     return name
  26.  
  27.  
  28. def mean_squared_error(actual, predicted):
  29.  
  30.     n = len(actual)
  31.     squared_errors = [(actual[i] - predicted[i]) ** 2 for i in range(n)]
  32.     mse = sum(squared_errors) / n
  33.     return mse[0]
  34.  
  35.  
  36. def gauss(A, b):
  37.     n = A.shape[0]
  38.     x = np.zeros((n, 1))
  39.  
  40.     for i in range(n):
  41.         for j in range(i + 1, n):
  42.             factor = A[j, i] / A[i, i]
  43.             b[j] -= factor * b[i]
  44.             for k in range(n):
  45.                 A[j, k] -= factor * A[i, k]
  46.  
  47.     for i in range(n - 1, -1, -1):
  48.         x[i] = b[i]
  49.         for j in range(i + 1, n):
  50.             x[i] -= A[i, j] * x[j]
  51.         x[i] /= A[i, i]
  52.  
  53.     x = [x[i][0] for i in range(len(x))]
  54.  
  55.     return x
  56.  
  57. def least_squares_polynomial_regression(X, y):
  58.    
  59.     X = np.hstack((np.ones((X.shape[0], 1)), X))
  60.     A = np.dot(X.T, X)
  61.     b = np.dot(X.T, y)
  62.     coefficients = gauss(A, b)
  63.  
  64.     return coefficients
  65.  
  66. iris = pd.read_csv('iris.csv')
  67. data = iris.drop(labels=[iris.columns[-1], iris.columns[1], iris.columns[0], iris.columns[3], iris.columns[4]], axis=1)
  68. data = np.copy(data)
  69. target = np.sin(data)
  70. for i in range(len(target)):
  71.     target[i] += random.uniform(-0.3, 0.3)
  72.    
  73.    
  74.    
  75.  
  76. degree = 1
  77. data_poly_manual = np.hstack([data ** i for i in range(1, degree + 1)])
  78. X_train, X_test, y_train, y_test = train_test_split(data_poly_manual, target, test_size=0.3, random_state=42)
  79. coefficients = least_squares_polynomial_regression(X_train, y_train)
  80. y_pred = f(X_test, coefficients)
  81.  
  82. plt.figure(figsize=(10, 7))
  83. X = np.linspace(np.min(data), np.max(data), 100)
  84. plt.scatter(X_train[:, 0], y_train[:, 0], c='blue', label='Навчальна вибірка')
  85. plt.scatter(X_test[:, 0], y_test[:, 0], c='red', label='Тестова вибірка')
  86. plt.plot(X, np.sin(X), c='black', label='f(x)=sin(x)', lw=2)
  87. plt.legend()
  88. plt.xlabel(f'x')
  89. plt.ylabel(f'y')
  90. plt.title('Побудова точок навчальної та тестової вибірки')
  91. plt.show()
  92.  
  93. mse = mean_squared_error(y_test, y_pred)
  94. print(f"Mean Squared Error: {mse}\n")
  95.  
  96. match_df = pd.DataFrame({f'x_test': X_test[:, 0],
  97.                          f'y_test': y_test[:, 0],
  98.                          f'y_test_pred': y_pred})
  99. print(match_df)
  100.  
  101. print(f'\n{fun_name(coefficients)} ; p = {degree}')
  102. plt.figure(figsize=(10, 7))
  103. plt.scatter(X_train[:, 0], y_train[:, 0], c='blue', label='Навчальна вибірка')
  104. plt.scatter(X_test[:, 0], y_test[:, 0], c='red', label='Тестова вибірка')
  105. plt.plot(X, np.sin(X), c='black', label='f(x)=sin(x)', lw=2)
  106. plt.plot(X, function(X, coefficients), label=f'p={degree}')
  107. plt.legend()
  108. plt.xlabel(f'x')
  109. plt.ylabel(f'y')
  110. plt.show()
  111.  
  112. plt.figure(figsize=(15, 10))
  113. plt.scatter(X_train[:, 0], y_train[:, 0], c='blue', label='Навчальна вибірка')
  114. plt.scatter(X_test[:, 0], y_test[:, 0], c='red', label='Тестова вибірка')
  115. plt.plot(X, np.sin(X), c='black', label='f(x)=sin(x)', lw=2)
  116.  
  117. mse_df = pd.DataFrame({'MSE_Train': [], 'MSE_Test': []})
  118.  
  119. for p in range(1, 21):
  120.    
  121.     degree = p
  122.     data_poly_manual = np.hstack([data ** i for i in range(1, degree + 1)])
  123.     X_train, X_test, y_train, y_test = train_test_split(data_poly_manual, target, test_size=0.3, random_state=42)
  124.     coefficients = least_squares_polynomial_regression(X_train, y_train)
  125.     y_pred = f(X_test, coefficients)
  126.    
  127.     mse_test = mean_squared_error(y_test, y_pred)
  128.     mse_train = mean_squared_error(y_train, f(X_train, coefficients))
  129.     mse_df.loc[p] = [mse_train, mse_test]
  130.     if p < 6:
  131.         plt.plot(X, function(X, coefficients), label=f' p={degree}')
  132.         print(f'{fun_name(coefficients)} ; p = {degree}')
  133.  
  134. plt.legend()
  135. plt.xlabel(f'x')
  136. plt.ylabel(f'y')
  137. plt.show()
  138. mse_df.index.names = ['p']
  139. print(mse_df)
  140.  
  141. plt.figure(figsize=(10, 7))
  142. plt.plot(mse_df.index, mse_df['MSE_Train'], label='MSE_Train')
  143. plt.plot(mse_df.index, mse_df['MSE_Test'], label='MSE_Test')
  144. plt.xlabel(f'p')
  145. plt.ylabel(f'MSE')
  146. plt.legend()
  147. plt.show()
  148.  
  149. mse_df_min = mse_df.idxmin()
  150. print(mse_df_min)
  151.  
  152. degree = mse_df_min.loc['MSE_Test']
  153. data_poly_manual = np.hstack([data ** i for i in range(1, degree + 1)])
  154. X_train, X_test, y_train, y_test = train_test_split(data_poly_manual, target, test_size=0.3, random_state=42)
  155. coefficients = least_squares_polynomial_regression(X_train, y_train)
  156. my_coefficients = coefficients.copy()
  157. y_pred = f(X_test, coefficients)
  158. mse = mean_squared_error(y_test, y_pred)
  159.  
  160. print(f'\n{fun_name(coefficients)} ; p = {degree}')
  161.  
  162. plt.figure(figsize=(10, 7))
  163. plt.scatter(X_train[:, 0], y_train[:, 0], c='blue', label='Навчальна вибірка')
  164. plt.scatter(X_test[:, 0], y_test[:, 0], c='red', label='Тестова вибірка')
  165. plt.plot(X, np.sin(X), c='black', label='f(x)=sin(x)', lw=2)
  166. plt.plot(X, function(X, coefficients), label=f'p={degree}')
  167. plt.legend()
  168. plt.xlabel(f'x')
  169. plt.ylabel(f'y')
  170. plt.show()
  171.  
  172.  
  173. mse_sk_df = pd.DataFrame({'MSE_Train_sk': [], 'MSE_Test_sk': []})
  174. for p in range(1, 21):
  175.     x = data
  176.     y = target
  177.     x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)
  178.  
  179.     poly_features = PolynomialFeatures(degree=p)
  180.     x_train_poly = poly_features.fit_transform(x_train)
  181.     x_test_poly = poly_features.transform(x_test)
  182.  
  183.     model = LinearRegression()
  184.     model.fit(x_train_poly, y_train)
  185.  
  186.     y_pred = model.predict(x_test_poly)
  187.     test_error1 = mean_squared_error(y_test, y_pred)
  188.    
  189.     y_train_pred = model.predict(x_train_poly)
  190.     train_error1 = mean_squared_error(y_train, y_train_pred)
  191.    
  192.     mse_sk_df.loc[p] = [train_error1, test_error1]
  193.  
  194. mse_sk_df.index.names = ['p']
  195. mse_concat = pd.concat([mse_df, mse_sk_df], axis=1)
  196. print(mse_concat)
  197. mse_df_min_match = mse_concat.idxmin()
  198. print(mse_df_min_match)
  199.  
  200. degree_sk = mse_df_min_match.loc['MSE_Test_sk']
  201. x = data
  202. y = target
  203. x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)
  204.  
  205. poly_features = PolynomialFeatures(degree=degree_sk)
  206. x_train_poly = poly_features.fit_transform(x_train)
  207. x_test_poly = poly_features.transform(x_test)
  208.  
  209. model = LinearRegression()
  210. model.fit(x_train_poly, y_train)
  211.  
  212. y_pred = model.predict(x_test_poly)
  213.  
  214. x_plot = X
  215. x_plot_poly = poly_features.transform(x_plot.reshape(-1, 1))
  216. y_plot = model.predict(x_plot_poly)
  217.  
  218. coefficients = model.coef_
  219. intercept = model.intercept_
  220. coefficients[0, 0] = intercept
  221. coefficients = coefficients[0]
  222.  
  223. print(f'\n{fun_name(coefficients)} ; p = {degree_sk}')
  224. print(f'{fun_name(my_coefficients)} ; p = {degree}')
  225. plt.figure(figsize=(10, 7))
  226. plt.title('Порівняння роботи з sklearn')
  227. plt.scatter(X_train[:, 0], y_train[:, 0], c='blue', label='Навчальна вибірка')
  228. plt.scatter(X_test[:, 0], y_test[:, 0], c='red', label='Тестова вибірка')
  229. plt.plot(X, np.sin(X), c='black', label='f(x)=sin(x)', lw=2)
  230. plt.plot(x_plot, y_plot, label=f"p_sk={degree_sk}")
  231. plt.plot(X, function(X, my_coefficients), label=f'p={degree}')
  232. plt.xlabel("X")
  233. plt.ylabel("Y")
  234. plt.legend()
  235. plt.show()
  236.  
  237. plt.figure(figsize=(10, 7))
  238. plt.title('Порівняння роботи з sklearn')
  239. plt.plot(mse_concat.index, mse_concat['MSE_Train'], label='MSE_Train')
  240. plt.plot(mse_concat.index, mse_concat['MSE_Test'], label='MSE_Test')
  241. plt.plot(mse_concat.index, mse_concat['MSE_Train_sk'], label='MSE_Train_sk')
  242. plt.plot(mse_concat.index, mse_concat['MSE_Test_sk'], label='MSE_Test_sk')
  243. plt.xlabel(f'p')
  244. plt.ylabel(f'MSE')
  245. plt.legend()
  246. plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement