Advertisement
mirosh111000

Практика№4

Oct 23rd, 2023
124
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 11.55 KB | None | 0 0
  1. import numpy as np
  2. import pandas as pd
  3. from sklearn.model_selection import train_test_split
  4. import matplotlib.pyplot as plt
  5. import random
  6. from sklearn.linear_model import LinearRegression
  7. from sklearn.preprocessing import PolynomialFeatures
  8. from sklearn.linear_model import Ridge
  9.  
  10.  
  11. def function(x, coefficients):
  12.     y = coefficients[0]
  13.     for i in range(1, len(coefficients)):
  14.         y += coefficients[i] * x ** i
  15.     return y
  16.  
  17.  
  18. def f(x, coefficients):
  19.     return x.dot(coefficients[1:]) + coefficients[0]
  20.  
  21.  
  22. def fun_name(coefficients):
  23.     name = f'f(x)={np.round(coefficients[0], 2)}'
  24.     for i in range(1, len(coefficients)):
  25.         if coefficients[i] >= 0:
  26.             name += '+'
  27.         name += f'{np.round(coefficients[i], 2)}*x^{i}'
  28.     return name
  29.  
  30.  
  31. def mean_squared_error(actual, predicted):
  32.     n = len(actual)
  33.     squared_errors = [(actual[i] - predicted[i]) ** 2 for i in range(n)]
  34.     mse = sum(squared_errors) / n
  35.     return mse[0]
  36.  
  37.  
  38. def least_squares_polynomial_regression(X, y):
  39.     X = np.hstack((np.ones((X.shape[0], 1)), X))
  40.  
  41.     coefficients = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)
  42.     coefficients = [coefficients[i][0] for i in range(len(coefficients))]
  43.  
  44.     return coefficients
  45.  
  46.  
  47. def R2(x, y, lambda_value):
  48.     X = np.hstack((np.ones((x.shape[0], 1)), x))
  49.  
  50.     identity_matrix = np.identity(X.shape[1])
  51.  
  52.     coefficients = np.linalg.inv(X.T.dot(X) + lambda_value * identity_matrix).dot(X.T).dot(y)
  53.     coefficients = [coefficients[i][0] for i in range(len(coefficients))]
  54.  
  55.     return coefficients
  56.  
  57.  
  58. def best_R2(x, y, x_test, y_test):
  59.     mse_df = pd.DataFrame({'MSE_Test': []})
  60.  
  61.     for i in range(1, 101):
  62.         lambda_value = i / 100
  63.         coefficients_R2 = R2(x, y, lambda_value)
  64.         y_pred = f(x_test, coefficients_R2)
  65.  
  66.         mse_test = mean_squared_error(y_test, y_pred)
  67.         mse_df.loc[lambda_value] = [mse_test]
  68.  
  69.     mse_df.index.names = ['lambda']
  70.     mse_df_min = mse_df.idxmin()
  71.  
  72.     coefficients_R2 = R2(x, y, mse_df_min.values)
  73.  
  74.     return coefficients_R2
  75.  
  76.  
  77. def best_R2_sk(x_train, y_train, x_test, y_test):
  78.  
  79.     mse_df = pd.DataFrame({'MSE_Test': []})
  80.  
  81.     for i in range(1, 101):
  82.         lambda_value = i / 100
  83.  
  84.         ridge_model = Ridge(alpha=lambda_value)
  85.         ridge_model.fit(x_train, y_train)
  86.         y_pred_R2 = ridge_model.predict(x_test)
  87.  
  88.         mse_test = mean_squared_error(y_test, y_pred_R2)
  89.         mse_df.loc[lambda_value] = [mse_test]
  90.  
  91.     mse_df.index.names = ['lambda']
  92.     mse_df_min = mse_df.idxmin()
  93.  
  94.     return mse_df_min.values
  95.  
# Load iris.csv and drop all but one column, leaving a single-feature matrix.
# NOTE(review): which column survives depends on iris.csv's layout — the drop
# list removes columns 0, 1, 3, 4 and the last one; verify against the file.
iris = pd.read_csv('iris.csv')
data = iris.drop(labels=[iris.columns[-1], iris.columns[1], iris.columns[0], iris.columns[3], iris.columns[4]], axis=1)
data = np.copy(data)  # detach from the DataFrame as a plain ndarray
# Synthetic regression target: y = sin(x) plus uniform noise in [-0.3, 0.3].
# NOTE(review): random is unseeded, so the targets differ on every run.
target = np.sin(data)
for i in range(len(target)):
    target[i] += random.uniform(-0.3, 0.3)  # one noise draw per row
  102.  
# --- Degree-1 baseline: fit OLS and grid-searched ridge, plot the raw data ---
degree = 1
# Manual polynomial expansion: columns x^1 .. x^degree (bias added inside the fit).
data_poly_manual = np.hstack([data ** i for i in range(1, degree + 1)])
X_train, X_test, y_train, y_test = train_test_split(data_poly_manual, target, test_size=0.3, random_state=42)
coefficients = least_squares_polynomial_regression(X_train, y_train)
y_pred = f(X_test, coefficients)

# Ridge coefficients at the lambda that minimizes the test-set MSE.
coefficients_R2 = best_R2(X_train, y_train, X_test, y_test)
y_pred_R2 = f(X_test, coefficients_R2)

plt.figure(figsize=(10, 7))
X = np.linspace(np.min(data), np.max(data), 100)  # plotting grid, reused by all later plots
plt.scatter(X_train[:, 0], y_train[:, 0], c='blue', label='Навчальна вибірка')
plt.scatter(X_test[:, 0], y_test[:, 0], c='red', label='Тестова вибірка')
plt.plot(X, np.sin(X), c='black', label='f(x)=sin(x)', lw=2)
plt.legend()
plt.xlabel(f'x')
plt.ylabel(f'y')
plt.title('Побудова точок навчальної та тестової вибірки')
plt.show()

# Test-set MSE of both degree-1 fits.
mse = mean_squared_error(y_test, y_pred)
mse_R2 = mean_squared_error(y_test, y_pred_R2)
print(f"Mean Squared Error: {mse}\n")
print(f"Mean Squared Error R2: {mse_R2}\n")
  127.  
  128.  
# Side-by-side table: test inputs, true targets, and both models' predictions.
# NOTE(review): the 'y_test_pred_R2:' key carries a stray trailing colon.
match_df = pd.DataFrame({f'x_test': X_test[:, 0],
                         f'y_test': y_test[:, 0],
                         f'y_test_pred': y_pred,
                         f'y_test_pred_R2:': y_pred_R2})
print(match_df)

# Print both fitted degree-1 polynomials in readable form.
print(f'\n{fun_name(coefficients)} ; p = {degree}')
print(f'R2: {fun_name(coefficients_R2)} ; p = {degree}')
  137.  
# Plot the data, the true sin curve, and both degree-1 fitted curves.
plt.figure(figsize=(10, 7))
plt.scatter(X_train[:, 0], y_train[:, 0], c='blue', label='Навчальна вибірка')
plt.scatter(X_test[:, 0], y_test[:, 0], c='red', label='Тестова вибірка')
plt.plot(X, np.sin(X), c='black', label='f(x)=sin(x)', lw=2)
plt.plot(X, function(X, coefficients), label=f'p={degree}')
plt.plot(X, function(X, coefficients_R2), label=f'R2: p={degree}')
plt.legend()
plt.xlabel(f'x')
plt.ylabel(f'y')
plt.show()
  148.  
# --- Sweep polynomial degree p = 1..20 for both the OLS and ridge fits ---
# The scatter/sin backdrop is drawn first; fitted OLS curves for p < 6 are overlaid.
plt.figure(figsize=(15, 10))
plt.scatter(X_train[:, 0], y_train[:, 0], c='blue', label='Навчальна вибірка')
plt.scatter(X_test[:, 0], y_test[:, 0], c='red', label='Тестова вибірка')
plt.plot(X, np.sin(X), c='black', label='f(x)=sin(x)', lw=2)

# Per-degree train/test MSE for the plain and ridge-regularized fits.
mse_df = pd.DataFrame({'MSE_Train': [], 'MSE_Test': [], 'R2_MSE_Train': [], 'R2_MSE_Test': []})

for p in range(1, 21):

    degree = p
    # Rebuild the manual polynomial features and the split for this degree
    # (random_state=42 keeps the row split identical across degrees).
    data_poly_manual = np.hstack([data ** i for i in range(1, degree + 1)])
    X_train, X_test, y_train, y_test = train_test_split(data_poly_manual, target, test_size=0.3, random_state=42)
    coefficients = least_squares_polynomial_regression(X_train, y_train)
    y_pred = f(X_test, coefficients)

    coefficients_R2 = best_R2(X_train, y_train, X_test, y_test)
    y_pred_R2 = f(X_test, coefficients_R2)

    mse_test = mean_squared_error(y_test, y_pred)
    mse_train = mean_squared_error(y_train, f(X_train, coefficients))

    mse_test_R2 = mean_squared_error(y_test, y_pred_R2)
    mse_train_R2 = mean_squared_error(y_train, f(X_train, coefficients_R2))

    mse_df.loc[p] = [mse_train, mse_test, mse_train_R2, mse_test_R2]
    # Only the low-degree OLS curves are drawn, to keep the plot readable.
    if p < 6:
        plt.plot(X, function(X, coefficients), label=f' p={degree}')
        print(f'{fun_name(coefficients)} ; p = {degree}')

plt.legend()
plt.xlabel(f'x')
plt.ylabel(f'y')
plt.show()
mse_df.index.names = ['p']
print(mse_df)
  184.  
# Plot the four MSE curves against degree, then report, per column, the
# degree p at which each MSE is smallest.
plt.figure(figsize=(10, 7))
plt.plot(mse_df.index, mse_df['MSE_Train'], label='MSE_Train')
plt.plot(mse_df.index, mse_df['MSE_Test'], label='MSE_Test')
plt.plot(mse_df.index, mse_df['R2_MSE_Train'], label='R2_MSE_Train')
plt.plot(mse_df.index, mse_df['R2_MSE_Test'], label='R2_MSE_Test')
plt.xlabel(f'p')
plt.ylabel(f'MSE')
plt.legend()
plt.show()

mse_df_min = mse_df.idxmin()  # Series: column name -> best degree
print(mse_df_min)
  197.  
# Refit both models at the degree with the best plain-OLS test MSE.
degree = mse_df_min.loc['MSE_Test']

data_poly_manual = np.hstack([data ** i for i in range(1, degree + 1)])
X_train, X_test, y_train, y_test = train_test_split(data_poly_manual, target, test_size=0.3, random_state=42)

coefficients = least_squares_polynomial_regression(X_train, y_train)
coefficients_R2 = best_R2(X_train, y_train, X_test, y_test)

# Keep copies for the sklearn-comparison plot at the end of the script.
my_coefficients = coefficients.copy()
my_coefficients_R2 = coefficients_R2.copy()

y_pred = f(X_test, coefficients)
y_pred_R2 = f(X_test, coefficients_R2)

mse = mean_squared_error(y_test, y_pred)
mse_R2 = mean_squared_error(y_test, y_pred_R2)


print(f'\n{fun_name(coefficients)} ; p = {degree}')
print(f'R2: {fun_name(coefficients_R2)} ; p = {degree}')

# Data plus both fitted curves at the chosen degree.
plt.figure(figsize=(10, 7))
plt.scatter(X_train[:, 0], y_train[:, 0], c='blue', label='Навчальна вибірка')
plt.scatter(X_test[:, 0], y_test[:, 0], c='red', label='Тестова вибірка')
plt.plot(X, np.sin(X), c='black', label='f(x)=sin(x)', lw=2)
plt.plot(X, function(X, coefficients), label=f'p={degree}')
plt.plot(X, function(X, coefficients_R2), label=f'R2: p={degree}')

plt.legend()
plt.xlabel(f'x')
plt.ylabel(f'y')
plt.show()
  230.  
  231. mse_sk_df = pd.DataFrame({'MSE_Train_sk': [], 'MSE_Test_sk': [], 'R2_MSE_Train_sk': [], 'R2_MSE_Test_sk': []})
  232. for p in range(1, 21):
  233.     x = data
  234.     y = target
  235.     x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)
  236.  
  237.     poly_features = PolynomialFeatures(degree=p)
  238.     x_train_poly = poly_features.fit_transform(x_train)
  239.     x_test_poly = poly_features.transform(x_test)
  240.  
  241.     model = LinearRegression()
  242.     model.fit(x_train_poly, y_train)
  243.  
  244.     y_pred = model.predict(x_test_poly)
  245.     test_error1 = mean_squared_error(y_test, y_pred)
  246.  
  247.     y_train_pred = model.predict(x_train_poly)
  248.     train_error1 = mean_squared_error(y_train, y_train_pred)
  249.  
  250.     lambda_val =  best_R2_sk(x_train_poly, y_train, x_test_poly, y_test)
  251.  
  252.     R2_model = Ridge(alpha=lambda_val)
  253.     R2_model.fit(x_train_poly, y_train)
  254.  
  255.     y_test_pred_R2 = R2_model.predict(x_test_poly)
  256.     test_error2 = mean_squared_error(y_test, y_test_pred_R2)
  257.  
  258.     y_train_pred_R2 = R2_model.predict(x_train_poly)
  259.     train_error2 = mean_squared_error(y_train, y_train_pred)
  260.  
  261.     mse_sk_df.loc[p] = [train_error1, test_error1, train_error2, test_error2]
  262.  
  263. mse_sk_df.index.names = ['p']
  264. mse_concat = pd.concat([mse_df, mse_sk_df], axis=1)
  265. print(mse_concat)
  266. mse_df_min_match = mse_concat.idxmin()
  267. print(mse_df_min_match)
  268.  
# Refit the sklearn models at the degree that minimized the sklearn test MSE.
degree_sk = mse_df_min_match.loc['MSE_Test_sk']
x = data
y = target
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

poly_features = PolynomialFeatures(degree=degree_sk)
x_train_poly = poly_features.fit_transform(x_train)
x_test_poly = poly_features.transform(x_test)

# Ridge at the grid-searched best alpha.
lambda_val =  best_R2_sk(x_train_poly, y_train, x_test_poly, y_test)
R2_model = Ridge(alpha=lambda_val)
R2_model.fit(x_train_poly, y_train)

# Plain least squares on the same features.
model = LinearRegression()
model.fit(x_train_poly, y_train)

y_pred = model.predict(x_test_poly)
y_pred_R2 = R2_model.predict(x_test_poly)

# Smooth prediction curves over the plotting grid X built earlier.
x_plot = X
x_plot_poly = poly_features.transform(x_plot.reshape(-1, 1))
y_plot = model.predict(x_plot_poly)
y_plot_R2 = R2_model.predict(x_plot_poly)

# Fold the intercept into slot 0 of the coefficient row so fun_name() can
# print the full polynomial. NOTE(review): this assumes the weight sklearn
# assigned to PolynomialFeatures' bias column is redundant with intercept_;
# the slot is simply overwritten here — confirm.
coefficients = model.coef_
intercept = model.intercept_
coefficients[0, 0] = intercept
coefficients = coefficients[0]

coefficients_R2 = R2_model.coef_
intercept = R2_model.intercept_
coefficients_R2[0, 0] = intercept
coefficients_R2 = coefficients_R2[0]

# Compare sklearn's fitted polynomials with the hand-rolled ones saved earlier.
print(f'\n{fun_name(coefficients)} ; p_sk = {degree_sk}')
print(f'R2: {fun_name(coefficients_R2)} ; p_sk = {degree_sk}')
print(f'{fun_name(my_coefficients)} ; p = {degree}')
print(f'R2: {fun_name(my_coefficients_R2)} ; p = {degree}')
  307.  
  308.  
# Final overlay: sklearn's curves against the hand-rolled fits.
# NOTE(review): X_train/X_test still hold the manual polynomial features from
# the earlier refit, while y_train/y_test were overwritten by the sklearn
# split; both splits use random_state=42, so the rows still correspond — verify.
plt.figure(figsize=(15, 10))
plt.title('Порівняння роботи з sklearn')
plt.scatter(X_train[:, 0], y_train[:, 0], c='blue', label='Навчальна вибірка')
plt.scatter(X_test[:, 0], y_test[:, 0], c='red', label='Тестова вибірка')
plt.plot(X, np.sin(X), c='black', label='f(x)=sin(x)', lw=2)
plt.plot(x_plot, y_plot, label=f"p_sk={degree_sk}")
plt.plot(x_plot, y_plot_R2, label=f"R2: p_sk={degree_sk}")
plt.plot(X, function(X, my_coefficients), label=f'p={degree}')
plt.plot(X, function(X, my_coefficients_R2), label=f'R2: p={degree}')
plt.xlabel("X")
plt.ylabel("Y")
plt.legend()
plt.show()

# All eight MSE-vs-degree curves (hand-rolled and sklearn) on one figure.
plt.figure(figsize=(10, 7))
plt.title('Порівняння роботи з sklearn')
plt.plot(mse_concat.index, mse_concat['MSE_Train'], label='MSE_Train')
plt.plot(mse_concat.index, mse_concat['MSE_Test'], label='MSE_Test')
plt.plot(mse_concat.index, mse_concat['MSE_Train_sk'], label='MSE_Train_sk')
plt.plot(mse_concat.index, mse_concat['MSE_Test_sk'], label='MSE_Test_sk')
plt.plot(mse_concat.index, mse_concat['R2_MSE_Train'], label='R2_MSE_Train')
plt.plot(mse_concat.index, mse_concat['R2_MSE_Test'], label='R2_MSE_Test')
plt.plot(mse_concat.index, mse_concat['R2_MSE_Train_sk'], label='R2_MSE_Train_sk')
plt.plot(mse_concat.index, mse_concat['R2_MSE_Test_sk'], label='R2_MSE_Test_sk')
plt.xlabel(f'p')
plt.ylabel(f'MSE')
plt.legend()
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement