mirosh111000

pr4

Oct 19th, 2023
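# pr4: polynomial regression on 400 m race results.
# Dates are converted to fractional years, a polynomial model is fitted by
# solving the normal equations with Gaussian elimination, and the result is
# compared with sklearn's PolynomialFeatures + LinearRegression.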
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

def format_date(date):
    # Convert a 'dd.mm.yyyy' string to an approximate fractional year.
    day, month, year = map(int, date.split('.'))
    return float(year) + float(month)/12 + float(day)/(365 if year % 4 != 0 else 366)

def reformate_date(number):
    # Approximate inverse of format_date (every month is treated as 30 days).
    year = int(number)
    month = (number - year) * (365 if year % 4 != 0 else 366) / 30
    days = int((month - int(month)) * 30)
    month = int(month)
    return f'{days:02d}.{month:02d}.{year}'

def function(x, coefficients):
    # Evaluate the polynomial c0 + c1*x + c2*x^2 + ... element-wise.
    y = coefficients[0]
    for i in range(1, len(coefficients)):
        y += coefficients[i] * x ** i
    return y

def f(x, coefficients):
    # Same polynomial, but for a feature matrix whose columns are x, x^2, ..., x^p.
    return x.dot(coefficients[1:]) + coefficients[0]

def fun_name(coefficients):
    # Build a printable string of the fitted polynomial.
    name = f'f(x)={np.round(coefficients[0], 2)}'
    for i in range(1, len(coefficients)):
        if coefficients[i] >= 0:
            name += '+'
        name += f'{np.round(coefficients[i], 2)}*x^{i}'
    return name

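# Illustrative check (not part of the original script's output):
# fun_name([2.0, -3.0]) -> 'f(x)=2.0-3.0*x^1', and function(1.0, [2.0, -3.0]) -> -1.0.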
def mean_squared_error(actual, predicted):
    # Plain MSE; ravel() lets 2-D column vectors and 1-D arrays be mixed safely.
    actual = np.ravel(actual)
    predicted = np.ravel(predicted)
    return float(np.mean((actual - predicted) ** 2))

def gauss(A, b):
    # Solve A x = b by Gaussian elimination (no pivoting) with back-substitution.
    n = A.shape[0]
    x = np.zeros((n, 1))

    # Forward elimination: zero out the entries below the diagonal.
    for i in range(n):
        for j in range(i + 1, n):
            factor = A[j, i] / A[i, i]
            b[j] -= factor * b[i]
            for k in range(n):
                A[j, k] -= factor * A[i, k]

    # Back-substitution.
    for i in range(n - 1, -1, -1):
        x[i] = b[i]
        for j in range(i + 1, n):
            x[i] -= A[i, j] * x[j]
        x[i] /= A[i, i]

    # Return a flat list of coefficients.
    x = [x[i][0] for i in range(len(x))]

    return x

def least_squares_polynomial_regression(X, y):
    # Prepend a column of ones (intercept) and solve the normal equations.
    X = np.hstack((np.ones((X.shape[0], 1)), X))
    A = np.dot(X.T, X)
    b = np.dot(X.T, y)
    coefficients = gauss(A, b)

    return coefficients

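# Least squares background: the coefficients w minimize ||X w - y||^2, which leads
# to the normal equations (X^T X) w = X^T y; gauss() solves that linear system.
# Note that gauss() uses no pivoting, so it assumes the pivots A[i, i] stay nonzero.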
# 400 m results (seconds) and the dates they were run.
rows = [
    (62.48, '22.02.2018'),
    (58.60, '18.01.2019'),
    (58.12, '28.01.2019'),
    (56.88, '18.06.2019'),
    (58.08, '18.01.2020'),
    (57.17, '27.01.2020'),
    (57.07, '03.02.2020'),
    (56.22, '11.02.2020'),
    (54.04, '26.08.2020'),
    (53.65, '19.09.2020'),
    (54.46, '27.01.2021'),
    (54.07, '10.02.2021'),
    (53.34, '16.06.2021'),
    (53.91, '29.06.2021'),
    (53.23, '22.01.2022'),
    (53.54, '26.08.2022'),
    (52.85, '17.09.2022'),
    (53.26, '17.02.2023'),
    (51.67, '04.05.2023'),
    (51.16, '27.05.2023'),
    (51.33, '07.06.2023'),
    (51.15, '15.07.2023'),
    (50.66, '28.07.2023'),
]
df = pd.DataFrame(rows, columns=['Result_400m', 'Date'])
df['Formatted_date'] = df['Date'].apply(format_date)
print(df)

# data: fractional-year dates (feature); target: 400 m results (label).
data = df.drop(labels=[df.columns[1], df.columns[0]], axis=1)
target = df.drop(labels=[df.columns[2], df.columns[1]], axis=1)
target = np.copy(target)
data = np.copy(data)

# Degree-1 fit: build the feature matrix, split, solve, and predict.
degree = 1
data_poly_manual = np.hstack([data ** i for i in range(1, degree + 1)])
X_train, X_test, y_train, y_test = train_test_split(data_poly_manual, target, test_size=0.3, random_state=42)
coefficients = least_squares_polynomial_regression(X_train, y_train)
y_pred = f(X_test, coefficients)

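# For a general degree p the manual feature matrix has columns [x, x^2, ..., x^p];
# least_squares_polynomial_regression() then adds the constant column itself.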
plt.figure(figsize=(10, 7))
X = np.linspace(np.min(data), np.max(data), 100)
plt.scatter(X_train[:, 0], y_train[:, 0], c='blue', label='Training set')
plt.scatter(X_test[:, 0], y_test[:, 0], c='red', label='Test set')
plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.title('Training and test set points')
plt.show()

mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}\n")

# Side-by-side comparison of test points and predictions.
match_df = pd.DataFrame({'x_test': X_test[:, 0],
                         'y_test': y_test[:, 0],
                         'y_test_pred': y_pred})
print(match_df)

print(f'\n{fun_name(coefficients)} ; p = {degree}')
plt.figure(figsize=(10, 7))
plt.scatter(X_train[:, 0], y_train[:, 0], c='blue', label='Training set')
plt.scatter(X_test[:, 0], y_test[:, 0], c='red', label='Test set')
plt.plot(X, function(X, coefficients), label=f'p={degree}')
plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.show()

# Sweep the polynomial degree p = 1..20, recording train/test MSE for each fit.
plt.figure(figsize=(15, 10))
plt.scatter(X_train[:, 0], y_train[:, 0], c='blue', label='Training set')
plt.scatter(X_test[:, 0], y_test[:, 0], c='red', label='Test set')

mse_df = pd.DataFrame({'MSE_Train': [], 'MSE_Test': []})

for p in range(1, 21):

    degree = p
    data_poly_manual = np.hstack([data ** i for i in range(1, degree + 1)])
    X_train, X_test, y_train, y_test = train_test_split(data_poly_manual, target, test_size=0.3, random_state=42)
    coefficients = least_squares_polynomial_regression(X_train, y_train)
    y_pred = f(X_test, coefficients)

    mse_test = mean_squared_error(y_test, y_pred)
    mse_train = mean_squared_error(y_train, f(X_train, coefficients))
    mse_df.loc[p] = [mse_train, mse_test]
    # Plot only the first few curves so the figure stays readable.
    if p < 6:
        plt.plot(X, function(X, coefficients), label=f' p={degree}')
        print(f'{fun_name(coefficients)} ; p = {degree}')

plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.show()
mse_df.index.names = ['p']
print(mse_df)

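# Typically the training MSE keeps decreasing as p grows while the test MSE
# eventually starts to rise (overfitting); the table above lists both columns.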
plt.figure(figsize=(10, 7))
plt.plot(mse_df.index, mse_df['MSE_Train'], label='MSE_Train')
plt.plot(mse_df.index, mse_df['MSE_Test'], label='MSE_Test')
plt.xlabel('p')
plt.ylabel('MSE')
plt.legend()
plt.show()

# Degrees with the lowest training and test MSE.
mse_df_min = mse_df.idxmin()
print(mse_df_min)

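# Note: idxmin() returns index labels; they are cast to int below because both
# range() and PolynomialFeatures(degree=...) need a plain integer degree.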
# Refit with the degree that gave the smallest test MSE.
degree = int(mse_df_min.loc['MSE_Test'])
data_poly_manual = np.hstack([data ** i for i in range(1, degree + 1)])
X_train, X_test, y_train, y_test = train_test_split(data_poly_manual, target, test_size=0.3, random_state=42)
coefficients = least_squares_polynomial_regression(X_train, y_train)
my_coefficients = coefficients.copy()
y_pred = f(X_test, coefficients)
mse = mean_squared_error(y_test, y_pred)

print(f'\n{fun_name(coefficients)} ; p = {degree}')

plt.figure(figsize=(10, 7))
plt.scatter(X_train[:, 0], y_train[:, 0], c='blue', label='Training set')
plt.scatter(X_test[:, 0], y_test[:, 0], c='red', label='Test set')
plt.plot(X, function(X, coefficients), label=f'p={degree}')
plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.show()

# Repeat the degree sweep with sklearn: PolynomialFeatures + LinearRegression.
mse_sk_df = pd.DataFrame({'MSE_Train_sk': [], 'MSE_Test_sk': []})
for p in range(1, 21):
    x = data
    y = target
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

    poly_features = PolynomialFeatures(degree=p)
    x_train_poly = poly_features.fit_transform(x_train)
    x_test_poly = poly_features.transform(x_test)

    model = LinearRegression()
    model.fit(x_train_poly, y_train)

    y_pred = model.predict(x_test_poly)
    test_error1 = mean_squared_error(y_test, y_pred)

    y_train_pred = model.predict(x_train_poly)
    train_error1 = mean_squared_error(y_train, y_train_pred)

    mse_sk_df.loc[p] = [train_error1, test_error1]

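# PolynomialFeatures(degree=p) also emits a constant (bias) column, so
# model.coef_ has p+1 entries; the constant term itself lives in model.intercept_,
# which is why coef_[0, 0] is overwritten with the intercept further below.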
mse_sk_df.index.names = ['p']
# Put manual and sklearn MSE side by side and find the best degree for each.
mse_concat = pd.concat([mse_df, mse_sk_df], axis=1)
print(mse_concat)
mse_df_min_match = mse_concat.idxmin()
print(mse_df_min_match)

# Refit sklearn's model with its best test-MSE degree.
degree_sk = int(mse_df_min_match.loc['MSE_Test_sk'])
x = data
y = target
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

poly_features = PolynomialFeatures(degree=degree_sk)
x_train_poly = poly_features.fit_transform(x_train)
x_test_poly = poly_features.transform(x_test)

model = LinearRegression()
model.fit(x_train_poly, y_train)

y_pred = model.predict(x_test_poly)

# Evaluate the sklearn model on a dense grid for plotting.
x_plot = X
x_plot_poly = poly_features.transform(x_plot.reshape(-1, 1))
y_plot = model.predict(x_plot_poly)

# Fold the intercept into the coefficient vector so fun_name() prints the full polynomial.
coefficients = model.coef_
intercept = model.intercept_
coefficients[0, 0] = intercept[0]
coefficients = coefficients[0]

print(f'\n{fun_name(coefficients)} ; p = {degree_sk}')
print(f'{fun_name(my_coefficients)} ; p = {degree}')
plt.figure(figsize=(10, 7))
plt.title('Comparison with sklearn')
plt.scatter(X_train[:, 0], y_train[:, 0], c='blue', label='Training set')
plt.scatter(X_test[:, 0], y_test[:, 0], c='red', label='Test set')
plt.plot(x_plot, y_plot, label=f"p_sk={degree_sk}")
plt.plot(X, function(X, my_coefficients), label=f'p={degree}')
plt.xlabel("X")
plt.ylabel("Y")
plt.legend()
plt.show()

plt.figure(figsize=(10, 7))
plt.title('Comparison with sklearn')
plt.plot(mse_concat.index, mse_concat['MSE_Train'], label='MSE_Train')
plt.plot(mse_concat.index, mse_concat['MSE_Test'], label='MSE_Test')
plt.plot(mse_concat.index, mse_concat['MSE_Train_sk'], label='MSE_Train_sk')
plt.plot(mse_concat.index, mse_concat['MSE_Test_sk'], label='MSE_Test_sk')
plt.xlabel('p')
plt.ylabel('MSE')
plt.legend()
plt.show()