Advertisement
pierdziadek

si

Mar 6th, 2025
122
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.40 KB | None | 0 0
  1. import numpy as np
  2. import matplotlib.pyplot as plt
  3.  
  4. from data import get_data, inspect_data, split_data
  5.  
  6. data = get_data()
  7. inspect_data(data)
  8.  
  9. train_data, test_data = split_data(data)
  10.  
  11. # Simple Linear Regression
  12. # predict MPG (y, dependent variable) using Weight (x, independent variable) using closed-form solution
  13. # y = theta_0 + theta_1 * x - we want to find theta_0 and theta_1 parameters that minimize the prediction error
  14.  
  15. # We can calculate the error using MSE metric:
  16. # MSE = (1 / n) * SUM (from i=1 to n) (actual_output - predicted_output) ** 2
  17.  
  18. # get the columns
  19. y_train = train_data['MPG'].to_numpy().reshape(-1, 1)
  20. x_train = train_data['Weight'].to_numpy().reshape(-1, 1)
  21.  
  22. y_test = test_data['MPG'].to_numpy().reshape(-1, 1)
  23. x_test = test_data['Weight'].to_numpy().reshape(-1, 1)
  24.  
  25. # TODO: calculate closed-form solution
  26. theta_best = [0, 0]
  27. X_b = np.c_[np.ones((x_train.shape[0], 1)), x_train]
  28. theta_best = np.linalg.inv(X_b.T @ X_b) @ X_b.T @ y_train
  29. print(f"theta best: ${theta_best}")
  30. print(f"X_b.shape: ${X_b.shape}")
  31.  
  32. # TODO: calculate error
  33. predicted = X_b @ theta_best
  34. MSE_best = np.mean((y_train - predicted) ** 2)
  35. print(f"MSE best: ${MSE_best}")
  36.  
  37. # plot the regression line
  38. x = np.linspace(min(x_test), max(x_test), 100)
  39. y = float(theta_best[0]) + float(theta_best[1]) * x
  40. plt.plot(x, y)
  41. plt.scatter(x_test, y_test)
  42. plt.xlabel('Weight')
  43. plt.ylabel('MPG')
  44. plt.show()
  45.  
  46. # TODO: standardization
  47. mean = np.mean(x_train, axis=0)
  48. odchylenie = np.std(x_train, axis=0)
  49.  
  50. x_train_scaled = (x_train - mean) / odchylenie
  51. x_test_scaled = (x_test - mean) / odchylenie
  52.  
  53. X_b = np.c_[np.ones((x_train_scaled.shape[0], 1)), x_train_scaled]
  54. X_test_b = np.c_[np.ones((x_test_scaled.shape[0], 1)), x_test_scaled]
  55.  
  56.  
  57. # TODO: calculate theta using Batch Gradient Descent
  58. lr = 0.01
  59. epochs = 5000
  60. theta_best = np.zeros((X_b.shape[1], 1))
  61. for _ in range(epochs):
  62.     gradient = (2 / X_b.shape[0]) * X_b.T @ (X_b @ theta_best - y_train)
  63.     theta_best -= lr * gradient
  64. print(f"Optymalne theta: {theta_best.flatten()}")
  65.  
  66. # TODO: calculate error
  67. y_pred = X_test_b @ theta_best
  68.  
  69. MSE_test = np.mean((y_test - y_pred) ** 2)
  70. print(f"MSE gradient: {MSE_test}")
  71.  
  72. # plot the regression line
  73. x = np.linspace(min(x_test_scaled), max(x_test_scaled), 100)
  74. y = float(theta_best[0]) + float(theta_best[1]) * x
  75. plt.plot(x, y)
  76. plt.scatter(x_test_scaled, y_test)
  77. plt.xlabel('Weight')
  78. plt.ylabel('MPG')
  79. plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement