import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
def function(x, coefficients):
    # Evaluate the polynomial c0 + c1*x + c2*x^2 + ... at x (scalar or array).
    y = coefficients[0]
    for i in range(1, len(coefficients)):
        y += coefficients[i] * x ** i
    return y
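# Illustrative check of the helper above: f(x) = 1 + 2x at x = 3 gives 7.
#   function(3, [1, 2])  ->  7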
def f(x, coefficients):
    # Same polynomial, but x already holds the feature columns [x, x^2, ...].
    return x.dot(coefficients[1:]) + coefficients[0]
def fun_name(coefficients):
    # Build a printable string such as 'f(x)=0.5+1.2*x^1-0.3*x^2'.
    name = f'f(x)={np.round(coefficients[0], 2)}'
    for i in range(1, len(coefficients)):
        if coefficients[i] >= 0:
            name += '+'
        name += f'{np.round(coefficients[i], 2)}*x^{i}'
    return name
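# Illustrative example:
#   fun_name([1.0, -2.0])  ->  'f(x)=1.0-2.0*x^1'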
def mean_squared_error(actual, predicted):
    # Mean squared error. ravel() lets this accept 1-D vectors and (n, 1)
    # column vectors interchangeably (the original indexing relied on the
    # inputs being column vectors and broke otherwise).
    actual = np.ravel(actual)
    predicted = np.ravel(predicted)
    return float(np.mean((actual - predicted) ** 2))
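# Sanity check (hedged): this should agree with sklearn's implementation up
# to floating-point error, e.g.
#   from sklearn.metrics import mean_squared_error as sk_mse
#   assert abs(mean_squared_error([1, 2], [1, 4]) - sk_mse([1, 2], [1, 4])) < 1e-12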
def least_squares_polynomial_regression(X, y):
    # Ordinary least squares via the normal equation: w = (X^T X)^(-1) X^T y.
    X = np.hstack((np.ones((X.shape[0], 1)), X))
    coefficients = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)
    return list(coefficients.ravel())
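# Note: X^T X becomes ill-conditioned at high polynomial degrees. A more
# numerically stable drop-in (not used here, to preserve the behaviour
# above) would be:
#   coefficients, *_ = np.linalg.lstsq(X, y, rcond=None)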
def R2(x, y, lambda_value):
    # Ridge (Tikhonov) regression in closed form:
    # w = (X^T X + lambda*I)^(-1) X^T y.
    # As written, the identity also penalises the intercept column.
    X = np.hstack((np.ones((x.shape[0], 1)), x))
    identity_matrix = np.identity(X.shape[1])
    coefficients = np.linalg.inv(X.T.dot(X) + lambda_value * identity_matrix).dot(X.T).dot(y)
    return list(coefficients.ravel())
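# Hedged note: sklearn's Ridge does NOT penalise the intercept, so the
# closed form above and Ridge(alpha=lambda_value) can differ slightly for
# the same lambda; the sklearn comparison later in the script reflects this.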
def best_R2(x, y, x_test, y_test):
    # Grid-search lambda in {0.01, 0.02, ..., 1.00} by test-set MSE,
    # then refit at the best lambda.
    mse_df = pd.DataFrame({'MSE_Test': []})
    for i in range(1, 101):
        lambda_value = i / 100
        coefficients_R2 = R2(x, y, lambda_value)
        y_pred = f(x_test, coefficients_R2)
        mse_test = mean_squared_error(y_test, y_pred)
        mse_df.loc[lambda_value] = [mse_test]
    mse_df.index.names = ['lambda']
    best_lambda = float(mse_df.idxmin().iloc[0])
    coefficients_R2 = R2(x, y, best_lambda)
    return coefficients_R2
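# Hedged note: selecting lambda on the test set is optimistic; a held-out
# validation split or cross-validation is the standard choice, e.g.:
#   from sklearn.linear_model import RidgeCV
#   ridge = RidgeCV(alphas=np.arange(0.01, 1.01, 0.01)).fit(x, y)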
def best_R2_sk(x_train, y_train, x_test, y_test):
    # Same lambda grid search as best_R2, but fitting sklearn's Ridge;
    # returns the best alpha rather than the coefficients.
    mse_df = pd.DataFrame({'MSE_Test': []})
    for i in range(1, 101):
        lambda_value = i / 100
        ridge_model = Ridge(alpha=lambda_value)
        ridge_model.fit(x_train, y_train)
        y_pred_R2 = ridge_model.predict(x_test)
        mse_test = mean_squared_error(y_test, y_pred_R2)
        mse_df.loc[lambda_value] = [mse_test]
    mse_df.index.names = ['lambda']
    return float(mse_df.idxmin().iloc[0])
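# Usage sketch (illustrative, assuming polynomial feature matrices):
#   alpha = best_R2_sk(x_train_poly, y_train, x_test_poly, y_test)
#   model = Ridge(alpha=alpha).fit(x_train_poly, y_train)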
# Load the iris data and keep a single feature column as the input x
# (petal length in the standard 5-column iris.csv); the synthetic target
# is y = sin(x) + uniform noise in [-0.3, 0.3].
iris = pd.read_csv('iris.csv')
data = iris.drop(labels=[iris.columns[-1], iris.columns[1], iris.columns[0],
                         iris.columns[3], iris.columns[4]], axis=1)
data = np.copy(data)
target = np.sin(data)
for i in range(len(target)):
    target[i] += random.uniform(-0.3, 0.3)
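# Equivalent vectorised noise (same distribution, different random draws):
#   target = np.sin(data) + np.random.uniform(-0.3, 0.3, size=data.shape)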
# Baseline: fit a degree-1 polynomial with the manual OLS and ridge code.
degree = 1
data_poly_manual = np.hstack([data ** i for i in range(1, degree + 1)])
X_train, X_test, y_train, y_test = train_test_split(data_poly_manual, target, test_size=0.3, random_state=42)
coefficients = least_squares_polynomial_regression(X_train, y_train)
y_pred = f(X_test, coefficients)
coefficients_R2 = best_R2(X_train, y_train, X_test, y_test)
y_pred_R2 = f(X_test, coefficients_R2)
plt.figure(figsize=(10, 7))
X = np.linspace(np.min(data), np.max(data), 100)
plt.scatter(X_train[:, 0], y_train[:, 0], c='blue', label='Training set')
plt.scatter(X_test[:, 0], y_test[:, 0], c='red', label='Test set')
plt.plot(X, np.sin(X), c='black', label='f(x)=sin(x)', lw=2)
plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.title('Training and test set points')
plt.show()
mse = mean_squared_error(y_test, y_pred)
mse_R2 = mean_squared_error(y_test, y_pred_R2)
print(f"Mean Squared Error: {mse}\n")
print(f"Mean Squared Error R2: {mse_R2}\n")
match_df = pd.DataFrame({'x_test': X_test[:, 0],
                         'y_test': y_test[:, 0],
                         'y_test_pred': y_pred,
                         'y_test_pred_R2': y_pred_R2})
print(match_df)
print(f'\n{fun_name(coefficients)} ; p = {degree}')
print(f'R2: {fun_name(coefficients_R2)} ; p = {degree}')
plt.figure(figsize=(10, 7))
plt.scatter(X_train[:, 0], y_train[:, 0], c='blue', label='Training set')
plt.scatter(X_test[:, 0], y_test[:, 0], c='red', label='Test set')
plt.plot(X, np.sin(X), c='black', label='f(x)=sin(x)', lw=2)
plt.plot(X, function(X, coefficients), label=f'p={degree}')
plt.plot(X, function(X, coefficients_R2), label=f'R2: p={degree}')
plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.show()
# Sweep polynomial degree p = 1..20, recording train/test MSE for both the
# plain least-squares fit and the ridge fit; plot the first few fits.
plt.figure(figsize=(15, 10))
plt.scatter(X_train[:, 0], y_train[:, 0], c='blue', label='Training set')
plt.scatter(X_test[:, 0], y_test[:, 0], c='red', label='Test set')
plt.plot(X, np.sin(X), c='black', label='f(x)=sin(x)', lw=2)
mse_df = pd.DataFrame({'MSE_Train': [], 'MSE_Test': [], 'R2_MSE_Train': [], 'R2_MSE_Test': []})
for p in range(1, 21):
    degree = p
    data_poly_manual = np.hstack([data ** i for i in range(1, degree + 1)])
    X_train, X_test, y_train, y_test = train_test_split(data_poly_manual, target, test_size=0.3, random_state=42)
    coefficients = least_squares_polynomial_regression(X_train, y_train)
    y_pred = f(X_test, coefficients)
    coefficients_R2 = best_R2(X_train, y_train, X_test, y_test)
    y_pred_R2 = f(X_test, coefficients_R2)
    mse_test = mean_squared_error(y_test, y_pred)
    mse_train = mean_squared_error(y_train, f(X_train, coefficients))
    mse_test_R2 = mean_squared_error(y_test, y_pred_R2)
    mse_train_R2 = mean_squared_error(y_train, f(X_train, coefficients_R2))
    mse_df.loc[p] = [mse_train, mse_test, mse_train_R2, mse_test_R2]
    if p < 6:
        plt.plot(X, function(X, coefficients), label=f'p={degree}')
        print(f'{fun_name(coefficients)} ; p = {degree}')
plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.show()
mse_df.index.names = ['p']
print(mse_df)
plt.figure(figsize=(10, 7))
plt.plot(mse_df.index, mse_df['MSE_Train'], label='MSE_Train')
plt.plot(mse_df.index, mse_df['MSE_Test'], label='MSE_Test')
plt.plot(mse_df.index, mse_df['R2_MSE_Train'], label='R2_MSE_Train')
plt.plot(mse_df.index, mse_df['R2_MSE_Test'], label='R2_MSE_Test')
plt.xlabel('p')
plt.ylabel('MSE')
plt.legend()
plt.show()
# Refit the manual models at the degree with the lowest test MSE.
mse_df_min = mse_df.idxmin()
print(mse_df_min)
degree = int(mse_df_min.loc['MSE_Test'])
data_poly_manual = np.hstack([data ** i for i in range(1, degree + 1)])
X_train, X_test, y_train, y_test = train_test_split(data_poly_manual, target, test_size=0.3, random_state=42)
coefficients = least_squares_polynomial_regression(X_train, y_train)
coefficients_R2 = best_R2(X_train, y_train, X_test, y_test)
my_coefficients = coefficients.copy()
my_coefficients_R2 = coefficients_R2.copy()
y_pred = f(X_test, coefficients)
y_pred_R2 = f(X_test, coefficients_R2)
mse = mean_squared_error(y_test, y_pred)
mse_R2 = mean_squared_error(y_test, y_pred_R2)
print(f'\n{fun_name(coefficients)} ; p = {degree}')
print(f'R2: {fun_name(coefficients_R2)} ; p = {degree}')
plt.figure(figsize=(10, 7))
plt.scatter(X_train[:, 0], y_train[:, 0], c='blue', label='Training set')
plt.scatter(X_test[:, 0], y_test[:, 0], c='red', label='Test set')
plt.plot(X, np.sin(X), c='black', label='f(x)=sin(x)', lw=2)
plt.plot(X, function(X, coefficients), label=f'p={degree}')
plt.plot(X, function(X, coefficients_R2), label=f'R2: p={degree}')
plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.show()
# Repeat the degree sweep with scikit-learn: PolynomialFeatures plus
# LinearRegression for plain least squares, and Ridge with the grid-searched alpha.
mse_sk_df = pd.DataFrame({'MSE_Train_sk': [], 'MSE_Test_sk': [], 'R2_MSE_Train_sk': [], 'R2_MSE_Test_sk': []})
for p in range(1, 21):
    x = data
    y = target
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)
    poly_features = PolynomialFeatures(degree=p)
    x_train_poly = poly_features.fit_transform(x_train)
    x_test_poly = poly_features.transform(x_test)
    model = LinearRegression()
    model.fit(x_train_poly, y_train)
    y_pred = model.predict(x_test_poly)
    test_error1 = mean_squared_error(y_test, y_pred)
    y_train_pred = model.predict(x_train_poly)
    train_error1 = mean_squared_error(y_train, y_train_pred)
    lambda_val = best_R2_sk(x_train_poly, y_train, x_test_poly, y_test)
    R2_model = Ridge(alpha=lambda_val)
    R2_model.fit(x_train_poly, y_train)
    y_test_pred_R2 = R2_model.predict(x_test_poly)
    test_error2 = mean_squared_error(y_test, y_test_pred_R2)
    y_train_pred_R2 = R2_model.predict(x_train_poly)
    train_error2 = mean_squared_error(y_train, y_train_pred_R2)  # was y_train_pred: compared against the wrong model
    mse_sk_df.loc[p] = [train_error1, test_error1, train_error2, test_error2]
mse_sk_df.index.names = ['p']
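# Note: PolynomialFeatures(degree=p) already prepends a bias (all-ones)
# column, so the models receive [1, x, x^2, ..., x^p]; LinearRegression and
# Ridge additionally fit their own intercept, which is why the bias
# column's learned coefficient comes out near zero below.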
mse_concat = pd.concat([mse_df, mse_sk_df], axis=1)
print(mse_concat)
mse_df_min_match = mse_concat.idxmin()
print(mse_df_min_match)
# Refit the sklearn models at the degree with the lowest sklearn test MSE.
degree_sk = int(mse_df_min_match.loc['MSE_Test_sk'])
x = data
y = target
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)
poly_features = PolynomialFeatures(degree=degree_sk)
x_train_poly = poly_features.fit_transform(x_train)
x_test_poly = poly_features.transform(x_test)
lambda_val = best_R2_sk(x_train_poly, y_train, x_test_poly, y_test)
R2_model = Ridge(alpha=lambda_val)
R2_model.fit(x_train_poly, y_train)
model = LinearRegression()
model.fit(x_train_poly, y_train)
y_pred = model.predict(x_test_poly)
y_pred_R2 = R2_model.predict(x_test_poly)
x_plot = X
x_plot_poly = poly_features.transform(x_plot.reshape(-1, 1))
y_plot = model.predict(x_plot_poly)
y_plot_R2 = R2_model.predict(x_plot_poly)
# Fold each model's intercept into the (otherwise ~zero) bias-column slot
# of coef_ so fun_name can print the full polynomial.
coefficients = model.coef_
coefficients[0, 0] = model.intercept_[0]
coefficients = coefficients[0]
coefficients_R2 = R2_model.coef_
coefficients_R2[0, 0] = R2_model.intercept_[0]
coefficients_R2 = coefficients_R2[0]
print(f'\n{fun_name(coefficients)} ; p_sk = {degree_sk}')
print(f'R2: {fun_name(coefficients_R2)} ; p_sk = {degree_sk}')
print(f'{fun_name(my_coefficients)} ; p = {degree}')
print(f'R2: {fun_name(my_coefficients_R2)} ; p = {degree}')
plt.figure(figsize=(15, 10))
plt.title('Comparison with sklearn')
plt.scatter(X_train[:, 0], y_train[:, 0], c='blue', label='Training set')
plt.scatter(X_test[:, 0], y_test[:, 0], c='red', label='Test set')
plt.plot(X, np.sin(X), c='black', label='f(x)=sin(x)', lw=2)
plt.plot(x_plot, y_plot, label=f'p_sk={degree_sk}')
plt.plot(x_plot, y_plot_R2, label=f'R2: p_sk={degree_sk}')
plt.plot(X, function(X, my_coefficients), label=f'p={degree}')
plt.plot(X, function(X, my_coefficients_R2), label=f'R2: p={degree}')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()
plt.figure(figsize=(10, 7))
plt.title('Comparison with sklearn')
plt.plot(mse_concat.index, mse_concat['MSE_Train'], label='MSE_Train')
plt.plot(mse_concat.index, mse_concat['MSE_Test'], label='MSE_Test')
plt.plot(mse_concat.index, mse_concat['MSE_Train_sk'], label='MSE_Train_sk')
plt.plot(mse_concat.index, mse_concat['MSE_Test_sk'], label='MSE_Test_sk')
plt.plot(mse_concat.index, mse_concat['R2_MSE_Train'], label='R2_MSE_Train')
plt.plot(mse_concat.index, mse_concat['R2_MSE_Test'], label='R2_MSE_Test')
plt.plot(mse_concat.index, mse_concat['R2_MSE_Train_sk'], label='R2_MSE_Train_sk')
plt.plot(mse_concat.index, mse_concat['R2_MSE_Test_sk'], label='R2_MSE_Test_sk')
plt.xlabel('p')
plt.ylabel('MSE')
plt.legend()
plt.show()