Advertisement
jules0707

overfitting_bagging.py

Dec 27th, 2024
89
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.32 KB | None | 0 0
  1. # %% [markdown]
  2. # ## Title :
  3. # Regression with Bagging
  4. #
  5. # ## Description :
  6. # The aim of this exercise is to understand regression using Bagging.
  7. #
  8. # <img src="./fig3.png" style="width: 900px;">
  9. #
  10. # ## Instructions:
  11. #
  12. # - Read the dataset `airquality.csv` as a Pandas dataframe.
  13. # - Take a quick look at the dataset.
  14. # - Split the data into train and test sets.
  15. # - Specify the number of bootstraps as 30 and a maximum depth of 3.
  16. # - Define a Bagging Regression model that uses Decision Tree as its base estimator.
  17. # - Fit the model on the train data.
  18. # - Use the helper code to predict using the mean model and individual estimators. The plot will look similar to the one given above.
  19. # - Predict on the test data using the first estimator and the mean model.
  20. # - Compute and display the test MSEs.
  21. #
  22. # ## Hints:
  23. #
  24. # <a href="https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html" target="_blank">sklearn.train_test_split()</a>
  25. # Split arrays or matrices into random train and test subsets.
  26. #
  27. # <a href="https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.BaggingRegressor.html" target="_blank">BaggingRegressor()</a>
  28. # Returns a Bagging regressor instance.
  29. #
  30. # <a href="https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html" target="_blank">DecisionTreeRegressor()</a>
  31. # A decision tree regressor.
  32. #
  33. # <a href="https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html#sklearn.tree.DecisionTreeRegressor.fit" target="_blank">DecisionTreeRegressor.fit()</a>
  34. # Build a decision tree regressor from the training set (X, y).
  35. #
  36. # <a href="https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html#sklearn.tree.DecisionTreeRegressor.predict" target="_blank">DecisionTreeRegressor.predict()</a>
  37. # Predict the regression target for the input samples X.
  38. #
  39. # <a href="https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.BaggingRegressor.html" target="_blank">BaggingRegressor().estimators_ </a>
  40. # The list of fitted base estimators of the bagging model. Use this to access any of the individual estimators.
  41. #
  42. # <a href="https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html" target="_blank">sklearn.mean_squared_error()</a>
  43. # Mean squared error regression loss.
  44.  
  45. # %%
  46. # Import necessary libraries
  47. import itertools
  48. import numpy as np
  49. import pandas as pd
  50. from numpy import std
  51. from numpy import mean
  52. import matplotlib.pyplot as plt
  53. from sklearn.datasets import make_regression
  54. from sklearn.ensemble import BaggingRegressor
  55. from sklearn.tree import DecisionTreeRegressor
  56. from sklearn.metrics import mean_squared_error
  57. from sklearn.model_selection import train_test_split
  58. %matplotlib inline
  59.  
  60.  
  61. # %%
  62. # Read the dataset
  63. df = pd.read_csv("../DATA/airquality.csv",index_col=0)
  64.  
  65. # Take a quick look at the data
  66. df.head(10)
  67. df.shape
  68.  
  69.  
  70. # %%
  71. # Use the column Ozone to drop any NaNs from the dataframe
  72. print('hello')
  73.  
  74. df = df[df.Ozone.notna()]
  75. df.shape
  76.  
  77. # %%
  78. # Assign the values of Ozon column as the predictor variable
  79. x = df[['Ozone']].values
  80.  
  81. # Use temperature as the response data
  82. y = df['Temp']
  83.  
  84.  
  85. # %%
  86. # Split the data into train and test sets with train size as 0.8
  87. # and set random_state as 102
  88. x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8, random_state=102)
  89. x_test.shape
  90.  
  91. # %% [markdown]
  92. # ### Bagging Regressor
  93.  
  94. # %%
  95. # Specify the number of bootstraps as 30
  96. num_bootstraps = 30
  97.  
  98. # Specify the maximum depth of the decision tree as 3
  99. max_depth = 3
  100.  
  101. # Define the Bagging Regressor Model
  102. # Use Decision Tree as your base estimator with depth as mentioned in max_depth
  103. # Initialise number of estimators using the num_bootstraps value
  104. model = BaggingRegressor(estimator=DecisionTreeRegressor(max_depth=max_depth), n_estimators=num_bootstraps)
  105.                        
  106.  
  107. # Fit the model on the train data
  108. model.fit(x_train,y_train)
  109.  
  110.  
  111. # %%
  112. # Helper code to plot the predictions of individual estimators
  113. plt.figure(figsize=(10,8))
  114.  
  115. xrange = np.linspace(x.min(),x.max(),80).reshape(-1,1)
  116. plt.plot(x_train,y_train,'o',color='#EFAEA4', markersize=6, label="Train Data")
  117. plt.plot(x_test,y_test,'o',color='#F6345E', markersize=6, label="Test Data")
  118.  
  119. plt.xlim()
  120. for i in model.estimators_:
  121.     y_pred1 = i.predict(xrange)
  122.     plt.plot(xrange,y_pred1,alpha=0.5,linewidth=0.5,color = '#ABCCE3')
  123. plt.plot(xrange,y_pred1,alpha=0.6,linewidth=1,color = '#ABCCE3',label="Prediction of Individual Estimators")
  124.  
  125.  
  126. y_pred = model.predict(xrange)
  127. plt.plot(xrange,y_pred,alpha=0.7,linewidth=3,color='#50AEA4', label='Model Prediction')
  128. plt.xlabel("Ozone", fontsize=16)
  129. plt.ylabel("Temperature", fontsize=16)
  130. plt.xticks(fontsize=12)
  131. plt.yticks(fontsize=12)
  132. plt.legend(loc='best',fontsize=12)
  133. plt.show();
  134.  
  135.  
  136. # %%
  137. # Compute the test MSE of the prediction of every individual estimator
  138. y_pred1 = np.mean([estimator.predict(x_test) for estimator in model.estimators_],axis=0)
  139.  
  140. # Print the test MSE
  141. print("The test MSE of one estimator in the model is", round(mean_squared_error(y_test,y_pred1),2))
  142.  
  143.  
  144. # %%
  145. ### edTest(test_mse) ###
  146. # Compute the test MSE of the model prediction
  147. y_pred = model.predict(x_test)
  148.  
  149. # Print the test MSE
  150. print("The test MSE of the model is",round(mean_squared_error(y_test,y_pred),2))
  151.  
  152.  
  153.  
  154.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement