Advertisement
makispaiktis

Kaggle - Exercise 6 - Random Forest Summary

Jun 20th, 2023
692
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.98 KB | None | 0 0
  1. # ****************************************************************
  2. # ****************************************************************
  3. # 1. Random forest in training dataset
  4. # ****************************************************************
  5. # ****************************************************************
  6.  
  7.  
  8. # Import helpful libraries
  9. import pandas as pd
  10. from sklearn.ensemble import RandomForestRegressor
  11. from sklearn.metrics import mean_absolute_error
  12. from sklearn.model_selection import train_test_split
  13.  
  14. # Load the data, and separate the target
  15. iowa_file_path = '../input/train.csv'
  16. home_data = pd.read_csv(iowa_file_path)
  17.  
  18. y = home_data.SalePrice
  19. features = ['LotArea', 'YearBuilt', '1stFlrSF', '2ndFlrSF', 'FullBath', 'BedroomAbvGr', 'TotRmsAbvGrd']
  20. X = home_data[features]
  21. X.head()
  22.  
  23. # Split into validation and training data
  24. train_X, val_X, train_y, val_y = train_test_split(X, y, random_state=1)
  25.  
  26. # Define a random forest model
  27. rf_model = RandomForestRegressor(random_state=1)
  28. rf_model.fit(train_X, train_y)
  29. rf_val_predictions = rf_model.predict(val_X)
  30. rf_val_mae = mean_absolute_error(rf_val_predictions, val_y)
  31.  
  32. print("Validation MAE for Random Forest Model: {:,.0f}".format(rf_val_mae))
  33.  
  34.  
  35.  
  36. # ****************************************************************
  37. # ****************************************************************
  38. # 2. Random forest in ALL THE dataset - testing will be applied to a different dataset-table-file
  39. # ****************************************************************
  40. # ****************************************************************
  41.  
  42. # To improve accuracy, create a new Random Forest model which you will train on all training data
  43. rf_model_on_full_data = RandomForestRegressor(random_state=1)
  44. rf_model_on_full_data.fit(X, y)
  45.  
  46. test_data_path = '../input/test.csv'
  47. test_data = pd.read_csv(test_data_path)
  48. test_X = test_data[features]
  49.  
  50. # make predictions which we will submit
  51. test_preds = rf_model_on_full_data.predict(test_X)
  52. print(test_preds)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement