Advertisement
makispaiktis

Kaggle - Intermediate ML - Compare models, create a CSV

Jun 22nd, 2023
637
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.61 KB | None | 0 0
  1. # **********************************************************************************
  2. # **********************************************************************************
  3. # 0. Function for comparing different models
  4. # **********************************************************************************
  5. # **********************************************************************************
  6.  
  7. def score_model(model, X_t=X_train, X_v=X_valid, y_t=y_train, y_v=y_valid):
  8.     model.fit(X_t, y_t)
  9.     preds = model.predict(X_v)
  10.     return mean_absolute_error(y_v, preds)
  11.  
  12.  
  13.  
  14. # **********************************************************************************
  15. # **********************************************************************************
  16. # 1. Split the dataset
  17. # **********************************************************************************
  18. # **********************************************************************************
  19.  
  20. import pandas as pd
  21. from sklearn.model_selection import train_test_split
  22.  
  23. # Read the data
  24. X_full = pd.read_csv('../input/train.csv', index_col='Id')
  25. X_test_full = pd.read_csv('../input/test.csv', index_col='Id')
  26.  
  27. # Obtain target and predictors
  28. y = X_full.SalePrice
  29. features = ['LotArea', 'YearBuilt', '1stFlrSF', '2ndFlrSF', 'FullBath', 'BedroomAbvGr', 'TotRmsAbvGrd']
  30. X = X_full[features].copy()
  31. X_test = X_test_full[features].copy()
  32.  
  33. # Break off validation set from training data
  34. X_train, X_valid, y_train, y_valid = train_test_split(X, y, train_size=0.8, test_size=0.2, random_state=0)
  35.  
  36.  
  37.  
  38. # **********************************************************************************
  39. # **********************************************************************************
  40. # 2. Create 5 models of RandomForestRegressor
  41. # **********************************************************************************
  42. # **********************************************************************************
  43.  
  44. from sklearn.ensemble import RandomForestRegressor
  45.  
  46. # Define the models
  47. model_1 = RandomForestRegressor(n_estimators=50, random_state=0)
  48. model_2 = RandomForestRegressor(n_estimators=100, random_state=0)
  49. model_3 = RandomForestRegressor(n_estimators=100, criterion='absolute_error', random_state=0)
  50. model_4 = RandomForestRegressor(n_estimators=200, min_samples_split=20, random_state=0)
  51. model_5 = RandomForestRegressor(n_estimators=100, max_depth=7, random_state=0)
  52.  
  53. models = [model_1, model_2, model_3, model_4, model_5]
  54.  
  55.  
  56.  
  57. # **********************************************************************************
  58. # **********************************************************************************
  59. # 3. Print the score of each model
  60. # **********************************************************************************
  61. # **********************************************************************************
  62.  
  63. from sklearn.metrics import mean_absolute_error
  64.  
  65. for i in range(0, len(models)):
  66.     mae = score_model(models[i])
  67.     print("Model %d MAE: %d" % (i+1, mae))
  68. best_model = model_3
  69.  
  70.  
  71. # **********************************************************************************
  72. # **********************************************************************************
  73. # 4. Create a new model
  74. # **********************************************************************************
  75. # **********************************************************************************
  76.  
  77. my_model = RandomForestRegressor()
  78. my_model.fit(X, y)
  79. preds_test = my_model.predict(X_test)
  80.  
  81. # Save predictions in format used for competition scoring
  82. output = pd.DataFrame({'Id': X_test.index,'SalePrice': preds_test})
  83. output.to_csv('submission.csv', index=False)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement