makispaiktis

Kaggle - Exercise 4 - Parameter selection (max_leaf_nodes)

Jun 20th, 2023 (edited)
# *************************************************************
# *************************************************************
# 0. Function for evaluating a model according to the max_leaf_nodes parameter
# *************************************************************
# *************************************************************

def get_mae(max_leaf_nodes, train_X, val_X, train_y, val_y):
    model = DecisionTreeRegressor(max_leaf_nodes=max_leaf_nodes, random_state=0)
    model.fit(train_X, train_y)
    preds_val = model.predict(val_X)
    mae = mean_absolute_error(val_y, preds_val)
    return mae


# *************************************************************
# *************************************************************
# 1. Dataset, y, X, split, predict without any parameter tuning
# *************************************************************
# *************************************************************

# Code you have previously used to load data
import pandas as pd
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor


# Path of the file to read
iowa_file_path = '../input/home-data-for-ml-course/train.csv'
home_data = pd.read_csv(iowa_file_path)

y = home_data.SalePrice
features = ['LotArea', 'YearBuilt', '1stFlrSF', '2ndFlrSF', 'FullBath', 'BedroomAbvGr', 'TotRmsAbvGrd']
X = home_data[features]

# Split into validation and training data
train_X, val_X, train_y, val_y = train_test_split(X, y, random_state=1)
iowa_model = DecisionTreeRegressor(random_state=1)
iowa_model.fit(train_X, train_y)

# Make validation predictions and calculate mean absolute error
val_predictions = iowa_model.predict(val_X)
val_mae = mean_absolute_error(val_y, val_predictions)
print("Validation MAE: {:,.0f}".format(val_mae))

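# Side note (illustrative, not part of the original exercise): with no test_size
# argument, train_test_split holds out 25% of the rows for validation by default.
# Printing the shapes makes the 75/25 split visible.
print(train_X.shape, val_X.shape)
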
# *************************************************************
# *************************************************************
# 2. Compare different tree sizes
# *************************************************************
# *************************************************************

candidate_max_leaf_nodes = [5, 25, 50, 100, 250, 500]
min_mae = 10**10
index = -1
for i in range(len(candidate_max_leaf_nodes)):
    max_leaf_nodes = candidate_max_leaf_nodes[i]
    mae = get_mae(max_leaf_nodes, train_X, val_X, train_y, val_y)
    print(max_leaf_nodes, mae)
    if mae < min_mae:
        min_mae = mae
        index = i


# Store the best value of max_leaf_nodes (it will be one of 5, 25, 50, 100, 250 or 500)
best_tree_size = candidate_max_leaf_nodes[index]
print('\n', best_tree_size)

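# Alternative sketch (equivalent to the loop above, not from the original paste):
# build a dict of {candidate: validation MAE} and take the key with the smallest
# value. It recomputes the same best_tree_size, just more compactly.
scores = {leaf_size: get_mae(leaf_size, train_X, val_X, train_y, val_y)
          for leaf_size in candidate_max_leaf_nodes}
best_tree_size = min(scores, key=scores.get)
print(best_tree_size)

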
# *************************************************************
# *************************************************************
# 3. Fit a model on ALL THE DATA with the best value of the max_leaf_nodes parameter
# *************************************************************
# *************************************************************

# Fill in the max_leaf_nodes argument with the optimal size found above
final_model = DecisionTreeRegressor(max_leaf_nodes=best_tree_size, random_state=0)
final_model.fit(X, y)
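

# Usage sketch (illustrative, not part of the original exercise): final_model was
# refit on all of X and y, so no held-out data remains; predictions on X are
# in-sample and will understate the true error compared with the validation MAE
# from step 2.
final_predictions = final_model.predict(X)
print("In-sample MAE with tuned tree: {:,.0f}".format(mean_absolute_error(y, final_predictions)))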