Advertisement
makispaiktis

Kaggle - Exercise 3 - Split the dataset and apply MAE

Jun 20th, 2023
662
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.75 KB | None | 0 0
  1. # *****************************************************************
  2. # *****************************************************************
  3. # 1. Read the dataframe
  4. # *****************************************************************
  5. # *****************************************************************
  6.  
  7. import pandas as pd
  8. from sklearn.tree import DecisionTreeRegressor
  9.  
  10. # Path of the file to read
  11. iowa_file_path = '../input/home-data-for-ml-course/train.csv'
  12. home_data = pd.read_csv(iowa_file_path)
  13.  
  14. y = home_data.SalePrice
  15. feature_columns = ['LotArea', 'YearBuilt', '1stFlrSF', '2ndFlrSF', 'FullBath', 'BedroomAbvGr', 'TotRmsAbvGrd']
  16. X = home_data[feature_columns]
  17.  
  18. # Specify and Fit the Model in all the samples from the dataset (wrong methodology)
  19. iowa_model = DecisionTreeRegressor()
  20. iowa_model.fit(X, y)
  21.  
  22. print("First in-sample predictions:", iowa_model.predict(X.head()))
  23. print("Actual target values for those homes:", y.head().tolist())
  24.  
  25.  
  26.  
  27. # *****************************************************************
  28. # *****************************************************************
  29. # 2. Split the dataset with a function from sklearn
  30. # *****************************************************************
  31. # *****************************************************************
  32.  
  33. from sklearn.model_selection import train_test_split
  34. train_X, val_X, train_y, val_y = train_test_split(X, y, random_state=1)
  35.  
  36.  
  37.  
  38. # *****************************************************************
  39. # *****************************************************************
  40. # 3. Split and fit the model into the training data (correct methodology)
  41. # *****************************************************************
  42. # *****************************************************************
  43.  
  44. iowa_model = DecisionTreeRegressor(random_state=1)
  45. iowa_model.fit(train_X, train_y)
  46.  
  47.  
  48.  
  49. # *****************************************************************
  50. # *****************************************************************
  51. # 4. Predictions with validation-testing data
  52. # *****************************************************************
  53. # *****************************************************************
  54.  
  55. val_predictions = iowa_model.predict(val_X)
  56. # print the top few validation predictions
  57. print(val_predictions, '\n')
  58. # print the top few actual prices from validation data
  59. val_y.head()
  60.  
  61.  
  62.  
  63. # *****************************************************************
  64. # *****************************************************************
  65. # 5. Mean Absolute Error
  66. # *****************************************************************
  67. # *****************************************************************
  68.  
  69. from sklearn.metrics import mean_absolute_error
  70. val_mae = mean_absolute_error(val_predictions, val_y)
  71. print(val_mae)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement