pavel_777

60 sec project

Sep 27th, 2021 (edited)
130
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.35 KB | None | 0 0
  # Build a complete project in 60 seconds: fit a LightAutoML preset on
  # autos.csv and print the RMSE for the 'Price' column on a held-out split.

  import pandas as pd
  from lightautoml.automl.presets.tabular_presets import TabularUtilizedAutoML
  from lightautoml.tasks import Task
  from sklearn.metrics import mean_squared_error
  from sklearn.model_selection import train_test_split

  # Single seed reused by the train/test split, the AutoML preset and its reader.
  RANDOM_STATE = 12345
  # Fold count passed to the AutoML reader as 'cv'.
  N_FOLDS = 5
  # AutoML time budget in seconds; the author recommends TIMEOUT = 1700 for the
  # best score.
  TIMEOUT = 60

  df = pd.read_csv('autos.csv')

  # Hold out 20% of the rows as X_test; the remaining rows (X) are used to fit.
  X, X_test = train_test_split(df, test_size=0.2, random_state=RANDOM_STATE)

  # Regression task with MSE loss; mean_squared_error is the metric and lower
  # values are better.
  task = Task(
      'reg',
      loss='mse',
      metric=mean_squared_error,
      greater_is_better=False,
  )

  # Column roles: 'Price' is the target.
  # A 'drop' entry was left disabled by the author:
  # 'drop': ['DateCrawled', 'DateCreated', 'LastSeen', 'NumberOfPictures']
  roles = {'target': 'Price'}

  # Reader settings: one job, N_FOLDS folds, same seed as everything else.
  reader_params = {
      'n_jobs': 1,
      'cv': N_FOLDS,
      'random_state': RANDOM_STATE,
  }

  automl = TabularUtilizedAutoML(
      task=task,
      timeout=TIMEOUT,
      cpu_limit=1,
      random_state=RANDOM_STATE,
      reader_params=reader_params,
  )

  # Fit on the training split; the return value is stored in oof_pred and is
  # not used further in this script.
  oof_pred = automl.fit_predict(X, roles=roles)

  # Predict on the held-out split and take the first column of the output.
  test_pred = automl.predict(X_test)
  preds = test_pred.data[:, 0]

  # Square root of the mean squared error gives the RMSE.
  mse = mean_squared_error(X_test['Price'], preds)
  result = mse ** 0.5
  print(f'RMSE: {result}')
  37.  
Add Comment
Please, Sign In to add comment