Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Build a project in 60 seconds: a LightAutoML regression baseline.

Loads ``autos.csv``, holds out 20% of the rows, fits a
``TabularUtilizedAutoML`` preset to predict the ``Price`` column within
``TIMEOUT`` seconds, and prints the RMSE measured on the held-out rows.
"""
import pandas as pd
from lightautoml.automl.presets.tabular_presets import TabularUtilizedAutoML
from lightautoml.tasks import Task
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

RANDOM_STATE = 12345  # one fixed seed reused for the split, the reader and the preset
N_FOLDS = 5  # number of CV folds handed to the AutoML reader
TIMEOUT = 60  # AutoML time budget in seconds; use TIMEOUT = 1700 for the best score

df = pd.read_csv('autos.csv')

# Both frames keep the 'Price' column: the training frame needs it as the
# target, and the hold-out frame needs it below to score the predictions.
X, X_test = train_test_split(df, test_size=0.2, random_state=RANDOM_STATE)

# Regression task trained with MSE loss and validated with sklearn's MSE
# (an error metric, hence greater_is_better=False).
task = Task('reg', loss='mse', metric=mean_squared_error, greater_is_better=False)

roles = {
    'target': 'Price',
    # Disabled: optional role that would drop these columns.
    # 'drop': ['DateCrawled', 'DateCreated', 'LastSeen', 'NumberOfPictures']
}

automl = TabularUtilizedAutoML(
    task=task,
    timeout=TIMEOUT,
    cpu_limit=1,
    random_state=RANDOM_STATE,
    reader_params={
        'n_jobs': 1,
        'cv': N_FOLDS,
        'random_state': RANDOM_STATE,
    },
)

# fit_predict trains the preset and returns its predictions for the training
# frame; they are kept in oof_pred but not used by the rest of this script.
oof_pred = automl.fit_predict(X, roles=roles)

# The prediction result exposes a 2-D ``data`` array; take its first column.
preds = automl.predict(X_test).data[:, 0]

# Square root of the MSE gives the RMSE on the held-out rows.
result = mean_squared_error(X_test['Price'], preds) ** 0.5
print(f'RMSE: {result}')
Add Comment
Please, Sign In to add comment