Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Код fit.py:
- import pandas as pd
- from sklearn.compose import ColumnTransformer
- from sklearn.pipeline import Pipeline
- from category_encoders import CatBoostEncoder
- from sklearn.preprocessing import StandardScaler, OneHotEncoder
- from catboost import CatBoostClassifier
- import yaml
- import os
- import joblib
- # обучение модели
- def fit_model():
- with open('params.yaml', 'r') as fd:
- params = yaml.safe_load(fd)
- data = pd.read_csv('data/initial_data.csv')
- cat_features = data.select_dtypes(include='object')
- potential_binary_features = cat_features.nunique() == 2
- binary_cat_features = cat_features[potential_binary_features[potential_binary_features].index]
- other_cat_features = cat_features[potential_binary_features[~potential_binary_features].index]
- num_features = data.select_dtypes(['float'])
- preprocessor = ColumnTransformer(
- [
- ('binary', OneHotEncoder(drop=params["one_hot_drop"]), binary_cat_features.columns.tolist()),
- ('cat', CatBoostEncoder(return_df=False), other_cat_features.columns.tolist()),
- ('num', StandardScaler(), num_features.columns.tolist())
- ],
- remainder='drop',
- verbose_feature_names_out=False
- )
- model = CatBoostClassifier(auto_class_weights=params["auto_class_weights"])
- pipeline = Pipeline(
- [
- ('preprocessor', preprocessor),
- ('model', model)
- ]
- )
- pipeline.fit(data, data["target"])
- os.makedirs('models', exist_ok=True)
- with open('models/fitted_model.pkl', 'wb') as fd:
- joblib.dump(pipeline, fd)
- if __name__ == '__main__':
- fit_model()
- -----
- Ошибка полностью:
- Traceback (most recent call last):
- File "/home/mle-user/mle_projects/mle-dvc/scripts/fit.py", line 51, in <module>
- fit_model()
- File "/home/mle-user/mle_projects/mle-dvc/scripts/fit.py", line 43, in fit_model
- pipeline.fit(data, data["target"])
- File "/home/mle-user/mle_projects/mle-dvc/.venv_mle-dvc/lib/python3.10/site-packages/sklearn/base.py", line 1351, in wrapper
- return fit_method(estimator, *args, **kwargs)
- File "/home/mle-user/mle_projects/mle-dvc/.venv_mle-dvc/lib/python3.10/site-packages/sklearn/pipeline.py", line 475, in fit
- self._final_estimator.fit(Xt, y, **last_step_params["fit"])
- File "/home/mle-user/mle_projects/mle-dvc/.venv_mle-dvc/lib/python3.10/site-packages/catboost/core.py", line 5100, in fit
- self._fit(X, y, cat_features, text_features, embedding_features, None, sample_weight, None, None, None, None, baseline, use_best_model,
- File "/home/mle-user/mle_projects/mle-dvc/.venv_mle-dvc/lib/python3.10/site-packages/catboost/core.py", line 2303, in _fit
- train_params = self._prepare_train_params(
- File "/home/mle-user/mle_projects/mle-dvc/.venv_mle-dvc/lib/python3.10/site-packages/catboost/core.py", line 2184, in _prepare_train_params
- train_pool = _build_train_pool(X, y, cat_features, text_features, embedding_features, pairs,
- File "/home/mle-user/mle_projects/mle-dvc/.venv_mle-dvc/lib/python3.10/site-packages/catboost/core.py", line 1444, in _build_train_pool
- train_pool = Pool(X, y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features, pairs=pairs, weight=sample_weight, group_id=group_id,
- File "/home/mle-user/mle_projects/mle-dvc/.venv_mle-dvc/lib/python3.10/site-packages/catboost/core.py", line 793, in __init__
- self._init(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
- File "/home/mle-user/mle_projects/mle-dvc/.venv_mle-dvc/lib/python3.10/site-packages/catboost/core.py", line 1369, in _init
- self._check_label_empty(label)
- File "/home/mle-user/mle_projects/mle-dvc/.venv_mle-dvc/lib/python3.10/site-packages/catboost/core.py", line 905, in _check_label_empty
- raise CatBoostError("Labels variable is empty.")
- _catboost.CatBoostError: Labels variable is empty.
- ERROR: failed to reproduce 'fit_model': failed to run: python scripts/fit.py, exited with 1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement