Advertisement
trsp

Untitled

Feb 27th, 2025
254
0
6 days
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.03 KB | Source Code | 0 0
  1. # Код fit.py:
  2.  
  3. import pandas as pd
  4. from sklearn.compose import ColumnTransformer
  5. from sklearn.pipeline import Pipeline
  6. from category_encoders import CatBoostEncoder
  7. from sklearn.preprocessing import StandardScaler, OneHotEncoder
  8. from catboost import CatBoostClassifier
  9. import yaml
  10. import os
  11. import joblib
  12.  
  13. # обучение модели
  14. def fit_model():
  15.     with open('params.yaml', 'r') as fd:
  16.         params = yaml.safe_load(fd)
  17.  
  18.     data = pd.read_csv('data/initial_data.csv')
  19.  
  20.     cat_features = data.select_dtypes(include='object')
  21.     potential_binary_features = cat_features.nunique() == 2
  22.  
  23.     binary_cat_features = cat_features[potential_binary_features[potential_binary_features].index]
  24.     other_cat_features = cat_features[potential_binary_features[~potential_binary_features].index]
  25.     num_features = data.select_dtypes(['float'])
  26.  
  27.     preprocessor = ColumnTransformer(
  28.         [
  29.         ('binary', OneHotEncoder(drop=params["one_hot_drop"]), binary_cat_features.columns.tolist()),
  30.         ('cat', CatBoostEncoder(return_df=False), other_cat_features.columns.tolist()),
  31.         ('num', StandardScaler(), num_features.columns.tolist())
  32.         ],
  33.         remainder='drop',
  34.         verbose_feature_names_out=False
  35.     )
  36.  
  37.     model = CatBoostClassifier(auto_class_weights=params["auto_class_weights"])
  38.  
  39.     pipeline = Pipeline(
  40.         [
  41.         ('preprocessor', preprocessor),
  42.         ('model', model)
  43.         ]
  44.     )
  45.     pipeline.fit(data, data["target"])
  46.  
  47.     os.makedirs('models', exist_ok=True)
  48.     with open('models/fitted_model.pkl', 'wb') as fd:
  49.         joblib.dump(pipeline, fd)
  50.  
  51.  
  52. if __name__ == '__main__':
  53.     fit_model()
  54.  
  55. -----
  56.  
  57. Ошибка полностью:
  58.  
  59. Traceback (most recent call last):
  60.   File "/home/mle-user/mle_projects/mle-dvc/scripts/fit.py", line 51, in <module>
  61.     fit_model()
  62.   File "/home/mle-user/mle_projects/mle-dvc/scripts/fit.py", line 43, in fit_model
  63.     pipeline.fit(data, data["target"])
  64.   File "/home/mle-user/mle_projects/mle-dvc/.venv_mle-dvc/lib/python3.10/site-packages/sklearn/base.py", line 1351, in wrapper
  65.     return fit_method(estimator, *args, **kwargs)
  66.   File "/home/mle-user/mle_projects/mle-dvc/.venv_mle-dvc/lib/python3.10/site-packages/sklearn/pipeline.py", line 475, in fit
  67.     self._final_estimator.fit(Xt, y, **last_step_params["fit"])
  68.   File "/home/mle-user/mle_projects/mle-dvc/.venv_mle-dvc/lib/python3.10/site-packages/catboost/core.py", line 5100, in fit
  69.     self._fit(X, y, cat_features, text_features, embedding_features, None, sample_weight, None, None, None, None, baseline, use_best_model,
  70.   File "/home/mle-user/mle_projects/mle-dvc/.venv_mle-dvc/lib/python3.10/site-packages/catboost/core.py", line 2303, in _fit
  71.     train_params = self._prepare_train_params(
  72.   File "/home/mle-user/mle_projects/mle-dvc/.venv_mle-dvc/lib/python3.10/site-packages/catboost/core.py", line 2184, in _prepare_train_params
  73.     train_pool = _build_train_pool(X, y, cat_features, text_features, embedding_features, pairs,
  74.   File "/home/mle-user/mle_projects/mle-dvc/.venv_mle-dvc/lib/python3.10/site-packages/catboost/core.py", line 1444, in _build_train_pool
  75.     train_pool = Pool(X, y, cat_features=cat_features, text_features=text_features, embedding_features=embedding_features, pairs=pairs, weight=sample_weight, group_id=group_id,
  76.   File "/home/mle-user/mle_projects/mle-dvc/.venv_mle-dvc/lib/python3.10/site-packages/catboost/core.py", line 793, in __init__
  77.     self._init(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
  78.   File "/home/mle-user/mle_projects/mle-dvc/.venv_mle-dvc/lib/python3.10/site-packages/catboost/core.py", line 1369, in _init
  79.     self._check_label_empty(label)
  80.   File "/home/mle-user/mle_projects/mle-dvc/.venv_mle-dvc/lib/python3.10/site-packages/catboost/core.py", line 905, in _check_label_empty
  81.     raise CatBoostError("Labels variable is empty.")
  82. _catboost.CatBoostError: Labels variable is empty.
  83. ERROR: failed to reproduce 'fit_model': failed to run: python scripts/fit.py, exited with 1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement