# General imports
import os
import time
import logging
from datetime import datetime

import numpy as np
import pandas as pd

from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler

# 1. Linear Regression
# 7. Ordinary Least Squares (OLS) is the same model, so no separate import is needed
from sklearn.linear_model import LinearRegression
# 2. Polynomial Regression (PolynomialFeatures + LinearRegression)
from sklearn.preprocessing import PolynomialFeatures
# 3. Ridge Regression
from sklearn.linear_model import Ridge
# 4. Lasso Regression
from sklearn.linear_model import Lasso
# 5. Elastic Net Regression
from sklearn.linear_model import ElasticNet
# 6. Bayesian Ridge Regression
# 24. "Bayesian Regression" can refer to several models, including BayesianRidge
from sklearn.linear_model import BayesianRidge
# 8. Huber Regression
from sklearn.linear_model import HuberRegressor
# 9. Theil-Sen Estimator
from sklearn.linear_model import TheilSenRegressor
# 10. Quantile Regression
from sklearn.linear_model import QuantileRegressor
# 11. Decision Tree Regression
from sklearn.tree import DecisionTreeRegressor
# 12. Random Forest Regression
from sklearn.ensemble import RandomForestRegressor
# 13. Gradient Boosting Regression
from sklearn.ensemble import GradientBoostingRegressor
# 14. XGBoost Regression
import xgboost as xgb
# 15. LightGBM Regression
import lightgbm as lgb
# 16. CatBoost Regression
from catboost import CatBoostRegressor
# 17. Support Vector Regression (SVR)
from sklearn.svm import SVR
# 18. K-Nearest Neighbors Regression (KNNR)
from sklearn.neighbors import KNeighborsRegressor
# 19. Principal Component Regression (PCR) = PCA + LinearRegression
from sklearn.decomposition import PCA
# 20. Partial Least Squares Regression (PLSR)
from sklearn.cross_decomposition import PLSRegression
# 21./22. Artificial Neural Network (ANN) / Multi-layer Perceptron (MLP) Regression
from sklearn.neural_network import MLPRegressor
# 23. Stochastic Gradient Descent (SGD) Regression
from sklearn.linear_model import SGDRegressor
# Configuration
k_fold = 10
dataset_percent = 100  # percentage of the dataset to use (sampled at random)
input_file_path = 'input/input.csv'
output_folder = 'output'
time_log_file = 'time.csv'

log_file_name = f"log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
logging.basicConfig(filename=log_file_name, level=logging.DEBUG,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Create the output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)
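# Assumed input layout (the paste itself never specifies it): a CSV with a
# header row, numeric feature columns first, and the numeric target as the
# last column, e.g.:
#
#   feature_1,feature_2,feature_3,target
#   0.12,3.40,7.00,15.2
#   0.98,1.10,2.50,9.7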
# Load and preprocess data
try:
    data = pd.read_csv(input_file_path)
    data = data.sample(frac=dataset_percent / 100)  # use only the specified percentage of data
    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]

    # Use separate scalers for X and y: refitting a single scaler on y would
    # overwrite the feature scaling and break the inverse transform later.
    x_scaler = MinMaxScaler()
    y_scaler = MinMaxScaler()
    X_scaled = x_scaler.fit_transform(X)
    y_scaled = y_scaler.fit_transform(y.values.reshape(-1, 1)).flatten()
except Exception as e:
    logging.error("Error loading and preprocessing data: %s", e)
    raise  # nothing below can run without the data

# Define K-Fold cross-validation
kf = KFold(n_splits=k_fold, shuffle=True, random_state=42)
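# shuffle=True with a fixed random_state gives every model the exact same fold
# assignment, so the per-model timings and predictions stay comparable.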
# Train a model with K-Fold cross-validation, collect out-of-fold predictions,
# and log per-fold and total timings. X_data defaults to the globally scaled
# features so that transformed feature sets (polynomial, PCA) can be passed in.
def train_model(model, model_name, X_data=None):
    if X_data is None:
        X_data = X_scaled
    results = {'actual': [], 'predicted': []}
    total_train_time, total_test_time = 0, 0

    for fold, (train_index, test_index) in enumerate(kf.split(X_data)):
        try:
            # Train/test split for this fold
            X_train, X_test = X_data[train_index], X_data[test_index]
            y_train, y_test = y_scaled[train_index], y_scaled[test_index]

            # Training
            start_train = time.time()
            model.fit(X_train, y_train)
            train_time = time.time() - start_train
            total_train_time += train_time

            # Prediction
            start_test = time.time()
            y_pred_scaled = model.predict(X_test)
            test_time = time.time() - start_test
            total_test_time += test_time

            # Inverse-transform predictions back to the original target scale
            y_pred = y_scaler.inverse_transform(np.asarray(y_pred_scaled).reshape(-1, 1)).flatten()
            y_test_orig = y_scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()

            # Collect results
            results['actual'].extend(y_test_orig)
            results['predicted'].extend(y_pred)
            logging.info(f"{model_name} - Fold {fold + 1} completed: Train time {train_time:.3f}s, Test time {test_time:.3f}s")
        except Exception as e:
            logging.error(f"{model_name} - Error in fold {fold + 1}: {e}")
            continue  # proceed to the next fold on error

    # Save results to CSV (append if the file already exists)
    results_df = pd.DataFrame(results)
    results_file = os.path.join(output_folder, f"{model_name}.csv")
    if os.path.exists(results_file):
        results_df.to_csv(results_file, mode='a', header=False, index=False)
    else:
        results_df.to_csv(results_file, index=False)

    # Log timing information
    total_execution_time = total_train_time + total_test_time
    with open(time_log_file, mode='a') as f:
        f.write(f"{model_name},{total_train_time:.3f},{total_test_time:.3f},{total_execution_time:.3f}\n")
    logging.info(f"{model_name} - Total time: Train {total_train_time:.3f}s, Test {total_test_time:.3f}s, Total {total_execution_time:.3f}s")
# Define regression model wrappers
def linear_regression():
    train_model(LinearRegression(), "Linear Regression")

def ridge_regression():
    train_model(Ridge(), "Ridge Regression")

def lasso_regression():
    train_model(Lasso(), "Lasso Regression")

def elastic_net_regression():
    train_model(ElasticNet(), "Elastic Net Regression")

def bayesian_ridge_regression():
    train_model(BayesianRidge(), "Bayesian Ridge Regression")

def huber_regression():
    train_model(HuberRegressor(), "Huber Regression")

def decision_tree_regression():
    train_model(DecisionTreeRegressor(), "Decision Tree Regression")

def random_forest_regression():
    train_model(RandomForestRegressor(), "Random Forest Regression")

def gradient_boosting_regression():
    train_model(GradientBoostingRegressor(), "Gradient Boosting Regression")

def svr_regression():
    train_model(SVR(), "Support Vector Regression (SVR)")

def knn_regression():
    train_model(KNeighborsRegressor(), "K-Nearest Neighbors Regression (KNNR)")

def mlp_regression():
    train_model(MLPRegressor(max_iter=1000), "Multi-layer Perceptron (MLP) Regression")
# 2. Polynomial Regression (PolynomialFeatures + LinearRegression)
def polynomial_regression(degree=2):
    poly = PolynomialFeatures(degree=degree)
    X_poly = poly.fit_transform(X_scaled)  # expand features into polynomial terms
    train_model(LinearRegression(), f"Polynomial Regression (Degree {degree})", X_data=X_poly)
# 9. Theil-Sen Estimator
def theil_sen_regression():
    train_model(TheilSenRegressor(), "Theil-Sen Estimator")

# 10. Quantile Regression
def quantile_regression(alpha=0.5):
    train_model(QuantileRegressor(quantile=alpha), f"Quantile Regression (alpha={alpha})")

# 14. XGBoost Regression
def xgboost_regression():
    train_model(xgb.XGBRegressor(objective='reg:squarederror'), "XGBoost Regression")

# 15. LightGBM Regression
def lightgbm_regression():
    train_model(lgb.LGBMRegressor(), "LightGBM Regression")

# 16. CatBoost Regression
def catboost_regression():
    train_model(CatBoostRegressor(silent=True), "CatBoost Regression")
# 19. Principal Component Regression (PCR) = PCA + LinearRegression
def pcr_regression(n_components=2):
    pca = PCA(n_components=n_components)
    X_pca = pca.fit_transform(X_scaled)  # project features onto principal components
    train_model(LinearRegression(), f"Principal Component Regression (n_components={n_components})", X_data=X_pca)
# 20. Partial Least Squares Regression (PLSR)
def plsr_regression():
    train_model(PLSRegression(), "Partial Least Squares Regression (PLSR)")

# 21. Artificial Neural Networks (ANN) Regression
def ann_regression():
    train_model(MLPRegressor(hidden_layer_sizes=(50, 50), max_iter=1000), "Artificial Neural Networks Regression")

# 23. Stochastic Gradient Descent (SGD) Regression
def sgd_regression():
    train_model(SGDRegressor(), "Stochastic Gradient Descent Regression")
# Define main function
def main():
    try:
        # List of model functions to run
        models = [
            linear_regression, ridge_regression, lasso_regression,
            elastic_net_regression, bayesian_ridge_regression, huber_regression,
            decision_tree_regression, random_forest_regression, gradient_boosting_regression,
            svr_regression, knn_regression, mlp_regression, polynomial_regression,
            theil_sen_regression, quantile_regression, xgboost_regression,
            lightgbm_regression, catboost_regression, pcr_regression,
            plsr_regression, ann_regression, sgd_regression
        ]

        # Write the header for the time log file once
        if not os.path.exists(time_log_file):
            with open(time_log_file, mode='w') as f:
                f.write("Model,Train Time (s),Test Time (s),Total Time (s)\n")

        # Execute each model
        for model_func in models:
            model_name = model_func.__name__.replace('_', ' ').title()
            logging.info("Starting model: %s", model_name)
            model_func()
    except Exception as e:
        logging.error("Error in main function: %s", e)


if __name__ == "__main__":
    main()
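# Each run writes one CSV per model to output/<model name>.csv with 'actual'
# and 'predicted' columns (appending on repeat runs), appends per-model
# timings to time.csv, and logs progress to a timestamped
# log_YYYYMMDD_HHMMSS.log file.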