Advertisement
mayankjoin3

Ml code 24 algos full code Issue with export

Nov 8th, 2024
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 10.23 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. import os
  4. import time
  5. import logging
  6. from datetime import datetime
  7.  
  8.  
  9. # General Imports
  10. import pandas as pd
  11. import numpy as np
  12. from sklearn.model_selection import train_test_split, KFold
  13. from sklearn.preprocessing import MinMaxScaler
  14. from sklearn.metrics import mean_squared_error
  15.  
  16. # 1. Linear Regression
  17. from sklearn.linear_model import LinearRegression
  18.  
  19. # 2. Polynomial Regression (using PolynomialFeatures)
  20. from sklearn.preprocessing import PolynomialFeatures
  21.  
  22. # 3. Ridge Regression
  23. from sklearn.linear_model import Ridge
  24.  
  25. # 4. Lasso Regression
  26. from sklearn.linear_model import Lasso
  27.  
  28. # 5. Elastic Net Regression
  29. from sklearn.linear_model import ElasticNet
  30.  
  31. # 6. Bayesian Ridge Regression
  32. from sklearn.linear_model import BayesianRidge
  33.  
  34. # 7. Ordinary Least Squares Regression (OLS)
  35. # OLS is also implemented by LinearRegression, so no specific import needed
  36.  
  37. # 8. Huber Regression
  38. from sklearn.linear_model import HuberRegressor
  39.  
  40. # 9. Theil-Sen Estimator
  41. from sklearn.linear_model import TheilSenRegressor
  42.  
  43. # 10. Quantile Regression
  44. from sklearn.linear_model import QuantileRegressor
  45.  
  46. # 11. Decision Tree Regression
  47. from sklearn.tree import DecisionTreeRegressor
  48.  
  49. # 12. Random Forest Regression
  50. from sklearn.ensemble import RandomForestRegressor
  51.  
  52. # 13. Gradient Boosting Regression
  53. from sklearn.ensemble import GradientBoostingRegressor
  54.  
  55. # 14. XGBoost Regression
  56. import xgboost as xgb
  57.  
  58. # 15. LightGBM Regression
  59. import lightgbm as lgb
  60.  
  61. # 16. CatBoost Regression
  62. from catboost import CatBoostRegressor
  63.  
  64. # 17. Support Vector Regression (SVR)
  65. from sklearn.svm import SVR
  66.  
  67. # 18. K-Nearest Neighbors Regression (KNNR)
  68. from sklearn.neighbors import KNeighborsRegressor
  69.  
  70. # 19. Principal Component Regression (PCR)
  71. from sklearn.decomposition import PCA
  72. from sklearn.linear_model import LinearRegression
  73.  
  74. # 20. Partial Least Squares Regression (PLSR)
  75. from sklearn.cross_decomposition import PLSRegression
  76.  
  77. # 21. Artificial Neural Networks (ANN) Regression
  78. from sklearn.neural_network import MLPRegressor
  79.  
  80. # 22. Multi-layer Perceptron (MLP) Regression
  81. from sklearn.neural_network import MLPRegressor
  82.  
  83. # 23. Stochastic Gradient Descent (SGD) Regression
  84. from sklearn.linear_model import SGDRegressor
  85.  
  86. # 24. Bayesian Regression
  87. # This is another term that can refer to several models, including BayesianRidge
  88. from sklearn.linear_model import BayesianRidge
  89.  
  90.  
  91. # Configurations
  92. k_fold = 10
  93. dataset_percent = 100
  94. input_file_path = 'input/input.csv'
  95. output_folder = 'output'
  96. time_log_file = 'time.csv'
  97. log_file_name = f"log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
  98. logging.basicConfig(filename=log_file_name, level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
  99.  
  100. # Create output folder if it doesn't exist
  101. os.makedirs(output_folder, exist_ok=True)
  102.  
  103. # Load and preprocess data
  104. try:
  105.     data = pd.read_csv(input_file_path)
  106.     data = data.sample(frac=dataset_percent / 100)  # Use only specified percentage of data
  107.     X = data.iloc[:, :-1]
  108.     y = data.iloc[:, -1]
  109.     scaler = MinMaxScaler()
  110.     X_scaled = scaler.fit_transform(X)
  111.     y_scaled = scaler.fit_transform(y.values.reshape(-1, 1)).flatten()
  112. except Exception as e:
  113.     logging.error("Error loading and preprocessing data: %s", e)
  114.  
  115. # Define K-Fold cross-validation
  116. kf = KFold(n_splits=k_fold, shuffle=True, random_state=42)
  117.  
  118. # Define model functions
  119. def train_model(model, model_name):
  120.     results = {'actual': [], 'predicted': []}
  121.     total_train_time, total_test_time = 0, 0
  122.  
  123.     for fold, (train_index, test_index) in enumerate(kf.split(X_scaled)):
  124.         try:
  125.             # Train/Test split
  126.             X_train, X_test = X_scaled[train_index], X_scaled[test_index]
  127.             y_train, y_test = y_scaled[train_index], y_scaled[test_index]
  128.  
  129.             # Training
  130.             start_train = time.time()
  131.             model.fit(X_train, y_train)
  132.             train_time = time.time() - start_train
  133.             total_train_time += train_time
  134.  
  135.             # Prediction
  136.             start_test = time.time()
  137.             y_pred_scaled = model.predict(X_test)
  138.             test_time = time.time() - start_test
  139.             total_test_time += test_time
  140.  
  141.             # Inverse transform predictions to original scale
  142.             y_pred = scaler.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()
  143.             y_test_orig = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
  144.  
  145.             # Save results
  146.             results['actual'].extend(y_test_orig)
  147.             results['predicted'].extend(y_pred)
  148.  
  149.             logging.info(f"{model_name} - Fold {fold + 1} completed: Train time {train_time:.3f}s, Test time {test_time:.3f}s")
  150.        
  151.         except Exception as e:
  152.             logging.error(f"{model_name} - Error in fold {fold + 1}: {e}")
  153.             continue  # Proceed to next fold in case of an error
  154.  
  155.     # Save results to CSV
  156.     results_df = pd.DataFrame(results)
  157.     results_file = os.path.join(output_folder, f"{model_name}.csv")
  158.     if os.path.exists(results_file):
  159.         results_df.to_csv(results_file, mode='a', header=False, index=False)
  160.     else:
  161.         results_df.to_csv(results_file, index=False)
  162.  
  163.     # Log timing information
  164.     total_execution_time = total_train_time + total_test_time
  165.     with open(time_log_file, mode='a') as f:
  166.         f.write(f"{model_name},{total_train_time:.3f},{total_test_time:.3f},{total_execution_time:.3f}\n")
  167.     logging.info(f"{model_name} - Total time: Train {total_train_time:.3f}s, Test {total_test_time:.3f}s, Total {total_execution_time:.3f}s")
  168.  
  169. # Define regression models
  170. def linear_regression():
  171.     train_model(LinearRegression(), "Linear Regression")
  172.  
  173. def ridge_regression():
  174.     train_model(Ridge(), "Ridge Regression")
  175.  
  176. def lasso_regression():
  177.     train_model(Lasso(), "Lasso Regression")
  178.  
  179. def elastic_net_regression():
  180.     train_model(ElasticNet(), "Elastic Net Regression")
  181.  
  182. def bayesian_ridge_regression():
  183.     train_model(BayesianRidge(), "Bayesian Ridge Regression")
  184.  
  185. def huber_regression():
  186.     train_model(HuberRegressor(), "Huber Regression")
  187.  
  188. def decision_tree_regression():
  189.     train_model(DecisionTreeRegressor(), "Decision Tree Regression")
  190.  
  191. def random_forest_regression():
  192.     train_model(RandomForestRegressor(), "Random Forest Regression")
  193.  
  194. def gradient_boosting_regression():
  195.     train_model(GradientBoostingRegressor(), "Gradient Boosting Regression")
  196.  
  197. def svr_regression():
  198.     train_model(SVR(), "Support Vector Regression (SVR)")
  199.  
  200. def knn_regression():
  201.     train_model(KNeighborsRegressor(), "K-Nearest Neighbors Regression (KNNR)")
  202.  
  203. def mlp_regression():
  204.     train_model(MLPRegressor(max_iter=1000), "Multi-layer Perceptron (MLP) Regression")
  205.  
  206. # 2. Polynomial Regression (using PolynomialFeatures)
  207. from sklearn.preprocessing import PolynomialFeatures
  208.  
  209. def polynomial_regression(degree=2):
  210.     poly = PolynomialFeatures(degree=degree)
  211.     X_poly = poly.fit_transform(X_scaled)  # Transform features to polynomial features
  212.     model = LinearRegression()
  213.     train_model(model, f"Polynomial Regression (Degree {degree})")
  214.  
  215. # 9. Theil-Sen Estimator
  216. from sklearn.linear_model import TheilSenRegressor
  217.  
  218. def theil_sen_regression():
  219.     train_model(TheilSenRegressor(), "Theil-Sen Estimator")
  220.  
  221. # 10. Quantile Regression
  222. from sklearn.linear_model import QuantileRegressor
  223.  
  224. def quantile_regression(alpha=0.5):
  225.     train_model(QuantileRegressor(quantile=alpha), f"Quantile Regression (alpha={alpha})")
  226.  
  227. # 14. XGBoost Regression
  228. import xgboost as xgb
  229.  
  230. def xgboost_regression():
  231.     model = xgb.XGBRegressor(objective='reg:squarederror')
  232.     train_model(model, "XGBoost Regression")
  233.  
  234. # 15. LightGBM Regression
  235. import lightgbm as lgb
  236.  
  237. def lightgbm_regression():
  238.     model = lgb.LGBMRegressor()
  239.     train_model(model, "LightGBM Regression")
  240.  
  241. # 16. CatBoost Regression
  242. from catboost import CatBoostRegressor
  243.  
  244. def catboost_regression():
  245.     model = CatBoostRegressor(silent=True)
  246.     train_model(model, "CatBoost Regression")
  247.  
  248. # 19. Principal Component Regression (PCR)
  249. from sklearn.decomposition import PCA
  250. from sklearn.linear_model import LinearRegression
  251.  
  252. def pcr_regression(n_components=2):
  253.     pca = PCA(n_components=n_components)
  254.     X_pca = pca.fit_transform(X_scaled)
  255.     model = LinearRegression()
  256.     train_model(model, f"Principal Component Regression (n_components={n_components})")
  257.  
  258. # 20. Partial Least Squares Regression (PLSR)
  259. from sklearn.cross_decomposition import PLSRegression
  260.  
  261. def plsr_regression():
  262.     model = PLSRegression()
  263.     train_model(model, "Partial Least Squares Regression (PLSR)")
  264.  
  265. # 21. Artificial Neural Networks (ANN) Regression
  266. from sklearn.neural_network import MLPRegressor
  267.  
  268. def ann_regression():
  269.     model = MLPRegressor(hidden_layer_sizes=(50, 50), max_iter=1000)
  270.     train_model(model, "Artificial Neural Networks Regression")
  271.  
  272. # 23. Stochastic Gradient Descent (SGD) Regression
  273. from sklearn.linear_model import SGDRegressor
  274.  
  275. def sgd_regression():
  276.     model = SGDRegressor()
  277.     train_model(model, "Stochastic Gradient Descent Regression")
  278.  
  279.  
  280. # Define main function
  281. def main():
  282.     try:
  283.         # List of model functions
  284.         models = [
  285.             linear_regression, ridge_regression, lasso_regression,
  286.             elastic_net_regression, bayesian_ridge_regression, huber_regression,
  287.             decision_tree_regression, random_forest_regression, gradient_boosting_regression,
  288.             svr_regression, knn_regression, mlp_regression, polynomial_regression,
  289.             theil_sen_regression, quantile_regression, xgboost_regression,
  290.             lightgbm_regression, catboost_regression, pcr_regression,
  291.             plsr_regression, ann_regression, sgd_regression
  292.         ]
  293.  
  294.         # Header for time log file
  295.         if not os.path.exists(time_log_file):
  296.             with open(time_log_file, mode='w') as f:
  297.                 f.write("Model,Train Time (s),Test Time (s),Total Time (s)\n")
  298.  
  299.         # Execute each model
  300.         for model_func in models:
  301.             model_name = model_func.__name__.replace('_', ' ').title()
  302.             logging.info("Starting model: %s", model_name)
  303.             model_func()
  304.    
  305.     except Exception as e:
  306.         logging.error("Error in main function: %s", e)
  307.  
# Run the full benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()
  310.  
  311.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement