Advertisement
Natalia_L

code

Dec 19th, 2024
51
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.19 KB | None | 0 0
  1. import numpy as np
  2. from sklearn.linear_model import LassoCV, Lasso
  3. from sklearn.model_selection import train_test_split
  4. from sklearn.utils import resample
  5. from statsmodels.regression.linear_model import OLS
  6. from statsmodels.tools.tools import add_constant
  7.  
  8. # Step 1: Generate synthetic data
  9. np.random.seed(42)
  10. n, p = 100, 20
  11. X = np.random.normal(0, 1, size=(n, p))
  12. beta = np.zeros(p)
  13. beta[:5] = [2, -3, 1.5, 0, 4]  # True non-zero coefficients
  14. y = X @ beta + np.random.normal(0, 1, size=n)
  15.  
  16. # Step 2: Split data into train and test sets
  17. X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
  18.  
  19. # Step 3: Fit LASSO model using cross-validation
  20. lasso_cv = LassoCV(cv=5, random_state=42).fit(X_train, y_train)
  21. selected_features = np.where(lasso_cv.coef_ != 0)[0]
  22. print("Selected features by LASSO:", selected_features)
  23.  
  24. # Step 4: Initialize variables for bootstrapping
  25. n_bootstraps = 1000
  26. coverage_results = []
  27.  
  28. # Step 5: Perform bootstrapping for each selected feature
  29. for feature in selected_features:
  30.     ci_coverage = 0
  31.  
  32.     for _ in range(n_bootstraps):
  33.         # Resample data
  34.         X_resampled, y_resampled = resample(X_train, y_train, random_state=None)
  35.  
  36.         # Refit LASSO model on resampled data
  37.         lasso = Lasso(alpha=lasso_cv.alpha_).fit(X_resampled, y_resampled)
  38.  
  39.         # Check if the feature is selected in the resampled data
  40.         if lasso.coef_[feature] != 0:
  41.             # Fit an OLS model for the selected feature
  42.             X_selected = X_resampled[:, feature]
  43.             X_selected = add_constant(X_selected)
  44.             ols = OLS(y_resampled, X_selected).fit()
  45.  
  46.             # Compute the confidence interval for the feature
  47.             ci = ols.conf_int(alpha=0.05)
  48.  
  49.             # Check if the true coefficient lies within the confidence interval
  50.             if ci[1][0] <= beta[feature] <= ci[1][1]:
  51.                 ci_coverage += 1
  52.  
  53.     # Store the coverage probability for this feature
  54.     coverage_results.append(ci_coverage / n_bootstraps)
  55.  
  56. # Step 6: Print coverage probabilities
  57. for i, feature in enumerate(selected_features):
  58.     print(f"Feature {feature}: Coverage Probability = {coverage_results[i]:.3f}")
  59.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement