Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- from sklearn.linear_model import LassoCV, Lasso
- from sklearn.model_selection import train_test_split
- from sklearn.utils import resample
- from statsmodels.regression.linear_model import OLS
- from statsmodels.tools.tools import add_constant
- # Step 1: Generate synthetic data
- np.random.seed(42)
- n, p = 100, 20
- X = np.random.normal(0, 1, size=(n, p))
- beta = np.zeros(p)
- beta[:5] = [2, -3, 1.5, 0, 4] # True non-zero coefficients
- y = X @ beta + np.random.normal(0, 1, size=n)
- # Step 2: Split data into train and test sets
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
- # Step 3: Fit LASSO model using cross-validation
- lasso_cv = LassoCV(cv=5, random_state=42).fit(X_train, y_train)
- selected_features = np.where(lasso_cv.coef_ != 0)[0]
- print("Selected features by LASSO:", selected_features)
- # Step 4: Initialize variables for bootstrapping
- n_bootstraps = 1000
- coverage_results = []
- # Step 5: Perform bootstrapping for each selected feature
- for feature in selected_features:
- ci_coverage = 0
- for _ in range(n_bootstraps):
- # Resample data
- X_resampled, y_resampled = resample(X_train, y_train, random_state=None)
- # Refit LASSO model on resampled data
- lasso = Lasso(alpha=lasso_cv.alpha_).fit(X_resampled, y_resampled)
- # Check if the feature is selected in the resampled data
- if lasso.coef_[feature] != 0:
- # Fit an OLS model for the selected feature
- X_selected = X_resampled[:, feature]
- X_selected = add_constant(X_selected)
- ols = OLS(y_resampled, X_selected).fit()
- # Compute the confidence interval for the feature
- ci = ols.conf_int(alpha=0.05)
- # Check if the true coefficient lies within the confidence interval
- if ci[1][0] <= beta[feature] <= ci[1][1]:
- ci_coverage += 1
- # Store the coverage probability for this feature
- coverage_results.append(ci_coverage / n_bootstraps)
- # Step 6: Print coverage probabilities
- for i, feature in enumerate(selected_features):
- print(f"Feature {feature}: Coverage Probability = {coverage_results[i]:.3f}")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement