code

import numpy as np
from sklearn.linear_model import LassoCV, Lasso
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from statsmodels.regression.linear_model import OLS
from statsmodels.tools.tools import add_constant

# Step 1: Generate synthetic data
np.random.seed(42)
n, p = 100, 20
X = np.random.normal(0, 1, size=(n, p))
beta = np.zeros(p)
beta[:5] = [2, -3, 1.5, 0, 4]  # True non-zero coefficients
y = X @ beta + np.random.normal(0, 1, size=n)

# Step 2: Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 3: Fit LASSO model using cross-validation
lasso_cv = LassoCV(cv=5, random_state=42).fit(X_train, y_train)
selected_features = np.where(lasso_cv.coef_ != 0)[0]
print("Selected features by LASSO:", selected_features)

# Step 4: Initialize variables for bootstrapping
n_bootstraps = 1000
coverage_results = []

# Step 5: Perform bootstrapping for each selected feature
for feature in selected_features:
    ci_coverage = 0

    for _ in range(n_bootstraps):
        # Resample data
        X_resampled, y_resampled = resample(X_train, y_train, random_state=None)

        # Refit LASSO model on resampled data
        lasso = Lasso(alpha=lasso_cv.alpha_).fit(X_resampled, y_resampled)

        # Check if the feature is selected in the resampled data
        if lasso.coef_[feature] != 0:
            # Fit an OLS model for the selected feature
            X_selected = X_resampled[:, feature]
            X_selected = add_constant(X_selected)
            ols = OLS(y_resampled, X_selected).fit()

            # Compute the confidence interval for the feature
            ci = ols.conf_int(alpha=0.05)

            # Check if the true coefficient lies within the confidence interval
            if ci[1][0] <= beta[feature] <= ci[1][1]:
                ci_coverage += 1

    # Store the coverage probability for this feature
    coverage_results.append(ci_coverage / n_bootstraps)

# Step 6: Print coverage probabilities
for i, feature in enumerate(selected_features):
    print(f"Feature {feature}: Coverage Probability = {coverage_results[i]:.3f}")