Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from patsy import dmatrices
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
# https://www.codecamp.ru/blog/how-to-calculate-vif-in-python/ (VIF for each independent variable)
# https://future-chem.com/esol-reg-aic/
# https://stackoverflow.com/questions/42658379/variance-inflation-factor-in-python
# https://www.hse.ru/data/2010/10/22/1222676695/%D0%A7%D0%B0%D1%81%D1%82%D1%8C%204%20%D0%9C%D1%83%D0%BB%D1%8C%D1%82%D0%B8%D0%BA%D0%BE%D0%BB%D0%BB%D0%B8%D0%BD%D0%B5%D0%B0%D1%80%D0%BD%D0%BE%D1%81%D1%82%D1%8C..pdf
# Sample frames for trying the script without the Excel workbook:
#
#   df = pd.DataFrame({'a': [1, 1, 2, 3, 4],
#                      'b': [2, 2, 3, 2, 1],
#                      'c': [4, 6, 7, 8, 9],
#                      'd': [4, 3, 4, 5, 4]})
#
#   df = pd.DataFrame({'rating': [90, 85, 82, 88, 94, 90, 76, 75, 87, 86],
#                      'points': [25, 20, 14, 16, 27, 20, 12, 15, 14, 19],
#                      'assists': [5, 7, 7, 8, 5, 7, 6, 9, 9, 5],
#                      'rebounds': [11, 8, 10, 6, 6, 9, 6, 10, 10, 7]})

# A column whose VIF reaches this value is treated as collinear and removed.
VIF_THRESHOLD = 10


def inverse_correlation(frame):
    """Return the inverse of the correlation matrix of ``frame``.

    The diagonal of the returned matrix holds the variance inflation factor
    (VIF) of each column with respect to all the other columns.

    Args:
        frame: DataFrame whose columns are the variables to compare.

    Returns:
        A square DataFrame labelled on both axes with the column names of
        ``frame.corr()``.

    Raises:
        numpy.linalg.LinAlgError: if the correlation matrix is singular
            (for example, when two columns are exact linear copies).
    """
    corr = frame.corr()
    return pd.DataFrame(
        np.linalg.inv(corr.to_numpy()),
        index=corr.index,
        columns=corr.columns,
    )


def drop_high_vif_columns(frame, threshold=VIF_THRESHOLD):
    """Iteratively drop the most collinear column until all VIFs are low.

    On every pass the VIFs are read from the diagonal of the inverse
    correlation matrix.  If the largest one is at least ``threshold`` that
    column is removed and the VIFs are recomputed, because removing one
    variable changes the VIF of every remaining one.

    Args:
        frame: DataFrame of variables; it is not modified.
        threshold: smallest VIF that still triggers a removal.

    Returns:
        A DataFrame with the surviving columns of ``frame``.  At least one
        column is always kept.

    Raises:
        numpy.linalg.LinAlgError: if a correlation matrix met along the way
            is singular.
    """
    reduced = frame
    while reduced.shape[1] > 1:
        inverse = inverse_correlation(reduced)
        vifs = pd.Series(np.diag(inverse.to_numpy()), index=inverse.index)
        worst = vifs.idxmax()
        if vifs[worst] < threshold:
            break
        reduced = reduced.drop(columns=worst)
    return reduced


def main():
    """Load the workbook, prune collinear columns and print VIF diagnostics."""
    df = pd.read_excel("test_data", sheet_name="Sheet1")

    # Rows with missing values are discarded before the correlation-based
    # pruning so that every column is measured on the same observations.
    df_new = drop_high_vif_columns(df.dropna(), VIF_THRESHOLD)
    print(df)
    print("Columns kept after VIF filtering:", list(df_new.columns))

    # To visualise the result, e.g.:
    #   sns.heatmap(inverse_correlation(df_new), annot=True)
    #   plt.savefig('heatmap_5.png', dpi=300, bbox_inches='tight')

    # Inverse correlation matrix of the full data set, computed with numpy.
    # NOTE(review): this uses the raw frame, not the NaN-free one; confirm
    # the sheet has no missing cells or the result will contain NaN.
    ds = np.linalg.inv(np.corrcoef(df, rowvar=False))
    print(ds)

    # Source: https://pythonpip.ru/examples/obnaruzhenie-multikollinearnosti-vif-v-python
    # NOTE(review): the formula expects columns named a, b and c (as in the
    # first sample frame above) and lists b on both sides; only the design
    # matrix on the right-hand side is used below.
    _, design = dmatrices('b ~ a+c+b', data=df, return_type='dataframe')

    # Calculate VIF for each explanatory variable.
    vif = pd.DataFrame()
    vif['VIF'] = [
        variance_inflation_factor(design.values, i)
        for i in range(design.shape[1])
    ]
    vif['variable'] = design.columns
    print(vif)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement