Advertisement
Danila_lipatov

Untitled

Nov 22nd, 2022 (edited)
151
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.33 KB | None | 0 0
  1. from statsmodels.stats.outliers_influence import variance_inflation_factor
  2. from statsmodels.tools.tools import add_constant
  3. import pandas as pd
  4. from patsy import dmatrices
  5. import matplotlib.pyplot as plt
  6. import numpy as np
  7. import seaborn as sns
  8. #https://www.codecamp.ru/blog/how-to-calculate-vif-in-python/ for each independent model
  9. #https://future-chem.com/esol-reg-aic/
  10. #https://stackoverflow.com/questions/42658379/variance-inflation-factor-in-python
  11. #https://www.hse.ru/data/2010/10/22/1222676695/%D0%A7%D0%B0%D1%81%D1%82%D1%8C%204%20%D0%9C%D1%83%D0%BB%D1%8C%D1%82%D0%B8%D0%BA%D0%BE%D0%BB%D0%BB%D0%B8%D0%BD%D0%B5%D0%B0%D1%80%D0%BD%D0%BE%D1%81%D1%82%D1%8C..pdf
  12. """df = pd.DataFrame(
  13. {'a': [1, 1, 2, 3, 4],
  14. 'b': [2, 2, 3, 2, 1],
  15. 'c': [4, 6, 7, 8, 9],
  16. 'd': [4, 3, 4, 5, 4]}
  17. )"""
  18. plt.figure(figsize=(6, 6))
  19. file_name = "Test"
  20. df = pd.read_excel(u"test_data", sheet_name="Sheet1")
  21. df_new = df.dropna()
  22. stop = 0
  23. fig = plt.figure(figsize=(7, 7))
  24. col_names = []
  25.  
  26. for x in df_new.columns:
  27. col_names.append(x)
  28.  
  29. #matrix = sns.heatmap(df_cor, annot=True)
  30.  
  31. #plt.savefig('heatmap_new_1.png', dpi=300, bbox_inches='tight')
  32.  
  33. #ds=(np.linalg.inv(df_cor))
  34. df_cor = df_new.corr()
  35. dfs = pd.DataFrame(np.linalg.inv(df_cor), df_cor.columns, df_cor.index)
  36.  
  37. for x in col_names:
  38. max_int = max(np.diag(dfs))
  39. if dfs[f'{x}'] == max(np.diag(dfs)) & dfs[f'{x}'] >= 10)]:
  40. df_new.drop(dfs[f'{x}'] == max(np.diag(dfs)))
  41. df_cor = df_new.corr()
  42. dfs = pd.DataFrame(np.linalg.inv(df_cor), df_cor.columns, df_cor.index)
  43.  
  44. #matrix_1 = sns.heatmap(dfs, annot=True)
  45.  
  46. #plt.savefig('heatmap_5.png', dpi=300, bbox_inches='tight')
  47.  
  48. print(df)
  49. """df = pd.DataFrame({'rating': [90, 85, 82, 88, 94, 90, 76, 75, 87, 86],
  50. 'points': [25, 20, 14, 16, 27, 20, 12, 15, 14, 19],
  51. 'assists': [5, 7, 7, 8, 5, 7, 6, 9, 9, 5],
  52. 'rebounds': [11, 8, 10, 6, 6, 9, 6, 10, 10, 7]})
  53. """
  54. X = add_constant(df)
  55. ds=(np.linalg.inv(np.corrcoef(df, rowvar=0)))
  56. print(ds)
  57. #Источник: https://pythonpip.ru/examples/obnaruzhenie-multikollinearnosti-vif-v-python
  58. #print (X)
  59.  
  60. y, D = dmatrices('b ~ a+c+b', data=df, return_type='dataframe')
  61. #calculate VIF for each explanatory variable
  62. vif = pd.DataFrame()
  63. vif['VIF'] = [variance_inflation_factor(D.values, i) for i in range(D.shape[1])]
  64. vif['variable'] = D.columns
  65. vif
  66.  
  67. stop = 0
  68.  
  69.  
  70.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement