Advertisement
Danila_lipatov

Ks_2samp vs Es_2samp

Oct 25th, 2022 (edited)
142
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.21 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. import scipy.stats
  4. import seaborn as sns
  5. import matplotlib.pyplot as plt
  6. import textwrap
  7.  
  8. #IMPORTANT LINKS
  9. #https://www.stata-journal.com/article.html?article=st0174
  10. #https://journals.sagepub.com/doi/pdf/10.1177/1536867X0900900307
  11. #https://www.aatbio.com/tools/kolmogorov-smirnov-k-s-test-calculator (online calculator for the two-sample KS test)
  12.  
  13. def get_nan_indexes(data_frame):
  14.     indexes = []
  15.     print(data_frame)
  16.     for column in data_frame:
  17.         index = data_frame[column].index[data_frame[column].apply(np.isnan)]
  18.         if len(index):
  19.             indexes.append(index[0])
  20.     df_index = data_frame.index.values.tolist()
  21.     return [df_index.index(i) for i in set(indexes)]
  22.  
  23. x = pd.Series([-1, np.NAN, 0, -44, 5,1, -6, 45 , 235,234 ,234 ,234,5,np.NAN,35,235,235,25, 23,52,52])
  24. y = pd.Series([-7, 1, -9, 3, np.NAN, -12, 534, 534,6 ,346,2 ,6323,234, 6 ,22,362 ,np.NAN, 3 ,232, 34, 20])
  25. z = pd.Series([346,2 ,47,234, 34 ,np.NAN,362 ,345, 3 ,234234, 34, 63])
  26. d = pd.Series([32, 34, 20])
  27. #z = pd.Series([-13, 14, -15, 0, -17, -18])
  28. #v = pd.Series([-7, np.NaN, -9, 3, -11, -12, 534, 534,6 ,346,2 ])
  29. xy = pd.DataFrame({'x': x, 'y': y, 'z' : z, 'd' : d})
  30. p_v_mass_1 = {}
  31. p_v_mass = {}
  32. for column in xy.columns:
  33.     for column2 in xy.columns:
  34.         df = pd.concat([xy[column], xy[column2]], axis=1)
  35.         df = df.dropna(subset=[column])
  36.         #print(xy[column2])
  37.         #print(xy[column])
  38.         #print(df)
  39.         if column != column2:
  40.             p_v_mass_1[(column, column2)] = scipy.stats.epps_singleton_2samp(xy[column2], df[column2])[0]
  41.             #print(df, sep=' ')
  42.         else:
  43.             p_v_mass_1[(column, column2)] = (len(df[column2]) / len(xy[column2]))
  44. print(p_v_mass_1)
  45.  
  46. for column in xy.columns:
  47.     for column2 in xy.columns:
  48.         df = pd.concat([xy[column], xy[column2]], axis=1)
  49.         df = df.dropna(subset=[column])
  50.         #print(xy[column2])
  51.         #print(xy[column])
  52.         #print(df)
  53.         if column != column2:
  54.             p_v_mass[(column, column2)] = scipy.stats.ks_2samp(xy[column2], df[column2])[0]
  55.             #print(df, sep=' ')
  56.         else:
  57.             p_v_mass[(column, column2)] = (len(df[column2]) / len(xy[column2]))
  58. print(p_v_mass)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement