Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import textwrap

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
import seaborn as sns
# IMPORTANT LINKS
# https://www.stata-journal.com/article.html?article=st0174
# https://journals.sagepub.com/doi/pdf/10.1177/1536867X0900900307
# https://www.aatbio.com/tools/kolmogorov-smirnov-k-s-test-calculator  (online calculator for the two-sample KS test)
def get_nan_indexes(data_frame):
    """Return the row positions of the first missing value in each column.

    For every column that contains at least one missing value, the
    zero-based row position of its first missing entry is collected.
    Positions shared by several columns are reported once.

    Args:
        data_frame: The ``pandas.DataFrame`` to inspect.

    Returns:
        A sorted list of integer row positions (not index labels). The list
        is empty when no column contains a missing value.
    """
    first_nan_positions = set()
    # ``items()`` yields one Series per column; ``isna()`` is vectorised and,
    # unlike ``apply(np.isnan)``, also works on non-numeric columns.
    for _, series in data_frame.items():
        nan_positions = np.flatnonzero(series.isna().to_numpy())
        if nan_positions.size:
            # Positions are taken directly from the mask, so repeated index
            # labels cannot redirect the result to the wrong row.
            first_nan_positions.add(int(nan_positions[0]))
    # Sort so the result does not depend on set iteration order.
    return sorted(first_nan_positions)
# Sample data with missing values. The series have different lengths, so the
# DataFrame constructor pads the shorter ones with NaN.
# ``np.nan`` is used because the ``np.NAN`` / ``np.NaN`` aliases were removed
# in NumPy 2.0.
x = pd.Series([-1, np.nan, 0, -44, 5, 1, -6, 45, 235, 234, 234, 234, 5,
               np.nan, 35, 235, 235, 25, 23, 52, 52])
y = pd.Series([-7, 1, -9, 3, np.nan, -12, 534, 534, 6, 346, 2, 6323, 234, 6,
               22, 362, np.nan, 3, 232, 34, 20])
z = pd.Series([346, 2, 47, 234, 34, np.nan, 362, 345, 3, 234234, 34, 63])
d = pd.Series([32, 34, 20])
# z = pd.Series([-13, 14, -15, 0, -17, -18])
# v = pd.Series([-7, np.nan, -9, 3, -11, -12, 534, 534, 6, 346, 2])
xy = pd.DataFrame({'x': x, 'y': y, 'z': z, 'd': d})


def missingness_statistics(frame, two_sample_test, min_sample_size=1):
    """Compare each column with itself restricted to rows where another is observed.

    For every ordered pair ``(column, column2)`` of distinct columns, the
    non-missing values of ``column2`` are compared with the non-missing
    values of ``column2`` taken only from rows where ``column`` is not
    missing. For ``column == column2`` the entry is the fraction of rows in
    which the column is not missing.

    Args:
        frame: ``pandas.DataFrame`` to analyse.
        two_sample_test: Callable taking two samples and returning a result
            whose element ``0`` is stored (for the SciPy two-sample tests
            used below that element is the test statistic).
        min_sample_size: Smallest sample size the test accepts. Pairs where
            either sample is smaller get ``nan`` instead of calling the test.

    Returns:
        Dict mapping ``(column, column2)`` to a float.
    """
    results = {}
    row_count = len(frame)
    for column in frame.columns:
        observed_rows = frame[column].notna()
        for column2 in frame.columns:
            if column == column2:
                # Diagonal: share of rows where the column is observed.
                results[(column, column2)] = (
                    int(observed_rows.sum()) / row_count
                    if row_count else float("nan")
                )
                continue
            # Missing values are dropped from both samples: the tests either
            # reject non-finite input or return NaN for it.
            full_sample = frame[column2].dropna().to_numpy()
            restricted_sample = (
                frame.loc[observed_rows, column2].dropna().to_numpy()
            )
            if min(len(full_sample), len(restricted_sample)) < min_sample_size:
                results[(column, column2)] = float("nan")
            else:
                # NOTE(review): element 0 is the test statistic, not the
                # p-value, although the result dicts are named ``p_v_mass*``
                # -- confirm which one is intended.
                results[(column, column2)] = float(
                    two_sample_test(full_sample, restricted_sample)[0]
                )
    return results


# Epps-Singleton requires at least 5 observations in each sample.
p_v_mass_1 = missingness_statistics(
    xy, scipy.stats.epps_singleton_2samp, min_sample_size=5
)
print(p_v_mass_1)

# Two-sample Kolmogorov-Smirnov only needs non-empty samples.
p_v_mass = missingness_statistics(xy, scipy.stats.ks_2samp)
print(p_v_mass)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement