Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Exploratory script: load two survey CSV extracts, inspect them, merge them,
# derive a binary attractiveness flag, and save the combined table.
import pandas as pd
import numpy as np

# Input files (absolute paths on the author's machine).
path_right = r"C:\Users\Max-13700kf\PycharmProjects\pythonProject\right_fix.csv"
path_left = r"C:\Users\Max-13700kf\PycharmProjects\pythonProject\left_fix.csv"
elagin_right_df = pd.read_csv(path_right)  # sometimes `sep` must be specified
elagin_left_df = pd.read_csv(path_left)

# 1: preview the left table.
print(elagin_left_df.head())
print('---------------------')

# 2: missing-value flags and schema of both tables.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
print(elagin_right_df.isnull().any(), end='\n-------------\n')
elagin_right_df.info()
print('-------------')
print(elagin_left_df.isnull().any(), end='\n-------------\n')
elagin_left_df.info()
print('-------------')

# 3: per-row duplicate flags for both tables.
print(elagin_right_df.duplicated(), end='\n-------------\n')
print(elagin_left_df.duplicated(), end='\n-------------\n')

# 4: drop duplicate rows from the left table, then outer-merge on the shared
# columns so rows present in only one table are kept.
# NOTE(review): only the left table is de-duplicated before the merge --
# confirm that duplicates in the right table are meant to be kept.
elagin_left_df = elagin_left_df.drop_duplicates()
df = pd.merge(
    elagin_left_df,
    elagin_right_df,
    how='outer',
    on=['id', 'male', 'age', 'is_spb', 'married', 'children', 'elagin_attractiv'],
)
df.info()

# 5: sort by education, then age (both ascending) and show the first 20 rows.
df = df.sort_values(['education', 'age'], ascending=[True, True])
print(df.head(20))

# 6: number of rows per attractiveness score.
# A scalar key (rather than a one-element list) keeps the group labels scalar.
group = df.groupby('elagin_attractiv')
print(group.groups)
for score, row_labels in group.groups.items():
    print(f'{score}:{len(row_labels)}')
print('-------')

# 7: binary flag: 0 when the score is below 4, 1 when it is 4 or higher.
# Rows matching neither comparison (e.g. a missing score) keep NaN.
df['att2'] = np.nan
df['att2'] = df['att2'].mask(df['elagin_attractiv'] < 4, 0)
df['att2'] = df['att2'].mask(df['elagin_attractiv'] >= 4, 1)
group = df.groupby('att2')
for flag, row_labels in group.groups.items():
    print(f'{flag}:{len(row_labels)}')

# 8: mean age per attractiveness score (2 decimals), then save the result.
print(df.groupby('elagin_attractiv')['age'].mean().round(2))
df.to_csv('res.csv')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement