Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import pandas as pd

# Load the session data and parse the session_start column into datetimes.
sessions = pd.read_csv('sessions.csv').pipe(
    lambda frame: frame.assign(
        session_start=pd.to_datetime(frame['session_start'])
    )
)

# Load the order data and parse the event_dt column into datetimes.
orders = pd.read_csv('book_orders.csv').pipe(
    lambda frame: frame.assign(event_dt=pd.to_datetime(frame['event_dt']))
)
def get_profiles(sessions, orders):
    """Build one profile row per user from session and order data.

    For every ``user_id`` in ``sessions`` the earliest session (by
    ``session_start``) supplies the first-visit timestamp, channel, device
    and region. Each user is then flagged as a payer when their ID occurs
    in ``orders``.

    Args:
        sessions: DataFrame with the columns ``user_id``, ``session_start``
            (datetime dtype), ``channel``, ``device`` and ``region``.
        orders: DataFrame with a ``user_id`` column; only that column is
            read.

    Returns:
        DataFrame with the columns ``user_id``, ``first_ts``, ``channel``,
        ``device``, ``region``, ``dt`` (calendar date of the first visit),
        ``month`` (first day of the first visit's month) and ``payer``
        (bool).
    """
    # Sort so that 'first' within each user group is the earliest session.
    profiles = (
        sessions.sort_values(by=['user_id', 'session_start'])
        .groupby('user_id')
        .agg(
            {
                'session_start': 'first',
                'channel': 'first',
                'device': 'first',
                'region': 'first',
            }
        )
        .rename(columns={'session_start': 'first_ts'})
        .reset_index()
    )

    profiles['dt'] = profiles['first_ts'].dt.date
    # Truncate to the month start through a Period round-trip:
    # astype('datetime64[M]') is rejected by pandas 2.x.
    profiles['month'] = profiles['first_ts'].dt.to_period('M').dt.to_timestamp()

    # A payer is a user whose ID appears in orders. Checking against
    # profiles' own IDs would mark every user as a payer.
    profiles['payer'] = profiles['user_id'].isin(orders['user_id'].unique())

    return profiles
# Build the user profiles from both datasets.
profiles = get_profiles(sessions, orders)

# Mean of the payer flag per region, sorted from highest to lowest.
payer_share_by_region = (
    profiles.groupby('region')
    .agg({'payer': 'mean'})
    .sort_values(by='payer', ascending=False)
)
print(payer_share_by_region)
# TODO: complete the code
# TODO: complete the code
Add Comment
Please, Sign In to add comment