Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Group by year: total Quantity per calendar year.
# `TransactionDate` must be datetime-like, because the `.dt` accessor is used.
df_year_quantity = (
    df_category_date_quantity
    .groupby(df_category_date_quantity.TransactionDate.dt.year)
    .agg({'Quantity': 'sum'})
    .reset_index()
)
df_year_quantity.head()

# Group by year and month: `assign` adds throw-away helper columns (yr, mnth)
# to a copy, so the source frame is left unchanged. The result is not stored;
# in a notebook it is simply displayed.
(
    df_category_date_quantity
    .assign(
        yr=df_category_date_quantity['TransactionDate'].dt.year,
        mnth=df_category_date_quantity['TransactionDate'].dt.month,
    )
    .groupby(['yr', 'mnth'])
    .agg({'Quantity': 'sum'})
    .reset_index()
)

# Group by category, year and month using `assign`.
# 'ACM Application L2 Ver2' is copied into a short `category` column so the
# group keys are easy to reference.
df_category_year_month_quantity = (
    df_category_date_quantity
    .assign(
        category=df_category_date_quantity['ACM Application L2 Ver2'],
        yr=df_category_date_quantity['TransactionDate'].dt.year,
        mnth=df_category_date_quantity['TransactionDate'].dt.month,
    )
    .groupby(['category', 'yr', 'mnth'])
    .agg({'Quantity': 'sum'})
    .reset_index()
)
df_category_year_month_quantity
- ####################################################################
# Per-director summary using the dict form of `agg`: number of titles,
# total and mean revenue, and mean vote count.
directors = df.groupby('director').agg({
    'title': 'count',
    'revenue_musd': ['sum', 'mean'],
    'vote_count': 'mean',
})
directors

# Because one entry of the dict is a list of functions, the result has
# two-level (column, function) column labels, so each ranking below has to
# address its column with a tuple.
directors.nlargest(20, ('title', 'count'))
directors.nlargest(20, ('revenue_musd', 'sum'))
directors.nlargest(20, ('revenue_musd', 'mean'))
directors.nlargest(20, ('vote_count', 'mean'))
# Named aggregation: each keyword becomes an output column built from a
# (source column, function) pair, so the result has flat, separately named
# columns instead of a column MultiIndex.
titanic.groupby('sex').agg(
    survived_total=('survived_column', 'sum'),
    survival_rate=('survived_column', 'mean'),
    mean_age=('age', 'mean'),
)

# Same technique with two grouping keys: mpg statistics per model year and origin.
cars.groupby(['model_year', 'origin']).agg(
    mean_mpg=('mpg', 'mean'),
    min_mpg=('mpg', 'min'),
    max_mpg=('mpg', 'max'),
)
- ##############################################################
# Single-column statistics per franchise: select the column on the GroupBy
# object, then reduce it.
df.groupby('Franchise').budget_musd.mean()
df.groupby('Franchise').revenue_musd.mean().nlargest(n=20)  # top 20 by mean revenue
df.groupby('Franchise').ROI.median()
df.groupby('Franchise').popularity.mean().sort_values(ascending=False)

# Several columns at once: select them with a list, then aggregate.
new_df = df.groupby('Franchise')[['budget_musd', 'ROI']].sum()

# Plot the grouped result directly; `subplots=True` draws one bar chart per column.
new_df.plot(kind='bar', subplots=True, figsize=(8, 15), fontsize=13)
plt.show()
- ################################################################
# To add a per-group statistic as a new column of the original DataFrame, use
# `transform`: unlike `agg`, it returns a result aligned with the original
# rows, so it can be assigned straight back as a column.
titanic['group_survived_rate'] = (
    titanic.groupby(['sex', 'pclass']).survived.transform('mean')
)
titanic.head()
- ####################################################################
# Use apply on groups
def five_oldest_survived(group):
    """Return the (up to) five oldest survivors of one group.

    Intended as the callable for ``DataFrameGroupBy.apply``.

    Args:
        group: DataFrame with a ``survived`` column (1 marks a survivor)
            and an ``age`` column.

    Returns:
        DataFrame with the rows where ``survived == 1`` that have the
        largest ``age`` values, at most five, ordered oldest first.
    """
    survivors = group[group.survived == 1]
    return survivors.nlargest(5, 'age')
# Run the helper once per `sex` group; apply stacks the per-group frames
# into a single result.
titanic.groupby('sex').apply(five_oldest_survived)
def two_most_fuel_efficient_cars(group):
    """Return name and mpg of the two most fuel-efficient cars in a group.

    Intended as the callable for ``DataFrameGroupBy.apply``.

    Args:
        group: DataFrame with at least ``name`` and ``mpg`` columns.

    Returns:
        DataFrame restricted to the ``name`` and ``mpg`` columns, holding
        the (up to) two rows with the highest ``mpg``, best first.
    """
    best_two = group.nlargest(2, 'mpg')
    return best_two.loc[:, ["name", "mpg"]]
# Top two cars by mpg for every (model_year, origin) combination.
df_the_most_fuel_eff_cars = (
    cars.groupby(['model_year', 'origin']).apply(two_most_fuel_efficient_cars)
)
- #####################################################################
# How to expand a grouping level into columns and visualize the result.
# `unstack()` moves the innermost group key (origin) from the row index into
# the columns, leaving one row per model_year.
df_year_origin = (
    cars.groupby(['model_year', 'origin']).mpg.mean().unstack().round(2)
)

# Transposing swaps rows and columns: origins become rows, model years columns.
cars.groupby(['model_year', 'origin']).mpg.mean().unstack().T

# One bar chart per column of df_year_origin.
df_year_origin.plot(kind='bar', subplots=True, sharex=False, figsize=(8, 17), fontsize=12)

# plt.ylabel() would label only the current axes, so with subplots=True the
# label is set explicitly on every axes of the figure instead.
for ax in plt.gcf().axes:
    ax.set_ylabel('avg_mpg per year')
plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement