Advertisement
elena1234

concatenate and merge two dataframes in Python

Jan 6th, 2023 (edited)
1,310
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.67 KB | None | 0 0
  # Recipes for combining pandas DataFrames: vertical concatenation,
  # index-aligned arithmetic, and the common join variants.
  #
  # The input frames (men2004, men2008, ath_2008, ath_2012, top1_df, top2_df,
  # olimpic1, olimpic2) are not created here and must already exist.
  import pandas as pd

  # Concatenate two or more DataFrames vertically (axis=0).
  # `keys` adds an outer index level that labels the rows of each source frame.
  # pandas documents `names` as a list of level names, so the single name is
  # passed as a one-element list rather than as a bare string.
  pd.concat(
      [men2004, men2008],
      ignore_index=False,
      axis=0,
      keys=[2004, 2008],
      names=["Year"],
  )

  # Add (index-aligned sum): total the Gold, Silver and Bronze medals over both
  # editions.
  # NOTE: set_index(..., inplace=True) mutates the frames, so "Athlete" stops
  # being a column; reload the frames before running this section again.
  ath_2008.set_index("Athlete", inplace=True)
  ath_2012.set_index("Athlete", inplace=True)
  # fill_value=0 makes an athlete present in only one frame count as 0 in the
  # other instead of producing NaN.
  add = ath_2008.add(ath_2012, fill_value=0)
  add.head(10)

  # Subtract (index-aligned difference), again treating missing entries as 0.
  top1_df.sub(top2_df, fill_value=0)

  # For every merge below:
  #   * `suffixes` renames overlapping non-key columns from the left and right
  #     frame respectively;
  #   * `indicator=True` adds a `_merge` column whose value is 'left_only',
  #     'right_only' or 'both'.

  # Outer join: keep every athlete from either frame.
  olimpic1.merge(
      olimpic2,
      how="outer",
      on="Athlete",
      suffixes=("_2005", "_2020"),
      indicator=True,
  )

  # Inner join: keep only athletes present in both frames.
  olimpic1.merge(
      olimpic2,
      how="inner",
      on="Athlete",
      suffixes=("_2005", "_2020"),
      indicator=True,
  )

  # Outer join without the intersection: athletes in exactly one frame.
  combo_df = olimpic1.merge(
      olimpic2,
      how="outer",
      on="Athlete",
      suffixes=("_2005", "_2020"),
      indicator=True,
  )
  combo_df.loc[combo_df["_merge"] != "both"]

  # Left join without the intersection: athletes only in the left frame.
  combo_df = olimpic1.merge(
      olimpic2,
      how="outer",
      on="Athlete",
      suffixes=("_2005", "_2020"),
      indicator=True,
  )
  combo_df.loc[combo_df["_merge"] == "left_only"]

  # Right join without the intersection: athletes only in the right frame.
  combo_df = olimpic1.merge(
      olimpic2,
      how="outer",
      on="Athlete",
      suffixes=("_2005", "_2020"),
      indicator=True,
  )
  combo_df.loc[combo_df["_merge"] == "right_only"]

  # Left join: keep every row of the left frame, matched where possible.
  combo_df = olimpic1.merge(
      olimpic2,
      how="left",
      on="Athlete",
      suffixes=("_2005", "_2020"),
      indicator=True,
  )

  # Join on more than one column: rows match only when both Athlete and Medal
  # agree. The two statements are alternative variants; the second assignment
  # replaces the result of the first, so keep only the one you need.
  combo_df = olimpic1.merge(
      olimpic2,
      how="outer",
      on=["Athlete", "Medal"],
      suffixes=("_2005", "_2020"),
      indicator=True,
  )
  combo_df = olimpic1.merge(
      olimpic2,
      how="inner",
      on=["Athlete", "Medal"],
      suffixes=("_2005", "_2020"),
      indicator=True,
  )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement