Advertisement
elena1234

Conditional Filtering, any() and all() in Python

Apr 18th, 2022 (edited)
250
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.60 KB | Source Code | 0 0
  1. import pandas as pd
  2.  
  3. # Check for all elements in the column "origin" whether they are equal to "europe"
  4. mask1 = cars.origin == 'europe'
  5. mask1.all()
  6.  
  7. mask2 = df.Country == 'ARG'
  8. mask2.any()
  9.  
  10. #########################################################
  11. df.loc[:,'Order_ID']
  12.  
  13. mask1 = df.Sport == "Volleyball"
  14. df.loc[mask1]
  15.  
  16. #########################################################
  17. mask1 = df['genres'].str.contains('Science Fiction')
  18. mask2 = df['genres'].str.contains('Action')
  19. mask3 = df['cast'].str.contains('Bruce Willis')
  20. df.loc[mask1 & mask2 & mask3, ['title', 'genres', 'cast', 'vote_average']].sort_values(by=['vote_average'],ascending = False)
  21.  
  22.  
  23. ###########################################################
  24. # Filter by Date
  25. mask1 = df['release_date'].dt.date.astype(str) >= '2010'
  26. mask2 = df['release_date'].dt.date.astype(str) <= '2015'
  27. mask3 = df['production_companies'].str.contains('Pixar')
  28. df.loc[mask1 & mask2 & mask3].sort_values(by=['revenue_musd'], ascending = False)
  29.  
  30. mask4 = df.release_date.between(1960, 1969, inclusive = 'both')
  31. df.loc[mask4]
  32.  
  33. mask5 = df.release_date.isin([1960,1961,1962,1964]) # isin
  34. df.loc[mask5]
  35.  
  36. mask6 = df.release_date >= 1992
  37. df.loc[mask6]
  38. df.loc[~mask6]
  39.  
  40.  
  41. ##########################################################
  42. # Filter by many conditions
  43. mask1 = df['genres'].str.contains('Action')
  44. mask2 = df['genres'].str.contains('Thriller')
  45. mask3 = df['spoken_languages'].str.contains('English')
  46. df.loc[df['vote_average'] >= 7.5].loc[(mask1 | mask2) & mask3].sort_values(by=['release_date'], ascending = False)
  47.  
  48.  
  49. mask1 = df.Country.isin(["ITA", "FRA", "ESP", "USA"])
  50. df.loc[mask1]
  51.  
  52.  
  53. ###############################################################
  54. mask1 = titanic.sex == male
  55. mask2 = titanic.dtypes == object
  56.  
  57. titanic.loc[:, ~mask2] # gen only non object values (numeric)
  58. titanic.loc[mask1,~maks2] # only males with numeric data
  59. male_survived = titanic.loc[mask1 & mask2, :]
  60. titanic.loc[mask1 | mask2, ['sex','survived']]
  61.  
  62.      
  63. ###########################################################
  64. df_auto =  df_auto.loc[df_auto.Kilometes > 1111]
  65. df_auto =  df_auto.loc[df_auto['Years Automobile'] > 0)]
  66.  
  67. titanic.loc[titanic.sex=="Male", Age] # get only Age column        
  68.  
  69.  
  70. ###########################################################
  71. index_babies = titanic.loc[titanic.age < 1, 'age'].index
  72. titanic.loc[titanic.age < 1, 'age'] = 1 # where age < 1 changed the value to 1
  73. titanic.loc[index_babies] # how to check the result
  74.  
  75.  
  76. ############################################################
  77. not_73_74 = cars.loc[~cars.model_year.isin ([73,74]), ['mpg', 'name']] # not
  78.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement