Advertisement
elena1234

Outlier detection and handling in Python

Jan 5th, 2023 (edited)
1,044
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.38 KB | None | 0 0
  # Quick visual overview of the `cars` dataframe.
  # Line plots, drawn as separate subplots (subplots=True) on a 15x15 figure.
  cars.plot(subplots=True, figsize=(15, 15))
  plt.show()

  # Histograms of the dataframe's columns, 100 bins each, on a 20x12 figure.
  cars.hist(bins=100, figsize=(20, 12))
  plt.show()
  6.  
  # ---------------------------------------------------------------------
  # Outlier detection
  # ---------------------------------------------------------------------
  # NOTE: the bare expressions below only display their result when run
  # interactively (e.g. as the last statement of a notebook cell).

  # Summary statistics: check the reported min / max values for
  # implausible extremes.
  titanic.describe()

  # Box plot of the Age column.
  plt.figure(figsize=(12, 6))
  titanic.boxplot(column="Age")
  plt.show()

  # Line plot of Age against the row index.
  plt.figure(figsize=(12, 6))
  titanic["Age"].plot()
  plt.show()

  # Inspect both ends of the sorted values for suspicious entries.
  titanic["Age"].sort_values(ascending=False)
  titanic["Age"].sort_values()

  # Show only the rows whose Age is above 90.
  titanic[titanic["Age"] > 90]
  23.  
  # ---------------------------------------------------------------------
  # Outlier handling
  # ---------------------------------------------------------------------
  # Rows whose Age is above 90 are treated as outliers. The mask is built
  # once, before any value is modified, and reused below.
  age_above_90 = titanic["Age"] > 90

  # Remember the row labels of the outliers so the same rows can be
  # re-inspected after the correction.
  index_outliers = titanic[age_above_90].index
  index_outliers

  # Correct the flagged ages by dividing them by 10.
  # NOTE(review): presumably these values have a misplaced decimal point
  # (e.g. 250 instead of 25.0) -- confirm against the raw data.
  corrected_ages = titanic.loc[age_above_90, "Age"] / 10
  titanic.loc[age_above_90, "Age"] = corrected_ages
  titanic.loc[index_outliers]

  # Manual fix for the row labelled 217.
  # NOTE(review): the origin of the value 42.0 is not visible here.
  titanic.loc[217, "Age"] = 42.0

  # Final visual check of the Age column after the corrections.
  plt.figure(figsize=(12, 6))
  titanic["Age"].plot()
  plt.show()
  37.  
  # ---------------------------------------------------------------------
  # Clamp every out-of-range value of the fare column
  # ---------------------------------------------------------------------
  # Look at the column first: line plot, summary statistics, and the
  # values sorted from highest to lowest.
  titanic["fare"].plot(figsize=(12, 8))
  plt.show()
  titanic["fare"].describe()
  titanic["fare"].sort_values(ascending=False)

  # Fares above the cap are replaced by the cap and fares below the floor
  # by the floor; values already inside [fare_floor, fare_cap] are kept.
  fare_floor = 5
  fare_cap = 250
  titanic["fare"] = titanic["fare"].clip(lower=fare_floor, upper=fare_cap)
  50.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement