Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Exploratory plots for `cars`: one subplot per column, then one histogram
# per column.
cars.plot(figsize=(15, 15), subplots=True)
plt.show()
cars.hist(figsize=(20, 12), bins=100)
plt.show()

#####################################################
# Outliers Detection
# NOTE: the bare expressions below only display a result in an interactive
# session (notebook/REPL); run as a plain script their values are discarded.
titanic.describe()  # minimum and maximum values hint at implausible entries

plt.figure(figsize=(12, 6))  # boxplot
titanic.boxplot("Age")
plt.show()

plt.figure(figsize=(12, 6))  # line plot
titanic.Age.plot()
plt.show()

# Sort in both directions to inspect the extremes at either end.
titanic.Age.sort_values(ascending=False)
titanic.Age.sort_values(ascending=True)

# Rows whose Age exceeds this limit are treated as outliers. The mask is
# computed once, before any correction, and reused below.
age_limit = 90
age_outlier_mask = titanic.Age > age_limit
titanic.loc[age_outlier_mask]  # filter df only with outliers

#############################################
# Outliers Handling
# Remember the labels of the outlier rows *before* correcting them, so the
# same rows can still be inspected once they no longer match the mask.
index_outliers = titanic.loc[age_outlier_mask].index
index_outliers

# Correct the outliers by dividing them by 10.
# NOTE(review): presumably these values have a misplaced decimal point -
# confirm against the data source.
titanic.loc[age_outlier_mask, "Age"] = titanic.loc[age_outlier_mask, "Age"] / 10
titanic.loc[index_outliers]

# Manual correction of a single row.
# NOTE(review): row label 217 and the value 42.0 are hard-coded - confirm
# they still refer to the intended record if the dataset changes.
titanic.loc[217, "Age"] = 42.0

plt.figure(figsize=(12, 6))  # last checking
titanic.Age.plot()
plt.show()

##########################################################################
# Cap and floor the `fare` column
titanic.fare.plot(figsize=(12, 8))
plt.show()
titanic.fare.describe()
titanic.fare.sort_values(ascending=False)

# Values above `fare_cap` are set to the cap and values below `fare_floor`
# are set to the floor; missing values are left untouched.
fare_cap = 250
fare_floor = 5
titanic["fare"] = titanic["fare"].clip(lower=fare_floor, upper=fare_cap)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement