Advertisement
makispaiktis

Kaggle - Exercise 7 - Create my own plots

Jul 2nd, 2023 (edited)
1,131
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.04 KB | None | 0 0
  1. import pandas as pd
  2. pd.plotting.register_matplotlib_converters()
  3. import matplotlib.pyplot as plt
  4. %matplotlib inline
  5. import seaborn as sns
  6. print("Setup Complete")
  7.  
  8. # 0. Read the data
  9. my_filepath = "/kaggle/input/fivethirtyeight-comic-characters-dataset/dc-wikia-data.csv"
  10. my_data = pd.read_csv(my_filepath, index_col="page_id")
  11. print(my_data.head())
  12. print(my_data.shape)
  13.  
  14. # 1. KDE: Distribution of appearances
  15. print("Average appearances in original data =", my_data["APPEARANCES"].mean(), end='\n\n')
  16. sns.kdeplot(data=my_data, x="APPEARANCES", fill=True)
  17. plt.title("Original Data: Heroes appearances density function")
  18. plt.show()
  19.  
  20. # 2. KDE while filtering: Keep the rows, where APPEARANCES <= 50 ---> A new dataframe is created
  21. new_data = my_data[my_data["APPEARANCES"] <= 50]
  22. print("Average appearances in filtered data =", new_data["APPEARANCES"].mean(), end='\n\n')
  23. sns.kdeplot(data=new_data, x="APPEARANCES", fill=True)
  24. plt.title("Filtered data: Heroes appearances density function")
  25. plt.show()
  26.  
  27. # 3. Joint plot: 2D-KDE
  28. sns.jointplot(data=my_data, x="YEAR", y="APPEARANCES", kind="kde")
  29. # plt.title("2D-KDE: Year vs Appearances")
  30. plt.show()
  31. sns.jointplot(data=my_data, x="YEAR", y="APPEARANCES", kind="hist")
  32. # plt.title("2D-HIST: Year vs Appearances")
  33. plt.show()
  34.  
  35. # 4. Scatter plot to reveal the relation between year, appearances and ID
  36. sns.scatterplot(x=my_data['YEAR'], y=my_data['APPEARANCES'])
  37. plt.title("Relation between: year and appearances")
  38. plt.show()
  39. sns.scatterplot(x=my_data['YEAR'], y=my_data['APPEARANCES'], hue=my_data['ID'])
  40. plt.title("Relation between: year, appearances and ID")
  41. plt.show()
  42.  
  43. df1 = my_data[my_data['ID'] == 'Secret Identity']
  44. df2 = my_data[my_data['ID'] == 'Public Identity']
  45. df3 = my_data[my_data['ID'] == 'Identity Unknown']
  46. SUM1 = df1.shape[0]
  47. SUM2 = df2.shape[0]
  48. SUM3 = df3.shape[0]
  49. SUM = my_data.shape[0]
  50. print("Heroes with Secret Identity  = {}/{}".format(SUM1, SUM))
  51. print("Heroes with Public Identity  = {}/{}".format(SUM2, SUM))
  52. print("Heroes with Identity Unknown = {}/{}".format(SUM3, SUM))
  53.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement