Advertisement
FranzVuttke

pandas_basics.py

Oct 4th, 2023
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.13 KB | Source Code | 0 0
  1. #!/home/ounis/pyapps/virtual_envs/pandas_test/bin/python
  2.  
  3.  
  4. #
  5. # by OuNiS 10.2023
  6. #
  7. #
  8. # https://www.geeksforgeeks.org/data-structures-in-pandas/?ref=shm
  9. # https://pandas.pydata.org/pandas-docs/stable/index.html
  10. # https://stackoverflow.com/questions/16476924/how-to-iterate-over-rows-in-a-dataframe-in-pandas
  11. # https://stackoverflow.com/posts/55557758/revisions
  12. #
  13. #
  14.  
  15. FILENAME = "/home/ounis/pyapps/mfkig.txt"
  16. FIELDSEP = "="
  17.  
  18. FIELD1 = "num"
  19. FIELD2 = "name"
  20.  
  21. FIELDS = [FIELD1, FIELD2]
  22. #
  23. #
  24. '''
  25. file content:
  26. 1=Egmont
  27. 2=Multiversum
  28. 3=Zin Zin Press
  29. 4=Wydawnictwo Kurc
  30. 5=Hanami
  31. 6=Scream
  32. 7=Timof Comics
  33. ...
  34.  
  35. no header with column names, no standard separator
  36. '''
  37.  
  38. print("Importing modules...")
  39. import pandas as pd
  40.  
  41. print(FILENAME)
  42.  
  43. def pressENTER():
  44.     input("ENTER...")
  45.  
  46. def load_pd(fname, fieldsep):
  47.     print("Loading data...")
  48.     return pd.read_csv(fname, sep=fieldsep, header=None, names=[field for field in FIELDS])
  49.    
  50. # loading data from file own way
  51. # if the file isn't proper CSV...
  52. def load_own(fname, fieldsep):
  53.     data_temp = {}
  54.     for field in FIELDS:
  55.         data_temp[field] = []
  56.     with open(fname) as file:
  57.          for line in file:
  58.              line = line.rstrip()
  59.              vals = line.split(fieldsep)
  60.              if len(vals) != len(FIELDS):
  61.                  return None
  62.              else:
  63.                  for i, field in enumerate(FIELDS):
  64.                      data_temp[FIELDS[i]].append(vals[i])
  65.     return pd.DataFrame(data_temp)                    
  66.    
  67.  
  68. data = load_own(FILENAME, FIELDSEP)
  69.  
  70. print("Listing first 25 rows:")
  71. print(data.head(25))
  72. pressENTER()
  73.  
  74. print("listing first 25 rows sorted by 'name':")
  75. print(data.sort_values(by="name").head(25))
  76. # or
  77. # sort by column number
  78. # print(data.sort_values(data.columns[1]).head(25))
  79.  
  80. # changing columns/fields names
  81. # manual:
  82. # https://www.geeksforgeeks.org/how-to-rename-columns-in-pandas-dataframe/
  83. pressENTER()
  84. print("Changing columns names\nfrom:")
  85. print(data.columns)
  86. data.rename(columns= {"num": "NUM", "name": "NAME"}, inplace=True)
  87. print("to:")
  88. print(data.columns)
  89. print(data.head(3))
  90.  
  91.  
  92.  
  93.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement