Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/home/ounis/pyapps/virtual_envs/pandas_test/bin/python
- #
- # by OuNiS 10.2023
- #
- #
- # https://www.geeksforgeeks.org/data-structures-in-pandas/?ref=shm
- # https://pandas.pydata.org/pandas-docs/stable/index.html
- # https://stackoverflow.com/questions/16476924/how-to-iterate-over-rows-in-a-dataframe-in-pandas
- # https://stackoverflow.com/posts/55557758/revisions
- #
- #
# Path of the input text file (one "num=name" record per line).
FILENAME = "/home/ounis/pyapps/mfkig.txt"
# Character separating the two fields on each line of the input file.
FIELDSEP = "="
# Column names assigned to the fields, in the order they appear on a line.
FIELD1 = "num"
FIELD2 = "name"
FIELDS = [FIELD1, FIELD2]
- #
- #
'''
Expected file content (one record per line):
1=Egmont
2=Multiversum
3=Zin Zin Press
4=Wydawnictwo Kurc
5=Hanami
6=Scream
7=Timof Comics
...
Note: the file has no header row with column names and does not use
a standard CSV separator; the fields are separated by "=".
'''
# The progress message is printed before the import on purpose: it is
# emitted first, then pandas is loaded.
print("Importing modules...")
import pandas as pd  # noqa: E402  (deliberately placed after the message)

# Show which file is going to be processed.
print(FILENAME)
def pressENTER():
    """Pause the script until the user presses ENTER.

    Shows the prompt ``ENTER...`` and discards whatever was typed.
    """
    input("ENTER...")
def load_pd(fname, fieldsep):
    """Load the data file with ``pandas.read_csv``.

    Args:
        fname: Path of the file to read.
        fieldsep: Field separator, passed to ``read_csv`` as ``sep``.

    Returns:
        A DataFrame whose columns are named after the entries of ``FIELDS``.
    """
    print("Loading data...")
    # header=None: the file has no header row, so the column names are
    # supplied explicitly from FIELDS (copied so the constant is not shared).
    return pd.read_csv(fname, sep=fieldsep, header=None, names=list(FIELDS))
# Load the data manually, for files that are not proper CSV.
def load_own(fname, fieldsep):
    """Parse a separator-delimited text file into a DataFrame by hand.

    Each non-blank line is stripped of trailing whitespace and split on
    ``fieldsep``; the resulting values are appended, in order, to the
    columns named in ``FIELDS``. All values are kept as strings.

    Args:
        fname: Path of the text file to read.
        fieldsep: String separating the fields on each line.

    Returns:
        A DataFrame with one column per entry of ``FIELDS``, or ``None`` if
        any non-blank line does not split into exactly ``len(FIELDS)`` values.
    """
    columns = {field: [] for field in FIELDS}
    with open(fname) as file:
        for line in file:
            line = line.rstrip()
            if not line:
                # A blank line (e.g. an empty last line) carries no record;
                # skip it instead of rejecting the whole file.
                continue
            vals = line.split(fieldsep)
            if len(vals) != len(FIELDS):
                # Malformed record: signal failure to the caller.
                return None
            for field, val in zip(FIELDS, vals):
                columns[field].append(val)
    return pd.DataFrame(columns)
# --- Script body: load the file, then demonstrate a few DataFrame operations.
data = load_own(FILENAME, FIELDSEP)
if data is None:
    # load_own returns None when a line cannot be split into the expected
    # fields; stop here with a clear message instead of failing on data.head.
    raise SystemExit(f"Cannot parse {FILENAME}: a line does not match the expected fields")

print("Listing first 25 rows:")
print(data.head(25))

pressENTER()
print("listing first 25 rows sorted by 'name':")
print(data.sort_values(by=FIELD2).head(25))
# Alternative: sort by column position instead of by column name, e.g.
#   data.sort_values(data.columns[1]).head(25)

# Renaming the columns; reference:
# https://www.geeksforgeeks.org/how-to-rename-columns-in-pandas-dataframe/
pressENTER()
print("Changing columns names\nfrom:")
print(data.columns)
# inplace=True modifies `data` itself rather than returning a renamed copy.
data.rename(columns={FIELD1: "NUM", FIELD2: "NAME"}, inplace=True)
print("to:")
print(data.columns)
print(data.head(3))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement