Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from random import randrange

import pandas as pd

# Create a DataFrame with one integer, one string and one boolean column.
# Building it from a dict keeps each column name next to its data.
nums = [1, 4, 9, 16, 25, 36, 49]
strs = ["aa", "bb", "cc", "dd", "ee", "ff", "gg"]
bools = [True, False, True, True, False, False, True]
df = pd.DataFrame({"ns": nums, "ss": strs, "bs": bools})

print("**********************************")
print(df)
print()
print(df.ns)  # attribute-style column access
print()
print(df["ss"])  # key-style column access
print()
print("**********************************")
# Rows: select a single row by its index label; the result is a Series
# whose length equals the number of columns.
row_ind = 0
row = df.loc[row_ind]
print(f"{row}, length = {len(row)}")
print()

# Columns: select a single column by name; the result is a Series whose
# length equals the number of rows.
col_name = 'bs'
col = df.loc[:, col_name]
print(f"{col}, length = {len(col)}")
print()

# Elements: select one scalar by (row label, column name).
element = df.loc[1, "bs"]
print(element)
print()

# Rows where the boolean column 'bs' is True (boolean-mask selection).
# Despite its name, rows_ind holds the selected rows, not their indices.
rows_ind = df.loc[df["bs"]]
print(rows_ind)
print()
print()
# Load CSV files
print("**********************************")
# Default parsing: comma-separated, first line used as the header.
people = pd.read_csv("people.txt")
print(people)
print()

# Semicolon-separated file without a header row; column names are supplied
# explicitly. This replaces the frame loaded above.
people = pd.read_csv(
    "people2.txt",
    sep=";",
    header=None,
    names=["Age", "Height", "Weight"],
)
print(people)
print()

# Missing values are replaced by the mean of their column.
# numeric_only=True restricts the mean to numeric columns, so a stray
# non-numeric column does not raise TypeError on pandas >= 2.0; such a
# column is simply left unfilled.
people = people.fillna(people.mean(numeric_only=True))
print(people)
print()
print()
# Statistical info about the `people` frame
print("**********************************")
# First rows of the frame (pandas shows five by default).
print("Head: ")
print(people.head())
print()

# Summary statistics for each column.
print("Describe: ")
print(people.describe())
print()

# Count of values still missing in each column.
print("Null elements in each column: ")
print(people.isnull().sum())
print()
print()
# Convert categorical data to numerical (one-hot encoding)
from sklearn.preprocessing import OneHotEncoder

# Create the encoder with dense (non-sparse) output.
# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output` and 1.4
# removed the old name, so try the current spelling first and fall back to
# the legacy one on older releases.
try:
    encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(handle_unknown="ignore", sparse=False)

# Learn the categories of every column and encode the frame in one step;
# `encoder` stays fitted afterwards.
people_one_hot = encoder.fit_transform(people)
print("**********************************")
print(people_one_hot)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement