Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Load the semicolon-separated data file. It has no header row, so the
# column names are supplied explicitly.
print("**********************************")
people = pd.read_csv(
    "people.txt",
    sep=";",
    header=None,
    names=["Age", "Height", "Weight"],
)
print(people)
print()

# Replace every missing value with the mean of its column.
people = people.fillna(people.mean())
print(people)
print()
print()

# Statistical info
print("**********************************")
print("Head: ")
print(people.head())
print()
print("Describe: ")
print(people.describe())
print()
# After the fill above this is a sanity check on what is still missing.
print("Null elements in each column: ")
print(people.isnull().sum())
print()
print()

# One-hot encode the table and request a dense array so it prints readably.
# NOTE(review): the encoder is fitted on all three columns as loaded, so
# each distinct value in a column becomes its own indicator column --
# confirm this is intended for numeric Age/Height/Weight data.
try:
    # scikit-learn >= 1.2 spells the dense-output switch `sparse_output`;
    # the old `sparse` keyword was removed in 1.4.
    encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
except TypeError:
    # Older scikit-learn releases only know the original `sparse` keyword.
    encoder = OneHotEncoder(handle_unknown="ignore", sparse=False)
people_one_hot = encoder.fit(people).transform(people)
print("**********************************")
print(people_one_hot)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement