Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from sklearn.datasets import load_iris
- import numpy as np
- import pandas as pd
- import matplotlib.pyplot as plt
- import seaborn as sns
def print_hist(values, title=''):
    """Draw a 10-bin histogram of ``values`` and display it.

    Args:
        values: Data handed directly to ``plt.hist``.
        title: Text appended to the fixed chart-title prefix.
    """
    chart_title = f'Діаграма розподілу {title}'
    plt.hist(values, bins=10)
    plt.title(chart_title)
    plt.xlabel('Значення')
    plt.ylabel('Частота')
    # Display the figure that the calls above populated.
    plt.show()
def info(df):
    """Summarise every feature column of ``df``.

    All columns except the last one are treated as features; the last
    column is skipped (the calling script passes a frame whose final
    column holds a non-numeric label). The statistics are computed from
    the same column slice that provides the row labels, so each row is
    always labelled with the column it describes, whatever the column
    names or their order.

    Args:
        df: DataFrame whose last column is excluded from the summary.

    Returns:
        A DataFrame indexed by feature-column name with the columns
        ``MIN``, ``MAX``, ``MEAN`` and ``VARIANCE``. The variance is
        whatever ``DataFrame.var()`` returns with its default arguments.
    """
    features = df.iloc[:, :-1]
    return pd.DataFrame(
        {
            'MIN': features.min(),
            'MAX': features.max(),
            'MEAN': features.mean(),
            'VARIANCE': features.var(),
        },
        # Explicit index keeps the rows in the frame's own column order.
        index=features.columns,
    )
def normalization(df):
    """Standardise every feature column of ``df`` in place.

    Each column except the last is replaced by
    ``(column - column.mean()) / column.std()``. Dividing by the standard
    deviation (not the variance) is what gives the result unit variance.
    The last column is left untouched.

    Args:
        df: DataFrame to standardise. It is modified in place, so pass a
            copy if the original values are still needed.

    Returns:
        The same ``df`` object, for call chaining.

    Note:
        A constant column has zero standard deviation, so the division
        is 0/0 and is expected to produce NaN values for that column.
    """
    for name in df.columns[:-1]:
        column = df[name]
        # Whole-column assignment lets the column take a float dtype even
        # if it started out as integers.
        df[name] = (column - column.mean()) / column.std()
    return df
# Load the data set. Every column except the last is treated as a numeric
# feature; 'species' is used to colour the pair plot.
iris = pd.read_csv('iris.csv')
feature_names = iris.columns[:-1]

sns.pairplot(iris, hue='species')
plt.show()

# Stage 1: raw values - summary table plus one histogram per feature.
info_iris = info(iris)
print(info_iris)
for name in feature_names:
    print_hist(iris[name], name)

# Stage 2: natural logarithm of every feature column.
iris_log = iris.copy()
iris_log.iloc[:, :-1] = np.log(iris_log.iloc[:, :-1])
info_iris_log = info(iris_log)
print(info_iris_log)
for name in feature_names:
    print_hist(iris_log[name], f'log {name}')

# Stage 3: normalise the log-transformed features. A copy is passed so
# that iris_log itself stays unchanged.
iris_log_norm = normalization(iris_log.copy())
info_iris_log_norm = info(iris_log_norm)
print(info_iris_log_norm)
for name in feature_names:
    print_hist(iris_log_norm[name], f'log-norm {name}')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement