#LAB 1
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_california_housing

# Load the California Housing dataset as a pandas DataFrame
data = fetch_california_housing(as_frame=True)
df = data.frame
print("Dataset Sample:")
print(df.head())

def plot_histograms(df):
    # Histogram of every numerical feature
    df.hist(figsize=(12, 10), bins=30, color='skyblue', edgecolor='black')
    plt.suptitle('Histograms of Numerical Features', fontsize=16)
    plt.tight_layout(rect=(0, 0, 1, 0.97))
    plt.show()

def plot_boxplots(df):
    # Box plot of every numerical feature (9 columns fit a 3x3 grid)
    plt.figure(figsize=(14, 10))
    for i, column in enumerate(df.columns, 1):
        plt.subplot(3, 3, i)
        sns.boxplot(y=df[column], color='skyblue')
        plt.title(f'Box Plot of {column}', fontsize=12)
    plt.tight_layout()
    plt.show()

def analyze_features(df):
    # Summary statistics and IQR-based outlier count for each feature
    print("\nFeature Analysis:")
    for column in df.columns:
        print(f'\nFeature: {column}')
        print(f"Mean: {df[column].mean():.2f}, Median: {df[column].median():.2f}, Std Dev: {df[column].std():.2f}")
        q1 = df[column].quantile(0.25)
        q3 = df[column].quantile(0.75)
        iqr = q3 - q1
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr
        outliers = df[(df[column] < lower_bound) | (df[column] > upper_bound)]
        print(f'Number of Outliers: {len(outliers)}')

plot_histograms(df)
plot_boxplots(df)
analyze_features(df)
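Optional cross-check for LAB 1: the same mean/median/std and IQR outlier rule can be collected into a single summary table instead of per-feature prints. This is a minimal sketch using only pandas, assuming df is the California Housing frame loaded above; the column names in the summary table are illustrative.

#LAB 1 (optional cross-check)
# Collect summary statistics and IQR outlier counts per feature in one table.
q1 = df.quantile(0.25)
q3 = df.quantile(0.75)
iqr = q3 - q1
outlier_mask = (df < (q1 - 1.5 * iqr)) | (df > (q3 + 1.5 * iqr))
summary = pd.DataFrame({
    'mean': df.mean(),
    'median': df.median(),
    'std': df.std(),
    'outliers': outlier_mask.sum(),
})
print(summary)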
#LAB 2
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing

# Load the California Housing dataset as a pandas DataFrame
data = fetch_california_housing(as_frame=True)
df = data.frame
print("Data Sample:")
print(df.head())

# Pairwise correlations between all numerical columns
correlation_matrix = df.corr()
print("Correlation Matrix:")
print(correlation_matrix)

def plot_heatmap(corr_matrix):
    plt.figure(figsize=(10, 8))
    sns.heatmap(corr_matrix, annot=True, fmt=".2f", cmap="cool", cbar=True, square=True, linewidths=0.5)
    plt.title("Correlation Matrix Heatmap", fontsize=16)
    plt.show()

def plot_pairplot(df):
    sns.pairplot(df, diag_kind="kde", corner=True, plot_kws={'alpha': 0.5}, diag_kws={'fill': True})
    plt.suptitle("Pair Plot of Numerical Features", y=1.02, fontsize=16)
    plt.show()

plot_heatmap(correlation_matrix)
plot_pairplot(df)
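Optional extension for LAB 2: the correlation matrix can also be used to rank features by how strongly they correlate with the target. A short sketch reusing correlation_matrix from above, assuming the target column in the as_frame output is named 'MedHouseVal':

#LAB 2 (optional extension)
# Rank features by correlation with the target column ('MedHouseVal' assumed).
target_corr = correlation_matrix['MedHouseVal'].drop('MedHouseVal')
print(target_corr.sort_values(ascending=False))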
#LAB 3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target
target_names = iris.target_names

# Standardize features to zero mean and unit variance before PCA
scaler = StandardScaler()
X_standardized = scaler.fit_transform(X)

# Project the 4-dimensional data onto the first 2 principal components
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_standardized)
print(X_pca)

pca_df = pd.DataFrame(data=X_pca, columns=['Principal Component 1', 'Principal Component 2'])
pca_df['Target'] = y

# Scatter plot of the two principal components, colored by class
plt.figure(figsize=(8, 6))
colors = ['r', 'g', 'b']
for target, color, label in zip(range(len(target_names)), colors, target_names):
    plt.scatter(
        pca_df.loc[pca_df['Target'] == target, 'Principal Component 1'],
        pca_df.loc[pca_df['Target'] == target, 'Principal Component 2'],
        color=color,
        alpha=0.6,
        label=label
    )
plt.title('PCA of Iris Dataset (2 components)', fontsize=16)
plt.xlabel('Principal Component 1', fontsize=12)
plt.ylabel('Principal Component 2', fontsize=12)
plt.legend(title='Target', loc='best')
plt.grid(alpha=0.3)
plt.show()
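Optional check for LAB 3: to see how much of the original variance the two components retain, the fitted PCA object exposes explained_variance_ratio_. A minimal sketch reusing the pca object fitted above:

#LAB 3 (optional check)
# Variance explained by each principal component and their total.
print("Explained variance ratio:", pca.explained_variance_ratio_)
print("Total variance captured:", pca.explained_variance_ratio_.sum())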
#LAB 5
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

np.random.seed(42)

# Generate 100 random values in [0, 1]. Label the first 50 with the rule
# x <= 0.5 -> class 1, x > 0.5 -> class 2, then classify the remaining 50 with KNN.
x = np.random.rand(100)
labels = np.where(x[:50] <= 0.5, 1, 2)
x_train = x[:50].reshape(-1, 1)
y_train = labels
x_test = x[50:].reshape(-1, 1)  # the unlabeled second half is the test set

k_values = [1, 2, 3, 4, 5, 20, 30]
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(x_train, y_train)
    y_pred = knn.predict(x_test)
    print(f"\nk={k}:")
    print(f"Predicted classes: {y_pred}")