Advertisement
risixl

ML

Apr 4th, 2025
239
0
19 hours
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.78 KB | Source Code | 0 0
  1. #LAB 1
  2. import pandas as pd
  3. import numpy as np
  4. import matplotlib.pyplot as plt
  5. import seaborn as sns
  6. from sklearn.datasets import fetch_california_housing
  7.  
  # Lab 1 input: load the California housing bunch with pandas support enabled
  # and keep its ``frame`` attribute as the working DataFrame.
  data = fetch_california_housing(as_frame=True)
  df = data.frame
  # Show the heading and the first rows, one after the other.
  print("Dataset Sample:", df.head(), sep="\n")
  12.  
  def plot_histograms(df):
      """Plot ``df.hist`` for the given DataFrame and display the figure.

      Args:
          df: DataFrame whose ``hist`` method is called to draw the panels.
      """
      hist_options = {
          "figsize": (12, 10),
          "bins": 30,
          "color": 'skyblue',
          "edgecolor": 'black',
      }
      df.hist(**hist_options)
      plt.suptitle('Histograms of Numerical Features:', fontsize=16)
      # Keep the top 3% of the figure free so the suptitle has its own band.
      plt.tight_layout(rect=(0, 0, 1, 0.97))
      plt.show()
  18.  
  def plot_boxplots(df):
      """Draw one box plot per column of ``df`` on a single figure and show it.

      Panels are placed three per row. The grid keeps at least three rows
      (the original fixed 3x3 layout) and gains extra rows when ``df`` has more
      than nine columns, where a fixed 3x3 grid would make ``plt.subplot``
      reject the panel index.

      Args:
          df: DataFrame whose columns are each passed to ``sns.boxplot``.
      """
      panels_per_row = 3
      # Ceiling division without a new import; never fewer than three rows.
      n_rows = max(3, -(-len(df.columns) // panels_per_row))

      plt.figure(figsize=(14, 10))
      for i, column in enumerate(df.columns, 1):
          plt.subplot(n_rows, panels_per_row, i)
          sns.boxplot(y=df[column], color='skyblue')
          # Braces, not parentheses, are needed for the column name to be
          # substituted into the title.
          plt.title(f'Box Plot of {column}', fontsize=12)
      # Adjust spacing once, after every panel exists, instead of per panel.
      plt.tight_layout()
      plt.show()
  27.  
  def analyze_features(df):
      """Print summary statistics and an IQR-based outlier count per column.

      For every column this prints the column name, then its mean, median and
      standard deviation rounded to two decimals, then the number of rows
      lying outside ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]``.

      Args:
          df: DataFrame whose columns support ``mean``, ``median``, ``std``,
              ``quantile`` and ordering comparisons.

      Returns:
          None. All results are written to standard output.
      """
      print("\nFeature Analysis:")
      for column in df.columns:
          values = df[column]
          # Braces are required for interpolation; "(column)" printed literally.
          print(f'\nFeature:{column}')
          # ".2f" gives two decimals; the former "2f" was a width of 2 with the
          # default six decimals.
          print(f"Mean: {values.mean():.2f},Median: {values.median():.2f}, Std Dev: {values.std():.2f}")
          q1 = values.quantile(0.25)
          q3 = values.quantile(0.75)
          iqr = q3 - q1
          lower_bound = q1 - 1.5 * iqr
          upper_bound = q3 + 1.5 * iqr
          # Count flagged rows directly from the boolean mask instead of
          # materialising a filtered copy of the whole frame.
          is_outlier = (values < lower_bound) | (values > upper_bound)
          print(f'Number of Outliers: {int(is_outlier.sum())}')
  40.  
  # Lab 1 driver: histograms first, then box plots, then the printed summary.
  for lab1_step in (plot_histograms, plot_boxplots, analyze_features):
      lab1_step(df)
  44.  
  45. #LAB 2
  46. import pandas as pd
  47. import seaborn as sns
  48. import matplotlib.pyplot as plt
  49. from sklearn.datasets import fetch_california_housing
  50.  
  # Lab 2 input: reload the California housing frame and preview it.
  data = fetch_california_housing(as_frame=True)
  df = data.frame
  print("Data Sample", df.head(), sep="\n")

  # Pairwise correlations between all columns, printed before plotting.
  correlation_matrix = df.corr()
  print("Correlational Matrix:", correlation_matrix, sep="\n")
  58.  
  def plot_heatmap(corr_matrix):
      """Render ``corr_matrix`` as an annotated seaborn heatmap and show it.

      Args:
          corr_matrix: Matrix passed straight to ``sns.heatmap``.
      """
      plt.figure(figsize=(10, 8))
      heatmap_style = dict(
          annot=True,      # write each cell's value on the cell
          fmt=".2f",       # annotations use two decimals
          cmap="cool",
          cbar=True,
          square=True,
          linewidths=0.5,
      )
      sns.heatmap(corr_matrix, **heatmap_style)
      plt.title("correlation matrix heatmap", fontsize=16)
      plt.show()
  64.  
  def plot_pairplot(df):
      """Draw a seaborn pair plot of ``df`` with KDE diagonals and show it.

      Args:
          df: DataFrame passed straight to ``sns.pairplot``.
      """
      pairplot_options = {
          "diag_kind": "kde",
          "corner": True,
          "plot_kws": {'alpha': 0.5},
          "diag_kws": {'fill': True},
      }
      sns.pairplot(df, **pairplot_options)
      # y=1.02 places the title slightly above the top edge of the figure.
      plt.suptitle("Pair Plot of Numerical Features", y=1.02, fontsize=16)
      plt.show()
  69.  
  # Lab 2 driver: heatmap of the correlation matrix, then the pair plot of the
  # underlying frame.
  plot_heatmap(corr_matrix=correlation_matrix)
  plot_pairplot(df=df)
  72.  
  73. #LAB 3
  74. import pandas as pd
  75. import numpy as np
  76. import matplotlib.pyplot as plt
  77. from sklearn.datasets import load_iris
  78. from sklearn.decomposition import PCA
  79. from sklearn.preprocessing import StandardScaler
  80.  
  # Load iris and pull out the feature matrix, class labels and class names.
  iris = load_iris()
  X, y = iris.data, iris.target
  target_names = iris.target_names

  # Scale the features with StandardScaler before running PCA on them.
  scaler = StandardScaler()
  X_standardized = scaler.fit_transform(X)

  # Reduce to two principal components and print the projected coordinates.
  pca = PCA(n_components=2)
  X_pca = pca.fit_transform(X_standardized)
  print(X_pca)

  # Tabulate the two components together with the class label of each row.
  component_labels = ['Principal Component 1', 'Principal Component 2']
  pca_df = pd.DataFrame(X_pca, columns=component_labels).assign(Target=y)
  98.  
  99.  
  plt.figure(figsize=(8, 6))
  colors = ['r', 'g', 'b']

  # One scatter series per class so each class gets its own colour and
  # legend entry; class ids are the positions of the names in target_names.
  for target, (color, label) in enumerate(zip(colors, target_names)):
      class_rows = pca_df[pca_df['Target'] == target]
      plt.scatter(
          class_rows['Principal Component 1'],
          class_rows['Principal Component 2'],
          color=color,
          alpha=0.6,
          label=label,
      )

  plt.title('PCA of Iris Dataset (2 components)', fontsize=16)
  plt.xlabel('Principal Component 1', fontsize=12)
  plt.ylabel('Principal Component 2', fontsize=12)
  plt.legend(title='Target', loc='best')
  plt.grid(alpha=0.3)
  plt.show()
  118.  
  119. #LAB 5
  120. import numpy as np
  121. from sklearn.neighbors import KNeighborsClassifier
  # Lab 5: k-NN on 100 random points in [0, 1). The first 50 are labelled and
  # used for training; the remaining 50 are the points to classify.
  np.random.seed(42)  # fixed seed so every run draws the same points
  x = np.random.rand(100)

  # Training labels: class 1 when x <= 0.5, otherwise class 2.
  labels = np.where(x[:50] <= 0.5, 1, 2)
  x_train = x[:50].reshape(-1, 1)  # a single feature column
  y_train = labels

  # Classify the 50 points that were NOT used for training. The script used to
  # slice x[:50] here as well, so the model only ever predicted its own
  # training points and the second half of x was never used.
  x_test = x[50:].reshape(-1, 1)

  k_values = [1, 2, 3, 4, 5, 20, 30]
  for k in k_values:
      knn = KNeighborsClassifier(n_neighbors=k)
      knn.fit(x_train, y_train)
      y_pred = knn.predict(x_test)
      print(f"\nk={k}:")
      print(f"Predicted classes :{y_pred}")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement