Advertisement
mirosh111000

AdaBoost

Mar 24th, 2024
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.89 KB | None | 0 0
  1. import pandas as pd
  2. from sklearn.model_selection import train_test_split
  3. from sklearn.tree import DecisionTreeClassifier
  4. from sklearn.ensemble import AdaBoostClassifier
  5. from sklearn.metrics import accuracy_score
  6. import seaborn as sns
  7. import matplotlib.pyplot as plt
  8. import numpy as np
  9. from sklearn.preprocessing import LabelEncoder
  10. from IPython.display import Latex
  11. from sklearn.base import clone
  12.  
  13.  
  14.  
  15. class AdaBoost:
  16.     def __init__(self, base_model=None, n_estimators=50):
  17.         self.n_estimators = n_estimators
  18.         self.models = []
  19.         self.alphas = []
  20.         self.base_model = base_model
  21.  
  22.     def fit(self, X, y):
  23.         n_samples, _ = X.shape
  24.         weights = np.full(n_samples, (1 / n_samples))
  25.  
  26.         for _ in range(self.n_estimators):
  27.             if self.base_model is None:
  28.                 model = DecisionTreeClassifier(max_depth=1)
  29.             else:
  30.                 model = clone(self.base_model)
  31.             model.fit(X, y, sample_weight=weights)
  32.             predictions = model.predict(X)
  33.             err = weights.dot(predictions != y)
  34.  
  35.             alpha = 0.5 * np.log((1 - err) / err)
  36.             self.alphas.append(alpha)
  37.  
  38.             weights *= np.exp(-alpha * y * predictions)
  39.             weights /= np.sum(weights)
  40.  
  41.             self.models.append(model)
  42.  
  43.     def predict(self, X):
  44.         n_samples = X.shape[0]
  45.         y_pred = np.zeros(n_samples)
  46.  
  47.         for alpha, model in zip(self.alphas, self.models):
  48.             y_pred += alpha * model.predict(X)
  49.  
  50.         return np.sign(y_pred)
  51.  
  52.  
  53.  
  54. def visualize_data(X, Y, target, y_pred=None, model=None, title='$Initial\ Data$'):
  55.    
  56.     color_ = ['b', 'orange', 'green']
  57.     marker_ = ["o", "s", "D"]
  58.     df = pd.concat([X, pd.DataFrame(Y, columns=['target'], index=X.index)], axis=1)
  59.    
  60.     plt.figure(figsize=(10, 6))
  61.    
  62.     i = 0
  63.     for value in np.unique(Y):
  64.         plt.scatter(X.iloc[Y == value, 0], X.iloc[Y == value, 1], c=color_[i], marker=marker_[i], label=fr'${target[i]}$')
  65.         i += 1
  66.     if y_pred is not None:
  67.        
  68.         df = pd.concat([df, pd.DataFrame(y_pred, columns=['pred'], index=X.index)], axis=1)
  69.         misclassified_indices = df['pred'] != df['target']
  70.         df_miss = df.loc[misclassified_indices]
  71.         misclassified_indices = df.index
  72.        
  73.         t_i = 0
  74.         for t in np.unique(df_miss['target']):
  75.             p_i = 0
  76.             for p in np.unique(df_miss['pred']):
  77.                 df_miss_i = df_miss.loc[(df_miss['target'] == t) & (df_miss['pred'] == p)]
  78.                 if len(df_miss_i) > 0:
  79.                     plt.scatter(df_miss_i.iloc[:, 0], df_miss_i.iloc[:, 1],
  80.                                c='black', marker=marker_[t_i], edgecolors=color_[p_i],
  81.                                label=fr'$Misclassified\ {target[t_i]}\ as\ {target[p_i]}$')
  82.                 p_i += 1
  83.             t_i += 1
  84.            
  85.         n = 1
  86.         x_min, x_max = X.iloc[:, 0].min() - n, X.iloc[:, 0].max() + n
  87.         y_min, y_max = X.iloc[:, 1].min() - n, X.iloc[:, 1].max() + n
  88.  
  89.         xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.0025), np.arange(y_min, y_max, 0.0025))
  90.         Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
  91.         Z = Z.reshape(xx.shape)
  92.         plt.contourf(xx, yy, Z, alpha=0.4, cmap=plt.cm.coolwarm)
  93.        
  94.     plt.xlabel(r'$x$')
  95.     plt.ylabel(r'$y$')
  96.     plt.title(fr'{title}')
  97.     plt.grid(True)
  98.     plt.legend()
  99.     plt.show()
  100.    
  101.  
  102.  
  103. def quality_diagram(n_estimators, accuracy, title=''):
  104.    
  105.     plt.figure(figsize=(10, 6))
  106.     plt.plot(n_estimators, accuracy, c='red', marker='o', markeredgecolor='blue', markerfacecolor='blue')
  107.     plt.xlabel(r'$Кількість\ базових\ алгоритмів$')
  108.     plt.ylabel(r'$Якість\ ансамблю, \%$')
  109.     plt.title(fr'${title}$')
  110.     plt.grid(True)
  111.     plt.show()
  112.  
  113.  
  114. df = pd.read_csv('Moons.csv')
  115. df = df.iloc[:, 1:]
  116. df.columns = ['x', 'y', 'target']
  117. df.loc[df.target == 0, 'target'] = -1
  118. X = df.drop(labels=df.columns[-1], axis=1)
  119. Y = df[df.columns[-1]].values
  120. Y_unique = np.unique(Y)
  121. # label_encoder = LabelEncoder()
  122. # Y = label_encoder.fit_transform(Y)
  123.  
  124.  
  125. visualize_data(X, Y, Y_unique)
  126. x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
  127.  
  128. model = DecisionTreeClassifier(max_depth=3, min_samples_split=3)
  129. display(Latex(fr'Базова модель: {model}'))
  130.  
  131.  
  132. n_estimators = [1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
  133. train_accuracy, train_accuracy_sk = [], []
  134. test_accuracy, test_accuracy_sk = [], []
  135.  
  136. for n in n_estimators:
  137.    
  138.     adaboost = AdaBoost(model, n_estimators=n)
  139.     adaboost.fit(x_train, y_train)
  140.  
  141.     y_pred = adaboost.predict(x_train)
  142.     visualize_data(x_train, y_train, Y_unique, y_pred, adaboost, fr'$Train\ Data\ (n$_$estimators={n})$')
  143.     accuracy = accuracy_score(y_train, y_pred)*100
  144.     train_accuracy.append(accuracy)
  145.     accuracy = round(accuracy, 2)
  146.     display(Latex(fr'$ADABoost\ Train\ Accuracy: {accuracy}\%$'))
  147.  
  148.     y_pred = adaboost.predict(x_test)
  149.     visualize_data(x_test, y_test, Y_unique, y_pred, adaboost, fr'$Test\ Data\ (n$_$estimators={n})$')
  150.     accuracy = accuracy_score(y_test, y_pred)*100
  151.     test_accuracy.append(accuracy)
  152.     accuracy = round(accuracy, 2)
  153.     display(Latex(fr'$ADABoost\ Test\ Accuracy: {accuracy}\%$'))
  154.  
  155.  
  156.     # Sklearn
  157.     boosted_model = AdaBoostClassifier(model, n_estimators=n)
  158.     boosted_model.fit(x_train, y_train)
  159.  
  160.     y_pred = boosted_model.predict(x_train)
  161.     visualize_data(x_train, y_train, Y_unique, y_pred, boosted_model, fr'$Sklearn\ Train\ Data\ (n$_$estimators={n})$')
  162.     accuracy = accuracy_score(y_train, y_pred)*100
  163.     train_accuracy_sk.append(accuracy)
  164.     accuracy = round(accuracy, 2)
  165.     display(Latex(fr'$Sklearn\ ADABoost\ Train\ Accuracy: {accuracy}\%$'))
  166.  
  167.     y_pred = boosted_model.predict(x_test)
  168.     visualize_data(x_test, y_test, Y_unique, y_pred, boosted_model, fr'$Sklearn\ Test\ Data\ (n$_$estimators={n})$')
  169.     accuracy = accuracy_score(y_test, y_pred)*100
  170.     test_accuracy_sk.append(accuracy)
  171.     accuracy = round(accuracy, 2)
  172.     display(Latex(fr'$Sklearn\ ADABoost\ Test\ Accuracy: {accuracy}\%$'))
  173.  
  174.  
  175.  
  176.  
  177. quality_diagram(n_estimators, train_accuracy, title='Залежність\ якості\ алгоритму\ від\ кількості\ базових\ алгоритмів\ (Train\ Data)')
  178.  
  179. quality_diagram(n_estimators, test_accuracy, title='Залежність\ якості\ алгоритму\ від\ кількості\ базових\ алгоритмів\ (Test\ Data)')
  180.  
  181. quality_diagram(n_estimators, train_accuracy_sk, title='Залежність\ якості\ алгоритму\ від\ кількості\ базових\ алгоритмів\ (Sklearn\ Train\ Data)')
  182.  
  183. quality_diagram(n_estimators, test_accuracy_sk, title='Залежність\ якості\ алгоритму\ від\ кількості\ базових\ алгоритмів\ (Sklearn\ Test\ Data)')
  184.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement