Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import Latex, display
from sklearn.base import clone
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
class AdaBoost:
    """Binary AdaBoost ensemble for labels in {-1, +1}.

    Trains ``n_estimators`` copies of a base classifier on iteratively
    reweighted samples and combines them by an alpha-weighted vote.
    """

    def __init__(self, base_model=None, n_estimators=50):
        # base_model: any sklearn-style classifier whose fit() accepts
        # `sample_weight`; None means a depth-1 decision stump per round.
        self.n_estimators = n_estimators
        self.models = []
        self.alphas = []
        self.base_model = base_model

    def fit(self, X, y):
        """Fit the ensemble on X of shape (n_samples, n_features), y in {-1, +1}.

        Raises whatever the base model's fit()/predict() raise.
        """
        # Reset learned state so calling fit() twice retrains from scratch
        # instead of appending to the previous ensemble (original bug).
        self.models = []
        self.alphas = []
        n_samples, _ = X.shape
        # Start from the uniform sample-weight distribution.
        weights = np.full(n_samples, (1 / n_samples))
        for _ in range(self.n_estimators):
            if self.base_model is None:
                model = DecisionTreeClassifier(max_depth=1)
            else:
                # Fresh, unfitted copy each round; never refit the prototype.
                model = clone(self.base_model)
            model.fit(X, y, sample_weight=weights)
            predictions = model.predict(X)
            # Weighted error rate of this round's learner.
            err = weights.dot(predictions != y)
            # Clamp away from 0 and 1: a perfect (or perfectly wrong) learner
            # would otherwise yield log(0)/division by zero and NaN weights.
            err = np.clip(err, 1e-10, 1 - 1e-10)
            alpha = 0.5 * np.log((1 - err) / err)
            self.alphas.append(alpha)
            # Up-weight misclassified samples (y * pred = -1 there), renormalize.
            weights *= np.exp(-alpha * y * predictions)
            weights /= np.sum(weights)
            self.models.append(model)

    def predict(self, X):
        """Return the sign of the weighted vote: values in {-1, 0, +1}."""
        n_samples = X.shape[0]
        y_pred = np.zeros(n_samples)
        for alpha, model in zip(self.alphas, self.models):
            y_pred += alpha * model.predict(X)
        return np.sign(y_pred)
def visualize_data(X, Y, target, y_pred=None, model=None, title=r'$Initial\ Data$'):
    """Scatter-plot a two-feature dataset, optionally with model diagnostics.

    X: DataFrame whose first two columns are the plotted features.
    Y: array-like of class labels (at most 3 distinct values — one per
       entry of the color/marker palettes).
    target: per-class display names (LaTeX fragments) for the legend.
    y_pred, model: when y_pred is given, misclassified points are
       re-drawn highlighted and `model`'s decision boundary is shaded.
    """
    color_ = ['b', 'orange', 'green']
    marker_ = ["o", "s", "D"]
    df = pd.concat([X, pd.DataFrame(Y, columns=['target'], index=X.index)], axis=1)
    plt.figure(figsize=(10, 6))
    i = 0
    for value in np.unique(Y):
        plt.scatter(X.iloc[Y == value, 0], X.iloc[Y == value, 1],
                    c=color_[i], marker=marker_[i], label=fr'${target[i]}$')
        i += 1
    if y_pred is not None:
        df = pd.concat([df, pd.DataFrame(y_pred, columns=['pred'], index=X.index)], axis=1)
        # Keep only rows where prediction disagrees with the true label.
        # (The original also reassigned the mask to df.index afterwards —
        # a dead statement, removed.)
        df_miss = df.loc[df['pred'] != df['target']]
        # One highlighted scatter per (true class, predicted class) pair.
        # NOTE(review): t_i/p_i enumerate the classes present among the
        # misses, which matches the global palette order only while every
        # class is represented — confirm if classes can be absent here.
        t_i = 0
        for t in np.unique(df_miss['target']):
            p_i = 0
            for p in np.unique(df_miss['pred']):
                df_miss_i = df_miss.loc[(df_miss['target'] == t) & (df_miss['pred'] == p)]
                if len(df_miss_i) > 0:
                    plt.scatter(df_miss_i.iloc[:, 0], df_miss_i.iloc[:, 1],
                                c='black', marker=marker_[t_i], edgecolors=color_[p_i],
                                label=fr'$Misclassified\ {target[t_i]}\ as\ {target[p_i]}$')
                p_i += 1
            t_i += 1
        # Shade the model's decision regions on a dense grid padded by n units.
        n = 1
        x_min, x_max = X.iloc[:, 0].min() - n, X.iloc[:, 0].max() + n
        y_min, y_max = X.iloc[:, 1].min() - n, X.iloc[:, 1].max() + n
        xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.0025), np.arange(y_min, y_max, 0.0025))
        Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
        Z = Z.reshape(xx.shape)
        plt.contourf(xx, yy, Z, alpha=0.4, cmap=plt.cm.coolwarm)
    plt.xlabel(r'$x$')
    plt.ylabel(r'$y$')
    plt.title(fr'{title}')
    plt.grid(True)
    plt.legend()
    plt.show()
def quality_diagram(n_estimators, accuracy, title=''):
    """Plot ensemble quality (accuracy, %) against the number of base learners."""
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(n_estimators, accuracy,
            c='red', marker='o', markeredgecolor='blue', markerfacecolor='blue')
    ax.set_xlabel(r'$Кількість\ базових\ алгоритмів$')
    ax.set_ylabel(r'$Якість\ ансамблю, \%$')
    ax.set_title(fr'${title}$')
    ax.grid(True)
    plt.show()
# ---------------------------------------------------------------------------
# Experiment driver: compare the hand-rolled AdaBoost with sklearn's
# AdaBoostClassifier on the two-moons dataset over a range of ensemble sizes.
# Intended for a Jupyter/IPython session (`display` renders Latex objects).
# ---------------------------------------------------------------------------
df = pd.read_csv('Moons.csv')
df = df.iloc[:, 1:]  # first CSV column is a saved row index — drop it
df.columns = ['x', 'y', 'target']
# The custom AdaBoost's weight update assumes labels in {-1, +1}; remap 0.
df.loc[df.target == 0, 'target'] = -1
X = df.drop(labels=df.columns[-1], axis=1)
Y = df[df.columns[-1]].values
Y_unique = np.unique(Y)
visualize_data(X, Y, Y_unique)
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
# Shared base learner prototype; both ensembles clone it, never mutate it.
model = DecisionTreeClassifier(max_depth=3, min_samples_split=3)
display(Latex(fr'Базова модель: {model}'))
n_estimators = [1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
train_accuracy, train_accuracy_sk = [], []
test_accuracy, test_accuracy_sk = [], []
for n in n_estimators:
    # --- Custom implementation ---
    adaboost = AdaBoost(model, n_estimators=n)
    adaboost.fit(x_train, y_train)
    y_pred = adaboost.predict(x_train)
    visualize_data(x_train, y_train, Y_unique, y_pred, adaboost, fr'$Train\ Data\ (n$_$estimators={n})$')
    accuracy = accuracy_score(y_train, y_pred) * 100
    train_accuracy.append(accuracy)
    accuracy = round(accuracy, 2)
    display(Latex(fr'$ADABoost\ Train\ Accuracy: {accuracy}\%$'))
    y_pred = adaboost.predict(x_test)
    visualize_data(x_test, y_test, Y_unique, y_pred, adaboost, fr'$Test\ Data\ (n$_$estimators={n})$')
    accuracy = accuracy_score(y_test, y_pred) * 100
    test_accuracy.append(accuracy)
    accuracy = round(accuracy, 2)
    display(Latex(fr'$ADABoost\ Test\ Accuracy: {accuracy}\%$'))
    # --- Sklearn reference implementation ---
    boosted_model = AdaBoostClassifier(model, n_estimators=n)
    boosted_model.fit(x_train, y_train)
    y_pred = boosted_model.predict(x_train)
    visualize_data(x_train, y_train, Y_unique, y_pred, boosted_model, fr'$Sklearn\ Train\ Data\ (n$_$estimators={n})$')
    accuracy = accuracy_score(y_train, y_pred) * 100
    train_accuracy_sk.append(accuracy)
    accuracy = round(accuracy, 2)
    display(Latex(fr'$Sklearn\ ADABoost\ Train\ Accuracy: {accuracy}\%$'))
    y_pred = boosted_model.predict(x_test)
    visualize_data(x_test, y_test, Y_unique, y_pred, boosted_model, fr'$Sklearn\ Test\ Data\ (n$_$estimators={n})$')
    accuracy = accuracy_score(y_test, y_pred) * 100
    test_accuracy_sk.append(accuracy)
    accuracy = round(accuracy, 2)
    display(Latex(fr'$Sklearn\ ADABoost\ Test\ Accuracy: {accuracy}\%$'))
# Raw strings: titles contain literal `\ ` LaTeX spacing, which in a
# non-raw string is an invalid escape sequence (SyntaxWarning).
quality_diagram(n_estimators, train_accuracy,
                title=r'Залежність\ якості\ алгоритму\ від\ кількості\ базових\ алгоритмів\ (Train\ Data)')
quality_diagram(n_estimators, test_accuracy,
                title=r'Залежність\ якості\ алгоритму\ від\ кількості\ базових\ алгоритмів\ (Test\ Data)')
quality_diagram(n_estimators, train_accuracy_sk,
                title=r'Залежність\ якості\ алгоритму\ від\ кількості\ базових\ алгоритмів\ (Sklearn\ Train\ Data)')
quality_diagram(n_estimators, test_accuracy_sk,
                title=r'Залежність\ якості\ алгоритму\ від\ кількості\ базових\ алгоритмів\ (Sklearn\ Test\ Data)')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement