RandomForestClassifier
mirosh111000 | Mar 25th, 2024

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import display, Latex
from joblib import Parallel, delayed


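# A from-scratch random forest: each tree is fitted on a bootstrap sample of
# the training data, the trees are trained in parallel with joblib, and
# prediction is a majority vote across the ensemble.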
class RandomForestClassifierParallel:
    def __init__(self, n_estimators=100, max_depth=None, min_samples_split=2, n_jobs=-1):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.n_jobs = n_jobs
        self.estimators_ = []

    def fit(self, X, y):
        def fit_tree(X, y, seed):
            # Work on plain arrays so the positional indexing below also
            # accepts pandas DataFrames/Series.
            rng = np.random.RandomState(seed)
            X = np.asarray(X)
            y = np.asarray(y)

            # Bootstrap sample: draw len(X) rows with replacement.
            idx = rng.choice(len(X), len(X), replace=True)
            X_subset, y_subset = X[idx], y[idx]

            tree = DecisionTreeClassifier(max_depth=self.max_depth, min_samples_split=self.min_samples_split)
            tree.fit(X_subset, y_subset)
            return tree

        # Seed each tree explicitly so the parallel workers draw independent
        # bootstrap samples instead of sharing one global RNG state.
        seeds = np.random.randint(0, 2**31 - 1, size=self.n_estimators)
        self.estimators_ = Parallel(n_jobs=self.n_jobs)(
            delayed(fit_tree)(X, y, seed) for seed in seeds)
        return self

    def predict(self, X):
        def predict_tree(tree, X):
            return tree.predict(X)

        # One prediction vector per tree, computed in parallel.
        predictions = Parallel(n_jobs=self.n_jobs)(
            delayed(predict_tree)(tree, X) for tree in self.estimators_)

        # Transpose so each row holds every tree's vote for one sample,
        # then return the majority label per row.
        y_pred = np.array(predictions).T
        majority_votes = []
        for row in y_pred:
            unique_elements, counts = np.unique(row, return_counts=True)
            majority_votes.append(unique_elements[np.argmax(counts)])

        return np.array(majority_votes)


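# Scatter-plot the two features coloured by true class; when predictions are
# supplied, also highlight misclassified points and shade the model's
# decision regions with a contour plot.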
def visualize_data(X, Y, target, y_pred=None, model=None, title=r'$Initial\ Data$'):

    color_ = ['b', 'orange', 'green']
    marker_ = ["o", "s", "D"]
    df = pd.concat([X, pd.DataFrame(Y, columns=['target'], index=X.index)], axis=1)

    plt.figure(figsize=(10, 6))

    for i, value in enumerate(np.unique(Y)):
        plt.scatter(X.iloc[Y == value, 0], X.iloc[Y == value, 1], c=color_[i], marker=marker_[i], label=fr'${target[i]}$')

    if y_pred is not None:

        df = pd.concat([df, pd.DataFrame(y_pred, columns=['pred'], index=X.index)], axis=1)
        df_miss = df.loc[df['pred'] != df['target']]

        for t in np.unique(df_miss['target']):
            for p in np.unique(df_miss['pred']):
                df_miss_i = df_miss.loc[(df_miss['target'] == t) & (df_miss['pred'] == p)]
                if len(df_miss_i) > 0:
                    # Look the labels up in `target` so markers and colours
                    # stay consistent with the main scatter even when only
                    # some of the classes are misclassified.
                    t_i = np.flatnonzero(target == t)[0]
                    p_i = np.flatnonzero(target == p)[0]
                    plt.scatter(df_miss_i.iloc[:, 0], df_miss_i.iloc[:, 1],
                                c='black', marker=marker_[t_i], edgecolors=color_[p_i],
                                label=fr'$Misclassified\ {target[t_i]}\ as\ {target[p_i]}$')

        # Evaluate the model on a dense grid to shade its decision regions.
        n = 1
        x_min, x_max = X.iloc[:, 0].min() - n, X.iloc[:, 0].max() + n
        y_min, y_max = X.iloc[:, 1].min() - n, X.iloc[:, 1].max() + n

        xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))
        Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
        Z = Z.reshape(xx.shape)
        plt.contourf(xx, yy, Z, alpha=0.35, cmap=plt.cm.coolwarm)

    plt.xlabel(r'$x$')
    plt.ylabel(r'$y$')
    plt.title(fr'{title}')
    plt.grid(True)
    plt.legend()
    plt.show()


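# Plot ensemble accuracy against the number of base estimators.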
def quality_diagram(n_estimators, accuracy, title=''):

    plt.figure(figsize=(10, 6))
    plt.plot(n_estimators, accuracy, c='red', marker='o', markeredgecolor='blue', markerfacecolor='blue')
    plt.xlabel(r'$Number\ of\ base\ estimators$')
    plt.ylabel(r'$Ensemble\ quality,\ \%$')
    plt.title(fr'${title}$')
    plt.grid(True)
    plt.show()


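# Load the two-moons dataset: drop the CSV index column, name the features,
# and recode class 0 as -1.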
df = pd.read_csv('Moons.csv')
df = df.iloc[:, 1:]
df.columns = ['x', 'y', 'target']
df.loc[df.target == 0, 'target'] = -1
X = df.drop(labels=df.columns[-1], axis=1)
Y = df[df.columns[-1]].values
Y_unique = np.unique(Y)

visualize_data(X, Y, Y_unique)
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

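# For each ensemble size, fit the hand-rolled forest and sklearn's
# RandomForestClassifier with the same hyperparameters, then record and
# visualise train/test accuracy for both.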
n_estimators = [1, 5, 25, 50, 100, 250, 500, 750, 1000]
train_accuracy, train_accuracy_sk = [], []
test_accuracy, test_accuracy_sk = [], []

for n in n_estimators:

    rf = RandomForestClassifierParallel(n_estimators=n, max_depth=5, min_samples_split=2, n_jobs=-1)
    rf.fit(x_train, y_train)

    y_pred = rf.predict(x_train)
    visualize_data(x_train, y_train, Y_unique, y_pred, rf, fr'$Train\ Data\ (n$_$estimators={n})$')
    accuracy = accuracy_score(y_train, y_pred) * 100
    train_accuracy.append(accuracy)
    accuracy = round(accuracy, 2)
    display(Latex(fr'$RandomForestClassifier\ Train\ Accuracy: {accuracy}\%$'))

    y_pred = rf.predict(x_test)
    visualize_data(x_test, y_test, Y_unique, y_pred, rf, fr'$Test\ Data\ (n$_$estimators={n})$')
    accuracy = accuracy_score(y_test, y_pred) * 100
    test_accuracy.append(accuracy)
    accuracy = round(accuracy, 2)
    display(Latex(fr'$RandomForestClassifier\ Test\ Accuracy: {accuracy}\%$'))

    # Sklearn baseline with the same hyperparameters, for comparison.
    rf_classifier = RFC(n_estimators=n, max_depth=5, min_samples_split=2, n_jobs=-1)
    rf_classifier.fit(x_train, y_train)

    y_pred = rf_classifier.predict(x_train)
    visualize_data(x_train, y_train, Y_unique, y_pred, rf_classifier, fr'$Sklearn\ Train\ Data\ (n$_$estimators={n})$')
    accuracy = accuracy_score(y_train, y_pred) * 100
    train_accuracy_sk.append(accuracy)
    accuracy = round(accuracy, 2)
    display(Latex(fr'$Sklearn\ RandomForestClassifier\ Train\ Accuracy: {accuracy}\%$'))

    y_pred = rf_classifier.predict(x_test)
    visualize_data(x_test, y_test, Y_unique, y_pred, rf_classifier, fr'$Sklearn\ Test\ Data\ (n$_$estimators={n})$')
    accuracy = accuracy_score(y_test, y_pred) * 100
    test_accuracy_sk.append(accuracy)
    accuracy = round(accuracy, 2)
    display(Latex(fr'$Sklearn\ RandomForestClassifier\ Test\ Accuracy: {accuracy}\%$'))


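# Summary plots: ensemble quality as a function of the number of trees,
# for both implementations, on train and test data.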
quality_diagram(n_estimators, train_accuracy, title=r'Ensemble\ quality\ vs.\ number\ of\ base\ estimators\ (Train\ Data)')

quality_diagram(n_estimators, test_accuracy, title=r'Ensemble\ quality\ vs.\ number\ of\ base\ estimators\ (Test\ Data)')

quality_diagram(n_estimators, train_accuracy_sk, title=r'Ensemble\ quality\ vs.\ number\ of\ base\ estimators\ (Sklearn\ Train\ Data)')

quality_diagram(n_estimators, test_accuracy_sk, title=r'Ensemble\ quality\ vs.\ number\ of\ base\ estimators\ (Sklearn\ Test\ Data)')