Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # %%
- #import
- import pandas as pd
- import numpy as np
- import matplotlib.pyplot as plt
- from matplotlib.colors import ListedColormap
- import seaborn as sns
- from sklearn.preprocessing import LabelEncoder
- from sklearn.preprocessing import StandardScaler
- from sklearn.model_selection import train_test_split
- from sklearn.naive_bayes import GaussianNB
- from sklearn import metrics
- from sklearn.metrics import accuracy_score
- from sklearn.metrics import classification_report
- from sklearn.metrics import precision_recall_curve
- from sklearn.metrics import confusion_matrix
- from sklearn.metrics import f1_score
# %%
# Read the dataset from the CSV file in the working directory and preview it.
df_net = pd.read_csv('Social_Network_Ads.csv')
df_net.head()
# %%
# Remove the 'User ID' column before any analysis, then preview again.
df_net = df_net.drop(columns=['User ID'])
df_net.head()
# %%
# Summary statistics as produced by pandas' default describe().
df_net.describe()
# %%
# Distribution of the 'EstimatedSalary' column.
sns.displot(df_net['EstimatedSalary'])
# %%
# Replace the 'Gender' values with integer codes so the column can take part
# in the correlation matrix below.
le = LabelEncoder()
df_net['Gender'] = le.fit_transform(df_net['Gender'])
# %%
# Compute the correlation matrix once and reuse it for the heatmap.
corr_matrix = df_net.corr()
sns.heatmap(corr_matrix)
# %%
# Drop 'Gender' so it is not part of the feature matrix built afterwards.
df_net.drop(columns=['Gender'], inplace=True)
df_net
# %%
# Split columns by position: every column except the last is a feature and
# the last column is the target.
X = df_net.iloc[:, :-1].values
y = df_net.iloc[:, -1].values
# %%
# Hold out 25% of the rows for testing. The seed is spelled as the integer 1:
# a boolean True is only accepted because it coerces to 1, and it wrongly
# suggests an on/off switch rather than a seed value.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=1
)
# %%
# Fit the scaler on the training split only, then apply the same statistics
# to the test split.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# %%
# Fit a Gaussian Naive Bayes model on the scaled training split.
classifier = GaussianNB()
classifier.fit(X_train, y_train)
# %%
# Predict the test split and print the result as a two-column table:
# column 0 holds the predictions, column 1 the true labels.
y_pred = classifier.predict(X_test)
print(np.concatenate((y_pred[:, None], y_test[:, None]), axis=1))
# %%
# Accuracy on the test split (left as a bare expression so a notebook cell
# displays it).
accuracy_score(y_test, y_pred)
# %%
# Text summary produced by scikit-learn's classification_report.
print(f'Classification Report: \n{classification_report(y_test, y_pred)}')
# %%
# F1 score computed with f1_score's default settings.
print(f'F1 score :{f1_score(y_test, y_pred)}')
# %%
# Confusion matrix of true vs. predicted test labels, drawn as an annotated
# heatmap with integer cell labels and no colour bar.
cf_matrix = confusion_matrix(y_test, y_pred)
heatmap_options = {'annot': True, 'fmt': 'd', 'cmap': 'Blues', 'cbar': False}
sns.heatmap(cf_matrix, **heatmap_options)
# %%
# Column 1 of predict_proba is the probability of the second entry in
# classifier.classes_ (presumably the "purchased" class - confirm against the
# data). These scores drive the precision-recall curve.
y_pred_proba = classifier.predict_proba(X_test)[:, 1]
precision, recall, thresholds = precision_recall_curve(y_test, y_pred_proba)
# %%
# Precision-recall curve on the test split.
fig, ax = plt.subplots(figsize=(6, 6))
ax.plot(recall, precision, label="Naive Bayes Classifier", color='firebrick')
ax.set_title('Precision-Recall Curve')
ax.set_xlabel('Recall')
ax.set_ylabel('Precision')
plt.box(False)  # hide the frame of the current axes
ax.legend()
# %%
# ROC curve of the classifier on the test split.
# Scores are recomputed here so the cell does not rely on the previous one.
y_pred_proba = classifier.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred_proba)
fig, ax = plt.subplots(figsize=(6, 6))
roc_style = {'label': 'Naive Bayes Classification', 'color': 'firebrick'}
ax.plot(fpr, tpr, **roc_style)
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title("ROC Curve")
plt.box(False)  # hide the frame of the current axes
ax.legend();
# %%
# Predict purchase for a single customer: Age 45, Salary 97000.
# The raw values go through the fitted scaler before reaching the classifier.
sample = [[45, 97000]]
sample_scaled = sc.transform(sample)
print(classifier.predict(sample_scaled))
# %%
def plot_decision_regions(X_set, y_set, title, age_step=1, salary_step=100):
    """Draw the classifier's decision regions with the given points on top.

    Relies on the module-level ``classifier`` and ``sc`` fitted earlier in
    this script.

    Args:
        X_set: Two-column feature array in original (unscaled) units; column 0
            is plotted on the x axis ('Age'), column 1 on the y axis
            ('Estimated Salary').
        y_set: Class label for each row of ``X_set``.
        title: Title of the figure.
        age_step: Grid spacing along column 0.
        salary_step: Grid spacing along column 1. A spacing of 1 can produce a
            very large grid when column 1 spans a wide range, so a coarser
            default is used; the drawn boundary is visually unchanged.
    """
    cmap = ListedColormap(['red', 'green'])

    # Grid covering the data with a margin of 10 / 1000 units on each side.
    ages = np.arange(X_set[:, 0].min() - 10, X_set[:, 0].max() + 10, age_step)
    salaries = np.arange(
        X_set[:, 1].min() - 1000, X_set[:, 1].max() + 1000, salary_step
    )
    X1, X2 = np.meshgrid(ages, salaries)

    # The classifier was trained on scaled features, so every grid point is
    # scaled before prediction; the result is reshaped back to the grid.
    grid_points = np.array([X1.ravel(), X2.ravel()]).T
    regions = classifier.predict(sc.transform(grid_points)).reshape(X1.shape)
    plt.contourf(X1, X2, regions, alpha=0.75, cmap=cmap)
    plt.xlim(X1.min(), X1.max())
    plt.ylim(X2.min(), X2.max())

    # One scatter call per class. ``color=`` is used instead of ``c=`` because
    # a single RGBA tuple passed as ``c`` is ambiguous: Matplotlib warns about
    # it and treats it as per-point values when a class has exactly 4 points.
    for i, label in enumerate(np.unique(y_set)):
        mask = y_set == label
        plt.scatter(X_set[mask, 0], X_set[mask, 1], color=cmap(i), label=label)

    plt.title(title)
    plt.xlabel('Age')
    plt.ylabel('Estimated Salary')
    plt.legend()
    plt.show()


# Visualize prediction results on the training set (back in original units).
X_set, y_set = sc.inverse_transform(X_train), y_train
plot_decision_regions(X_set, y_set, 'Naive Bayes (Training set)')
# %%
# Visualize prediction results on the test set (back in original units).
X_set, y_set = sc.inverse_transform(X_test), y_test
plot_decision_regions(X_set, y_set, 'Naive Bayes (Test set)')
# Classify one hypothetical customer: Age = 65, Salary = 160000.
query = [[65, 160000]]
prediction = classifier.predict(sc.transform(query))
# The predicted class is used directly as an index into the label list; this
# assumes class 0 means 'Not Purchased' and class 1 means 'Purchased'.
labels = ['Not Purchased', 'Purchased']
predicted_label = labels[prediction[0]]
print(f'Prediction: {predicted_label}')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement