# Lab_ML (10/02/25)

# %%
#import
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import  ListedColormap
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score

# %%
# Load the Social Network Ads dataset and preview the first rows.
df_net = pd.read_csv('Social_Network_Ads.csv')
df_net.head()

# %%
# Remove the 'User ID' column before any further analysis.
df_net = df_net.drop(columns=['User ID'])
df_net.head()

# %%
# Summary statistics of the remaining columns.
df_net.describe()

# %%
# Distribution of the estimated salary.
sns.displot(df_net['EstimatedSalary'])

# %%
# Encode the 'Gender' column as integers so it can take part in the
# correlation matrix below.
le = LabelEncoder().fit(df_net['Gender'])
df_net['Gender'] = le.transform(df_net['Gender'])

# %%
# Pairwise correlations between all columns, shown as a heatmap.
corr_matrix = df_net.corr()
sns.heatmap(corr_matrix)

# %%
# 'Gender' is not used as a model feature; drop it and show the result.
df_net = df_net.drop(columns=['Gender'])
df_net

# %%
# Features are every column except the last one; the last column is the
# target.
X = df_net.iloc[:, :-1].values
y = df_net.iloc[:, -1].values

# %%
# Hold out 25% of the rows for testing. The seed is an explicit integer so
# the split is reproducible; a boolean here would only work through implicit
# bool -> int coercion (True == 1).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=1
)


# %%
# Fit the scaler on the training split only and reuse its statistics for the
# test split, so that no information from the test data reaches training.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# %%
# Train a Gaussian Naive Bayes classifier on the scaled training data.
classifier = GaussianNB()
classifier.fit(X_train, y_train)

# %%
# Predict the test split and print predictions next to the true labels
# (column 0: predicted, column 1: actual).
y_pred = classifier.predict(X_test)
print(np.column_stack((y_pred, y_test)))

# %%
# Fraction of test samples classified correctly.
accuracy_score(y_test, y_pred)

# %%
# Per-class precision, recall and F1.
print(f'Classification Report: \n{classification_report(y_test, y_pred)}')

# %%
print(f'F1 score :{f1_score(y_test, y_pred)}')

# %%
# Confusion matrix of the test-set predictions, drawn as an annotated heatmap.
cf_matrix = confusion_matrix(y_test, y_pred)
sns.heatmap(cf_matrix, annot=True, fmt='d', cmap='Blues', cbar=False)

# %%
# Column 1 of predict_proba is the predicted probability of the second class;
# it is the score used to trace the precision-recall curve.
y_pred_proba = classifier.predict_proba(X_test)[:, 1]
precision, recall, thresholds = precision_recall_curve(y_test, y_pred_proba)

# %%
# Plot the precision-recall curve.
fig, ax = plt.subplots(figsize=(6, 6))
ax.plot(recall, precision, label="Naive Bayes Classifier", color='firebrick')
ax.set_title('Precision-Recall Curve')
ax.set_xlabel('Recall')
ax.set_ylabel('Precision')
# Hide the frame on this specific axes rather than on the implicit
# "current" axes.
ax.set_frame_on(False)
ax.legend()

# %%
# Plot the ROC curve
y_pred_proba = classifier.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred_proba)
fig, ax = plt.subplots(figsize=(6, 6))
ax.plot(fpr, tpr, label='Naive Bayes Classification', color='firebrick')
ax.set_title("ROC Curve")
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_frame_on(False)
ax.legend();

# %%
# Predict purchase for one person: Age 45, Estimated Salary 97000.
# The sample is scaled with the fitted scaler before it is given to the model.
sample = [[45, 97000]]
sample_scaled = sc.transform(sample)
print(classifier.predict(sample_scaled))

# %%
# Visualize prediction results on the training set


def plot_decision_regions(model, scaler, X_set, y_set, title, *,
                          age_step=1, salary_step=100):
    """Draw the classifier's decision regions with the samples on top.

    Args:
        model: Fitted classifier exposing ``predict``; it receives scaled
            data.
        scaler: Fitted scaler whose ``transform`` maps original units to the
            scale the model was trained on.
        X_set: 2-D array in original units. Column 0 is drawn on the x-axis
            (labelled 'Age') and column 1 on the y-axis (labelled
            'Estimated Salary').
        y_set: 1-D array of class labels aligned with the rows of ``X_set``.
        title: Title of the figure.
        age_step: Grid spacing along the x-axis.
        salary_step: Grid spacing along the y-axis. Salaries span a far
            larger numeric range than ages, so a unit step would build
            millions of grid points without any visible gain in detail.
    """
    region_cmap = ListedColormap(['red', 'green'])
    ages, salaries = X_set[:, 0], X_set[:, 1]

    # Grid covering the data range plus a margin on every side.
    grid_age, grid_salary = np.meshgrid(
        np.arange(start=ages.min() - 10, stop=ages.max() + 10,
                  step=age_step),
        np.arange(start=salaries.min() - 1000, stop=salaries.max() + 1000,
                  step=salary_step),
    )

    # Classify every grid point; the model expects scaled inputs.
    grid_points = np.column_stack((grid_age.ravel(), grid_salary.ravel()))
    grid_pred = model.predict(scaler.transform(grid_points))

    # Plot the decision boundary
    plt.contourf(grid_age, grid_salary, grid_pred.reshape(grid_age.shape),
                 alpha=0.75, cmap=region_cmap)

    # Set axis limits
    plt.xlim(grid_age.min(), grid_age.max())
    plt.ylim(grid_salary.min(), grid_salary.max())

    # Scatter plot for each class. `color=` is used instead of `c=` because a
    # single RGBA tuple passed as `c` is ambiguous: matplotlib treats it as
    # values to colour-map when the class happens to contain four points.
    for idx, cls in enumerate(np.unique(y_set)):
        members = y_set == cls
        plt.scatter(ages[members], salaries[members],
                    color=region_cmap(idx), label=cls)

    # Add labels, title, and legend
    plt.title(title)
    plt.xlabel('Age')
    plt.ylabel('Estimated Salary')
    plt.legend()

    # Show plot
    plt.show()


# Undo the scaling so the axes are in original units.
X_set, y_set = sc.inverse_transform(X_train), y_train
plot_decision_regions(classifier, sc, X_set, y_set,
                      'Naive Bayes (Training set)')


# %%
# Visualize prediction results on the test set
X_set, y_set = sc.inverse_transform(X_test), y_test  # Use the test set
plot_decision_regions(classifier, sc, X_set, y_set, 'Naive Bayes (Test set)')

# Human-readable names for the predicted class index
# (assumes class 0 means 'Not Purchased' and class 1 means 'Purchased').
labels = ['Not Purchased', 'Purchased']

# Predict the class for Age = 65 and Salary = 160000; the sample is scaled
# with the fitted scaler before it is passed to the classifier.
query_scaled = sc.transform([[65, 160000]])
prediction = classifier.predict(query_scaled)

predicted_class = prediction[0]
print(f'Prediction: {labels[predicted_class]}')