# Proj: CNN face classifier trained on the LFW deep-funneled dataset (Colab script)

# Mount Google Drive (optional, for saving the model).
# The mount is optional, so a missing google.colab package (e.g. when this
# script is run outside a Colab runtime) must not abort the whole script.
try:
    from google.colab import drive
except ImportError:
    print("google.colab not available; skipping Google Drive mount.")
else:
    drive.mount('/content/drive')

# Import Libraries
import os
import zipfile
import cv2
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt

# Dataset parameters
IMG_HEIGHT = 100  # Target image height (pixels) after resizing
IMG_WIDTH = 100   # Target image width (pixels) after resizing
DATASET_ZIP = '/content/lfw-deepfunneled.zip'   # Zip archive containing the dataset
EXTRACTED_FOLDER = '/content/lfw-deepfunneled'  # Folder the images are loaded from

# Step 1: Load and Preprocess Dataset
def load_lfw_data(data_dir):
    """Load every readable image below ``data_dir``, labelled by its folder name.

    ``data_dir`` is expected to contain one sub-directory per person; each
    file inside a sub-directory is read with ``cv2.imread``. Entries of
    ``data_dir`` that are not directories, and files OpenCV cannot decode
    (``imread`` returns ``None``), are skipped silently.

    Args:
        data_dir: Path of the directory holding the per-person folders.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X`` holds the images resized to
        ``IMG_HEIGHT`` x ``IMG_WIDTH`` and scaled to ``[0, 1]`` as float32
        (channel order as delivered by ``cv2.imread``). ``y`` holds the
        matching sub-directory names. Both arrays are empty when no image
        could be loaded.
    """
    images, labels = [], []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded shuffle/split of it) reproducible.
    for person in sorted(os.listdir(data_dir)):
        person_dir = os.path.join(data_dir, person)
        if not os.path.isdir(person_dir):
            continue
        for img_name in sorted(os.listdir(person_dir)):
            img = cv2.imread(os.path.join(person_dir, img_name))
            if img is None:
                # Not a decodable image file.
                continue
            # cv2.resize expects dsize as (width, height), not (height, width).
            img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))
            # Normalize to [0, 1] in float32: dividing the uint8 image
            # directly would promote it to float64 and double memory use.
            images.append(img.astype(np.float32) / 255.0)
            labels.append(person)
    return np.array(images), np.array(labels)

# Extract the dataset archive unless the target folder is already present
dataset_ready = os.path.exists(EXTRACTED_FOLDER)
if not dataset_ready:
    print("Extracting dataset...")
    with zipfile.ZipFile(DATASET_ZIP, mode='r') as archive:
        archive.extractall(path='/content/')
print("Dataset extracted!")

# Load images and their folder-name labels from disk
print("Loading data...")
X, y = load_lfw_data(EXTRACTED_FOLDER)

# Encode labels: string labels -> integer ids -> one-hot rows
print("Encoding labels...")
le = LabelEncoder()
le.fit(y)
y_encoded = le.transform(y)
y_categorical = to_categorical(y_encoded)

# Hold out 20% of the samples as the test set (fixed seed)
print("Splitting data...")
split_parts = train_test_split(
    X,
    y_categorical,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

# Step 2: Data Augmentation
# Random geometric transforms applied on the fly to each training batch.
print("Applying data augmentation...")
datagen = ImageDataGenerator(
    rotation_range=10,       # Random rotation
    width_shift_range=0.1,   # Horizontal shift
    height_shift_range=0.1,  # Vertical shift
    horizontal_flip=True,    # Random horizontal flips
)
# NOTE: datagen.fit(X_train) is intentionally not called. Per the Keras docs,
# fit() is only required when featurewise_center, featurewise_std_normalization
# or zca_whitening is enabled; none are, so it would only copy the training
# set without affecting the generated batches.

# Step 3: Build the Model
# Four convolutional stages followed by a dense classifier head.
print("Building model...")
model = Sequential()

# Stage 1: 32 filters, with dropout
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25))

# Stage 2: 64 filters, with dropout
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25))

# Stage 3: 128 filters
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

# Stage 4: 256 filters
model.add(Conv2D(256, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))

# Classifier head: one softmax unit per one-hot label column
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
num_classes = y_categorical.shape[1]
model.add(Dense(num_classes, activation='softmax'))  # Output layer

# Compile the model: Adam optimizer, categorical cross-entropy, accuracy metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer summary
model.summary()

# Step 4: Train the Model
# Augmented batches of 32 feed the training loop for 20 epochs; the held-out
# split is passed as validation data.
print("Training model...")
train_batches = datagen.flow(X_train, y_train, batch_size=32)
history = model.fit(
    train_batches,
    validation_data=(X_test, y_test),
    epochs=20,
    verbose=2,
)

# Step 5: Evaluate the Model
# evaluate() yields the loss followed by the compiled metric (accuracy).
print("Evaluating model...")
evaluation = model.evaluate(X_test, y_test, verbose=2)
test_loss, test_accuracy = evaluation
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Step 6: Analyze Predictions
# Collapse softmax outputs and one-hot targets back to integer class ids.
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

# Classification Report
# target_names covers every encoded class, but the test split / predictions
# may not contain all of them. Without an explicit `labels` list sklearn
# infers the labels from the data and raises ValueError when their count
# differs from len(target_names), so pass the full id range explicitly.
all_class_ids = np.arange(len(le.classes_))
print("Classification Report:")
print(classification_report(
    y_test_classes,
    y_pred_classes,
    labels=all_class_ids,
    target_names=le.classes_,
    zero_division=0,  # classes with no samples/predictions score 0 without warnings
))

# Confusion Matrix
# Rows are true classes, columns are predicted classes (sklearn convention).
print("Confusion Matrix:")
cm = confusion_matrix(y_test_classes, y_pred_classes)
plt.figure(figsize=(10, 8))
plt.title("Confusion Matrix")
plt.imshow(cm, cmap="viridis")
plt.xlabel("Predicted class")
plt.ylabel("True class")
plt.colorbar()
plt.show()

# Step 7: Save the Model
# Written relative to the current working directory.
MODEL_FILENAME = "lfw_funneled_5layer_model.h5"
model.save(MODEL_FILENAME)
print(f"Model saved as '{MODEL_FILENAME}'")

# Optional: Visualize Accuracy and Loss
# One subplot per metric, each showing the training and validation curves
# recorded in the fit history.
plt.figure(figsize=(12, 6))

for position, (metric_key, metric_title) in enumerate(
    (('accuracy', 'Accuracy'), ('loss', 'Loss')), start=1
):
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric_key], label=f'Train {metric_title}')
    plt.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_title}')
    plt.title(f'{metric_title} Over Epochs')
    plt.xlabel('Epochs')
    plt.ylabel(metric_title)
    plt.legend()

plt.show()