import tkinter as tk
from tkinter import ttk, filedialog, messagebox
from bs4 import BeautifulSoup
import requests
from PIL import Image, ImageTk
import io
from urllib.parse import urljoin, urlparse
import os
import subprocess
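
# Third-party dependencies (everything else above is standard library):
#   pip install requests beautifulsoup4 pillow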

class WebScraperApp:
    def __init__(self, root):
        self.root = root
        self.root.title("Najeeb Images & Videos Web Scraper")
        self.root.geometry("1000x700")
        self.root.configure(bg="#f0f0f0")  # Light gray background

        # Top Frame for URL Entry and Buttons
        self.top_frame = ttk.Frame(self.root, style="Top.TFrame")
        self.top_frame.pack(fill=tk.X, padx=10, pady=10)

        # URL Entry Field
        self.url_label = ttk.Label(self.top_frame, text="Enter URL:", font=("Arial", 12), background="#ffffff")
        self.url_label.pack(side=tk.LEFT, padx=(0, 5))
        self.url_entry = ttk.Entry(self.top_frame, width=50, font=("Arial", 12))
        self.url_entry.pack(side=tk.LEFT, padx=(0, 10))

        # Scrape Media Button (Green)
        self.scrape_image_btn = ttk.Button(
            self.top_frame, text="Scrape Media", command=self.process_url, style="Green.TButton"
        )
        self.scrape_image_btn.pack(side=tk.LEFT, padx=(0, 5))

        # Scrape Links Button (Blue)
        self.scrape_links_btn = ttk.Button(
            self.top_frame, text="Scrape Links", command=self.scrape_links, style="Blue.TButton"
        )
        self.scrape_links_btn.pack(side=tk.LEFT, padx=(0, 5))

        # Scrape All Links Button (Orange)
        self.scrape_all_links_btn = ttk.Button(
            self.top_frame, text="Scrape All Links", command=self.scrape_all_button, style="Orange.TButton"
        )
        self.scrape_all_links_btn.pack(side=tk.LEFT, padx=(0, 5))

        # Clear Results and URL Button (Gray)
        self.clear_button = ttk.Button(
            self.top_frame, text="Clear", command=self.clear_fields_and_results, style="Gray.TButton"
        )
        self.clear_button.pack(side=tk.LEFT, padx=(0, 5))

        # Save Results Button (Red)
        self.save_results_btn = ttk.Button(
            self.top_frame, text="Save", command=self.save_results, style="Red.TButton"
        )
        self.save_results_btn.pack(side=tk.LEFT)

        # Left Frame for Results with Scrollbar
        self.left_frame = ttk.Frame(self.root, style="Left.TFrame")
        self.left_frame.pack(side=tk.LEFT, fill=tk.BOTH, expand=False, padx=10, pady=10)
        self.result_label = ttk.Label(self.left_frame, text="Results:", font=("Arial", 12), background="#ffffff")
        self.result_label.pack(anchor=tk.W)

        # Scrollable Listbox for Results
        self.result_scrollbar = ttk.Scrollbar(self.left_frame, orient=tk.VERTICAL)
        self.result_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
        self.result_listbox = tk.Listbox(
            self.left_frame,
            width=40,
            height=20,
            font=("Arial", 10),
            yscrollcommand=self.result_scrollbar.set
        )
        self.result_listbox.pack(fill=tk.BOTH, expand=True)
        self.result_scrollbar.config(command=self.result_listbox.yview)

        # Double-click to preview an image, or play a video/M3U in VLC
        self.result_listbox.bind("<Double-Button-1>", self.handle_double_click)

        # Browse Button (Yellow)
        self.browse_button = ttk.Button(
            self.left_frame, text="Browse Text File", command=self.browse_text_file, style="Yellow.TButton"
        )
        self.browse_button.pack(side=tk.LEFT, padx=(0, 5))

        # Download Button in Left Frame (Purple)
        self.download_btn = ttk.Button(
            self.left_frame, text="Download Selected Image", command=self.download_image, style="Purple.TButton"
        )
        self.download_btn.pack(side=tk.LEFT)

        # Right Frame for Image Display
        self.right_frame = ttk.Frame(self.root, style="Right.TFrame")
        self.right_frame.pack(side=tk.RIGHT, fill=tk.BOTH, expand=True, padx=10, pady=10)
        self.image_label = ttk.Label(self.right_frame, text="Image Preview", anchor=tk.CENTER, font=("Arial", 12))
        self.image_label.pack(expand=True)

        # Scrape result storage
        self.images = []    # Scraped image URLs
        self.videos = []    # Scraped video URLs
        self.m3u_urls = []  # Scraped M3U URLs
        self.links = []     # Scraped page links

        # Frame styling
        self.style = ttk.Style()
        self.style.configure("Top.TFrame", background="#ffffff")
        self.style.configure("Left.TFrame", background="#e0e0e0")
        self.style.configure("Right.TFrame", background="#d0d0d0")

        # Button styles
        self.style.configure("Green.TButton", font=("Arial", 10), background="#4CAF50", foreground="black")
        self.style.configure("Blue.TButton", font=("Arial", 10), background="#008CBA", foreground="yellow")
        self.style.configure("Orange.TButton", font=("Arial", 10), background="#FF9800", foreground="green")
        self.style.configure("Purple.TButton", font=("Arial", 10), background="#9C27B0", foreground="red")
        self.style.configure("Red.TButton", font=("Arial", 10), background="#F44336", foreground="blue")
        self.style.configure("Gray.TButton", font=("Arial", 10), background="#808080", foreground="red")
        self.style.configure("Yellow.TButton", font=("Arial", 10), background="#FFEB3B", foreground="black")
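
        # NOTE: the built-in ttk themes on Windows/macOS often ignore the
        # background/foreground options configured above, so the buttons may
        # render with native colors. If that matters, one option (assumption:
        # changing the widget theme is acceptable) is the cross-platform
        # "clam" theme, which honors these settings:
        # self.style.theme_use("clam")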

    def process_url(self):
        """Process the entered URL and scrape media."""
        url = self.url_entry.get().strip()
        if not url:
            messagebox.showwarning("Input Error", "Please enter a valid URL.")
            return
        self.images.clear()
        self.videos.clear()
        self.m3u_urls.clear()
        self.result_listbox.delete(0, tk.END)
        try:
            parsed = urlparse(url)
            ext = os.path.splitext(parsed.path)[1].lower()
            if ext in ['.m3u', '.m3u8']:
                scraped_media = self.scrape_m3u_playlist(url)
            else:
                scraped_media = self.scrape_normal_site(url)
            self.images.extend(scraped_media["images"])
            self.videos.extend(scraped_media["videos"])
            self.m3u_urls.extend(scraped_media["m3u"])
            # Populate the listbox with the scraped media URLs
            for media_url in scraped_media["images"]:
                self.result_listbox.insert(tk.END, media_url)
            for video_url in scraped_media["videos"]:
                self.result_listbox.insert(tk.END, video_url)
            for m3u_url in scraped_media["m3u"]:
                self.result_listbox.insert(tk.END, m3u_url)
            total_media = len(self.images) + len(self.videos) + len(self.m3u_urls)
            if total_media == 0:
                messagebox.showinfo("Info", "No media URLs found.")
            else:
                messagebox.showinfo("Success", f"{total_media} media URL(s) found!")
        except Exception as e:
            messagebox.showerror("Error", str(e))

    def scrape_normal_site(self, url):
        """Scrape images, videos, and M3U URLs from a normal site."""
        found_images = set()
        found_videos = set()
        found_m3u = set()
        try:
            response = requests.get(url, timeout=10)
            if response.status_code != 200:
                return {"images": found_images, "videos": found_videos, "m3u": found_m3u}
            soup = BeautifulSoup(response.text, 'html.parser')
            for tag in soup.find_all(['img', 'video', 'source', 'a']):
                src = tag.get('src') or tag.get('href')
                if src:
                    # Resolve relative URLs, then classify by file extension
                    full_url = urljoin(url, src)
                    parsed = urlparse(full_url)
                    ext = os.path.splitext(parsed.path)[1].lower()
                    if ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg', '.ico']:
                        found_images.add(full_url)
                    elif ext in ['.mp4', '.mkv', '.avi', '.mov', '.flv', '.wmv']:
                        found_videos.add(full_url)
                    elif ext in ['.m3u', '.m3u8']:
                        found_m3u.add(full_url)
        except Exception as e:
            print(f"Error scraping site: {e}")
        return {"images": found_images, "videos": found_videos, "m3u": found_m3u}

    def scrape_m3u_playlist(self, url):
        """Scrape and parse an M3U playlist file."""
        found_m3u = set()
        try:
            response = requests.get(url, timeout=10)
            if response.status_code != 200:
                return {"images": set(), "videos": set(), "m3u": found_m3u}
            # Parse the M3U content line by line
            lines = response.text.splitlines()
            for line in lines:
                line = line.strip()
                if line.startswith("http"):
                    found_m3u.add(line)
        except Exception as e:
            print(f"Error scraping M3U playlist: {e}")
        return {"images": set(), "videos": set(), "m3u": found_m3u}
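
    # For reference, a typical M3U playlist looks like this (illustrative
    # example, not taken from a real playlist):
    #
    #   #EXTM3U
    #   #EXTINF:-1,Channel Name
    #   http://example.com/stream/channel.m3u8
    #
    # scrape_m3u_playlist() keeps only the bare http(s) URL lines and skips
    # the #EXTM3U/#EXTINF metadata lines.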

    def handle_double_click(self, event):
        """Handle double-click on a result item."""
        selected_index = self.result_listbox.curselection()
        if not selected_index:
            return
        # curselection() returns a tuple of indices; use the first selection
        selected_url = self.result_listbox.get(selected_index[0])
        parsed = urlparse(selected_url)
        ext = os.path.splitext(parsed.path)[1].lower()
        if ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg', '.ico']:
            # Display image preview
            try:
                response = requests.get(selected_url, timeout=10)
                image_data = Image.open(io.BytesIO(response.content))
                image_data.thumbnail((600, 600))  # Resize for display
                photo = ImageTk.PhotoImage(image_data)
                self.image_label.config(image=photo)
                self.image_label.image = photo  # Keep a reference to avoid garbage collection
            except Exception as e:
                print(f"Error loading image: {e}")
        elif ext in ['.mp4', '.mkv', '.avi', '.mov', '.flv', '.wmv', '.m3u', '.m3u8']:
            # Open the video or M3U stream in VLC
            vlc_path = r"C:\Program Files\VideoLAN\VLC\vlc.exe"
            if os.path.exists(vlc_path):
                try:
                    subprocess.Popen([vlc_path, selected_url])
                except Exception as e:
                    messagebox.showerror("Error", f"Failed to open VLC: {e}")
            else:
                messagebox.showerror("Error", "VLC Media Player not found at the specified path.")
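
    # NOTE: the hard-coded VLC path above is Windows-specific. A portable
    # alternative (sketch, assuming VLC is installed and on PATH) would be:
    #
    #   import shutil
    #   vlc_path = shutil.which("vlc") or r"C:\Program Files\VideoLAN\VLC\vlc.exe"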

    def download_image(self):
        """Download the selected image."""
        selected_index = self.result_listbox.curselection()
        if not selected_index:
            return
        selected_url = self.result_listbox.get(selected_index[0])
        save_path = filedialog.asksaveasfilename(
            defaultextension=".jpg", filetypes=[("JPEG", "*.jpg"), ("PNG", "*.png")]
        )
        if not save_path:
            return
        try:
            response = requests.get(selected_url, timeout=10)
            with open(save_path, "wb") as f:
                f.write(response.content)
            print(f"Image saved to {save_path}")
        except Exception as e:
            print(f"Error downloading image: {e}")

    def save_results(self):
        """Save the results (images, videos, M3U URLs) to a file."""
        # Get all items from the result listbox
        results = self.result_listbox.get(0, tk.END)
        if not results:
            messagebox.showwarning("No Results", "No results to save.")
            return
        # Ask the user for the file type (Text or M3U)
        file_type = messagebox.askquestion(
            "Save Format", "Save as Text (Yes) or M3U (No)?", icon="question"
        )
        # Open a file dialog to choose the save location
        file_path = filedialog.asksaveasfilename(
            defaultextension=".txt" if file_type == "yes" else ".m3u",
            filetypes=[
                ("Text File", "*.txt"),
                ("M3U Playlist", "*.m3u"),
            ],
        )
        if not file_path:
            return  # User canceled the save dialog
        try:
            # Write results to the selected file
            with open(file_path, "w", encoding="utf-8") as f:
                if file_type == "yes":  # Save as plain text
                    f.write("\n".join(results))
                else:  # Save in M3U format
                    f.write("#EXTM3U\n")  # M3U header
                    for result in results:
                        f.write(f"#EXTINF:-1,{result}\n")  # Metadata line
                        f.write(f"{result}\n")  # URL line
            messagebox.showinfo("Success", f"Results saved to {file_path}")
        except Exception as e:
            messagebox.showerror("Error", f"Failed to save results: {e}")
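
    # Saving two URLs in M3U mode therefore produces a file shaped like:
    #
    #   #EXTM3U
    #   #EXTINF:-1,http://example.com/a.mp4
    #   http://example.com/a.mp4
    #   #EXTINF:-1,http://example.com/b.m3u8
    #   http://example.com/b.m3u8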

    def clear_fields_and_results(self):
        """Clear the URL entry field, result listbox, and reset the image preview."""
        # Clear the URL entry field
        self.url_entry.delete(0, tk.END)
        # Clear the result listbox
        self.result_listbox.delete(0, tk.END)
        # Reset internal storage
        self.images.clear()
        self.videos.clear()
        self.m3u_urls.clear()
        # Reset the image preview (image="" actually clears the displayed
        # image; image=None is silently ignored by Tkinter's option handling)
        self.image_label.config(text="Image Preview", image="")
        self.image_label.image = None  # Drop the reference to avoid memory leaks
        # Notify the user
        messagebox.showinfo("Cleared", "URL and results have been cleared.")

    def scrape_all_links(self, url):
        """Return the set of all absolute links found on the given webpage."""
        all_links = set()
        try:
            response = requests.get(url, timeout=10)
            soup = BeautifulSoup(response.text, 'html.parser')
            for a_tag in soup.find_all('a', href=True):
                full_url = urljoin(url, a_tag['href'])
                all_links.add(full_url)
        except Exception as e:
            print(f"[Scrape All Error] {e}")
        return all_links

    def scrape_links(self):
        """Scrape the links on the entered webpage into the result listbox."""
        url = self.url_entry.get().strip()
        if not url:
            return
        try:
            response = requests.get(url, timeout=10)
            soup = BeautifulSoup(response.content, 'html.parser')
            # Resolve relative hrefs against the page URL so every listed
            # link is absolute (and therefore usable on double-click)
            self.links = [urljoin(url, a['href']) for a in soup.find_all('a') if 'href' in a.attrs]
            # Clear previous results
            self.result_listbox.delete(0, tk.END)
            # Populate the listbox with the links
            for link in self.links:
                self.result_listbox.insert(tk.END, link)
        except Exception as e:
            print(f"Error scraping links: {e}")

    def scrape_all_button(self):
        """Button handler: scrape all links from the entered URL and list them."""
        url = self.url_entry.get().strip()
        if not url:
            messagebox.showwarning("Input Error", "Please enter a valid URL.")
            return
        self.result_listbox.delete(0, tk.END)
        try:
            all_links = self.scrape_all_links(url)
            self.links.extend(all_links)
            # Populate the listbox with the links
            for link in all_links:
                self.result_listbox.insert(tk.END, link)
            messagebox.showinfo("Done", f"{len(all_links)} total link(s) scraped.")
        except Exception as e:
            messagebox.showerror("Error", str(e))

    def browse_text_file(self):
        """Open a text file and display its contents in the result listbox."""
        file_path = filedialog.askopenfilename(
            filetypes=[("Text Files", "*.txt"), ("All Files", "*.*")]
        )
        if not file_path:
            return  # User canceled the file dialog
        try:
            # Clear existing results
            self.result_listbox.delete(0, tk.END)
            # Read the file and populate the listbox
            with open(file_path, "r", encoding="utf-8") as file:
                for line in file:
                    line = line.strip()  # Remove leading/trailing whitespace
                    if line:  # Ignore empty lines
                        self.result_listbox.insert(tk.END, line)
            messagebox.showinfo("Success", f"Contents of '{file_path}' loaded successfully.")
        except Exception as e:
            messagebox.showerror("Error", f"Failed to load file: {e}")

if __name__ == "__main__":
    root = tk.Tk()
    app = WebScraperApp(root)
    root.mainloop()