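# Najeeb Scrape Media Downloader: a Tkinter GUI for scraping image/video URLs
# from web pages, loading M3U/M3U8 playlists, and downloading media files.
#
# Python packages used below (inferred from the imports, install as needed):
# requests, beautifulsoup4, yt-dlp, Pillow. The script also assumes external
# tools at hard-coded Windows paths further down (yt-dlp.exe, ffmpeg.exe, VLC);
# adjust those paths for your own system.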
import os
import io
import time
import threading
import subprocess
import requests
from bs4 import BeautifulSoup
from tkinter import *
from tkinter import messagebox, filedialog
from urllib.parse import urljoin, urlparse
import yt_dlp
from PIL import Image, ImageTk

stop_download_flag = False
#================ ADD-IMAGE-ICON =================
import sys

def resource_path(relative_path):
    """Get the absolute path to a resource, works for PyInstaller."""
    if getattr(sys, '_MEIPASS', False):
        return os.path.join(sys._MEIPASS, relative_path)
    return os.path.join(os.path.abspath("."), relative_path)

# Use this function to load files:
# splash_image = resource_path("splash-1.png")
icon_path = resource_path("D.ico")
#================ ADD-IMAGE-ICON =================
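# resource_path() above is intended for PyInstaller one-file builds. A minimal
# build command sketch, assuming this script is saved as downloader.py and
# D.ico sits next to it (both file names are assumptions, not from the paste):
#
#   pyinstaller --onefile --windowed --icon=D.ico --add-data "D.ico;." downloader.py
#
# The "SRC;DEST" form of --add-data is the Windows syntax; use "SRC:DEST" on
# Linux/macOS.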
media_urls = []
special_sites = ['youtube.com', 'youtu.be', 'facebook.com', 'fb.watch', 'tiktok.com', 'instagram.com']
image_exts = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg', '.ico']
video_exts = ['.mp4', '.webm', '.ogg', '.mov', '.avi', '.mkv', '.flv', '.3gp', '.wmv', '.m3u', '.m3u8']

def is_special_site(url):
    return any(domain in url for domain in special_sites)
def browse_url_file():
    file_path = filedialog.askopenfilename(title="Open URL File", filetypes=[("Text files", "*.txt")])
    if file_path:
        with open(file_path, 'r') as f:
            for line in f:
                url = line.strip()
                if url and url not in media_urls:
                    media_urls.append(url)
                    result_box.insert(END, url + "\n")
def save_urls_to_file():
    file_path = filedialog.asksaveasfilename(defaultextension=".txt", filetypes=[("Text files", "*.txt")])
    if file_path:
        with open(file_path, 'w') as f:
            f.write(result_box.get("1.0", END).strip())
        messagebox.showinfo("Saved", f"URLs saved to {file_path}")
def scrape_normal_site(url):
    found_urls = set()
    try:
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            return found_urls
        soup = BeautifulSoup(response.text, 'html.parser')
        for tag in soup.find_all(['img', 'video', 'source', 'a']):
            src = tag.get('src') or tag.get('href')
            if src:
                full_url = urljoin(url, src)
                parsed = urlparse(full_url)
                ext = os.path.splitext(parsed.path)[1].lower()
                if ext in image_exts + video_exts:
                    found_urls.add(full_url)
    except Exception:
        pass
    return found_urls
def process_url():
    url = url_entry.get().strip()
    if not url:
        messagebox.showwarning("Input Error", "Please enter a valid URL.")
        return
    media_urls.clear()
    result_box.delete("1.0", END)
    try:
        if is_special_site(url):
            ydl_opts = {
                'quiet': True,
                'skip_download': True,
                'force_generic_extractor': False
            }
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=False)
                if 'entries' in info:
                    for entry in info['entries']:
                        if not entry:  # Skip unavailable or private playlist items
                            continue
                        media_urls.append(entry['webpage_url'])
                        result_box.insert(END, entry['webpage_url'] + "\n")
                else:
                    media_urls.append(info['webpage_url'])
                    result_box.insert(END, info['webpage_url'] + "\n")
        else:
            scraped = scrape_normal_site(url)
            media_urls.extend(scraped)
            for media_url in scraped:
                result_box.insert(END, media_url + "\n")
        if not media_urls:
            messagebox.showinfo("Info", "No media URLs found.")
        else:
            messagebox.showinfo("Success", f"{len(media_urls)} media URL(s) found!")
    except Exception as e:
        messagebox.showerror("Error", str(e))
def download_media(url, save_path):
    try:
        if is_special_site(url):
            ytdlp_path = r"C:\Windows\yt-dlp.exe"  # Replace if needed
            command = [
                ytdlp_path,
                "-f", "best",
                "--no-playlist",
                "--extractor-args", "youtube:player_client=web",
                "-o", save_path,
                url
            ]
            result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
            if result.returncode != 0:
                raise Exception(result.stderr.strip())
        else:
            response = requests.get(url, stream=True)
            if response.status_code == 200:
                with open(save_path, 'wb') as f:
                    for chunk in response.iter_content(1024):
                        f.write(chunk)
    except Exception as e:
        messagebox.showerror("Download Error", f"Failed to download:\n{url}\n\n{str(e)}")
def download_selected_line():
    try:
        line_index = result_box.index(INSERT).split(".")[0]
        selected_url = result_box.get(f"{line_index}.0", f"{line_index}.end").strip()
        if not selected_url:
            raise Exception("No line selected.")
        folder = filedialog.askdirectory(title="Select Folder to Save File")
        if not folder:
            return
        parsed = urlparse(selected_url)
        filename = os.path.basename(parsed.path)
        if not filename:
            filename = "downloaded_file"
        save_path = os.path.join(folder, filename)
        threading.Thread(target=threaded_download, args=(selected_url, save_path), daemon=True).start()
    except Exception as e:
        messagebox.showerror("Error", str(e))
def download_selected():
    selected_urls = result_box.get("1.0", END).strip().splitlines()
    if not selected_urls:
        messagebox.showwarning("Selection Error", "No URLs to download.")
        return
    selected = filedialog.askdirectory(title="Select Folder to Save Files")
    if not selected:
        return
    # Note: downloads run sequentially on the GUI thread, so the window
    # stays unresponsive until every file has finished.
    for url in selected_urls:
        parsed = urlparse(url)
        filename = os.path.basename(parsed.path)
        if not filename:
            filename = "downloaded_file.mp4"
        save_path = os.path.join(selected, filename)
        download_media(url, save_path)
    messagebox.showinfo("Download Complete", f"Downloaded {len(selected_urls)} media files.")
def fix_partial_video(input_path):
    try:
        if not os.path.exists(input_path) or not input_path.lower().endswith(".mp4"):
            return
        output_path = input_path.replace(".mp4", "_fixed.mp4")
        ffmpeg_path = r"C:\Program Files\ffmpeg\bin\ffmpeg.exe"  # Your FFmpeg location here
        # Try a quick remux first (no re-encode)
        command = [
            ffmpeg_path,
            "-y",
            "-i", input_path,
            "-c", "copy",
            "-movflags", "+faststart",
            output_path
        ]
        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        # Fall back to a re-encode if the remux fails or the output is too small
        if result.returncode != 0 or not os.path.exists(output_path) or os.path.getsize(output_path) < 1024 * 1024:
            print("[INFO] Remux failed or file too small, retrying with re-encode...")
            command = [
                ffmpeg_path,
                "-y",
                "-i", input_path,
                "-preset", "ultrafast",
                output_path
            ]
            subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        # Replace the original file with the fixed copy
        if os.path.exists(output_path):
            os.remove(input_path)
            os.rename(output_path, input_path)
    except Exception as e:
        print(f"[FFmpeg Fix Error] {e}")
def threaded_download(url, save_path):
    global stop_download_flag
    stop_download_flag = False
    try:
        if is_special_site(url):
            ytdlp_path = r"C:\Windows\yt-dlp.exe"  # Ensure this is the correct path
            command = [
                ytdlp_path,
                "-f", "mp4",
                "--no-part",  # Saves directly as .mp4
                "--downloader", "ffmpeg",
                "--downloader-args", "ffmpeg_i:-movflags +faststart",
                "-o", save_path,
                url
            ]
            # Discard yt-dlp's output so an unread pipe cannot fill up and stall the process
            proc = subprocess.Popen(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            while proc.poll() is None:
                if stop_download_flag:
                    proc.kill()
                    break
                time.sleep(0.2)  # Poll without busy-waiting
        else:
            response = requests.get(url, stream=True, timeout=10)
            if response.status_code == 200:
                with open(save_path, 'wb') as f:
                    for chunk in response.iter_content(1024 * 1024):  # 1 MB chunks
                        if stop_download_flag:
                            break
                        if chunk:
                            f.write(chunk)
        if stop_download_flag:
            fix_partial_video(save_path)  # Try to repair the partial file
            messagebox.showinfo("Download Stopped", f"Download was stopped by user.\nSaved: {save_path}")
        else:
            messagebox.showinfo("Download Complete", f"Downloaded successfully to:\n{save_path}")
    except Exception as e:
        messagebox.showerror("Download Error", str(e))
def start_download(url, save_path):
    global stop_download_flag
    stop_download_flag = False
    threading.Thread(target=threaded_download, args=(url, save_path), daemon=True).start()

def stop_download():
    global stop_download_flag
    stop_download_flag = True
def scrape_all_links(url):
    all_links = set()
    try:
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.text, 'html.parser')
        for a_tag in soup.find_all('a', href=True):
            full_url = urljoin(url, a_tag['href'])
            all_links.add(full_url)
    except Exception as e:
        print(f"[Scrape All Error] {e}")
    return all_links
def scrape_all_button():
    url = url_entry.get().strip()
    if not url:
        messagebox.showwarning("Input Error", "Please enter a valid URL.")
        return
    result_box.delete("1.0", END)
    media_urls.clear()
    all_links = scrape_all_links(url)
    media_urls.extend(all_links)
    for link in all_links:
        result_box.insert(END, link + "\n")
    messagebox.showinfo("Done", f"{len(all_links)} total link(s) scraped.")
def open_in_vlc():
    line_index = result_box.index(INSERT).split(".")[0]
    selected_url = result_box.get(f"{line_index}.0", f"{line_index}.end").strip()
    if not selected_url:
        messagebox.showwarning("No Selection", "Select a valid media URL.")
        return
    vlc_path = r"C:\Program Files\VideoLAN\VLC\vlc.exe"
    try:
        subprocess.Popen([vlc_path, selected_url])
    except Exception as e:
        messagebox.showerror("VLC Error", f"Could not open VLC:\n{e}")
def preview_image_popup():
    try:
        line_index = result_box.index(INSERT).split(".")[0]
        selected_url = result_box.get(f"{line_index}.0", f"{line_index}.end").strip()
        if not selected_url.lower().endswith(tuple(image_exts)):
            raise Exception("Selected link is not an image.")
        response = requests.get(selected_url, timeout=10)
        image = Image.open(io.BytesIO(response.content))
        popup = Toplevel(root)
        popup.title("Image Preview")
        popup.geometry("600x600")
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the equivalent filter
        img_resized = image.resize((500, 500), Image.LANCZOS)
        img_tk = ImageTk.PhotoImage(img_resized)
        label = Label(popup, image=img_tk)
        label.image = img_tk  # Keep a reference so Tkinter does not garbage-collect the image
        label.pack()
    except Exception as e:
        messagebox.showerror("Preview Error", str(e))
def clear_url_field():
    url_entry.delete(0, END)

def clear_result_box():
    result_box.delete("1.0", END)
    media_urls.clear()
def load_m3u_file():
    file_path = filedialog.askopenfilename(title="Open M3U File", filetypes=[("M3U/M3U8 Files", "*.m3u *.m3u8")])
    if file_path:
        result_box.delete("1.0", END)
        media_urls.clear()
        with open(file_path, 'r', encoding="utf-8", errors="ignore") as f:
            for line in f:
                url = line.strip()
                if url and url.startswith("http"):
                    media_urls.append(url)
                    result_box.insert(END, url + "\n")
        messagebox.showinfo("Loaded", f"{len(media_urls)} media URLs loaded from playlist.")
def load_online_m3u():
    url = url_entry.get().strip()
    if not url.lower().endswith((".m3u", ".m3u8")):
        messagebox.showwarning("URL Error", "Please enter a valid .m3u or .m3u8 URL.")
        return
    try:
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            raise Exception("Unable to fetch playlist.")
        result_box.delete("1.0", END)
        media_urls.clear()
        for line in response.text.splitlines():
            line = line.strip()
            if line and line.startswith("http"):
                media_urls.append(line)
                result_box.insert(END, line + "\n")
        messagebox.showinfo("Online M3U Loaded", f"{len(media_urls)} stream(s) loaded.")
    except Exception as e:
        messagebox.showerror("Error", str(e))
def save_as_m3u():
    file_path = filedialog.asksaveasfilename(defaultextension=".m3u", filetypes=[("Text File", "*.txt"), ("M3U Playlist", "*.m3u"), ("M3U8 Playlist", "*.m3u8")])
    if file_path:
        with open(file_path, 'w', encoding="utf-8") as f:
            f.write(result_box.get("1.0", END).strip())
        messagebox.showinfo("Saved", f"Playlist saved to:\n{file_path}")
def scrape_xtream_m3u_url():
    url = url_entry.get().strip()
    if not url or "get.php" not in url:
        messagebox.showwarning("Input Error", "Please enter a valid Xtream M3U URL.")
        return
    try:
        headers = {
            "User-Agent": "VLC/3.0.18 LibVLC/3.0.18"
        }
        response = requests.get(url, headers=headers, timeout=15)
        if response.status_code == 404:
            raise Exception("404 Not Found: the playlist URL might be wrong or expired.")
        if response.status_code != 200:
            raise Exception(f"Failed to fetch playlist. Status code: {response.status_code}")
        content = response.text
        if "#EXTM3U" not in content:
            raise Exception("Invalid playlist. No M3U content found.")
        result_box.delete("1.0", END)
        media_urls.clear()
        for line in content.splitlines():
            if line.startswith("http"):
                media_urls.append(line)
                result_box.insert(END, line + "\n")
        if media_urls:
            messagebox.showinfo("Success", f"Scraped {len(media_urls)} stream URLs from Xtream playlist.")
        else:
            messagebox.showwarning("No URLs", "Playlist loaded, but no stream URLs found.")
    except Exception as e:
        messagebox.showerror("Error", str(e))
def search_urls():
    query = search_entry.get().strip().lower()
    if not query:
        return
    result_box.tag_remove("highlight", "1.0", END)
    lines = result_box.get("1.0", END).splitlines()
    for i, line in enumerate(lines, 1):
        if query in line.lower():
            result_box.tag_add("highlight", f"{i}.0", f"{i}.end")
    result_box.tag_config("highlight", background="yellow", foreground="black")

def clear_search():
    search_entry.delete(0, END)
    result_box.tag_remove("highlight", "1.0", END)
def scrape_directory_media(url):
    """
    Scrape media URLs from the immediate subdirectories of the given URL.
    :param url: The base URL to start scraping from.
    """
    global media_urls
    result_box.delete("1.0", END)
    media_urls.clear()

    def extract_directories(soup, base_url):
        """
        Extract directory links from the page.
        :param soup: BeautifulSoup object of the page.
        :param base_url: Base URL to resolve relative paths.
        :return: List of directory URLs.
        """
        directories = []
        for a_tag in soup.find_all('a', href=True):
            href = a_tag['href']
            if href.endswith("/") and not href.startswith("#"):  # Subdirectory link
                full_href = urljoin(base_url, href)
                if full_href != base_url:  # Avoid infinite loops
                    directories.append(full_href)
        return directories

    def extract_media_urls(soup, base_url):
        """
        Extract media URLs from the page.
        :param soup: BeautifulSoup object of the page.
        :param base_url: Base URL to resolve relative paths.
        :return: Set of media URLs.
        """
        media_links = set()
        for tag in soup.find_all(['img', 'video', 'source', 'a']):
            src = tag.get('src') or tag.get('href')
            if src:
                full_url = urljoin(base_url, src)
                parsed = urlparse(full_url)
                ext = os.path.splitext(parsed.path)[1].lower()
                if ext in image_exts + video_exts:
                    media_links.add(full_url)
        return media_links

    try:
        # Fetch the base URL content
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            messagebox.showerror("Error", f"Failed to fetch {url} (Status Code: {response.status_code})")
            return
        soup = BeautifulSoup(response.text, 'html.parser')
        # Step 1: Extract all subdirectories
        directories = extract_directories(soup, url)
        # Step 2: Scrape media URLs from each subdirectory
        found_media = False
        for directory in directories:
            try:
                dir_response = requests.get(directory, timeout=10)
                if dir_response.status_code == 200:
                    dir_soup = BeautifulSoup(dir_response.text, 'html.parser')
                    media_links = extract_media_urls(dir_soup, directory)
                    if media_links:
                        found_media = True
                        for media_url in media_links:
                            if media_url not in media_urls:
                                media_urls.append(media_url)
                                result_box.insert(END, media_url + "\n")
            except Exception as e:
                print(f"Error scraping directory {directory}: {e}")
        if not found_media:
            messagebox.showinfo("Info", "No media URLs found in subdirectories.")
        else:
            messagebox.showinfo("Success", f"{len(media_urls)} media URL(s) found!")
    except Exception as e:
        messagebox.showerror("Error", str(e))
# GUI Setup
root = Tk()
root.title("Najeeb Scrape Media Downloader + Batch Support")
root.geometry("965x700")
# root.configure(bg="#2c3e50")
root.iconbitmap(icon_path)

Label(root, text="Najeeb Downloader - Enter a picture or video URL (any site or platform):").pack(pady=5)

search_frame = Frame(root)
search_frame.pack(pady=5)
search_entry = Entry(search_frame, width=40)
search_entry.pack(side=LEFT, padx=5)
Button(search_frame, text="Search", command=search_urls, bg="lightblue").pack(side=LEFT, padx=5)

url_entry = Entry(search_frame, width=100)
url_entry.pack(pady=5)

frame_buttons = Frame(root)
frame_buttons.pack(pady=5)
Button(frame_buttons, text="Scrape Media", command=process_url, bg="lightgreen", width=20).pack(side=LEFT, padx=5)
Button(frame_buttons, text="Browse URL File", command=browse_url_file, bg="lightyellow", width=20).pack(side=LEFT, padx=5)
Button(frame_buttons, text="Download All URLs", command=download_selected, bg="lightblue", width=20).pack(side=LEFT, padx=5)
Button(frame_buttons, text="Download Selected URL", command=download_selected_line, bg="orange", width=20).pack(side=LEFT, padx=5)
Button(frame_buttons, text="Save URLs to File", command=save_urls_to_file, bg="lightgray", width=20).pack(side=LEFT, padx=5)
Button(frame_buttons, text="Stop Download", command=stop_download, bg="red", width=20).pack(side=LEFT, padx=5)

frame_button = Frame(root)
frame_button.pack(pady=5)
Button(frame_button, text="Scrape All Links", command=scrape_all_button, bg="#e0c3fc", width=20).pack(side=LEFT, padx=5)
Button(frame_button, text="Open in VLC", command=open_in_vlc, bg="#c1f0c1", width=20).pack(side=LEFT, padx=5)
Button(frame_button, text="Preview Image", command=preview_image_popup, bg="#f0c1c1", width=20).pack(side=LEFT, padx=5)
Button(frame_button, text="Load Online M3U", command=load_online_m3u, bg="#c9f2ff", width=20).pack(side=LEFT, padx=5)
Button(frame_button, text="Scrape Xtream M3U", command=scrape_xtream_m3u_url, bg="#fff0b3", width=20).pack(side=LEFT, padx=5)
Button(frame_button, text="Load M3U File", command=load_m3u_file, bg="#d0f0fd", width=20).pack(side=LEFT, padx=5)

# Label(root, text="Editable Media URL List:").pack(pady=10)
result_frame = Frame(root)
result_frame.pack(pady=5)
scrollbar = Scrollbar(result_frame)
scrollbar.pack(side=RIGHT, fill=Y)
result_box = Text(result_frame, height=28, width=124, yscrollcommand=scrollbar.set)
result_box.pack(side=LEFT, fill=BOTH)
scrollbar.config(command=result_box.yview)

frame_clear = Frame(root)
frame_clear.pack(pady=5)
Button(frame_clear, text="Save Result", command=save_as_m3u, bg="#a7ffcc", width=20).pack(side=LEFT, padx=5)
Button(frame_clear, text="Clear Search", command=clear_search, bg="lightgray").pack(side=LEFT, padx=2)
Button(frame_clear, text="Clear URL Field", command=clear_url_field, bg="#ffd580", width=20).pack(side=LEFT, padx=5)
Button(frame_clear, text="Clear Result Field", command=clear_result_box, bg="#ffb3b3", width=20).pack(side=LEFT, padx=5)
# Button for scraping subdirectories
Button(frame_clear, text="Scrape Subdirectories", command=lambda: scrape_directory_media(url_entry.get().strip()), bg="#ffcccb", width=20).pack(side=LEFT, padx=5)

root.mainloop()