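# Najeeb Scrape Media Downloader: a Tkinter GUI for scraping image/video URLs
# from web pages, loading M3U/M3U8 playlists, and downloading media files.
#
# Python packages used below (inferred from the imports, install as needed):
# requests, beautifulsoup4, yt-dlp, Pillow. The script also assumes external
# tools at hard-coded Windows paths further down (yt-dlp.exe, ffmpeg.exe, VLC);
# adjust those paths for your own system.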
import os
import io
import time
import threading
import subprocess
import requests
from bs4 import BeautifulSoup
from tkinter import *
from tkinter import messagebox, filedialog
from urllib.parse import urljoin, urlparse
import yt_dlp
from PIL import Image, ImageTk

stop_download_flag = False
#================ ADD-IMAGE-ICON =================
import sys

def resource_path(relative_path):
    """Get the absolute path to a resource, works for PyInstaller."""
    if getattr(sys, '_MEIPASS', False):
        return os.path.join(sys._MEIPASS, relative_path)
    return os.path.join(os.path.abspath("."), relative_path)

# Use this function to load files:
# splash_image = resource_path("splash-1.png")
icon_path = resource_path("D.ico")
#================ ADD-IMAGE-ICON =================
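# resource_path() above is intended for PyInstaller one-file builds. A minimal
# build command sketch, assuming this script is saved as downloader.py and
# D.ico sits next to it (both file names are assumptions, not from the paste):
#
#   pyinstaller --onefile --windowed --icon=D.ico --add-data "D.ico;." downloader.py
#
# The "SRC;DEST" form of --add-data is the Windows syntax; use "SRC:DEST" on
# Linux/macOS.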
media_urls = []
special_sites = ['youtube.com', 'youtu.be', 'facebook.com', 'fb.watch', 'tiktok.com', 'instagram.com']
image_exts = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg', '.ico']
video_exts = ['.mp4', '.webm', '.ogg', '.mov', '.avi', '.mkv', '.flv', '.3gp', '.wmv', '.m3u', '.m3u8']

def is_special_site(url):
    return any(domain in url for domain in special_sites)
def browse_url_file():
    file_path = filedialog.askopenfilename(title="Open URL File", filetypes=[("Text files", "*.txt")])
    if file_path:
        with open(file_path, 'r') as f:
            for line in f:
                url = line.strip()
                if url and url not in media_urls:
                    media_urls.append(url)
                    result_box.insert(END, url + "\n")
def save_urls_to_file():
    file_path = filedialog.asksaveasfilename(defaultextension=".txt", filetypes=[("Text files", "*.txt")])
    if file_path:
        with open(file_path, 'w') as f:
            f.write(result_box.get("1.0", END).strip())
        messagebox.showinfo("Saved", f"URLs saved to {file_path}")
def scrape_normal_site(url):
    found_urls = set()
    try:
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            return found_urls
        soup = BeautifulSoup(response.text, 'html.parser')
        for tag in soup.find_all(['img', 'video', 'source', 'a']):
            src = tag.get('src') or tag.get('href')
            if src:
                full_url = urljoin(url, src)
                parsed = urlparse(full_url)
                ext = os.path.splitext(parsed.path)[1].lower()
                if ext in image_exts + video_exts:
                    found_urls.add(full_url)
    except Exception:
        pass
    return found_urls
def process_url():
    url = url_entry.get().strip()
    if not url:
        messagebox.showwarning("Input Error", "Please enter a valid URL.")
        return
    media_urls.clear()
    result_box.delete("1.0", END)
    try:
        if is_special_site(url):
            ydl_opts = {
                'quiet': True,
                'skip_download': True,
                'force_generic_extractor': False
            }
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=False)
                if 'entries' in info:
                    for entry in info['entries']:
                        if not entry:  # Skip unavailable or private playlist items
                            continue
                        media_urls.append(entry['webpage_url'])
                        result_box.insert(END, entry['webpage_url'] + "\n")
                else:
                    media_urls.append(info['webpage_url'])
                    result_box.insert(END, info['webpage_url'] + "\n")
        else:
            scraped = scrape_normal_site(url)
            media_urls.extend(scraped)
            for media_url in scraped:
                result_box.insert(END, media_url + "\n")
        if not media_urls:
            messagebox.showinfo("Info", "No media URLs found.")
        else:
            messagebox.showinfo("Success", f"{len(media_urls)} media URL(s) found!")
    except Exception as e:
        messagebox.showerror("Error", str(e))
def download_media(url, save_path):
    try:
        if is_special_site(url):
            ytdlp_path = r"C:\Windows\yt-dlp.exe"  # Replace if needed
            command = [
                ytdlp_path,
                "-f", "best",
                "--no-playlist",
                "--extractor-args", "youtube:player_client=web",
                "-o", save_path,
                url
            ]
            result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
            if result.returncode != 0:
                raise Exception(result.stderr.strip())
        else:
            response = requests.get(url, stream=True)
            if response.status_code == 200:
                with open(save_path, 'wb') as f:
                    for chunk in response.iter_content(1024):
                        f.write(chunk)
    except Exception as e:
        messagebox.showerror("Download Error", f"Failed to download:\n{url}\n\n{str(e)}")
def download_selected_line():
    try:
        line_index = result_box.index(INSERT).split(".")[0]
        selected_url = result_box.get(f"{line_index}.0", f"{line_index}.end").strip()
        if not selected_url:
            raise Exception("No line selected.")
        folder = filedialog.askdirectory(title="Select Folder to Save File")
        if not folder:
            return
        parsed = urlparse(selected_url)
        filename = os.path.basename(parsed.path)
        if not filename:
            filename = "downloaded_file"
        save_path = os.path.join(folder, filename)
        threading.Thread(target=threaded_download, args=(selected_url, save_path), daemon=True).start()
    except Exception as e:
        messagebox.showerror("Error", str(e))
def download_selected():
    selected_urls = result_box.get("1.0", END).strip().splitlines()
    if not selected_urls:
        messagebox.showwarning("Selection Error", "No URLs to download.")
        return
    selected = filedialog.askdirectory(title="Select Folder to Save Files")
    if not selected:
        return
    # Note: downloads run sequentially on the GUI thread, so the window
    # stays unresponsive until every file has finished.
    for url in selected_urls:
        parsed = urlparse(url)
        filename = os.path.basename(parsed.path)
        if not filename:
            filename = "downloaded_file.mp4"
        save_path = os.path.join(selected, filename)
        download_media(url, save_path)
    messagebox.showinfo("Download Complete", f"Downloaded {len(selected_urls)} media files.")
def fix_partial_video(input_path):
    try:
        if not os.path.exists(input_path) or not input_path.lower().endswith(".mp4"):
            return
        output_path = input_path.replace(".mp4", "_fixed.mp4")
        ffmpeg_path = r"C:\Program Files\ffmpeg\bin\ffmpeg.exe"  # Your FFmpeg location here
        # Try a quick remux first (no re-encode)
        command = [
            ffmpeg_path,
            "-y",
            "-i", input_path,
            "-c", "copy",
            "-movflags", "+faststart",
            output_path
        ]
        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        # Fall back to a re-encode if the remux fails or the output is too small
        if result.returncode != 0 or not os.path.exists(output_path) or os.path.getsize(output_path) < 1024 * 1024:
            print("[INFO] Remux failed or file too small, retrying with re-encode...")
            command = [
                ffmpeg_path,
                "-y",
                "-i", input_path,
                "-preset", "ultrafast",
                output_path
            ]
            subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        # Replace the original file with the fixed copy
        if os.path.exists(output_path):
            os.remove(input_path)
            os.rename(output_path, input_path)
    except Exception as e:
        print(f"[FFmpeg Fix Error] {e}")
def threaded_download(url, save_path):
    global stop_download_flag
    stop_download_flag = False
    try:
        if is_special_site(url):
            ytdlp_path = r"C:\Windows\yt-dlp.exe"  # Ensure this is the correct path
            command = [
                ytdlp_path,
                "-f", "mp4",
                "--no-part",  # Saves directly as .mp4
                "--downloader", "ffmpeg",
                "--downloader-args", "ffmpeg_i:-movflags +faststart",
                "-o", save_path,
                url
            ]
            # Discard yt-dlp's output so an unread pipe cannot fill up and stall the process
            proc = subprocess.Popen(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            while proc.poll() is None:
                if stop_download_flag:
                    proc.kill()
                    break
                time.sleep(0.2)  # Poll without busy-waiting
        else:
            response = requests.get(url, stream=True, timeout=10)
            if response.status_code == 200:
                with open(save_path, 'wb') as f:
                    for chunk in response.iter_content(1024 * 1024):  # 1 MB chunks
                        if stop_download_flag:
                            break
                        if chunk:
                            f.write(chunk)
        if stop_download_flag:
            fix_partial_video(save_path)  # Try to repair the partial file
            messagebox.showinfo("Download Stopped", f"Download was stopped by user.\nSaved: {save_path}")
        else:
            messagebox.showinfo("Download Complete", f"Downloaded successfully to:\n{save_path}")
    except Exception as e:
        messagebox.showerror("Download Error", str(e))
def start_download(url, save_path):
    global stop_download_flag
    stop_download_flag = False
    threading.Thread(target=threaded_download, args=(url, save_path), daemon=True).start()

def stop_download():
    global stop_download_flag
    stop_download_flag = True
def scrape_all_links(url):
    all_links = set()
    try:
        response = requests.get(url, timeout=10)
        soup = BeautifulSoup(response.text, 'html.parser')
        for a_tag in soup.find_all('a', href=True):
            full_url = urljoin(url, a_tag['href'])
            all_links.add(full_url)
    except Exception as e:
        print(f"[Scrape All Error] {e}")
    return all_links
def scrape_all_button():
    url = url_entry.get().strip()
    if not url:
        messagebox.showwarning("Input Error", "Please enter a valid URL.")
        return
    result_box.delete("1.0", END)
    media_urls.clear()
    all_links = scrape_all_links(url)
    media_urls.extend(all_links)
    for link in all_links:
        result_box.insert(END, link + "\n")
    messagebox.showinfo("Done", f"{len(all_links)} total link(s) scraped.")
def open_in_vlc():
    line_index = result_box.index(INSERT).split(".")[0]
    selected_url = result_box.get(f"{line_index}.0", f"{line_index}.end").strip()
    if not selected_url:
        messagebox.showwarning("No Selection", "Select a valid media URL.")
        return
    vlc_path = r"C:\Program Files\VideoLAN\VLC\vlc.exe"
    try:
        subprocess.Popen([vlc_path, selected_url])
    except Exception as e:
        messagebox.showerror("VLC Error", f"Could not open VLC:\n{e}")
def preview_image_popup():
    try:
        line_index = result_box.index(INSERT).split(".")[0]
        selected_url = result_box.get(f"{line_index}.0", f"{line_index}.end").strip()
        if not selected_url.lower().endswith(tuple(image_exts)):
            raise Exception("Selected link is not an image.")
        response = requests.get(selected_url, timeout=10)
        image = Image.open(io.BytesIO(response.content))
        popup = Toplevel(root)
        popup.title("Image Preview")
        popup.geometry("600x600")
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the equivalent filter
        img_resized = image.resize((500, 500), Image.LANCZOS)
        img_tk = ImageTk.PhotoImage(img_resized)
        label = Label(popup, image=img_tk)
        label.image = img_tk  # Keep a reference so Tkinter does not garbage-collect the image
        label.pack()
    except Exception as e:
        messagebox.showerror("Preview Error", str(e))
def clear_url_field():
    url_entry.delete(0, END)

def clear_result_box():
    result_box.delete("1.0", END)
    media_urls.clear()
def load_m3u_file():
    file_path = filedialog.askopenfilename(title="Open M3U File", filetypes=[("M3U/M3U8 Files", "*.m3u *.m3u8")])
    if file_path:
        result_box.delete("1.0", END)
        media_urls.clear()
        with open(file_path, 'r', encoding="utf-8", errors="ignore") as f:
            for line in f:
                url = line.strip()
                if url and url.startswith("http"):
                    media_urls.append(url)
                    result_box.insert(END, url + "\n")
        messagebox.showinfo("Loaded", f"{len(media_urls)} media URLs loaded from playlist.")
def load_online_m3u():
    url = url_entry.get().strip()
    if not url.lower().endswith((".m3u", ".m3u8")):
        messagebox.showwarning("URL Error", "Please enter a valid .m3u or .m3u8 URL.")
        return
    try:
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            raise Exception("Unable to fetch playlist.")
        result_box.delete("1.0", END)
        media_urls.clear()
        for line in response.text.splitlines():
            line = line.strip()
            if line and line.startswith("http"):
                media_urls.append(line)
                result_box.insert(END, line + "\n")
        messagebox.showinfo("Online M3U Loaded", f"{len(media_urls)} stream(s) loaded.")
    except Exception as e:
        messagebox.showerror("Error", str(e))
def save_as_m3u():
    file_path = filedialog.asksaveasfilename(defaultextension=".m3u", filetypes=[("Text File", "*.txt"), ("M3U Playlist", "*.m3u"), ("M3U8 Playlist", "*.m3u8")])
    if file_path:
        with open(file_path, 'w', encoding="utf-8") as f:
            f.write(result_box.get("1.0", END).strip())
        messagebox.showinfo("Saved", f"Playlist saved to:\n{file_path}")
def scrape_xtream_m3u_url():
    url = url_entry.get().strip()
    if not url or "get.php" not in url:
        messagebox.showwarning("Input Error", "Please enter a valid Xtream M3U URL.")
        return
    try:
        headers = {
            "User-Agent": "VLC/3.0.18 LibVLC/3.0.18"
        }
        response = requests.get(url, headers=headers, timeout=15)
        if response.status_code == 404:
            raise Exception("404 Not Found: the playlist URL might be wrong or expired.")
        if response.status_code != 200:
            raise Exception(f"Failed to fetch playlist. Status code: {response.status_code}")
        content = response.text
        if "#EXTM3U" not in content:
            raise Exception("Invalid playlist. No M3U content found.")
        result_box.delete("1.0", END)
        media_urls.clear()
        for line in content.splitlines():
            if line.startswith("http"):
                media_urls.append(line)
                result_box.insert(END, line + "\n")
        if media_urls:
            messagebox.showinfo("Success", f"Scraped {len(media_urls)} stream URLs from Xtream playlist.")
        else:
            messagebox.showwarning("No URLs", "Playlist loaded, but no stream URLs found.")
    except Exception as e:
        messagebox.showerror("Error", str(e))
def search_urls():
    query = search_entry.get().strip().lower()
    if not query:
        return
    result_box.tag_remove("highlight", "1.0", END)
    lines = result_box.get("1.0", END).splitlines()
    for i, line in enumerate(lines, 1):
        if query in line.lower():
            result_box.tag_add("highlight", f"{i}.0", f"{i}.end")
    result_box.tag_config("highlight", background="yellow", foreground="black")

def clear_search():
    search_entry.delete(0, END)
    result_box.tag_remove("highlight", "1.0", END)
def scrape_directory_media(url):
    """
    Scrape media URLs from the immediate subdirectories of the given URL.
    :param url: The base URL to start scraping from.
    """
    global media_urls
    result_box.delete("1.0", END)
    media_urls.clear()

    def extract_directories(soup, base_url):
        """
        Extract directory links from the page.
        :param soup: BeautifulSoup object of the page.
        :param base_url: Base URL to resolve relative paths.
        :return: List of directory URLs.
        """
        directories = []
        for a_tag in soup.find_all('a', href=True):
            href = a_tag['href']
            if href.endswith("/") and not href.startswith("#"):  # Subdirectory link
                full_href = urljoin(base_url, href)
                if full_href != base_url:  # Avoid infinite loops
                    directories.append(full_href)
        return directories

    def extract_media_urls(soup, base_url):
        """
        Extract media URLs from the page.
        :param soup: BeautifulSoup object of the page.
        :param base_url: Base URL to resolve relative paths.
        :return: Set of media URLs.
        """
        media_links = set()
        for tag in soup.find_all(['img', 'video', 'source', 'a']):
            src = tag.get('src') or tag.get('href')
            if src:
                full_url = urljoin(base_url, src)
                parsed = urlparse(full_url)
                ext = os.path.splitext(parsed.path)[1].lower()
                if ext in image_exts + video_exts:
                    media_links.add(full_url)
        return media_links

    try:
        # Fetch the base URL content
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            messagebox.showerror("Error", f"Failed to fetch {url} (Status Code: {response.status_code})")
            return
        soup = BeautifulSoup(response.text, 'html.parser')
        # Step 1: Extract all subdirectories
        directories = extract_directories(soup, url)
        # Step 2: Scrape media URLs from each subdirectory
        found_media = False
        for directory in directories:
            try:
                dir_response = requests.get(directory, timeout=10)
                if dir_response.status_code == 200:
                    dir_soup = BeautifulSoup(dir_response.text, 'html.parser')
                    media_links = extract_media_urls(dir_soup, directory)
                    if media_links:
                        found_media = True
                        for media_url in media_links:
                            if media_url not in media_urls:
                                media_urls.append(media_url)
                                result_box.insert(END, media_url + "\n")
            except Exception as e:
                print(f"Error scraping directory {directory}: {e}")
        if not found_media:
            messagebox.showinfo("Info", "No media URLs found in subdirectories.")
        else:
            messagebox.showinfo("Success", f"{len(media_urls)} media URL(s) found!")
    except Exception as e:
        messagebox.showerror("Error", str(e))
# GUI Setup
root = Tk()
root.title("Najeeb Scrape Media Downloader + Batch Support")
root.geometry("965x700")
# root.configure(bg="#2c3e50")
root.iconbitmap(icon_path)

Label(root, text="Najeeb Downloader - Enter a picture or video URL (any site or platform):").pack(pady=5)

search_frame = Frame(root)
search_frame.pack(pady=5)
search_entry = Entry(search_frame, width=40)
search_entry.pack(side=LEFT, padx=5)
Button(search_frame, text="Search", command=search_urls, bg="lightblue").pack(side=LEFT, padx=5)

url_entry = Entry(search_frame, width=100)
url_entry.pack(pady=5)

frame_buttons = Frame(root)
frame_buttons.pack(pady=5)
Button(frame_buttons, text="Scrape Media", command=process_url, bg="lightgreen", width=20).pack(side=LEFT, padx=5)
Button(frame_buttons, text="Browse URL File", command=browse_url_file, bg="lightyellow", width=20).pack(side=LEFT, padx=5)
Button(frame_buttons, text="Download All URLs", command=download_selected, bg="lightblue", width=20).pack(side=LEFT, padx=5)
Button(frame_buttons, text="Download Selected URL", command=download_selected_line, bg="orange", width=20).pack(side=LEFT, padx=5)
Button(frame_buttons, text="Save URLs to File", command=save_urls_to_file, bg="lightgray", width=20).pack(side=LEFT, padx=5)
Button(frame_buttons, text="Stop Download", command=stop_download, bg="red", width=20).pack(side=LEFT, padx=5)

frame_button = Frame(root)
frame_button.pack(pady=5)
Button(frame_button, text="Scrape All Links", command=scrape_all_button, bg="#e0c3fc", width=20).pack(side=LEFT, padx=5)
Button(frame_button, text="Open in VLC", command=open_in_vlc, bg="#c1f0c1", width=20).pack(side=LEFT, padx=5)
Button(frame_button, text="Preview Image", command=preview_image_popup, bg="#f0c1c1", width=20).pack(side=LEFT, padx=5)
Button(frame_button, text="Load Online M3U", command=load_online_m3u, bg="#c9f2ff", width=20).pack(side=LEFT, padx=5)
Button(frame_button, text="Scrape Xtream M3U", command=scrape_xtream_m3u_url, bg="#fff0b3", width=20).pack(side=LEFT, padx=5)
Button(frame_button, text="Load M3U File", command=load_m3u_file, bg="#d0f0fd", width=20).pack(side=LEFT, padx=5)

# Label(root, text="Editable Media URL List:").pack(pady=10)
result_frame = Frame(root)
result_frame.pack(pady=5)
scrollbar = Scrollbar(result_frame)
scrollbar.pack(side=RIGHT, fill=Y)
result_box = Text(result_frame, height=28, width=124, yscrollcommand=scrollbar.set)
result_box.pack(side=LEFT, fill=BOTH)
scrollbar.config(command=result_box.yview)

frame_clear = Frame(root)
frame_clear.pack(pady=5)
Button(frame_clear, text="Save Result", command=save_as_m3u, bg="#a7ffcc", width=20).pack(side=LEFT, padx=5)
Button(frame_clear, text="Clear Search", command=clear_search, bg="lightgray").pack(side=LEFT, padx=2)
Button(frame_clear, text="Clear URL Field", command=clear_url_field, bg="#ffd580", width=20).pack(side=LEFT, padx=5)
Button(frame_clear, text="Clear Result Field", command=clear_result_box, bg="#ffb3b3", width=20).pack(side=LEFT, padx=5)
# Button for scraping subdirectories
Button(frame_clear, text="Scrape Subdirectories", command=lambda: scrape_directory_media(url_entry.get().strip()), bg="#ffcccb", width=20).pack(side=LEFT, padx=5)

root.mainloop()