Najeebsk

PHOTO-VIDEO.pyw

Apr 27th, 2025
11
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 16.48 KB | None | 0 0
  1. import tkinter as tk
  2. from tkinter import ttk, filedialog, messagebox
  3. from bs4 import BeautifulSoup
  4. import requests
  5. from PIL import Image, ImageTk
  6. import io
  7. from urllib.parse import urljoin, urlparse
  8. import os
  9. import subprocess
  10.  
  11.  
  12. class WebScraperApp:
  13.     def __init__(self, root):
  14.         self.root = root
  15.         self.root.title("Najeeb Images & Videos Web Scraper")
  16.         self.root.geometry("1000x700")
  17.         self.root.configure(bg="#f0f0f0")  # Light gray background
  18.  
  19.         # Top Frame for URL Entry and Buttons
  20.         self.top_frame = ttk.Frame(self.root, style="Top.TFrame")
  21.         self.top_frame.pack(fill=tk.X, padx=10, pady=10)
  22.  
  23.         # URL Entry Field
  24.         self.url_label = ttk.Label(self.top_frame, text="Enter URL:", font=("Arial", 12), background="#ffffff")
  25.         self.url_label.pack(side=tk.LEFT, padx=(0, 5))
  26.         self.url_entry = ttk.Entry(self.top_frame, width=50, font=("Arial", 12))
  27.         self.url_entry.pack(side=tk.LEFT, padx=(0, 10))
  28.  
  29.         # Scrape Media Button (Green)
  30.         self.scrape_image_btn = ttk.Button(
  31.             self.top_frame, text="Scrape Media", command=self.process_url, style="Green.TButton"
  32.         )
  33.         self.scrape_image_btn.pack(side=tk.LEFT, padx=(0, 5))
  34.  
  35.         # Scrape Links Button (Blue)
  36.         self.scrape_links_btn = ttk.Button(
  37.             self.top_frame, text="Scrape Links", command=self.scrape_links, style="Blue.TButton"
  38.         )
  39.         self.scrape_links_btn.pack(side=tk.LEFT, padx=(0, 5))
  40.  
  41.         # Scrape All Links Button (Orange)
  42.         self.scrape_all_links_btn = ttk.Button(
  43.             self.top_frame, text="Scrape All Links", command=self.scrape_all_button, style="Orange.TButton"
  44.         )
  45.         self.scrape_all_links_btn.pack(side=tk.LEFT, padx=(0, 5))
  46.  
  47.         # Clear Results and URL Button (Gray)
  48.         self.clear_button = ttk.Button(
  49.             self.top_frame, text="Clear", command=self.clear_fields_and_results, style="Gray.TButton"
  50.         )
  51.         self.clear_button.pack(side=tk.LEFT, padx=(0, 5))
  52.        
  53.         # Save Results Button (Red)
  54.         self.save_results_btn = ttk.Button(
  55.             self.top_frame, text="Save", command=self.save_results, style="Red.TButton"
  56.         )
  57.         self.save_results_btn.pack(side=tk.LEFT)
  58.  
  59.         # Left Frame for Results with Scrollbar
  60.         self.left_frame = ttk.Frame(self.root, style="Left.TFrame")
  61.         self.left_frame.pack(side=tk.LEFT, fill=tk.BOTH, expand=False, padx=10, pady=10)
  62.  
  63.         self.result_label = ttk.Label(self.left_frame, text="Results:", font=("Arial", 12), background="#ffffff")
  64.         self.result_label.pack(anchor=tk.W)
  65.  
  66.         # Scrollable Listbox for Results
  67.         self.result_scrollbar = ttk.Scrollbar(self.left_frame, orient=tk.VERTICAL)
  68.         self.result_scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
  69.         self.result_listbox = tk.Listbox(
  70.             self.left_frame,
  71.             width=40,
  72.             height=20,
  73.             font=("Arial", 10),
  74.             yscrollcommand=self.result_scrollbar.set
  75.         )
  76.         self.result_listbox.pack(fill=tk.BOTH, expand=True)
  77.         self.result_scrollbar.config(command=self.result_listbox.yview)
  78.  
  79.         # Double-click to preview image, play video/M3U in VLC
  80.         self.result_listbox.bind("<Double-Button-1>", self.handle_double_click)
  81.         # Browse Button (Yellow)
  82.         self.browse_button = ttk.Button(
  83.             self.left_frame, text="Browse Text File", command=self.browse_text_file, style="Yellow.TButton"
  84.         )
  85.         self.browse_button.pack(side=tk.LEFT, padx=(0, 5))
  86.         # Download Button in Left Frame (Purple)
  87.         self.download_btn = ttk.Button(
  88.             self.left_frame, text="Download Selected Image", command=self.download_image, style="Purple.TButton"
  89.         )
  90.         self.download_btn.pack(side=tk.LEFT)
  91.  
  92.         # Right Frame for Image Display
  93.         self.right_frame = ttk.Frame(self.root, style="Right.TFrame")
  94.         self.right_frame.pack(side=tk.RIGHT, fill=tk.BOTH, expand=True, padx=10, pady=10)
  95.  
  96.         self.image_label = ttk.Label(self.right_frame, text="Image Preview", anchor=tk.CENTER, font=("Arial", 12))
  97.         self.image_label.pack(expand=True)
  98.  
  99.         # Variables
  100.         self.images = []  # To store scraped image URLs
  101.         self.videos = []  # To store scraped video URLs
  102.         self.m3u_urls = []  # To store scraped M3U URLs
  103.         self.links = []  # To store scraped links
  104.  
  105.         # Styling
  106.         self.style = ttk.Style()
  107.         self.style.configure("Top.TFrame", background="#ffffff")
  108.         self.style.configure("Left.TFrame", background="#e0e0e0")
  109.         self.style.configure("Right.TFrame", background="#d0d0d0")
  110.  
  111.         # Button Styles
  112.         self.style.configure("Green.TButton", font=("Arial", 10), background="#4CAF50", foreground="black")
  113.         self.style.configure("Blue.TButton", font=("Arial", 10), background="#008CBA", foreground="yellow")
  114.         self.style.configure("Orange.TButton", font=("Arial", 10), background="#FF9800", foreground="green")
  115.         self.style.configure("Purple.TButton", font=("Arial", 10), background="#9C27B0", foreground="red")
  116.         self.style.configure("Red.TButton", font=("Arial", 10), background="#F44336", foreground="blue")
  117.         self.style.configure("Gray.TButton", font=("Arial", 10), background="#808080", foreground="red")
  118.         self.style.configure("Yellow.TButton", font=("Arial", 10), background="#FFEB3B", foreground="black")
  119.  
  120.     def process_url(self):
  121.         """Process the entered URL and scrape media."""
  122.         url = self.url_entry.get().strip()
  123.         if not url:
  124.             messagebox.showwarning("Input Error", "Please enter a valid URL.")
  125.             return
  126.         self.images.clear()
  127.         self.videos.clear()
  128.         self.m3u_urls.clear()
  129.         self.result_listbox.delete(0, tk.END)
  130.         try:
  131.             parsed = urlparse(url)
  132.             ext = os.path.splitext(parsed.path)[1].lower()
  133.  
  134.             if ext in ['.m3u', '.m3u8']:
  135.                 scraped_media = self.scrape_m3u_playlist(url)
  136.             else:
  137.                 scraped_media = self.scrape_normal_site(url)
  138.  
  139.             self.images.extend(scraped_media["images"])
  140.             self.videos.extend(scraped_media["videos"])
  141.             self.m3u_urls.extend(scraped_media["m3u"])
  142.  
  143.             # Populate listbox with scraped media URLs
  144.             for media_url in scraped_media["images"]:
  145.                 self.result_listbox.insert(tk.END, media_url)
  146.             for video_url in scraped_media["videos"]:
  147.                 self.result_listbox.insert(tk.END, video_url)
  148.             for m3u_url in scraped_media["m3u"]:
  149.                 self.result_listbox.insert(tk.END, m3u_url)
  150.  
  151.             total_media = len(self.images) + len(self.videos) + len(self.m3u_urls)
  152.             if total_media == 0:
  153.                 messagebox.showinfo("Info", "No media URLs found.")
  154.             else:
  155.                 messagebox.showinfo("Success", f"{total_media} media URL(s) found!")
  156.         except Exception as e:
  157.             messagebox.showerror("Error", str(e))
  158.  
  159.     def scrape_normal_site(self, url):
  160.         """Scrape images, videos, and M3U URLs from a normal site."""
  161.         found_images = set()
  162.         found_videos = set()
  163.         found_m3u = set()
  164.         try:
  165.             response = requests.get(url, timeout=10)
  166.             if response.status_code != 200:
  167.                 return {"images": found_images, "videos": found_videos, "m3u": found_m3u}
  168.  
  169.             soup = BeautifulSoup(response.text, 'html.parser')
  170.             for tag in soup.find_all(['img', 'video', 'source', 'a']):
  171.                 src = tag.get('src') or tag.get('href')
  172.                 if src:
  173.                     full_url = urljoin(url, src)
  174.                     parsed = urlparse(full_url)
  175.                     ext = os.path.splitext(parsed.path)[1].lower()
  176.                     if ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg', '.ico']:
  177.                         found_images.add(full_url)
  178.                     elif ext in ['.mp4', '.mkv', '.avi', '.mov', '.flv', '.wmv']:
  179.                         found_videos.add(full_url)
  180.                     elif ext in ['.m3u', '.m3u8']:
  181.                         found_m3u.add(full_url)
  182.         except Exception as e:
  183.             print(f"Error scraping site: {e}")
  184.         return {"images": found_images, "videos": found_videos, "m3u": found_m3u}
  185.  
  186.     def scrape_m3u_playlist(self, url):
  187.         """Scrape and parse an M3U playlist file."""
  188.         found_m3u = set()
  189.         try:
  190.             response = requests.get(url, timeout=10)
  191.             if response.status_code != 200:
  192.                 return {"images": set(), "videos": set(), "m3u": found_m3u}
  193.  
  194.             # Parse the M3U content
  195.             lines = response.text.splitlines()
  196.             for line in lines:
  197.                 line = line.strip()
  198.                 if line.startswith("http"):
  199.                     found_m3u.add(line)
  200.         except Exception as e:
  201.             print(f"Error scraping M3U playlist: {e}")
  202.         return {"images": set(), "videos": set(), "m3u": found_m3u}
  203.  
  204.     def handle_double_click(self, event):
  205.         """Handle double-click on a result item."""
  206.         selected_index = self.result_listbox.curselection()
  207.         if not selected_index:
  208.             return
  209.         selected_url = self.result_listbox.get(selected_index)
  210.         parsed = urlparse(selected_url)
  211.         ext = os.path.splitext(parsed.path)[1].lower()
  212.  
  213.         if ext in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg', '.ico']:
  214.             # Display image preview
  215.             try:
  216.                 response = requests.get(selected_url)
  217.                 image_data = Image.open(io.BytesIO(response.content))
  218.                 image_data.thumbnail((600, 600))  # Resize for display
  219.                 photo = ImageTk.PhotoImage(image_data)
  220.                 self.image_label.config(image=photo)
  221.                 self.image_label.image = photo  # Keep reference to avoid garbage collection
  222.             except Exception as e:
  223.                 print(f"Error loading image: {e}")
  224.         elif ext in ['.mp4', '.mkv', '.avi', '.mov', '.flv', '.wmv', '.m3u', '.m3u8']:
  225.             # Open video or M3U in VLC
  226.             vlc_path = r"C:\Program Files\VideoLAN\VLC\vlc.exe"
  227.             if os.path.exists(vlc_path):
  228.                 try:
  229.                     subprocess.Popen([vlc_path, selected_url])
  230.                 except Exception as e:
  231.                     messagebox.showerror("Error", f"Failed to open VLC: {e}")
  232.             else:
  233.                 messagebox.showerror("Error", "VLC Media Player not found at the specified path.")
  234.  
  235.     def download_image(self):
  236.         """Download the selected image."""
  237.         selected_index = self.result_listbox.curselection()
  238.         if not selected_index:
  239.             return
  240.         selected_url = self.result_listbox.get(selected_index)
  241.         save_path = filedialog.asksaveasfilename(defaultextension=".jpg", filetypes=[("JPEG", "*.jpg"), ("PNG", "*.png")])
  242.         if not save_path:
  243.             return
  244.         try:
  245.             response = requests.get(selected_url)
  246.             with open(save_path, "wb") as f:
  247.                 f.write(response.content)
  248.             print(f"Image saved to {save_path}")
  249.         except Exception as e:
  250.             print(f"Error downloading image: {e}")
  251.  
  252.     def save_results(self):
  253.         """Save the results (images, videos, M3U URLs) to a file."""
  254.         # Get all items from the result listbox
  255.         results = self.result_listbox.get(0, tk.END)
  256.         if not results:
  257.             messagebox.showwarning("No Results", "No results to save.")
  258.             return
  259.  
  260.         # Ask user for file type (Text or M3U)
  261.         file_type = messagebox.askquestion(
  262.             "Save Format", "Save as Text (Yes) or M3U (No)?", icon="question"
  263.         )
  264.  
  265.         # Open file dialog to choose save location
  266.         file_path = filedialog.asksaveasfilename(
  267.             defaultextension=".txt" if file_type == "yes" else ".m3u",
  268.             filetypes=[
  269.                 ("Text File", "*.txt"),
  270.                 ("M3U Playlist", "*.m3u"),
  271.             ],
  272.         )
  273.         if not file_path:
  274.             return  # User canceled the save dialog
  275.  
  276.         try:
  277.             # Write results to the selected file
  278.             with open(file_path, "w", encoding="utf-8") as f:
  279.                 if file_type == "yes":  # Save as plain text
  280.                     f.write("\n".join(results))
  281.                 else:  # Save as M3U format
  282.                     f.write("#EXTM3U\n")  # M3U header
  283.                     for result in results:
  284.                         f.write(f"#EXTINF:-1,{result}\n")  # Metadata line
  285.                         f.write(f"{result}\n")  # URL line
  286.             messagebox.showinfo("Success", f"Results saved to {file_path}")
  287.         except Exception as e:
  288.             messagebox.showerror("Error", f"Failed to save results: {e}")
  289.  
  290.     def clear_fields_and_results(self):
  291.         """Clear the URL entry field, result listbox, and reset image preview."""
  292.         # Clear URL entry field
  293.         self.url_entry.delete(0, tk.END)
  294.         # Clear result listbox
  295.         self.result_listbox.delete(0, tk.END)
  296.         # Reset internal storage
  297.         self.images.clear()
  298.         self.videos.clear()
  299.         self.m3u_urls.clear()
  300.         # Reset image preview
  301.         self.image_label.config(text="Image Preview", image=None)
  302.         self.image_label.image = None  # Clear reference to avoid memory leaks
  303.         # Notify user
  304.         messagebox.showinfo("Cleared", "URL and results have been cleared.")
  305.  
  306.  
  307.     def scrape_all_links(self, url):
  308.         """Scrape all links from the given webpage."""
  309.         all_links = set()
  310.         try:
  311.             response = requests.get(url, timeout=10)
  312.             soup = BeautifulSoup(response.text, 'html.parser')
  313.             for a_tag in soup.find_all('a', href=True):
  314.                 full_url = urljoin(url, a_tag['href'])
  315.                 all_links.add(full_url)
  316.         except Exception as e:
  317.             print(f"[Scrape All Error] {e}")
  318.         return all_links
  319.    
  320.     def scrape_links(self):
  321.         """Scrape all links from the given webpage."""
  322.         url = self.url_entry.get()
  323.         if not url:
  324.             return
  325.  
  326.         try:
  327.             response = requests.get(url)
  328.             soup = BeautifulSoup(response.content, 'html.parser')
  329.             self.links = [a['href'] for a in soup.find_all('a') if 'href' in a.attrs]
  330.  
  331.             # Clear previous results
  332.             self.result_listbox.delete(0, tk.END)
  333.  
  334.             # Populate listbox with links
  335.             for link in self.links:
  336.                 self.result_listbox.insert(tk.END, link)
  337.         except Exception as e:
  338.             print(f"Error scraping links: {e}")
  339.  
  340.     def scrape_all_button(self):
  341.         """Scrape all links from the given webpage."""
  342.         url = self.url_entry.get().strip()
  343.         if not url:
  344.             messagebox.showwarning("Input Error", "Please enter a valid URL.")
  345.             return
  346.  
  347.         self.result_listbox.delete(0, tk.END)
  348.  
  349.         try:
  350.             all_links = self.scrape_all_links(url)
  351.             self.links.extend(all_links)
  352.  
  353.             # Populate listbox with links
  354.             for link in all_links:
  355.                 self.result_listbox.insert(tk.END, link)
  356.  
  357.             messagebox.showinfo("Done", f"{len(all_links)} total link(s) scraped.")
  358.         except Exception as e:
  359.             messagebox.showerror("Error", str(e))
  360.  
  361.     def browse_text_file(self):
  362.         """Open a text file and display its contents in the result listbox."""
  363.         file_path = filedialog.askopenfilename(
  364.             filetypes=[("Text Files", "*.txt"), ("All Files", "*.*")]
  365.         )
  366.         if not file_path:
  367.             return  # User canceled the file dialog
  368.  
  369.         try:
  370.             # Clear existing results
  371.             self.result_listbox.delete(0, tk.END)
  372.  
  373.             # Read the file and populate the listbox
  374.             with open(file_path, "r", encoding="utf-8") as file:
  375.                 lines = file.readlines()
  376.                 for line in lines:
  377.                     line = line.strip()  # Remove leading/trailing whitespace
  378.                     if line:  # Ignore empty lines
  379.                         self.result_listbox.insert(tk.END, line)
  380.  
  381.             messagebox.showinfo("Success", f"Contents of '{file_path}' loaded successfully.")
  382.         except Exception as e:
  383.             messagebox.showerror("Error", f"Failed to load file: {e}")    
  384.                
  385.  
  386. if __name__ == "__main__":
  387.     root = tk.Tk()
  388.     app = WebScraperApp(root)
  389.     root.mainloop()
  390.  
Add Comment
Please, Sign In to add comment