Advertisement
Not a member of Pastebin yet?
Sign Up —
it unlocks many cool features!
- import tkinter as tk
- from tkinter import ttk, messagebox
- from tkinter.scrolledtext import ScrolledText
- import requests
- from bs4 import BeautifulSoup
- from urllib.parse import urljoin
- import subprocess
class LinkScraperGUI:
    """Tkinter GUI that scrapes hyperlinks from web pages.

    Workflow: the user enters a URL and clicks "Scrape Links"; every
    ``<a href>`` on the page is resolved to an absolute URL and listed in
    an editable text pane.  A selected link can be scraped in turn into a
    results pane, the results appended to ``Files-Links.txt``, and any
    line in either pane can be double-clicked to open it in VLC.
    """

    # Seconds before an HTTP request is abandoned.  BUG FIX: the original
    # called requests.get() with no timeout, so a stalled server would
    # hang the (single-threaded) GUI indefinitely.
    REQUEST_TIMEOUT = 15

    def __init__(self, master):
        """Build the widget tree inside *master* (the tk.Tk root)."""
        self.master = master
        self.master.title("Web Links Scraper and Save Text")
        self.master.geometry("800x600")

        # Style configuration
        self.style = ttk.Style()
        self.style.theme_use("clam")  # 'clam' theme for a modern look
        self.style.configure("TLabel", font=("Helvetica", 12))
        self.style.configure("TButton", font=("Helvetica", 12),
                             background="#4CAF50", foreground="white")
        self.style.configure("TEntry", font=("Helvetica", 12), padding=5)
        self.style.configure("TScrollbar", gripcount=0, background="#f1f1f1")

        # URL entry row
        self.url_frame = ttk.Frame(master)
        self.url_frame.pack(pady=10, padx=10, fill=tk.X)
        self.url_label = ttk.Label(self.url_frame, text="Enter URL:")
        self.url_label.pack(side=tk.LEFT, padx=(0, 10))
        self.url_entry = ttk.Entry(self.url_frame, width=70)
        self.url_entry.pack(side=tk.LEFT, fill=tk.X, expand=True)
        self.scrape_button = ttk.Button(self.url_frame, text="Scrape Links",
                                        command=self.scrape_links)
        self.scrape_button.pack(side=tk.LEFT, padx=(10, 0))

        # Editable pane listing the scraped links
        self.links_frame = ttk.Frame(master)
        self.links_frame.pack(pady=10, padx=10, fill=tk.BOTH, expand=True)
        self.links_text = ScrolledText(self.links_frame, width=100, height=10,
                                       font=("Helvetica", 12))
        self.links_text.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        # NOTE(review): ScrolledText already embeds a scrollbar; this extra
        # one is redundant but harmless, kept for layout compatibility.
        self.listbox_scrollbar = ttk.Scrollbar(self.links_frame,
                                               orient=tk.VERTICAL,
                                               command=self.links_text.yview)
        self.listbox_scrollbar.pack(side=tk.LEFT, fill=tk.Y)
        self.links_text.config(yscrollcommand=self.listbox_scrollbar.set)
        self.links_text.bind("<Double-1>", self.open_in_vlc)

        # Scrape-selected-link button
        self.scrape_selected_button = ttk.Button(
            master, text="Scrape Selected Link",
            command=self.scrape_selected_link)
        self.scrape_selected_button.pack(pady=10)

        # Results pane
        self.result_frame = ttk.Frame(master)
        self.result_frame.pack(pady=10, padx=10, fill=tk.BOTH, expand=True)
        self.result_text = ScrolledText(self.result_frame, width=100,
                                        height=10, font=("Helvetica", 12))
        self.result_text.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        self.text_scrollbar = ttk.Scrollbar(self.result_frame,
                                            orient=tk.VERTICAL,
                                            command=self.result_text.yview)
        self.text_scrollbar.pack(side=tk.LEFT, fill=tk.Y)
        self.result_text.config(yscrollcommand=self.text_scrollbar.set)
        self.result_text.bind("<Double-1>", self.open_in_vlc)

        # Save-results button
        self.save_button = ttk.Button(master, text="Save Results",
                                      command=self.save_results)
        self.save_button.pack(pady=10)

    def _fetch_links(self, url):
        """Download *url* and return its hyperlinks as absolute URLs.

        Returns a (possibly empty) list of URL strings on success, or
        ``None`` if the request failed (an error dialog is shown here, so
        callers only need to bail out).
        """
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            messagebox.showerror("Error", f"Error fetching URL: {e}")
            return None
        soup = BeautifulSoup(response.text, 'html.parser')
        anchors = soup.find_all('a', href=True)
        # Resolve relative hrefs against the page URL.
        return [urljoin(url, a['href']) for a in anchors]

    def scrape_links(self):
        """Scrape the URL in the entry box and list its links."""
        url = self.url_entry.get()
        if not url:
            messagebox.showerror("Error", "Please enter a valid URL.")
            return
        complete_links = self._fetch_links(url)
        if complete_links is None:
            return  # request failed; dialog already shown
        if not complete_links:
            messagebox.showinfo("Info", "No links found on the given URL.")
            return
        # Replace any previous contents with the fresh link list.
        self.links_text.delete("1.0", tk.END)
        for link in complete_links:
            self.links_text.insert(tk.END, link + '\n')

    def scrape_selected_link(self):
        """Scrape the link currently selected in the links pane."""
        # BUG FIX: tkinter raises TclError when there is no selection, so
        # the original "if not selected_text" guard was unreachable and
        # the app crashed instead of showing the error dialog.
        try:
            selected_text = self.links_text.get(tk.SEL_FIRST,
                                                tk.SEL_LAST).strip()
        except tk.TclError:
            selected_text = ""
        if not selected_text:
            messagebox.showerror("Error", "Please select a link from the text.")
            return
        complete_links = self._fetch_links(selected_text)
        if complete_links is None:
            return  # request failed; dialog already shown
        if not complete_links:
            messagebox.showinfo("Info", "No links found on the selected URL.")
            return
        # Display results in the results pane.
        self.result_text.delete("1.0", tk.END)
        for link in complete_links:
            self.result_text.insert(tk.END, link + '\n')

    def save_results(self):
        """Append the results pane's contents to ``Files-Links.txt``."""
        links = self.result_text.get("1.0", tk.END).strip()
        # BUG FIX: the original appended an empty record and reported
        # "Success" even when there was nothing to save.
        if not links:
            messagebox.showinfo("Info", "There are no results to save.")
            return
        file_path = "Files-Links.txt"
        with open(file_path, 'a', encoding='utf-8') as file:
            file.write(links + '\n\n')  # blank line separates saved batches
        messagebox.showinfo("Success", f"Links appended to {file_path}")

    def open_in_vlc(self, event):
        """Open the double-clicked line's URL in VLC.

        Bound to <Double-1> on both text panes; the whole line under the
        click is taken as the URL.
        """
        try:
            widget = event.widget
            # Text index ("line.char") of the character under the pointer.
            index = widget.index(f"@{event.x},{event.y}")
            line = index.split('.')[0]
            selected_text = widget.get(f"{line}.0", f"{line}.end").strip()
            if not selected_text:
                messagebox.showerror("Error", "Please select a URL.")
                return
            # NOTE(review): hard-coded Windows install path — fails on other
            # platforms or non-default VLC installs; consider making this
            # configurable.
            vlc_path = r"C:\Program Files\VideoLAN\VLC\vlc.exe"
            # List argv + no shell: the URL is never shell-interpreted.
            subprocess.Popen([vlc_path, selected_text])
        except Exception as e:
            messagebox.showerror("Error", f"Failed to open VLC: {e}")
def main():
    """Create the Tk root, attach the scraper GUI, and run the event loop."""
    root = tk.Tk()
    LinkScraperGUI(root)
    root.mainloop()


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement