Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import tkinter as tk
- from tkinter import ttk
- import requests
- from bs4 import BeautifulSoup
- from urllib.parse import urljoin
class IndexOfScraperApp:
    """Tk GUI that lists links with a given file extension from a web page.

    The user enters the URL of a directory listing ("Index of ..." page) and
    a file extension. Pressing the button downloads the page, collects every
    ``<a href>`` whose target ends with that extension, resolves the matches
    against the page URL and shows them, one per line, in a read-only text
    area.
    """

    # Seconds to wait for the server before giving up. ``requests`` has no
    # default timeout, and the request runs on the Tk event-loop thread, so
    # without a limit a stalled server would freeze the window indefinitely.
    REQUEST_TIMEOUT = 15

    def __init__(self, root):
        """Create the widgets and lay them out on *root* (a Tk window)."""
        self.root = root
        self.root.title("Najeeb Shah Khan Index Of URL Scraper By Extension")

        self.url_label = ttk.Label(root, text="Enter Index Of URL:")
        self.url_entry = ttk.Entry(root, width=50)
        self.extension_label = ttk.Label(root, text="Enter File Extension:")
        self.extension_entry = ttk.Entry(root, width=10)
        self.scrape_button = ttk.Button(root, text="Scrape URLs", command=self.scrape_urls)
        # Kept DISABLED so the user cannot type into the results;
        # display_result() re-enables it briefly to write.
        self.result_text = tk.Text(root, height=36, width=124, wrap="none", state=tk.DISABLED)

        # Row 0: inputs and the button; row 1: results spanning every column.
        self.url_label.grid(row=0, column=0, pady=5)
        self.url_entry.grid(row=0, column=1, pady=5)
        self.extension_label.grid(row=0, column=2, pady=5)
        self.extension_entry.grid(row=0, column=3, pady=5)
        self.scrape_button.grid(row=0, column=4, pady=5)
        self.result_text.grid(row=1, column=0, columnspan=5, pady=10)

    @staticmethod
    def _normalize_extension(extension):
        """Return *extension* without surrounding whitespace or leading dots.

        Lets the user type ``mp4``, ``.mp4`` or `` .mp4 `` interchangeably.
        The result is empty when nothing but whitespace/dots was given.
        """
        return extension.strip().lstrip(".")

    @staticmethod
    def _matching_links(hrefs, base_url, extension):
        """Return absolute URLs for the *hrefs* ending in ``.<extension>``.

        The comparison ignores case, so ``FILE.MP4`` matches ``mp4``. Each
        match is resolved against *base_url*; input order is preserved.
        """
        suffix = f".{extension.lower()}"
        return [urljoin(base_url, href) for href in hrefs if href.lower().endswith(suffix)]

    def scrape_urls(self):
        """Fetch the entered URL and display the links matching the extension.

        Network/HTTP failures and missing input are reported in the result
        area instead of raising.
        """
        url = self.url_entry.get().strip()
        extension = self._normalize_extension(self.extension_entry.get())
        if not url or not extension:
            # Tell the user why nothing happened instead of silently returning.
            self.display_result("Please enter both a URL and a file extension.")
            return

        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            self.display_result(f"Error: {e}")
            return

        soup = BeautifulSoup(response.content, 'html.parser')
        hrefs = [a['href'] for a in soup.find_all('a', href=True)]
        absolute_links = self._matching_links(hrefs, url, extension)

        if absolute_links:
            self.display_result("\n".join(absolute_links))
        else:
            self.display_result(f"No {extension} links found in the directory.")

    def display_result(self, text):
        """Replace the contents of the read-only result area with *text*."""
        # The widget must be NORMAL to accept edits; restore DISABLED after.
        self.result_text.config(state=tk.NORMAL)
        self.result_text.delete(1.0, tk.END)
        self.result_text.insert(tk.END, text)
        self.result_text.config(state=tk.DISABLED)
if __name__ == "__main__":
    # Start the GUI only when executed as a script, not when imported.
    window = tk.Tk()
    scraper_app = IndexOfScraperApp(window)
    # Blocks here, dispatching Tk events, until the window is closed.
    window.mainloop()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement