Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- import os
- import time
- import random
- import argparse
- from concurrent.futures import ThreadPoolExecutor, as_completed
- from bs4 import BeautifulSoup
- import requests
- from termcolor import colored
def get_proxies():
    """Return the candidate proxies as a list of non-empty strings.

    The list is cached in ``proxies.txt`` in the current working directory.
    When that file does not exist, the list is downloaded from the
    ProxyScrape API and written to the file before being returned.

    Surrounding whitespace (including carriage returns left by CRLF line
    endings) is stripped and blank lines are dropped, so callers never
    receive an empty proxy entry.

    Raises:
        requests.exceptions.RequestException: if the download fails, times
            out, or the API answers with an HTTP error status.
    """
    cache_path = "proxies.txt"

    if os.path.exists(cache_path):
        with open(cache_path, "r") as f:
            raw = f.read()
        return [line.strip() for line in raw.splitlines() if line.strip()]

    url = "https://api.proxyscrape.com/v2/?request=getproxies&protocol=http&timeout=10000&country=all&ssl=all&anonymity=all&limit=5000"
    # A timeout keeps the script from hanging forever on a dead endpoint.
    response = requests.get(url, timeout=30)
    # Fail loudly instead of caching an error page as if it were a proxy list.
    response.raise_for_status()

    proxies = [line.strip() for line in response.text.splitlines() if line.strip()]
    with open(cache_path, "w") as f:
        f.write("\n".join(proxies))
    return proxies
def test_proxy(proxy, user_agent, verbose):
    """Check whether ``proxy`` can fetch https://bing.com within 3 seconds.

    Returns True only when the request completes with HTTP status 200.
    Any ``requests`` exception, or any other status code, yields False.
    The reason for a failed request is printed only when ``verbose`` is
    truthy.
    """
    proxy_url = f"http://{proxy}"
    try:
        reply = requests.get(
            "https://bing.com",
            headers={"User-Agent": user_agent},
            proxies={"http": proxy_url, "https": proxy_url},
            timeout=3,
        )
    except requests.exceptions.RequestException as err:
        if verbose:
            # Most specific failure kinds first, generic message last.
            if isinstance(err, requests.exceptions.ConnectTimeout):
                reason = f"Connection timeout for proxy: {proxy}"
            elif isinstance(err, requests.exceptions.ProxyError):
                reason = f"Proxy error for proxy: {proxy}"
            else:
                reason = f"Request exception for proxy: {proxy}, error: {err}"
            print(colored(reason, "red"))
        return False

    # Printed for every proxy that answered at all, whatever the status.
    print(colored("Scraping good proxies...", "blue"))
    if reply.status_code != 200:
        return False
    print(colored(f"Good proxy found: {proxy}", "green"))
    return True
def filter_working_proxies(proxies, user_agents, verbose):
    """Probe every proxy concurrently and return those that passed.

    One User-Agent is drawn at random from ``user_agents`` and shared by all
    probes. Each proxy is checked with ``test_proxy`` on a pool of 50 worker
    threads. The returned list is in completion order, not input order.
    """
    chosen_agent = random.choice(user_agents)
    with ThreadPoolExecutor(max_workers=50) as pool:
        pending = {}
        for candidate in proxies:
            job = pool.submit(test_proxy, candidate, chosen_agent, verbose)
            pending[job] = candidate
        alive = [pending[job] for job in as_completed(pending) if job.result()]
    return alive
def get_user_agents():
    """Return the User-Agent strings listed in ``useragents.txt``.

    The file is read from the current working directory, one User-Agent per
    line. Surrounding whitespace is stripped and blank lines are skipped so
    that a trailing newline never produces an empty User-Agent entry.

    Raises:
        FileNotFoundError: if ``useragents.txt`` does not exist.
    """
    with open("useragents.txt", "r") as f:
        return [line.strip() for line in f if line.strip()]
def google_search(query, user_agent, proxy):
    """Run a Google web search through ``proxy`` and return the result links.

    Args:
        query: Search string; it is URL-encoded by ``requests`` via
            ``params``, so characters such as ``&``, ``#`` or ``+`` in the
            query are sent intact instead of corrupting the URL.
        user_agent: Value sent as the ``User-Agent`` header.
        proxy: ``host:port`` of the HTTP proxy used for both schemes.

    Returns:
        The ``href`` values of all anchors matched by ``.yuRUbf a[href]``
        in the response HTML, in document order.

    Raises:
        requests.exceptions.RequestException: on connection problems,
            timeouts (10 s) or an HTTP error status.
    """
    headers = {"User-Agent": user_agent}
    proxies = {"http": f"http://{proxy}", "https": f"http://{proxy}"}
    response = requests.get(
        "https://www.google.com/search",
        params={"q": query},
        headers=headers,
        proxies=proxies,
        timeout=10,
    )
    # Treat error statuses as failures so they are not parsed and reported
    # as "no results".
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    # NOTE(review): ".yuRUbf" is presumably the result-container class in
    # Google's markup -- verify against the current page structure.
    # Selecting a[href] skips anchors that carry no href attribute.
    return [anchor["href"] for anchor in soup.select(".yuRUbf a[href]")]
def _result_file_name(dork):
    """Return a filesystem-safe base name for the results file of ``dork``."""
    import hashlib  # local import: hashlib is not imported at file level

    safe = "".join(ch if ch.isalnum() or ch in "-_." else "_" for ch in dork)
    if safe == dork and len(safe) <= 100:
        # Already safe: keep the historical "<dork>_results.txt" name.
        return safe
    # Sanitising or truncating can make different dorks collide, so a short
    # digest of the original text keeps the names distinct.
    digest = hashlib.sha1(dork.encode("utf-8")).hexdigest()[:8]
    return f"{safe[:100]}_{digest}"


def search_dork(dork, proxies, user_agents, verbose, max_retries=3, backoff_factor=1.0):
    """Search one dork, rotating proxy and User-Agent on failures.

    Up to ``max_retries + 1`` attempts are made, each with a randomly chosen
    proxy and User-Agent. The first attempt that completes without a
    ``requests`` error ends the search: when it returned links, the first 20
    are written to ``results/<name>_results.txt``; otherwise a "no results"
    message is printed.

    Args:
        dork: The search string. Blank or whitespace-only values are skipped.
        proxies: Sequence of ``host:port`` proxy strings to choose from.
        user_agents: Sequence of User-Agent strings to choose from.
        verbose: When truthy, print the error of each failed attempt.
        max_retries: Number of retries after the first attempt.
        backoff_factor: Multiplier of the exponential delay between attempts;
            1-5 seconds of random jitter are added to every delay.

    Returns:
        None.
    """
    query = dork.strip()
    if not query:
        # Blank lines in the dork list are not searches.
        return

    print(colored(f"Searching for dork: {query}", "yellow"))

    if not proxies or not user_agents:
        # random.choice would raise IndexError on an empty sequence.
        print(colored(f"No proxies or user agents available for dork '{query}'", "red"))
        return

    for attempt in range(max_retries + 1):
        proxy = random.choice(proxies)
        user_agent = random.choice(user_agents)
        try:
            results = google_search(query, user_agent, proxy)
        except requests.exceptions.RequestException as e:
            if verbose:
                print(colored(f"Error with proxy {proxy}: {e}, rotating proxy...", "magenta"))
        else:
            if results:
                os.makedirs("results", exist_ok=True)
                path = os.path.join("results", f"{_result_file_name(query)}_results.txt")
                with open(path, "w", encoding="utf-8") as f:
                    f.write("\n".join(results[:20]))
                print(colored(f"Saved top 20 results for dork '{query}'", "green"))
            else:
                print(colored(f"No results found for dork '{query}'", "red"))
            return

        # Back off only when another attempt will actually follow.
        if attempt < max_retries:
            time.sleep(backoff_factor * (2 ** attempt) + random.uniform(1, 5))

    print(colored(f"Giving up on dork '{query}' after {max_retries + 1} attempts", "red"))
def main():
    """Command-line entry point: load inputs, vet proxies, search every dork.

    Reads the dorks from ``dorks.txt`` (one per line, blank lines ignored),
    loads the User-Agents and the proxy list, keeps only the proxies that
    pass ``filter_working_proxies`` and then runs ``search_dork`` for each
    dork on a pool of 20 worker threads.

    The ``-v`` / ``--verbose`` flag enables printing of proxy errors.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--verbose", help="Display errors with proxies.", action="store_true")
    args = parser.parse_args()

    with open("dorks.txt", "r") as f:
        # Skip blank lines so no empty search is submitted.
        dorks = [line.strip() for line in f if line.strip()]

    user_agents = get_user_agents()
    proxies = filter_working_proxies(get_proxies(), user_agents, args.verbose)
    if not proxies:
        # Without a working proxy every search would fail; stop early with a
        # clear message instead.
        print(colored("No working proxies found, aborting.", "red"))
        return

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs("results", exist_ok=True)

    with ThreadPoolExecutor(max_workers=20) as executor:
        futures = [
            executor.submit(search_dork, dork, proxies, user_agents, args.verbose)
            for dork in dorks
        ]
        for future in as_completed(futures):
            # result() re-raises any exception raised inside the worker.
            future.result()


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement