Advertisement
shoaib-santo

Google Bulk Index Checker

Oct 31st, 2024
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.75 KB | None | 0 0
  1. import csv
  2. import time
  3. import requests
  4.  
  5. # Load the domains to check from a text file
  6. with open("urls.txt", "r") as file:
  7.     domains_to_check = [line.strip() for line in file.readlines()]
  8.  
  9. # Prepare CSV file to save results
  10. output_file = "index_status.csv"
  11. with open(output_file, mode="w", newline="") as csvfile:
  12.     fieldnames = ["urls", "index_status"]
  13.     writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
  14.     writer.writeheader()
  15.  
  16.     # Function to check if a domain is indexed on Google
  17.     def check_index_status(domain):
  18.         query = f"site:{domain}"
  19.         url = f"https://www.google.com/search?q={query}"
  20.         headers = {
  21.             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36"
  22.         }
  23.  
  24.         try:
  25.             response = requests.get(url, headers=headers)
  26.             response.raise_for_status()
  27.             # Check if "did not match any documents" is in the response, indicating not indexed
  28.             if "did not match any documents" in response.text:
  29.                 return False
  30.             return True
  31.         except requests.exceptions.RequestException as e:
  32.             print(f"Error checking {domain}: {e}")
  33.             return False
  34.  
  35.     # Iterate over each domain and check indexing status
  36.     for domain in domains_to_check:
  37.         is_indexed = check_index_status(domain)
  38.         index_status = "Indexed" if is_indexed else "Not Indexed"
  39.         print(f"{domain} - {index_status}")
  40.  
  41.         # Write the result to CSV
  42.         writer.writerow({"urls": domain, "index_status": index_status})
  43.  
  44.         # Add a delay to avoid rate-limiting by Google
  45.         time.sleep(5)
  46.  
  47. print(f"Indexing results saved to {output_file}")
  48.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement