Advertisement
shoaib-santo

Backlink Index Status Checker

Oct 17th, 2024
105
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.74 KB | None | 0 0
  1. import time
  2. from googlesearch import search
  3. import streamlit as st
  4.  
  5. # Streamlit UI elements
  6. st.title("Backlink Index Status Checker")
  7. st.write("Upload a file containing the url's to check (one per line).")
  8.  
  9. # File uploader widget in Streamlit
  10. uploaded_file = st.file_uploader("Choose a file", type=["txt"])
  11.  
  12. if uploaded_file is not None:
  13.     # Read domains from the uploaded file
  14.     domains_to_check = [line.decode('utf-8').strip() for line in uploaded_file.readlines()]
  15.  
  16.     # Lists to hold indexed and non-indexed URLs
  17.     indexed_urls = []
  18.     not_indexed_urls = []
  19.  
  20.     # Function to handle Google search with retry logic
  21.     def check_index_status(domain):
  22.         query = f"site:{domain}"
  23.         found = False
  24.         retries = 3  # Maximum number of retries for handling 429 error
  25.         for attempt in range(retries):
  26.             try:
  27.                 for _ in search(query, tld="co.in", num=1, stop=1, pause=5):  # Increase pause time
  28.                     found = True
  29.                     break
  30.                 return found
  31.             except Exception as e:
  32.                 if '429' in str(e):
  33.                     st.warning(f"429 error occurred. Retrying in {2 ** attempt} seconds for {domain}...")
  34.                     time.sleep(2 ** attempt)  # Exponential backoff strategy
  35.                 else:
  36.                     st.error(f"An error occurred: {e}")
  37.                     break
  38.         return found
  39.  
  40.     # Streamlit progress bar
  41.     progress = st.progress(0)
  42.     total_domains = len(domains_to_check)
  43.  
  44.     # Iterate over the domains and check their indexing status
  45.     for idx, domain in enumerate(domains_to_check):
  46.         found = check_index_status(domain)
  47.  
  48.         # Display the result in Streamlit
  49.         if found:
  50.             indexed_urls.append(domain)
  51.             st.write(f"{domain} - Indexed")
  52.         else:
  53.             not_indexed_urls.append(domain)
  54.             st.write(f"{domain} - Not Indexed")
  55.  
  56.         # Update progress bar
  57.         progress.progress((idx + 1) / total_domains)
  58.  
  59.     # Convert the indexed and non-indexed URLs to downloadable text
  60.     indexed_urls_data = "\n".join(indexed_urls)
  61.     not_indexed_urls_data = "\n".join(not_indexed_urls)
  62.  
  63.     # Let the user download the indexed URLs file
  64.     if indexed_urls:
  65.         st.download_button(
  66.             label="Download Indexed URLs",
  67.             data=indexed_urls_data,
  68.             file_name="indexed_urls.txt",
  69.             mime="text/plain"
  70.         )
  71.  
  72.     # Let the user download the non-indexed URLs file
  73.     if not_indexed_urls:
  74.         st.download_button(
  75.             label="Download Not Indexed URLs",
  76.             data=not_indexed_urls_data,
  77.             file_name="not_indexed_urls.txt",
  78.             mime="text/plain"
  79.         )
  80.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement