Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import time

from googlesearch import search
import streamlit as st


def check_index_status(domain):
    """Return True if a Google ``site:`` query for *domain* yields a result.

    Errors whose text contains "429" are retried with exponential backoff
    (1 s, then 2 s), for at most three attempts in total. Any other error is
    shown in the UI and ends the check immediately.

    A check that fails is reported as ``False``, so ``False`` means "no
    result was seen", not necessarily "confirmed absent from the index".
    """
    query = f"site:{domain}"
    retries = 3  # Maximum number of attempts when a 429 error is hit
    for attempt in range(retries):
        try:
            # A single hit is enough to decide, so return on the first result.
            for _ in search(query, tld="co.in", num=1, stop=1, pause=5):
                return True
            return False
        except Exception as e:
            if '429' not in str(e):
                st.error(f"An error occurred: {e}")
                break
            if attempt == retries - 1:
                # No attempt is left: do not announce a retry or sleep for
                # nothing, tell the user the check was abandoned instead.
                st.error(
                    f"429 error persisted after {retries} attempts for {domain}; "
                    "reporting it as not indexed."
                )
                break
            st.warning(f"429 error occurred. Retrying in {2 ** attempt} seconds for {domain}...")
            time.sleep(2 ** attempt)  # Exponential backoff strategy
    return False


# Streamlit UI elements
st.title("Backlink Index Status Checker")
st.write("Upload a file containing the url's to check (one per line).")

# File uploader widget in Streamlit
uploaded_file = st.file_uploader("Choose a file", type=["txt"])

if uploaded_file is not None:
    # getvalue() returns the full upload regardless of the buffer position;
    # "utf-8-sig" drops a leading BOM so it cannot end up inside the first URL.
    uploaded_text = uploaded_file.getvalue().decode("utf-8-sig")
    stripped_lines = (line.strip() for line in uploaded_text.splitlines())
    # Blank lines are skipped; they would otherwise be sent as a bare "site:" query.
    domains_to_check = [entry for entry in stripped_lines if entry]

    if not domains_to_check:
        st.warning("The uploaded file does not contain any URLs.")

    # Lists to hold indexed and non-indexed URLs
    indexed_urls = []
    not_indexed_urls = []

    # Streamlit progress bar
    progress = st.progress(0)
    total_domains = len(domains_to_check)

    # NOTE(review): Streamlit re-runs the script on widget interaction, so
    # clicking a download button below presumably repeats every query in this
    # loop -- consider keeping the results in st.session_state. TODO confirm.
    for idx, domain in enumerate(domains_to_check):
        found = check_index_status(domain)

        # Display the result in Streamlit
        if found:
            indexed_urls.append(domain)
            st.write(f"{domain} - Indexed")
        else:
            not_indexed_urls.append(domain)
            st.write(f"{domain} - Not Indexed")

        # Update progress bar (total_domains is non-zero whenever the loop runs)
        progress.progress((idx + 1) / total_domains)

    # Convert the indexed and non-indexed URLs to downloadable text
    indexed_urls_data = "\n".join(indexed_urls)
    not_indexed_urls_data = "\n".join(not_indexed_urls)

    # Let the user download the indexed URLs file
    if indexed_urls:
        st.download_button(
            label="Download Indexed URLs",
            data=indexed_urls_data,
            file_name="indexed_urls.txt",
            mime="text/plain"
        )

    # Let the user download the non-indexed URLs file
    if not_indexed_urls:
        st.download_button(
            label="Download Not Indexed URLs",
            data=not_indexed_urls_data,
            file_name="not_indexed_urls.txt",
            mime="text/plain"
        )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement