# Backlink Index Status Checker

import time
from googlesearch import search
import streamlit as st

# Page header and a short instruction line for the user
st.title("Backlink Index Status Checker")
st.write(
    "Upload a file containing the url's to check (one per line)."
)

# Upload widget, restricted to .txt files; the result is compared against
# None further down before any processing starts
uploaded_file = st.file_uploader(label="Choose a file", type=["txt"])

if uploaded_file is not None:
    # Read the whole upload in one go. getvalue() returns the full content
    # regardless of the buffer's current read position (readlines() would
    # return nothing if the buffer had already been consumed). "utf-8-sig"
    # drops a leading byte-order mark that would otherwise be glued to the
    # first entry.
    try:
        uploaded_text = uploaded_file.getvalue().decode("utf-8-sig")
    except UnicodeDecodeError:
        # Report the problem instead of crashing the app with a traceback
        st.error("The uploaded file is not valid UTF-8 text.")
        uploaded_text = ""

    # One entry per line; blank lines are skipped so that no empty "site:"
    # query is sent and no empty entry ends up in the result files
    stripped_lines = (line.strip() for line in uploaded_text.splitlines())
    domains_to_check = [entry for entry in stripped_lines if entry]

    # Lists to hold indexed and non-indexed URLs
    indexed_urls = []
    not_indexed_urls = []

    # Function to handle Google search with retry logic
    def check_index_status(domain):
        query = f"site:{domain}"
        found = False
        retries = 3  # Maximum number of retries for handling 429 error
        for attempt in range(retries):
            try:
                for _ in search(query, tld="co.in", num=1, stop=1, pause=5):  # Increase pause time
                    found = True
                    break
                return found
            except Exception as e:
                if '429' in str(e):
                    st.warning(f"429 error occurred. Retrying in {2 ** attempt} seconds for {domain}...")
                    time.sleep(2 ** attempt)  # Exponential backoff strategy
                else:
                    st.error(f"An error occurred: {e}")
                    break
        return found

    # Streamlit progress bar
    progress = st.progress(0)
    total_domains = len(domains_to_check)

    # Iterate over the domains and check their indexing status
    for idx, domain in enumerate(domains_to_check):
        found = check_index_status(domain)

        # Display the result in Streamlit
        if found:
            indexed_urls.append(domain)
            st.write(f"{domain} - Indexed")
        else:
            not_indexed_urls.append(domain)
            st.write(f"{domain} - Not Indexed")

        # Update progress bar
        progress.progress((idx + 1) / total_domains)

    # Convert the indexed and non-indexed URLs to downloadable text
    indexed_urls_data = "\n".join(indexed_urls)
    not_indexed_urls_data = "\n".join(not_indexed_urls)

    # Let the user download the indexed URLs file
    if indexed_urls:
        st.download_button(
            label="Download Indexed URLs",
            data=indexed_urls_data,
            file_name="indexed_urls.txt",
            mime="text/plain"
        )

    # Let the user download the non-indexed URLs file
    if not_indexed_urls:
        st.download_button(
            label="Download Not Indexed URLs",
            data=not_indexed_urls_data,
            file_name="not_indexed_urls.txt",
            mime="text/plain"
        )