shoaib-santo

Google Index Checker

Oct 18th, 2024
import re
import time
from urllib.parse import quote

import requests
import streamlit as st
import pandas as pd
from bs4 import BeautifulSoup as bs

# Function to check the index status of a domain via a Google "site:" query
def check_index_status(url):
    base = f'https://www.google.com/search?q=site%3A{quote(url)}'
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36'
    }

    # Create a session and make the request
    s = requests.Session()
    r = s.get(base, headers=headers)

    # Parse the page content
    soup = bs(r.content, 'html.parser')

    # Google reports the result count in an element with id='result-stats'
    result_stats = soup.find('div', attrs={'id': 'result-stats'})

    if result_stats:
        # Pull the first number out of text like "About 1,234 results".
        # Splitting on spaces breaks on phrasing such as "1 result",
        # so extract the count with a regex instead.
        match = re.search(r'[\d,]+', result_stats.text)
        if match:
            count = int(match.group().replace(',', ''))
            return count > 0

    # No result-stats element: no results, or Google served a consent/CAPTCHA page
    return False

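# Example (hypothetical domain): check_index_status('example.com') should
# return True when Google reports at least one result for site:example.com,
# and False otherwise.
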
# Streamlit app setup
st.title("Google Index Checker")

# File uploader for the text file containing the URLs
uploaded_file = st.file_uploader("Choose a file", type="txt")

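# The uploaded file is expected to contain one domain or URL per line,
# e.g. (hypothetical values):
#   example.com
#   blog.example.org
#   shop.example.net
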
# If a file has been uploaded
if uploaded_file is not None:
    # Decode the uploaded file into one URL per line, skipping blank lines
    content = uploaded_file.getvalue().decode("utf-8")
    domains_to_check = [line.strip() for line in content.splitlines() if line.strip()]

    # List to collect the results
    results = []

    # Progress bar and status line
    progress_bar = st.progress(0)
    status_text = st.empty()

    # Filename for the downloadable CSV
    output_file = "index_status.csv"

    # Iterate over the domains and check their indexing status
    for i, domain in enumerate(domains_to_check):
        found = check_index_status(domain)

        # Record the result
        index_status = "Indexed" if found else "Not Indexed"
        results.append({"urls": domain, "index_status": index_status})

        # Update the progress bar and status line
        progress_bar.progress((i + 1) / len(domains_to_check))
        status_text.text(f"Checking: {domain} - {index_status}")

        # Delay between requests to reduce the chance of being rate-limited
        time.sleep(2)

    # Create a dataframe from the results
    df = pd.DataFrame(results)

    # Display the dataframe
    st.write("Results:", df)

    # Convert the dataframe to CSV bytes for download
    csv = df.to_csv(index=False).encode('utf-8')

    # Download button for the CSV file
    st.download_button(
        label="Download CSV",
        data=csv,
        file_name=output_file,
        mime='text/csv',
    )
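To try this locally, save the script (for example as index_checker.py, a hypothetical filename), install the dependencies with pip install streamlit requests beautifulsoup4 pandas, and start the app with streamlit run index_checker.py. Note that Google may rate-limit or block automated site: queries, so treat the output as a rough signal rather than an authoritative index report.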