Advertisement
tommyosheawebdesign

Scrape Google

Jun 8th, 2022
743
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.98 KB | None | 0 0
  1. ######
  2. #
  3. #   Script to scrape Google Search results
  4. #
  5. #   Created by https://twitter.com/shanejones
  6. #   Go and give him a follow, seeing as he gave you this script for free
  7. #
  8. ######
  9. import re
  10. import time
  11. import requests
  12. from bs4 import BeautifulSoup
  13.  
  14. sleep = 0.025
  15.  
  16. search = 'google.com'
  17.  
  18. outputFile = open("output.csv", "a")
  19. outputFile.write("\n" + 'Keyword, Results, Quoted, All In Title')
  20.  
  21. with open('keywords.txt') as f:
  22.     for line in f:
  23.  
  24.         headers = {
  25.             "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/66.0"}
  26.  
  27.         # search a
  28.         standardSearch = requests.get(
  29.             'https://www.' + search + '/search?q=' + line.replace(" ", "+"),
  30.             headers=headers
  31.         )
  32.         time.sleep(sleep)
  33.         quotedSearch = requests.get(
  34.             'https://www.' + search + '/search?q="' + line.replace(" ", "+") + '"', headers=headers
  35.         )
  36.         time.sleep(sleep)
  37.         allInSearch = requests.get(
  38.             'https://www.' + search + '/search?q=allintitle%3A"' +
  39.             line.replace(" ", "+") + '"',
  40.             headers=headers
  41.         )
  42.         time.sleep(sleep)
  43.  
  44.         standardSearchSoup = BeautifulSoup(
  45.             standardSearch.content, 'html.parser')
  46.         quotedSearchSoup = BeautifulSoup(
  47.             quotedSearch.content, 'html.parser')
  48.         allInSearchSoup = BeautifulSoup(allInSearch.content, 'html.parser')
  49.  
  50.         standardSearchResultsContainer = standardSearchSoup.find(
  51.             id='result-stats')
  52.         quotedSearchResultsContainer = quotedSearchSoup.find(
  53.             id='result-stats')
  54.         allInSearchResultsContainer = allInSearchSoup.find(
  55.             id='result-stats')
  56.  
  57.         if(standardSearchResultsContainer):
  58.             standardSearchResultsText = standardSearchResultsContainer.get_text(
  59.                 strip=True)
  60.             standardSearchResults = re.findall(
  61.                 '([0-9,]+)', standardSearchResultsText)
  62.         else:
  63.             standardSearchResults = '0'
  64.  
  65.         if(quotedSearchResultsContainer):
  66.             quotedSearchResultsText = quotedSearchResultsContainer.get_text(
  67.                 strip=True)
  68.             quotedSearchResults = re.findall(
  69.                 '([0-9,]+)', quotedSearchResultsText)
  70.         else:
  71.             quotedSearchResults = '0'
  72.  
  73.         if(allInSearchResultsContainer):
  74.             allInSearchResultsText = allInSearchResultsContainer.get_text(
  75.                 strip=True)
  76.             allInSearchResults = re.findall(
  77.                 '([0-9,]+)', allInSearchResultsText)
  78.         else:
  79.             allInSearchResults = '0'
  80.  
  81.         print(line.rstrip() + " - complete")
  82.  
  83.         outputFile.write("\n" + line.rstrip() + ", " +
  84.                         standardSearchResults[0].replace(",", "") + ", " +
  85.                         quotedSearchResults[0].replace(",", "") + ", " +
  86.                         allInSearchResults[0].replace(",", "")
  87.                         )
  88.  
  89. outputFile.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement