ssdnet

Untitled

Feb 29th, 2024
# Scrapes Google search result links for a keyword: scrolls the results page
# (and, when scrolling stalls, clicks the "More results" button) until
# `maxresults` result blocks are loaded, then returns the links joined by
# newlines. Relies on a shared Selenium `driver` and the GetPage /
# ClickElement_* helpers defined elsewhere (see the sketch below).
import time

from bs4 import BeautifulSoup


def ParseGoogle(kw: str, maxresults: int = 10) -> str:
    arr = []
    ret = GetPage("https://www.google.com/search?q=" + kw)
    if ret != 200:
        return ""

    # Dismiss Google's cookie-consent dialog ("L2AGLb" is its accept button).
    ClickElement_ByXPATH('//*[@id="L2AGLb"]', 5, 2)

    scroll_count = 0
    max_scrolls = 20
    prev_height = -1

    while scroll_count < max_scrolls:
        # Stop early if enough result blocks are already in the DOM.
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        search_results = soup.find_all('div', class_=["tF2Cxc", "VttTV"])
        if len(search_results) >= maxresults:
            break

        # Scroll to the bottom to trigger lazy loading of more results.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(1)

        # Re-check after the scroll.
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        search_results = soup.find_all('div', class_=["tF2Cxc", "VttTV"])
        if len(search_results) >= maxresults:
            break

        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == prev_height:
            # Page height stopped growing: try the "More results" button
            # ("RVQdVd" is its class name).
            s = ClickElement_ByClassName("RVQdVd", 2, 0)
            if s != "OK":
                break  # no button either, so nothing more will load
            new_height = driver.execute_script("return document.body.scrollHeight")
        prev_height = new_height
        scroll_count += 1

    # Collect the first `maxresults` links from the result blocks.
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    search_results = soup.find_all('div', class_=["tF2Cxc", "VttTV"])
    for result in search_results:
        if result.a is None:
            continue  # skip result blocks that carry no link
        arr.append(result.a['href'])
        if len(arr) >= maxresults:
            break
    return '\n'.join(arr)
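
ParseGoogle leans on a shared Selenium driver and three helpers (GetPage, ClickElement_ByXPATH, ClickElement_ByClassName) that this paste does not define. The sketch below is one hypothetical way to implement them, with parameter meanings guessed from the call sites (a wait timeout in seconds followed by a post-click sleep); it is an assumption, not the paste's actual code.

# Hypothetical helper implementations, inferred from how ParseGoogle calls
# them. The real definitions are not part of this paste.
import time

from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

driver = webdriver.Chrome()  # shared WebDriver used throughout ParseGoogle


def GetPage(url: str) -> int:
    # Load url in the shared driver; return 200 on success, 0 on failure
    # (ParseGoogle only checks for 200).
    try:
        driver.get(url)
        return 200
    except WebDriverException:
        return 0


def _click(locator, timeout: int, pause: int) -> str:
    # Wait up to `timeout` seconds for the element to be clickable, click it,
    # then sleep `pause` seconds so the page can react. `_click` is a shared
    # helper introduced here for illustration only.
    try:
        element = WebDriverWait(driver, timeout).until(
            EC.element_to_be_clickable(locator))
        element.click()
        time.sleep(pause)
        return "OK"
    except TimeoutException:
        return "ERROR"


def ClickElement_ByXPATH(xpath: str, timeout: int, pause: int) -> str:
    return _click((By.XPATH, xpath), timeout, pause)


def ClickElement_ByClassName(name: str, timeout: int, pause: int) -> str:
    return _click((By.CLASS_NAME, name), timeout, pause)


if __name__ == "__main__":
    # Example: fetch the first five result links for a query.
    print(ParseGoogle("python+web+scraping", 5))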