Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def ParseGoogle(kw:str, maxresults:int=10)->str:
    """Collect result links from a Google search page, one URL per line.

    Loads the search page for *kw* through ``GetPage``, then repeatedly
    scrolls the module-level ``driver`` to the bottom of the page until at
    least *maxresults* result containers (``div`` elements with class
    ``tF2Cxc`` or ``VttTV``) are present, the page stops growing and the
    follow-up click fails, or 20 scroll rounds have been performed.

    Args:
        kw: Raw, un-encoded search phrase. It is URL-encoded here, so
            callers must not pre-encode it.
        maxresults: Upper bound on the number of links returned.

    Returns:
        The ``href`` of the first anchor inside each result container,
        joined with newlines. An empty string is returned when
        ``maxresults`` is not positive, when ``GetPage`` returns anything
        other than 200, or when no links are found.
    """
    from urllib.parse import quote_plus

    # Nothing was asked for: skip the page load entirely. (Previously this
    # case still returned the first link, because the limit was only checked
    # after appending.)
    if maxresults <= 0:
        return ""

    # Encode the phrase so spaces, '&', '#', '+' etc. do not corrupt the URL.
    search_url = "https://www.google.com/search?q=" + quote_plus(kw)
    if GetPage(search_url) != 200:
        return ""

    # NOTE(review): "L2AGLb" is presumably the cookie-consent button; the
    # result is deliberately ignored, as in the original. Confirm the meaning
    # of the two numeric arguments against ClickElement_ByXPATH.
    ClickElement_ByXPATH('//*[@id="L2AGLb"]', 5, 2)

    def find_results():
        # Re-parse the live DOM; the page grows as the browser scrolls.
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        return soup.find_all('div', class_=["tF2Cxc", "VttTV"])

    max_scrolls = 20
    prev_height = -1
    for _ in range(max_scrolls):
        if len(find_results()) >= maxresults:
            break

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(1)  # give lazily loaded results time to render
        if len(find_results()) >= maxresults:
            break

        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == prev_height:
            # Scrolling no longer extends the page.
            # NOTE(review): "RVQdVd" is presumably the "more results" button;
            # stop when it cannot be clicked.
            if ClickElement_ByClassName("RVQdVd", 2, 0) != "OK":
                break
            new_height = driver.execute_script("return document.body.scrollHeight")
        prev_height = new_height

    links = []
    for result in find_results():
        anchor = result.a
        # Skip containers without an anchor or without an href instead of
        # raising TypeError / KeyError.
        href = anchor.get('href') if anchor is not None else None
        if not href:
            continue
        links.append(href)
        if len(links) >= maxresults:
            break
    return '\n'.join(links)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement