joelnazarene

CORE

Mar 31st, 2019
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.93 KB | None | 0 0
import re
import time
from urllib.parse import quote, urljoin

import bs4
from bs4 import BeautifulSoup as soup
from selenium import webdriver
  6. driver = webdriver.Firefox()
  7. url="https://core.ac.uk/search?q="
  8. #b="web%20scrape"
  9. b1=input("enter text \n")
  10. b2=b1.replace(" ","%20")
  11. #print(b2)
  12. print("\n\n")
  13. url=url+b2
  14. driver.get(url)
  15. time.sleep(2)
  16. refs=driver.page_source
  17. pagesoup=soup(refs,"html.parser")
  18. #containers=pagesoup.findAll("div",{"class":'main-section'})
  19. containers=pagesoup.findAll("a",{"href":re.compile('/display/.*')})
  20. #print(containers[0].a['href'])
  21.  
  22. i=0
  23. u="https://core.ac.uk"
  24. for container in containers :
  25.  
  26.     if(len(container.text)>20):
  27.         i=i+1
  28.        
  29.        # print(i)
  30.         if(i%2!=0):
  31.        
  32.            driver.get(u + container['href'])
  33.            refs=driver.page_source
  34.            s=soup(refs,"html.parser")
  35.            c=s.find("p",{"class":"abstract"})
  36.            print(c.text)
  37.         print("\n")
  38.  
  39. driver.close()
Add Comment
Please, Sign In to add comment