joelnazarene

py

Mar 31st, 2019
112
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.19 KB | None | 0 0
  """Print text scraped from Paperity search results for a typed-in query.

  The script prompts for a search phrase, opens the Paperity search page in
  Firefox through Selenium, collects the result anchors whose ``href``
  matches ``/p/.*`` and, for every other sufficiently long anchor, opens the
  linked page and prints the text of its first ``<blockquote>`` element
  (presumably the paper abstract -- confirm against the live page).
  """
  import re
  import sys
  import time
  from urllib.parse import quote_plus

  import bs4
  from bs4 import BeautifulSoup as soup
  from selenium import webdriver

  SEARCH_URL = "https://paperity.org/search/?q="
  SITE_ROOT = "https://paperity.org"

  driver = webdriver.Firefox()
  try:
      query = input("enter text \n")
      print("\n\n")

      # quote_plus still turns spaces into "+", as the old str.replace did,
      # but also escapes characters such as "&" or "#" that would otherwise
      # truncate or corrupt the query string.
      driver.get(SEARCH_URL + quote_plus(query))

      # Fixed pause before reading the page source, giving the results page
      # time to finish rendering.
      time.sleep(2)

      results_page = soup(driver.page_source, "html.parser")
      paper_links = results_page.find_all("a", {"href": re.compile(r"/p/.*")})

      long_links_seen = 0
      for link in paper_links:
          # Anchors with 20 characters of text or fewer are ignored entirely.
          if len(link.text) <= 20:
              continue
          long_links_seen += 1

          # Only the 1st, 3rd, 5th, ... qualifying anchor is followed.
          # NOTE(review): presumably each paper is linked twice in the
          # results markup -- confirm against the live page.
          if long_links_seen % 2 != 0:
              target = SITE_ROOT + link["href"]
              try:
                  driver.get(target)
                  paper_page = soup(driver.page_source, "html.parser")
                  quote = paper_page.find("blockquote")
                  if quote is None:
                      # Previously this surfaced as an AttributeError that a
                      # bare "except" silently discarded.
                      print("no <blockquote> found on {}".format(target),
                            file=sys.stderr)
                      continue
                  print(quote.text)
              except Exception as exc:
                  # Best effort per result: report the failure and move on.
                  # Unlike a bare "except", this lets Ctrl-C and SystemExit
                  # propagate.
                  print("skipping {}: {}".format(target, exc), file=sys.stderr)
                  continue
          print("\n")
  finally:
      # quit() ends the whole WebDriver session, and the "finally" makes sure
      # that happens even when the search or the scraping fails.
      driver.quit()
  47.  
  48. ===========================================================================================
  49.  
  """Open the dblp search results for a typed-in query in Firefox.

  The script prompts for a search phrase and loads the matching dblp search
  URL through Selenium.  Nothing is scraped: the scraping stage that follows
  the page load is disabled (see the commented-out section at the bottom).
  """
  import re
  import time
  from urllib.parse import quote_plus

  import bs4
  from bs4 import BeautifulSoup as soup
  from selenium import webdriver

  SEARCH_URL = "https://dblp.org/search?q="

  driver = webdriver.Firefox()

  query = input("enter text \n")
  print("\n\n")

  # quote_plus still turns spaces into "+", as the old str.replace did, but
  # also escapes characters such as "&" or "#" that would otherwise truncate
  # or corrupt the query string.
  driver.get(SEARCH_URL + quote_plus(query))

  # NOTE(review): the browser is intentionally left open here -- the only
  # driver.close() call lives in the disabled section below, so the script's
  # sole visible effect is the search page shown in Firefox.
  #
  # Disabled scraping stage, kept for reference.  It still targets
  # ScienceDirect ("/pii/" links, "https://www.sciencedirect.com" root) and
  # has not been adapted to dblp's markup.
  #
  # time.sleep(2)
  #
  # refs = driver.page_source
  #
  # pagesoup = soup(refs, "html.parser")
  # containers = pagesoup.findAll("a", {"href": re.compile('/pii/.*')})
  #
  # i = 0
  # u = "https://www.sciencedirect.com"
  # for container in containers:
  #     if len(container.text) > 20:
  #         i = i + 1
  #         if i % 2 != 0:
  #             driver.get(u + container['href'])
  #             refs = driver.page_source
  #             s = soup(refs, "html.parser")
  #             c = s.find("div", {"id": "abstracts"})
  #             print(c.text)
  #         print("\n")
  #
  # driver.close()
  94.  
  95. ================================================================
  96.  
  """Print the long abstracts shown on an arXiv search page for a query.

  The script prompts for a search phrase, loads the arXiv search results in
  Firefox through Selenium, and prints the text of every ``<span>`` whose
  class attribute is ``abstract-full has-text-grey-dark mathjax`` and whose
  text is longer than 200 characters.
  """
  import re
  import sys
  import time
  from urllib.parse import quote_plus

  import bs4
  from bs4 import BeautifulSoup as soup
  from selenium import webdriver

  SEARCH_URL = 'https://arxiv.org/search/?query='
  SEARCH_SUFFIX = '&searchtype=all&source=header'
  ABSTRACT_CLASS = "abstract-full has-text-grey-dark mathjax"

  driver = webdriver.Firefox()
  try:
      query = input("enter text \n")
      print("\n\n")

      # quote_plus still turns spaces into "+", as the old str.replace did,
      # but also escapes characters such as "&" or "#" that would otherwise
      # be read as part of the fixed search parameters appended after it.
      driver.get(SEARCH_URL + quote_plus(query) + SEARCH_SUFFIX)

      # Fixed pause before reading the page source, giving the results page
      # time to finish rendering.
      time.sleep(2)

      results_page = soup(driver.page_source, "html.parser")
      abstracts = results_page.find_all("span", {"class": ABSTRACT_CLASS})

      for abstract in abstracts:
          # Spans with 200 characters of text or fewer are not printed.
          if len(abstract.text) <= 200:
              continue
          try:
              print(abstract.text)
              print("\n")
          except UnicodeEncodeError as exc:
              # Printing is the only step here that can fail (for example
              # when the console encoding cannot represent a symbol).  Report
              # the skip instead of hiding it behind a bare "except".
              print("skipping an abstract that could not be printed: {}"
                    .format(exc), file=sys.stderr)
              continue
  finally:
      # quit() ends the whole WebDriver session, and the "finally" makes sure
      # that happens even when the search or the scraping fails.
      driver.quit()
Add Comment
Please, Sign In to add comment