Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import time
- import bs4
- import re
- from selenium import webdriver
- from bs4 import BeautifulSoup as soup
- print("=================================================WELCOME TO RESEARCHER'S APP===========================================================================================")
- b1=input("enter text \n")
- y='1'
- while(y != 'q'):
- print("=========================================================================================================================================================================")
- print("1 IEEE")
- print("2 SCIENCE OPEN ")
- print("3 SCIENCE DIRECT")
- print("4 ARXIV")
- print("5 PAPERITY")
- print("6 DOAJ \n")
- y =input("enter choice enter 'q' to exit ")
- r = input("enter the number of abstracts to be displayed")
- if(y=='1'):
- driver = webdriver.Firefox()
- url="https://ieeexplore.ieee.org/search/searchresult.jsp?newsearch=true&queryText="
- b2=b1.replace(" ","%20")
- url=url+b2
- driver.get(url)
- time.sleep(5)
- refs=driver.page_source
- pagesoup=soup(refs,"html.parser")
- containers=pagesoup.findAll("a",{"href":re.compile('/document/.*')})
- i=0
- u="https://ieeexplore.ieee.org"
- for container in containers :
- try:
- if(len(container.text)>20):
- i=i+1
- if(i%2 != 0 and i/2 < int(r)):
- print("=====================================================================================================================================================")
- print("\n")
- print(container.text)
- driver.get(u+ container['href'])
- time.sleep(2)
- refs=driver.page_source
- s=soup(refs,"html.parser")
- c=s.find("div",{"class":"abstract-text row"})
- print(c.div.div.text)
- print("\n")
- print("===================================================================================================================================================")
- except(TypeError,AttributeError):
- print("ABSTRACT IS NOT AVAILABLE FOR THIS ARTICLE")
- driver.close()
- elif(y=='2'):
- driver = webdriver.Firefox()
- url="https://www.scienceopen.com/search#('v'~3_'id'~''_'isExactMatch'~true_'context'~null_'kind'~77_'order'~0_'orderLowestFirst'~false_'query'~'"
- b2=b1.replace(" ","%20")
- b3="'_'filters'~!*)"
- print("\n\n")
- url=url+b2+b3
- driver.get(url)
- time.sleep(2)
- refs=driver.page_source
- pagesoup=soup(refs,"html.parser")
- #containers=pagesoup.findAll("div",{"class":'main-section'})
- containers=pagesoup.findAll("a",{"href":re.compile('/document.*')})
- #print(containers[0].a['href'])
- u="https://www.scienceopen.com"
- for container in containers :
- try :
- if(len(container.text)>20):
- driver.get(u + container['href'])
- refs=driver.page_source
- s=soup(refs,"html.parser")
- t=s.find("title")
- c=s.find("div",{"itemprop":"description"})
- print(t.text + "\n")
- print(c.text)
- except :
- continue
- driver.close()
- elif(y=='3'):
- driver = webdriver.Firefox()
- url="https://www.sciencedirect.com/search?qs="
- b3="&show=25&sortBy=relevance"
- b2=b1.replace(" ","%20")
- print("\n\n")
- url=url+b2+b3
- driver.get(url)
- time.sleep(2)
- refs=driver.page_source
- pagesoup=soup(refs,"html.parser")
- containers=pagesoup.findAll("a",{"href":re.compile('/pii/.*')})
- u="https://www.sciencedirect.com"
- i=0
- for container in containers :
- i=i+1
- if(len(container.text)>20 and i <= int(r)):
- print(container.text)
- driver.get(u + container['href'])
- refs=driver.page_source
- s=soup(refs,"html.parser")
- c=s.find("div",{"id":"abstracts"})
- print(c.text)
- print("\n========================================================================================================================================")
- driver.close()
- elif(y=='4'):
- driver = webdriver.Firefox()
- url='https://arxiv.org/search/?query='
- b2=b1.replace(" ","+")
- b3='&searchtype=all&source=header'
- print("\n\n")
- url=url+b2+b3
- driver.get(url)
- time.sleep(2)
- refs=driver.page_source
- pagesoup=soup(refs,"html.parser")
- containers=pagesoup.findAll("span",{"class":"abstract-full has-text-grey-dark mathjax"})
- i=0
- for container in containers :
- d=container.parent.parent.contents[3].text
- try :
- if(len(container.text)>200):
- print(d)
- print(container.text)
- print("\n")
- except :
- continue
- driver.close()
- elif(y=='5'):
- driver = webdriver.Firefox()
- url="https://paperity.org/search/?q="
- b2=b1.replace(" ","+")
- print("\n\n")
- url=url+b2
- driver.get(url)
- time.sleep(2)
- refs=driver.page_source
- pagesoup=soup(refs,"html.parser")
- #containers=pagesoup.findAll("div",{"class":'result-item-content'})
- containers=pagesoup.findAll("a",{"href":re.compile('/p/.*')})
- #print(containers)
- i=0
- u="https://paperity.org"
- for container in containers :
- try :
- if(len(container.text)>20):
- i=i+1
- # print(i)
- if(i%2!=0):
- print(container.text)
- driver.get(u + container['href'])
- refs=driver.page_source
- s=soup(refs,"html.parser")
- c=s.find("blockquote")
- print(c.text)
- print("\n")
- except :
- continue
- driver.close()
- elif(y=='6'):
- driver = webdriver.Firefox()
- url='https://doaj.org/search?source={"query"%3A{"query_string"%3A{"query"%3A"'
- b2=b1.replace(" ","%20")
- b3='"%2C"default_operator"%3A"AND"}}%2C"from"%3A0%2C"size"%3A10}'
- print("\n\n")
- url=url+b2+b3
- driver.get(url)
- time.sleep(2)
- refs=driver.page_source
- pagesoup=soup(refs,"html.parser")
- #containers=pagesoup.findAll("div",{"class":'main-section'})
- containers=pagesoup.findAll("div",{"class":"abstract_text"})
- #print(containers[0].a['href'])
- #print("reached contA")
- c2=pagesoup.findAll("a",{"href":re.compile('/article.*')})
- i=0
- #u="https://www.scienceopen.com"
- for container in containers :
- # print("for loop")
- try :
- if(len(container.text)>20):
- # print(i)
- #if(i%2!=0):
- # driver.get(u + container['href'])
- # refs=driver.page_source
- # s=soup(refs,"html.parser")
- # c=s.find("div",{"itemprop":"description"})
- if("Full Text" in c2[i].text):
- i=i+1
- print(c2[i].text+"\n"+container.text+"\n")
- i=i+1
- #print(c2[i].text+"\n")
- except :
- continue
- driver.close()
Add Comment
Please, Sign In to add comment