Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import html
import re
import sys
import time
from urllib.parse import quote, quote_plus

import bs4
import requests
from bs4 import BeautifulSoup as soup
from selenium import webdriver
# Command-line contract: <search term> <source selector> <result limit>.
# The source selector is compared against the strings '1'..'9' below; the
# limit is kept as a string here and converted by the sections that use it.
if len(sys.argv) < 4:
    sys.exit("usage: " + sys.argv[0] + " <query> <source 1-9> <max results>")
b1 = sys.argv[1]
y = sys.argv[2]
r = sys.argv[3]
if y != '8':
    # Page header for every mode except '8', which only emits a bare link.
    # The search term is user-supplied, so it is HTML-escaped before being
    # placed inside the heading.
    print('<html> <head><link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css"> </head><body ><br><div class="container"><button class="btn btn-info"><a style="color:white" href="gui.html">< Back</a></button><div class="jumbotron"><h1>RESULTS FOR ' + html.escape(b1) + '</h1><h2>ABSTRACTS</h2><p> </p></div>')
if y == '1' or y == '7':
    # IEEE Xplore: load the search page in Firefox, then open each matching
    # /document/ link and print the text found under its abstract container.
    limit = int(r)  # parsed once, before the browser starts, so a bad value fails fast
    print("<img src='https://brocku.ca/library/wp-content/uploads/sites/51/Untitled-design-7.png?x63146' height='100'> <br>")
    u = "https://ieeexplore.ieee.org"
    # quote() percent-encodes every reserved character, not only spaces.
    url = "https://ieeexplore.ieee.org/search/searchresult.jsp?newsearch=true&queryText=" + quote(b1, safe='')
    driver = webdriver.Firefox()
    try:
        driver.get(url)
        time.sleep(5)  # fixed wait before the rendered page source is read
        pagesoup = soup(driver.page_source, "html.parser")
        containers = pagesoup.findAll("a", {"href": re.compile('/document/.*')})
        i = 0
        for container in containers:
            try:
                if len(container.text) > 20:
                    i = i + 1
                    # Only every other long-text anchor is used.
                    # NOTE(review): presumably each result yields two such
                    # anchors -- confirm against the live markup.
                    if i % 2 != 0 and i / 2 < limit:
                        link = u + container['href']
                        print("<br>=====================================================IEEERESULTS================================================================================================<br>")
                        print(container.text)
                        driver.get(link)
                        time.sleep(2)
                        s = soup(driver.page_source, "html.parser")
                        c = s.find("div", {"class": "abstract-text row"})
                        print(c.div.div.text)
                        print("<a href='" + link + "'>(ctrl +click here) </a>")
                        print("\n")
                        print("<br>===================================================================================================================================================<br>")
            except (TypeError, AttributeError):
                # Raised when the abstract container (or its nested divs) is absent.
                print("ABSTRACT IS NOT AVAILABLE FOR THIS ARTICLE")
    finally:
        # Always shut the browser down, even when a page load or parse fails.
        driver.quit()
if y == '2' or y == '7':
    # ScienceOpen: load the search page, then open each /document link and
    # print its <title> and the div marked itemprop="description".
    limit = int(r)  # parsed once, before the browser starts
    u = "https://www.scienceopen.com"
    # The query is embedded in the URL fragment between single quotes;
    # quote() keeps quotes and other reserved characters from breaking it.
    url = ("https://www.scienceopen.com/search#('v'~3_'id'~''_'isExactMatch'~true_'context'~null_'kind'~77_'order'~0_'orderLowestFirst'~false_'query'~'"
           + quote(b1, safe='')
           + "'_'filters'~!*)")
    driver = webdriver.Firefox()
    try:
        print("<img src='http://blog.scienceopen.com/wp-content/uploads/2015/12/SOlogowbox.png' height='100'> <br>")
        print("\n\n")
        driver.get(url)
        time.sleep(2)
        pagesoup = soup(driver.page_source, "html.parser")
        containers = pagesoup.findAll("a", {"href": re.compile('/document.*')})
        i = 0
        for container in containers:
            if i >= limit:
                break  # nothing further can be printed once the limit is reached
            try:
                if len(container.text) > 20:
                    i = i + 1
                    link = u + container['href']
                    driver.get(link)
                    print("<a href='" + link + "'>(ctrl +click here )</a>")
                    # Wait BEFORE reading the page source; the original read it
                    # first, so the pause could not affect what was parsed.
                    time.sleep(2)
                    s = soup(driver.page_source, "html.parser")
                    t = s.find("title")
                    print(container)
                    c = s.find("div", {"itemprop": "description"})
                    print("<br>=======================================================================================================================================================================================\n" + t.text + "\n========================================================SCIENCEOPEN======================================================================================<br>")
                    print(c.text + "\n=========================================================SCIENCEOPEN==========================================================================================<br>")
            except Exception:
                # Best effort: skip a result whose page fails to load or lacks
                # the expected elements. Unlike a bare except, this lets
                # KeyboardInterrupt and SystemExit propagate.
                continue
    finally:
        driver.quit()
if y == '3' or y == '7':
    # ScienceDirect: load the search page, then open each /pii/ link and print
    # the text of the element with id="abstracts".
    limit = int(r)  # parsed once, before the browser starts
    u = "https://www.sciencedirect.com"
    url = "https://www.sciencedirect.com/search?qs=" + quote(b1, safe='') + "&show=25&sortBy=relevance"
    driver = webdriver.Firefox()
    try:
        print("<img src='https://www.thesisscientist.com/images/blog/ScienceDirect-Thesisscientist.png' height='100'><br>")
        print("\n\n")
        driver.get(url)
        time.sleep(2)
        pagesoup = soup(driver.page_source, "html.parser")
        containers = pagesoup.findAll("a", {"href": re.compile('/pii/.*')})
        shown = 0
        for container in containers:
            if shown >= limit:
                break
            if len(container.text) <= 20:
                continue  # short-text anchors are not treated as result titles
            # Count only the results actually printed; counting every anchor
            # (as before) made short anchors eat into the requested limit.
            shown += 1
            link = u + container['href']
            print("<br>======================================================================================================================================================================\n" + container.text + "<br>")
            driver.get(link)
            s = soup(driver.page_source, "html.parser")
            c = s.find("div", {"id": "abstracts"})
            print("<a href='" + link + "'>(ctrl +click here )</a>")
            if c is not None:
                print(c.text)
            else:
                # Previously this case raised AttributeError and aborted the run.
                print("ABSTRACT IS NOT AVAILABLE FOR THIS ARTICLE")
            print("\n<br>==================================================SCIENCEDIRECT======================================================================================<br>")
    finally:
        # Always shut the browser down, even when a page load fails.
        driver.quit()
if y == '4' or y == '7':
    # arXiv: every abstract is already on the search page, so no per-result
    # page load is needed; each full-abstract span is printed with a link.
    limit = int(r)  # parsed once, before the browser starts
    u = "https://arxiv.org/abs/"
    # quote_plus() encodes spaces as '+' (as before) and escapes the rest.
    url = 'https://arxiv.org/search/?query=' + quote_plus(b1) + '&searchtype=all&source=header'
    driver = webdriver.Firefox()
    try:
        print("<img src='https://upload.wikimedia.org/wikipedia/commons/thumb/a/a8/ArXiv_web.svg/1280px-ArXiv_web.svg.png' height='100'><br>")
        print("\n\n")
        driver.get(url)
        time.sleep(2)
        pagesoup = soup(driver.page_source, "html.parser")
        containers = pagesoup.findAll("span", {"class": "abstract-full has-text-grey-dark mathjax"})
        i = 0
        for container in containers:
            # '<' rather than '<=': the counter is bumped after printing, so
            # '<=' produced one result more than requested.
            if i >= limit:
                break
            try:
                entry = container.parent.parent
                # NOTE(review): presumably contents[3] is the title element
                # and the first anchor reads "arXiv:<id>" -- confirm.
                d = entry.contents[3].text
                e = entry.a.text
            except (AttributeError, IndexError, TypeError):
                # Entry does not have the expected shape; skip it instead of
                # aborting the whole run as the unguarded lookups used to.
                continue
            if len(container.text) > 200:
                print(d + "\n<br>====================================================================================================================================================================================================\n")
                g = e[6:]  # drops the first six characters of the anchor text
                print("<a href='" + u + g + "'>(ctrl+click here) </a>")
                print(container.text + "\n====================================================ARXIV=================================================================================================================<br>")
                print("\n")
                i = i + 1
    finally:
        # Always shut the browser down, even when parsing fails.
        driver.quit()
if y == '5' or y == '7':
    # Paperity: load the search page, then open each /p/ link and print the
    # first <blockquote> found on the article page.
    limit = int(r)  # parsed once, before the browser starts
    u = "https://paperity.org"
    url = "https://paperity.org/search/?q=" + quote_plus(b1)
    driver = webdriver.Firefox()
    try:
        # A space was missing between the src and height attributes.
        print("<img src='http://paperity.org/static/img/logo/wide.png' height='100'><br>")
        print("\n\n")
        driver.get(url)
        time.sleep(2)
        pagesoup = soup(driver.page_source, "html.parser")
        containers = pagesoup.findAll("a", {"href": re.compile('/p/.*')})
        i = 0
        for container in containers:
            try:
                if len(container.text) > 20:
                    i = i + 1
                    # Only every other long-text anchor is used, so the
                    # counter advances twice per printed result. Comparing
                    # i/2 with the limit (as the IEEE section does) yields
                    # `limit` results; the previous `i <= limit` yielded
                    # roughly half that.
                    # NOTE(review): assumes r is the requested result count.
                    if i % 2 != 0 and i / 2 < limit:
                        link = u + container['href']
                        print("<br>=========================================================================================================================================================================" + container.text)
                        driver.get(link)
                        s = soup(driver.page_source, "html.parser")
                        c = s.find("blockquote")
                        print("<a href='" + link + "'>(ctrl+click here) </a>")
                        print(c.text + "\n<br>================================================PAPERITY================================================================================================================<br>")
            except Exception:
                # Best effort: skip results whose page fails to load or has no
                # blockquote. Unlike a bare except, this lets
                # KeyboardInterrupt and SystemExit propagate.
                continue
    finally:
        driver.quit()
if y == '6' or y == '7':
    # DOAJ: abstracts are taken straight from the search page (divs with
    # class "abstract_text") and paired by index with the /article anchors.
    limit = int(r)  # parsed once, before the browser starts
    u = "https://doaj.org"
    # quote() percent-encodes every reserved character, not only spaces.
    url = ('https://doaj.org/search?source={"query"%3A{"query_string"%3A{"query"%3A"'
           + quote(b1, safe='')
           + '"%2C"default_operator"%3A"AND"}}%2C"from"%3A0%2C"size"%3A10}')
    driver = webdriver.Firefox()
    try:
        print("<img src='https://doaj.org/static/doaj/images/logo_square.jpg' height='100'><br>")
        print("\n\n")
        driver.get(url)
        time.sleep(2)
        pagesoup = soup(driver.page_source, "html.parser")
        containers = pagesoup.findAll("div", {"class": "abstract_text"})
        c2 = pagesoup.findAll("a", {"href": re.compile('/article.*')})
        i = 0
        for container in containers:
            try:
                # NOTE(review): `i` is both the loop guard and the index into
                # c2, and the link below is read AFTER the increment, i.e.
                # from the anchor following the one whose text was printed.
                # The arithmetic is kept exactly as written because the
                # intended pairing cannot be confirmed from this file.
                if len(container.text) > 20 and i <= limit:
                    if "Full Text" in c2[i].text:
                        i = i + 1
                    print("<br>=================================================================================================================================================<BR>" + c2[i].text + "\n=====================================================DOAJ==========================================================================================<BR>" + container.text + "\n==============================================================================================================================================================================<br>")
                    i = i + 1
                    print("<a href='" + u + c2[i]['href'] + "'>(ctrl+click here )</a>")
            except Exception:
                # Best effort: covers running off the end of c2, among other
                # failures. Unlike a bare except, this lets KeyboardInterrupt
                # and SystemExit propagate.
                continue
    finally:
        # Always shut the browser down, even when parsing fails.
        driver.quit()
if y == '8':
    # WolframAlpha: no scraping, just emit a link to the query page.
    # quote_plus() still turns spaces into '+', and additionally escapes a
    # literal '+', '&', '#', quotes, etc. Without that, "2+2" reaches the
    # server as "2 2" and a quote in the query would break the href attribute.
    k = "https://www.wolframalpha.com/input/?i="
    url = k + quote_plus(b1)
    print("<a href='" + url + "'>(ctrl+click here )</a>")
if y == '9':
    # Yahoo web search: fetched with requests (no browser), then each
    # <h3 class="title"> hit is printed as a link plus the text of its
    # grandparent element.
    b = sys.argv[1]
    # The query was previously appended raw; quote_plus() makes spaces and
    # reserved characters safe inside the query string.
    myurl = "https://search.yahoo.com/yhs/search?hspart=ddc&hsimp=yhs-linuxmint&type=__alt__ddc_linuxmint_com&p=" + quote_plus(b)
    # Without a timeout a stalled connection would hang the script forever.
    pagehtml = requests.get(myurl, timeout=30)
    pagesoup = soup(pagehtml.content, "html.parser")
    containers = pagesoup.findAll("h3", {"class": "title"})
    for container in containers:
        try:
            href = container.a['href']
            if len(href) < 80:  # hits with longer hrefs are skipped
                print("<a href='" + href + "'>(ctrl+click here )</a>")
                c = container.parent.parent.get_text()
                print(c)
                print('<br><br>')
        except (TypeError, KeyError, AttributeError):
            # No anchor, no href, or no grandparent: skip this hit.
            continue
if y != '8':
    # Page footer, emitted under the same condition as the header. The added
    # </div> closes the <div class="container"> that the header opens and
    # never closes.
    print('<button class="btn btn-info"><a style="color:white" href="gui.html">< Back</a></button></div></BODY></html>')
Add Comment
Please, Sign In to add comment