Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import bs4
- from urllib import urlopen as uReq
- from bs4 import BeautifulSoup as soup
# Fetch an IEEE Xplore search-results page and dump every <div> element it
# contains, printing a running 1-based count before each one so that an
# interesting container can later be picked out by its number.
#
# Relies on `uReq` (urlopen) and `soup` (BeautifulSoup) imported above.
from contextlib import closing  # block-local import keeps this script self-contained

myurl = "https://ieeexplore.ieee.org/search/searchresult.jsp?newsearch=true&queryText=web%20scraping"

# closing() releases the connection even when read() raises; the original
# only closed it on the success path.
with closing(uReq(myurl)) as uClient:
    pagehtml = uClient.read()

pagesoup = soup(pagehtml, "html.parser")
containers = pagesoup.find_all('div')

# NOTE(review): the paste lost its indentation; all three prints are assumed
# to belong to the loop body (one numbered dump per <div>) -- confirm.
# enumerate(..., 1) reproduces the original hand-maintained counter.
for i, container in enumerate(containers, 1):
    print("\n\nthe container count is \n\n")
    print(i)
    print(container)
- =============================================================================================================
- import sys
- from PyQt4.QtGui import QApplication
- from PyQt4.QtCore import QUrl
- from PyQt4.QtWebKit import QWebPage
- import bs4 as bs
- import urllib
class Client(QWebPage):
    """Load a URL in a QtWebKit page and block until loading has finished.

    Constructing an instance starts the Qt event loop and only returns once
    the page's ``loadFinished`` signal has fired, so the caller can read the
    resulting document from ``mainFrame()`` immediately afterwards.
    """

    def __init__(self, url):
        """Create the page, start loading *url* and wait for it to finish.

        Args:
            url: Address of the page to load, as a string.
        """
        # A QApplication must exist before any QWebPage is created, and Qt
        # allows only one per process: reuse a running instance if there is
        # one so that more than one Client can be built in the same program.
        self.app = QApplication.instance() or QApplication(sys.argv)
        QWebPage.__init__(self)
        self.loadFinished.connect(self.on_page_load)
        self.mainFrame().load(QUrl(url))
        # Blocks here until on_page_load() quits the event loop.
        self.app.exec_()

    def on_page_load(self):
        """Slot for ``loadFinished``: stop the event loop started in __init__."""
        self.app.quit()
# Load the demo page through the QtWebKit-backed Client, then parse the
# resulting HTML and print the text of the <p class="jstest"> paragraph.
url = 'https://pythonprogramming.net/parsememcparseface/'
client_response = Client(url)
source = client_response.mainFrame().toHtml()
soup = bs.BeautifulSoup(source, 'lxml')
js_test = soup.find('p', class_='jstest')

# find() returns None when nothing matches; fail with a clear message instead
# of an AttributeError on `.text`.
if js_test is None:
    raise SystemExit("no <p class='jstest'> element found in the page at " + url)
print(js_test.text)
- =============================================================================
Add Comment
Please, Sign In to add comment