Advertisement
apl-mhd

Python Web scraping

Mar 19th, 2017
237
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.68 KB | None | 0 0
  # Web scraping using Beautiful Soup.
  # Author: Apel Mahmud
  #
  # Downloads a Google search results page, collects every <a class="fl">
  # anchor, prints how many were found, and then prints the link target of
  # each such anchor whose direct parent is a <td> element.
  from urllib.request import urlopen  # NOTE: not used by the code below

  import requests
  from bs4 import BeautifulSoup

  webAdd = 'https://www.google.com/search?q=data+scraping+blog&client=ubuntu&espv=2&source=lnms&tbm=isch&sa=X&ved=0ahUKEwj7ztjtleLSAhWJtY8KHdzUAsYQ_AUICCgD&biw=1088&bih=733'

  # requests applies no timeout by default, so a stalled connection would
  # block the script forever; bound the wait explicitly.
  dailyStar = requests.get(webAdd, timeout=30)

  parsedHtml = BeautifulSoup(dailyStar.text, 'lxml')

  # find_all takes an attribute filter as its second argument, e.g.
  # find_all('table', {'class': 'theclass'}).
  link = parsedHtml.find_all('a', {'class': 'fl'})

  print(len(link))

  for anchor in link:
      # Only anchors that sit directly inside a table cell are reported.
      if anchor.parent.name != 'td':
          continue
      href = anchor.get("href")
      # Tag.get returns None for a missing attribute; concatenating None to
      # a str would raise TypeError, so anchors without an href are skipped.
      if href is None:
          continue
      print('www.google.com' + href)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement