fahadkalil

edd_extracao_texto_web_bs4

Jun 23rd, 2020
258
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.63 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. from urllib.request import Request, urlopen
  3. from io import StringIO
  4.  
  5. ## usando Request e bs4
  6.  
  7. link = Request('https://rduirapuru.com.br/geral/fim-do-impasse-empresa-confirma-execucao-do-projeto-para-o-aeroporto-de-passo-fundo-e-estado-da-prazo-para-documentos/',
  8.                headers={'User-Agent': 'Mozilla/5.0'})
  9.  
  10. pagina = urlopen(link).read().decode('utf-8', 'ignore')
  11.  
  12. soup = BeautifulSoup(pagina, "lxml")
  13.  
  14. paragrafos = soup.find("article").find_all('p')
  15.  
  16. texto_buf = StringIO()
  17. for p in paragrafos:    
  18.     texto_buf.write(p.text + " ")
  19.  
  20. texto = texto_buf.getvalue()
  21.  
  22. print(texto)
Add Comment
Please, Sign In to add comment