Advertisement
arfin97

Web Scraping to CSV.py

Nov 14th, 2018
148
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.19 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. import requests
  3. import csv
  4.  
  5. # with open('scrape_html.html') as html_file:
  6. #   soup = BeautifulSoup(html_file, 'lxml');
  7.  
  8. # print(soup.prettify())
  9.  
  10. # for article in soup.find_all('div', class_='article'):
  11. #   headline = article.h2.a.text
  12. #   print(headline)
  13.    
  14. #   summery = article.p.text
  15. #   print(summery)
  16.  
  17. source = requests.get('http://coreyms.com').text
  18. soup = BeautifulSoup(source, 'lxml');
  19.  
  20. # print(soup.prettify())
  21.  
  22. csv_file = open('cms_scrape.csv', 'w')
  23. csv_writer = csv.writer(csv_file);
  24. csv_writer.writerow(['headline', 'summery', 'video_link'])
  25.  
  26.  
  27.  
  28. for article in soup.find_all('article'):
  29.     # print(article.prettify())
  30.  
  31.     headline = article.h2.a.text
  32.     print(headline)
  33.  
  34.     summery = article.find('div', class_='entry-content').p.text
  35.     print(summery)
  36.  
  37.     try:
  38.         vid_src = article.find('iframe', class_='youtube-player')['src']
  39.         # print(vid_src)
  40.         vid_id = vid_src.split('/')[4]
  41.         vid_id = vid_id.split('?')[0]
  42.         # print(vid_id)
  43.         youtube_link = f'https://youtube.com/watch?v={vid_id}'
  44.     except Exception as e:
  45.         youtube_link = None
  46.  
  47.     print(youtube_link)
  48.     print('')
  49.  
  50.     csv_writer.writerow([headline, summery, youtube_link])
  51.  
  52. csv_file.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement