Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Scrape article headlines, summaries and video links into a CSV file.

Downloads the page at ``SOURCE_URL``, walks every ``<article>`` element and
writes one row per article to ``OUTPUT_CSV``.  Each row is also echoed to
stdout, followed by a blank line.
"""
from bs4 import BeautifulSoup
import requests
import csv

SOURCE_URL = 'http://coreyms.com'
OUTPUT_CSV = 'cms_scrape.csv'
REQUEST_TIMEOUT_SECONDS = 30

# NOTE: for offline experiments a saved page can be parsed instead of the
# live site, e.g.
#     with open('scrape_html.html') as html_file:
#         soup = BeautifulSoup(html_file, 'lxml')
# That sample file used <div class="article"> wrappers, with the headline
# in ``article.h2.a`` and the summary in ``article.p``.


def _youtube_link(article):
    """Return a YouTube watch URL for the article's embedded player.

    Returns ``None`` when the article has no ``iframe.youtube-player``
    element, the iframe has no ``src`` attribute, or the ``src`` value has
    fewer than five ``/``-separated parts.
    """
    iframe = article.find('iframe', class_='youtube-player')
    if iframe is None:
        return None
    try:
        # The video id is taken from the fifth '/'-separated part of the
        # embed URL (presumably https://www.youtube.com/embed/<id>?... -
        # verify against the live markup), with any query string removed.
        vid_id = iframe['src'].split('/')[4].split('?')[0]
    except (KeyError, IndexError):
        return None
    return f'https://youtube.com/watch?v={vid_id}'


# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() stops us from scraping an HTTP error page.
response = requests.get(SOURCE_URL, timeout=REQUEST_TIMEOUT_SECONDS)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'lxml')

# newline='' is required by the csv module so it controls line endings
# itself (otherwise blank rows appear on Windows).  The explicit encoding
# avoids failures on non-ASCII text under a narrow locale default.  The
# ``with`` block closes the file even if an article fails to parse.
with open(OUTPUT_CSV, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    # Column names (including the 'summery' spelling) are kept as-is so
    # existing consumers of the CSV keep working.
    csv_writer.writerow(['headline', 'summery', 'video_link'])

    for article in soup.find_all('article'):
        headline = article.h2.a.text
        print(headline)

        summary = article.find('div', class_='entry-content').p.text
        print(summary)

        youtube_link = _youtube_link(article)
        print(youtube_link)

        print('')
        csv_writer.writerow([headline, summary, youtube_link])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement