Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/data/data/com.termux/files/usr/bin/python3
# -*- coding: utf-8 -*-
import re
import sys

import requests
from bs4 import BeautifulSoup
from gtts import gTTS
# Turn a list of The Hindu article URLs into text files and spoken MP3s.
#
# For every URL in the user-supplied list this script writes three files
# into the current directory, all named after the article headline:
#   New<headline>.txt  - headline, sub-heading and paragraphs as scraped
#   <headline>.txt     - the same text with blank lines removed
#   <headline>.mp3     - the cleaned text read aloud by gTTS

TEXT_EXT = ".txt"
AUDIO_EXT = ".mp3"
RAW_PREFIX = "New"
# requests waits indefinitely unless a timeout is given, which would let a
# single stalled server hang the whole batch.
REQUEST_TIMEOUT = 30  # seconds
NON_BLANK = re.compile(r"\S")


def safe_filename(title: str) -> str:
    """Return *title* with "/" replaced by "_" so it names a single file.

    A headline such as "India/Pakistan talks" would otherwise be read by
    ``open`` as a path into a directory that does not exist.
    """
    return title.replace("/", "_")


def keep_non_blank(lines):
    """Return, as a list, the lines holding a non-whitespace character.

    *lines* may be any iterable of strings, including an open text file.
    Kept lines are returned unchanged, line endings included.
    """
    return [line for line in lines if NON_BLANK.search(line)]


def convert_article(url: str) -> bool:
    """Download one article and write its text files and MP3.

    Returns True when the three output files were written, and False when
    the article was skipped because it could not be downloaded or the page
    lacks an ``<h1>`` headline or a ``div.paywall`` body. Skips are
    reported on stderr so that one bad URL does not abort the batch.
    """
    try:
        page = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print("Skipping", url, "-", exc, file=sys.stderr)
        return False

    soup = BeautifulSoup(page.content, 'html.parser')
    # The article paragraphs are looked up inside div.paywall (an earlier
    # revision of this script used div.article instead).
    body = soup.find('div', {'class': 'paywall'})
    headline_tag = soup.h1
    if headline_tag is None or body is None:
        print("Skipping", url, "- no headline or article body found",
              file=sys.stderr)
        return False

    headline = headline_tag.text.strip()
    subheading_tag = soup.h2
    # A page without <h2> contributes an empty line, which is filtered out
    # of the cleaned file below.
    subheading = subheading_tag.text.strip() if subheading_tag is not None else ""
    print("" + headline)

    stem = safe_filename(headline)
    raw_path = RAW_PREFIX + stem + TEXT_EXT
    clean_path = stem + TEXT_EXT
    audio_path = stem + AUDIO_EXT

    pieces = [headline, subheading]
    pieces.extend(para.text.strip() for para in body.find_all('p'))

    # Every file is written and read as UTF-8 so the text survives on
    # platforms whose default encoding is something else.
    with open(raw_path, "w", encoding="utf-8") as raw_file:
        raw_file.writelines("\n" + piece for piece in pieces)

    with open(raw_path, encoding="utf-8") as raw_file:
        kept = keep_non_blank(raw_file)
    with open(clean_path, "w", encoding="utf-8") as clean_file:
        clean_file.writelines(kept)

    # The kept lines are exactly the content of the cleaned file, so there
    # is no need to read it back before synthesising speech.
    speech = gTTS("".join(kept), lang='en')
    speech.save(audio_path)
    return True


def main() -> None:
    """Ask for the URL list file and convert every article listed in it."""
    with open(input("Enter your The Hindu Articles urls txt file name:")) as urls:
        for line in urls:
            # Lines keep their trailing newline when read from a file, and
            # the list may contain blank lines; neither is a usable URL.
            url = line.strip()
            if not url:
                continue
            convert_article(url)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement