Advertisement
karim0209

THAurls

Oct 31st, 2020
1,317
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.01 KB | None | 0 0
  1. #!/data/data/com.termux/files/usr/bin/python3
  2. # -*- coding: utf-8 -*-
  3. import requests
  4. from bs4 import BeautifulSoup
  5. from gtts import gTTS
  6. import re
  7.  
  8. with open(input("Enter your The Hindu Articles urls txt file name:")) as urls:
  9.     for URL in urls:
  10.         EX1 = ".txt"
  11.         EX2 = ".mp3"
  12.         Fn = "New"
  13.         page = requests.get(URL)
  14.         soup = BeautifulSoup(page.content, 'html.parser')
  15.         #body = soup.find('div', {'class' : 'article'})
  16.         body = soup.find('div', {'class' : 'paywall'})
  17.         H1=soup.h1.text.strip()
  18.         H2=soup.h2.text.strip()
  19.         out_file = open(Fn+H1+EX1, "w")
  20.         out_file.write("\n"+H1)
  21.         out_file.write("\n"+H2)
  22.         print(""+H1)
  23.         for paras in body.find_all('p'):
  24.             out_file.write("\n"+paras.text.strip())
  25.         out_file.close()
  26.         with open(Fn+H1+EX1, 'r') as inp:
  27.             Pfile =open(H1+EX1, "w")
  28.             for line in inp:
  29.                 if re.search('\S', line):
  30.                     Pfile.write(line)
  31.             Pfile.close()
  32.         with open (H1+EX1, encoding="utf-8") as file:
  33.             file=file.read()
  34.         speak = gTTS(file,lang='en')
  35.         speak.save (H1+EX2)
  36.        
  37.        
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement