# html2txt

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Filename: html2txt.py
# Version: 1.0.0
# Author: Jeoi Reqi

"""
Description:
This script extracts text content from an HTML file (.html) and saves it to a text file (.txt).
It uses BeautifulSoup to parse the HTML content and extracts the text using the 'get_text' method.

Requirements:
- Python 3.x
- BeautifulSoup library (install using: pip install beautifulsoup4)

Usage:
1. Save this script as 'html2txt.py'.
2. Ensure your HTML file ('example.html') is in the current working directory (the directory you run the script from).
3. Install the BeautifulSoup library using the command: 'pip install beautifulsoup4'
4. Run the script.
5. The extracted text file ('html2txt.txt') will be generated in the current working directory.

Note: Adjust the 'html_filename' and 'txt_filename' variables in the script as needed.
"""
from bs4 import BeautifulSoup

def html_to_txt(html_filename, txt_filename, *, encoding='utf-8') -> None:
    """Extract the text of an HTML file and write it to a text file.

    The HTML is parsed with BeautifulSoup's 'html.parser' backend and the
    text nodes are joined with newlines via ``get_text('\\n')``.

    Args:
        html_filename: Path of the HTML file to read.
        txt_filename: Path of the text file to create or overwrite.
        encoding: Text encoding used for both reading and writing.
            Defaults to UTF-8 so the result does not depend on the
            platform's locale encoding.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeDecodeError: If the input is not valid in ``encoding``.
        UnicodeEncodeError: If the extracted text cannot be represented
            in ``encoding``.
    """
    # Read and parse completely before opening the output: opening with 'w'
    # truncates immediately, so doing it first would leave an empty file
    # behind on a decode/parse failure and would wipe the input when both
    # paths refer to the same file.
    with open(html_filename, 'r', encoding=encoding) as htmlfile:
        soup = BeautifulSoup(htmlfile, 'html.parser')
    text = soup.get_text('\n')

    with open(txt_filename, 'w', encoding=encoding) as txtfile:
        txtfile.write(text)

if __name__ == "__main__":
    import sys

    # Optional command-line overrides: html2txt.py [input.html [output.txt]].
    # With no arguments the built-in default filenames are used.
    cli_args = sys.argv[1:]
    html_filename = cli_args[0] if len(cli_args) > 0 else 'example.html'
    txt_filename = cli_args[1] if len(cli_args) > 1 else 'html2txt.txt'

    try:
        html_to_txt(html_filename, txt_filename)
    except OSError as exc:
        # Report missing/unreadable files as a short message with a non-zero
        # exit status instead of a raw traceback.
        sys.exit(f"Error: could not convert '{html_filename}': {exc}")
    print(f"Converted '{html_filename}' to '{txt_filename}'.")