# pdf2csv

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Filename: pdf2csv.py
# Version: 1.0.0
# Author: Jeoi Reqi

"""
Description:
This script converts a PDF file (.pdf) to a CSV file (.csv).
It extracts text from each page of the PDF and writes it to a CSV file.

Requirements:
- Python 3.x
- PyMuPDF library (install using: pip install PyMuPDF)

Usage:
1. Save this script as 'pdf2csv.py'.
2. Ensure your PDF file ('example.pdf') is in the same directory as the script.
3. Install the PyMuPDF library using the command: 'pip install PyMuPDF'
4. Run the script.

Note: Adjust the 'pdf_filename' and 'csv_filename' variables in the script as needed.
"""

import fitz  # PyMuPDF
import csv

def pdf_to_csv(pdf_filename, csv_filename):
    """Write the text of every page of a PDF to a one-column CSV file.

    Each page's extracted text is split on newline characters and every
    resulting piece is written as its own single-field row, in page order.
    Because the text is split with ``str.split('\\n')``, page text that ends
    in a newline also produces a final empty row for that page.

    Args:
        pdf_filename: Path of the PDF file to read.
        csv_filename: Path of the CSV file to create (overwritten if it
            already exists). Written as UTF-8.

    Raises:
        Whatever ``fitz.open`` or ``open`` raise when a file cannot be
        opened; no exceptions are caught here.
    """
    # Open the PDF first so that a bad input path fails before the output
    # file is created, and use a context manager so the document is closed
    # even if writing the CSV raises.
    with fitz.open(pdf_filename) as pdf_document:
        # newline='' lets the csv module control line endings itself.
        with open(csv_filename, 'w', newline='', encoding='utf-8') as csv_file:
            csv_writer = csv.writer(csv_file)

            for page in pdf_document:
                text_lines = page.get_text().split('\n')
                # One row per text line, each row holding a single field.
                csv_writer.writerows([line] for line in text_lines)

if __name__ == "__main__":
    import sys  # local import: only the command-line entry point needs it

    # Optional positional arguments override the default filenames:
    #     python pdf2csv.py [input.pdf [output.csv]]
    # With no arguments the defaults below are used.
    cli_args = sys.argv[1:]
    pdf_filename = cli_args[0] if len(cli_args) > 0 else 'example.pdf'
    csv_filename = cli_args[1] if len(cli_args) > 1 else 'pdf2csv.csv'

    # Convert the PDF to a CSV file
    pdf_to_csv(pdf_filename, csv_filename)

    print(f"Converted '{pdf_filename}' to '{csv_filename}'.")