Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- # Filename: pdf2csv.py
- # Version: 1.0.0
- # Author: Jeoi Reqi
- """
- Description:
- This script converts a PDF file (.pdf) to a CSV file (.csv).
- It extracts text from each page of the PDF and writes every line of text as its own single-column row in the CSV file.
- Requirements:
- - Python 3.x
- - PyMuPDF library (install using: pip install PyMuPDF)
- Usage:
- 1. Save this script as 'pdf2csv.py'.
- 2. Ensure your PDF file ('example.pdf') is in the directory you run the script from (the current working directory).
- 3. Install the PyMuPDF library using the command: 'pip install PyMuPDF'
- 4. Run the script.
- Note: Adjust the 'pdf_filename' and 'csv_filename' variables in the script as needed.
- """
- import fitz # PyMuPDF
- import csv
def pdf_to_csv(pdf_filename, csv_filename):
    """Write the text of every page of a PDF to a single-column CSV file.

    The text extracted from each page is split on newline characters and
    every resulting piece is written as its own one-field row, in page
    order.

    Args:
        pdf_filename: Path of the PDF file to read.
        csv_filename: Path of the CSV file to write. It is opened in 'w'
            mode (created or overwritten) and encoded as UTF-8.
    """
    # The PDF is opened first so that a failure to open it leaves any
    # existing CSV file untouched. Both the document and the CSV file are
    # released when the block exits, even if extraction raises part-way.
    with fitz.open(pdf_filename) as pdf_document, \
            open(csv_filename, 'w', newline='', encoding='utf-8') as csv_file:
        csv_writer = csv.writer(csv_file)
        for page in pdf_document:
            # One CSV row per newline-separated piece of the page text.
            csv_writer.writerows(
                [line] for line in page.get_text().split('\n')
            )
if __name__ == "__main__":
    import sys

    # Filenames for the PDF and CSV files. The defaults below are used
    # unless overridden on the command line:
    #     pdf2csv.py [PDF_FILE [CSV_FILE]]
    cli_args = sys.argv[1:]
    pdf_filename = cli_args[0] if len(cli_args) > 0 else 'example.pdf'
    csv_filename = cli_args[1] if len(cli_args) > 1 else 'pdf2csv.csv'

    # Convert the PDF to a CSV file
    pdf_to_csv(pdf_filename, csv_filename)
    print(f"Converted '{pdf_filename}' to '{csv_filename}'.")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement