Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pyttsx3
- import PyPDF2
- from pydub import AudioSegment
- import re
- import os
- from contextlib import closing
def clean_text(text):
    """Return *text* reduced to ASCII with its whitespace normalised.

    Each run of non-ASCII characters becomes a single space, each run of
    whitespace is collapsed to a single space, and leading/trailing
    whitespace is removed.
    """
    non_ascii_run = re.compile(r'[^\x00-\x7F]+')
    whitespace_run = re.compile(r'\s+')

    ascii_only = non_ascii_run.sub(' ', text)
    collapsed = whitespace_run.sub(' ', ascii_only)
    return collapsed.strip()
def dynamic_chunk_text(text, delimiter='. ', max_chunk_length=1000):
    """Group the sentences of *text* into chunks of bounded length.

    The text is split immediately after every occurrence of *delimiter*
    (the delimiter stays attached to the sentence it ends), and consecutive
    sentences are packed together while the chunk stays within
    *max_chunk_length* characters.

    Args:
        text: The text to split.
        delimiter: Literal sentence terminator (not a regular expression).
        max_chunk_length: Maximum number of characters per chunk.

    Returns:
        A list of non-empty chunks whose concatenation equals *text*.
        A single sentence longer than *max_chunk_length* is not broken up;
        it is returned as one oversized chunk.
    """
    # The delimiter is literal text, so it must be escaped: unescaped, the
    # default '. ' means "any character followed by a space" and would split
    # after every word, and delimiters such as '? ' would not even compile.
    sentences = re.split(f'(?<={re.escape(delimiter)})', text)

    chunks = []
    pending = []        # sentences collected for the chunk being built
    pending_length = 0  # total characters held in `pending`
    for sentence in sentences:
        # Flush first when adding this sentence would overflow the chunk.
        if pending_length and pending_length + len(sentence) > max_chunk_length:
            chunks.append(''.join(pending))
            pending = []
            pending_length = 0
        pending.append(sentence)
        pending_length += len(sentence)

    # Length (not list truthiness) decides, so empty pieces never form a chunk.
    if pending_length:
        chunks.append(''.join(pending))
    return chunks
def export_audio(combined_audio, filename, format='wav'):
    """Write *combined_audio* to *filename* and report the outcome on stdout.

    The object's ``export`` method is called with the requested *format* and
    a fixed bitrate of "192k". Any exception raised while exporting is
    caught and printed instead of being propagated; nothing is returned.
    """
    try:
        combined_audio.export(filename, format=format, bitrate="192k")
        outcome = f"Exported audio file: {filename}"
    except Exception as error:
        outcome = f"Failed to export audio file: {error}"
    print(outcome)
def pdf_to_audio_converter(pdf_path, audio_format='mp3'):
    """Read a PDF aloud into a single audio file.

    The text of every page is extracted and cleaned, split into chunks,
    synthesised chunk by chunk with pyttsx3, concatenated with pydub and
    finally exported as ``final_story.<audio_format>`` in the current
    working directory.

    Args:
        pdf_path: Path of the PDF file to read.
        audio_format: Format (and file extension) of the exported file.

    Returns:
        None. Failures while reading the PDF or producing audio are
        reported on stdout rather than raised.
    """
    # Function-scope import so the block does not rely on a file-level import.
    import tempfile

    speaker = pyttsx3.init()

    # --- 1. Extract the text -------------------------------------------------
    try:
        with open(pdf_path, 'rb') as pdf_file:
            pdfreader = PyPDF2.PdfReader(pdf_file)
            # `or ''` guards against pages for which no text is returned.
            page_texts = [
                clean_text(page.extract_text() or '')
                for page in pdfreader.pages
            ]
    except (PyPDF2.errors.PdfReadError, IOError) as e:
        print(f"Error reading PDF file: {e}")
        return
    except Exception as e:
        print(f"An error occurred: {e}")
        return

    full_text = ' '.join(page_text for page_text in page_texts if page_text)

    # --- 2. Synthesise and concatenate ---------------------------------------
    try:
        chunks = dynamic_chunk_text(full_text)
        if not chunks:
            print(f"No extractable text found in: {pdf_path}")
            return

        combined_audio = AudioSegment.empty()
        # The temporary directory removes the intermediate files even when a
        # chunk fails, and avoids clobbering files in the working directory.
        with tempfile.TemporaryDirectory() as temp_dir:
            for i, chunk in enumerate(chunks, start=1):
                # The engine is reused directly: wrapping it in
                # contextlib.closing() calls engine.close(), which the
                # pyttsx3 engine does not provide.
                # NOTE(review): the container pyttsx3 writes is chosen by its
                # driver, not by the file extension (presumably WAV/AIFF) —
                # so the part is named .wav and pydub is left to detect the
                # real format instead of being forced to `audio_format`.
                audio_filename = os.path.join(temp_dir, f'temp_part_{i}.wav')
                speaker.save_to_file(chunk, audio_filename)
                speaker.runAndWait()
                combined_audio += AudioSegment.from_file(audio_filename)
    except Exception as e:
        print(f"An error occurred: {e}")
        return

    # --- 3. Export -----------------------------------------------------------
    final_audio_filename = f"final_story.{audio_format}"
    export_audio(combined_audio, final_audio_filename, audio_format)
if __name__ == "__main__":
    # Convert the sample document only when this file is run as a script,
    # so importing the module does not trigger a conversion.
    pdf_to_audio_converter('Brief answers.pdf', 'mp3')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement