Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import ebooklib
- from ebooklib import epub
- from bs4 import BeautifulSoup
def chapter_to_str(chapter):
    """Return the paragraph text of *chapter* as one space-joined string.

    ``chapter`` must expose ``get_body_content()``; the markup it returns is
    parsed with BeautifulSoup's ``html.parser`` backend. Only text found
    inside ``<p>`` elements is kept, so content outside paragraph tags is
    not part of the result.
    """
    body_markup = chapter.get_body_content()
    parsed = BeautifulSoup(body_markup, 'html.parser')
    # Feed the paragraphs straight into join, one get_text() per <p> element.
    return ' '.join(paragraph.get_text() for paragraph in parsed.find_all('p'))
# Location of the EPUB file to read.
# NOTE(review): this is a hard-coded, machine-specific path; consider taking
# it from a command-line argument or configuration instead.
file_name: str = r'C:\Users\C191773\OneDrive - Thomson Reuters Incorporated\Documents\The Data Vault Guru_ a pragmati - Patrick Cuba.epub'

# Load the book and collect every item of type ITEM_DOCUMENT.
book = epub.read_epub(file_name)
documents = list(book.get_items_of_type(ebooklib.ITEM_DOCUMENT))

# Map each document's name to its extracted paragraph text. If two items
# share a name, the later one wins (same as the original assignment loop).
texts = {
    document.get_name(): chapter_to_str(document)
    for document in documents
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement