Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import csv
- from docx import Document
# Lookup table from a highlight color name to the actor who speaks the
# highlighted line. Keys are matched against `run.font.highlight_color.name`;
# any color missing from this table is reported as "Unknown" by the extractor.
COLOR_TO_ACTOR = {
    "YELLOW": "NARRATOR",
    "GREEN": "ACTOR 1",
    "RED": "Cade",
}
def extract_highlighted_text_with_order(file_path):
    """
    Collect every highlighted run of a Word document, in document order.

    Each highlighted run becomes one record holding its 1-based position
    among the highlighted runs, the actor mapped to its highlight color
    (``"Unknown"`` when the color is not in ``COLOR_TO_ACTOR``), the color
    name, and the run text with surrounding whitespace stripped.

    :param file_path: Path to the Word document.
    :return: List of dictionaries with 'Order', 'Actor', 'Color', and 'Line'.
    """
    records = []
    for para in Document(file_path).paragraphs:
        for run in para.runs:
            highlight = run.font.highlight_color
            # Runs without a (truthy) highlight value are not part of the script.
            if not highlight:
                continue
            color_name = highlight.name  # enum member name, e.g. "YELLOW"
            records.append(
                {
                    # Numbering counts emitted records only, starting at 1.
                    "Order": len(records) + 1,
                    "Actor": COLOR_TO_ACTOR.get(color_name, "Unknown"),
                    "Color": color_name,
                    "Line": run.text.strip(),
                }
            )
    return records
def save_to_csv(data, output_file):
    """
    Write the extracted records to a UTF-8 CSV file with a header row.

    The file is created or overwritten. Columns are written in the fixed
    order Order, Actor, Color, Line.

    :param data: List of dictionaries with extracted data.
    :param output_file: Path to the output CSV file.
    """
    columns = ["Order", "Actor", "Color", "Line"]
    # newline="" hands line-ending control to the csv module, as its docs require.
    with open(output_file, "w", newline="", encoding="utf-8") as handle:
        csv_out = csv.DictWriter(handle, fieldnames=columns)
        csv_out.writeheader()
        csv_out.writerows(data)
if __name__ == "__main__":
    # Script entry point: extract the highlighted lines from one document and
    # write them to a CSV file.
    #
    # The paths are raw strings so the Windows-style backslashes stay literal.
    # In a normal string, sequences such as "\P" or "\o" are invalid escapes
    # that trigger warnings on current Python and are slated to become errors.
    input_file = r"Python\Personal\Highlighted_document_extractor\Document.docx"  # Replace with your document path
    output_file = r"Python\Personal\Highlighted_document_extractor\output.csv"  # Replace with desired output CSV path

    # Extract highlighted text
    highlighted_text = extract_highlighted_text_with_order(input_file)

    # Save to CSV
    save_to_csv(highlighted_text, output_file)
    print(f"Extracted data has been saved to {output_file}.")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement