Advertisement
Kalidor_Vorlich

GUI-less Highlight Extraction

Jan 6th, 2025
96
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.05 KB | None | 0 0
  1. import csv
  2. from docx import Document
  3.  
  4. # Map highlight colors to actors
  5. COLOR_TO_ACTOR = {
  6.     "YELLOW": "NARRATOR",
  7.     "GREEN": "ACTOR 1",
  8.     "RED": "Cade"
  9. }
  10.  
  11. def extract_highlighted_text_with_order(file_path):
  12.     """
  13.    Extract highlighted text, map it to actors, and add order and color information.
  14.    :param file_path: Path to the Word document.
  15.    :return: List of dictionaries with 'Order', 'Actor', 'Color', and 'Line'.
  16.    """
  17.     document = Document(file_path)
  18.     extracted_data = []
  19.     order = 1
  20.  
  21.     for paragraph in document.paragraphs:
  22.         for run in paragraph.runs:
  23.             if run.font.highlight_color:
  24.                 highlight_color = run.font.highlight_color.name  # Use .name to get the color name
  25.                 actor = COLOR_TO_ACTOR.get(highlight_color, "Unknown")
  26.                 extracted_data.append({
  27.                     "Order": order,
  28.                     "Actor": actor,
  29.                     "Color": highlight_color,
  30.                     "Line": run.text.strip()
  31.                 })
  32.                 order += 1
  33.  
  34.     return extracted_data
  35.  
  36. def save_to_csv(data, output_file):
  37.     """
  38.    Save extracted data to a CSV file.
  39.    :param data: List of dictionaries with extracted data.
  40.    :param output_file: Path to the output CSV file.
  41.    """
  42.     with open(output_file, mode="w", newline="", encoding="utf-8") as csv_file:
  43.         writer = csv.DictWriter(csv_file, fieldnames=["Order", "Actor", "Color", "Line"])
  44.         writer.writeheader()
  45.         writer.writerows(data)
  46.  
  47. if __name__ == "__main__":
  48.     # Path to your Word document
  49.     input_file = "Python\Personal\Highlighted_document_extractor\Document.docx"  # Replace with your document path
  50.     output_file = "Python\Personal\Highlighted_document_extractor\output.csv"  # Replace with desired output CSV path
  51.  
  52.     # Extract highlighted text
  53.     highlighted_text = extract_highlighted_text_with_order(input_file)
  54.  
  55.     # Save to CSV
  56.     save_to_csv(highlighted_text, output_file)
  57.  
  58.     print(f"Extracted data has been saved to {output_file}.")
  59.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement