Advertisement
Kalidor_Vorlich

highlight extractor

Jan 6th, 2025
101
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 7.87 KB | None | 0 0
  1. import csv
  2. from tkinter import Tk, Label, Button, Entry, filedialog, StringVar, OptionMenu, Frame, Text, Scrollbar, END
  3. from docx import Document
  4.  
  5. #Choose the color map for tkinter
  6. TKINTER_COLOR_MAP = {
  7.     "YELLOW": ("yellow",),
  8.     "GREEN": ("light green",),
  9.     "PINK": ("pink",),
  10.     "BLUE": ("light blue",),
  11.     "RED": ("red",),
  12.     "TURQUOISE": ("turquoise",),
  13.     "GRAY": ("gray",)
  14. }
  15.  
  16. class HighlightExtractorGUI:
  17.     def __init__(self, master):
  18.         """
  19.        Initialize the GUI application and create the necessary components.
  20.        """
  21.         self.master = master
  22.         self.master.title("Highlight Extractor")  # Set the window title
  23.         self.file_path = ""  # To store the selected file path
  24.         self.color_actor_map = {}  # Mapping of colors to actor names
  25.         self.fields = []  # List to store the actor-color fields
  26.         self.row_count = 0  # Keeps track of rows added to the actor-color section
  27.  
  28.         # Make the window resizable
  29.         self.master.resizable(True, True)
  30.         self.master.grid_rowconfigure(1, weight=1)  # Make row 1 expandable
  31.         self.master.grid_columnconfigure(3, weight=1)  # Make column 3 expandable
  32.  
  33.         #file selection ui
  34.         Label(master, text="Word File:").grid(row=0, column=0, padx=10, pady=10, sticky="e")
  35.         self.file_label = Label(master, text="No file selected", width=40, anchor="w")
  36.         self.file_label.grid(row=0, column=1, padx=10, pady=10, sticky="w")
  37.         Button(master, text="Browse", command=self.select_file).grid(row=0, column=2, padx=10, pady=10)
  38.  
  39.         #fields for actor and color
  40.         self.fields_frame = Frame(master)
  41.         self.fields_frame.grid(row=1, column=0, columnspan=3, pady=10, sticky="nsew")
  42.         self.add_actor_color_field()  # Add the initial actor-color field
  43.  
  44.         #buttons for adding fields, extracting and previewing data, and saving the CSV
  45.         Button(master, text="Add Actor-Color Field", command=self.add_actor_color_field).grid(row=2, column=0, columnspan=3, pady=10)
  46.         Button(master, text="Extract & Preview CSV", command=self.extract_and_preview).grid(row=3, column=0, columnspan=3, pady=10)
  47.         Button(master, text="Save CSV", command=self.save_csv).grid(row=4, column=0, columnspan=3, pady=10)
  48.  
  49.         #preview section for the CSV
  50.         Label(master, text="CSV Preview:").grid(row=0, column=3, padx=10, pady=10, sticky="w")
  51.         preview_frame = Frame(master)
  52.         preview_frame.grid(row=1, column=3, rowspan=4, padx=10, pady=10, sticky="nsew")
  53.  
  54.         # Configure the preview area to be scrollable and resizable
  55.         preview_frame.grid_rowconfigure(0, weight=1)
  56.         preview_frame.grid_columnconfigure(0, weight=1)
  57.         self.preview_text = Text(preview_frame, wrap="word")
  58.         self.preview_text.grid(row=0, column=0, sticky="nsew")
  59.  
  60.         # Scrollbar for the preview text area
  61.         preview_scrollbar = Scrollbar(preview_frame, command=self.preview_text.yview)
  62.         preview_scrollbar.grid(row=0, column=1, sticky="ns")
  63.         self.preview_text.config(yscrollcommand=preview_scrollbar.set)
  64.  
  65.  
  66.     def select_file(self):
  67.         """
  68.        Open a file dialog to select a Word document and update the file path.
  69.        """
  70.         self.file_path = filedialog.askopenfilename(filetypes=[("Word Documents", "*.docx")])
  71.         if self.file_path:
  72.             # Display only the filename in the UI
  73.             self.file_label.config(text=self.file_path.split("/")[-1])
  74.  
  75.  
  76.     def add_actor_color_field(self):
  77.         """
  78.        Add a new row for mapping an actor to a highlight color.
  79.        """
  80.         color_var = StringVar(value=list(TKINTER_COLOR_MAP.keys())[0])  # Default color
  81.         actor_var = StringVar()
  82.         Label(self.fields_frame, text="Actor:").grid(row=self.row_count, column=0, padx=5, pady=2, sticky="e")
  83.         Entry(self.fields_frame, textvariable=actor_var, width=20).grid(row=self.row_count, column=1, padx=5, pady=2, sticky="w")
  84.         Label(self.fields_frame, text="Color:").grid(row=self.row_count, column=2, padx=5, pady=2, sticky="e")
  85.         OptionMenu(self.fields_frame, color_var, *TKINTER_COLOR_MAP.keys()).grid(row=self.row_count, column=3, padx=5, pady=2, sticky="w")
  86.         self.fields.append((actor_var, color_var))  # Store the actor-color pair
  87.         self.row_count += 1
  88.  
  89.     def extract_highlighted_text(self):
  90.         if not self.file_path:  # No file selected
  91.             return []
  92.        
  93.         # Create a mapping of color to actor name
  94.         self.color_actor_map = {color_var.get(): actor_var.get() or "Unknown" for actor_var, color_var in self.fields}
  95.        
  96.         document = Document(self.file_path) # Load the Word document
  97.         extracted_data = [] # Store the extracted data
  98.         order = 1 # Order of the highlighted text
  99.  
  100.         for paragraph in document.paragraphs:
  101.             for run in paragraph.runs:
  102.                 if run.font.highlight_color: # Check if text is highlighted
  103.                     highlight_color = run.font.highlight_color.name # Get the colour name
  104.                     actor = self.color_actor_map.get(highlight_color, "Unknown") # Get the actor name and match to colour
  105.                     extracted_data.append({"Order": order,
  106.                                            "Actor": actor,
  107.                                            "Color": highlight_color,
  108.                                            "Line": run.text.strip()})  # Store the highlighted text
  109.                     order += 1
  110.         return extracted_data
  111.  
  112.     def extract_and_preview(self):
  113.         extracted_data = self.extract_highlighted_text()
  114.         if not extracted_data:
  115.             self.preview_text.delete(1.0, END)
  116.             self.preview_text.insert(END, "No highlighted text found or no file selected.")
  117.             return
  118.  
  119.         # Display the extracted data in the preview text area
  120.         self.preview_text.delete(1.0, END)
  121.         for row in extracted_data:
  122.             text_color = TKINTER_COLOR_MAP.get(row["Color"], ("black",))[0]
  123.  
  124.             # Insert the order and actor name with styling
  125.             self.preview_text.insert(END, f"Order: {row['Order']}, ", "default")
  126.             self.preview_text.insert(END, f"{row['Actor']}: ", f"actor_color_{row['Color']}")
  127.  
  128.             # Insert the actor's line with default styling
  129.             self.preview_text.insert(END, f"{row['Line']}\n\n", "default")
  130.  
  131.             # Configure text tags for foreground and black background
  132.             self.preview_text.tag_configure(
  133.                 f"actor_color_{row['Color']}",
  134.                 foreground=text_color,
  135.                 background="black"
  136.             )
  137.  
  138.         self.extracted_data = extracted_data  # Save the extracted data for saving to CSV
  139.  
  140.     def save_csv(self):
  141.         if not hasattr(self, "extracted_data") or not self.extracted_data:
  142.             # If no data is available, display an error message
  143.             self.preview_text.delete(1.0, END)
  144.             self.preview_text.insert(END, "No data to save. Please extract data first.")
  145.             return
  146.  
  147.         output_file = filedialog.asksaveasfilename(defaultextension=".csv", filetypes=[("CSV Files", "*.csv")])
  148.         if not output_file:
  149.             return  # No file selected
  150.  
  151.         # Write the extracted data to a CSV file
  152.         with open(output_file, mode="w", newline="", encoding="utf-8") as csv_file:
  153.             writer = csv.DictWriter(csv_file, fieldnames=["Order", "Actor", "Line"])
  154.             writer.writeheader()
  155.             for row in self.extracted_data:
  156.                 writer.writerow({"Order": row["Order"], "Actor": row["Actor"], "Line": row["Line"]})
  157.  
  158.         self.preview_text.delete(1.0, END)
  159.         self.preview_text.insert(END, f"CSV saved successfully at {output_file}.")
  160.  
  161. if __name__ == "__main__":
  162.     root = Tk()  # Create the main window
  163.     app = HighlightExtractorGUI(root)
  164.     root.mainloop()  # Run the application
  165.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement