Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import csv
- from tkinter import Tk, Label, Button, Entry, filedialog, StringVar, OptionMenu, Frame, Text, Scrollbar, END
- from docx import Document
# Maps a Word highlight-colour name to the Tk colour used when previewing
# that actor. Keys are compared against ``run.font.highlight_color.name``
# from python-docx, so they must match the WD_COLOR_INDEX member names.
# Each value is a 1-tuple because callers read the Tk colour with ``[0]``.
TKINTER_COLOR_MAP = {
    "YELLOW": ("yellow",),
    "GREEN": ("light green",),
    "PINK": ("pink",),
    "BLUE": ("light blue",),
    "RED": ("red",),
    "TURQUOISE": ("turquoise",),
    # Kept for backward compatibility. Per python-docx's WD_COLOR_INDEX,
    # gray highlights are reported as GRAY_25 / GRAY_50, never plain GRAY.
    "GRAY": ("gray",),
    # Highlight names python-docx reports that had no entry before; without
    # them those highlights could not be assigned to an actor.
    "BRIGHT_GREEN": ("lime green",),
    "GRAY_25": ("light gray",),
    "GRAY_50": ("gray",),
}
class HighlightExtractorGUI:
    """Tkinter front end that turns highlighted Word text into a CSV.

    The user selects a ``.docx`` file, maps highlight colours to actor
    names, previews the extracted lines and finally saves them as CSV.
    """

    def __init__(self, master):
        """Build all widgets inside *master* and initialise empty state.

        Args:
            master: The Tk window that hosts the widgets.
        """
        self.master = master
        self.master.title("Highlight Extractor")
        self.file_path = ""  # Path of the selected Word document
        self.color_actor_map = {}  # Highlight colour name -> actor name
        self.fields = []  # One (actor_var, color_var) pair per mapping row
        self.row_count = 0  # Next free grid row inside the mapping frame
        self.extracted_data = []  # Rows produced by the latest extraction

        # Let the window resize; row 1 / column 3 (the preview) absorb the
        # extra space.
        self.master.resizable(True, True)
        self.master.grid_rowconfigure(1, weight=1)
        self.master.grid_columnconfigure(3, weight=1)

        # File selection row.
        Label(master, text="Word File:").grid(
            row=0, column=0, padx=10, pady=10, sticky="e"
        )
        self.file_label = Label(
            master, text="No file selected", width=40, anchor="w"
        )
        self.file_label.grid(row=0, column=1, padx=10, pady=10, sticky="w")
        Button(master, text="Browse", command=self.select_file).grid(
            row=0, column=2, padx=10, pady=10
        )

        # Actor/colour mapping rows, starting with a single row.
        self.fields_frame = Frame(master)
        self.fields_frame.grid(
            row=1, column=0, columnspan=3, pady=10, sticky="nsew"
        )
        self.add_actor_color_field()

        # Action buttons.
        Button(
            master,
            text="Add Actor-Color Field",
            command=self.add_actor_color_field,
        ).grid(row=2, column=0, columnspan=3, pady=10)
        Button(
            master,
            text="Extract & Preview CSV",
            command=self.extract_and_preview,
        ).grid(row=3, column=0, columnspan=3, pady=10)
        Button(master, text="Save CSV", command=self.save_csv).grid(
            row=4, column=0, columnspan=3, pady=10
        )

        # Scrollable, resizable preview area.
        Label(master, text="CSV Preview:").grid(
            row=0, column=3, padx=10, pady=10, sticky="w"
        )
        preview_frame = Frame(master)
        preview_frame.grid(
            row=1, column=3, rowspan=4, padx=10, pady=10, sticky="nsew"
        )
        preview_frame.grid_rowconfigure(0, weight=1)
        preview_frame.grid_columnconfigure(0, weight=1)
        self.preview_text = Text(preview_frame, wrap="word")
        self.preview_text.grid(row=0, column=0, sticky="nsew")
        preview_scrollbar = Scrollbar(
            preview_frame, command=self.preview_text.yview
        )
        preview_scrollbar.grid(row=0, column=1, sticky="ns")
        self.preview_text.config(yscrollcommand=preview_scrollbar.set)

    def select_file(self):
        """Ask the user for a Word document and remember the chosen path.

        Cancelling the dialog keeps the previous selection, so the stored
        path and the label shown in the UI never disagree.
        """
        chosen = filedialog.askopenfilename(
            filetypes=[("Word Documents", "*.docx")]
        )
        if not chosen:
            return
        self.file_path = chosen
        # Display only the final path component in the UI.
        self.file_label.config(text=chosen.split("/")[-1])

    def add_actor_color_field(self):
        """Append one actor-name / highlight-colour row to the mapping frame."""
        # The first colour in the map is the default selection.
        color_var = StringVar(value=next(iter(TKINTER_COLOR_MAP)))
        actor_var = StringVar()
        row = self.row_count

        Label(self.fields_frame, text="Actor:").grid(
            row=row, column=0, padx=5, pady=2, sticky="e"
        )
        Entry(self.fields_frame, textvariable=actor_var, width=20).grid(
            row=row, column=1, padx=5, pady=2, sticky="w"
        )
        Label(self.fields_frame, text="Color:").grid(
            row=row, column=2, padx=5, pady=2, sticky="e"
        )
        OptionMenu(self.fields_frame, color_var, *TKINTER_COLOR_MAP).grid(
            row=row, column=3, padx=5, pady=2, sticky="w"
        )

        self.fields.append((actor_var, color_var))
        self.row_count += 1

    def extract_highlighted_text(self):
        """Collect every highlighted run of the selected document.

        Returns:
            A list of dicts with the keys ``"Order"`` (1-based position),
            ``"Actor"``, ``"Color"`` (highlight colour name) and ``"Line"``
            (stripped run text). The list is empty when no file is selected
            or nothing is highlighted.
        """
        if not self.file_path:
            return []

        # Blank or whitespace-only actor names fall back to "Unknown". When
        # two rows use the same colour, the later row wins.
        self.color_actor_map = {
            color_var.get(): actor_var.get().strip() or "Unknown"
            for actor_var, color_var in self.fields
        }

        document = Document(self.file_path)
        extracted_data = []
        for paragraph in document.paragraphs:
            for run in paragraph.runs:
                highlight = run.font.highlight_color
                if not highlight:
                    continue
                line = run.text.strip()
                if not line:
                    # A highlighted run holding only whitespace would
                    # otherwise become an empty CSV row.
                    continue
                color_name = highlight.name
                extracted_data.append(
                    {
                        "Order": len(extracted_data) + 1,
                        "Actor": self.color_actor_map.get(
                            color_name, "Unknown"
                        ),
                        "Color": color_name,
                        "Line": line,
                    }
                )
        return extracted_data

    def extract_and_preview(self):
        """Run the extraction and render the result in the preview pane."""
        extracted_data = self.extract_highlighted_text()
        # Always replace the cached rows, so an empty extraction cannot
        # leave stale data behind for save_csv() to write.
        self.extracted_data = extracted_data

        self.preview_text.delete("1.0", END)
        if not extracted_data:
            self.preview_text.insert(
                END, "No highlighted text found or no file selected."
            )
            return

        # Configure each colour tag once instead of once per row. Unmapped
        # colours use white, because the tag background is black and a
        # black foreground would make the actor name invisible.
        for color_name in {row["Color"] for row in extracted_data}:
            self.preview_text.tag_configure(
                f"actor_color_{color_name}",
                foreground=TKINTER_COLOR_MAP.get(color_name, ("white",))[0],
                background="black",
            )

        for row in extracted_data:
            self.preview_text.insert(
                END, f"Order: {row['Order']}, ", "default"
            )
            self.preview_text.insert(
                END, f"{row['Actor']}: ", f"actor_color_{row['Color']}"
            )
            self.preview_text.insert(END, f"{row['Line']}\n\n", "default")

    def save_csv(self):
        """Write the last extraction to a user-chosen CSV file.

        Only the ``Order``, ``Actor`` and ``Line`` columns are written. A
        status or error message is shown in the preview pane.
        """
        rows = getattr(self, "extracted_data", None)
        if not rows:
            self.preview_text.delete("1.0", END)
            self.preview_text.insert(
                END, "No data to save. Please extract data first."
            )
            return

        output_file = filedialog.asksaveasfilename(
            defaultextension=".csv", filetypes=[("CSV Files", "*.csv")]
        )
        if not output_file:
            return  # Dialog cancelled

        try:
            with open(
                output_file, mode="w", newline="", encoding="utf-8"
            ) as csv_file:
                # extrasaction="ignore" drops the "Color" key of each row.
                writer = csv.DictWriter(
                    csv_file,
                    fieldnames=["Order", "Actor", "Line"],
                    extrasaction="ignore",
                )
                writer.writeheader()
                writer.writerows(rows)
        except OSError as exc:
            # Report the failure in the UI instead of letting the Tk
            # callback fail with no visible feedback.
            self.preview_text.delete("1.0", END)
            self.preview_text.insert(END, f"Could not save CSV: {exc}")
            return

        self.preview_text.delete("1.0", END)
        self.preview_text.insert(
            END, f"CSV saved successfully at {output_file}."
        )
if __name__ == "__main__":
    # Script entry point: create the top-level Tk window, attach the GUI
    # to it, then hand control to Tk's event loop until the window closes.
    window = Tk()
    gui = HighlightExtractorGUI(window)
    window.mainloop()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement