Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #@title KJV Bible Formatter Setup and UI (v8.1 - Corrected Exception Syntax)
- import requests
- import xml.etree.ElementTree as ET
- import ipywidgets as widgets
- from IPython.display import display, clear_output, HTML
- import re
- import io
- import traceback
# --- Configuration ---
# Source document (KJV in OSIS XML) and the namespace its elements live in.
BIBLE_XML_URL = (
    "https://raw.githubusercontent.com/seven1m/open-bibles/master/eng-kjv.osis.xml"
)
OSIS_NAMESPACE = "http://www.bibletechnologies.net/2003/OSIS/namespace"
NS_MAP = dict(osis=OSIS_NAMESPACE)

# --- Configuration Variables ---
# Attribute names read from the XML while indexing books, chapters and verses.
BOOK_ID_ATTR = 'osisID'
CHAPTER_ID_ATTR = 'osisRef'  # Identifies the chapter marker tag
VERSE_ID_ATTR = 'osisID'  # Identifies the verse marker tag AND contains chapter/verse info

# --- Global Variables ---
# bible_data layout: { Book: { Chap: { VerseNum: verse_element } } }
bible_data, book_list = {}, []
# Parsed document root and the lazily built child -> parent lookup for it.
raw_xml_root = _parent_map_cache = None
# --- UI Elements ---
# Loading controls plus a small scrolling pane for diagnostic messages.
load_button = widgets.Button(
    description="Load & Process XML",
    button_style='info',
    tooltip='Download, parse, and process the Bible XML.',
)
load_status = widgets.Label(value="Status: Not Loaded")
diagnostic_output = widgets.Output(
    layout=dict(border='1px solid black', max_height='100px', overflow_y='scroll'),
)

# Passage selection: book, chapter and a free-form verse expression.
book_dropdown = widgets.Dropdown(
    description="Book:",
    tooltip='Select the Bible book.',
)
chapter_dropdown = widgets.Dropdown(
    description="Chapter:",
    tooltip='Select the chapter number.',
)
verse_selection_text = widgets.Text(
    description="Verses:",
    value="all",
    placeholder="e.g., all, 1-5, 1,3,7-9",
    tooltip='Enter verses: "all", single number (5), range (1-5), or mixed (1,3,5-7).',
)

# Formatting options applied when the text is generated.
title_checkbox = widgets.Checkbox(
    value=True,
    description="Include Title",
    tooltip='Prepend output with "Book Chapter".',
)
italic_style_radio = widgets.RadioButtons(
    options=[
        ('Markdown (*italic*)', 'markdown'),
        ('HTML (<i>italic</i>)', 'html'),
        ('Plain (no italics)', 'plain'),
    ],
    description='Italics:',
    value='markdown',
    tooltip='Choose how italicized text (KJV added words) is formatted.',
)
verse_separator_radio = widgets.RadioButtons(
    options=[
        ('Blank Line', '\n\n'),
        ('Single Newline', '\n'),
    ],
    description='Separator:',
    value='\n\n',
    tooltip='Choose the separator between verses.',
)

# Generation trigger (enabled once data is loaded) and the result area.
generate_button = widgets.Button(
    description="Generate",
    button_style='success',
    disabled=True,
    tooltip='Generate the formatted text based on selections.',
)
output_textarea = widgets.Textarea(
    value='',
    placeholder='Formatted text will appear here...',
    description='Output:',
    layout=dict(height='300px', width='95%'),
    disabled=False,
)
# --- Helper Functions ---
def build_parent_map(root):
    """Return a ``{child: parent}`` mapping for every element below *root*.

    ElementTree elements carry no reference to their parent, so the mapping
    is built by walking the tree once and is kept in the module-level
    ``_parent_map_cache`` for later calls.

    The cached mapping is reused only when it was built for *root*; a call
    with a different tree rebuilds it instead of silently returning parents
    from the previous document.

    Args:
        root: Element whose subtree should be mapped.

    Returns:
        dict mapping each descendant element to its direct parent. *root*
        itself has no entry.
    """
    global _parent_map_cache
    # A map built for this tree records root as the parent of root's first
    # child; that identity check is enough to recognise a stale cache without
    # keeping an extra reference to the tree alive.
    first_child = next(iter(root), None)
    cache_matches_root = (
        _parent_map_cache is not None
        and first_child is not None
        and _parent_map_cache.get(first_child) is root
    )
    if not cache_matches_root:
        _parent_map_cache = {
            child: parent for parent in root.iter() for child in parent
        }
    return _parent_map_cache
def find_parent(root, element):
    """Look up the direct parent of *element* within the tree under *root*.

    Returns None when *element* has no entry in the parent map (for example
    when it is the root itself).
    """
    return build_parent_map(root).get(element)
def parse_osis_id(osis_id_str):
    """Split an OSIS reference 'Book.Chap.Verse' into (book, chap_num, verse_num).

    Extra dotted parts after the verse are ignored, and only the leading
    digits of the verse part are used (so 'Rev.12.7a' yields verse 7).
    Returns None when the reference is empty, has fewer than three parts,
    or its chapter/verse parts are not numeric.
    """
    if not osis_id_str:
        return None

    fields = osis_id_str.split('.')
    if len(fields) < 3:
        return None
    book, chapter_text, verse_text = fields[:3]

    # Tolerate sub-verse suffixes by reading just the leading number.
    leading_digits = re.match(r'^(\d+)', verse_text)
    if leading_digits is None:
        return None

    try:
        return book, int(chapter_text), int(leading_digits.group(1))
    except (ValueError, TypeError):
        return None
def fetch_and_populate_data_v8(root):
    """Index the verse markers of a parsed OSIS document by book, chapter and verse.

    Rebinds the module-level ``bible_data`` and ``book_list``:

    * Books are ``<div type="book">`` elements found directly under
      ``<osisText>`` or inside a ``<div type="bookGroup">``; only those
      carrying the ``BOOK_ID_ATTR`` attribute are used.
    * Within a book, direct ``<chapter>`` children carrying
      ``CHAPTER_ID_ATTR`` switch the current chapter, but only when their
      ``n`` attribute agrees with the chapter number in the reference.
    * ``<verse>`` elements carrying ``VERSE_ID_ATTR`` are collected from the
      book's direct ``<p>``, ``<lg>``, ``<list>`` and ``<table>`` children
      and stored when their reference names the current book and chapter.
      The first element seen for a verse number wins.

    Progress and a summary are printed into ``diagnostic_output``.

    Args:
        root: Root element of the parsed OSIS XML.

    Raises:
        ValueError: If *root* is None, no ``<osisText>`` element exists, or
            no book element with an ID is found.
    """
    global bible_data, book_list
    bible_data = {}
    book_list = []
    if root is None:
        raise ValueError("XML Root is None.")

    ns = f'{{{OSIS_NAMESPACE}}}'
    chapter_tag = f'{ns}chapter'
    verse_tag = f'{ns}verse'
    # Only these direct children of a book are searched for verse markers.
    container_tags = {f'{ns}p', f'{ns}lg', f'{ns}list', f'{ns}table'}
    found_books = found_chapters = processed_verses = 0

    with diagnostic_output:
        clear_output(wait=True)
        print("Processing (v8): Finding books...")

    osis_text_element = root.find(f".//{ns}osisText", namespaces=NS_MAP)
    if osis_text_element is None:
        with diagnostic_output:
            print("ERROR: Cannot find <osisText> element.")
        raise ValueError("<osisText> not found.")

    # Combine direct book divs and those under bookGroup
    book_elements_query = f"./{ns}div[@type='book']"
    book_group_query = f"./{ns}div[@type='bookGroup']/{ns}div[@type='book']"
    all_potential_books = (
        osis_text_element.findall(book_elements_query, namespaces=NS_MAP)
        + osis_text_element.findall(book_group_query, namespaces=NS_MAP)
    )
    valid_book_elements = [b for b in all_potential_books if b.get(BOOK_ID_ATTR)]
    if not valid_book_elements:
        with diagnostic_output:
            print(f"ERROR: No book elements with '{BOOK_ID_ATTR}' found.")
        raise ValueError("No valid book elements found.")

    for book_element in valid_book_elements:
        book_name = book_element.get(BOOK_ID_ATTR)
        found_books += 1
        book_list.append(book_name)
        chapters = bible_data[book_name] = {}
        current_chapter_num = None

        for child in book_element:
            if child.tag == chapter_tag and child.get(CHAPTER_ID_ATTR):
                chapter_ref = child.get(CHAPTER_ID_ATTR)
                # parse_osis_id needs three dotted parts, so the 'Book.Chap'
                # reference is padded with a dummy verse number.
                parsed_ref = parse_osis_id(chapter_ref + ".0")
                n_attr = child.get('n')
                if parsed_ref and n_attr:
                    chap_num = parsed_ref[1]
                    try:
                        n_matches = int(n_attr) == chap_num
                    except ValueError as e_chap:
                        # A non-numeric 'n' is the only failure possible here;
                        # report it next to the other diagnostics.
                        n_matches = False
                        with diagnostic_output:
                            print(f"Warning: Error processing chapter marker {chapter_ref}: {e_chap}")
                    if n_matches:
                        current_chapter_num = chap_num
                        if chap_num not in chapters:
                            chapters[chap_num] = {}
                            found_chapters += 1
                continue

            # Verses can only be filed once a chapter marker has been accepted.
            if child.tag not in container_tags or current_chapter_num is None:
                continue

            chapter_verses = chapters[current_chapter_num]
            for element in child.iter(verse_tag):
                verse_osis_id = element.get(VERSE_ID_ATTR)
                if not verse_osis_id:
                    continue
                parsed_verse_ref = parse_osis_id(verse_osis_id)
                if not parsed_verse_ref:
                    continue
                v_book, v_chap, v_num = parsed_verse_ref
                if (
                    v_book == book_name
                    and v_chap == current_chapter_num
                    and v_num not in chapter_verses
                ):
                    chapter_verses[v_num] = element
                    processed_verses += 1

    with diagnostic_output:
        print("Population Summary (v8):")
        print(f"- Found and processed {found_books} books.")
        print(f"- Found and processed {found_chapters} chapters.")
        print(f"- Found and stored {processed_verses} verse elements (using osisID).")
        if processed_verses == 0 and found_chapters > 0:
            print("ERROR: Failed to store any verse elements. Check parsing logic within book's children loop.")
        elif found_chapters == 0 and found_books > 0:
            print("ERROR: No chapters identified correctly.")
def _iter_tree_events(element):
    """Yield ('start', node) and ('end', node) for *element*'s subtree in document order."""
    yield 'start', element
    for child in element:
        yield from _iter_tree_events(child)
    yield 'end', element


def extract_and_format_verse_text_v8(verse_start_element, italic_style):
    """Return the text of one verse, starting at its milestone marker.

    The verse's enclosing ``<p>`` or ``<lg>`` is located through
    ``find_parent`` and walked in document order. Text is collected from the
    tail of *verse_start_element* until either the verse's own end milestone
    (a ``<verse>`` whose ``eID`` equals the start marker's ``sID``) or the
    next ``<verse>`` carrying a different ``VERSE_ID_ATTR`` value.

    * ``<note>``, ``<rdg>`` and ``<title>`` are skipped together with
      everything inside them; the text following them is kept.
    * ``<transChange>`` text is wrapped in italic markers.
    * Text fragments are concatenated as they appear in the XML and the
      whitespace is collapsed afterwards, so punctuation directly after an
      element stays attached to the preceding word.

    Args:
        verse_start_element: The ``<verse>`` start marker of the wanted verse.
        italic_style: 'markdown' wraps added words in ``*...*``, 'html' in
            ``<i>...</i>``; any other value leaves them unmarked.

    Returns:
        The verse text with runs of whitespace collapsed to single spaces.
        When no enclosing ``<p>``/``<lg>`` is found, a warning is printed and
        only the stripped tail of the marker is returned.
    """
    ns = f'{{{OSIS_NAMESPACE}}}'
    verse_tag = ns + 'verse'
    trans_change_tag = ns + 'transChange'
    container_tags = {ns + 'p', ns + 'lg'}
    ignore_tags = {ns + 'note', ns + 'rdg', ns + 'title'}
    # Structural wrappers whose own leading text is not treated as verse text.
    no_text_tags = {ns + 'div', ns + 'list'}
    # Line/block level elements: their boundaries separate words even when the
    # XML has no whitespace between them.
    break_tags = {
        ns + name
        for name in ('l', 'lb', 'lg', 'p', 'div', 'list', 'item', 'table', 'row', 'cell')
    }

    start_verse_osisID = verse_start_element.get(VERSE_ID_ATTR)
    start_sid = verse_start_element.get('sID')

    # Climb towards the root until a text container is found; give up at the
    # chapter or book boundary.
    parent_container = None
    current = verse_start_element
    while current is not None:
        parent = find_parent(raw_xml_root, current)
        if parent is None:
            break
        if parent.tag in container_tags:
            parent_container = parent
            break
        if parent.tag == ns + 'chapter' or (
            parent.tag == ns + 'div' and parent.get('type') == 'book'
        ):
            break
        current = parent

    if parent_container is None:
        print(f"Warning: Could not find parent container (e.g., <p>) for verse {start_verse_osisID}. Text incomplete.")
        return (verse_start_element.tail or '').strip()

    if italic_style == 'markdown':
        open_mark, close_mark = "*", "*"
    elif italic_style == 'html':
        open_mark, close_mark = "<i>", "</i>"
    else:
        open_mark, close_mark = "", ""

    chunks = []
    collecting = False
    skipped_root = None  # ignored element whose subtree is currently being skipped

    for event, node in _iter_tree_events(parent_container):
        if node is parent_container:
            # The container's own text precedes every verse marker and its
            # tail lies outside the container.
            continue

        if node is verse_start_element:
            # Milestone marker: the verse text begins in its tail.
            if event == 'end':
                collecting = True
                chunks.append(node.tail or '')
            continue

        if not collecting:
            continue

        if skipped_root is not None:
            # Inside an ignored element: drop everything until it closes,
            # then resume with the text that follows it.
            if event == 'end' and node is skipped_root:
                skipped_root = None
                chunks.append(node.tail or '')
            continue

        if node.tag == verse_tag:
            if event == 'start':
                other_id = node.get(VERSE_ID_ATTR)
                if other_id and other_id != start_verse_osisID:
                    break  # next verse begins
                if start_sid and node.get('eID') == start_sid:
                    break  # this verse's own end milestone
            # Text trailing any other verse marker is not part of this verse.
            continue

        if event == 'start':
            if node.tag in ignore_tags:
                skipped_root = node
                continue
            if node.tag in break_tags:
                chunks.append(' ')
            if node.tag == trans_change_tag:
                text = node.text or ''
                core = text.strip()
                if core:
                    # Keep surrounding whitespace outside the markers so the
                    # emphasis hugs the word(s) it applies to.
                    lead = ' ' if text[0].isspace() else ''
                    trail = ' ' if text[-1].isspace() else ''
                    chunks.append(f"{lead}{open_mark}{core}{close_mark}{trail}")
                else:
                    chunks.append(text)
            elif node.tag not in no_text_tags:
                chunks.append(node.text or '')
        else:
            if node.tag in break_tags:
                chunks.append(' ')
            chunks.append(node.tail or '')

    return ' '.join(''.join(chunks).split())
def update_chapter_dropdown(change):
    """Refill the chapter dropdown for the book named in ``change['new']``.

    The dropdown is emptied first. If the book is known, its integer chapter
    keys are listed in ascending order and the first one is selected. A book
    without chapters produces a printed warning (and a message in the output
    area once loading has reported "Ready").
    """
    book = change['new']
    chapter_dropdown.options = []

    if book not in bible_data:
        return

    chapters_by_number = bible_data[book]
    if not isinstance(chapters_by_number, dict):
        print(f"Error: Data format issue for book {book}.")
        return

    numbers = [key for key in chapters_by_number if isinstance(key, int)]
    numbers.sort()
    if numbers:
        chapter_dropdown.options = numbers
        chapter_dropdown.value = numbers[0]
        return

    # Nothing to offer for this book.
    if load_status.value.startswith("Status: Ready"):
        output_textarea.value = f"No chapters found for {book}."
    print(f"Warning: No chapters populated for {book}.")
def _to_verse_number(text):
    """Convert *text* to an int, raising a readable ValueError when it is not a number."""
    try:
        return int(text)
    except ValueError:
        raise ValueError(f"'{text}' is not a verse number") from None


def parse_verse_selection(selection_str, available_verses_set):
    """Turn a verse selection expression into a sorted list of verse numbers.

    Accepted forms (case-insensitive, surrounding whitespace ignored):
    ``all``, a single number (``5``), a range (``1-5``), or a comma-separated
    mix (``1,3,5-7``). Empty items between commas are skipped.

    Args:
        selection_str: The expression typed by the user.
        available_verses_set: Set of verse numbers that exist in the chapter.

    Returns:
        Sorted list of the selected verse numbers. Empty when either argument
        is empty. A range selects only the verses inside it that actually
        exist.

    Raises:
        ValueError: If an item is not a number or a ``start-end`` range, a
            single verse does not exist, or a range is reversed or reaches
            outside the lowest/highest available verse. The message always
            names the offending item.
    """
    if not available_verses_set or not selection_str:
        return []

    min_verse, max_verse = min(available_verses_set), max(available_verses_set)
    normalized = selection_str.lower().strip()
    if normalized == 'all':
        return sorted(available_verses_set)

    selected_verses = set()
    for part in normalized.split(','):
        part = part.strip()
        if not part:
            continue
        try:
            if '-' in part:
                bounds = [piece.strip() for piece in part.split('-')]
                # Reject '1-2-3', '-5' and '5-' up front so the user sees what
                # is wrong instead of an internal unpacking/int() message.
                if len(bounds) != 2 or not all(bounds):
                    raise ValueError("a range must be written as start-end")
                start, end = _to_verse_number(bounds[0]), _to_verse_number(bounds[1])
                if start < min_verse or end > max_verse or start > end:
                    raise ValueError(f"Range {part} invalid ({min_verse}-{max_verse})")
                selected_verses.update(
                    v for v in available_verses_set if start <= v <= end
                )
            else:
                verse_num = _to_verse_number(part)
                if verse_num not in available_verses_set:
                    raise ValueError(f"Verse {part} not available ({min_verse}-{max_verse})")
                selected_verses.add(verse_num)
        except ValueError as e:
            raise ValueError(f"Invalid format/verse in '{part}': {e}") from e
    return sorted(selected_verses)
# --- Event Handlers ---
def on_load_button_clicked(b):
    """Handle the load button: download the Bible XML, parse it and index it.

    All previously loaded state (parsed tree, parent map, book/chapter data,
    dropdown options, output text) is reset and the UI is redrawn before the
    download starts. Progress is reported through ``load_status``; details and
    errors are printed into ``diagnostic_output``. The generate button is
    enabled only when at least one verse was stored.

    Args:
        b: The clicked button (supplied by the widget framework; unused).
    """
    global raw_xml_root, _parent_map_cache
    raw_xml_root = None
    _parent_map_cache = None
    bible_data.clear()
    book_list.clear()
    book_dropdown.options = []
    chapter_dropdown.options = []
    generate_button.disabled = True
    output_textarea.value = ""
    load_status.value = "Status: Not Loaded"
    diagnostic_output.clear_output()
    clear_output(wait=True)
    display_ui()

    try:
        load_status.value = "Status: Downloading..."
        # Without a timeout a stalled connection would block this handler
        # forever: (connect seconds, read seconds).
        response = requests.get(BIBLE_XML_URL, timeout=(10, 120))
        response.raise_for_status()

        load_status.value = "Status: Parsing XML..."
        # Hand the raw bytes to the parser so it applies the encoding declared
        # by the document itself rather than a charset guess over the whole
        # download.
        raw_xml_root = ET.fromstring(response.content)

        load_status.value = "Status: XML Parsed. Populating data (v8)..."
        fetch_and_populate_data_v8(raw_xml_root)  # Use v8 population logic

        has_verses = any(
            verses
            for chapters in bible_data.values()
            if isinstance(chapters, dict)
            for verses in chapters.values()
        )
        if not book_list:
            load_status.value = "Status: Failed: No books loaded."
            generate_button.disabled = True
            with diagnostic_output:
                print("ERROR: No books found/processed.")
        elif not has_verses:
            load_status.value = "Status: Failed: No verses loaded. Check Warnings/XML."
            generate_button.disabled = True
            with diagnostic_output:
                print("ERROR: Books/Chapters processed, but NO verse data stored.")
        else:
            load_status.value = f"Status: Ready. {len(book_list)} books processed."
            generate_button.disabled = False
            book_dropdown.options = book_list
            if book_list:
                update_chapter_dropdown({'new': book_list[0]})
    except requests.exceptions.RequestException as e:
        load_status.value = f"Status: Error downloading - {e}"
        with diagnostic_output:
            print(f"Download Error: {e}")
    except ET.ParseError as e:
        load_status.value = f"Status: Error parsing XML - {e}"
        with diagnostic_output:
            print(f"XML Parse Error: {e}")
    except Exception as e:
        # Last-resort boundary for a UI callback: report instead of failing silently.
        load_status.value = f"Status: Error during processing - {e}"
        generate_button.disabled = True
        with diagnostic_output:
            print("Unexpected Error:")
            traceback.print_exc()
def on_generate_button_clicked(b):
    """Handle the generate button: render the selected verses into the output area.

    Reads the book, chapter, verse expression and formatting options from the
    widgets, extracts each selected verse with the v8 formatter and writes
    the joined result (optionally preceded by a "Book Chapter" title) to
    ``output_textarea``. Invalid selections are reported there as
    "Input Error: ..."; any other failure as "Generation Error: ..." with a
    printed traceback.
    """
    output_textarea.value = "Generating..."
    if raw_xml_root is None:
        output_textarea.value = "Error: XML data not loaded. Click 'Load & Process XML'."
        return

    # Build the parent lookup up front; verse extraction relies on it.
    build_parent_map(raw_xml_root)

    try:
        book = book_dropdown.value
        if not chapter_dropdown.value:
            raise ValueError("Please select a chapter.")
        chapter = int(chapter_dropdown.value)
        requested = verse_selection_text.value
        with_title = title_checkbox.value
        italic_style = italic_style_radio.value
        separator = verse_separator_radio.value

        if not book:
            raise ValueError("Please select a book.")
        if book not in bible_data:
            raise ValueError(f"Book '{book}' not found.")
        if chapter not in bible_data.get(book, {}):
            raise ValueError(f"Chapter {chapter} not found for book '{book}'.")

        verses_by_number = bible_data[book][chapter]
        if not isinstance(verses_by_number, dict):
            raise TypeError(f"Data error: Expected dict for {book} {chapter}, got {type(verses_by_number)}")

        available = set(verses_by_number)
        if not available:
            output_textarea.value = f"{book} {chapter}: No verses loaded for this chapter."
            return

        wanted = parse_verse_selection(requested, available)
        if not wanted and requested.lower() != 'all' and requested.strip() != '':
            lowest, highest = min(available), max(available)
            raise ValueError(f"No valid verses selected/found: '{requested}'. Available: {lowest}-{highest}")

        rendered = [
            extract_and_format_verse_text_v8(verses_by_number[number], italic_style)
            for number in wanted
            if number in verses_by_number
        ]

        lines = []
        if with_title:
            lines.append(f"{book} {chapter}")
        if rendered:
            if with_title:
                lines.append("")  # blank line between title and text
            lines.append(separator.join(rendered))
        elif not with_title:
            lines.append("(No verses selected or found)")
        output_textarea.value = "\n".join(lines)
    except ValueError as e:
        output_textarea.value = f"Input Error: {e}"
    except Exception as e:
        output_textarea.value = f"Generation Error: {e}"
        print("Detailed generation error:")
        traceback.print_exc()
# --- Wire Up Events ---
# Button clicks run the load/generate handlers; choosing a book refreshes
# the chapter dropdown.
load_button.on_click(on_load_button_clicked)
generate_button.on_click(on_generate_button_clicked)
book_dropdown.observe(update_chapter_dropdown, names='value')
# --- Display UI ---
def display_ui():
    """Lay out all widgets and display them.

    Top to bottom: load button with status, diagnostics pane, a row holding
    the passage selectors beside the formatting options, the generate button
    and the output area.
    """
    selection_column = widgets.VBox([book_dropdown, chapter_dropdown, verse_selection_text])
    formatting_column = widgets.VBox([title_checkbox, italic_style_radio, verse_separator_radio])
    layout_rows = [
        widgets.HBox([load_button, load_status]),
        diagnostic_output,
        widgets.HBox([selection_column, formatting_column]),
        generate_button,
        output_textarea,
    ]
    display(widgets.VBox(layout_rows))
# --- Initial Display ---
# Show the interface as soon as this cell has run.
display_ui()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement