Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- # -*- coding: UTF-8 -*-
- # Filename: all_utf8_symbols.py
- # Author: Jeoi Reqi
- """
- Full UTF-8 Symbols & Site Generator Script
- This Python script creates an index page for a comprehensive UTF-8 symbol HTML reference, covering the entire Unicode range of 1,114,112 characters.
- The index organizes these characters into pages, with each page containing 10,000 characters broken down into stylized tables displaying 100 characters at a time.
- Requirements:
- - Python 3
- - Web Browser
- Usage:
- 1. Ensure you have Python 3 installed on your system.
- 2. Run this script to generate the UTF-8 symbol HTML reference.
- 3. When prompted, choose whether to save the archive of symbols in groups of 10,000 per text file.
- 4. After all files have saved, open `index.html` in a web browser.
- 5. Keep the saved HTML files in the same folder as the index file.
- Note:
- - If chosen, individual TXT files for each range will be saved in the 'Archive' folder.
- - The output HTML menu will be saved to `index.html`.
- - All HTML files for the pages will be saved in the current working directory.
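- - An internet connection is only needed when viewing the pages: the generated HTML links to the 'Amatic SC' font on Google Fonts, which the browser downloads. The script itself does not access the network.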
- """
- # IMPORTS
- import os
- # Function to create the individual pages
def generate_html_page(start, end, filename):
    """
    Generate an HTML page with stylized tables for a specified Unicode range.

    The code points are grouped into collapsible <details> sections, one per
    block of 100 aligned on multiples of 100. Each section contains a
    three-column table (Unicode notation, hex value, rendered symbol). When the
    range starts or ends in the middle of a block, the first/last section is
    shorter, its heading shows the real bounds, and every section that is
    opened is also closed.

    Parameters:
    - start (int): The starting Unicode code point of the range.
    - end (int): The ending Unicode code point of the range (inclusive).
    - filename (str): The name of the HTML file to be generated.

    Returns:
    None
    """
    # Opening tag shared by the three column headings of every table.
    th_open = "\t\t\t<th style='font-family: \"Amatic SC\", cursive; font-weight: bold; font-size: 30px; color: #336699;'>"
    table_header = ("\t<table>\n\t\t<tr>\n"
                    + th_open + "Unicode</th>\n"
                    + th_open + "Hex</th>\n"
                    + th_open + "Symbol</th>\n\t\t</tr>\n\n")

    # Open the HTML file for writing
    with open(filename, "w", encoding="utf-8") as html_file:
        # Write the HTML header with metadata and styles
        html_file.write(f"<!DOCTYPE html>\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>"
                        f"<meta content=\"text/html; charset=utf-8\" http-equiv=\"Content-Type\"/>"
                        f"<title>UTF-8 Symbols - Range {start}-{end}</title>"
                        f"<link rel=\"stylesheet\" href=\"https://fonts.googleapis.com/css2?family=Amatic+SC:wght@700&display=swap\">"
                        f"<style>"
                        f"body {{ font-family: 'Arial', sans-serif; background: linear-gradient(90deg, #000000 0%, #111e3a 100%); color: #808080; }}"
                        f"h1, h2 {{ font-family: 'Amatic SC', cursive; text-align: center; }}"
                        f"h1 {{ font-size: 90px; color: #336699; }}"
                        f"h2 {{ font-size: 30px; color: #808080; }}"
                        f"table {{ border-collapse: collapse; width: 100%; }}"
                        f"table, th, td {{ border: 1px solid #ddd; padding: 12px; }}"
                        f"th {{ background-color: #333333; color: #336699; font-size: 24px; }}"
                        f"details {{ margin-bottom: 10px; }}"
                        f"summary {{ cursor: pointer; color: #808080; font-size: 30px; font-family: 'Amatic SC', cursive; font-weight: bold; }}"
                        f"footer {{ text-align: center; margin-top: 40px; color: #808080; font-size: 20px; font-family: 'Amatic SC', cursive; font-weight: bold; }}"
                        f"</style>"
                        f"</head>\n<body>\n\t<h1>UTF-8 Symbols - Range {start}-{end}</h1>"
                        f"\t<h2>SELECT YOUR RANGE BELOW:</h2>"
                        f"\t<h2>⮛ ⮛ ⮛ ⮛ ⮛</h2>\n")

        # Loop through each symbol code in the specified range
        for sym_code in range(start, end + 1):
            offset = sym_code % 100

            # A section opens on every multiple of 100 and also on the very
            # first code point, so a range starting mid-block still gets a table.
            if offset == 0 or sym_code == start:
                # Last code point of this section: end of the aligned block,
                # but never past the requested range.
                block_last = min(sym_code - offset + 99, end)
                unicode_range = f"U+{sym_code:04X} - U+{block_last:04X}"
                hex_range = f"{sym_code:X} - {block_last:X}"
                # Write the details section with Unicode and Hex range in the summary
                html_file.write(f"\n<details>\n\t<summary style='font-family: \"Amatic SC\", cursive; font-weight: bold; font-size: 36px; color: #336699;'>{sym_code}-{block_last} (Uni: {unicode_range} / Hex: {hex_range})</summary>\n")
                # Write the table header
                html_file.write(table_header)

            # Write the symbol entry as a numeric character reference.
            # NOTE: per the HTML parsing rules, browsers show U+FFFD for
            # references to U+0000 and to surrogates (U+D800-U+DFFF); those rows
            # are still written so that every code point has an entry.
            html_file.write(f"\t\t<tr>\n\t\t\t<td style='color: yellow; font-size: 24px;'>U+{sym_code:04X}</td>\n"
                            f"\t\t\t<td style='color: red; font-size: 24px;'>{sym_code:X}</td>\n"
                            f"\t\t\t<td style='color: #808080; font-size: 24px;'>&#x{sym_code:X};</td>\n"
                            f"\t\t</tr>\n\n")

            # Close the section at the end of the aligned block, and also at
            # the end of the range so the final table is never left open.
            if offset == 99 or sym_code == end:
                html_file.write("\t</table>\n</details>\n")

        # Write the footer with script information
        html_file.write("\n\t<footer>\n"
                        "\t\t<p>Generated by utf8_symbols.py</p>\n"
                        "\t\t<p>Author: Jeoi Reqi</p>\n"
                        "\t</footer>\n")
        # Close the HTML document
        html_file.write("\n\t</body></html>\n")
- # Function to create the index page
def generate_index_page(ranges, ranges_per_row, index_filename="index.html"):
    """
    Generate an index HTML page with links to individual range pages.

    Each entry of `ranges` becomes one cell of a CSS grid; the cell links to
    the entry's filename and is labelled "<start>-<end>".

    Parameters:
    - ranges (list): A list of (start, end, filename) tuples, one per range page.
    - ranges_per_row (int): The number of ranges to display per row in the index
      (used as the column count of the CSS grid).
    - index_filename (str): Path of the index file to write. Defaults to
      "index.html" in the current working directory.

    Returns:
    None
    """
    # Open the index HTML file for writing
    with open(index_filename, "w", encoding="utf-8") as index_file:
        # Write the HTML header with metadata and styles
        index_file.write(f"<!DOCTYPE html>\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>"
                         f"<meta content=\"text/html; charset=utf-8\" http-equiv=\"Content-Type\"/>"
                         f"<title>UTF-8 Symbols - Index</title>"
                         f"<link rel=\"stylesheet\" href=\"https://fonts.googleapis.com/css2?family=Amatic+SC:wght@700&display=swap\">"
                         f"<style>"
                         f"body {{ font-family: 'Arial', sans-serif; background: linear-gradient(90deg, #000000 0%, #111e3a 100%); color: #808080; }}"
                         f"h1, h2 {{ font-family: 'Amatic SC', cursive; text-align: center; }}"
                         f"h1 {{ font-size: 90px; color: #336699; }}"
                         f"h2 {{ font-size: 30px; color: #808080; }}"
                         f"#grid-container {{ display: grid; grid-template-columns: repeat({ranges_per_row}, 1fr); grid-gap: 20px; }}"
                         f".grid-cell {{ text-align: center; padding: 20px; border: 1px solid #ddd; }}"
                         f"a {{ color: #336699; text-decoration: none; }}"
                         f"footer {{ text-align: center; margin-top: 40px; color: #808080; font-size: 20px; font-family: 'Amatic SC', cursive; font-weight: bold; }}"
                         f"</style>"
                         f"</head>\n<body>\n\t<h1>UTF-8 Symbols - Index</h1>"
                         f"\t<h2>SELECT YOUR RANGE BELOW:</h2>\n"
                         f"\t<h2>⮛ ⮛ ⮛ ⮛ ⮛</h2>\n"
                         f"\t<div id=\"grid-container\">\n")

        # Write links to individual range pages in a grid layout
        for start, end, filename in ranges:
            index_file.write(f"\t\t<div class=\"grid-cell\"><a href=\"{filename}\">{start}-{end}</a></div>\n")

        # Close the grid container
        index_file.write("\n\t</div>")
        # Add line breaks between the grid and the footer for padding
        index_file.write("\n\t<br />\n\t<br />\n\t<br />\n\t<br />\n")
        # Write the footer with script information
        index_file.write("\n\t<footer>\n"
                         "\t\t<p>Generated by utf8_symbols.py</p>\n"
                         "\t\t<p>Author: Jeoi Reqi</p>\n"
                         "\t</footer>\n")
        # Close the HTML document
        index_file.write("\n\t</body></html>\n")
- # Function to write Unicode symbols to a file with header
def write_unicode_to_file(file_name, start, end):
    """
    Write a plain-text table of code points and their symbols to a file.

    The file starts with a header and then holds one "U+XXXX <tabs> symbol"
    line per code point, each followed by a dashed separator line.

    Parameters:
    - file_name (str): Path of the text file to create (overwritten if present).
    - start (int): First code point to write.
    - end (int): Last code point to write (inclusive).

    Returns:
    None

    Notes:
    - Lone surrogates (U+D800-U+DFFF) cannot be encoded as UTF-8; the file is
      opened with errors='replace', so they appear as '?'.
    - Values that chr() rejects (outside 0..0x10FFFF) are written as '?' too,
      instead of aborting the whole file.
    """
    separator = "------------------------\n"
    with open(file_name, 'w', encoding='utf-8', errors='replace') as file:
        # Write header to the file
        file.write("\n" + separator + "UNICODE \t\t SYMBOL \n" + separator)
        # Write Unicode symbols and corresponding strings
        for code_point in range(start, end + 1):
            try:
                symbol_str = chr(code_point)
            except (ValueError, OverflowError):
                # chr() only fails for values that are not Unicode code points;
                # use the same placeholder the encoder uses for surrogates.
                symbol_str = '?'
            file.write(f'U+{code_point:04X}\t\t\t\t{symbol_str}\t\n' + separator)
- # Function to create a folder called Archive & Save the data to file.
def write_unicode_to_txt(start, end, ranges, chars_per_file):
    """
    Ask whether to save a text archive and, if confirmed, write one TXT file per range.

    The user is prompted on stdin. Only the answer '1' (surrounding whitespace
    ignored) saves the archive; any other answer skips it. The files are written
    into an 'Archive' folder inside the current working directory, one per entry
    of `ranges`, named archive_<START>_<END>.txt with six-digit hex bounds.

    Parameters:
    - start (int): The starting Unicode code point of the entire range.
      Accepted for interface compatibility; not used, the files follow `ranges`.
    - end (int): The ending Unicode code point of the entire range.
      Accepted for interface compatibility; not used.
    - ranges (list): A list of (start, end, filename) tuples; only the two
      bounds of each tuple are used here.
    - chars_per_file (int): Number of characters per TXT file.
      Accepted for interface compatibility; not used.

    Returns:
    bool: True if the archive was written, False if the user declined.
    """
    choice = input("\nDo you want to save the full archive?\n1: YES\n2: NO\nWhat is your choice: (1 Or 2)?\n")
    # Tolerate stray spaces around the answer (e.g. "1 ").
    if choice.strip() != '1':
        return False

    print("Saving The Archive May Take Some Time To Complete...")
    # Create 'Archive' folder in the current working directory
    archive_folder = os.path.join(os.getcwd(), 'Archive')
    os.makedirs(archive_folder, exist_ok=True)

    for start_range, end_range, _ in ranges:
        archive_filename = os.path.join(archive_folder, f"archive_{start_range:06X}_{end_range:06X}.txt")
        # Call the function to write Unicode symbols to file
        write_unicode_to_file(archive_filename, start_range, end_range)
        # Archive File Creation Notifications
        print(f"Output Archive File {archive_filename} Completed!")
    return True
# Define the total number of characters in the Unicode range (U+0000..U+10FFFF)
total_chars = 1114112
# Define the number of characters per page
chars_per_page = 10000
# Define the number of characters per file
chars_per_file = 10000
# Define the number of ranges to display per row in the index
ranges_per_row = 5


def build_ranges(total, per_page):
    """
    Split the code points 0..total-1 into consecutive page ranges.

    Parameters:
    - total (int): Number of code points to cover.
    - per_page (int): Maximum number of code points per page; must be positive.

    Returns:
    list: (start, end, filename) tuples with inclusive bounds. The last range is
    shorter when `total` is not a multiple of `per_page`, so no code point is
    dropped.
    """
    page_ranges = []
    for first in range(0, total, per_page):
        # Clamp the final page to the last existing code point.
        last = min(first + per_page, total) - 1
        page_ranges.append((first, last, f"utf8_symbols_{first}_{last}.html"))
    return page_ranges


# Generate a list of ranges for each page
ranges = build_ranges(total_chars, chars_per_page)
# The number of pages needed (includes the final partial page)
num_pages = len(ranges)


def main():
    """Run the generator: optional TXT archive, one HTML page per range, then the index."""
    # Generate Unicode TXT files (the function asks the user first)
    write_unicode_to_txt(0, total_chars - 1, ranges, chars_per_file)
    # Generate HTML pages for each range
    for start, end, filename in ranges:
        generate_html_page(start, end, filename)
    # Generate the main index page
    generate_index_page(ranges, ranges_per_row)
    # Archive File Creation Completion Notification
    # NOTE(review): printed unconditionally, i.e. also when the archive prompt was declined.
    print("\nAll Archive Files Completed & Saved In The Archive Folder\n")
    # Script & File Creation Completion Notification
    print("\nOutput Saved To index.html & Individual HTML Files For Each Range.\n")


# Only generate files when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement