Advertisement
Python253

all_utf8_symbols

Mar 8th, 2024 (edited)
611
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 12.32 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. # -*- coding: UTF-8 -*-
  3. # Filename: all_utf8_symbols.py
  4. # Author: Jeoi Reqi
  5.  
  6. """
  7. Full UTF-8 Symbols & Site Generator Script
  8.  
  9. This Python script creates an index page for a comprehensive UTF-8 symbol HTML reference, covering the entire Unicode code space of 1,114,112 code points (U+0000 to U+10FFFF).
  10. The index organizes these characters into pages, with each page containing 10,000 characters broken down into stylized tables displaying 100 characters at a time.
  11.  
  12. Requirements:
  13. - Python 3
  14. - Web Browser
  15.  
  16. Usage:
  17. 1. Ensure you have Python 3 installed on your system.
  18. 2. Run this script to generate the UTF-8 symbol HTML reference.
  19. 3. When prompted, choose whether to save the archive of symbols in groups of 10,000 per text file.
  20. 4. After all files have saved, open `index.html` in a web browser.
  21. 5. Keep the saved HTML files in the same folder as the index file.
  22.  
  23. Note:
  24. - If chosen, individual TXT files for each range will be saved in the 'Archive' folder.
  25. - The output HTML menu will be saved to `index.html`.
  26. - All HTML files for the pages will be saved in the current working directory.
  27. - The script itself makes no network requests; an active internet connection is only needed when viewing the generated pages, so the browser can load the Google Fonts stylesheet they link to.
  28. """
  29.  
  30. # IMPORTS
  31. import os
  32.  
  33. # Function to create the individual pages
  34. def generate_html_page(start, end, filename):
  35.     """
  36.    Generate an HTML page with stylized tables for a specified Unicode range.
  37.  
  38.    Parameters:
  39.    - start (int): The starting Unicode code point of the range.
  40.    - end (int): The ending Unicode code point of the range.
  41.    - filename (str): The name of the HTML file to be generated.
  42.  
  43.    Returns:
  44.    None
  45.    """
  46.     # Open the HTML file for writing
  47.     with open(filename, "w", encoding="utf-8") as html_file:
  48.         # Write the HTML header with metadata and styles
  49.         html_file.write(f"<!DOCTYPE html>\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>"
  50.                         f"<meta content=\"text/html; charset=utf-8\" http-equiv=\"Content-Type\"/>"
  51.                         f"<title>UTF-8 Symbols - Range {start}-{end}</title>"
  52.                         f"<link rel=\"stylesheet\" href=\"https://fonts.googleapis.com/css2?family=Amatic+SC:wght@700&display=swap\">"
  53.                         f"<style>"
  54.                         f"body {{ font-family: 'Arial', sans-serif; background: linear-gradient(90deg, #000000 0%, #111e3a 100%); color: #808080; }}"
  55.                         f"h1, h2 {{ font-family: 'Amatic SC', cursive; text-align: center; }}"
  56.                         f"h1 {{ font-size: 90px; color: #336699; }}"
  57.                         f"h2 {{ font-size: 30px; color: #808080; }}"
  58.                         f"table {{ border-collapse: collapse; width: 100%; }}"
  59.                         f"table, th, td {{ border: 1px solid #ddd; padding: 12px; }}"
  60.                         f"th {{ background-color: #333333; color: #336699; font-size: 24px; }}"
  61.                         f"details {{ margin-bottom: 10px; }}"
  62.                         f"summary {{ cursor: pointer; color: #808080; font-size: 30px; font-family: 'Amatic SC', cursive; font-weight: bold; }}"
  63.                         f"footer {{ text-align: center; margin-top: 40px; color: #808080; font-size: 20px; font-family: 'Amatic SC', cursive; font-weight: bold; }}"
  64.                         f"</style>"
  65.                         f"</head>\n<body>\n\t<h1>UTF-8 Symbols - Range {start}-{end}</h1>"
  66.                         f"\t<h2>SELECT YOUR RANGE BELOW:</h2>"
  67.                         f"\t<h2>⮛ ⮛ ⮛ ⮛ ⮛</h2>\n")
  68.  
  69.         # Loop through each symbol code in the specified range
  70.         sym_code = start
  71.         while sym_code <= end:
  72.             # Check if the current symbol code is a multiple of 100 (for table organization)
  73.             modZero = sym_code % 100
  74.             if modZero == 0:
  75.                 range_id = f"range_{sym_code}_{sym_code + 99}"
  76.                 unicode_range = f"U+{sym_code:04X} - U+{sym_code + 99:04X}"
  77.                 hex_range = f"{sym_code:X} - {sym_code + 99:X}"
  78.                 # Write the details section with Unicode and Hex range in the summary
  79.                 html_file.write(f"\n<details>\n\t<summary style='font-family: \"Amatic SC\", cursive; font-weight: bold; font-size: 36px; color: #336699;'>{sym_code}-{sym_code + 99} (Uni: {unicode_range} / Hex: {hex_range})</summary>\n")
  80.                 # Write the table header
  81.                 html_file.write(f"\t<table>\n\t\t<tr>\n\t\t\t<th style='font-family: \"Amatic SC\", cursive; font-weight: bold; font-size: 30px; color: #336699;'>Unicode</th>\n"
  82.                                 f"\t\t\t<th style='font-family: \"Amatic SC\", cursive; font-weight: bold; font-size: 30px; color: #336699;'>Hex</th>\n"
  83.                                 f"\t\t\t<th style='font-family: \"Amatic SC\", cursive; font-weight: bold; font-size: 30px; color: #336699;'>Symbol</th>\n\t\t</tr>\n\n")
  84.  
  85.             # Write each symbol entry in the table
  86.             unicode_str = f"U+{sym_code:04X}"
  87.             html_file.write(f"\t\t<tr>\n\t\t\t<td style='color: yellow; font-size: 24px;'>{unicode_str}</td>\n"
  88.                             f"\t\t\t<td style='color: red; font-size: 24px;'>{sym_code:X}</td>\n"
  89.                             f"\t\t\t<td style='color: #808080; font-size: 24px;'>&#x{sym_code:X};</td>\n"
  90.                             f"\t\t</tr>\n\n")
  91.  
  92.             # Check if the current symbol code is the last in the 100 block
  93.             modFF = sym_code % 100
  94.             if modFF == 99:
  95.                 # Close the table and details section
  96.                 html_file.write("\t</table>\n</details>\n")
  97.  
  98.             # Move to the next symbol code
  99.             sym_code += 1
  100.  
  101.         # Write the footer with script information
  102.         html_file.write("\n\t<footer>\n"
  103.                         "\t\t<p>Generated by utf8_symbols.py</p>\n"
  104.                         "\t\t<p>Author: Jeoi Reqi</p>\n"
  105.                         "\t</footer>\n")
  106.  
  107.         # Close the HTML file
  108.         html_file.write("\n\t</body></html>\n")
  109.  
  110. # Function to create the index page
  111. def generate_index_page(ranges, ranges_per_row):
  112.     """
  113.    Generate an index HTML page with links to individual range pages.
  114.  
  115.    Parameters:
  116.    - ranges (list): A list of tuples representing Unicode ranges.
  117.    - ranges_per_row (int): The number of ranges to display per row in the index.
  118.  
  119.    Returns:
  120.    None
  121.    """
  122.     # Open the index HTML file for writing
  123.     with open("index.html", "w", encoding="utf-8") as index_file:
  124.         # Write the HTML header with metadata and styles
  125.         index_file.write(f"<!DOCTYPE html>\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>"
  126.                          f"<meta content=\"text/html; charset=utf-8\" http-equiv=\"Content-Type\"/>"
  127.                          f"<title>UTF-8 Symbols - Index</title>"
  128.                          f"<link rel=\"stylesheet\" href=\"https://fonts.googleapis.com/css2?family=Amatic+SC:wght@700&display=swap\">"
  129.                          f"<style>"
  130.                          f"body {{ font-family: 'Arial', sans-serif; background: linear-gradient(90deg, #000000 0%, #111e3a 100%); color: #808080; }}"
  131.                          f"h1, h2 {{ font-family: 'Amatic SC', cursive; text-align: center; }}"
  132.                          f"h1 {{ font-size: 90px; color: #336699; }}"
  133.                          f"h2 {{ font-size: 30px; color: #808080; }}"
  134.                          f"#grid-container {{ display: grid; grid-template-columns: repeat({ranges_per_row}, 1fr); grid-gap: 20px; }}"
  135.                          f".grid-cell {{ text-align: center; padding: 20px; border: 1px solid #ddd; }}"
  136.                          f"a {{ color: #336699; text-decoration: none; }}"
  137.                          f"footer {{ text-align: center; margin-top: 40px; color: #808080; font-size: 20px; font-family: 'Amatic SC', cursive; font-weight: bold; }}"
  138.                          f"</style>"
  139.                          f"</head>\n<body>\n\t<h1>UTF-8 Symbols - Index</h1>"
  140.                          f"\t<h2>SELECT YOUR RANGE BELOW:</h2>\n"
  141.                          f"\t<h2>⮛ ⮛ ⮛ ⮛ ⮛</h2>\n"
  142.                          f"\t<div id=\"grid-container\">\n")
  143.  
  144.         # Write links to individual range pages in a grid layout
  145.         for start, end, filename in ranges:
  146.             index_file.write(f"\t\t<div class=\"grid-cell\"><a href=\"{filename}\">{start}-{end}</a></div>\n")
  147.  
  148.         # Close the grid container
  149.         index_file.write("\n\t</div>")
  150.        
  151.         # Add extra blank lines after the footer for padding
  152.         index_file.write("\n\t<br />\n\t<br />\n\t<br />\n\t<br />\n")
  153.  
  154.         # Write the footer with script information
  155.         index_file.write("\n\t<footer>\n"
  156.                          "\t\t<p>Generated by utf8_symbols.py</p>\n"
  157.                          "\t\t<p>Author: Jeoi Reqi</p>\n"
  158.                          "\t</footer>\n")
  159.  
  160.         # Close the HTML file
  161.         index_file.write("\n\t</body></html>\n")
  162.  
  163. # Function to write Unicode symbols to a file with header
  164. def write_unicode_to_file(file_name, start, end):
  165.     with open(file_name, 'w', encoding='utf-8', errors='replace') as file:
  166.         # Write header to the file
  167.         file.write("\n------------------------\nUNICODE \t\t SYMBOL \n------------------------\n")
  168.        
  169.         # Write Unicode symbols and corresponding strings
  170.         for code_point in range(start, end + 1):
  171.             unicode_str = f'U+{code_point:04X}'
  172.             try:
  173.                 symbol_str = chr(code_point)
  174.             except ValueError:
  175.                 # Handle surrogate pairs explicitly
  176.                 high_surrogate = 0xD800 + ((code_point - 0x10000) >> 10)
  177.                 low_surrogate = 0xDC00 + ((code_point - 0x10000) & 0x3FF)
  178.                 symbol_str = f'{chr(high_surrogate)}{chr(low_surrogate)}'
  179.  
  180.            
  181.             file.write(f'{unicode_str}\t\t\t\t{symbol_str}\t\n------------------------\n')
  182.            
  183.  
  184.  
  185. # Function to create a folder called Archive & Save the data to file.
  186. def write_unicode_to_txt(start, end, ranges, chars_per_file):
  187.     """
  188.    Write Unicode values, hex values, and symbols to TXT files in groups of chars_per_file.
  189.  
  190.    Parameters:
  191.    - start (int): The starting Unicode code point of the entire range.
  192.    - end (int): The ending Unicode code point of the entire range.
  193.    - ranges (list): A list of tuples representing Unicode ranges.
  194.    - chars_per_file (int): Number of characters per TXT file.
  195.  
  196.    Returns:
  197.    None
  198.    """
  199.     choice = input("\nDo you want to save the full archive?\n1: YES\n2: NO\nWhat is your choice: (1 Or 2)?\n")
  200.  
  201.     full_archive = choice == '1'
  202.  
  203.     if full_archive:
  204.         print("Saving The Archive May Take Some Time To Complete...")
  205.         # Create 'Archive' folder in the current working directory
  206.         archive_folder = os.path.join(os.getcwd(), 'Archive')
  207.         os.makedirs(archive_folder, exist_ok=True)
  208.  
  209.         for start_range, end_range, _ in ranges:
  210.             archive_filename = os.path.join(archive_folder, f"archive_{start_range:06X}_{end_range:06X}.txt")
  211.  
  212.             # Call the function to write Unicode symbols to file
  213.             write_unicode_to_file(archive_filename, start_range, end_range)
  214.  
  215.             # Archive File Creation Notifications
  216.             print(f"Output Archive File {archive_filename} Completed!")
  217.  
  218. # Define the total number of characters in the Unicode range
  219. total_chars = 1114112
  220.  
  221. # Define the number of characters per page
  222. chars_per_page = 10000
  223.  
  224. # Define the number of characters per file
  225. chars_per_file = 10000
  226.  
  227. # Calculate the number of pages needed
  228. num_pages = total_chars // chars_per_page
  229.  
  230. # Define the number of ranges to display per row in the index
  231. ranges_per_row = 5
  232.  
  233. # Generate a list of ranges for each page
  234. ranges = [(i * chars_per_page, (i + 1) * chars_per_page - 1, f"utf8_symbols_{i * chars_per_page}_{(i + 1) * chars_per_page - 1}.html") for i in range(num_pages)]
  235.  
  236. # Generate Unicode TXT files
  237. write_unicode_to_txt(0, total_chars - 1, ranges, chars_per_page)
  238.  
  239. # Generate HTML pages for each range
  240. for start, end, filename in ranges:
  241.     generate_html_page(start, end, filename)
  242.  
  243. # Generate the main index page
  244. generate_index_page(ranges, ranges_per_row)
  245.  
  246. # Archive File Creation Completion Notification
  247. print(f"\nAll Archive Files Completed & Saved In The Archive Folder\n")
  248.  
  249. # Script & File Creation Completion Notification
  250. print("\nOutput Saved To index.html & Individual HTML Files For Each Range.\n")
  251.  
  252.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement