Advertisement
Python253

ipp7_0_voldemort_british

Jun 1st, 2024
698
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 11.83 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # Filename: ipp7_0_voldemort_british.py
  4. # Version: 1.0.0
  5. # Author: Jeoi Reqi
  6.  
  7. """
  8. Description:
  9.    - This script demonstrates "Chapter 3: Practice Project #7: Finding Voldemort: The British Brute-Force" from the book "Impractical Python Projects" by Lee Vaughan.
  10.    - It aims to find possible names that can be formed from the letters in "tmvoordle" using various filters such as consonant-vowel patterns, trigrams, and letter pairs.
  11.    - The script downloads a dictionary file containing English words from a URL and saves it alphabetically.
  12.    - It also downloads a trigrams file and saves it for later use.
  13.    
  14. Requirements:
  15.    - Python 3.x
  16.    - The following modules
  17.        - os
  18.        - sys
  19.        - requests
  20.        - itertools (specifically the permutations function)
  21.        - collections (specifically the Counter class)
  22.  
  23. Functions:
  24.    - main():
  25.        - Executes the main functionality of the script.
  26.    - download_dictionary(url, file_name):
  27.        - Download a dictionary file from a URL and save it alphabetically.
  28.    - load_dictionary(file):
  29.        - Open a text file & turn contents into a set of lowercase strings.
  30.    - download_trigrams_file(url, filename):
  31.        - Download a trigrams file from a URL and save it.
  32.    - prep_words(name, word_list_ini):
  33.        - Filter words from the initial word list based on the length.
  34.    - cv_map_words(word_list):
  35.        - Map words from the word list to their consonant-vowel patterns.
  36.    - cv_map_filter(name, filtered_cv_map):
  37.        - Filter words based on consonant-vowel patterns.
  38.    - trigram_filter(filter_1, trigrams_filtered):
  39.        - Filter words based on trigrams.
  40.    - letter_pair_filter(filter_2):
  41.        - Filter words based on letter pairs.
  42.    - view_by_letter(name, filter_3):
  43.        - Display filtered words based on the starting letter provided by the user.
  44.    
  45. Usage:
  46.    - Run the script directly in a Python 3.x environment:
  47.            
  48.            $ python ipp7_0_voldemort_british.py
  49.    
  50. Additional Notes:
  51.    - The script uses various filtering techniques to narrow down the list of possible names that can be formed from the given set of letters.
  52.    - It prompts the user to input a starting letter or press Enter to see all possible names.
  53. """
  54.  
  55. import os
  56. import sys
  57. import requests
  58.  
  59. from itertools import permutations
  60. from collections import Counter
  61.  
  62. def download_dictionary(url, file_name):
  63.     """
  64.    Download a dictionary file from a URL and save it alphabetically.
  65.    
  66.    Parameters:
  67.        url (str): The URL from which to download the dictionary file.
  68.        file_name (str): The name to save the downloaded file as.
  69.    
  70.    Raises:
  71.        requests.RequestException: If an error occurs during the HTTP request.
  72.    """
  73.     print("\nDownloading dictionary file from:\n" + "{}".format(url))
  74.     try:
  75.         response = requests.get(url)
  76.         response.raise_for_status()  # Check if the request was successful
  77.        
  78.         # Split the content by lines, strip whitespace, and sort alphabetically
  79.         sorted_content = sorted(line.strip() for line in response.text.strip().split('\n'))
  80.        
  81.         # Save the sorted content to the file
  82.         with open(file_name, 'w', encoding = 'UTF-8') as f:
  83.             f.write('\n'.join(sorted_content))
  84.        
  85.     except requests.RequestException as e:
  86.         print("\nError downloading dictionary from {}: {}".format(url, e))
  87.         sys.exit(1)
  88.     else:
  89.         print("\nDictionary file downloaded and saved alphabetically as: '{}'.".format(file_name))
  90.  
  91. def load_dictionary(file):
  92.     """
  93.    Open a text file & turn contents into a set of lowercase strings.
  94.    
  95.    Parameters:
  96.        file (str): The name of the file to open.
  97.    
  98.    Returns:
  99.        set: A set of lowercase strings containing the words from the file.
  100.    """
  101.     try:
  102.         with open(file, encoding='utf-8') as in_file:
  103.             loaded_txt = in_file.read().strip().split('\n')
  104.             loaded_txt_set = {x.lower() for x in loaded_txt}  # Ensure loaded_txt is converted to a set
  105.             return loaded_txt_set  # Return the set of lowercase strings
  106.     except IOError as e:
  107.         print("\n{}\nError opening {}. Terminating program.\n".format(e, file))
  108.         sys.exit(1)
  109.  
  110. def download_trigrams_file(url, filename):
  111.     """
  112.    Download a trigrams file from a URL and save it.
  113.    
  114.    Parameters:
  115.        url (str): The URL from which to download the trigrams file.
  116.        filename (str): The name to save the downloaded file as.
  117.    """
  118.     print("\nDownloading least-likely_trigrams.txt...\n")
  119.     response = requests.get(url)
  120.     with open(filename, 'wb') as f:
  121.         f.write(response.content)
  122.     print("\nDownload completed!\n")
  123.  
  124. def main():
  125.     """
  126.    Main function to orchestrate the execution of the program.
  127.    """
  128.     # Print description of the script
  129.     print("_" * 100)
  130.     print("\n\t  :: Chapter 3: Practice Project #7: Finding Voldemort: The British Brute-Force ::")
  131.     print("_" * 100)
  132.     print("\n- This script aims to find possible names that can be formed from the letters in 'tmvoordle'.\n"
  133.           "- It uses various filters such as consonant-vowel patterns, trigrams, and letter pairs.\n"
  134.           "- It downloads a dictionary file containing English words from a URL and saves it alphabetically.\n"
  135.           "- It also downloads a trigrams file and saves it for later use.\n")
  136.     print("\nLoading dictionary file...\n")
  137.     name = 'tmvoordle'
  138.     name = name.lower()
  139.    
  140.     # Define the URL and file name for the dictionary
  141.     dictionary_url = "https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt"
  142.     dictionary_file = "dictionary.txt"
  143.  
  144.     # Download and load the dictionary file
  145.     if dictionary_file not in os.listdir():
  146.         print("\nLoading dictionary file...\n")
  147.         download_dictionary(dictionary_url, dictionary_file)
  148.     word_list_ini = load_dictionary(dictionary_file)
  149.    
  150.     # Check if least_likely_trigrams.txt is in the cwd
  151.     trigrams_filename = 'least_likely_trigrams.txt'
  152.     if trigrams_filename not in os.listdir():
  153.         trigrams_url = 'https://github.com/rlvaugh/Impractical_Python_Projects/raw/master/Chapter_3/least-likely_trigrams.txt'
  154.         download_trigrams_file(trigrams_url, trigrams_filename)
  155.  
  156.     trigrams_filtered = load_dictionary(trigrams_filename)
  157.  
  158.     word_list = prep_words(name, word_list_ini)
  159.     filtered_cv_map = cv_map_words(word_list)
  160.     filter_1 = cv_map_filter(name, filtered_cv_map)
  161.     filter_2 = trigram_filter(filter_1, trigrams_filtered)
  162.     filter_3 = letter_pair_filter(filter_2)
  163.     view_by_letter(name, filter_3)
  164.  
  165. def prep_words(name, word_list_ini):
  166.     """
  167.    Filter words from the initial word list based on the length.
  168.    
  169.    Parameters:
  170.        name (str): The name to filter words for.
  171.        word_list_ini (list): The initial list of words.
  172.    
  173.    Returns:
  174.        list: A filtered list of words based on the length.
  175.    """
  176.     print("- length initial word_list = {}".format(len(word_list_ini)))
  177.     len_name = len(name)
  178.     word_list = [word.lower() for word in word_list_ini if len(word) == len_name]
  179.     print("- length of new word_list = {}".format(len(word_list)))
  180.     return word_list
  181.  
  182. def cv_map_words(word_list):
  183.     """
  184.    Map words from the word list to their consonant-vowel patterns.
  185.    
  186.    Parameters:
  187.        word_list (list): The list of words to map.
  188.    
  189.    Returns:
  190.        set: A set containing the consonant-vowel patterns.
  191.    """
  192.     vowels = 'aeiouy'
  193.     cv_mapped_words = []
  194.     for word in word_list:
  195.         temp = ''
  196.         for letter in word:
  197.             if letter in vowels:
  198.                 temp += 'v'
  199.             else:
  200.                 temp += 'c'
  201.         cv_mapped_words.append(temp)
  202.  
  203.     total = len(set(cv_mapped_words))
  204.     target = 0.05
  205.     n = int(total * target)
  206.     count_pruned = Counter(cv_mapped_words).most_common(total - n)
  207.     filtered_cv_map = set()
  208.     for pattern, _ in count_pruned:
  209.         filtered_cv_map.add(pattern)
  210.     print("- length filtered_cv_map = {}".format(len(filtered_cv_map)))
  211.     return filtered_cv_map
  212.  
  213. def cv_map_filter(name, filtered_cv_map):
  214.     """
  215.    Filter words based on consonant-vowel patterns.
  216.    
  217.    Parameters:
  218.        name (str): The name to filter words for.
  219.        filtered_cv_map (set): The set of filtered consonant-vowel patterns.
  220.    
  221.    Returns:
  222.        set: A set containing the filtered words.
  223.    """
  224.     perms = {''.join(i) for i in permutations(name)}
  225.     print("- length of initial permutations set = {}".format(len(perms)))
  226.     vowels = 'aeiouy'
  227.     filter_1 = set()
  228.     for candidate in perms:
  229.         temp = ''
  230.         for letter in candidate:
  231.             if letter in vowels:
  232.                 temp += 'v'
  233.             else:
  234.                 temp += 'c'
  235.         if temp in filtered_cv_map:
  236.             filter_1.add(candidate)
  237.     print("\n\t# choices after filter_1 = {}".format(len(filter_1)))
  238.     return filter_1
  239.  
  240. """DEFINING THE 3 FILTERS"""
  241.  
  242. def trigram_filter(filter_1, trigrams_filtered):
  243.     """
  244.    Filter words based on trigrams.
  245.    
  246.    Parameters:
  247.        filter_1 (set): The set of words after the first filter.
  248.        trigrams_filtered (set): The set of filtered trigrams.
  249.    
  250.    Returns:
  251.        set: A set containing the filtered words.
  252.    """
  253.     filtered = set()
  254.     for candidate in filter_1:
  255.         for triplet in trigrams_filtered:
  256.             triplet = triplet.lower()
  257.             if triplet in candidate:
  258.                 filtered.add(candidate)
  259.     filter_2 = filter_1 - filtered
  260.     print("\t# of choices after filter_2 = {}".format(len(filter_2)))
  261.     return filter_2
  262.  
  263. def letter_pair_filter(filter_2):
  264.     """
  265.    Filter words based on letter pairs.
  266.    
  267.    Parameters:
  268.        filter_2 (set): The set of words after the second filter.
  269.    
  270.    Returns:
  271.        set: A set containing the filtered words.
  272.    """
  273.     filtered = set()
  274.     rejects = ['dt', 'lr', 'md', 'ml', 'mr', 'mt', 'mv', 'td', 'tv', 'vd', 'vl', 'vm', 'vr', 'vt']
  275.     first_pair_rejects = ['ld', 'lm', 'lt', 'lv', 'rd', 'rl', 'rm', 'rt', 'rv', 'tl', 'tm']
  276.     for candidate in filter_2:
  277.         for r in rejects:
  278.             if r in candidate:
  279.                 filtered.add(candidate)
  280.         for fp in first_pair_rejects:
  281.             if candidate.startswith(fp):
  282.                 filtered.add(candidate)
  283.     filter_3 = filter_2 - filtered
  284.     print("\t# of choices after filter_3 = {}".format(len(filter_3)))
  285.     if 'voldemort' in filter_3:
  286.         print("\nVoldemort found!", file=sys.stderr)
  287.     return filter_3
  288.  
  289. def view_by_letter(name, filter_3):
  290.     """
  291.    Display filtered words based on the starting letter provided by the user.
  292.    
  293.    Parameters:
  294.        name (str): The name to display filtered words for.
  295.        filter_3 (set): The set of filtered words.
  296.    """
  297.     print("\nRemaining letters = {}".format(name))
  298.     first = input("\nselect a starting letter or press Enter to see all: ")
  299.     print()
  300.     subset = []
  301.     for candidate in filter_3:
  302.         if candidate.startswith(first):
  303.             subset.append(candidate)
  304.     print(*sorted(subset), sep='\n')
  305.     if first:
  306.         print("\nNumber of choices starting with [{}] = {}".format(first, len(subset)))
  307.     else:
  308.         print("\nNumber of choices starting with [ALL] = {}".format(len(subset)))
  309.     try_again = input("\nPress [ENTER] to try again. (or... Press any other key to Exit):")
  310.     if try_again.lower() == '':
  311.         view_by_letter(name, filter_3)
  312.     else:
  313.         print("_" * 100)
  314.         print("\nThis concludes the demonstration of Chapter 3: Practice Project #7: Finding Voldemort: The British Brute-Force\n\n\t\t\t   Thank you for your attention...   Goodbye!")
  315.         print("_" * 100)
  316.    
# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
  319.  
  320.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement