Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- # Filename: ipp7_0_voldemort_british.py
- # Version: 1.0.0
- # Author: Jeoi Reqi
- """
- Description:
- - This script demonstrates "Chapter 3: Practice Project #7: Finding Voldemort: The British Brute-Force" from the book "Impractical Python Projects" by Lee Vaughan.
- - It aims to find possible names that can be formed from the letters in "tmvoordle" using various filters such as consonant-vowel patterns, trigrams, and letter pairs.
- - The script downloads a dictionary file containing English words from a URL and saves it alphabetically.
- - It also downloads a trigrams file and saves it for later use.
- Requirements:
- - Python 3.x
- - The following modules
- - os
- - sys
- - requests
- - itertools (specifically the permutations function)
- - collections (specifically the Counter class)
- Functions:
- - main():
- - Executes the main functionality of the script.
- - download_dictionary(url, file_name):
- - Download a dictionary file from a URL and save it alphabetically.
- - load_dictionary(file):
- - Open a text file & turn contents into a set of lowercase strings.
- - download_trigrams_file(url, filename):
- - Download a trigrams file from a URL and save it.
- - prep_words(name, word_list_ini):
- - Filter words from the initial word list based on the length.
- - cv_map_words(word_list):
- - Map words from the word list to their consonant-vowel patterns.
- - cv_map_filter(name, filtered_cv_map):
- - Filter words based on consonant-vowel patterns.
- - trigram_filter(filter_1, trigrams_filtered):
- - Filter words based on trigrams.
- - letter_pair_filter(filter_2):
- - Filter words based on letter pairs.
- - view_by_letter(name, filter_3):
- - Display filtered words based on the starting letter provided by the user.
- Usage:
- - Run the script directly in a Python 3.x environment:
- $ python ipp7_0_voldemort_british.py
- Additional Notes:
- - The script uses various filtering techniques to narrow down the list of possible names that can be formed from the given set of letters.
- - It prompts the user to input a starting letter or press Enter to see all possible names.
- """
- import os
- import sys
- import requests
- from itertools import permutations
- from collections import Counter
def download_dictionary(url, file_name):
    """
    Download a dictionary file from a URL and save it alphabetically.

    The response body is split into lines, each line is stripped of
    surrounding whitespace, and the sorted lines are written to `file_name`
    joined by newlines.

    Parameters:
        url (str): The URL from which to download the dictionary file.
        file_name (str): The name to save the downloaded file as.

    Raises:
        SystemExit: With exit code 1 if the HTTP request fails (including a
            timeout or a non-success status code) or the file cannot be
            written. The underlying error is printed before exiting.
    """
    print("\nDownloading dictionary file from:\n{}".format(url))
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Turn 4xx/5xx answers into RequestException
        # Split the content by lines, strip whitespace, and sort alphabetically
        sorted_content = sorted(line.strip() for line in response.text.strip().split('\n'))
        # Save the sorted content to the file
        with open(file_name, 'w', encoding='UTF-8') as f:
            f.write('\n'.join(sorted_content))
    except requests.RequestException as e:
        # Must come before OSError: RequestException is itself an IOError subclass.
        print("\nError downloading dictionary from {}: {}".format(url, e))
        sys.exit(1)
    except OSError as e:
        # Disk/permission problems while saving get the same clean exit
        # instead of an unhandled traceback.
        print("\nError saving dictionary to '{}': {}".format(file_name, e))
        sys.exit(1)
    else:
        print("\nDictionary file downloaded and saved alphabetically as: '{}'.".format(file_name))
def load_dictionary(file):
    """
    Read a text file and return its lines as a set of lowercase strings.

    The whole file is read, leading/trailing whitespace of the content is
    removed, and the remainder is split on newline characters.

    Parameters:
        file (str): The name of the file to open.

    Returns:
        set: The lowercase form of every line found in the file.

    Raises:
        SystemExit: With exit code 1 if the file cannot be opened or read
            (the error is printed first).
    """
    try:
        with open(file, encoding='utf-8') as handle:
            raw_text = handle.read()
    except IOError as err:
        print("\n{}\nError opening {}. Terminating program.\n".format(err, file))
        sys.exit(1)
    # Lowercase every entry; the set also collapses case-only duplicates.
    return set(map(str.lower, raw_text.strip().split('\n')))
def download_trigrams_file(url, filename):
    """
    Download a trigrams file from a URL and save it.

    The response body is written to `filename` byte-for-byte, but only after
    the HTTP status has been checked, so an error page is never stored in
    place of the trigram list.

    Parameters:
        url (str): The URL from which to download the trigrams file.
        filename (str): The name to save the downloaded file as.

    Raises:
        SystemExit: With exit code 1 if the HTTP request fails (including a
            timeout or a non-success status code) or the file cannot be
            written. The underlying error is printed before exiting.
    """
    print("\nDownloading least-likely_trigrams.txt...\n")
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, timeout=30)
        # Reject 4xx/5xx answers before anything is written to disk.
        response.raise_for_status()
        with open(filename, 'wb') as f:
            f.write(response.content)
    except requests.RequestException as e:
        # Must come before OSError: RequestException is itself an IOError subclass.
        print("\nError downloading trigrams file from {}: {}".format(url, e))
        sys.exit(1)
    except OSError as e:
        print("\nError saving trigrams file to '{}': {}".format(filename, e))
        sys.exit(1)
    else:
        print("\nDownload completed!\n")
def main():
    """
    Run the whole demonstration from banner to interactive viewer.

    Steps, in order:
    1. Print the banner and a short description of the script.
    2. Download the dictionary and trigram files if they are not listed in
       the current working directory, then load both.
    3. Pass the anagram letters through the length, consonant-vowel,
       trigram and letter-pair filters.
    4. Hand the surviving candidates to the interactive viewer.
    """
    rule = "_" * 100

    # Banner and description of the script
    print(rule)
    print("\n\t :: Chapter 3: Practice Project #7: Finding Voldemort: The British Brute-Force ::")
    print(rule)
    description = (
        "\n- This script aims to find possible names that can be formed from the letters in 'tmvoordle'.\n"
        "- It uses various filters such as consonant-vowel patterns, trigrams, and letter pairs.\n"
        "- It downloads a dictionary file containing English words from a URL and saves it alphabetically.\n"
        "- It also downloads a trigrams file and saves it for later use.\n"
    )
    print(description)
    print("\nLoading dictionary file...\n")

    name = 'tmvoordle'.lower()

    # Dictionary: fetch only when it is not already in the working directory
    dictionary_file = "dictionary.txt"
    dictionary_url = "https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt"
    if dictionary_file not in os.listdir():
        print("\nLoading dictionary file...\n")
        download_dictionary(dictionary_url, dictionary_file)
    word_list_ini = load_dictionary(dictionary_file)

    # Trigrams: same fetch-if-missing policy
    trigrams_filename = 'least_likely_trigrams.txt'
    if trigrams_filename not in os.listdir():
        trigrams_url = 'https://github.com/rlvaugh/Impractical_Python_Projects/raw/master/Chapter_3/least-likely_trigrams.txt'
        download_trigrams_file(trigrams_url, trigrams_filename)
    trigrams_filtered = load_dictionary(trigrams_filename)

    # Filtering pipeline: each stage narrows the previous stage's output
    same_length_words = prep_words(name, word_list_ini)
    allowed_patterns = cv_map_words(same_length_words)
    after_cv = cv_map_filter(name, allowed_patterns)
    after_trigrams = trigram_filter(after_cv, trigrams_filtered)
    after_pairs = letter_pair_filter(after_trigrams)

    view_by_letter(name, after_pairs)
def prep_words(name, word_list_ini):
    """
    Keep only the words whose length equals the length of `name`.

    The sizes of the input collection and of the result are printed.

    Parameters:
        name (str): The string whose length the kept words must match.
        word_list_ini (collection of str): The initial words.

    Returns:
        list: The lowercase form of every word with the same length as `name`.
    """
    print("- length initial word_list = {}".format(len(word_list_ini)))
    wanted_length = len(name)
    matching = []
    for entry in word_list_ini:
        if len(entry) == wanted_length:
            matching.append(entry.lower())
    print("- length of new word_list = {}".format(len(matching)))
    return matching
def cv_map_words(word_list):
    """
    Build the set of common consonant-vowel patterns found in `word_list`.

    Every word is turned into a pattern string in which each letter from
    'aeiouy' becomes 'v' and every other character becomes 'c'. The patterns
    are ranked by how often they occur and the least frequent ~5% of the
    distinct patterns are dropped.

    Parameters:
        word_list (list): The words to map.

    Returns:
        set: The consonant-vowel patterns that survived the pruning.
    """
    vowels = 'aeiouy'
    patterns = [
        ''.join('v' if ch in vowels else 'c' for ch in word)
        for word in word_list
    ]

    tally = Counter(patterns)
    distinct = len(tally)
    prune_fraction = 0.05
    to_drop = int(distinct * prune_fraction)

    # most_common() ranks by frequency, so cutting the tail removes the
    # rarest patterns.
    filtered_cv_map = {shape for shape, _count in tally.most_common(distinct - to_drop)}
    print("- length filtered_cv_map = {}".format(len(filtered_cv_map)))
    return filtered_cv_map
def cv_map_filter(name, filtered_cv_map):
    """
    Keep the permutations of `name` whose consonant-vowel pattern is allowed.

    All unique orderings of the letters in `name` are generated; each one is
    mapped to a pattern ('v' for letters in 'aeiouy', 'c' otherwise) and kept
    only if that pattern is a member of `filtered_cv_map`. The number of
    permutations and the number of survivors are printed.

    Parameters:
        name (str): The letters to permute.
        filtered_cv_map (set): The allowed consonant-vowel patterns.

    Returns:
        set: The permutations whose pattern is in `filtered_cv_map`.
    """
    orderings = set(map(''.join, permutations(name)))
    print("- length of initial permutations set = {}".format(len(orderings)))

    vowels = 'aeiouy'
    survivors = set()
    for ordering in orderings:
        shape = ''.join('v' if ch in vowels else 'c' for ch in ordering)
        if shape in filtered_cv_map:
            survivors.add(ordering)

    print("\n\t# choices after filter_1 = {}".format(len(survivors)))
    return survivors
- """DEFINING THE 3 FILTERS"""
def trigram_filter(filter_1, trigrams_filtered):
    """
    Drop every candidate that contains one of the given trigrams.

    Each entry of `trigrams_filtered` is lowercased before it is searched for
    as a substring of the candidate. The number of remaining candidates is
    printed.

    Parameters:
        filter_1 (set): The candidates left after the first filter.
        trigrams_filtered (set): The trigrams that disqualify a candidate.

    Returns:
        set: The candidates containing none of the trigrams.
    """
    rejected = {
        word
        for word in filter_1
        if any(trigram.lower() in word for trigram in trigrams_filtered)
    }
    filter_2 = filter_1 - rejected
    print("\t# of choices after filter_2 = {}".format(len(filter_2)))
    return filter_2
def letter_pair_filter(filter_2):
    """
    Drop candidates that contain a rejected letter pair.

    Two hard-coded lists are used: pairs that disqualify a candidate wherever
    they occur, and pairs that disqualify it only as its first two letters.
    The number of remaining candidates is printed, and a notice is written
    to stderr when 'voldemort' is among them.

    Parameters:
        filter_2 (set): The candidates left after the second filter.

    Returns:
        set: The candidates that contain no rejected pair.
    """
    banned_anywhere = ('dt', 'lr', 'md', 'ml', 'mr', 'mt', 'mv', 'td', 'tv', 'vd', 'vl', 'vm', 'vr', 'vt')
    banned_at_start = ('ld', 'lm', 'lt', 'lv', 'rd', 'rl', 'rm', 'rt', 'rv', 'tl', 'tm')

    rejected = set()
    for word in filter_2:
        # str.startswith accepts a tuple, covering all opening pairs at once.
        if word.startswith(banned_at_start) or any(pair in word for pair in banned_anywhere):
            rejected.add(word)

    filter_3 = filter_2 - rejected
    print("\t# of choices after filter_3 = {}".format(len(filter_3)))
    if 'voldemort' in filter_3:
        print("\nVoldemort found!", file=sys.stderr)
    return filter_3
def view_by_letter(name, filter_3):
    """
    Interactively list the filtered candidates by starting letter.

    The user is asked for a prefix; pressing Enter (an empty prefix) selects
    every candidate. The matching candidates are printed in sorted order,
    one per line, followed by their count. The prompt is repeated for as long
    as the user answers the "try again" question with a bare Enter; any other
    answer prints the closing banner and returns.

    Parameters:
        name (str): The letters being unscrambled (shown to the user).
        filter_3 (set): The candidates left after all filters.
    """
    while True:
        print("\nRemaining letters = {}".format(name))
        first = input("\nselect a starting letter or press Enter to see all: ")
        print()

        matches = sorted(word for word in filter_3 if word.startswith(first))
        print(*matches, sep='\n')

        if not first:
            print("\nNumber of choices starting with [ALL] = {}".format(len(matches)))
        else:
            print("\nNumber of choices starting with [{}] = {}".format(first, len(matches)))

        answer = input("\nPress [ENTER] to try again. (or... Press any other key to Exit):")
        if answer.lower() != '':
            break

    print("_" * 100)
    print("\nThis concludes the demonstration of Chapter 3: Practice Project #7: Finding Voldemort: The British Brute-Force\n\n\t\t\t Thank you for your attention... Goodbye!")
    print("_" * 100)
# Script entry point: run the demonstration only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement