Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- # Filename: ipp7_1_count_digrams.py
- # Version: 1.0.0
- # Author: Jeoi Reqi
- """
- Description:
- - This script demonstrates "Chapter 3: Practice Project #1: Finding Digrams: Count Digrams" from the book "Impractical Python Projects" by Lee Vaughan.
- - Generate letter pairs in Voldemort & find their frequency in a dictionary.
- - Requires dictionary.txt (English dictionary) file in the current working directory.
- Requirements:
- - Python 3.x
- - The following modules:
- - sys
- - collections
- Functions:
- - load(file):
- Open a text file and return its contents as a list of lowercase strings.
- - main():
- Main function to generate letter pairs from Voldemort and find their frequency in a dictionary.
- Usage:
- - Run the script directly in a Python 3.x environment:
- $ python ipp7_1_count_digrams.py
- Additional Notes:
- - This script uses a dictionary.txt file containing English words to find the frequency of letter pairs in the name 'Voldemort'.
- - It generates unique letter pairs from the name and then counts their occurrences in the dictionary file.
- """
- import sys
- from collections import defaultdict
def load(file):
    """
    Open a text file & turn contents into a list of lowercase strings.

    Every line is stripped of surrounding whitespace and lowercased.
    Blank lines are skipped, so an empty file yields an empty list
    rather than a list holding one empty string.

    Arguments:
        file (str): The name of the text file to open.

    Returns:
        list: The non-blank lines of the file as lowercase strings.

    Raises:
        SystemExit: With exit status 1, after printing the error, when
            the file cannot be opened or read.
    """
    try:
        with open(file, encoding='utf-8') as in_file:
            # Iterate line by line instead of read().split('\n') so that
            # blank lines and stray whitespace never become list entries.
            stripped_lines = (line.strip() for line in in_file)
            return [word.lower() for word in stripped_lines if word]
    except IOError as e:
        print("{}\nError opening {}. Terminating program.".format(e, file))
        sys.exit(1)
def _digrams(text):
    """Return the set of unique adjacent letter pairs found in text."""
    return {first + second for first, second in zip(text, text[1:])}


def _count_digrams(words, digrams):
    """Return a dict mapping each digram to its total occurrences in words."""
    # Seed every digram with 0 so each one is reported even when the
    # word list is empty or the digram never occurs.
    counts = {digram: 0 for digram in digrams}
    for word in words:
        # Walk the word's own adjacent pairs once; this also counts
        # overlapping occurrences of doubled-letter digrams.
        for first, second in zip(word, word[1:]):
            pair = first + second
            if pair in counts:
                counts[pair] += 1
    return counts


def main(name='Voldemort', dictionary_file='dictionary.txt'):
    """
    Generate letter pairs from a name and find their frequency in a dictionary.

    Prints the name, its unique digrams (adjacent letter pairs), the number
    of digrams, and how many times each digram occurs across all words of
    the dictionary file.

    Arguments:
        name (str): The name to split into digrams. Defaults to 'Voldemort'.
        dictionary_file (str): Path of the word list, one word per line.
            Defaults to 'dictionary.txt' in the current working directory.

    Returns:
        None
    """
    # Load dictionary (load() returns the words already lowercased)
    print("Loading Dictionary...\n")
    word_list = load(dictionary_file)

    # Show the name as given, then work with its lowercase form
    print("Name:", name, "\n\nGathering Digrams...\n")
    name = name.lower()

    # Generate unique letter pairs from name
    digrams = _digrams(name)
    print(*sorted(digrams), sep='\n')
    print("\nNumber of Digrams = {}\n".format(len(digrams)))

    # Count how often each digram appears across the dictionary words
    mapped = _count_digrams(word_list, digrams)

    print("Digram Frequency Count:\n")
    for k in sorted(mapped):
        print("{} {}".format(k, mapped[k]))
    print("")
    print("_" * 100)
    print("\nThis concludes the demonstration of \"Chapter 3: Practice Project #1: Finding Digrams: Count Digrams\"\n\n\t\t\t Thank you for your attention... Goodbye!")
    print("_" * 100)
# Run the demonstration only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement