Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- # Filename: nltk_nlp_program.py
- # Author: Jeoi Reqi
- """
- Welcome to the NLTK NLP Tool!
- This interactive script provides a Natural Language Processing (NLP) tool powered by the Natural Language Toolkit (NLTK).
- It offers various text analysis functionalities through a user-friendly menu.
- Requirements:
- - Python 3
- - NLTK library with 'punkt', 'averaged_perceptron_tagger', 'maxent_ne_chunker', 'words', and 'vader_lexicon' resources
- Usage:
- 1. Run the script.
- 2. Follow the menu prompts to select the desired analysis.
- 3. Enter the text for analysis.
- Menu Options:
- 1. Tokenization: Breaks text into words and sentences.
- 2. Part-of-Speech Tagging: Identifies grammatical parts of speech for each word.
- 3. Named Entity Recognition: Identifies entities such as persons, organizations, and locations.
- 4. Sentiment Analysis: Determines sentiment polarity (negative, neutral, positive).
- Examples:
- 1. Tokenization
- Enter the text: Hello, World!
- Tokenized Words: ['Hello', ',', 'World', '!']
- Tokenized Sentences: ['Hello, World!']
- 2. Part-of-Speech Tagging
- Enter the text: Hello, World!
- Part-of-Speech Tags: [('Hello', 'NNP'), (',', ','), ('World', 'NNP'), ('!', '.')]
- 3. Named Entity Recognition
- Enter the text: Hello, World!
- Named Entity Recognition: (S (GPE Hello/NNP) ,/, (PERSON World/NNP) !/.)
- 4. Sentiment Analysis
- Enter the text: Hello, World!
- Sentiment Analysis: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}
- Enjoy exploring the world of Natural Language Processing with NLTK!
- """
- # Imports
- import nltk
- from nltk.tokenize import word_tokenize, sent_tokenize
- from nltk import pos_tag, ne_chunk
- from nltk.sentiment import SentimentIntensityAnalyzer
# Downloads
# Fetch every NLTK resource used by the analysis functions in this script.
nltk.download('punkt')                       # word_tokenize / sent_tokenize
nltk.download('averaged_perceptron_tagger')  # pos_tag
nltk.download('maxent_ne_chunker')           # ne_chunk model
# ne_chunk also consults the 'words' corpus; without it, named entity
# recognition fails with a LookupError even though the chunker model is present.
nltk.download('words')
nltk.download('vader_lexicon')               # SentimentIntensityAnalyzer
# NOTE(review): newer NLTK releases reportedly look up 'punkt_tab',
# 'averaged_perceptron_tagger_eng' and 'maxent_ne_chunker_tab' instead of the
# names above -- confirm against the installed NLTK version and add them here
# if resource lookups fail.
- # Tokenization Function
def tokenize_nltk(text):
    """
    Split the input text into word tokens and into sentences.

    Parameters:
    - text (str): Text to tokenize.

    Returns:
    Tuple(List[str], List[str]): The word tokens first, then the sentences.
    """
    # Both tokenizers work on the raw text independently of each other.
    return word_tokenize(text), sent_tokenize(text)
- # Part-of-Speech Tagging Function
def pos_tagging_nltk(text):
    """
    Tag every word token of the input text with its part of speech.

    Parameters:
    - text (str): Text to tag.

    Returns:
    List[Tuple[str, str]]: One (token, tag) pair per word token.
    """
    # The tagger operates on tokens, so the text is word-tokenized first.
    return pos_tag(word_tokenize(text))
- # Named Entity Recognition Function
def named_entity_recognition_nltk(text):
    """
    Chunk the input text into named entities.

    Parameters:
    - text (str): Text to analyze.

    Returns:
    nltk.tree.Tree: Result of ne_chunk applied to the POS-tagged tokens.
    """
    # Pipeline: raw text -> word tokens -> (token, tag) pairs -> entity chunks.
    tagged_tokens = pos_tag(word_tokenize(text))
    return ne_chunk(tagged_tokens)
- # Sentiment Analysis Function
def _get_sentiment_analyzer():
    """
    Return a shared SentimentIntensityAnalyzer, creating it on first use.

    The instance is stored as an attribute on this function so that repeated
    analyses reuse one analyzer instead of constructing a new one each time.
    """
    analyzer = getattr(_get_sentiment_analyzer, "_instance", None)
    if analyzer is None:
        analyzer = SentimentIntensityAnalyzer()
        _get_sentiment_analyzer._instance = analyzer
    return analyzer


def sentiment_analysis_nltk(text):
    """
    Analyzes the sentiment of the input text.

    Parameters:
    - text (str): Input text for sentiment analysis.

    Returns:
    Dict[str, float]: Sentiment scores as returned by
    SentimentIntensityAnalyzer.polarity_scores (see the module docstring for
    an example with 'neg', 'neu', 'pos' and 'compound' keys).
    """
    # Reuse the shared analyzer rather than rebuilding it for every call.
    return _get_sentiment_analyzer().polarity_scores(text)
- # Menu & Options Function
def main_nltk():
    """
    Run the interactive NLTK NLP menu until the user chooses to exit.

    Each pass prints the menu and reads a choice. For an analysis choice
    (1-4) it then reads a line of text, runs the selected analysis and prints
    the result. Choice '0' prints a goodbye message and returns.

    The choice is validated before the text prompt, so a mistyped menu entry
    is reported immediately instead of after the user has typed text that
    would then be discarded.
    """
    analysis_choices = ('1', '2', '3', '4')

    print("Welcome to the NLTK NLP Tool!")
    while True:
        print("\nMenu:")
        print("1. Tokenization")
        print("2. Part-of-Speech Tagging")
        print("3. Named Entity Recognition")
        print("4. Sentiment Analysis")
        print("0. Exit")
        # Strip surrounding whitespace so input such as "1 " is still accepted.
        choice = input("Enter your choice (0-4): ").strip()

        if choice == '0':
            print("Exiting the NLTK NLP Tool. Goodbye!")
            break

        # Reject unknown choices before asking for any text.
        if choice not in analysis_choices:
            print("Invalid choice. Please enter a number between 0 and 4.")
            continue

        text = input("Enter the text: ")
        if choice == '1':
            words, sentences = tokenize_nltk(text)
            print("Tokenized Words:", words)
            print("Tokenized Sentences:", sentences)
        elif choice == '2':
            pos_tags = pos_tagging_nltk(text)
            print("Part-of-Speech Tags:", pos_tags)
        elif choice == '3':
            ner_result = named_entity_recognition_nltk(text)
            print("Named Entity Recognition:", ner_result)
        else:  # choice == '4', the only remaining valid option
            sentiment_score = sentiment_analysis_nltk(text)
            print("Sentiment Analysis:", sentiment_score)


# Start the interactive menu only when run as a script, not when imported.
if __name__ == "__main__":
    main_nltk()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement