# nltk_nlp_program

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Filename: nltk_nlp_program.py
# Author: Jeoi Reqi

"""
Welcome to the NLTK NLP Tool!

This interactive script provides a Natural Language Processing (NLP) tool powered by the Natural Language Toolkit (NLTK).
It offers various text analysis functionalities through a user-friendly menu.

Requirements:
- Python 3
- NLTK library with 'punkt', 'averaged_perceptron_tagger', 'maxent_ne_chunker', 'words', and 'vader_lexicon' resources

Usage:
1. Run the script.
2. Follow the menu prompts to select the desired analysis.
3. Enter the text for analysis.

Menu Options:
1. Tokenization: Breaks text into words and sentences.
2. Part-of-Speech Tagging: Identifies grammatical parts of speech for each word.
3. Named Entity Recognition: Identifies entities such as persons, organizations, and locations.
4. Sentiment Analysis: Determines sentiment polarity (negative, neutral, positive).

Examples:

1. Tokenization
Enter the text: Hello, World!
Tokenized Words: ['Hello', ',', 'World', '!']
Tokenized Sentences: ['Hello, World!']

2. Part-of-Speech Tagging
Enter the text: Hello, World!
Part-of-Speech Tags: [('Hello', 'NNP'), (',', ','), ('World', 'NNP'), ('!', '.')]

3. Named Entity Recognition
Enter the text: Hello, World!
Named Entity Recognition: (S (GPE Hello/NNP) ,/, (PERSON World/NNP) !/.)

4. Sentiment Analysis
Enter the text: Hello, World!
Sentiment Analysis: {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}


Enjoy exploring the world of Natural Language Processing with NLTK!
"""

# Imports
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk import pos_tag, ne_chunk
from nltk.sentiment import SentimentIntensityAnalyzer

# Downloads
# Fetch the NLTK data packages used by the functions in this file.
nltk.download('punkt')                        # word_tokenize / sent_tokenize
nltk.download('averaged_perceptron_tagger')   # pos_tag
nltk.download('maxent_ne_chunker')            # ne_chunk
nltk.download('words')                        # corpus that ne_chunk's chunker also loads
nltk.download('vader_lexicon')                # SentimentIntensityAnalyzer
# NOTE(review): NLTK 3.9+ loads these renamed packages in place of the pickled
# ones above; requesting both keeps the script usable on old and new releases.
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger_eng')
nltk.download('maxent_ne_chunker_tab')

# Tokenization Function
def tokenize_nltk(text):
    """
    Split the input text into word tokens and sentence tokens.

    Parameters:
    - text (str): Text to tokenize.

    Returns:
    Tuple(List[str], List[str]): The word tokens, followed by the sentences.
    """
    # Word tokenization runs first, then sentence splitting; the pair is
    # returned in (words, sentences) order.
    return word_tokenize(text), sent_tokenize(text)

# Part-of-Speech Tagging Function
def pos_tagging_nltk(text):
    """
    Tag every word token of the input text with its part of speech.

    Parameters:
    - text (str): Text to tag.

    Returns:
    List[Tuple[str, str]]: One (token, tag) pair per word token.
    """
    # Tokenize into words, then hand the tokens straight to the tagger.
    return pos_tag(word_tokenize(text))

# Named Entity Recognition Function
def named_entity_recognition_nltk(text):
    """
    Run named entity recognition over the input text.

    Parameters:
    - text (str): Text to search for named entities.

    Returns:
    nltk.tree.Tree: The chunked result produced by ne_chunk.
    """
    # The chunker works on POS-tagged tokens, so tokenize and tag first.
    tagged_tokens = pos_tag(word_tokenize(text))
    return ne_chunk(tagged_tokens)

# Sentiment Analysis Function
def sentiment_analysis_nltk(text):
    """
    Score the sentiment of the input text.

    Parameters:
    - text (str): Text to score.

    Returns:
    Dict[str, float]: Sentiment scores (negative, neutral, positive, compound).
    """
    # A fresh analyzer is built on every call and asked for the polarity scores.
    return SentimentIntensityAnalyzer().polarity_scores(text)

# Menu & Options Function
def main_nltk():
    """
    Main function for the NLTK NLP tool, providing a user-friendly menu for text analysis.

    Loops over: print the menu, read a choice, and for choices 1-4 read a text
    and print the result of the matching analysis function.

    The loop ends when the user enters 0, or when input ends (EOF) or is
    interrupted (Ctrl-C) at either prompt. An invalid choice is reported
    immediately, before any text is requested.

    Returns:
    None
    """
    print("Welcome to the NLTK NLP Tool!")
    analysis_choices = ('1', '2', '3', '4')

    while True:
        # Show the menu on every iteration.
        print("\nMenu:")
        print("1. Tokenization")
        print("2. Part-of-Speech Tagging")
        print("3. Named Entity Recognition")
        print("4. Sentiment Analysis")
        print("0. Exit")

        try:
            # Surrounding whitespace is ignored so " 1 " still selects option 1.
            choice = input("Enter your choice (0-4): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed or interrupted input is treated as a request to quit.
            print("\nExiting the NLTK NLP Tool. Goodbye!")
            break

        if choice == '0':
            print("Exiting the NLTK NLP Tool. Goodbye!")
            break

        # Validate before prompting for text so the user never types text
        # that is then thrown away because of a bad menu choice.
        if choice not in analysis_choices:
            print("Invalid choice. Please enter a number between 0 and 4.")
            continue

        try:
            text = input("Enter the text: ")
        except (EOFError, KeyboardInterrupt):
            print("\nExiting the NLTK NLP Tool. Goodbye!")
            break

        if choice == '1':
            words, sentences = tokenize_nltk(text)
            print("Tokenized Words:", words)
            print("Tokenized Sentences:", sentences)

        elif choice == '2':
            pos_tags = pos_tagging_nltk(text)
            print("Part-of-Speech Tags:", pos_tags)

        elif choice == '3':
            ner_result = named_entity_recognition_nltk(text)
            print("Named Entity Recognition:", ner_result)

        else:  # choice == '4'
            sentiment_score = sentiment_analysis_nltk(text)
            print("Sentiment Analysis:", sentiment_score)

if __name__ == "__main__":
    main_nltk()