Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- # Filename: yt_transcriber.py
- # Version: 1.0.1
- # Author: Jeoi Reqi
- """
- This script is a utility for extracting transcripts from YouTube shorts & videos.
- It provides functions to extract the video ID from a YouTube URL, fetch the transcript for the specified video, and save the transcript to a text file.
- The script also includes a user-friendly interface for selecting options to input a YouTube URL or use an example URL, and it supports fetching transcripts in multiple languages.
- Requirements:
- - youtube_transcript_api: This script requires the 'youtube_transcript_api' library.
- - Install via pip command:
- 'pip install youtube_transcript_api'
- Functions:
- 1. extract_video_id(video_url): Extracts the video ID from a YouTube URL.
- 2. save_transcript_to_file(transcript_text): Saves the transcript text to a text file.
- 3. main(): Provides a user interface for selecting options to input a YouTube URL or use an example URL, fetches the transcript, and offers the option to save the transcript to a file.
- Usage:
- - Run the script using Python: `python yt_transcriber.py`
- - Select an option:
- 1. Input YouTube Video or Shorts Url: Enter a YouTube video or Shorts URL to fetch the transcript.
- 2. Use Example Video Url: Use an example video URL provided in the script to fetch the transcript.
- - If selecting option 1:
- - Enter the YouTube Video or Shorts URL.
- - The script attempts to fetch the transcript in specified languages (English, Spanish, Arabic, Simplified Chinese).
- - The transcript is displayed, and the user is prompted to save it to a file.
- - If selecting option 2:
- - The transcript for the example video URL is fetched and displayed.
- - The user is prompted to save the transcript to a file.
- Additional Notes:
- - This script supports fetching transcripts in multiple languages, including English, Spanish, Arabic, and Simplified Chinese.
- - The script handles various user inputs and provides appropriate error messages for invalid inputs.
- - Make sure to install the 'youtube_transcript_api' library before running the script.
- """
- from youtube_transcript_api import YouTubeTranscriptApi
def extract_video_id(video_url):
    """
    Extracts the video ID from a YouTube URL.

    Supported forms are ``youtube.com/watch?v=<id>``, ``youtube.com/shorts/<id>``,
    ``youtube.com/embed/<id>``, ``youtube.com/live/<id>``, ``youtube.com/v/<id>``
    and ``youtu.be/<id>``, with or without a scheme, extra query parameters,
    a fragment, or a trailing slash.

    Args:
        video_url (str): The YouTube video URL.

    Returns:
        str or None: The extracted video ID if found, otherwise None.
    """
    # Local import keeps this function self-contained within the file.
    from urllib.parse import parse_qs, urlparse

    if not video_url:
        return None

    url = video_url.strip()
    # urlparse only recognises the host when a scheme is present, so add one
    # for inputs such as "youtu.be/<id>" pasted without "https://".
    if "://" not in url:
        url = f"https://{url}"

    try:
        parsed = urlparse(url)
        host = (parsed.hostname or "").lower()
    except ValueError:
        # Malformed netloc (e.g. unbalanced IPv6 brackets).
        return None

    # Non-empty path segments; this also discards a trailing slash.
    segments = [segment for segment in parsed.path.split("/") if segment]

    if host == "youtu.be" or host.endswith(".youtu.be"):
        return segments[0] if segments else None

    if host == "youtube.com" or host.endswith(".youtube.com"):
        # Path-based forms carry the ID as the second path segment.
        if len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
            return segments[1]
        # Classic watch URLs carry the ID in the "v" query parameter; parsing
        # the query string drops unrelated parameters such as "&t=10s".
        ids = parse_qs(parsed.query).get("v")
        return ids[0] if ids else None

    return None
def save_transcript_to_file(transcript_text):
    """
    Prompts for a file name and writes the transcript to ``<name>.txt``.

    The name is used exactly as typed, so the file is created relative to
    the current working directory unless a path is entered. An existing file
    with the same name is overwritten. A confirmation line is printed once
    the file has been written.

    Args:
        transcript_text (str): The transcript text to be saved.
    """
    prompt = "\nEnter the file name to save the transcript (without extension): "
    destination = input(prompt) + ".txt"

    # UTF-8 is set explicitly so non-ASCII transcript text is written the
    # same way regardless of the platform's default encoding.
    with open(destination, mode="w", encoding="utf-8") as handle:
        handle.write(transcript_text)

    print(f"\nTranscript saved as {destination}")
def _fetch_transcript(video_id, languages=None):
    """Fetch a transcript, returning None when none can be retrieved.

    Args:
        video_id (str): The YouTube video ID.
        languages (list[str] or None): Language codes in descending priority.
            When None, the library's default language selection is used.

    Returns:
        list or None: The transcript entries, or None if the library reports
        that no transcript could be retrieved.
    """
    # Imported locally so the helper works without touching the file's
    # top-level import block.
    from youtube_transcript_api import CouldNotRetrieveTranscript

    try:
        if languages is None:
            return YouTubeTranscriptApi.get_transcript(video_id)
        # get_transcript raises (rather than returning a falsy value) when a
        # language is missing, so the whole priority list is passed in a
        # single call and the library picks the first available language.
        return YouTubeTranscriptApi.get_transcript(video_id, languages=languages)
    except CouldNotRetrieveTranscript:
        return None


def _format_transcript(transcript):
    """Render transcript entries as '<start seconds>: <text>' lines."""
    return "".join(
        f"{item['start']:>6.2f}: {item['text']}\n" for item in transcript
    )


def _offer_to_save(transcript_text, prompt):
    """Show the save menu and write the transcript to a file if requested."""
    print("\n\tDo you want to save the transcript to a file?\n")
    print("\t1: Yes")
    print("\t2: No")
    save_option = input(prompt).strip()
    if save_option == "1":
        save_transcript_to_file(transcript_text)
    elif save_option == "2":
        print("\nTranscript not saved!")
    else:
        print("\nInvalid option!")


def main():
    """Run the interactive menu: pick a video, show its transcript, offer to save.

    Option 1 asks for a YouTube video or Shorts URL and looks for a transcript
    in English, Spanish, Arabic, or Simplified Chinese (in that priority).
    Option 2 uses a built-in example video with the library's default
    language selection. Invalid menu choices, unrecognised URLs, and
    unavailable transcripts print a message and return instead of raising.
    """
    # Print The Header
    print(
        r"""
__ __ _____ _
\ \ / /__ _ |_ _| _| |__ ___
\ V / _ \| | | || || | | | '_ \ / _ \
| | (_) | |_| || || |_| | |_) | __/
_____ |_|\___/ \__,_||_| \__,_|_.__/_\___|
|_ _| __ __ _ _ __ ___ ___ _ __(_) |__ ___ _ __
| || '__/ _` | '_ \/ __|/ __| '__| | '_ \ / _ \ '__|
| || | | (_| | | | \__ \ (__| | | | |_) | __/ |
|_||_| \__,_|_| |_|___/\___|_| |_|_.__/ \___|_|
"""
    )

    # Main Menu
    print("\n\tSelect an option:\n")
    print("\t1: Input YouTube Video or Shorts Url")
    print("\t2: Use Example Video Url")
    option = input("\n\tEnter the option number (1 or 2): ").strip()

    if option == "1":
        video_url = input("\n\tEnter the YouTube Video URL: ")
        video_id = extract_video_id(video_url)
        if not video_id:
            print("\nInvalid YouTube Video URL. Exiting...")
            return
        # Languages to look for, in descending priority.
        languages = ["en", "es", "ar", "zh-Hans"]
        save_prompt = "\nEnter the option number: "
    elif option == "2":
        # Example video URL: https://www.youtube.com/watch?v=jNQXAC9IVRw
        video_id = "jNQXAC9IVRw"
        languages = None  # keep the library's default language selection
        save_prompt = "\n\tEnter the option number: "
    else:
        print("\nInvalid option!")
        return

    transcript = _fetch_transcript(video_id, languages)
    if not transcript:
        print("\nNo transcript available for the selected languages. Exiting...")
        return

    transcript_text = _format_transcript(transcript)
    print(transcript_text)

    # Save Menu
    _offer_to_save(transcript_text, save_prompt)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement