Advertisement
Python253

yt_transcriber

May 10th, 2024 (edited)
784
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.53 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # Filename: yt_transcriber.py
  4. # Version: 1.0.1
  5. # Author: Jeoi Reqi
  6.  
  7. """
  8. This script is a utility for extracting transcripts from YouTube shorts & videos.
  9. It provides functions to extract the video ID from a YouTube URL, fetch the transcript for the specified video, and save the transcript to a text file.
  10. The script also includes a user-friendly interface for selecting options to input a YouTube URL or use an example URL, and it supports fetching transcripts in multiple languages.
  11.  
  12. Requirements:
  13.    - youtube_transcript_api: This script requires the 'youtube_transcript_api' library.
  14.    - Install via pip command:
  15.        
  16.        'pip install youtube_transcript_api'
  17.  
  18. Functions:
  19.    1. extract_video_id(video_url): Extracts the video ID from a YouTube URL.
  20.    2. save_transcript_to_file(transcript_text): Saves the transcript text to a text file.
  21.    3. main(): Provides a user interface for selecting options to input a YouTube URL or use an example URL, fetches the transcript, and offers the option to save the transcript to a file.
  22.  
  23. Usage:
  24.    - Run the script using Python: `python yt_transcriber.py`
  25.    - Select an option:
  26.        1. Input YouTube Video Url: Enter a YouTube video or Shorts URL to fetch the transcript.
  27.        2. Use Example Video Url: Use an example video URL provided in the script to fetch the transcript.
  28.    - If selecting option 1:
  29.        - Enter the YouTube Video or Shorts URL.
  30.        - The script attempts to fetch the transcript in specified languages (English, Spanish, Arabic, Simplified Chinese).
  31.        - The transcript is displayed, and the user is prompted to save it to a file.
  32.    - If selecting option 2:
  33.        - The transcript for the example video URL is fetched and displayed.
  34.        - The user is prompted to save the transcript to a file.
  35.  
  36. Additional Notes:
  37.    - This script supports fetching transcripts in multiple languages, including English, Spanish, Arabic, and Simplified Chinese.
  38.    - The script handles various user inputs and provides appropriate error messages for invalid inputs.
  39.    - Make sure to install the 'youtube_transcript_api' library before running the script.
  40. """
  41.  
  42. from youtube_transcript_api import YouTubeTranscriptApi
  43.  
  44. def extract_video_id(video_url):
  45.     """
  46.    Extracts the video ID from a YouTube URL.
  47.  
  48.    Args:
  49.        video_url (str): The YouTube video URL.
  50.  
  51.    Returns:
  52.        str or None: The extracted video ID if found, otherwise None.
  53.    """
  54.     # Extract video ID from YouTube URL
  55.     if "youtube.com" in video_url:
  56.         if "/shorts/" in video_url:
  57.             video_id = video_url.split("/shorts/")[-1]
  58.             return video_id
  59.         else:
  60.             video_id = video_url.split("v=")[-1]
  61.             return video_id
  62.     elif "youtu.be" in video_url:
  63.         video_id = video_url.split("/")[-1]
  64.         return video_id
  65.     else:
  66.         return None
  67.  
  68. def save_transcript_to_file(transcript_text):
  69.     """
  70.    Saves the transcript text to a text file.
  71.  
  72.    Args:
  73.        transcript_text (str): The transcript text to be saved.
  74.    """
  75.     file_name = input(
  76.         "\nEnter the file name to save the transcript (without extension): "
  77.     )
  78.     file_path = f"{file_name}.txt"
  79.     with open(file_path, "w", encoding="utf-8") as file:
  80.         file.write(transcript_text)
  81.     print(f"\nTranscript saved as {file_path}")
  82.  
  83. def main():
  84.     # Print The Header
  85.     print(
  86.         r"""
  87.      __   __         _____      _                    
  88.      \ \ / /__  _   |_   _|   _| |__   ___          
  89.       \ V / _ \| | | || || | | | '_ \ / _ \        
  90.        | | (_) | |_| || || |_| | |_) |  __/          
  91. _____  |_|\___/ \__,_||_| \__,_|_.__/_\___|          
  92. |_   _| __ __ _ _ __  ___  ___ _ __(_) |__   ___ _ __
  93.  | || '__/ _` | '_ \/ __|/ __| '__| | '_ \ / _ \ '__|
  94.  | || | | (_| | | | \__ \ (__| |  | | |_) |  __/ |  
  95.  |_||_|  \__,_|_| |_|___/\___|_|  |_|_.__/ \___|_|  
  96.        """
  97.     )
  98.  
  99.     # Main Menu
  100.     print("\n\tSelect an option:\n")
  101.     print("\t1: Input YouTube Video or Shorts Url")
  102.     print("\t2: Use Example Video Url")
  103.     option = input("\n\tEnter the option number (1 or 2): ")
  104.  
  105.     if option == "1":
  106.         video_url = input("\n\tEnter the YouTube Video URL: ")
  107.         video_id = extract_video_id(video_url)
  108.  
  109.         if not video_id:
  110.             print("\nInvalid YouTube Video URL. Exiting...")
  111.             return
  112.  
  113.         # Scan CC for the specified languages
  114.         languages_to_try = ["en", "es", "ar", "zh-Hans"]
  115.         transcript = None
  116.  
  117.         # Attempt to fetch transcripts in specified languages
  118.         for lang in languages_to_try:
  119.             transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang])
  120.             if transcript:
  121.                 break
  122.  
  123.         if not transcript:
  124.             print("\nNo transcript available for the selected languages. Exiting...")
  125.             return
  126.  
  127.         transcript_text = ""
  128.         for item in transcript:
  129.             transcript_text += f"{item['start']:>6.2f}: {item['text']}\n"
  130.         print(transcript_text)
  131.  
  132.         # Save Menu
  133.         print("\n\tDo you want to save the transcript to a file?\n")
  134.         print("\t1: Yes")
  135.         print("\t2: No")
  136.         save_option = input("\nEnter the option number: ")
  137.  
  138.         # Function to save the transcript to a txt file in the current working directory
  139.         if save_option == "1":
  140.             save_transcript_to_file(transcript_text)
  141.         elif save_option == "2":
  142.             print("\nTranscript not saved!")
  143.         else:
  144.             print("\nInvalid option!")
  145.     elif option == "2":
  146.         video_id = "jNQXAC9IVRw"  # Example video URL: https://www.youtube.com/watch?v=jNQXAC9IVRw
  147.         transcript = YouTubeTranscriptApi.get_transcript(video_id)
  148.         transcript_text = ""
  149.         for item in transcript:
  150.             transcript_text += f"{item['start']:>6.2f}: {item['text']}\n"
  151.         print(transcript_text)
  152.  
  153.         # Save Menu
  154.         print("\n\tDo you want to save the transcript to a file?\n")
  155.         print("\t1: Yes")
  156.         print("\t2: No")
  157.         save_option = input("\n\tEnter the option number: ")
  158.  
  159.         # Function to save the transcript to a txt file in the current working directory
  160.         if save_option == "1":
  161.             save_transcript_to_file(transcript_text)
  162.         elif save_option == "2":
  163.             print("\nTranscript not saved!")
  164.         else:
  165.             print("\nInvalid option!")
  166.     else:
  167.         print("\nInvalid option!")
  168.  
  169. if __name__ == "__main__":
  170.     main()
  171.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement