Advertisement
Friendsincode

Untitled

Nov 16th, 2023
709
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.29 KB | None | 0 0
  1.  
  2. import re
  3. import sys
  4.  
  5. def extract_mp3_links(file_path, output_file='free-output.txt'):
  6.     try:
  7.         with open(file_path, 'r', encoding='utf-8') as file:
  8.             html_content = file.read()
  9.     except FileNotFoundError:
  10.         print(f"Error: File not found at {file_path}")
  11.         return
  12.     except Exception as e:
  13.         print(f"Error: {e}")
  14.         return
  15.  
  16.     # Find all .mp3 and then capture from the preceding http to .mp3
  17.     pattern = r'http[s]?://.*?\.mp3'
  18.     all_links = re.findall(pattern, html_content)
  19.     mp3_links = []
  20.  
  21.     for link in all_links:
  22.         # Check if .mp3 is in the link and find the last http before .mp3
  23.         if '.mp3' in link:
  24.             # Split the string by http and take the last part
  25.             parts = link.rsplit('http', 1)
  26.             if len(parts) == 2:
  27.                 mp3_link = 'http' + parts[1]
  28.                 mp3_links.append(mp3_link)
  29.  
  30.     with open(output_file, 'w', encoding='utf-8') as output:
  31.         for link in mp3_links:
  32.             output.write(link + '\n')
  33.  
  34.     print(f"Extraction complete. {len(mp3_links)} links written to {output_file}")
  35.  
  36. if __name__ == "__main__":
  37.     if len(sys.argv) != 2:
  38.         print("Usage: python script_name.py <path_to_html_file>")
  39.     else:
  40.         extract_mp3_links(sys.argv[1])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement