Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- import sys
# Matches an http(s) URL that ends in ".mp3". The tempered token
# ``(?:(?!https?://)[^\s"'<>])*?`` keeps a match inside a single URL:
#   * it stops at whitespace, quotes and angle brackets, so text that merely
#     follows a link in the markup (e.g. ``href="...">song.mp3``) is not
#     glued onto it;
#   * it refuses to run across a second "http(s)://", so the match always
#     begins at the last scheme that precedes ".mp3".
_MP3_URL_RE = re.compile(r'''https?://(?:(?!https?://)[^\s"'<>])*?\.mp3''')


def extract_mp3_links(file_path, output_file='free-output.txt'):
    """Extract http(s) links ending in ``.mp3`` from a text/HTML file.

    The file at ``file_path`` is read as UTF-8, scanned for URLs that end in
    ``.mp3``, and every link found is written to ``output_file`` (UTF-8, one
    link per line, in order of appearance, duplicates kept). A summary line
    is printed when the output has been written.

    Each link is cut right after the first ``.mp3`` that follows its scheme,
    so anything after it (such as a query string) is not included. When one
    URL embeds another (``http://r/?u=http://b/x.mp3``) the innermost URL,
    i.e. the one starting at the last scheme before ``.mp3``, is reported.
    Matching is case-sensitive.

    Args:
        file_path: Path of the file to scan.
        output_file: Path of the file the links are written to. It is
            overwritten if it already exists.

    Returns:
        None. If the input cannot be read, an error message is printed and
        the function returns without creating or touching ``output_file``.

    Raises:
        OSError: If ``output_file`` cannot be opened or written; only
            failures while reading the input are caught.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            html_content = file.read()
    except FileNotFoundError:
        print(f"Error: File not found at {file_path}")
        return
    except Exception as e:
        # Deliberately broad: any read problem (permissions, bad encoding,
        # invalid path value) is reported instead of raised.
        print(f"Error: {e}")
        return

    # The pattern has no capturing groups, so findall() yields whole matches.
    mp3_links = _MP3_URL_RE.findall(html_content)

    with open(output_file, 'w', encoding='utf-8') as output:
        output.writelines(f"{link}\n" for link in mp3_links)

    print(f"Extraction complete. {len(mp3_links)} links written to {output_file}")
# Script entry point: exactly one command-line argument is expected, the
# path of the HTML file to scan. Any other argument count prints the usage.
if __name__ == "__main__":
    if len(sys.argv) == 2:
        extract_mp3_links(sys.argv[1])
    else:
        print("Usage: python script_name.py <path_to_html_file>")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement