- """This script allows you to save URLs from a specified text file to Archive.ph."""
- import logging
- import re
- import time
- import webbrowser
- import subprocess
- from os import system
- from colorama import Fore, Style, init
- from rich.traceback import install
- # ----------------------- Initialization -----------------------
- system("title " + "Archive.ph Link Saver")
- init(autoreset=True)
- install()
# ----------------------- Config -----------------------
DELAY_BETWEEN_URLS = 5  # Delay between URL submissions, in seconds
URL_INPUT_PATH = r"C:\Scripts\Archive.ph Link Saver\URLs.txt"
MODIFIED_URLS_PATH = r"C:\Scripts\Archive.ph Link Saver\Logs\Newly Modified URLs.txt"
LOG_FILE_PATH = r"C:\Scripts\Archive.ph Link Saver\Logs\Archive.ph.log"

# ----------------------- Logging Setup -----------------------
logging.basicConfig(filename=LOG_FILE_PATH,
                    level=logging.DEBUG,
                    format='%(asctime)s - %(levelname)s - %(message)s')
logging.info("Script started")
# ----------------------- Regex Pattern -----------------------
general_url_pattern = re.compile(
    r'^(https?://)?'                          # optional http or https scheme
    r'(www\.)?'                               # optional www.
    r'([a-zA-Z0-9]+(-[a-zA-Z0-9]+)*\.)+'      # domain labels
    r'[a-zA-Z]{2,}'                           # TLD
    r'(/[a-zA-Z0-9-._~%!$&\'()*+,;=]*)*'      # optional path
    r'(\?[a-zA-Z0-9-._~%!$&\'()*+,;=]*)?'     # optional query
    r'(#.*)?$'                                # optional fragment
)
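# Illustrative examples only (not from the original paste), assuming the pattern above:
#   accepted: "https://example.com/page?id=1", "www.example.org", "example.net/a/b#frag"
#   rejected: "not a url", "http://", "ftp:/broken"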
# ----------------------- Utility -----------------------
def log_and_print(message, level="info", color=None):
    """Log a message at the given level and echo it to the console, optionally colored."""
    getattr(logging, level)(message)
    print((color or "") + message)
# ----------------------- Main Function -----------------------
def main():
    invalid_urls = False
    archive_base = "https://archive.ph/?run=1&url="
    try:
        with open(URL_INPUT_PATH, "r") as input_file, open(MODIFIED_URLS_PATH, "w") as output_file:
            count = 1
            for line in input_file:
                url = line.strip()
                if not url:
                    log_and_print("Skipped empty line", "info")
                    continue
                if general_url_pattern.match(url):
                    new_url = f"{archive_base}{url}"
                    output_file.write(new_url + "\n")
                    log_and_print(f"Saved URL #{count}: {new_url}", "info")
                    webbrowser.open(new_url)
                    log_and_print(f"> Saving URL #{count}", color=Fore.YELLOW)
                    time.sleep(DELAY_BETWEEN_URLS)
                    count += 1
                else:
                    log_and_print(f"> Invalid URL: {url}", level="warning", color=Fore.RED + Fore.LIGHTCYAN_EX)
                    invalid_urls = True
    except FileNotFoundError as e:
        log_and_print(f"Error: {e}", level="error", color=Fore.RED)
        return
    if invalid_urls:
        log_and_print("> Some URLs were invalid and not saved.", level="warning", color=Fore.YELLOW)
        input(f"> Press {Fore.YELLOW}Enter{Style.RESET_ALL} to exit and open the {Fore.YELLOW}URLs{Style.RESET_ALL} text file...")
        subprocess.Popen(["notepad.exe", URL_INPUT_PATH])
    else:
        log_and_print("> All URLs are being saved.", color=Fore.GREEN)
        time.sleep(1)
    clean_trailing_newline(MODIFIED_URLS_PATH)
    logging.info("Script completed")
# ----------------------- Trailing Newline Cleanup -----------------------
def clean_trailing_newline(path):
    try:
        with open(path, "r+") as file:
            data = file.read().rstrip('\n')
            file.seek(0)
            file.write(data)
            file.truncate()
        logging.info("Removed empty line at the end of the output file")
    except Exception as e:
        log_and_print(f"Error cleaning file: {e}", level="error", color=Fore.RED)
# ----------------------- Entrypoint -----------------------
if __name__ == "__main__":
    main()
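# ----------------------- Example (illustrative, not part of the original script) -----------------------
# Hypothetical contents of URLs.txt:
#   https://example.com/article
#   www.example.org/page?id=1
# With the settings above, the script would write to "Newly Modified URLs.txt":
#   https://archive.ph/?run=1&url=https://example.com/article
#   https://archive.ph/?run=1&url=www.example.org/page?id=1
# and open each submission URL in the default browser, 5 seconds apart.
# Note: the target URL is appended verbatim (not percent-encoded).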