import requests
import re
import time
import logging
import sys

# Configure logging
logging.basicConfig(filename='log.txt', level=logging.INFO, format='%(asctime)s %(message)s')

def convert_to_readable_time(seconds):
    """Converts seconds to a string with days, hours, minutes, and seconds."""
    days, remaining_seconds = divmod(seconds, 86400)  # 86400 seconds in a day
    hours, remaining_seconds = divmod(remaining_seconds, 3600)
    minutes, remaining_seconds = divmod(remaining_seconds, 60)
    # Cast to int so float inputs (e.g. the measured elapsed time) don't render as "1.0d 1.0h ..."
    return f"{int(days)}d {int(hours)}h {int(minutes)}m {remaining_seconds:.2f}s"
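
# Illustrative example (not part of the original paste): with the integer casts above,
# convert_to_readable_time(90061) returns "1d 1h 1m 1.00s".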

# Define default values
default_type = "anime"
default_start = "1"
default_end = "61200"
default_delay = "5"

# Get CLI arguments, falling back to the defaults when they are not provided
type = sys.argv[1] if len(sys.argv) > 1 else default_type
start = int(sys.argv[2]) if len(sys.argv) > 2 else int(default_start)

def fetch_latest_id(url):
    response = requests.get(url)
    response.raise_for_status()  # Raise an exception for error responses
    # Extract the anime/manga ID from the HTML content using regular expressions
    if type == "anime":
        match = re.search(r'/anime/(\d+)', response.text)
    else:
        match = re.search(r'/manga/(\d+)', response.text)
    if match:
        return match.group(1)
    else:
        raise ValueError("Unable to find anime/manga ID in the response")

# MyAnimeList anime/manga search URLs; the first /anime/<id> or /manga/<id> link in the
# result page is taken as the latest ID and used as the default end of the range
url1 = "https://myanimelist.net/anime.php?o=9&c%5B0%5D=a&c%5B1%5D=d&cv=2&w=1"
url2 = "https://myanimelist.net/manga.php?o=9&c%5B0%5D=a&c%5B1%5D=d&cv=2&w=1"

if type == "anime":
    default_end = fetch_latest_id(url1)
else:
    default_end = fetch_latest_id(url2)

# Get the remaining CLI arguments, falling back to the defaults when they are not provided
end = int(sys.argv[3]) if len(sys.argv) > 3 else int(default_end)
delay = int(sys.argv[4]) if len(sys.argv) > 4 else int(default_delay)

# Calculate total URLs
total_urls = end - start + 1

# Estimate total execution time (assuming the fixed delay dominates each request)
estimated_total_time = total_urls * delay
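# Rough illustration (using the fallback defaults, not a measured figure): 61200 IDs at a
# 5-second delay give 61200 * 5 = 306000 seconds, which prints as "3d 13h 0m 0.00s".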

# Start time measurement
start_time = time.time()

print(f"{type} {start} {end} {delay}")

# Print estimated time prominently
print("-" * 50)  # Optional divider for clarity
print(f"Estimated total execution time: {convert_to_readable_time(estimated_total_time)}")
print("-" * 50)  # Optional divider for clarity

# Counter for consecutive 404s
consecutive_404s = 0

# Loop through ids and check URLs
for i in range(start, end + 1):
    url = f"https://shaggyze.website/msa/info?t={type}&id={i}"

    # Re-estimate remaining time within the loop for a potentially more accurate figure
    current_urls = end - i + 1
    estimated_remaining_time = (current_urls - 1) * delay  # Subtract 1 for the current iteration
    print(f"Estimated remaining time: {convert_to_readable_time(estimated_remaining_time)}")
    logging.info(f"Estimated remaining time: {convert_to_readable_time(estimated_remaining_time)}")

    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise an exception for non-200 status codes
        consecutive_404s = 0  # Reset counter after a successful request
        logging.info(f"Successfully accessed URL: {url}")
        if response.status_code == 200:
            print(f"Success! URL {url} returned status code {response.status_code}")
        time.sleep(delay)  # Pause between requests
    except requests.exceptions.HTTPError as err:
        status = err.response.status_code
        if status == 404:
            consecutive_404s += 1
            print(f"Warning! URL {url} returned status code {status}")
            logging.info(f"Received {status} for URL: {url} {consecutive_404s} times in a row.")
            if consecutive_404s >= 1000:
                print(f"Received {status} {consecutive_404s} times in a row. Exiting loop.")
                logging.info(f"Received {status} {consecutive_404s} times in a row. Exiting loop.")
                break
        elif status == 405:
            print(f"Error! URL {url} returned status code 405 (Method Not Allowed)")
            logging.info(f"Received 405 for URL: {url}")
            break
        else:
            # Handle other HTTP errors (optional)
            print(f"Warning! URL {url} returned status code {status}")
    except requests.exceptions.RequestException as err:
        # Handle other request exceptions (connection errors, timeouts, etc.)
        print(f"An error occurred during the request: {err}")
        logging.info(f"An error occurred during the request: {err}")
        time.sleep(delay / 2)  # Shorter pause before moving on to the next ID
        continue  # Can be adjusted based on desired behavior

# End time measurement
end_time = time.time()

# Calculate elapsed time
elapsed_time = end_time - start_time

# Print results
print("-" * 50)  # Optional divider for clarity
print(f"Total URLs checked: {total_urls}")
print(f"Elapsed time: {convert_to_readable_time(elapsed_time)}")
logging.info(f"Elapsed time: {convert_to_readable_time(elapsed_time)}")