Advertisement
ShaggyZE1

info.py

Oct 21st, 2024 (edited)
108
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.95 KB | None | 0 0
# Imports for the MAL info-cache crawler: HTTP client, regex scraping,
# pacing/timing, logging, and CLI-argument access.
import requests
import re
import time
import logging
import sys

# Configure logging: timestamped entries appended to log.txt in the working directory.
logging.basicConfig(filename='log.txt', level=logging.INFO, format='%(asctime)s %(message)s')
  10. def convert_to_readable_time(seconds):
  11.     """Converts seconds to a string with days, hours, minutes, and seconds."""
  12.     days, remaining_seconds = divmod(seconds, 86400)  # 86400 seconds in a day
  13.     hours, remaining_seconds = divmod(remaining_seconds, 3600)
  14.     minutes, remaining_seconds = divmod(remaining_seconds, 60)
  15.     return f"{days}d {hours}h {minutes}m {remaining_seconds:.2f}s"
  16.  
# Define default values (all strings; numeric ones are int()-converted at use).
default_type = "anime"   # media kind: "anime" or "manga"
default_start = "1"      # first ID to probe
default_end = "61200"    # fallback upper ID; replaced later by the live latest ID
default_delay = "5"      # seconds to pause between requests

# Get CLI arguments, or use default values if they are null.
# Usage: script.py [type] [start] [end] [delay]
# NOTE(review): `type` shadows the builtin of the same name; kept as-is
# because the rest of the script reads this global.
type = sys.argv[1] if len(sys.argv) > 1 else default_type
start = int(sys.argv[2]) if len(sys.argv) > 2 else int(default_start)
  26.  
  27. def fetch_latest_id(url):
  28.     response = requests.get(url)
  29.     response.raise_for_status()  # Raise an exception for error responses
  30.  
  31.     # Extract the anime/manga ID from the HTML content using regular expressions
  32.     if type == "anime":
  33.         match = re.search(r'/anime/(\d+)', response.text)
  34.     else:
  35.         match = re.search(r'/manga/(\d+)', response.text)
  36.  
  37.     if match:
  38.         return match.group(1)
  39.     else:
  40.         raise ValueError("Unable to find anime/manga ID in the response")
  41.  
# anime/manga search url:
# presumably these advanced-search queries sort newest-first, so the first
# /anime/<id> or /manga/<id> link in the HTML is the latest entry — verify
# against MAL if the scraped default_end looks wrong.
url1 = "https://myanimelist.net/anime.php?o=9&c%5B0%5D=a&c%5B1%5D=d&cv=2&w=1"
url2 = "https://myanimelist.net/manga.php?o=9&c%5B0%5D=a&c%5B1%5D=d&cv=2&w=1"

# Replace the hard-coded fallback with the newest ID currently on MAL.
# NOTE: this performs a network request at import/startup time.
if type == "anime":
    default_end = fetch_latest_id(url1)
else:
    default_end = fetch_latest_id(url2)

# Get CLI arguments, or use default values if they are null
end = int(sys.argv[3]) if len(sys.argv) > 3 else int(default_end)
delay = int(sys.argv[4]) if len(sys.argv) > 4 else int(default_delay)
  54.  
# Calculate total URLs (inclusive range: start..end)
total_urls = end - start + 1

# Estimate total execution time (assuming constant execution time per request)
estimated_total_time = total_urls * delay

# Start time measurement (used for the elapsed-time report after the loop)
start_time = time.time()

print(f"{type} {start} {end} {delay}")
# Print estimated time prominently
print("-" * 50)  # Optional divider for clarity
print(f"Estimated total execution time: {convert_to_readable_time(estimated_total_time)}")
print("-" * 50)  # Optional divider for clarity

# Counter for consecutive 404s; the loop aborts after a long unbroken run of them.
consecutive_404s = 0
  72.  
  73. # Loop through ids and check URLs
  74. for i in range(start, end + 1):
  75.     url = f"https://shaggyze.website/msa/info?t={type}&id={i}"
  76.  
  77.     # Estimate remaining time within the loop for potentially more accurate estimate
  78.     current_urls = end - i + 1
  79.     estimated_remaining_time = (current_urls - 1) * delay  # Subtract 1 for the completed loop
  80.     print(f"Estimated remaining time: {convert_to_readable_time(estimated_remaining_time)}")
  81.     logging.info(f"Estimated remaining time: {convert_to_readable_time(estimated_remaining_time)}")
  82.  
  83.     try:
  84.       response = requests.get(url)
  85.       response.raise_for_status()  # Raise an exception for non-200 status codes
  86.       consecutive_404s = 0  # Reset counter if successful request
  87.       logging.info(f"Successfully accessed URL: {url}")
  88.       if response.status_code == 200:
  89.         print(f"Success! URL {url} returned status code {response.status_code}")
  90.         time.sleep(delay)  # Pause between requests
  91.     except requests.exceptions.HTTPError as err:
  92.       if err.response.status_code == 404:
  93.         consecutive_404s += 1
  94.         print(f"Warning! URL {url} returned status code {response.status_code}")
  95.         logging.info(f"Received {response.status_code} for URL: {url} {consecutive_404s} times in a row.")
  96.         if consecutive_404s >= 1000:
  97.           print(f"Received {response.status_code} {consecutive_404s} times in a row. Exiting loop.")
  98.           logging.info(f"Received {response.status_code} {consecutive_404s} times in a row. Exiting loop.")
  99.           break
  100.       elif err.response.status_code == 405:
  101.         print(f"Error! URL {url} returned status code 405 (Method Not Allowed)")
  102.         logging.info(f"Received 405 for URL: {url}")
  103.         break
  104.       else:
  105.         # Handle other HTTP errors (optional)
  106.         print(f"Warning! URL {url} returned status code {response.status_code}")
  107.     except requests.exceptions.RequestException as err:
  108.       # Handle other request exceptions (optional)
  109.       print(f"An error occurred during the request: {err}")
  110.       logging.info(f"An error occurred during the request: {err}")
  111.       time.sleep(delay / 2)  # Pause between requests
  112.       continue # Can be adjusted based on desired behavior
  113.  
  114. # End time measurement
  115. end_time = time.time()
  116.  
  117. # Calculate elapsed time
  118. elapsed_time = end_time - start_time
  119.  
  120. # Print results
  121. print("-" * 50)  # Optional divider for clarity
  122. print(f"Total URLs checked: {total_urls}")
  123. print(f"Elapsed time: {convert_to_readable_time(elapsed_time)}")
  124. logging.info(f"Elapsed time: {convert_to_readable_time(elapsed_time)}")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement