import requests
import re
import time
import logging
import sys

# Configure logging
logging.basicConfig(filename='log.txt', level=logging.INFO, format='%(asctime)s %(message)s')

def convert_to_readable_time(seconds):
    """Converts seconds to a string with days, hours, minutes, and seconds."""
    days, remaining_seconds = divmod(seconds, 86400)  # 86400 seconds in a day
    hours, remaining_seconds = divmod(remaining_seconds, 3600)
    minutes, remaining_seconds = divmod(remaining_seconds, 60)
    # Cast to int so float inputs (e.g. the measured elapsed time) don't render as "1.0d 1.0h ..."
    return f"{int(days)}d {int(hours)}h {int(minutes)}m {remaining_seconds:.2f}s"
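
# Illustrative example (not part of the original paste): with the integer casts above,
# convert_to_readable_time(90061) returns "1d 1h 1m 1.00s".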

# Define default values
default_type = "anime"
default_start = "1"
default_end = "61200"
default_delay = "5"

# Get CLI arguments, falling back to the defaults when they are not provided
type = sys.argv[1] if len(sys.argv) > 1 else default_type
start = int(sys.argv[2]) if len(sys.argv) > 2 else int(default_start)

def fetch_latest_id(url):
    response = requests.get(url)
    response.raise_for_status()  # Raise an exception for error responses
    # Extract the anime/manga ID from the HTML content using regular expressions
    if type == "anime":
        match = re.search(r'/anime/(\d+)', response.text)
    else:
        match = re.search(r'/manga/(\d+)', response.text)
    if match:
        return match.group(1)
    else:
        raise ValueError("Unable to find anime/manga ID in the response")

# MyAnimeList anime/manga search URLs; the first /anime/<id> or /manga/<id> link in the
# result page is taken as the latest ID and used as the default end of the range
url1 = "https://myanimelist.net/anime.php?o=9&c%5B0%5D=a&c%5B1%5D=d&cv=2&w=1"
url2 = "https://myanimelist.net/manga.php?o=9&c%5B0%5D=a&c%5B1%5D=d&cv=2&w=1"

if type == "anime":
    default_end = fetch_latest_id(url1)
else:
    default_end = fetch_latest_id(url2)

# Get the remaining CLI arguments, falling back to the defaults when they are not provided
end = int(sys.argv[3]) if len(sys.argv) > 3 else int(default_end)
delay = int(sys.argv[4]) if len(sys.argv) > 4 else int(default_delay)

# Calculate total URLs
total_urls = end - start + 1

# Estimate total execution time (assuming the fixed delay dominates each request)
estimated_total_time = total_urls * delay
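# Rough illustration (using the fallback defaults, not a measured figure): 61200 IDs at a
# 5-second delay give 61200 * 5 = 306000 seconds, which prints as "3d 13h 0m 0.00s".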

# Start time measurement
start_time = time.time()

print(f"{type} {start} {end} {delay}")

# Print estimated time prominently
print("-" * 50)  # Optional divider for clarity
print(f"Estimated total execution time: {convert_to_readable_time(estimated_total_time)}")
print("-" * 50)  # Optional divider for clarity

# Counter for consecutive 404s
consecutive_404s = 0

# Loop through ids and check URLs
for i in range(start, end + 1):
    url = f"https://shaggyze.website/msa/info?t={type}&id={i}"

    # Re-estimate remaining time within the loop for a potentially more accurate figure
    current_urls = end - i + 1
    estimated_remaining_time = (current_urls - 1) * delay  # Subtract 1 for the current iteration
    print(f"Estimated remaining time: {convert_to_readable_time(estimated_remaining_time)}")
    logging.info(f"Estimated remaining time: {convert_to_readable_time(estimated_remaining_time)}")

    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise an exception for non-200 status codes
        consecutive_404s = 0  # Reset counter after a successful request
        logging.info(f"Successfully accessed URL: {url}")
        if response.status_code == 200:
            print(f"Success! URL {url} returned status code {response.status_code}")
        time.sleep(delay)  # Pause between requests
    except requests.exceptions.HTTPError as err:
        status = err.response.status_code
        if status == 404:
            consecutive_404s += 1
            print(f"Warning! URL {url} returned status code {status}")
            logging.info(f"Received {status} for URL: {url} {consecutive_404s} times in a row.")
            if consecutive_404s >= 1000:
                print(f"Received {status} {consecutive_404s} times in a row. Exiting loop.")
                logging.info(f"Received {status} {consecutive_404s} times in a row. Exiting loop.")
                break
        elif status == 405:
            print(f"Error! URL {url} returned status code 405 (Method Not Allowed)")
            logging.info(f"Received 405 for URL: {url}")
            break
        else:
            # Handle other HTTP errors (optional)
            print(f"Warning! URL {url} returned status code {status}")
    except requests.exceptions.RequestException as err:
        # Handle other request exceptions (connection errors, timeouts, etc.)
        print(f"An error occurred during the request: {err}")
        logging.info(f"An error occurred during the request: {err}")
        time.sleep(delay / 2)  # Shorter pause before moving on to the next ID
        continue  # Can be adjusted based on desired behavior

# End time measurement
end_time = time.time()

# Calculate elapsed time
elapsed_time = end_time - start_time

# Print results
print("-" * 50)  # Optional divider for clarity
print(f"Total URLs checked: {total_urls}")
print(f"Elapsed time: {convert_to_readable_time(elapsed_time)}")
logging.info(f"Elapsed time: {convert_to_readable_time(elapsed_time)}")