r1rk

FRESH HTTP(S) proxy collection program in Python

Sep 6th, 2023
import requests
from datetime import datetime
import re
import time
import pytz

# Status messages are printed in English.
# Install the pytz and requests modules before use (pip install requests pytz).

print("Starting a program that automatically retrieves HTTP(S) proxies from 27 sites.\nCreated by Ririka\n(https://misskey.kindworld.one/@KisaragiRirika)\n")

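# Source 1: the ProxyScrape proxy table endpoint; the script reads the "http" entry of the JSON response.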
url2 = "https://api.proxyscrape.com/proxytable.php"

response = requests.get(url2)
data = response.json()

http_proxies = data.get("http")

# Build the output filename first so the later append steps still work
# even if ProxyScrape returns nothing.
current_datetime = datetime.now()
formatted_datetime = current_datetime.strftime('%Y-%m-%d_%H-%M')
filename = f"httpProxies_{formatted_datetime}.txt"

if http_proxies:
    proxies = list(http_proxies.keys())

    with open(filename, "w") as file:
        file.write("\n".join(proxies) + "\n")

    print(f"1. Retrieval from ProxyScrape is complete.\nSaved the proxies to '{filename}'.\n")
else:
    print("No proxy information was found.")

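# Helper used by several sources below: grab anything that looks like "IP:port" from a plain-text page.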
def extract_proxies(url):
    response = requests.get(url)
    proxy_data = response.text

    pattern = re.compile(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+\b')

    proxy_list = re.findall(pattern, proxy_data)
    return proxy_list

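# Source 2: RoosterKid's openproxylist (HTTPS list hosted on GitHub).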
proxy_url = "https://raw.githubusercontent.com/roosterkid/openproxylist/main/HTTPS.txt"

proxies = extract_proxies(proxy_url)

def append_proxies_to_file(filename, proxy_list):
    with open(filename, "a") as file:
        file.write("\n".join(proxy_list) + "\n")

append_proxies_to_file(filename, proxies)

print(f"2. Retrieval from RoosterKid is complete.\nAppended the proxies to '{filename}'.\n")

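# Source 3: TheSpeedX PROXY-List; the raw text is split on newlines and appended as-is.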
proxy_url2 = "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt"

http_response = requests.get(proxy_url2)
http_proxies = http_response.text.strip().split("\n")

with open(filename, "a") as file:
    file.write("\n".join(http_proxies) + "\n")

print(f"3. Retrieval from TheSpeedX is complete.\nAppended the proxies to '{filename}'.\n")

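# Source 4: the CheckerProxy daily archive; the date in the URL is built in Moscow time.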
moscow_timezone = pytz.timezone('Europe/Moscow')
current_datetime_moscow = datetime.now(moscow_timezone)
url_date = current_datetime_moscow.strftime('%Y-%m-%d')

url = f"https://checkerproxy.net/api/archive/{url_date}"
response = requests.get(url)
data = response.json()

proxy_list = []

for record in data:
    proxy_type = record.get("type")
    addr = record.get("addr")
    if proxy_type in [1, 2, 5] and addr:  # the type codes this script treats as HTTP(S)
        proxy_list.append(addr)

with open(filename, "a") as file:
    file.write("\n".join(proxy_list) + "\n")

print(f"4. Retrieval from CheckerProxy is complete.\nAppended the proxies to '{filename}'.\n")

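# Source 5: FreeProxyUpdate (separate HTTP and HTTPS/SSL text lists).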
http_url = "https://freeproxyupdate.com/files/txt/http.txt"
https_url = "https://freeproxyupdate.com/files/txt/https-ssl.txt"

proxy_pattern = re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}:\d+\b')

response = requests.get(http_url)
http_proxies = re.findall(proxy_pattern, response.text)

with open(filename, "a") as file:
    for proxy in http_proxies:
        file.write(proxy + "\n")

response = requests.get(https_url)
https_proxies = re.findall(proxy_pattern, response.text)

with open(filename, "a") as file:
    for proxy in https_proxies:
        file.write(proxy + "\n")

print(f"5. Retrieval from FreeProxyUpdate is complete.\nAppended the proxies to '{filename}'.\n")

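# Source 6: the openproxy.space list API; a JSON response with nested "data"/"items" arrays.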
url = "https://api.openproxy.space/lists/http"
response = requests.get(url)
data = response.json()

with open(filename, "a") as file:
    for item in data.get("data", []):
        for proxy in item.get("items", []):
            file.write(proxy + "\n")

print(f"6. Retrieval from OpenProxy is complete.\nAppended the proxies to '{filename}'.\n\nStarting the function that fetches proxies in bulk from a large set of plain-text proxy lists.")

def get_and_append_proxies_from_url(url, filename):
    proxies = extract_proxies(url)
    append_proxies_to_file(filename, proxies)
    print(f"Retrieval from\n{url}\nis complete.\nAppended the proxies to '{filename}'.\n")

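# Sources 7-26: a batch of plain-text proxy lists, all handled by the generic helper above.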
new_proxy_urls = [
    "https://raw.githubusercontent.com/mmpx12/proxy-list/master/http.txt",
    "https://api.openproxylist.xyz/http.txt",
    "https://raw.githubusercontent.com/sunny9577/proxy-scraper/master/proxies.txt",
    "https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-http.txt",
    "https://raw.githubusercontent.com/shiftytr/proxy-list/master/proxy.txt",
    "https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/http.txt",
    "https://raw.githubusercontent.com/monosans/proxy-list/main/proxies_anonymous/http.txt",
    "https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/http.txt",
    "https://proxy-spider.com/api/proxies.example.txt",
    "https://multiproxy.org/txt_all/proxy.txt",
    "https://proxyspace.pro/http.txt",
    "https://proxyspace.pro/https.txt",
    "https://alexa.lr2b.com/proxylist.txt",
    "https://api.openproxylist.xyz/http.txt",
    "https://rootjazz.com/proxies/proxies.txt",
    "https://sheesh.rip/http.txt",
    "https://raw.githubusercontent.com/proxy4parsing/proxy-list/main/http.txt",
    "https://raw.githubusercontent.com/opsxcq/proxy-list/master/list.txt",
    "https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt",
    "https://raw.githubusercontent.com/B4RC0DE-TM/proxy-list/main/HTTP.txt"
]

for idx, new_url in enumerate(new_proxy_urls, start=7):
    print(f"{idx}. Starting retrieval from\n{new_url}")
    get_and_append_proxies_from_url(new_url, filename)

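# Source 27: spys.me; extract_proxies2 uses an equivalent, more compact IP:port regex.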
def extract_proxies2(url):
    response = requests.get(url)
    proxy_data = response.text

    pattern = re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}:\d+\b')
    proxy_list = re.findall(pattern, proxy_data)

    return proxy_list

proxy_url = "https://spys.me/proxy.txt"

proxies = extract_proxies2(proxy_url)

append_proxies_to_file(filename, proxies)

print(f"27. Retrieval from Spys.me is complete.\nAppended the proxies to '{filename}'.\n")

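# Post-processing: remove duplicate and empty lines, then report how many proxies were collected.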
def count_lines_in_file(filename):
    with open(filename, "r") as file:
        line_count = sum(1 for line in file)
    return line_count

def remove_duplicates_and_empty_lines(filename):
    with open(filename, "r") as file:
        lines = file.readlines()

    unique_lines = set()
    clean_lines = []

    for line in lines:
        line = line.strip()
        if line and line not in unique_lines:
            unique_lines.add(line)
            clean_lines.append(line)

    with open(filename, "w") as file:
        file.write("\n".join(clean_lines))

remove_duplicates_and_empty_lines(filename)

print("Duplicate and blank lines have been removed.")

line_count = count_lines_in_file(filename)

print(f"All proxy information has been retrieved. Number of HTTP(S) proxies obtained: {line_count}\nThe proxy list has been saved to '{filename}'. The program will exit in 3 seconds.")
time.sleep(3)