Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- from datetime import datetime
- import re
- import time
- import pytz
# I've only translated the text properly, so please don't mind if some sentences are printed in Japanese sometimes.
# Install the pytz and requests modules before use.
print("Start a program to automatically retrieve HTTP(s) Proxy from 27 sites.\nCreated by Ririka\n(https://misskey.kindworld.one/@KisaragiRirika)\n")

# Source 1: ProxyScrape proxy table (JSON; the "http" key maps "ip:port" -> metadata).
url2 = "https://api.proxyscrape.com/proxytable.php"
response = requests.get(url2)
data = response.json()
http_proxies = data.get("http")

# The timestamped output file every later section appends to.
# Bound unconditionally: the original only assigned `filename` inside the
# if-branch, so an empty ProxyScrape response crashed the rest of the
# script with a NameError.
current_datetime = datetime.now()
formatted_datetime = current_datetime.strftime('%Y-%m-%d_%H-%M')
filename = f"httpProxies_{formatted_datetime}.txt"

if http_proxies:
    # The dict keys are the "ip:port" strings; values (metadata) are ignored.
    proxies = list(http_proxies.keys())
    with open(filename, "w") as file:
        file.write("\n".join(proxies) + "\n")
    print(f"1.ProxyScrapeからの取得が完了しました。\nProxyを「{filename}」に保存しました。\n")
else:
    print("Proxy情報が見つかりませんでした。")
def extract_proxies(url):
    """Download *url* and return every ip:port token found in its body text."""
    ip_port = re.compile(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+\b')
    body = requests.get(url).text
    return ip_port.findall(body)
# Source 2: RoosterKid's HTTPS proxy list on GitHub.
proxy_url = "https://raw.githubusercontent.com/roosterkid/openproxylist/main/HTTPS.txt"
proxies = extract_proxies(proxy_url)
def append_proxies_to_file(filename, proxy_list):
    """Append every entry of *proxy_list* to *filename*, one per line."""
    with open(filename, "a") as out:
        out.write("\n".join(proxy_list) + "\n")
# Append the RoosterKid results gathered above.
append_proxies_to_file(filename, proxies)
print(f"2.RoostarKidからの取得が完了しました。\nProxyを「{filename}」に追記しました。\n")

# Source 3: TheSpeedX PROXY-List (plain text, one ip:port per line).
proxy_url2 = "https://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt"
http_response = requests.get(proxy_url2)
http_proxies = http_response.text.strip().split("\n")
with open(filename, "a") as file:
    file.write("\n".join(http_proxies) + "\n")
print(f"3.TheSpeedXからの取得が完了しました。\nProxyを「{filename}」に追記しました。\n")
# Source 4: CheckerProxy daily archive. The archive URL is dated, and the
# site rolls its day over on Moscow time, hence the explicit timezone.
moscow_timezone = pytz.timezone('Europe/Moscow')
current_datetime_moscow = datetime.now(moscow_timezone)
url_date = current_datetime_moscow.strftime('%Y-%m-%d')
url = f"https://checkerproxy.net/api/archive/{url_date}"
response = requests.get(url)
data = response.json()

# Keep records of type 1, 2 or 5 — presumably the HTTP/HTTPS variants of
# the CheckerProxy "type" field (TODO confirm against the API docs).
proxy_list = []
for record in data:
    proxy_type = record.get("type")
    addr = record.get("addr")
    if proxy_type in [1, 2, 5] and addr:
        proxy_list.append(addr)
with open(filename, "a") as file:
    file.write("\n".join(proxy_list) + "\n")
print(f"4.CheckerProxyからの取得が完了しました。\nProxyを「{filename}」に追記しました。\n")
# Sources 5/6: FreeProxyUpdate HTTP and HTTPS lists. The original had the
# same fetch-and-write section copy-pasted twice; merged into one loop.
# (The progress output labels this step "6."; no step "5." is printed.)
http_url = "https://freeproxyupdate.com/files/txt/http.txt"
https_url = "https://freeproxyupdate.com/files/txt/https-ssl.txt"
proxy_pattern = re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}:\d+\b')
with open(filename, "a") as file:
    for source_url in (http_url, https_url):
        response = requests.get(source_url)
        for proxy in re.findall(proxy_pattern, response.text):
            file.write(proxy + "\n")
print(f"6.FreeProxyUpdateからの取得が完了しました。\nProxyを「{filename}」に追記しました。\n")
# Source 7: OpenProxy Space list API — JSON whose "data" entries each carry
# an "items" list of ip:port strings.
url = "https://api.openproxy.space/lists/http"
response = requests.get(url)
data = response.json()
with open(filename, "a") as file:
    for item in data.get("data", []):
        for proxy in item.get("items", []):
            file.write(proxy + "\n")
print(f"7.OpenProxyからの取得が完了しました。\nProxyを「{filename}」に追記しました。\n\n大量のtxt形式のProxy listからまとめて取得する関数を始動します。")
def get_and_append_proxies_from_url(url, filename):
    """Scrape ip:port proxies from *url* and append them to *filename*.

    Prints a per-URL progress message; the original message had the file
    name scrubbed to "(unknown)" — restored as the f-string placeholder.
    """
    proxies = extract_proxies(url)
    append_proxies_to_file(filename, proxies)
    print(f"{url}\nからの取得が完了しました。\nProxyを「{filename}」に追記しました。\n")
# Batch of plain-text proxy lists, announced as steps 8..27 in the output.
# Note: "https://api.openproxylist.xyz/http.txt" appears twice — kept as-is,
# presumably to preserve the advertised source count (dedup happens later).
new_proxy_urls = [
    "https://raw.githubusercontent.com/mmpx12/proxy-list/master/http.txt",
    "https://api.openproxylist.xyz/http.txt",
    "https://raw.githubusercontent.com/sunny9577/proxy-scraper/master/proxies.txt",
    "https://raw.githubusercontent.com/jetkai/proxy-list/main/online-proxies/txt/proxies-http.txt",
    "https://raw.githubusercontent.com/shiftytr/proxy-list/master/proxy.txt",
    "https://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/http.txt",
    "https://raw.githubusercontent.com/monosans/proxy-list/main/proxies_anonymous/http.txt",
    "https://raw.githubusercontent.com/monosans/proxy-list/main/proxies/http.txt",
    "https://proxy-spider.com/api/proxies.example.txt",
    "https://multiproxy.org/txt_all/proxy.txt",
    "https://proxyspace.pro/http.txt",
    "https://proxyspace.pro/https.txt",
    "https://alexa.lr2b.com/proxylist.txt",
    "https://api.openproxylist.xyz/http.txt",
    "https://rootjazz.com/proxies/proxies.txt",
    "https://sheesh.rip/http.txt",
    "https://raw.githubusercontent.com/proxy4parsing/proxy-list/main/http.txt",
    "https://raw.githubusercontent.com/opsxcq/proxy-list/master/list.txt",
    "https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt",
    "https://raw.githubusercontent.com/B4RC0DE-TM/proxy-list/main/HTTP.txt",
]
for offset, new_url in enumerate(new_proxy_urls):
    idx = offset + 8
    print(f"{idx}.{new_url}\nからの取得を開始します。")
    get_and_append_proxies_from_url(new_url, filename)
def extract_proxies2(url):
    """Fetch *url* and return all ip:port matches found in its text.

    Functionally a duplicate of ``extract_proxies`` with an equivalent
    regex; kept as a separate function for the Spys.me step.
    """
    pattern = re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}:\d+\b')
    return pattern.findall(requests.get(url).text)
# Source 28: Spys.me free proxy text dump.
proxy_url = "https://spys.me/proxy.txt"
proxies = extract_proxies2(proxy_url)
append_proxies_to_file(filename, proxies)
print(f"28.Spysmeからの取得が完了しました。\nProxyを「{filename}」に追記しました。\n")
def count_lines_in_file(filename):
    """Return how many lines *filename* contains."""
    total = 0
    with open(filename, "r") as fh:
        for _ in fh:
            total += 1
    return total
def remove_duplicates_and_empty_lines(filename):
    """Rewrite *filename* in place, keeping the first occurrence of each
    non-empty line (lines are stripped of surrounding whitespace).

    Note: like the original, the rewritten file has no trailing newline.
    """
    with open(filename, "r") as fh:
        stripped = [ln.strip() for ln in fh]
    # dict.fromkeys preserves insertion order while deduplicating.
    unique = list(dict.fromkeys(ln for ln in stripped if ln))
    with open(filename, "w") as fh:
        fh.write("\n".join(unique))
# Final pass: dedupe the accumulated file, report the total, then pause
# briefly so the summary is readable before the process exits.
remove_duplicates_and_empty_lines(filename)
print("Duplicate and blank lines have been removed.")
line_count = count_lines_in_file(filename)
print(f"All Proxy information has been retrieved. Number of HTTP(s) Proxy(s) obtained: {line_count}\nProxy list is saved to「{filename}」. 3 seconds later it exits.")
time.sleep(3)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement