#!/usr/bin/env python2

from gevent import monkey
monkey.patch_all()

import requests
import ast
import gevent
import sys, re, time, os, argparse


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--show', help='Show this number of results. Example: -s 5 will show the 5 fastest proxies then stop')
    return parser.parse_args()
class find_http_proxy():
    ''' Will only gather L1 (elite anonymity) proxies
    which should not give out your IP or advertise
    that you are using a proxy at all '''

    def __init__(self, args):
        self.checked_proxies = []
        self.proxy_list = []
        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.154 Safari/537.36'}
        self.show = args.show
        self.proxy_counter = 0
        self.errors = []
    def run(self):
        ''' Gets raw high anonymity (L1) proxy data, then builds and tests the master proxy list.
        Currently parses data from gatherproxy.com and letushide.com '''
        letushide_list = self.letushide_req()
        gatherproxy_list = self.gatherproxy_req()
        self.proxy_list.append(letushide_list)
        self.proxy_list.append(gatherproxy_list)
        # Flatten list of lists (1 master list containing 1 list of ips per proxy website)
        self.proxy_list = [ips for proxy_site in self.proxy_list for ips in proxy_site]
        print '[*] %d high anonymity proxies found' % len(self.proxy_list)
        print '[*] Testing proxy speeds ...'
        print ''
        print ' Proxy | Domain - Load Time/Errors'
        self.proxy_checker()
    def letushide_req(self):
        ''' Make the request to the proxy site and create a master list from that site '''
        letushide_ips = []
        for i in xrange(1, 20):  # search up to 19 result pages
            try:
                url = 'http://letushide.com/filter/http,hap,all/%s/list_of_free_HTTP_High_Anonymity_proxy_servers' % str(i)
                r = requests.get(url, headers=self.headers)
                html = r.text
                ips = self.parse_letushide(html)
                # Check html for a link to the next page
                if '/filter/http,hap,all/%s/list_of_free_HTTP_High_Anonymity_proxy_servers' % str(i+1) in html:
                    pass
                else:
                    letushide_ips.append(ips)
                    break
                letushide_ips.append(ips)
            except:
                print '[!] Failed to get a reply from %s' % url
                break

        # Flatten list of lists (1 list containing 1 list of ips for each page)
        letushide_list = [item for sublist in letushide_ips for item in sublist]
        return letushide_list
    def parse_letushide(self, html):
        ''' Parse out list of IP:port strings from the html '''
        # \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} - matches IP addresses
        # </a></td><td> - is in between the IP and the port
        # .*?< - match all text (.) for as many characters as possible (*) but don't be greedy (?) and stop at the next greater than (<)
        raw_ips = re.findall(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}</a></td><td>.*?<', html)
        ips = []
        for ip in raw_ips:
            ip = ip.replace('</a></td><td>', ':')
            ip = ip.strip('<')
            ips.append(ip)
        return ips
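    # For illustration only (this comment is not part of the original paste):
    # parse_letushide() turns a scraped fragment such as
    # '12.34.56.78</a></td><td>8080<' into the string '12.34.56.78:8080'.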
    def gatherproxy_req(self):
        ''' Fetch the elite (L1) proxy listing page from gatherproxy.com '''
        r = requests.get('http://gatherproxy.com/proxylist/anonymity/?t=Elite', headers=self.headers)
        lines = r.text.splitlines()
        gatherproxy_list = self.parse_gp(lines)
        return gatherproxy_list

    def parse_gp(self, lines):
        ''' Parse the raw scraped data '''
        gatherproxy_list = []
        for l in lines:
            if 'proxy_ip' in l.lower():
                l = l.replace('gp.insertPrx(', '')
                l = l.replace(');', '')
                l = l.replace('null', 'None')
                l = l.strip()
                l = ast.literal_eval(l)
                proxy = '%s:%s' % (l["PROXY_IP"], l["PROXY_PORT"])
                gatherproxy_list.append(proxy)
                #ctry = l["PROXY_COUNTRY"]
        return gatherproxy_list
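    # For illustration only (not in the original paste; the exact line shape is an
    # assumption inferred from the stripping/replacing above): the listing page
    # embeds JavaScript calls roughly like
    #   gp.insertPrx({"PROXY_IP":"12.34.56.78","PROXY_PORT":"8080","PROXY_COUNTRY":null});
    # which parse_gp() reduces to a Python dict via ast.literal_eval and keeps
    # only PROXY_IP and PROXY_PORT as '12.34.56.78:8080'.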
    def proxy_checker(self):
        ''' Concurrency stuff here: spawn one greenlet per proxy and wait for them all '''
        jobs = [gevent.spawn(self.proxy_checker_req, proxy) for proxy in self.proxy_list]
        gevent.joinall(jobs)

    def proxy_checker_req(self, proxy):
        ''' See how long each proxy takes to open each of the test pages below '''
        urls = ['http://www.ipchicken.com', 'http://whatsmyip.net/', 'https://www.astrill.com/what-is-my-ip-address.php']
        results = []
        for url in urls:
            try:
                check = requests.get(url,
                                     headers=self.headers,
                                     proxies={'http': 'http://'+proxy,
                                              'https': 'http://'+proxy},
                                     timeout=15)
                time = str(check.elapsed)
                html = check.text
                proxyip = str(proxy.split(':', 1)[0])
                proxy_split = proxyip.split('.')
                first_3_octets = '.'.join(proxy_split[:3])+'.'
                if 'Access denied' in html:
                    time = 'Access denied'
                elif first_3_octets not in html:
                    time = 'Page loaded but proxy failed'
                if 'captcha' in html.lower():
                    time = time+' - Captcha detected'
                url = self.url_shortener(url)
                results.append((time, proxy, url))
            except Exception as e:
                #raise
                time = self.error_handler(e)
                url = self.url_shortener(url)
                results.append((time, proxy, url))

        self.printer(results)
        self.limiter()
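    # Note (added for clarity; not in the original paste): the three test URLs are
    # "what is my IP" pages that echo the visiting address back in their HTML, so
    # if the first three octets of the proxy's IP never appear in the response,
    # the request either bypassed the proxy or leaked the real address and the
    # proxy is reported as failed.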
    def error_handler(self, e):
        if 'Cannot connect' in str(e):
            time = 'Cannot connect to proxy'
        elif 'timed out' in str(e).lower():
            time = 'Timed out'
        elif 'retries exceeded' in str(e):
            time = 'Max retries exceeded'
        elif 'Connection reset by peer' in str(e):
            time = 'Connection reset by peer'
        elif 'readline() takes exactly 1 argument (2 given)' in str(e):
            time = 'SSL error'
        else:
            time = 'Err: '+str(e)
        return time

    def url_shortener(self, url):
        if 'ipchicken' in url:
            url = 'http://ipchicken.com'
        elif 'whatsmyip' in url:
            url = 'http://whatsmyip.net'
        elif 'astrill' in url:
            url = 'https://astrill.com'
        return url
    def printer(self, results):
        print '---------------------------------------------------------------'
        for r in results:
            time = r[0]
            proxy = r[1]
            url = r[2]
            print '%s | %s - %s' % (proxy.ljust(21), url.ljust(20), time)

    def limiter(self):
        ''' Kill the script if the user-supplied limit of proxy attempts (-s argument) is reached '''
        if self.show:
            self.proxy_counter += 1
            if self.proxy_counter == int(self.show):
                sys.exit()
P = find_http_proxy(parse_args())
P.run()
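# Usage sketch (this trailing comment is not part of the original paste and the
# filename is an assumption): save the script as find_proxies.py and run it under
# Python 2 with the gevent and requests packages installed, e.g.
#
#   python2 find_proxies.py          # scrape and test every proxy found
#   python2 find_proxies.py -s 5     # stop after the first 5 proxies are checked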