proxyfinder.py
opexxx, Apr 23rd, 2014

#!/usr/bin/env python2

# gevent's monkey patching must happen before requests/socket are imported
# so that all network I/O becomes cooperative (non-blocking)
from gevent import monkey
monkey.patch_all()

import requests
import ast
import gevent
import sys, re, argparse

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--show', help='Show this number of results. Example: -s 5 will show the 5 fastest proxies then stop')
    return parser.parse_args()

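# Example invocation (illustrative):
#   ./proxyfinder.py           # test every proxy found
#   ./proxyfinder.py -s 10     # stop after 10 proxies have been checked
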
class find_http_proxy():
    ''' Gathers only L1 (elite anonymity) proxies, which should neither give
    out your IP nor advertise that you are using a proxy at all '''

    def __init__(self, args):
        self.checked_proxies = []
        self.proxy_list = []
        self.headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.154 Safari/537.36'}
        self.show = args.show
        self.proxy_counter = 0
        self.errors = []

    def run(self):
        ''' Fetches raw high anonymity (L1) proxy data from letushide.com and
        gatherproxy.com, merges it, then hands the list to proxy_checker() '''
        letushide_list = self.letushide_req()
        gatherproxy_list = self.gatherproxy_req()

        self.proxy_list.append(letushide_list)
        self.proxy_list.append(gatherproxy_list)
        # Flatten list of lists (1 master list containing 1 list of ips per proxy website)
        self.proxy_list = [ips for proxy_site in self.proxy_list for ips in proxy_site]

        print '[*] %d high anonymity proxies found' % len(self.proxy_list)
        print '[*] Testing proxy speeds ...'
        print ''
        print '      Proxy           |       Domain         - Load Time/Errors'

        self.proxy_checker()

    def letushide_req(self):
        ''' Make the request to the proxy site and create a master list from that site '''
        letushide_ips = []
        for i in xrange(1, 21): # site serves at most 20 pages of results
            try:
                url = 'http://letushide.com/filter/http,hap,all/%s/list_of_free_HTTP_High_Anonymity_proxy_servers' % str(i)
                r = requests.get(url, headers=self.headers)
                html = r.text
                ips = self.parse_letushide(html)
                letushide_ips.append(ips)

                # Stop paging once the html no longer links to a next page
                if '/filter/http,hap,all/%s/list_of_free_HTTP_High_Anonymity_proxy_servers' % str(i+1) not in html:
                    break
            except Exception:
                print '[!] Failed to get a reply from %s' % url
                break

        # Flatten list of lists (1 list containing 1 list of ips for each page)
        letushide_list = [item for sublist in letushide_ips for item in sublist]
        return letushide_list

    def parse_letushide(self, html):
        ''' Parse out list of IP:port strings from the html '''
        # \d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}  -  matches IP addresses
        # </a></td><td>  -  sits between the IP and the port
        # .*?<  -  match as few characters as possible (non-greedy), stopping at the next <
        raw_ips = re.findall(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}</a></td><td>.*?<', html)
        ips = []
        for ip in raw_ips:
            ip = ip.replace('</a></td><td>', ':')
            ip = ip.strip('<')
            ips.append(ip)
        return ips

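    # Worked example of parse_letushide() (the '</a></td><td>' markup is an
    # assumption based on the scrape above; a site redesign would break it):
    #   findall() grabs:     '1.2.3.4</a></td><td>8080<'
    #   replace() yields:    '1.2.3.4:8080<'
    #   strip('<') leaves:   '1.2.3.4:8080'
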
    def gatherproxy_req(self):
        ''' Scrape gatherproxy.com's elite anonymity proxy list '''
        r = requests.get('http://gatherproxy.com/proxylist/anonymity/?t=Elite', headers = self.headers)
        lines = r.text.splitlines()
        gatherproxy_list = self.parse_gp(lines)
        return gatherproxy_list

    def parse_gp(self, lines):
        ''' Parse the raw scraped data '''
        gatherproxy_list = []
        for l in lines:
            if 'proxy_ip' in l.lower():
                # Strip the JS call wrapper so only a dict literal remains,
                # then parse it safely with ast.literal_eval
                l = l.replace('gp.insertPrx(', '')
                l = l.replace(');', '')
                l = l.replace('null', 'None')
                l = l.strip()
                l = ast.literal_eval(l)

                proxy = '%s:%s' % (l["PROXY_IP"], l["PROXY_PORT"])
                gatherproxy_list.append(proxy)
                #ctry = l["PROXY_COUNTRY"]
        return gatherproxy_list

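    # parse_gp() assumes each relevant line of gatherproxy.com looks roughly
    # like this (inferred from the replacements above; the live page may vary):
    #   gp.insertPrx({"PROXY_IP":"1.2.3.4","PROXY_PORT":"8080","PROXY_COUNTRY":"US"});
    # Once the JS wrapper is stripped and null is mapped to None, the remainder
    # is a plain dict literal that ast.literal_eval() parses without the code
    # execution risk of eval().
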
    def proxy_checker(self):
        ''' Test every proxy concurrently, one gevent greenlet per proxy '''
        jobs = [gevent.spawn(self.proxy_checker_req, proxy) for proxy in self.proxy_list]
        gevent.joinall(jobs)

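    # Because monkey.patch_all() made socket I/O cooperative, each greenlet's
    # blocking requests.get() yields to the others while it waits on the
    # network, so all proxies are tested in parallel. The same fan-out pattern
    # in miniature (urls being any list of URLs):
    #   jobs = [gevent.spawn(requests.get, u) for u in urls]
    #   gevent.joinall(jobs, timeout=60)
    #   responses = [job.value for job in jobs if job.value is not None]
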
    def proxy_checker_req(self, proxy):
        ''' Time each proxy against a handful of IP-reporting sites and
        confirm the proxy's IP, not yours, is what the page reports '''
        urls = ['http://www.ipchicken.com', 'http://whatsmyip.net/', 'https://www.astrill.com/what-is-my-ip-address.php']
        results = []
        for url in urls:
            try:
                check = requests.get(url,
                                    headers = self.headers,
                                    proxies = {'http':'http://'+proxy,
                                               'https':'http://'+proxy},
                                    timeout = 15)
                time = str(check.elapsed)
                html = check.text
                proxyip = str(proxy.split(':', 1)[0])
                proxy_split = proxyip.split('.')
                first_3_octets = '.'.join(proxy_split[:3])+'.'

                if 'Access denied' in html:
                    time = 'Access denied'
                # The page should echo back the proxy's IP; if its first 3
                # octets are missing, the proxy leaked our real address
                elif first_3_octets not in html:
                    time = 'Page loaded but proxy failed'
                    if 'captcha' in html.lower():
                        time = time+' - Captcha detected'

                url = self.url_shortener(url)
                results.append((time, proxy, url))

            except Exception as e:
                #raise
                time = self.error_handler(e)
                url = self.url_shortener(url)
                results.append((time, proxy, url))

        self.printer(results)
        self.limiter()

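    # Note: requests' Response.elapsed measures the time between sending the
    # request and parsing the response headers, not the full body download,
    # so the printed load times slightly understate total page-load time.
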
    def error_handler(self, e):
        ''' Translate common exception text into short, readable errors '''
        if 'Cannot connect' in str(e):
            time = 'Cannot connect to proxy'
        elif 'timed out' in str(e).lower():
            time = 'Timed out'
        elif 'retries exceeded' in str(e):
            time = 'Max retries exceeded'
        elif 'Connection reset by peer' in str(e):
            time = 'Connection reset by peer'
        elif 'readline() takes exactly 1 argument (2 given)' in str(e):
            time = 'SSL error'
        else:
            time = 'Err: '+str(e)
        return time

    def url_shortener(self, url):
        ''' Shorten the test URLs so the results table stays aligned '''
        if 'ipchicken' in url:
            url = 'http://ipchicken.com'
        elif 'whatsmyip' in url:
            url = 'http://whatsmyip.net'
        elif 'astrill' in url:
            url = 'https://astrill.com'
        return url

    def printer(self, results):
        ''' Print one table row per (time, proxy, url) result '''
        print '---------------------------------------------------------------'
        for r in results:
            time = r[0]
            proxy = r[1]
            url = r[2]
            print '%s | %s - %s' % (proxy.ljust(21), url.ljust(20), time)

    def limiter(self):
        ''' Kill the script once the user-supplied limit of checked proxies (-s argument) is reached '''
        if self.show:
            self.proxy_counter += 1
            if self.proxy_counter == int(self.show):
                sys.exit()

if __name__ == '__main__':
    P = find_http_proxy(parse_args())
    P.run()
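
# Example run (proxy addresses and timings are illustrative only):
#   $ ./proxyfinder.py -s 2
#   [*] 150 high anonymity proxies found
#   [*] Testing proxy speeds ...
#
#         Proxy           |       Domain         - Load Time/Errors
#   ---------------------------------------------------------------
#   1.2.3.4:8080          | http://ipchicken.com  - 0:00:03.245123
#   1.2.3.4:8080          | http://whatsmyip.net  - Timed out
#   1.2.3.4:8080          | https://astrill.com   - 0:00:05.012345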