Advertisement
opexxx

urlinfo.py

Jun 13th, 2014
287
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.00 KB | None | 0 0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Malware.lu
import sys
import argparse
import urllib
import urllib2
from urlparse import urlparse

import requests
from termcolor import colored
from lxml import html
import jsbeautifier
import dns.exception
import dns.resolver  # dnspython
import pygeoip
  14.  
  15. geoip_db = "/usr/share/GeoIP/GeoIP.dat" # from package ubuntu geoip-database
  16.  
  17. def check_blacklist_dns(ip):
  18.     provider = ['dnsbl.sorbs.net', 'cbl.abuseat.org',
  19.             'bl.spamcop.net', 'zen.spamhaus.org',
  20.             'sbl.spamhaus.org', 'xbl.spamhaus.org',
  21.             'pbl.spamhaus.org', 'combined.abuse.ch',
  22.             'dbl.spamhaus.org',
  23.             'drone.abuse.ch', 'spam.abuse.ch',
  24.             'dnsbl.abuse.ch',
  25.             'httpbl.abuse.ch', 'ipbl.zeustracker.abuse.ch',
  26.             'uribl.zeustracker.abuse.ch',
  27.             'psbl.surriel.com', 'bl.blocklist.de',
  28.             'bsb.empty.us', 'b.barracudacentral.org',
  29.             'bb.barracudacentral.org', 'bl.dronebl.org',
  30.             'origin.asn.cymru.com', 'peer.asn.cymru.com']
  31.     ip_rev = '.'.join(reversed(ip.split('.')))
  32.  
  33.     for blacklist in provider:
  34.         check_domain = "%s.%s" % (ip_rev, blacklist)
  35.         try:
  36.             answers = dns.resolver.query(check_domain)
  37.             ip = answers[0].to_text()
  38.             print colored("%s: %s" % (blacklist, ip), "red")
  39.         except Exception, e:
  40.             pass
  41.  
  42.  
  43. def google_safebrowsing(url):
  44.     api_key = "ABQIAAAA55hJZAWo2KBLCGcGYtI03BSLNEcy237KLTt66fvN757NqGaakA"
  45.     app = "blackchecker"
  46.     url_api = "https://sb-ssl.google.com/safebrowsing/api/lookup" + \
  47.         "?client=%s&apikey=%s&appver=1.5.2&pver=3.0&url=%s"
  48.  
  49.     url_api = url_api % (app, api_key, urllib.quote(url, ''))
  50.     #print url_api
  51.  
  52.     # failed with requests modules proxy return 501 weird :s
  53.     # req = requests.get(url_api)
  54.     req = urllib2.urlopen(url_api)
  55.     result = req.read()
  56.     print "%d:%s" % (req.code, result )
  57.  
  58. def cybercrime_tracker(domain):
  59.     #curl http://cybercrime-tracker.net/all.php | sed 's/<[^>]*>/\n/g' > /work/db/cybercrime-tracker.txt
  60.     cc_tracker_file = "/work/db/cybercrime-tracker.txt"
  61.     fp = open(cc_tracker_file)
  62.     domain = domain.lower()
  63.     for line in fp:
  64.         line = line.strip('\n').lower()
  65.         if domain in line:
  66.             print colored("%s" % line, "red")
  67.  
  68.  
  69. if __name__ == "__main__":
  70.     parser = argparse.ArgumentParser(description='Extract usefull information from html page')
  71.     parser.add_argument('url', type=str)
  72.     args = parser.parse_args()
  73.  
  74.  
  75.     up = urlparse(args.url)
  76.     domain = up.netloc
  77.     print colored("Domain lookup: %s" % domain, "green")
  78.     answers = dns.resolver.query(domain )
  79.     print "TTL: %d" % answers.rrset.ttl
  80.     geoip = pygeoip.GeoIP(geoip_db)
  81.  
  82.     ips = []
  83.     for rdata in answers:
  84.         if rdata.to_text() != domain:
  85.             ips.append(rdata.to_text())
  86.         cc = geoip.country_code_by_name(rdata.to_text())
  87.         print "%s %s" % (rdata.to_text(), cc)
  88.  
  89.     print ""
  90.     print colored("Blacklist check: %s" % domain, "green")
  91.     check_blacklist_dns(domain)
  92.     for ip in ips:
  93.         print colored("Blacklist check: %s" % ip, "green")
  94.         check_blacklist_dns(ip)
  95.  
  96.     print ""
  97.     print colored("Google safebrowsing: %s" % args.url, "green")
  98.     google_safebrowsing(args.url)
  99.  
  100.     print ""
  101.     print colored("Cybercrime tracker: %s" % domain, "green")
  102.     cybercrime_tracker(domain)
  103.  
  104.     print ""
  105.     req = requests.get(args.url)
  106.     print colored("Server header:", "green")
  107.     print "%s" % (req.status_code)
  108.     for k,v in req.headers.items(): print "%s: %s" % (k, v)
  109.  
  110.     print ""
  111.     tree = html.fromstring(req.content)
  112.     info = tree.xpath('//a')
  113.     print colored("Links found (%d):" % len(info), "green")
  114.     for h in info:
  115.         print "uri: %s" % \
  116.               (h.attrib.get('href', ''))
  117.         #print "title: %s uri: %s" % \
  118.               #(h.attrib.get('title', ''),
  119.               #h.attrib.get('href', ''))
  120.  
  121.     print ""
  122.     info = tree.xpath('//script')
  123.     print colored("Javascript found (%d):" % len(info), "green")
  124.     for h in info:
  125.         print "-"*32
  126.         print "src: %s" % h.attrib.get('src', '')
  127.         if h.text:
  128.             print "content:"
  129.             opts = jsbeautifier.default_options()
  130.             opts.unescape_strings = True
  131.             opts.eval_code = True # dangerous
  132.             print jsbeautifier.beautify(h.text)
  133.  
  134.     print ""
  135.     info = tree.xpath('//iframe')
  136.     print colored("Iframe found (%d):" % len(info), "green")
  137.     for h in info:
  138.         print "-"*32
  139.         print "src=\"%s\" width=%s height=%s" % \
  140.                 (h.attrib.get('src', ''),
  141.                  h.attrib.get('width', ''),
  142.                  h.attrib.get('height', ''))
  143.  
  144.  
  145.     print ""
  146.     info = tree.xpath('//applet')
  147.     print colored("Java found (%d):" % len(info), "green")
  148.     for h in info:
  149.         print "-"*32
  150.         print "object=\"%s\" code=\"%s\" width=%s height=%s" % \
  151.                 (h.attrib.get('code', ''),
  152.                  h.attrib.get('object', ''),
  153.                  h.attrib.get('width', ''),
  154.                  h.attrib.get('height', ''))
  155.  
  156.     print ""
  157.     info = tree.xpath('//object')
  158.     print colored("Object found (%d):" % len(info), "green")
  159.     for h in info:
  160.         print "-"*32
  161.         print "data=\"%s\" classid=\"%s\" codebase=\"%s\" width=%s height=%s" % \
  162.                 (h.attrib.get('data', ''),
  163.                  h.attrib.get('classid', ''),
  164.                  h.attrib.get('codebase', ''),
  165.                  h.attrib.get('width', ''),
  166.                  h.attrib.get('height', ''))
  167.  
  168.         subinfo = h.xpath('param')
  169.         for sh in subinfo:
  170.            print "name=%s value=%s" % \
  171.                 (sh.attrib.get('name', ''),
  172.                  sh.attrib.get('value', ''))
  173.  
  174.         subinfo = h.xpath('embed')
  175.         for sh in subinfo:
  176.            print "src=\"%s\" width=%s height=%s" % \
  177.                (sh.attrib.get('src', ''),
  178.                 sh.attrib.get('width', ''),
  179.                 sh.attrib.get('height', ''))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement