Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- ##### License:
- # - no warranty express or implied
- # - free to use as long as you don't use it to make money
- #
- ##### Warning:
- # - downloaded files may harm your computer.
- #
- ##### Config:
- # If you want to use the proxy option:
- # - create a file named "banned_country.txt" and list the banned country names in it, one per line
- # - create a file named "proxy.txt" and list your proxies in it as ip:port, one per line
- #
- ##### Usage examples:
- # - download 100 samples:
- # python this_script.py 100
- # - download 100 samples, using 55 threads:
- # python this_script.py 100 -t 55
- # - download 100 samples using the proxies listed in proxy.txt:
- # python this_script.py 100 -p proxy.txt
- #
- import re
- import urllib2
- import hashlib
- import os
- import random
- import Queue
- import threading
- import argparse
- import time
- print """
- Malc0de.com Malware sample downloader IV
- )\._.,--....,'``.
- .b--. /; _.. \ _\ (`._ ,.
- `=,-,-'~~~ `----(,_..'--(,_..'`-.;.'
- http://virii.tk http://twitter.com/ViRiiTk
- """
# Command-line interface: one positional argument (the sample count) plus
# optional switches.  The options are described in a table and registered in
# order, so --help lists them in the same sequence as they appear here.
parser = argparse.ArgumentParser(description="Malc0de.com Malware sample downloader IV")
cli_options = (
    (("nr_samples",), {
        "type": int,
        "help": "Number of samples you want to download",
    }),
    (("-t", "--nr_threads"), {
        "metavar": "threads",
        "type": int,
        "default": 200,
        "help": "Threads number (Default: 200)",
    }),
    (("-a", "--agent"), {
        "default": "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
        "help": "User Agent used to download samples",
    }),
    (("-d", "--dldfolder"), {
        "default": "C:\malware\\",
        "help": "Local folder to download samples (Default: C:\malware\\ )",
    }),
    (("-i", "--info"), {
        "default": "_files.txt",
        "help": "file to store info about downloaded samples (Default: _files.txt)",
    }),
    (("-e", "--error"), {
        "default": "_errors.txt",
        "help": "file to store errors (Default: _errors.txt)",
    }),
    (("-u", "--malurl"), {
        "default": "_mal_url.txt",
        "help": "file to store malware urls (Default: _mal_url.txt)",
    }),
    (("-p", "--proxy"), {
        "help": """use proxy to get malware urls (proxy.txt)
Ex:
127.0.0.1:80
127.0.0.2:80
...""",
    }),
)
for option_flags, option_settings in cli_options:
    parser.add_argument(*option_flags, **option_settings)
args = parser.parse_args()
- # user agents
- dldagent = {'User-Agent' : args.agent}
- useragent = {'User-Agent' : 'Malc0de.com Malware sample downloader IV, more info on: http://ViRii.Tk'}
- # create download folder if not exist
- if not os.path.isdir(args.dldfolder):
- os.mkdir(args.dldfolder)
- # remove sample nr errors
- if args.nr_samples < 0:
- print "You want to download %i ?? I can't do that" %(args.nr_samples)
- exit()
- # limit the number of download samples
- if args.nr_samples > 10000:
- print "You need very Very VERY many samples, 5k is enough for you"
- args.nr_samples = 4999
- # remove useless threads
- if args.nr_threads >= args.nr_samples:
- args.nr_threads = args.nr_samples
- print "Try to download latest %i samples" %(args.nr_samples)
- print "Threads: %i" %(args.nr_threads)
- print "Malware samples will be downloaded to %s" %(args.dldfolder), "\n"
- # remove proxy from banned country
- banned = []
- proxylist =[]
- # exit if proxy option is selected and file not found
- if args.proxy and not os.path.isfile(args.proxy):
- exit("Option proxy: %s not found" % (args.proxy))
- if args.proxy and os.path.isfile(args.proxy):
- # load banned country list
- with open("banned_country.txt", "r") as handle:
- for country in handle.read().split("\n"):
- banned.append(country.strip())
- # get proxy from proxy.txt
- listaproxytemp = open(args.proxy, "r").read()
- listaproxytemp = re.findall("[\d]{1,3}\.[\d]{1,3}\.[\d]{1,3}.[\d]{1,3}:[\d]{2,5}", listaproxytemp)
- listaproxytemp = list(set(listaproxytemp))
- # test proxy
- print "Testing proxy: %d" %(len(listaproxytemp))
- url = 'http://www.geoips.com/en/geolocation'
- for p in listaproxytemp:
- try:
- proxy = urllib2.ProxyHandler({'http': p})
- opener = urllib2.build_opener(proxy)
- tester = opener.open(url ,timeout = 2)
- country = re.search("\<strong\>Country:\<\/strong\>([a-z A-Z ]{1,30})",tester.read())
- if country:
- if str(country.group(1))[1:] not in banned:
- print str(p) + "\t" + str(country.group(1))[1:]
- proxylist.append(p)
- except :
- pass
- print "Alive proxy: %d" %(len(proxylist))
- with open ("good_proxy.txt" , "a") as good_p:
- good_p.write("--->" + time.strftime("%c") + "<---\n")
- for w_p in proxylist:
- good_p.write(w_p + "\n")
- # exit if no working proxy was found
- if args.proxy and (len(proxylist) == 0):
- exit("Working proxy: None")
- # queue
- q = Queue.Queue()
def get_random_word(a):
    """Return a string of ``a`` characters picked at random from A-Z, a-z and 0-9."""
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
    return ''.join(random.choice(alphabet) for _ in range(a))
def md5Checksum(filePath):
    """Return the hexadecimal MD5 digest of the file at ``filePath``.

    The file is read in 8 KiB chunks so that large files are never loaded
    into memory at once, and the handle is closed even if reading fails.

    Raises IOError/OSError if the file cannot be opened or read.
    """
    m = hashlib.md5()
    with open(filePath, 'rb') as fh:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF
        for data in iter(lambda: fh.read(8192), b''):
            m.update(data)
    return m.hexdigest()
# Number of listing pages that must be visited.  The code works with 50
# entries per page, so a partially filled last page counts as a whole one.
# counter tracks how many sample URLs have been queued so far.
counter = 0
pages, leftover = divmod(args.nr_samples, 50)
if leftover:
    pages += 1
def getmalware(pagina):
    """Queue every sample address found in one listing page.

    ``pagina`` is the HTML text of a listing page.  Each table row that
    starts with a date cell followed by an address cell yields one entry on
    the global queue ``q``.  The global ``counter`` is incremented per queued
    entry, and the function returns as soon as ``args.nr_samples`` entries
    have been queued in total.
    """
    global counter
    rows = re.findall("<td>[\d]{4}-[\d]{2}-[\d]{2}<\/td>\n.+\n", pagina)
    for row in rows:
        date_cell = re.search("<td>([\d]{4}-[\d]{2}-[\d]{2})<\/td>", row)
        address_cell = re.search("\t<td>(.+)<\/td>", row)
        if not (date_cell and address_cell):
            # row does not have the expected date + address layout
            continue
        if counter >= args.nr_samples:
            # requested number of samples already queued
            return
        # the address cell may contain <br/> tags; drop them before queueing
        q.put(re.sub("<br\/>", "", address_cell.group(1)))
        counter += 1
- #browsing pages
- print "Browsing pages:"
- for i in range(1, pages + 1):
- adresa = "http://malc0de.com/database/?&page=" + str(i)
- print "Searching on:", adresa,
- try:
- if len(proxylist) > 0:
- # choose proxy
- p = random.choice(proxylist)
- print p
- proxy = urllib2.ProxyHandler({'http': p})
- opener = urllib2.build_opener(proxy)
- urllib2.install_opener(opener)
- # set useragent
- req = urllib2.Request(adresa, None, useragent)
- # access malc0de
- continut = urllib2.urlopen(req, timeout = 30).read()
- # extract sample url's
- getmalware(continut)
- except Exception as e:
- print str(e) + "\t maybe your ip is banned or proxy(if use) not work"
- pass
- # download malware samples
- def dld_mal(url_mal):
- # write address of this sample
- with open(args.dldfolder + args.malurl, "a") as handle:
- handle.write(url_mal + "\n")
- handle.close()
- url_mal = re.sub(" ", "%20", url_mal)
- #get file name
- file_name = url_mal.split("/")[-1]
- # remove bad characters from file name
- if len(file_name)==0 or re.search("\?", file_name) or re.search("\&", file_name):
- file_name = "No_name" + str(get_random_word(8))
- # try to download sample
- try:
- # check if url start with "http://
- if url_mal[:7] != "http://":
- url_mal = "http://" + url_mal
- if len(proxylist) >0 :
- # choose proxy
- p = random.choice(proxylist)
- proxy = urllib2.ProxyHandler({'http': p})
- opener = urllib2.build_opener(proxy)
- urllib2.install_opener(opener)
- # set download useragent
- req = urllib2.Request(url_mal, None, dldagent)
- u = urllib2.urlopen(req, timeout = 137) #timeout
- # make every filename uniq: "Malware_original_filename" + "_" + 3 random characters
- f_name = args.dldfolder + str(file_name) +"_" + get_random_word(3)
- # write to file
- f = open(f_name, 'wb')
- block_sz = 8192
- while True:
- buffer = u.read(block_sz)
- if not buffer:
- break
- f.write(buffer)
- f.close()
- # write info to _files.txt
- with open(args.dldfolder + args.info, "a") as handle:
- md5hash = md5Checksum(f_name)
- handle.write(str(md5Checksum(f_name)) +"\t" + str(file_name)+ "\t" + url_mal + "\n")
- handle.close
- print "\n" + "Am descarcat: " + file_name,
- except Exception as e:
- # adding error to _errors.txt
- with open(args.dldfolder + args.error, "a") as handle:
- handle.write(url_mal + "\t" + str(e) + "\n")
- handle.close()
- pass
- # get malware address from queue and download files
- print "Downloading:",
- def worker():
- while True:
- if not q.empty():
- try:
- item = q.get()
- dld_mal(item)
- q.task_done()
- except Exception as e:
- print e
# Build the pool of worker threads (at most args.nr_threads of them), start
# them as daemons so they cannot keep the process alive, wait until every
# queued address has been processed, then leave.
pool = [threading.Thread(target=worker) for _ in range(args.nr_threads)]
for t in pool:
    t.daemon = True
    t.start()
q.join()
exit()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement