opexxx

neopi.py

Jun 5th, 2014
290
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 17.59 KB | None | 0 0
  1. #!/usr/bin/python
  2. # Name: neopi.py
  3. # Description: Utility to scan a file path for encrypted and obfuscated files
  4. # Authors: Ben Hagen (ben.hagen@neohapsis.com)
  5. #         Scott Behrens (scott.behrens@neohapsis.com)
  6. #
  7. # Date: 11/4/2010
  8. #
  9. # pep-0008 - Is stupid. TABS FO'EVER!
  10.  
  11. # Try catch regular expressions/bad path/bad filename/bad regex/
  12.  
  13. # Library imports
  14. import math
  15. import sys
  16. import os
  17. import re
  18. import csv
  19. import zlib
  20. import time
  21. from collections import defaultdict
  22. from optparse import OptionParser
  23.  
  24. #
  25. # Globals
  26. #
  27.    
  28. # Smallest file size to check for, in bytes.
  29. SMALLEST = 60
  30.  
  31. class LanguageIC:
  32.    """Class that calculates a file's Index of Coincidence as
  33.   as well as a a subset of files average Index of Coincidence.
  34.   """
  35.    def __init__(self):
  36.        """Initialize results arrays as well as character counters."""
  37.        self.char_count =  defaultdict(int)
  38.        self.total_char_count = 0
  39.        self.results = []
  40.        self.ic_total_results = ""
  41.  
  42.    def calculate_char_count(self,data):
  43.        """Method to calculate character counts for a particular data file."""
  44.        if not data:
  45.            return 0
  46.        for x in range(256):
  47.            char = chr(x)
  48.            charcount = data.count(char)
  49.            self.char_count[char] += charcount
  50.            self.total_char_count += charcount
  51.        return
  52.  
  53.    def calculate_IC(self):
  54.        """Calculate the Index of Coincidence for the self variables"""
  55.        total = 0
  56.        for val in self.char_count.values():
  57.  
  58.            if val == 0:
  59.                continue
  60.            total += val * (val-1)
  61.  
  62.        try:
  63.            ic_total =      float(total)/(self.total_char_count * (self.total_char_count - 1))
  64.        except:
  65.            ic_total = 0
  66.        self.ic_total_results = ic_total
  67.        return
  68.  
  69.    def calculate(self,data,filename):
  70.        """Calculate the Index of Coincidence for a file and append to self.ic_results array"""
  71.        if not data:
  72.            return 0
  73.        char_count = 0
  74.        total_char_count = 0
  75.  
  76.        for x in range(256):
  77.            char = chr(x)
  78.            charcount = data.count(char)
  79.            char_count += charcount * (charcount - 1)
  80.            total_char_count += charcount
  81.  
  82.        ic = float(char_count)/(total_char_count * (total_char_count - 1))
  83.        self.results.append({"filename":filename, "value":ic})
  84.        # Call method to calculate_char_count and append to total_char_count
  85.        self.calculate_char_count(data)
  86.        return ic
  87.  
  88.    def sort(self):
  89.        self.results.sort(key=lambda item: item["value"])
  90.        self.results = resultsAddRank(self.results)
  91.  
  92.    def printer(self, count):
  93.        """Print the top signature count match files for a given search"""
  94.        # Calculate the Total IC for a Search
  95.        self.calculate_IC()
  96.        print "\n[[ Average IC for Search ]]"
  97.        print self.ic_total_results
  98.        print "\n[[ Top %i lowest IC files ]]" % (count)
  99.        if (count > len(self.results)): count = len(self.results)
  100.        for x in range(count):
  101.            print ' {0:>7.4f}        {1}'.format(self.results[x]["value"], self.results[x]["filename"])
  102.        return
  103.  
  104. class Entropy:
  105.    """Class that calculates a file's Entropy."""
  106.  
  107.    def __init__(self):
  108.        """Instantiate the entropy_results array."""
  109.        self.results = []
  110.  
  111.    def calculate(self,data,filename):
  112.        """Calculate the entropy for 'data' and append result to entropy_results array."""
  113.  
  114.        if not data:
  115.            return 0
  116.        entropy = 0
  117.        self.stripped_data =data.replace(' ', '')
  118.        for x in range(256):
  119.            p_x = float(self.stripped_data.count(chr(x)))/len(self.stripped_data)
  120.            if p_x > 0:
  121.                entropy += - p_x * math.log(p_x, 2)
  122.        self.results.append({"filename":filename, "value":entropy})
  123.        return entropy
  124.  
  125.    def sort(self):
  126.        self.results.sort(key=lambda item: item["value"])
  127.        self.results.reverse()
  128.        self.results = resultsAddRank(self.results)
  129.  
  130.    def printer(self, count):
  131.        """Print the top signature count match files for a given search"""
  132.        print "\n[[ Top %i entropic files for a given search ]]" % (count)
  133.        if (count > len(self.results)): count = len(self.results)
  134.        for x in range(count):
  135.            print ' {0:>7.4f}        {1}'.format(self.results[x]["value"], self.results[x]["filename"])
  136.        return
  137.  
  138. class LongestWord:
  139.    """Class that determines the longest word for a particular file."""
  140.    def __init__(self):
  141.        """Instantiate the longestword_results array."""
  142.        self.results = []
  143.  
  144.    def calculate(self,data,filename):
  145.        """Find the longest word in a string and append to longestword_results array"""
  146.        if not data:
  147.            return "", 0
  148.        longest = 0
  149.        longest_word = ""
  150.        words = re.split("[\s,\n,\r]", data)
  151.        if words:
  152.            for word in words:
  153.                length = len(word)
  154.                if length > longest:
  155.                    longest = length
  156.                    longest_word = word
  157.        self.results.append({"filename":filename, "value":longest})
  158.        return longest
  159.  
  160.    def sort(self):
  161.        self.results.sort(key=lambda item: item["value"])
  162.        self.results.reverse()
  163.        self.results = resultsAddRank(self.results)
  164.  
  165.    def printer(self, count):
  166.        """Print the top signature count match files for a given search"""
  167.        print "\n[[ Top %i longest word files ]]" % (count)
  168.        if (count > len(self.results)): count = len(self.results)
  169.        for x in range(count):
  170.            print ' {0:>7}        {1}'.format(self.results[x]["value"], self.results[x]["filename"])
  171.        return
  172.  
  173. class SignatureNasty:
  174.    """Generator that searches a given file for nasty expressions"""
  175.  
  176.    def __init__(self):
  177.        """Instantiate the results array."""
  178.        self.results = []
  179.  
  180.    def calculate(self, data, filename):
  181.        if not data:
  182.            return "", 0
  183.        # Lots taken from the wonderful post at http://stackoverflow.com/questions/3115559/exploitable-php-functions
  184.        valid_regex = re.compile('(eval\(|file_put_contents|base64_decode|python_eval|exec\(|passthru|popen|proc_open|pcntl|assert\(|system\(|shell)', re.I)
  185.        matches = re.findall(valid_regex, data)
  186.        self.results.append({"filename":filename, "value":len(matches)})
  187.        return len(matches)
  188.  
  189.    def sort(self):
  190.        self.results.sort(key=lambda item: item["value"])
  191.        self.results.reverse()
  192.        self.results = resultsAddRank(self.results)
  193.  
  194.    def printer(self, count):
  195.        """Print the top signature count match files for a given search"""
  196.        print "\n[[ Top %i signature match counts ]]" % (count)
  197.        if (count > len(self.results)): count = len(self.results)
  198.        for x in range(count):
  199.            print ' {0:>7}        {1}'.format(self.results[x]["value"], self.results[x]["filename"])
  200.        return
  201.  
  202. class SignatureSuperNasty:
  203.    """Generator that searches a given file for SUPER-nasty expressions (These are almost always bad!)"""
  204.  
  205.    def __init__(self):
  206.        """Instantiate the results array."""
  207.        self.results = []
  208.  
  209.    def calculate(self, data, filename):
  210.        if not data:
  211.            return "", 0
  212.        valid_regex = re.compile('(@\$_\[\]=|\$_=@\$_GET|\$_\[\+""\]=)', re.I)
  213.        matches = re.findall(valid_regex, data)
  214.        self.results.append({"filename":filename, "value":len(matches)})
  215.        return len(matches)
  216.  
  217.    def sort(self):
  218.        self.results.sort(key=lambda item: item["value"])
  219.        self.results.reverse()
  220.        self.results = resultsAddRank(self.results)
  221.  
  222.    def printer(self, count):
  223.        """Print the top signature count match files for a given search"""
  224.        print "\n[[ Top %i SUPER-signature match counts (These are usually bad!) ]]" % (count)
  225.        if (count > len(self.results)): count = len(self.results)
  226.        for x in range(count):
  227.            print ' {0:>7}        {1}'.format(self.results[x]["value"], self.results[x]["filename"])
  228.        return
  229.  
  230. class UsesEval:
  231.    """Generator that searches a given file for nasty eval with variable"""
  232.  
  233.    def __init__(self):
  234.       """Instantiate the eval_results array."""
  235.       self.results = []
  236.  
  237.    def calculate(self, data, filename):
  238.       if not data:
  239.                return "", 0
  240.            # Lots taken from the wonderful post at http://stackoverflow.com/questions/3115559/exploitable-php-functions
  241.       valid_regex = re.compile('(eval\(\$(\w|\d))', re.I)
  242.       matches = re.findall(valid_regex, data)
  243.       self.results.append({"filename":filename, "value":len(matches)})
  244.       return len(matches)
  245.  
  246.    def sort(self):
  247.       self.results.sort(key=lambda item: item["value"])
  248.       self.results.reverse()
  249.       self.results = resultsAddRank(self.results)
  250.  
  251.    def printer(self, count):
  252.       """Print the files that use eval"""
  253.       print "\n[[ Top %i eval match counts ]]" % (count)
  254.       if (count > len(self.results)): count = len(self.results)
  255.       for x in range(count):
  256.         print ' {0:>7}          {1}'.format(self.results[x]["value"], self.results[x]["filename"])
  257.       return
  258.  
  259.  
  260. class Compression:
  261.    """Generator finds compression ratio"""
  262.  
  263.    def __init__(self):
  264.        """Instantiate the results array."""
  265.        self.results = []
  266.  
  267.    def calculate(self, data, filename):
  268.        if not data:
  269.            return "", 0
  270.        compressed = zlib.compress(data)
  271.        ratio = float(len(compressed)) / float(len(data))
  272.        self.results.append({"filename":filename, "value":ratio})
  273.        return ratio
  274.  
  275.    def sort(self):
  276.        self.results.sort(key=lambda item: item["value"])
  277.        self.results.reverse()
  278.        self.results = resultsAddRank(self.results)
  279.  
  280.    def printer(self, count):
  281.        """Print the top files for a given search"""
  282.        print "\n[[ Top %i compression match counts ]]" % (count)
  283.        if (count > len(self.results)): count = len(self.results)
  284.        for x in range(count):
  285.            print ' {0:>7.4f}        {1}'.format(self.results[x]["value"], self.results[x]["filename"])
  286.        return
  287.  
  288. def resultsAddRank(results):
  289.    rank = 1
  290.    offset = 1
  291.    previousValue = False
  292.    newList = []
  293.    for file in results:
  294.        if (previousValue and previousValue != file["value"]):
  295.            rank = offset
  296.        file["rank"] = rank
  297.        newList.append(file)
  298.        previousValue = file["value"]
  299.        offset = offset + 1
  300.    return newList
  301.  
  302. class SearchFile:
  303.    """Generator that searches a given filepath with an optional regular
  304.   expression and returns the filepath and filename"""
  305.    def search_file_path(self, args, valid_regex):
  306.        for root, dirs, files in os.walk(args[0]):
  307.            for file in files:
  308.                filename = os.path.join(root, file)
  309.                if (valid_regex.search(file) and os.path.getsize(filename) > SMALLEST):
  310.                    try:
  311.                        data = open(root + "/" + file, 'rb').read()
  312.                    except:
  313.                        data = False
  314.                        print "Could not read file :: %s/%s" % (root, file)
  315.                    yield data, filename
  316.  
  317. if __name__ == "__main__":
  318.    """Parse all the options"""
  319.  
  320.    timeStart = time.clock()
  321.  
  322.    print """
  323.       )         (   (
  324.    ( /(         )\ ))\ )
  325.    )\())  (    (()/(()/(
  326.   ((_)\ ))\ (  /(_))(_))
  327.    _((_)/((_))\(_))(_))
  328.   | \| (_)) ((_) _ \_ _|
  329.   | .` / -_) _ \ _/| |
  330.   |_|\_\___\___/_| |___| Ver. *.USEGIT
  331.   """
  332.  
  333.    parser = OptionParser(usage="usage: %prog [options] <start directory> <OPTIONAL: filename regex>",
  334.                          version="%prog 1.0")
  335.    parser.add_option("-c", "--csv",
  336.                      action="store",
  337.                      dest="is_csv",
  338.                      default=False,
  339.                      help="generate CSV outfile",
  340.                      metavar="FILECSV")
  341.    parser.add_option("-a", "--all",
  342.                      action="store_true",
  343.                      dest="is_all",
  344.                      default=False,
  345.                      help="Run all (useful) tests [Entropy, Longest Word, IC, Signature]",)
  346.    parser.add_option("-z", "--zlib",
  347.                      action="store_true",
  348.                      dest="is_zlib",
  349.                      default=False,
  350.                      help="Run compression Test",)
  351.    parser.add_option("-e", "--entropy",
  352.                      action="store_true",
  353.                      dest="is_entropy",
  354.                      default=False,
  355.                      help="Run entropy Test",)
  356.    parser.add_option("-E", "--eval",
  357.                      action="store_true",
  358.                      dest="is_eval",
  359.                      default=False,
  360.                      help="Run signiture test for the eval",)
  361.    parser.add_option("-l", "--longestword",
  362.                      action="store_true",
  363.                      dest="is_longest",
  364.                      default=False,
  365.                      help="Run longest word test",)
  366.    parser.add_option("-i", "--ic",
  367.                      action="store_true",
  368.                      dest="is_ic",
  369.                      default=False,
  370.                      help="Run IC test",)
  371.    parser.add_option("-s", "--signature",
  372.                      action="store_true",
  373.                      dest="is_signature",
  374.                      default=False,
  375.                      help="Run signature test",)
  376.    parser.add_option("-S", "--supersignature",
  377.                      action="store_true",
  378.                      dest="is_supersignature",
  379.                      default=False,
  380.                      help="Run SUPER-signature test",)
  381.    parser.add_option("-A", "--auto",
  382.                      action="store_true",
  383.                      dest="is_auto",
  384.                      default=False,
  385.                      help="Run auto file extension tests",)
  386.    parser.add_option("-u", "--unicode",
  387.                      action="store_true",
  388.                      dest="ignore_unicode",
  389.                      default=False,
  390.                      help="Skip over unicode-y/UTF'y files",)
  391.  
  392.    (options, args) = parser.parse_args()
  393.  
  394.    # Error on invalid number of arguements
  395.    if len(args) < 1:
  396.        parser.print_help()
  397.        print ""
  398.        sys.exit()
  399.  
  400.    # Error on an invalid path
  401.    if os.path.exists(args[0]) == False:
  402.        parser.error("Invalid path")
  403.  
  404.    valid_regex = ""
  405.    if (len(args) == 2 and options.is_auto is False):
  406.        try:
  407.            valid_regex = re.compile(args[1])
  408.        except:
  409.            parser.error("Invalid regular expression")
  410.    else:
  411.        valid_regex = re.compile('.*')
  412.    tests = []
  413.  
  414.    if options.is_auto:
  415.        valid_regex = re.compile('(\.php|\.asp|\.aspx|\.scath|\.bash|\.zsh|\.csh|\.tsch|\.pl|\.py|\.txt|\.cgi|\.cfm|\.htaccess)$')
  416.  
  417.    if options.is_all:
  418.        tests.append(LanguageIC())
  419.        tests.append(Entropy())
  420.        tests.append(LongestWord())
  421.        tests.append(SignatureNasty())
  422.        tests.append(SignatureSuperNasty())
  423.    else:
  424.        if options.is_entropy:
  425.            tests.append(Entropy())
  426.        if options.is_longest:
  427.            tests.append(LongestWord())
  428.        if options.is_ic:
  429.            tests.append(LanguageIC())
  430.        if options.is_signature:
  431.            tests.append(SignatureNasty())
  432.        if options.is_supersignature:
  433.            tests.append(SignatureSuperNasty())
  434.        if options.is_eval:
  435.            tests.append(UsesEval())
  436.        if options.is_zlib:
  437.            tests.append(Compression())
  438.  
  439.    # Instantiate the Generator Class used for searching, opening, and reading files
  440.    locator = SearchFile()
  441.  
  442.    # CSV file output array
  443.    csv_array = []
  444.    csv_header = ["filename"]
  445.  
  446.    # Grab the file and calculate each test against file
  447.    fileCount = 0
  448.    fileIgnoreCount = 0
  449.    for data, filename in locator.search_file_path(args, valid_regex):
  450.        if data:
  451.            # a row array for the CSV
  452.            csv_row = []
  453.            csv_row.append(filename)
  454.  
  455.            if options.ignore_unicode:
  456.                asciiHighCount = 0
  457.                for character in data:
  458.                    if ord(character) > 127:
  459.                        asciiHighCount = asciiHighCount + 1
  460.  
  461.                fileAsciiHighRatio = float(asciiHighCount) / float(len(data))
  462.  
  463.            if (options.ignore_unicode == False or fileAsciiHighRatio < .1):
  464.                for test in tests:
  465.                    calculated_value = test.calculate(data, filename)
  466.                    # Make the header row if it hasn't been fully populated, +1 here to account for filename column
  467.                    if len(csv_header) < len(tests) + 1:
  468.                        csv_header.append(test.__class__.__name__)
  469.                    csv_row.append(calculated_value)
  470.                    fileCount = fileCount + 1
  471.                csv_array.append(csv_row)
  472.            else:
  473.                fileIgnoreCount = fileIgnoreCount + 1
  474.  
  475.    if options.is_csv:
  476.        csv_array.insert(0,csv_header)
  477.        fileOutput = csv.writer(open(options.is_csv, "wb"))
  478.        fileOutput.writerows(csv_array)
  479.  
  480.    timeFinish = time.clock()
  481.  
  482.    # Print some stats
  483.    print "\n[[ Total files scanned: %i ]]" % (fileCount)
  484.    print "[[ Total files ignored: %i ]]" % (fileIgnoreCount)
  485.    print "[[ Scan Time: %f seconds ]]" % (timeFinish - timeStart)
  486.  
  487.    # Print top rank lists
  488.    rank_list = {}
  489.    for test in tests:
  490.        test.sort()
  491.        test.printer(10)
  492.        for file in test.results:
  493.            rank_list[file["filename"]] = rank_list.setdefault(file["filename"], 0) + file["rank"]
  494.  
  495.    rank_sorted = sorted(rank_list.items(), key=lambda x: x[1])
  496.  
  497.    print "\n[[ Top cumulative ranked files ]]"
  498.    count = 10
  499.    if (count > len(rank_sorted)): count = len(rank_sorted)
  500.    for x in range(count):
  501.        print ' {0:>7}        {1}'.format(rank_sorted[x][1], rank_sorted[x][0])
Add Comment
Please, Sign In to add comment