tekCollect.py
#!/usr/bin/python

'''
This is tekCollect! This tool will scrape specified data types out of a URL or file.
@TekDefense
Ian Ahl | www.TekDefense.com | 1aN0rmus@tekDefense.com
*Some of the Regular Expressions were taken from http://gskinner.com/RegExr/
Version: 0.5

Changelog:
.5
[+] Quick update to add the WDIR regex. This will pull Windows directories.
[+] Modified the URL regex to be less strict.
.4
[+] Fixed issue where -t IP4 returned URLs
[+] Added a summary function that shows what types of data are in a specified target.
[+] Modified the regex for many of the data types for better results
[+] Added several new data types: ZIP, Twitter, DOC, EXE, MySQL hash, WordPress (WP) hash, IMG, FLASH
[+] Modified the way the summary is displayed
[+] Several improvements by machn1k (https://github.com/machn1k, http://twitter.com/machn1k)
[+] Made some modifications based on machn1k's changes
.3
[+] Added predefined data types that can be invoked with -t type
.2
[+] Expanded the script to allow a custom regex with -r 'regex here'
.1
[+] Replaced listTypes selection with a loop
[+] Tool created; could only pull MD5 hashes

TODO
[-] Proper hash value matching
[-] Ability to accept multiple --types
[-] Summary sub-options (Hash, Host, PII)
[-] Improved menu selections & functions
'''
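
# Example usage (illustrative only; samples.txt and results.txt are
# hypothetical file names, not part of the tool):
#   python tekCollect.py -u http://www.example.com -t MD5
#   python tekCollect.py -f samples.txt -t IP4 -o results.txt
#   python tekCollect.py -f samples.txt -r '[a-fA-F0-9]{32}'
#   python tekCollect.py -f samples.txt -s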

import httplib2, re, sys, argparse

dTypes = 'MD5, SHA1, SHA256, MySQL, WP (WordPress), Domain, URL, IP4, IP6, SSN, EMAIL, CCN, Twitter, DOC, EXE, ZIP, IMG, FLASH, WDIR'
# Adding arguments
parser = argparse.ArgumentParser(description='tekCollect is a tool that will scrape a file or website for specified data')
parser.add_argument('-u', '--url', help='This option is used to specify a URL to scrape')
parser.add_argument('-f', '--file', help='This option is used to import a file to scrape')
parser.add_argument('-o', '--output', help='This option will output the results to a file.')
parser.add_argument('-r', '--regex', help='This option allows the user to set a custom regex value. Must be encased in single or double quotes.')
parser.add_argument('-t', '--type', help='This option allows a user to choose the type of data they want to pull out. Currently supports ' + dTypes)
parser.add_argument('-s', '--summary', action='store_true', default=False, help='This option will show a summary of the data types in a specified target')
args = parser.parse_args()

# Setting some variables and lists
regVal = ''    # Initial regVal
listResults = []
MD5 = '\W([a-fA-F0-9]{32})\W'
SHA1 = '[a-fA-F0-9]{40}'
SHA256 = '[a-fA-F0-9]{64}'
LM = '[a-fA-F0-9]{32}'
DOMAIN = '\W(\w+\.){1,4}(com|net|biz|cat|aero|asia|coop|info|int|jobs|mobi|museum|name|org|post|pro|tel|travel|xxx|edu|gov|mil|br|cc|ca|uk|ch|co|cx|de|fr|hk|jp|kr|nl|nr|ru|tk|ws|tw)[^a-fA-F0-9_-]'
URL = '(http\:\/\/|https\:\/\/)(.+\S)'
IP4 = '((?<![0-9])(?:(?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2}))(?![0-9]))'
IP6 = '(((([01]?\\d?\\d)|(2[0-5]{2}))\\.){3}(([01]?\\d?\\d)|(2[0-5]{2})))|(([A-F0-9]){4}(:|::)){1,7}(([A-F0-9]){4})'
SSN = '(\d{3}\-\d{2}\-\d{4})|(\d{3}\s\d{2}\s\d{4})'
EMAIL = '([a-zA-Z0-9\._-]+@)([a-zA-Z0-9-]+\.)(com|net|biz|cat|aero|asia|coop|info|int|jobs|mobi|museum|name|org|post|pro|tel|travel|xxx|edu|gov|mil|br|cc|ca|uk|ch|co|cx|de|fr|hk|jp|kr|nl|nr|ru|tk|ws|tw)\W'
CCN = '\d{4}\s\d{4}\s\d{4}\s\d{2,4}|\d{4}\-\d{4}\-\d{4}\-\d{2,4}'
TWITTER = '(?<=^|(?<=[^a-zA-Z0-9-_\.]))(@)([A-Za-z]+[A-Za-z0-9]+)'
PHONE = ''    # Placeholder, not yet implemented
NTLM = ''     # Placeholder, not yet implemented
WDIR = '[a-zA-Z]\:\\\\.+'
DOC = '\W([\w-]+\.)(docx|doc|csv|pdf|xlsx|xls|rtf|txt|pptx|ppt)'
EXE = '\W([\w-]+\.)(exe|dll)'
ZIP = '\W([\w-]+\.)(zip|zipx|7z|rar|tar|gz)'
IMG = '\W([\w-]+\.)(jpeg|jpg|gif|png|tiff|bmp)'
FLASH = '\W([\w-]+\.)(flv|swf)'
MYSQL = '\*[a-fA-F0-9]{40}'
WP = '\$P\$\w{31}'
CISCO5 = ''   # Placeholder, not yet implemented
CISCO7 = ''   # Placeholder, not yet implemented

listTypes = [('MD5', MD5),
             ('SHA1', SHA1),
             ('SHA256', SHA256),
             ('MYSQL', MYSQL),
             ('WP', WP),
             ('DOMAIN', DOMAIN),
             ('URL', URL),
             ('EMAIL', EMAIL),
             ('TWITTER', TWITTER),
             ('IP4', IP4),
             ('IP6', IP6),
             ('DOC', DOC),
             ('EXE', EXE),
             ('ZIP', ZIP),
             ('IMG', IMG),
             ('FLASH', FLASH),
             ('WDIR', WDIR),
             ('SSN', SSN),
             ('CCN', CCN)]
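
# Illustrative sketch (comments only, not executed): how a type name resolves
# to its pattern and what a match looks like. dict(listTypes) is an equivalent
# shorthand for the lookup loop below, and the sample hash string is hypothetical.
#
#   >>> regVal = dict(listTypes)['MD5']
#   >>> re.findall(re.compile(regVal), ' d41d8cd98f00b204e9800998ecf8427e ')
#   ['d41d8cd98f00b204e9800998ecf8427e']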

# Determine what type of data the user wants and set that type's regex as the regVal variable
if args.type:
    for t in listTypes:
        if args.type.upper() == t[0]:
            regVal = t[1]

# If the user supplied a custom regex, it overrides any predefined type
if args.regex:
    regVal = str(args.regex)

# If the user does not give us a file or url to scrape, show help and exit
if args.url == None and args.file == None:
    parser.print_help()
    sys.exit()

# If no usable regex was found, bail out before scraping (summary mode builds its own from listTypes)
if regVal == '' and args.summary == False:
    print '[-] ' + str(args.type) + ' is not a valid type. \nCurrent valid types are ' + dTypes
    sys.exit()

# If the user wants to output the results to a file, collect the name of the file and redirect all sys.stdout to that file
if args.output:
    oFile = args.output
    print '[+] Printing results to file:', args.output
    o = open(oFile, "w")
    sys.stdout = o
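# Note: once redirected, every subsequent print (including the summary and
# results below) goes to the output file; sys.stdout is not restored before exit.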

# If the target to scrape is a file: read it into one string, regex the string for the data type specified by regVal, and put the results in a list
if args.file:
    if args.summary == True:
        iFile = args.file
        fileImport = open(iFile)
        strFile = ''
        print '[*] Summary of data types for: ' + iFile
        for line in fileImport:
            strFile += line
        # Run every known pattern over the file and report a deduplicated count per type
        for i in listTypes:
            regVal = i[1]
            regexValue = re.compile(regVal)
            regexSearch = re.findall(regexValue, strFile)
            listResults = list(set(regexSearch))
            print '[+] ' + i[0] + ': ' + str(len(listResults))
        sys.exit()
    else:
        iFile = args.file
        fileImport = open(iFile)
        strFile = ''
        for line in fileImport:
            strFile += line
        regexValue = re.compile(regVal)
        regexSearch = re.findall(regexValue, strFile)
        for i in regexSearch:
            listResults.append(i)
# If the target to scrape is a url: connect to and get content from the url, create a string out of the content, regex the string for the data type specified by regVal, and put the results in a list
if args.url:
    if args.summary == True:
        url = args.url
        h = httplib2.Http(".cache")    # httplib2 caches responses in a local .cache directory
        resp, content = h.request((url), "GET")
        contentString = (str(content))
        print '[*] Summary of data types for: ' + url
        # Run every known pattern over the page and report a deduplicated count per type
        for i in listTypes:
            regVal = i[1]
            regexValue = re.compile(regVal)
            regexSearch = re.findall(regexValue, contentString)
            listResults = list(set(regexSearch))
            print '[+] ' + i[0] + ': ' + str(len(listResults))
        sys.exit()
    else:
        url = args.url
        h = httplib2.Http(".cache")
        resp, content = h.request((url), "GET")
        contentString = (str(content))
        regexValue = re.compile(regVal)
        regexSearch = re.findall(regexValue, contentString)
        for i in regexSearch:
            listResults.append(i)

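# re.findall returns plain strings when a pattern has at most one group, but
# tuples of group values for multi-group patterns (e.g. DOC, EXE, ZIP); the
# ''.join() below flattens either form into a single printable string.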
# Remove duplicates from the list and print
listResults = list(set(listResults))
for i in listResults:
    print ''.join(i)