#!/usr/bin/env python
'''
------------------------------
parseusn.py

Dave Lassalle, @superponible
email: dave@superponible.com
------------------------------

This is an adaptation of UsnJrnl.py from https://code.google.com/p/parser-usnjrnl/,
which is based on the USNJRNL parser EnScript from Lance Mueller's blog
    (http://www.forensickb.com/2008/09/enscript-to-parse-usnjrnl.html)

This script parses the entries from the $UsnJrnl:$J alternate data stream used by the NTFS filesystem.
To use the script, extract the journal using a forensic tool such as EnCase, FTK, or ProDiscover.

It is intended to be cross-platform and not memory intensive.

LICENSE: MIT Open Source License (http://opensource.org/licenses/mit-license.php)
'''
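
# Example invocation (a sketch; the extracted journal filename is hypothetical
# and depends on the forensic tool used):
#
#   python parseusn.py -f UsnJrnl-J.bin -t csv -o usn_output.csv
#
# Omit -o to write to STDOUT; use '-t body' for Sleuth Kit bodyfile output.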

import struct
import binascii
import datetime
import sys
import os
import time
import argparse

# GLOBAL variables
RECORD_HEADER = ('Timestamp', 'MFT Reference', 'MFT Sequence', 'Parent MFT Reference', 'Parent MFT Sequence', 'USN', 'Filename', 'Attributes', 'Change Type', 'Source Info')

FLAGS_LONG = {0x00:" ",
              0x01:"The data in the file or directory is overwritten.",
              0x02:"The file or directory was added to.",
              0x04:"The file or directory was truncated.",
              0x10:"Data in one or more named data streams for the file was overwritten.",
              0x20:"One or more named data streams for the file were added to.",
              0x40:"One or more named data streams for the file were truncated.",
              0x100:"The file or directory was created for the first time.",
              0x200:"The file or directory was deleted.",
              0x400:"The user made a change to the file's or directory's extended attributes.",
              0x800:"A change was made in the access rights to the file or directory.",
              0x1000:"The file or directory was renamed and the file name in this structure is the previous name.",
              0x2000:"The file or directory was renamed and the file name in this structure is the new name.",
              0x4000:"A user toggled the FILE_ATTRIBUTE_NOT_CONTENT_INDEXED attribute.",
              0x8000:"A user has either changed one or more file or directory attributes or one or more time stamps.",
              0x10000:"An NTFS hard link was added to or removed from the file or directory.",
              0x20000:"The compression state of the file or directory was changed from or to compressed.",
              0x40000:"The file or directory was encrypted or decrypted.",
              0x80000:"The object identifier of the file or directory was changed.",
              0x100000:"The reparse point contained in the file or directory was changed, or a reparse point was added to or deleted from the file or directory.",
              0x200000:"A named stream has been added to or removed from the file, or a named stream has been renamed.",
              0x80000000:"The file or directory was closed.",
             }

FLAGS_SHORT = {0x00:" ",
               0x01:"data_overwritten",
               0x02:"data_appended",
               0x04:"data_truncated",
               0x10:"ads_data_overwritten",
               0x20:"ads_data_appended",
               0x40:"ads_data_truncated",
               0x100:"file_created",
               0x200:"file_deleted",
               0x400:"extended_attrib_changed",
               0x800:"access_changed",
               0x1000:"file_old_name",
               0x2000:"file_new_name",
               0x4000:"content_indexed_changed",
               0x8000:"basic_info_changed",
               0x10000:"hardlink_changed",
               0x20000:"compression_changed",
               0x40000:"encryption_changed",
               0x80000:"objid_changed",
               0x100000:"reparse_changed",
               0x200000:"ads_added_or_deleted",
               0x80000000:"file_closed",
              }
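
# Reason values are bitmasks, so one record routinely carries several flags
# OR'd together (e.g. 0x80000100 = file_created | file_closed); combined
# values are expanded by deflag_long_field() below.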

# taken from http://msdn.microsoft.com/en-us/library/ee332330(VS.85).aspx
FILE_ATTRIBUTES = {32:'ARCHIVE',
                   2048:'COMPRESSED',
                   64:'DEVICE',
                   16:'DIRECTORY',
                   16384:'ENCRYPTED',
                   2:'HIDDEN',
                   128:'NORMAL',
                   8192:'NOT_CONTENT_INDEXED',
                   4096:'OFFLINE',
                   1:'READONLY',
                   1024:'REPARSE_POINT',
                   512:'SPARSE_FILE',
                   4:'SYSTEM',
                   256:'TEMPORARY',
                   65536:'VIRTUAL',
                  }

SOURCEINFO = {4:"The operation is modifying a file to match the contents of the same file which exists in another member of the replica set.",
              2:"The operation adds a private data stream to a file or directory.",
              1:"The operation provides information about a change to the file or directory made by the operating system.",
              0:"",
             }

def main(argv):
    args = cliargs()
    infile = args.infilename
    outfile = args.outfilename
    all_records = args.all_records
    flags = FLAGS_SHORT
    if args.long_flags:
        flags = FLAGS_LONG

    create_temp_file(infile)

    it = open("{}.tmp".format(infile), 'rb')
    if outfile is None:
        ot = sys.stdout
    else:
        ot = open(outfile, 'wb')

    if args.out_format == 'csv':
        joinchar = '","'
    elif args.out_format == 'tab':
        joinchar = "\t"

    if args.out_format != 'body':
        if args.out_format == "csv":
            ot.write('"')
        ot.write(joinchar.join(RECORD_HEADER))
        if args.out_format == "csv":
            ot.write('"')
        ot.write('\n')

    if args.out_format == 'body':
        all_records = True
        joinchar = '|'

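    # Note: the 'body' format follows The Sleuth Kit 3.x bodyfile layout
    # (MD5|name|inode|mode|UID|GID|size|atime|mtime|ctime|crtime); fields this
    # script cannot derive from the journal are left as 0 or empty.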
    position_marker = 0
    go = True

    while go:
        try:
            # read the record size, then read the next record
            # sys.stderr.write("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b Offset {}".format(position_marker))

            it.seek(position_marker, os.SEEK_SET)
            data = it.read(800)
            if len(data) < 60:
                go = False
                continue

            recordsize = struct.unpack_from('i', data)[0]

            if recordsize < 0:
                go = False          # invalid data can create an endless loop
            if recordsize < 60:
                # Note: there are places in the test $UsnJrnl:$J file where there are gaps between
                # records that are not accounted for by the record size. The gaps are always 0x00
                # filled. If the record size is zero, move forward until the next non-zero byte is
                # found. The largest gap I found was 296 bytes.
                gap_size = len(data.lstrip('\x00'))
                if gap_size < 1:
                    break
                else:
                    position_marker = position_marker + 800 - gap_size
                    # records are aligned at 0x0 or 0x8, so zero out the least significant 3 bits;
                    # this is necessary if the first non-zero byte is not found at an 0x0 or 0x8 offset
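                    # For example (illustrative): a first non-zero byte at file offset
                    # 0x1235 is pulled back to the preceding 8-byte boundary, 0x1230.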
                    position_marker = position_marker & ~0x7    # ~0x7, unlike 0xfffffff8, is safe for offsets beyond 4GB
                    continue

            it.seek(position_marker)
            data = it.read(recordsize)
            try:
                usn_record = decode_USN_record(data, recordsize)
            except struct.error:
                sys.stderr.write("\nCannot parse {} at offset {}\n".format(data, position_marker))
                sys.stderr.write("\nLength of data is {}\n".format(len(data)))
                position_marker = position_marker + recordsize      # skip the bad record, or a struct error would loop forever
                continue
            if usn_record is None:
                position_marker = position_marker + recordsize
                continue
            usn_record = deflag_item(usn_record, flags)

            if all_records or "closed" in usn_record['reason'] or "old_name" in usn_record['reason']:
                # print in the appropriate format
                if args.out_format != 'body':
                    fields = (usn_record['time'],
                              usn_record['mft_ref'],
                              usn_record['mft_ref_seq'],
                              usn_record['parent_ref'],
                              usn_record['parent_ref_seq'],
                              usn_record['usn'],
                              usn_record['filename'],
                              usn_record['file_attrib'],
                              usn_record['reason'],
                              usn_record['sourceinfo'],
                             )
                # print body file format
                else:
                    # the journal stores a single timestamp, so it fills all four bodyfile
                    # time fields; time.mktime() is portable, unlike strftime("%s")
                    atime = mtime = ctime = etime = int(time.mktime(usn_record['time'].timetuple()))
                    fields = ('0',
                              usn_record['filename'],
                              usn_record['mft_ref'],
                              '',
                              '0',
                              '0',
                              '0',
                              atime,
                              mtime,
                              ctime,
                              etime,
                             )
                try:
                    if args.out_format == "csv":
                        ot.write('"')
                    ot.write(joinchar.join(["{}".format(a) for a in fields]))
                    if args.out_format == "csv":
                        ot.write('"')
                    ot.write('\n')
                    ot.flush()
                except IOError:
                    # downstream pipe closed (e.g. output piped to head); shut down quietly
                    try:
                        sys.stdout.close()
                    except IOError:
                        pass
                    try:
                        sys.stderr.close()
                    except IOError:
                        pass

            usn_record = None
            position_marker = position_marker + recordsize

        except struct.error as e:
            sys.stderr.write(str(e))
            go = False
            sys.stderr.write("Struct format error at Tell: {}\n".format(it.tell()))

        except:
            go = False
            print("Unexpected error:", sys.exc_info()[0])
            raise

    it.close()
    ot.close()

    os.unlink("{}.tmp".format(infile))

    sys.exit(0)

def cliargs():
    '''Parse CLI args'''
    parser = argparse.ArgumentParser(description="parseusn.py -- USN Journal Parser")
    parser.add_argument('-f', '--infile', required=True, action='store', dest='infilename', help='Input filename, extracted $UsnJrnl:$J')
    parser.add_argument('-m', '--mft', required=False, action='store', dest='mftfilename', help='MFT filename, doesn\'t work yet')
    parser.add_argument('-o', '--outfile', required=False, action='store', dest='outfilename', help='Output filename, default to STDOUT')
    parser.add_argument('-t', '--type', required=False, action='store', dest='out_format', default="csv", choices=['csv', 'tab', 'body'], help='Output format, default to CSV')
    parser.add_argument('-a', '--all', required=False, action='store_true', dest='all_records', default=False, help='Print all records, not just closed records.')
    parser.add_argument('-l', '--long', required=False, action='store_true', dest='long_flags', default=False, help='Print long strings for the file attribute flags.')
    args = parser.parse_args()
    return args

def create_temp_file(infile):
    '''$UsnJrnl:$J extracts can contain a large run of leading zeros.
    Create a smaller working file that eliminates them.'''
    it = open(infile, 'rb')
    while True:
        data = it.read(655360)
        if len(data) == 0:
            # reached EOF without a non-zero byte (empty or all-zero file);
            # bail out instead of looping forever
            break
        data = data.lstrip('\x00')
        if len(data) > 0:
            break
    position = it.tell() - len(data)
    it.seek(position)

    # copy from the first non-zero byte into the working file, then clean up
    ot = open("{}.tmp".format(infile), 'wb')
    while True:
        data = it.read(655360)
        ot.write(data)
        if len(data) < 655360:
            break

    it.close()
    ot.close()

def deflag_item(r, flags):
    '''Replace numeric flag values with string descriptions for each record;
    if a flag does not exist, return the value unchanged.'''

    filename = r['filename']
    # drop anything after the first double-null; some lines have garbage at the end
    filename = filename[:filename.find('\x00\x00')]
    # strip the interleaved null bytes left by the UTF-16 encoding
    r['filename'] = filename.replace('\x00', '')

    # convert 64-bit Windows time to a human-readable date
    r['time'] = conv_time(r['time'])

    try:
        r['reason'] = flags[r['reason']]
    except KeyError:
        r['reason'] = deflag_long_field(r['reason'], flags)
    try:
        r['sourceinfo'] = SOURCEINFO[r['sourceinfo']]
    except KeyError:
        r['sourceinfo'] = deflag_long_field(r['sourceinfo'], SOURCEINFO)
    try:
        r['file_attrib'] = FILE_ATTRIBUTES[r['file_attrib']]
    except KeyError:
        r['file_attrib'] = deflag_long_field(r['file_attrib'], FILE_ATTRIBUTES)
    return r

def deflag_long_field(value, flags):
    '''In the event that more than one flag is set for a field,
    this will read through the flags and concatenate the values.'''
    setflags = []

    keylist = sorted(flags.keys())
    for i in keylist:
        if (i & value) > 0:
            setflags.append(flags[i])
    return "; ".join(setflags)
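
# For example (illustrative): a reason value of 0x80000002 expands under
# FLAGS_SHORT to "data_appended; file_closed", since the 0x02 and 0x80000000
# bits are both set.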

def decode_USN_record(d, size):
    '''Given a chunk of data and its size, parse out the fields of the USN record'''
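    # USN_RECORD_V2 layout (offsets in bytes, per MSDN):
    #   0x00 RecordLength (DWORD)    0x04 MajorVersion (WORD)   0x06 MinorVersion (WORD)
    #   0x08 FileReferenceNumber (QWORD: 48-bit MFT entry + 16-bit sequence)
    #   0x10 ParentFileReferenceNumber (QWORD)
    #   0x18 Usn (QWORD)             0x20 TimeStamp (FILETIME)
    #   0x28 Reason (DWORD)          0x2C SourceInfo (DWORD)
    #   0x30 SecurityId (DWORD)      0x34 FileAttributes (DWORD)
    #   0x38 FileNameLength (WORD)   0x3A FileNameOffset (WORD)   then FileName (UTF-16)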
    r = {}
    r['length'] = size

    # combine major/minor version fields
    r['major'] = struct.unpack("h", d[4:6])[0]
    r['minor'] = struct.unpack("h", d[6:8])[0]
    r['version'] = "{}.{}".format(r['major'], r['minor'])

    if r['major'] == 2:
        r['mft_ref'] = struct.unpack("ixx", d[8:14])[0]
        r['mft_ref_seq'] = struct.unpack("h", d[14:16])[0]
        r['parent_ref'] = struct.unpack("ixx", d[16:22])[0]
        r['parent_ref_seq'] = struct.unpack("h", d[22:24])[0]
        r['usn'] = struct.unpack("q", d[24:32])[0]
        r['time'] = binascii.hexlify(struct.unpack("8s", d[39:31:-1])[0])
        r['reason'] = struct.unpack("i", d[40:44])[0]
        r['sourceinfo'] = struct.unpack("i", d[44:48])[0]
        r['securityid'] = struct.unpack("i", d[48:52])[0]
        r['file_attrib'] = struct.unpack("i", d[52:56])[0]
        r['filename_length'] = struct.unpack("h", d[56:58])[0]
        r['filename_offset'] = struct.unpack("h", d[58:60])[0]
        off = r['filename_offset']
        length = r['filename_length']
        r['filename'] = struct.unpack("{}s".format(length), d[off:off+length])[0]
    # TODO: this needs to be tested on a system with actual 3.0 records
    elif r['major'] == 3:
        # v3 records use 128-bit file references (low QWORD, then high QWORD)
        mft_ref1, mft_ref2 = struct.unpack("<QQ", d[8:24])
        r['mft_ref'] = (mft_ref2 << 64) | mft_ref1
        r['mft_ref_seq'] = 0
        parent_ref1, parent_ref2 = struct.unpack("<QQ", d[24:40])
        r['parent_ref'] = (parent_ref2 << 64) | parent_ref1
        r['parent_ref_seq'] = 0
        r['usn'] = struct.unpack("q", d[40:48])[0]
        r['time'] = binascii.hexlify(struct.unpack("8s", d[55:47:-1])[0])
        r['reason'] = struct.unpack("i", d[56:60])[0]
        r['sourceinfo'] = struct.unpack("i", d[60:64])[0]
        r['securityid'] = struct.unpack("i", d[64:68])[0]
        r['file_attrib'] = struct.unpack("i", d[68:72])[0]
        r['filename_length'] = struct.unpack("h", d[72:74])[0]
        r['filename_offset'] = struct.unpack("h", d[74:76])[0]
        off = r['filename_offset']
        length = r['filename_length']
        r['filename'] = struct.unpack("{}s".format(length), d[off:off+length])[0]
    else:
        r = None
    return r

def conv_time(dt):
    '''Convert a Windows 64-bit FILETIME, passed as a big-endian hex string, to a datetime value'''
    us = int(dt, 16) / 10.
    return datetime.datetime(1601, 1, 1) + datetime.timedelta(microseconds=us)
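
# Sanity check (illustrative): FILETIME counts 100ns ticks since 1601-01-01, so
# conv_time('019db1ded53e8000') -- 116444736000000000 ticks, or 11644473600
# seconds -- returns datetime.datetime(1970, 1, 1, 0, 0), the Unix epoch.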

if __name__ == '__main__':
    main(sys.argv[1:])