#!/usr/bin/env python
'''
------------------------------
parseusn.py
Dave Lassalle, @superponible
email: dave@superponible.com
------------------------------
This is an adaptation of UsnJrnl.py from https://code.google.com/p/parser-usnjrnl/,
which is based on the USN journal parser EnScript described on Lance Mueller's blog
(http://www.forensickb.com/2008/09/enscript-to-parse-usnjrnl.html).
This script parses the entries in the $UsnJrnl:$J alternate data stream used by the NTFS filesystem.
To use the script, extract the journal using a forensic tool such as EnCase, FTK, or ProDiscover.
It is intended to be cross-platform and not memory intensive.
LICENSE: MIT Open Source License (http://opensource.org/licenses/mit-license.php)
'''
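
# A minimal usage sketch (the journal filename below is hypothetical; use any
# extracted $UsnJrnl:$J file):
#
#   python parseusn.py -f UsnJrnl-J.bin -t csv -o usn.csv
#   python parseusn.py -f UsnJrnl-J.bin -t body -o usn.body   # mactime body format
#   python parseusn.py -f UsnJrnl-J.bin -a -l                 # all records, long flag strings, to STDOUT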

import struct
import binascii
import datetime
import sys
import os
import argparse

# GLOBAL variables
RECORD_HEADER = ('Timestamp', 'MFT Reference', 'MFT Sequence', 'Parent MFT Reference', 'Parent MFT Sequence', 'USN', 'Filename', 'Attributes', 'Change Type', 'Source Info')

FLAGS_LONG = {
    0x00: " ",
    0x01: "The data in the file or directory is overwritten.",
    0x02: "The file or directory was added to.",
    0x04: "The file or directory was truncated.",
    0x10: "Data in one or more named data streams for the file was overwritten.",
    0x20: "One or more named data streams for the file were added to.",
    0x40: "One or more named data streams for the file were truncated.",
    0x100: "The file or directory was created for the first time.",
    0x200: "The file or directory was deleted.",
    0x400: "The user made a change to the file's or directory's extended attributes.",
    0x800: "A change was made in the access rights to the file or directory.",
    0x1000: "The file or directory was renamed and the file name in this structure is the previous name.",
    0x2000: "The file or directory was renamed and the file name in this structure is the new name.",
    0x4000: "A user toggled the FILE_ATTRIBUTE_NOT_CONTENT_INDEXED attribute.",
    0x8000: "A user has either changed one or more file or directory attributes or one or more time stamps.",
    0x10000: "An NTFS hard link was added to or removed from the file or directory.",
    0x20000: "The compression state of the file or directory was changed from or to compressed.",
    0x40000: "The file or directory was encrypted or decrypted.",
    0x80000: "The object identifier of the file or directory was changed.",
    0x100000: "The reparse point contained in the file or directory was changed, or a reparse point was added to or deleted from the file or directory.",
    0x200000: "A named stream has been added to or removed from the file, or a named stream has been renamed.",
    0x80000000: "The file or directory was closed.",
}

FLAGS_SHORT = {
    0x00: " ",
    0x01: "data_overwritten",
    0x02: "data_appended",
    0x04: "data_truncated",
    0x10: "ads_data_overwritten",
    0x20: "ads_data_appended",
    0x40: "ads_data_truncated",
    0x100: "file_created",
    0x200: "file_deleted",
    0x400: "extended_attrib_changed",
    0x800: "access_changed",
    0x1000: "file_old_name",
    0x2000: "file_new_name",
    0x4000: "content_indexed_changed",
    0x8000: "basic_info_changed",
    0x10000: "hardlink_changed",
    0x20000: "compression_changed",
    0x40000: "encryption_changed",
    0x80000: "objid_changed",
    0x100000: "reparse_changed",
    0x200000: "ads_added_or_deleted",
    0x80000000: "file_closed",
}

# taken from http://msdn.microsoft.com/en-us/library/ee332330(VS.85).aspx
FILE_ATTRIBUTES = {
    32: 'ARCHIVE',
    2048: 'COMPRESSED',
    64: 'DEVICE',
    16: 'DIRECTORY',
    16384: 'ENCRYPTED',
    2: 'HIDDEN',
    128: 'NORMAL',
    8192: 'NOT_CONTENT_INDEXED',
    4096: 'OFFLINE',
    1: 'READONLY',
    1024: 'REPARSE_POINT',
    512: 'SPARSE_FILE',
    4: 'SYSTEM',
    256: 'TEMPORARY',
    65536: 'VIRTUAL',
}

SOURCEINFO = {
    4: "The operation is modifying a file to match the contents of the same file which exists in another member of the replica set.",
    2: "The operation adds a private data stream to a file or directory.",
    1: "The operation provides information about a change to the file or directory made by the operating system.",
    0: "",
}

def main(argv):
    args = cliargs()
    infile = args.infilename
    outfile = args.outfilename
    all_records = args.all_records

    flags = FLAGS_SHORT
    if args.long_flags:
        flags = FLAGS_LONG

    create_temp_file(infile)

    it = open("{}.tmp".format(infile), 'rb')
    if outfile is None:
        ot = sys.stdout
    else:
        ot = open(outfile, 'wb')

    if args.out_format == 'csv':
        joinchar = '","'
    elif args.out_format == 'tab':
        joinchar = "\t"

    if args.out_format != 'body':
        if args.out_format == "csv":
            ot.write('"')
        ot.write(joinchar.join(RECORD_HEADER))
        if args.out_format == "csv":
            ot.write('"')
        ot.write('\n')
    if args.out_format == 'body':
        # body format implies all records and pipe-delimited fields
        all_records = True
        joinchar = '|'

    position_marker = 0
    go = True
    while go:
        try:
            # read the record size, then read the record itself
            #sys.stderr.write("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b Offset {}".format(position_marker))
            it.seek(position_marker, os.SEEK_SET)
            data = it.read(800)
            if len(data) < 60:
                go = False
                continue
            recordsize = struct.unpack_from('i', data)[0]
            if recordsize < 0:
                # invalid data can create an endless loop
                go = False
                continue
            if recordsize < 60:
                # Note: there are places in the test $UsnJrnl:$J file where there are gaps between
                # records that are not accounted for by the record size. The gaps are always 0x00
                # filled. If the record size is zero, move forward until the next non-zero byte is
                # found. The largest gap I found was 296 bytes.
                remaining = len(data.lstrip('\x00'))
                if remaining < 1:
                    break
                position_marker = position_marker + 800 - remaining
                # records are aligned at 0x0 or 0x8, so zero out the least significant 3 bits;
                # this is necessary if the first non-zero byte is not at an 0x0 or 0x8 offset
                position_marker = position_marker & 0xfffffff8
                continue
            it.seek(position_marker)
            data = it.read(recordsize)
            try:
                usn_record = decode_USN_record(data, recordsize)
            except struct.error:
                sys.stderr.write("\nCannot parse {} at offset {}\n".format(data, position_marker))
                sys.stderr.write("\nLength of data is {}\n".format(len(data)))
                position_marker = position_marker + recordsize  # skip ahead, or a struct error would loop forever
                continue
            if usn_record is None:
                position_marker = position_marker + recordsize
                continue
            usn_record = deflag_item(usn_record, flags)
            if all_records or "closed" in usn_record['reason'] or "old_name" in usn_record['reason']:
                # print in the appropriate format
                if args.out_format != 'body':
                    fields = (usn_record['time'],
                              usn_record['mft_ref'],
                              usn_record['mft_ref_seq'],
                              usn_record['parent_ref'],
                              usn_record['parent_ref_seq'],
                              usn_record['usn'],
                              usn_record['filename'],
                              usn_record['file_attrib'],
                              usn_record['reason'],
                              usn_record['sourceinfo'],
                             )
                else:
                    # body file format: the journal supplies a single UTC timestamp, so use it
                    # for all four time fields; computing the epoch value directly avoids the
                    # non-portable strftime("%s")
                    epoch = int((usn_record['time'] - datetime.datetime(1970, 1, 1)).total_seconds())
                    atime = mtime = ctime = etime = epoch
                    fields = ('0',
                              usn_record['filename'],
                              usn_record['mft_ref'],
                              '',
                              '0',
                              '0',
                              '0',
                              atime,
                              mtime,
                              ctime,
                              etime,
                             )
                try:
                    if args.out_format == "csv":
                        ot.write('"')
                    ot.write(joinchar.join(["{}".format(a) for a in fields]))
                    if args.out_format == "csv":
                        ot.write('"')
                    ot.write('\n')
                    ot.flush()
                except IOError:
                    # the output stream went away (e.g. piped to head); close quietly
                    try:
                        sys.stdout.close()
                    except IOError:
                        pass
                    try:
                        sys.stderr.close()
                    except IOError:
                        pass
            usn_record = None
            position_marker = position_marker + recordsize
        except struct.error as e:
            go = False
            sys.stderr.write("{}\n".format(e))
            sys.stderr.write("Struct format error at Tell: {}\n".format(it.tell()))
        except:
            go = False
            sys.stderr.write("Unexpected error: {}\n".format(sys.exc_info()[0]))
            raise

    it.close()
    ot.close()
    os.unlink("{}.tmp".format(infile))
    sys.exit(0)

def cliargs():
    '''Parse CLI args'''
    parser = argparse.ArgumentParser(description="parseusn.py -- USN Journal Parser")
    parser.add_argument('-f', '--infile', required=True, action='store', dest='infilename', help='Input filename, extracted $UsnJrnl:$J')
    parser.add_argument('-m', '--mft', required=False, action='store', dest='mftfilename', help='MFT filename, doesn\'t work yet')
    parser.add_argument('-o', '--outfile', required=False, action='store', dest='outfilename', help='Output filename, defaults to STDOUT')
    parser.add_argument('-t', '--type', required=False, action='store', dest='out_format', default="csv", choices=['csv', 'tab', 'body'], help='Output format, defaults to CSV')
    parser.add_argument('-a', '--all', required=False, action='store_true', dest='all_records', default=False, help='Print all records, not just closed records.')
    parser.add_argument('-l', '--long', required=False, action='store_true', dest='long_flags', default=False, help='Print long strings for the file attribute flags.')
    args = parser.parse_args()
    return args

def create_temp_file(infile):
    '''$UsnJrnl:$J files can contain a large run of leading zeros.
    Create a smaller working file that eliminates them.'''
    it = open(infile, 'rb')
    # skip ahead to the first non-zero byte
    while True:
        chunk = it.read(6553600)
        if len(chunk) == 0:
            # reached EOF without finding data: the file is all zeros
            data = ''
            break
        data = chunk.lstrip('\x00')
        if len(data) > 0:
            break
    position = it.tell() - len(data)
    it.seek(position)
    # copy the remainder into the working file, then clean up
    ot = open("{}.tmp".format(infile), 'wb')
    while True:
        data = it.read(655360)
        ot.write(data)
        if len(data) < 655360:
            break
    it.close()
    ot.close()

def deflag_item(r, flags):
    '''Replace numeric flag values with strings for each record and return the record.
    If a value is not in the lookup table, concatenate the individual flag strings.'''
    # drop anything after the first double-null; some entries have garbage at the end
    filename = r['filename']
    idx = filename.find('\x00\x00')
    if idx != -1:
        filename = filename[:idx]
    # strip the interleaved zero bytes left by the UTF-16 encoding
    r['filename'] = filename.replace('\x00', '')
    # convert the 64-bit Windows timestamp to a human-readable date
    r['time'] = conv_time(r['time'])
    try:
        r['reason'] = flags[r['reason']]
    except KeyError:
        r['reason'] = deflag_long_field(r['reason'], flags)
    try:
        r['sourceinfo'] = SOURCEINFO[r['sourceinfo']]
    except KeyError:
        r['sourceinfo'] = deflag_long_field(r['sourceinfo'], SOURCEINFO)
    try:
        r['file_attrib'] = FILE_ATTRIBUTES[r['file_attrib']]
    except KeyError:
        r['file_attrib'] = deflag_long_field(r['file_attrib'], FILE_ATTRIBUTES)
    return r

def deflag_long_field(value, flags):
    '''When more than one flag is set in a field, read through the
    flags and concatenate the descriptions of each set bit.'''
    setflags = []
    for i in sorted(flags.keys()):
        if i & value > 0:
            setflags.append(flags[i])
    return "; ".join(setflags)

def decode_USN_record(d, size):
    '''Given a chunk of data and its size, parse out the fields of the USN record'''
    r = {}
    r['length'] = size
    # combine major/minor version fields
    r['major'] = struct.unpack("h", d[4:6])[0]
    r['minor'] = struct.unpack("h", d[6:8])[0]
    r['version'] = "{}.{}".format(r['major'], r['minor'])
    if r['major'] == 2:
        # v2 records use 8-byte file references: 6-byte MFT entry number + 2-byte sequence number
        r['mft_ref'] = struct.unpack("ixx", d[8:14])[0]
        r['mft_ref_seq'] = struct.unpack("h", d[14:16])[0]
        r['parent_ref'] = struct.unpack("ixx", d[16:22])[0]
        r['parent_ref_seq'] = struct.unpack("h", d[22:24])[0]
        r['usn'] = struct.unpack("q", d[24:32])[0]
        # reverse the 8 timestamp bytes to get a big-endian hex string
        r['time'] = binascii.hexlify(struct.unpack("8s", d[39:31:-1])[0])
        r['reason'] = struct.unpack("i", d[40:44])[0]
        r['sourceinfo'] = struct.unpack("i", d[44:48])[0]
        r['securityid'] = struct.unpack("i", d[48:52])[0]
        r['file_attrib'] = struct.unpack("i", d[52:56])[0]
        r['filename_length'] = struct.unpack("h", d[56:58])[0]
        r['filename_offset'] = struct.unpack("h", d[58:60])[0]
        off = r['filename_offset']
        length = r['filename_length']
        r['filename'] = struct.unpack("{}s".format(length), d[off:off+length])[0]
    # TODO: this needs to be tested on a system with actual 3.0 records
    elif r['major'] == 3:
        # v3 records use 16-byte (128-bit) file references
        mft_ref1, mft_ref2 = struct.unpack("<QQ", d[8:24])
        r['mft_ref'] = (mft_ref2 << 64) | mft_ref1
        r['mft_ref_seq'] = 0
        parent_ref1, parent_ref2 = struct.unpack("<QQ", d[24:40])
        r['parent_ref'] = (parent_ref2 << 64) | parent_ref1
        r['parent_ref_seq'] = 0
        r['usn'] = struct.unpack("q", d[40:48])[0]
        r['time'] = binascii.hexlify(struct.unpack("8s", d[55:47:-1])[0])
        r['reason'] = struct.unpack("i", d[56:60])[0]
        r['sourceinfo'] = struct.unpack("i", d[60:64])[0]
        r['securityid'] = struct.unpack("i", d[64:68])[0]
        r['file_attrib'] = struct.unpack("i", d[68:72])[0]
        r['filename_length'] = struct.unpack("h", d[72:74])[0]
        r['filename_offset'] = struct.unpack("h", d[74:76])[0]
        off = r['filename_offset']
        length = r['filename_length']
        r['filename'] = struct.unpack("{}s".format(length), d[off:off+length])[0]
    else:
        r = None
    return r
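
# A synthetic v2 record, useful for sanity-checking the offsets above (the field
# values are made up for illustration; they are not from the source):
#
#   name = 'a\x00.\x00t\x00x\x00t\x00'                    # "a.txt" in UTF-16-LE
#   rec  = struct.pack('<ihh', 72, 2, 0)                  # record length, major, minor
#   rec += struct.pack('<ixxh', 1234, 5)                  # MFT reference + sequence
#   rec += struct.pack('<ixxh', 5678, 2)                  # parent reference + sequence
#   rec += struct.pack('<q', 1000)                        # USN
#   rec += struct.pack('<q', 116444736000000000)          # timestamp (1970-01-01 UTC)
#   rec += struct.pack('<iiii', 0x100, 0, 0, 32)          # reason, sourceinfo, secid, attrib
#   rec += struct.pack('<hh', 10, 60) + name + '\x00\x00' # name length, offset, name, pad
#
# decode_USN_record(rec, 72) then returns reason 0x100 (file_created) and the
# raw UTF-16 filename, which deflag_item() reduces to 'a.txt'.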

def conv_time(dt):
    '''Convert a Windows 64-bit timestamp, passed as a big-endian hex string, to a datetime value'''
    us = int(dt, 16) / 10.
    return datetime.datetime(1601, 1, 1) + datetime.timedelta(microseconds=us)
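
# Sanity check using a well-known constant (not from the source): FILETIME
# 0x019db1ded53e8000 is 116444736000000000, the Unix epoch offset, so
# conv_time('019db1ded53e8000') returns datetime.datetime(1970, 1, 1, 0, 0).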

if __name__ == '__main__':
    main(sys.argv[1:])