Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- # -*- coding: utf-8 -*-
- # Copyright 2013 The Plaso Project Authors.
- # Please see the AUTHORS file for details on individual authors.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
"""Parser for the NTFS USN change journal.

More information about the format can be found here:
http://msdn.microsoft.com/en-us/library/windows/desktop/aa365720%28v=vs.85%29.aspx
"""
- import struct
- from plaso.lib import event
- from plaso.lib import eventdata
- from plaso.lib import parser
- from plaso.lib import errors
- from plaso.lib import timelib
- from functools import partial
class UsnJrnlEvent(event.TimestampEvent):
  """Timestamped event describing one USN change journal record."""

  DATA_TYPE = 'windows:metadata:usnjrnl'

  def __init__(self, target_file, majorvers, minorvers, mft, parentmft, usn,
               reasons, attributes, securityid, timestamp, timestamptype):
    """Stores the fields of a parsed change journal record on the event.

    Args:
      target_file: name of the file or directory the record refers to.
      majorvers: major version number of the record.
      minorvers: minor version number of the record.
      mft: file reference number of the file or directory.
      parentmft: file reference number of the parent directory.
      usn: update sequence number of the record.
      reasons: text describing the change reasons set in the record.
      attributes: text describing the file attributes set in the record.
      securityid: security identifier stored in the record.
      timestamp: timestamp of the record, passed on to the base class.
      timestamptype: description of what the timestamp represents.
    """
    data_type = UsnJrnlEvent.DATA_TYPE
    super(UsnJrnlEvent, self).__init__(timestamp, timestamptype, data_type)

    # The base class already received timestamp, usage and data type; they
    # are stored again here under the attribute names used by this event.
    self.data_type = data_type

    self.target_file = target_file
    self.majorvers = majorvers
    self.minorvers = minorvers
    self.mft = mft
    self.parentmft = parentmft
    self.usn = usn
    self.reasons = reasons
    self.fileattrib = attributes
    self.security = securityid

    self.timestamp = timestamp
    self.timestamptype = timestamptype
class UsnJrnlParser(parser.PlasoParser):
  """Parses the NTFS change journal.

  The NTFS update sequence number (USN) change journal resides at
  /$Extend/$UsnJrnl. It is present in NTFS since Windows XP. The most
  relevant data is contained in the named alternate data stream $J, thus
  this parser looks for the file /$Extend/$UsnJrnl:$J.

  Information about the journal itself will not be reconstructed.

  More details on the USN change journal can be found here:
  http://msdn.microsoft.com/en-us/library/windows/desktop/aa365720%28v=vs.85%29.aspx

  Two versions of USN change journal records are handled. Major version 2
  was used since Windows XP / Server 2003, major version 3 was used since
  Windows 8 / Windows Server 2012. Records with any other major version are
  rejected.

  All records should be aligned at 64-bit boundaries and all integers are
  stored little-endian.

  The two structures can be defined as follows:

  Major version 2: (Size in Byte)
  Offset  Type           Size  Record
  0x00    DWORD          4     RecordLength
  0x04    WORD           2     MajorVersion;
  0x06    WORD           2     MinorVersion;
  0x08    DWORDLONG      8     FileReferenceNumber;
  0x10    DWORDLONG      8     ParentFileReferenceNumber;
  0x18    USN            8     Usn;
  0x20    LARGE_INTEGER  8     TimeStamp;
  0x28    DWORD          4     Reason;
  0x2C    DWORD          4     SourceInfo;
  0x30    DWORD          4     SecurityId;
  0x34    DWORD          4     FileAttributes;
  0x38    WORD           2     FileNameLength;
  0x3A    WORD           2     FileNameOffset;
  0x3C    WCHAR          *     FileName[1];

  Major version 3: (Size in Byte)
  Offset  Type           Size  Record
  0x00    DWORD          4     RecordLength
  0x04    WORD           2     MajorVersion;
  0x06    WORD           2     MinorVersion;
  0x08    BYTE           16    FileReferenceNumber;
  0x18    BYTE           16    ParentFileReferenceNumber;
  0x28    USN            8     Usn;
  0x30    LARGE_INTEGER  8     TimeStamp;
  0x38    DWORD          4     Reason;
  0x3C    DWORD          4     SourceInfo;
  0x40    DWORD          4     SecurityId;
  0x44    DWORD          4     FileAttributes;
  0x48    WORD           2     FileNameLength;
  0x4A    WORD           2     FileNameOffset;
  0x4C    WCHAR          *     FileName[1];

  The major difference between the two versions is the size and type of
  the two FileReferenceNumbers. With the minor difference of the version
  number, the official reason strings are identical between the versions
  as well. SourceInfo and FileAttributes are version independent.
  """

  REASONS = {
      0x00000001: 'The data in the file or directory is overwritten.',
      0x00000002: 'The file or directory is extended (added to).',
      0x00000004: 'The file or directory is truncated.',
      0x00000010: 'The data in one or more named data streams for a file is '
                  'overwritten.',
      0x00000020: 'The one or more named data streams for a file are extended '
                  '(added to).',
      0x00000040: 'The one or more named data streams for a file is truncated.',
      0x00000100: 'The file or directory is created for the first time.',
      0x00000200: 'The file or directory is deleted.',
      0x00000400: 'The user made a change to the extended attributes of a file '
                  'or directory. These NTFS file system attributes are not '
                  'accessible to Windows-based applications.',
      0x00000800: 'A change is made in the access rights to a file or '
                  'directory.',
      0x00001000: 'The file or directory is renamed, and the file name in the '
                  'USN_RECORD structure is the previous name.',
      0x00002000: 'A file or directory is renamed, and the file name in the '
                  'USN_RECORD_V2 structure is the new name.',
      0x00004000: 'A user changes the FILE_ATTRIBUTE_NOT_CONTENT_INDEXED '
                  'attribute. That is, the user changes the file or directory '
                  'from one where content can be indexed to one where content '
                  'cannot be indexed, or vice versa. Content indexing permits '
                  'rapid searching of data by building a database of selected '
                  'content.',
      0x00008000: 'A user has either changed one or more file or directory '
                  'attributes (for example, the read-only, hidden, system, '
                  'archive, or sparse attribute), or one or more time '
                  'stamps.',
      0x00010000: 'An NTFS file system hard link is added to or removed from '
                  'the file or directory. An NTFS file system hard link, '
                  'similar to a POSIX hard link, is one of several directory '
                  'entries that see the same file or directory.',
      0x00020000: 'The compression state of the file or directory is changed '
                  'from or to compressed.',
      0x00040000: 'The file or directory is encrypted or decrypted.',
      0x00080000: 'The object identifier of a file or directory is changed.',
      0x00100000: 'The reparse point that is contained in a file or directory '
                  'is changed, or a reparse point is added to or deleted from '
                  'a file or directory.',
      0x00200000: 'A named stream is added to or removed from a file, or a '
                  'named stream is renamed.',
      0x80000000: 'The file or directory is closed.',
  }

  SOURCES = {
      0x00000002: 'USN_SOURCE_AUXILIARY_DATA',
      0x00000001: 'USN_SOURCE_DATA_MANAGEMENT',
      0x00000004: 'USN_SOURCE_REPLICATION_MANAGEMENT',
  }

  # See http://msdn.microsoft.com/en-us/library/ee332330%28VS.85%29.aspx
  ATTRIBUTES = {
      1: 'FILE_ATTRIBUTE_READONLY',
      2: 'FILE_ATTRIBUTE_HIDDEN',
      4: 'FILE_ATTRIBUTE_SYSTEM',
      16: 'FILE_ATTRIBUTE_DIRECTORY',
      32: 'FILE_ATTRIBUTE_ARCHIVE',
      64: 'FILE_ATTRIBUTE_DEVICE',
      128: 'FILE_ATTRIBUTE_NORMAL',
      256: 'FILE_ATTRIBUTE_TEMPORARY',
      512: 'FILE_ATTRIBUTE_SPARSE_FILE',
      1024: 'FILE_ATTRIBUTE_REPARSE_POINT',
      2048: 'FILE_ATTRIBUTE_COMPRESSED',
      4096: 'FILE_ATTRIBUTE_OFFLINE',
      8192: 'FILE_ATTRIBUTE_NOT_CONTENT_INDEXED',
      16384: 'FILE_ATTRIBUTE_ENCRYPTED',
      65536: 'FILE_ATTRIBUTE_VIRTUAL',
  }

  # Records start on 64-bit boundaries.
  _RECORD_ALIGNMENT = 8

  # Amount of data read at once while skipping runs of zero bytes (1 MiB).
  _SPARSE_CHUNK_SIZE = 1024 * 1024

  # RecordLength, MajorVersion, MinorVersion.
  _RECORD_HEADER = struct.Struct('<IHH')

  # Remainder of the fixed-size part of a record, after the 8 byte header.
  # Version 2 uses 64-bit file reference numbers; version 3 uses 128-bit
  # ones, which are read as a low and a high 64-bit half each.
  _RECORD_V2 = struct.Struct('<QQQqIIIIHH')
  _RECORD_V3 = struct.Struct('<QQQQQqIIIIHH')

  # Reason bits grouped by the kind of timestamp they are reported as.
  # The close reason (0x80000000) and records without any of the bits below
  # are reported as access time.
  _MODIFICATION_REASONS = (
      0x00000001 | 0x00000002 | 0x00000004 | 0x00000010 | 0x00000020 |
      0x00000040 | 0x00010000 | 0x00040000 | 0x00200000)
  _CHANGE_REASONS = (
      0x00000400 | 0x00000800 | 0x00004000 | 0x00008000 | 0x00020000 |
      0x00080000 | 0x00100000)
  _CREATION_REASONS = 0x00000100 | 0x00002000
  _DELETED_REASONS = 0x00000200 | 0x00001000

  def Parse(self, file_object):
    """Verifies the file is a change journal and returns the parsed events.

    As the journal has no magic bytes or unique recognizable byte patterns,
    verification is done by checking the filename for $UsnJrnl and $J.

    Args:
      file_object: A filehandle/file-like-object that is seekable to the
                   file needed to be checked.

    Returns:
      A generator of UsnJrnlEvent objects, see OffsetParse.

    Raises:
      UnableToParseFile: when the file has the wrong name or cannot be
                         parsed.
    """
    # Check the given filename ( *$usnjrnl*$J ) at least basically.
    try:
      name = file_object.name.lower()
      is_journal = name.endswith(u'$j') and u'$usnjrnl' in name
    except UnicodeError as error:
      # Comparing a non-ASCII byte string with a unicode literal fails with
      # a decode (not encode) error, hence the common base class. The name
      # is formatted with %r so that building this message cannot fail for
      # the very same reason.
      raise errors.UnableToParseFile(
          u'[%s] unable to read name of file %r: %s' % (
              self.parser_name, file_object.name, error))

    if not is_journal:
      raise errors.UnableToParseFile(
          u'[%s] file %s not named *$UsnJrnl*$J' % (
              self.parser_name, file_object.name))

    return self.Scan(file_object)

  def Scan(self, file_object):
    """Parses and returns change journal records from the given file.

    Leading zero bytes are skipped before parsing starts.

    Args:
      file_object: A filehandle/file-like-object that is seekable to the
                   file needed to be checked.

    Returns:
      A generator of UsnJrnlEvent objects, see OffsetParse. As the records
      are parsed lazily, errors in the records surface while iterating.

    Raises:
      UnableToParseFile: when the start of the journal data cannot be
                         determined.
    """
    try:
      offset = self.ReadSparseOffset(file_object)
    except Exception as error:
      raise errors.UnableToParseFile(
          u'[%s] Exception with scan %s: %s' % (
              self.parser_name, file_object, error))

    # ReadSparseOffset finds the first non-zero byte, which is not the start
    # of the record when the low byte(s) of RecordLength are zero (e.g. a
    # length of 0x100), so step back to the enclosing 64-bit boundary.
    offset -= offset % self._RECORD_ALIGNMENT
    return self.OffsetParse(file_object, offset)

  def ReadSparseOffset(self, file_object):
    """Determines the offset of the next non-zero byte in file_object.

    Reads the file_object in 1 MiB chunks, starting at the current position,
    until a chunk containing a non-zero byte is found.

    Args:
      file_object: A filehandle/file-like-object that is seekable to the
                   file needed to be checked.

    Returns:
      The offset of the first non-zero byte or, when only zero bytes remain,
      the position at which reading stopped (end of file).
    """
    # $UsnJrnl may contain lots of leading zeros. Try to skip them fast by
    # reading chunks and stripping the zeros.
    remainder = 0
    while True:
      chunk = file_object.read(self._SPARSE_CHUNK_SIZE)
      if not chunk:
        break
      remainder = len(chunk.lstrip(b'\x00'))
      if remainder:
        break

    # The offset is the current file position minus the rest of the current
    # chunk.
    return file_object.tell() - remainder

  def OffsetParse(self, file_object, offset):
    """Parses the journal from file_object, omitting 'offset' bytes.

    $UsnJrnl:$J often contains a large number of leading zeroes, the offset
    can be specified in order to skip over them. Runs of zero bytes between
    records are skipped as well.

    Args:
      file_object: A seekable filehandle/file-like-object.
      offset: The offset at which the first record is expected.

    Yields:
      An UsnJrnlEvent for every record, until the end of the file.

    Raises:
      UnableToParseFile: when the offset cannot be seeked to or a record is
                         malformed.
    """
    try:
      file_object.seek(offset)
    except Exception as error:
      raise errors.UnableToParseFile(
          u'[%s] unable to seek offset %i in file %s: %s' % (
              self.parser_name, offset, file_object.name, error))

    alignment = self._RECORD_ALIGNMENT
    while True:
      record_start = file_object.tell()
      try:
        entry = self.readEntry(file_object)
      except EndOfFileError:
        break
      except SparseError:
        # Jump over the sparse part. We only want to skip to 64-bit
        # boundaries.
        resume = self.ReadSparseOffset(file_object)
        resume -= resume % alignment
        # Never resume at or before the record that was just rejected: a
        # zero RecordLength followed by non-zero bytes within the same
        # 8 bytes would otherwise be read over and over again.
        next_boundary = record_start - (record_start % alignment) + alignment
        file_object.seek(max(resume, next_boundary))
        continue

      yield UsnJrnlEvent(*entry)

  def readEntry(self, file_object):
    """Reads a single change journal record at the current file position.

    Args:
      file_object: A seekable filehandle/file-like-object positioned at the
                   start of a record.

    Returns:
      A list of: file name, major version, minor version, file reference
      number, parent file reference number, USN, reason descriptions, file
      attribute names, security identifier, timestamp and timestamp type.

    Raises:
      EndOfFileError: when the file ends before the record is complete.
      SparseError: when RecordLength is zero; the file position is then set
                   4 bytes past the start of the record.
      UnableToParseFile: when the record has an unsupported major version
                         or inconsistent length fields.
    """
    record_start = file_object.tell()

    # Read record size and version numbers only.
    header = file_object.read(self._RECORD_HEADER.size)
    if len(header) < self._RECORD_HEADER.size:
      raise EndOfFileError("Reached end of file at offset %i" % record_start)
    record_size, majorversion, minorversion = self._RECORD_HEADER.unpack(
        header)

    if record_size == 0:
      file_object.seek(record_start + 4)
      raise SparseError('Reached 0 Byte in recordsize at offset %i - sparse'
                        ' block?' % record_start)

    # Depending on the version, 52 or 68 more bytes of fixed-size data
    # follow. Everything but the file reference numbers is identical.
    if majorversion == 2:
      layout = self._RECORD_V2
    elif majorversion == 3:
      layout = self._RECORD_V3
    else:
      raise errors.UnableToParseFile(
          u'[%s] unsupported USN record version %i.%i at offset %i' % (
              self.parser_name, majorversion, minorversion, record_start))

    body = file_object.read(layout.size)
    if len(body) < layout.size:
      raise EndOfFileError("Unexpectetly reached end of file at offset %i" %
                           record_start)
    fields = layout.unpack(body)

    if majorversion == 2:
      mftref, mftparentref = fields[:2]
      fields = fields[2:]
    else:
      # Combine the little-endian halves of the 128-bit reference numbers.
      mftref = fields[0] | (fields[1] << 64)
      mftparentref = fields[2] | (fields[3] << 64)
      fields = fields[4:]

    # SourceInfo is read but not reported: the returned entry has no slot
    # for it.
    (usn, filetime, reason_flags, unused_source_flags, security_id,
     attribute_flags, name_length, name_offset) = fields

    # The name normally follows the fixed-size part directly; honour a
    # larger FileNameOffset, but never step back into the fixed-size part.
    fixed_size = self._RECORD_HEADER.size + layout.size
    name_start = max(name_offset, fixed_size)
    if record_size < name_start + name_length:
      raise errors.UnableToParseFile(
          u'[%s] record at offset %i is too short (%i bytes) for its file '
          u'name' % (self.parser_name, record_start, record_size))

    if name_start > fixed_size:
      file_object.seek(record_start + name_start)
    name_data = file_object.read(name_length)
    if len(name_data) < name_length:
      raise EndOfFileError("Unexpectetly reached end of file at offset %i" %
                           record_start)
    # File names are stored as little-endian UTF-16 without a byte order
    # mark; undecodable data is replaced rather than aborting the parse.
    filename = name_data.decode('utf-16-le', 'replace')

    # Jump over the padding at the end of the record.
    file_object.seek(record_start + record_size)

    # Reasons and file attributes are (in essence) bit-arrays, so they can
    # be read by checking which bits are set.
    reasons = self._DescribeFlags(self.REASONS, reason_flags, u' ')
    attributes = self._DescribeFlags(self.ATTRIBUTES, attribute_flags, u', ')

    # The timestamp is given in the NTFS filetime format.
    timestamp = timelib.Timestamp.FromFiletime(filetime)
    timestamptype = self._GetTimestampType(reason_flags)

    return [filename, majorversion, minorversion, mftref,
            mftparentref, usn, reasons, attributes, security_id,
            timestamp, timestamptype]

  @staticmethod
  def _DescribeFlags(descriptions, flags, separator):
    """Joins the descriptions of all bits set in flags, lowest bit first."""
    return separator.join(
        descriptions[bit] for bit in sorted(descriptions) if bit & flags)

  @classmethod
  def _GetTimestampType(cls, reason_flags):
    """Maps the reason bits of a record to the timestamp type of its event.

    When bits of several groups are set, deletion takes precedence over
    creation, creation over change and change over modification.
    """
    if reason_flags & cls._DELETED_REASONS:
      return eventdata.EventTimestamp.DELETED_TIME
    if reason_flags & cls._CREATION_REASONS:
      return eventdata.EventTimestamp.CREATION_TIME
    if reason_flags & cls._CHANGE_REASONS:
      return eventdata.EventTimestamp.CHANGE_TIME
    if reason_flags & cls._MODIFICATION_REASONS:
      return eventdata.EventTimestamp.MODIFICATION_TIME
    return eventdata.EventTimestamp.ACCESS_TIME
class EndOfFileError(Exception):
  """Raised when the file ends before a complete record could be read.

  Attributes:
    value: the value the error was created with, a message naming the
           offset at which the end of the file was reached.
  """

  def __init__(self, value):
    """Initializes the error and keeps the given value for later display."""
    super(EndOfFileError, self).__init__(value)
    self.value = value

  def __str__(self):
    """Returns the repr() form of the stored value."""
    return '%r' % (self.value,)
class SparseError(Exception):
  """Raised when a record length of zero is read (a possible sparse block).

  Attributes:
    value: the value the error was created with, a message naming the
           offset at which the zero record length was found.
  """

  def __init__(self, value):
    """Initializes the error and keeps the given value for later display."""
    super(SparseError, self).__init__(value)
    self.value = value

  def __str__(self):
    """Returns the repr() form of the stored value."""
    return '%r' % (self.value,)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement