Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python
"""Carve JPEG images out of a raw disk image.

Usage: <script> disk-image directory

The disk image is scanned for the byte sequence FF D8 FF (a JPEG start of
image followed by the first byte of the next marker).  From each candidate
the marker segments are walked using their 2-byte big-endian length fields
until a start-of-scan marker (FF DA) is reached, after which the data is
searched for the end-of-image marker (FF D9).  Every candidate that reaches
an end-of-image marker is written to ``directory`` as ``NNNNN.jpg``.
"""

import os
import sys

# Start-of-image signature: the SOI marker plus the 0xff that must open the
# marker immediately following it.
SOI = b"\xff\xd8\xff"
# End-of-image marker.
EOI = b"\xff\xd9"
# Number of bytes pulled from the disk image per read while scanning.
CHUNK_SIZE = 1024


def _ensure(fd, window, size):
    """Top ``window`` up from ``fd`` so it holds at least ``size`` bytes.

    Returns True when the window is large enough afterwards, False when the
    file ran out of data first.
    """
    missing = size - len(window)
    if missing > 0:
        window.extend(fd.read(missing))
    return len(window) >= size


def _locate_eoi(fd, window):
    """Walk the JPEG markers of the candidate at the start of ``window``.

    ``window`` must begin with the SOI marker; further bytes are read from
    ``fd`` and appended to it as needed.

    Returns a ``(position, valid)`` tuple.  When ``valid`` is True,
    ``position`` is the index of the EOI marker, so the image occupies
    ``window[:position + 2]``.  When it is False the candidate was rejected
    and ``position`` is where the walk stopped.
    """
    pos = 2
    while True:
        # A marker needs two bytes and must start with 0xff.
        if not _ensure(fd, window, pos + 2) or window[pos] != 0xFF:
            return pos, False
        marker = window[pos + 1]
        if marker == 0xD9:
            return pos, True
        if marker == 0xD8:
            # A nested SOI: this candidate is not a complete image.
            return pos, False
        if marker == 0xDA:
            # Start of scan: the first EOI found from here on is taken as
            # the end of the image.  The whole scan is held in memory while
            # searching, and the search only gives up at end of file.
            search_from = pos
            while True:
                chunk = fd.read(CHUNK_SIZE)
                window.extend(chunk)
                hit = window.find(EOI, search_from)
                if hit != -1:
                    return hit, True
                if not chunk:
                    # End of file without an EOI: truncated image.
                    return search_from, False
                # Step back one byte in case the EOI marker straddles the
                # bytes just read and the next read.
                search_from = len(window) - 1
        # Any other marker is treated as a segment carrying a 2-byte
        # big-endian length that counts the length field itself.
        if not _ensure(fd, window, pos + 4):
            return pos, False
        length = (window[pos + 2] << 8) | window[pos + 3]
        # Skip the marker (2 bytes) plus the segment; pos then points at
        # what should be the next marker.
        pos += length + 2


def extract_jpegs(disk_image, directory):
    """Scan ``disk_image`` for JPEG images and save them into ``directory``.

    ``directory`` is created if it does not exist.  Images are written as
    ``00000.jpg``, ``00001.jpg``, ... in the order they are found, and one
    diagnostic line is printed per image and per rejected candidate.

    Returns the number of images written.
    """
    if not os.path.exists(directory):
        os.makedirs(directory)

    found = 0
    window = bytearray()
    with open(disk_image, "rb") as fd:
        while True:
            # The window may still hold unprocessed bytes from the previous
            # pass, so its real file offset is the current position minus
            # its length.
            offset = fd.tell() - len(window)
            window.extend(fd.read(CHUNK_SIZE))
            if not window:
                break

            # Make sure an SOI signature cannot be split across two reads:
            # keep extending while either of the last two bytes is 0xff.
            while b"\xff" in window[-2:]:
                extra = fd.read(4)
                if not extra:
                    break
                window.extend(extra)

            soi = window.find(SOI)
            if soi == -1:
                # Nothing of interest in this window.
                del window[:]
                continue

            abs_soi = offset + soi
            # Nothing before the SOI is of interest.
            del window[:soi]

            eoi, valid = _locate_eoi(fd, window)
            if valid:
                print("%d : found at %d %d" % (found, abs_soi, eoi + 2))
                target = os.path.join(directory, "%05d.jpg" % found)
                with open(target, "wb") as out:
                    out.write(bytes(window[:eoi + 2]))
                found += 1
                # Keep everything after the image for the next pass.
                del window[:eoi + 2]
            else:
                print("false positive at %d of %d" % (abs_soi, eoi + 2))
                # Drop only the two SOI bytes so that any other SOI already
                # in the window is still found on the next pass.
                del window[:2]
    return found


def main(argv=None):
    """Command-line entry point; returns the process exit status."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        print("Usage: %s disk-image directory" % argv[0])
        return 0
    extract_jpegs(argv[1], argv[2])
    return 0


if __name__ == "__main__":
    sys.exit(main())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement