Advertisement
opexxx

magic.py

Mar 30th, 2015
406
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 9.33 KB | None | 0 0
  1. """
  2. magic is a wrapper around the libmagic file identification library.
  3.  
  4. See README for more information.
  5.  
  6. Usage:
  7.  
  8. >>> import magic
  9. >>> magic.from_file("testdata/test.pdf")
  10. 'PDF document, version 1.2'
  11. >>> magic.from_file("testdata/test.pdf", mime=True)
  12. 'application/pdf'
  13. >>> magic.from_buffer(open("testdata/test.pdf").read(1024))
  14. 'PDF document, version 1.2'
  15. >>>
  16.  
  17.  
  18. """
  19.  
  20. import sys
  21. import glob
  22. import os.path
  23. import ctypes
  24. import ctypes.util
  25. import threading
  26.  
  27. from ctypes import c_char_p, c_int, c_size_t, c_void_p
  28.  
  class MagicException(Exception):
      """
      Raised when a libmagic call reports failure.

      The first constructor argument (the error text obtained from libmagic,
      or None when libmagic supplied none) is also exposed as `message`.
      """

      def __init__(self, *args):
          Exception.__init__(self, *args)
          # Python 3 exceptions have no implicit `message` attribute, so keep
          # one explicitly for handlers that inspect e.message.
          self.message = args[0] if args else None
  30.  
  class Magic:
      """
      Magic is a wrapper around the libmagic C library.

      Each instance remembers the thread that created it and refuses to be
      used from any other thread (see _thread_check).
      """

      def __init__(self, mime=False, magic_file=None, mime_encoding=False,
                   keep_going=False, uncompress=False):
          """
          Create a new libmagic wrapper.

          mime - if True, mimetypes are returned instead of textual descriptions
          mime_encoding - if True, codec is returned (only consulted when
                          mime is False)
          magic_file - use a mime database other than the system default
          keep_going - don't stop at the first match, keep going
          uncompress - Try to look inside compressed files.

          Raises MagicException if the magic database cannot be loaded.
          """
          self.flags = MAGIC_NONE
          if mime:
              self.flags |= MAGIC_MIME
          elif mime_encoding:
              self.flags |= MAGIC_MIME_ENCODING
          if keep_going:
              self.flags |= MAGIC_CONTINUE
          if uncompress:
              self.flags |= MAGIC_COMPRESS

          self.cookie = magic_open(self.flags)

          magic_load(self.cookie, magic_file)

          # Remember the owning thread so _thread_check can reject use from
          # any other thread.
          self.thread = threading.current_thread()

      def from_buffer(self, buf):
          """
          Identify the contents of `buf`
          """
          self._thread_check()
          try:
              return magic_buffer(self.cookie, buf)
          except MagicException as e:
              return self._handle509Bug(e)

      def from_file(self, filename):
          """
          Identify the contents of file `filename`
          raises IOError if the file does not exist
          """
          self._thread_check()
          if not os.path.exists(filename):
              raise IOError("File does not exist: " + filename)
          try:
              return magic_file(self.cookie, filename)
          except MagicException as e:
              return self._handle509Bug(e)

      def _handle509Bug(self, e):
          """
          Translate the libmagic 5.09 "NULL result without an error message"
          failure into a generic mimetype; re-raise every other error.
          """
          # libmagic 5.09 has a bug where it might fail to identify the
          # mimetype of a file and returns null from magic_file (and
          # likely _buffer), but also does not return an error message.
          #
          # Read the text from e.args rather than e.message: Python 3
          # exceptions do not provide a `message` attribute.
          message = e.args[0] if e.args else None
          if message is None and (self.flags & MAGIC_MIME):
              return "application/octet-stream"
          # Anything else is a genuine failure; do not hide it behind a
          # silent None return value.
          raise e

      def _thread_check(self):
          """Raise if called from a thread other than the creating one."""
          if self.thread != threading.current_thread():
              raise Exception('attempting to use libmagic on multiple threads will '
                              'end in SEGV.  Prefer to use the module functions '
                              'from_file or from_buffer, or carefully manage direct '
                              'use of the Magic class')

      def __del__(self):
          # no _thread_check here because there can be no other
          # references to this object at this point.

          # during shutdown magic_close may have been cleared already so
          # make sure it exists before using it.

          # __init__ can raise before self.cookie is assigned, so look the
          # attribute up defensively instead of assuming it exists.
          cookie = getattr(self, 'cookie', None)
          if cookie and magic_close:
              magic_close(cookie)
              self.cookie = None
  116.  
  117.  
  118. instances = threading.local()
  119.  
  120. def _get_magic_type(mime):
  121.     i = instances.__dict__.get(mime)
  122.     if i is None:
  123.         i = instances.__dict__[mime] = Magic(mime=mime)
  124.     return i
  125.  
  def from_file(filename, mime=False):
      """
      Identify the file named `filename` and return the detected filetype.

      With mime=True the mimetype is returned; otherwise the result is a
      human readable description.

      >>> magic.from_file("testdata/test.pdf", mime=True)
      'application/pdf'
      """
      return _get_magic_type(mime).from_file(filename)
  137.  
  def from_buffer(buffer, mime=False):
      """
      Identify the binary string `buffer` and return the detected filetype.

      With mime=True the mimetype is returned; otherwise the result is a
      human readable description.

      >>> magic.from_buffer(open("testdata/test.pdf").read(1024))
      'PDF document, version 1.2'
      """
      return _get_magic_type(mime).from_buffer(buffer)
  149.  
  150.  
  151.  
  152.  
  libmagic = None
  # Let's try to find magic or magic1
  dll = (ctypes.util.find_library('magic')
         or ctypes.util.find_library('magic1')
         or ctypes.util.find_library('cygmagic-1'))

  # find_library returns None if it doesn't find the library, and a name it
  # does return can still fail to load.  Treat a failed load like "not found"
  # so the platform-specific candidates below still get a chance and the
  # module ends up raising ImportError rather than a bare OSError.
  if dll:
      try:
          libmagic = ctypes.CDLL(dll)
      except OSError:
          libmagic = None

  if not libmagic or not libmagic._name:
      # Well-known install locations / file names tried per platform.
      windows_dlls = ['magic1.dll', 'cygmagic-1.dll']
      platform_to_lib = {'darwin': ['/opt/local/lib/libmagic.dylib',
                                    '/usr/local/lib/libmagic.dylib'] +
                           # Assumes there will only be one version installed
                           glob.glob('/usr/local/Cellar/libmagic/*/lib/libmagic.dylib'),
                         'win32': windows_dlls,
                         'cygwin': windows_dlls}
      for dll in platform_to_lib.get(sys.platform, []):
          try:
              libmagic = ctypes.CDLL(dll)
              break
          except OSError:
              # This candidate is missing or unloadable; try the next one.
              pass

  if not libmagic or not libmagic._name:
      # It is better to raise an ImportError since we are importing magic module
      raise ImportError('failed to find libmagic.  Check your installation')
  179.  
  # ctypes type used for the libmagic handle ("cookie") in the function
  # prototypes declared in this module.
  magic_t = ctypes.c_void_p
  181.  
  def errorcheck_null(result, func, args):
      """
      errcheck hook: pass `result` through unless it is None.

      A None result raises MagicException carrying whatever magic_error
      reports for the handle in args[0].
      """
      if result is not None:
          return result
      message = magic_error(args[0])
      raise MagicException(message)
  188.  
  def errorcheck_negative_one(result, func, args):
      """
      errcheck hook: pass `result` through unless it equals -1.

      A result of -1 raises MagicException carrying whatever magic_error
      reports for the handle in args[0].
      """
      # Compare by value: `is -1` tests object identity, which only happens
      # to work where the interpreter caches small integers.
      if result == -1:
          err = magic_error(args[0])
          raise MagicException(err)
      return result
  195.  
  196.  
  def coerce_filename(filename):
      """
      Return `filename` in the form that is handed to libmagic.

      None is passed through unchanged, text strings are encoded as UTF-8,
      and any other value is returned as is.
      """
      if filename is None:
          return None

      # Encode explicitly: ctypes would otherwise convert unicode strings
      # implicitly with .encode('ascii'), and using the filesystem encoding
      # gives inconsistent behavior (crashes) depending on the user's LANG
      # environment variable.
      if sys.version_info[0] >= 3:
          holds_text = isinstance(filename, str)
      else:
          holds_text = isinstance(filename, unicode)  # noqa: F821 - Python 2 only
      return filename.encode('utf-8') if holds_text else filename
  213.  
  # ctypes prototypes for the libmagic entry points used by this module.

  # magic_open: takes an int of flag bits and returns a handle (magic_t).
  magic_open = libmagic.magic_open
  magic_open.restype = magic_t
  magic_open.argtypes = [c_int]

  # magic_close: takes a handle; no return value.
  magic_close = libmagic.magic_close
  magic_close.restype = None
  magic_close.argtypes = [magic_t]

  # magic_error: takes a handle and returns a C string (or NULL, seen from
  # Python as None).  The errcheck hooks use it to build MagicException.
  magic_error = libmagic.magic_error
  magic_error.restype = c_char_p
  magic_error.argtypes = [magic_t]

  # magic_errno: takes a handle and returns an int.
  magic_errno = libmagic.magic_errno
  magic_errno.restype = c_int
  magic_errno.argtypes = [magic_t]

  # Raw magic_file: takes a handle and a C string, returns a C string.
  # errorcheck_null turns a None result into MagicException.  Callers go
  # through the magic_file() wrapper, which coerces the filename first.
  _magic_file = libmagic.magic_file
  _magic_file.restype = c_char_p
  _magic_file.argtypes = [magic_t, c_char_p]
  _magic_file.errcheck = errorcheck_null
  234.  
  def magic_file(cookie, filename):
      """
      Call libmagic's magic_file on `cookie` after passing `filename`
      through coerce_filename, and return its result.
      """
      encoded_name = coerce_filename(filename)
      return _magic_file(cookie, encoded_name)
  237.  
  # Raw magic_buffer: takes a handle, a pointer to the data and its size,
  # and returns a C string.  errorcheck_null turns a None result into
  # MagicException.
  _magic_buffer = libmagic.magic_buffer
  _magic_buffer.restype = c_char_p
  _magic_buffer.argtypes = [magic_t, c_void_p, c_size_t]
  _magic_buffer.errcheck = errorcheck_null
  242.  
  def magic_buffer(cookie, buf):
      """
      Call libmagic's magic_buffer on `cookie` for the data in `buf` and
      return its result.

      Text strings are encoded as UTF-8 first; byte strings are passed
      through unchanged.
      """
      # len() of a text string counts characters, not bytes, so encode text
      # up front to make the pointer and the length describe the same byte
      # buffer.  type(u"") is unicode on Python 2 and str on Python 3.
      if isinstance(buf, type(u"")):
          buf = buf.encode('utf-8')
      return _magic_buffer(cookie, buf, len(buf))
  245.  
  246.  
  # Raw magic_load: takes a handle and a C string, returns an int.
  # errorcheck_negative_one turns a -1 result into MagicException.  Callers
  # go through the magic_load() wrapper, which coerces the filename first.
  _magic_load = libmagic.magic_load
  _magic_load.restype = c_int
  _magic_load.argtypes = [magic_t, c_char_p]
  _magic_load.errcheck = errorcheck_negative_one
  251.  
  def magic_load(cookie, filename):
      """
      Call libmagic's magic_load on `cookie` after passing `filename`
      through coerce_filename, and return its result.
      """
      encoded_name = coerce_filename(filename)
      return _magic_load(cookie, encoded_name)
  254.  
  # magic_setflags: takes a handle and an int of flag bits, returns an int.
  # No errcheck hook is attached, so the return value is handed back as is.
  magic_setflags = libmagic.magic_setflags
  magic_setflags.restype = c_int
  magic_setflags.argtypes = [magic_t, c_int]

  # magic_check: takes a handle and a C string, returns an int.
  # The string is not passed through coerce_filename here.
  magic_check = libmagic.magic_check
  magic_check.restype = c_int
  magic_check.argtypes = [magic_t, c_char_p]

  # magic_compile: takes a handle and a C string, returns an int.
  # The string is not passed through coerce_filename here.
  magic_compile = libmagic.magic_compile
  magic_compile.restype = c_int
  magic_compile.argtypes = [magic_t, c_char_p]
  266.  
  267.  
  268.  
  # Flag bits for libmagic, listed in numeric order.  Magic.__init__ ORs a
  # subset of them together and hands the result to magic_open.
  MAGIC_NONE              = 0x000000  # No flags
  MAGIC_DEBUG             = 0x000001  # Turn on debugging
  MAGIC_SYMLINK           = 0x000002  # Follow symlinks
  MAGIC_COMPRESS          = 0x000004  # Check inside compressed files
  MAGIC_DEVICES           = 0x000008  # Look at the contents of devices
  MAGIC_MIME              = 0x000010  # Return a mime string
  MAGIC_CONTINUE          = 0x000020  # Return all matches
  MAGIC_CHECK             = 0x000040  # Print warnings to stderr
  MAGIC_PRESERVE_ATIME    = 0x000080  # Restore access time on exit
  MAGIC_RAW               = 0x000100  # Don't translate unprintable chars
  MAGIC_ERROR             = 0x000200  # Handle ENOENT etc as real errors
  MAGIC_MIME_ENCODING     = 0x000400  # Return the MIME encoding
  MAGIC_NO_CHECK_COMPRESS = 0x001000  # Don't check for compressed files
  MAGIC_NO_CHECK_TAR      = 0x002000  # Don't check for tar files
  MAGIC_NO_CHECK_SOFT     = 0x004000  # Don't check magic entries
  MAGIC_NO_CHECK_APPTYPE  = 0x008000  # Don't check application type
  MAGIC_NO_CHECK_ELF      = 0x010000  # Don't check for elf details
  MAGIC_NO_CHECK_ASCII    = 0x020000  # Don't check for ascii files
  MAGIC_NO_CHECK_TROFF    = 0x040000  # Don't check ascii/troff
  MAGIC_NO_CHECK_FORTRAN  = 0x080000  # Don't check ascii/fortran
  MAGIC_NO_CHECK_TOKENS   = 0x100000  # Don't check ascii/tokens
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement