v1ral_ITS

DTRX: easily use every archiver from one Python script to act on any archived file extension

Aug 22nd, 2020 (edited)
613
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 51.39 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. #
  4. # dtrx -- Intelligently extract various archive types.
  5. #
  6. # This program is free software; you can redistribute it and/or modify it
  7. # under the terms of the GNU General Public License as published by the
  8. # Free Software Foundation; either version 3 of the License, or (at your
  9. # option) any later version.
  10. #
  11. # This program is distributed in the hope that it will be useful, but
  12. # WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
  14. # Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License along
  17. # with this program; if not, see <http://www.gnu.org/licenses/>.
  18.  
  19. # Python 2.3 string methods: 'rfind', 'rindex', 'rjust', 'rstrip'
  20.  
  21. import errno
  22. import fcntl
  23. import logging
  24. import mimetypes
  25. import optparse
  26. import os
  27. import re
  28. import shutil
  29. import signal
  30. import stat
  31. import string
  32. import struct
  33. import subprocess
  34. import sys
  35. import tempfile
  36. import termios
  37. import textwrap
  38. import traceback
  39. import urlparse
  40.  
  41. try:
  42.     set
  43. except NameError:
  44.     from sets import Set as set
  45.  
  46. VERSION = "7.1"
  47. VERSION_BANNER = """dtrx version %s
  48. Copyright © 2006-2011 Brett Smith <brettcsmith@brettcsmith.org>
  49. Copyright © 2008 Peter Kelemen <Peter.Kelemen@gmail.com>
  50.  
  51. This program is free software; you can redistribute it and/or modify it
  52. under the terms of the GNU General Public License as published by the
  53. Free Software Foundation; either version 3 of the License, or (at your
  54. option) any later version.
  55.  
  56. This program is distributed in the hope that it will be useful, but
  57. WITHOUT ANY WARRANTY; without even the implied warranty of
  58. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
  59. Public License for more details.""" % (VERSION,)
  60.  
  61. MATCHING_DIRECTORY = 1
  62. ONE_ENTRY_KNOWN = 2
  63. BOMB = 3
  64. EMPTY = 4
  65. ONE_ENTRY_FILE = 'file'
  66. ONE_ENTRY_DIRECTORY = 'directory'
  67.  
  68. ONE_ENTRY_UNKNOWN = [ONE_ENTRY_FILE, ONE_ENTRY_DIRECTORY]
  69.  
  70. EXTRACT_HERE = 1
  71. EXTRACT_WRAP = 2
  72. EXTRACT_RENAME = 3
  73.  
  74. RECURSE_ALWAYS = 1
  75. RECURSE_ONCE = 2
  76. RECURSE_NOT_NOW = 3
  77. RECURSE_NEVER = 4
  78. RECURSE_LIST = 5
  79.  
  80. mimetypes.encodings_map.setdefault('.bz2', 'bzip2')
  81. mimetypes.encodings_map.setdefault('.lzma', 'lzma')
  82. mimetypes.encodings_map.setdefault('.xz', 'xz')
  83. mimetypes.encodings_map.setdefault('.lz', 'lzip')
  84. mimetypes.types_map.setdefault('.gem', 'application/x-ruby-gem')
  85.  
  86. logger = logging.getLogger('dtrx-log')
  87.  
  88. class FilenameChecker(object):
  89.     free_func = os.open
  90.     free_args = (os.O_CREAT | os.O_EXCL,)
  91.     free_close = os.close
  92.  
  93.     def __init__(self, original_name):
  94.         self.original_name = original_name
  95.  
  96.     def is_free(self, filename):
  97.         try:
  98.             result = self.free_func(filename, *self.free_args)
  99.         except OSError, error:
  100.             if error.errno == errno.EEXIST:
  101.                 return False
  102.             raise
  103.         if self.free_close:
  104.             self.free_close(result)
  105.         return True
  106.  
  107.     def create(self):
  108.         fd, filename = tempfile.mkstemp(prefix=self.original_name + '.',
  109.                                         dir='.')
  110.         os.close(fd)
  111.         return filename
  112.  
  113.     def check(self):
  114.         for suffix in [''] + ['.%s' % (x,) for x in range(1, 10)]:
  115.             filename = '%s%s' % (self.original_name, suffix)
  116.             if self.is_free(filename):
  117.                 return filename
  118.         return self.create()
  119.  
  120.  
  121. class DirectoryChecker(FilenameChecker):
  122.     free_func = os.mkdir
  123.     free_args = ()
  124.     free_close = None
  125.  
  126.     def create(self):
  127.         return tempfile.mkdtemp(prefix=self.original_name + '.', dir='.')
  128.  
  129.  
  130. class ExtractorError(Exception):
  131.     pass
  132.  
  133.  
  134. class ExtractorUnusable(Exception):
  135.     pass
  136.  
  137.  
  138. EXTRACTION_ERRORS = (ExtractorError, ExtractorUnusable, OSError, IOError)
  139.  
  140. class BaseExtractor(object):
  141.     decoders = {'bzip2': ['bzcat'], 'gzip': ['zcat'], 'compress': ['zcat'],
  142.                 'lzma': ['lzcat'], 'xz': ['xzcat'], 'lzip': ['lzip', '-cd']}
  143.     name_checker = DirectoryChecker
  144.  
  145.     def __init__(self, filename, encoding):
  146.         if encoding and (not self.decoders.has_key(encoding)):
  147.             raise ValueError("unrecognized encoding %s" % (encoding,))
  148.         self.filename = os.path.realpath(filename)
  149.         self.encoding = encoding
  150.         self.file_count = 0
  151.         self.included_archives = []
  152.         self.target = None
  153.         self.content_type = None
  154.         self.content_name = None
  155.         self.pipes = []
  156.         self.stderr = tempfile.TemporaryFile()
  157.         self.exit_codes = []
  158.         try:
  159.             self.archive = open(filename, 'r')
  160.         except (IOError, OSError), error:
  161.             raise ExtractorError("could not open %s: %s" %
  162.                                  (filename, error.strerror))
  163.         if encoding:
  164.             self.pipe(self.decoders[encoding], "decoding")
  165.         self.prepare()
  166.  
  167.     def pipe(self, command, description="extraction"):
  168.         self.pipes.append((command, description))
  169.  
  170.     def add_process(self, processes, command, stdin, stdout):
  171.         try:
  172.             processes.append(subprocess.Popen(command, stdin=stdin,
  173.                                               stdout=stdout,
  174.                                               stderr=self.stderr))
  175.         except OSError, error:
  176.             if error.errno == errno.ENOENT:
  177.                 raise ExtractorUnusable("could not run %s" % (command[0],))
  178.             raise
  179.  
  180.     def run_pipes(self, final_stdout=None):
  181.         if not self.pipes:
  182.             return
  183.         elif final_stdout is None:
  184.             final_stdout = open('/dev/null', 'w')
  185.         num_pipes = len(self.pipes)
  186.         last_pipe = num_pipes - 1
  187.         processes = []
  188.         for index, command in enumerate([pipe[0] for pipe in self.pipes]):
  189.             if index == 0:
  190.                 stdin = self.archive
  191.             else:
  192.                 stdin = processes[-1].stdout
  193.             if index == last_pipe:
  194.                 stdout = final_stdout
  195.             else:
  196.                 stdout = subprocess.PIPE
  197.             self.add_process(processes, command, stdin, stdout)
  198.         self.exit_codes = [pipe.wait() for pipe in processes]
  199.         self.archive.close()
  200.         for index in range(last_pipe):
  201.             processes[index].stdout.close()
  202.         self.archive = final_stdout
  203.  
  204.     def prepare(self):
  205.         pass
  206.  
  207.     def check_included_archives(self):
  208.         if (self.content_name is None) or (not self.content_name.endswith('/')):
  209.             self.included_root = './'
  210.         else:
  211.             self.included_root = self.content_name
  212.         start_index = len(self.included_root)
  213.         for path, dirname, filenames in os.walk(self.included_root):
  214.             self.file_count += len(filenames)
  215.             path = path[start_index:]
  216.             for filename in filenames:
  217.                 if (ExtractorBuilder.try_by_mimetype(filename) or
  218.                     ExtractorBuilder.try_by_extension(filename)):
  219.                     self.included_archives.append(os.path.join(path, filename))
  220.  
  221.     def check_contents(self):
  222.         if not self.contents:
  223.             self.content_type = EMPTY
  224.         elif len(self.contents) == 1:
  225.             if self.basename() == self.contents[0]:
  226.                 self.content_type = MATCHING_DIRECTORY
  227.             elif os.path.isdir(self.contents[0]):
  228.                 self.content_type = ONE_ENTRY_DIRECTORY
  229.             else:
  230.                 self.content_type = ONE_ENTRY_FILE
  231.             self.content_name = self.contents[0]
  232.             if os.path.isdir(self.contents[0]):
  233.                 self.content_name += '/'
  234.         else:
  235.             self.content_type = BOMB
  236.         self.check_included_archives()
  237.  
  238.     def basename(self):
  239.         pieces = os.path.basename(self.filename).split('.')
  240.         orig_len = len(pieces)
  241.         extension = '.' + pieces[-1]
  242.         # This is maybe a little more clever than it ought to be.
  243.         # We're trying to be conservative about what remove, but also DTRT
  244.         # in cases like .tar.gz, and also do something reasonable if we
  245.         # encounter some completely off-the-wall extension.  So that means:
  246.         # 1. First remove any compression extension.
  247.         # 2. Then remove any commonly known extension that remains.
  248.         # 3. If neither of those did anything, remove anything that looks
  249.         #    like it's almost certainly an extension (less than 5 chars).
  250.         if mimetypes.encodings_map.has_key(extension):
  251.             pieces.pop()
  252.             extension = '.' + pieces[-1]
  253.         if (mimetypes.types_map.has_key(extension) or
  254.             mimetypes.common_types.has_key(extension) or
  255.             mimetypes.suffix_map.has_key(extension)):
  256.             pieces.pop()
  257.         if ((orig_len == len(pieces)) and
  258.             (orig_len > 1) and (len(pieces[-1]) < 5)):
  259.             pieces.pop()
  260.         return '.'.join(pieces)
  261.  
  262.     def get_stderr(self):
  263.         self.stderr.seek(0, 0)
  264.         errors = self.stderr.read(-1)
  265.         self.stderr.close()
  266.         return errors
  267.  
  268.     def is_fatal_error(self, status):
  269.         return False
  270.  
  271.     def first_bad_exit_code(self):
  272.         for index, code in enumerate(self.exit_codes):
  273.             if code > 0:
  274.                 return index, code
  275.         return None, None
  276.  
  277.     def check_success(self, got_files):
  278.         error_index, error_code = self.first_bad_exit_code()
  279.         logger.debug("success results: %s %s %s" % (got_files, error_index,
  280.                                                     self.exit_codes))
  281.         if (self.is_fatal_error(error_code) or
  282.             ((not got_files) and (error_code is not None))):
  283.             command = ' '.join(self.pipes[error_index][0])
  284.             raise ExtractorError("%s error: '%s' returned status code %s" %
  285.                                  (self.pipes[error_index][1], command,
  286.                                   error_code))
  287.        
  288.     def extract_archive(self):
  289.         self.pipe(self.extract_pipe)
  290.         self.run_pipes()
  291.  
  292.     def extract(self):
  293.         try:
  294.             self.target = tempfile.mkdtemp(prefix='.dtrx-', dir='.')
  295.         except (OSError, IOError), error:
  296.             raise ExtractorError("cannot extract here: %s" % (error.strerror,))
  297.         old_path = os.path.realpath(os.curdir)
  298.         os.chdir(self.target)
  299.         try:
  300.             self.archive.seek(0, 0)
  301.             self.extract_archive()
  302.             self.contents = os.listdir('.')
  303.             self.check_contents()
  304.             self.check_success(self.content_type != EMPTY)
  305.         except EXTRACTION_ERRORS:
  306.             self.archive.close()
  307.             os.chdir(old_path)
  308.             shutil.rmtree(self.target, ignore_errors=True)
  309.             raise
  310.         self.archive.close()
  311.         os.chdir(old_path)
  312.  
  313.     def get_filenames(self, internal=False):
  314.         if not internal:
  315.             self.pipe(self.list_pipe, "listing")
  316.         processes = []
  317.         stdin = self.archive
  318.         for command in [pipe[0] for pipe in self.pipes]:
  319.             self.add_process(processes, command, stdin, subprocess.PIPE)
  320.             stdin = processes[-1].stdout
  321.         get_output_line = processes[-1].stdout.readline
  322.         while True:
  323.             line = get_output_line()
  324.             if not line:
  325.                 break
  326.             yield line.rstrip('\n')
  327.         self.exit_codes = [pipe.wait() for pipe in processes]
  328.         self.archive.close()
  329.         for process in processes:
  330.             process.stdout.close()
  331.         self.check_success(False)
  332.    
  333.  
  334. class CompressionExtractor(BaseExtractor):
  335.     file_type = 'compressed file'
  336.     name_checker = FilenameChecker
  337.  
  338.     def basename(self):
  339.         pieces = os.path.basename(self.filename).split('.')
  340.         extension = '.' + pieces[-1]
  341.         if mimetypes.encodings_map.has_key(extension):
  342.             pieces.pop()
  343.         return '.'.join(pieces)
  344.  
  345.     def get_filenames(self):
  346.         # This code used to just immediately yield the basename, under the
  347.         # assumption that that would be the filename.  However, if that
  348.         # happens, dtrx -l will report this as a valid result for files with
  349.         # compression extensions, even if those files shouldn't actually be
  350.         # handled this way.  So, we call out to the file command to do a quick
  351.         # check and make sure this actually looks like a compressed file.
  352.         if 'compress' not in [match[0] for match in
  353.                               ExtractorBuilder.try_by_magic(self.filename)]:
  354.             raise ExtractorError("doesn't look like a compressed file")
  355.         yield self.basename()
  356.  
  357.     def extract(self):
  358.         self.content_type = ONE_ENTRY_KNOWN
  359.         self.content_name = self.basename()
  360.         self.contents = None
  361.         self.file_count = 1
  362.         self.included_root = './'
  363.         try:
  364.             output_fd, self.target = tempfile.mkstemp(prefix='.dtrx-', dir='.')
  365.         except (OSError, IOError), error:
  366.             raise ExtractorError("cannot extract here: %s" % (error.strerror,))
  367.         self.run_pipes(output_fd)
  368.         os.close(output_fd)
  369.         try:
  370.             self.check_success(os.stat(self.target)[stat.ST_SIZE] > 0)
  371.         except EXTRACTION_ERRORS:
  372.             os.unlink(self.target)
  373.             raise
  374.  
  375.            
  376. class TarExtractor(BaseExtractor):
  377.     file_type = 'tar file'
  378.     extract_pipe = ['tar', '-x']
  379.     list_pipe = ['tar', '-t']
  380.        
  381.        
  382. class CpioExtractor(BaseExtractor):
  383.     file_type = 'cpio file'
  384.     extract_pipe = ['cpio', '-i', '--make-directories', '--quiet',
  385.                    '--no-absolute-filenames']
  386.     list_pipe = ['cpio', '-t', '--quiet']
  387.  
  388.  
  389. class RPMExtractor(CpioExtractor):
  390.     file_type = 'RPM'
  391.  
  392.     def prepare(self):
  393.         self.pipe(['rpm2cpio', '-'], "rpm2cpio")
  394.  
  395.     def basename(self):
  396.         pieces = os.path.basename(self.filename).split('.')
  397.         if len(pieces) == 1:
  398.             return pieces[0]
  399.         elif pieces[-1] != 'rpm':
  400.             return BaseExtractor.basename(self)
  401.         pieces.pop()
  402.         if len(pieces) == 1:
  403.             return pieces[0]
  404.         elif len(pieces[-1]) < 8:
  405.             pieces.pop()
  406.         return '.'.join(pieces)
  407.  
  408.     def check_contents(self):
  409.         self.check_included_archives()
  410.         self.content_type = BOMB
  411.  
  412.  
  413. class DebExtractor(TarExtractor):
  414.     file_type = 'Debian package'
  415.     data_re = re.compile(r'^data\.tar\.[a-z0-9]+$')
  416.  
  417.     def prepare(self):
  418.         self.pipe(['ar', 't', self.filename], "finding package data file")
  419.         for filename in self.get_filenames(internal=True):
  420.             if self.data_re.match(filename):
  421.                 data_filename = filename
  422.                 break
  423.         else:
  424.             raise ExtractorError(".deb contains no data.tar file")
  425.         self.archive.seek(0, 0)
  426.         self.pipes.pop()
  427.         # self.pipes = start_pipes
  428.         encoding = mimetypes.guess_type(data_filename)[1]
  429.         if not encoding:
  430.             raise ExtractorError("data.tar file has unrecognized encoding")
  431.         self.pipe(['ar', 'p', self.filename, data_filename],
  432.                   "extracting data.tar from .deb")
  433.         self.pipe(self.decoders[encoding], "decoding data.tar")
  434.  
  435.     def basename(self):
  436.         pieces = os.path.basename(self.filename).split('_')
  437.         if len(pieces) == 1:
  438.             return pieces[0]
  439.         last_piece = pieces.pop()
  440.         if (len(last_piece) > 10) or (not last_piece.endswith('.deb')):
  441.             return BaseExtractor.basename(self)
  442.         return '_'.join(pieces)
  443.  
  444.     def check_contents(self):
  445.         self.check_included_archives()
  446.         self.content_type = BOMB
  447.  
  448.  
  449. class DebMetadataExtractor(DebExtractor):
  450.     def prepare(self):
  451.         self.pipe(['ar', 'p', self.filename, 'control.tar.gz'],
  452.                   "control.tar.gz extraction")
  453.         self.pipe(['zcat'], "control.tar.gz decompression")
  454.  
  455.  
  456. class GemExtractor(TarExtractor):
  457.     file_type = 'Ruby gem'
  458.  
  459.     def prepare(self):
  460.         self.pipe(['tar', '-xO', 'data.tar.gz'], "data.tar.gz extraction")
  461.         self.pipe(['zcat'], "data.tar.gz decompression")
  462.  
  463.     def check_contents(self):
  464.         self.check_included_archives()
  465.         self.content_type = BOMB
  466.  
  467.  
  468. class GemMetadataExtractor(CompressionExtractor):
  469.     file_type = 'Ruby gem'
  470.  
  471.     def prepare(self):
  472.         self.pipe(['tar', '-xO', 'metadata.gz'], "metadata.gz extraction")
  473.         self.pipe(['zcat'], "metadata.gz decompression")
  474.  
  475.     def basename(self):
  476.         return os.path.basename(self.filename) + '-metadata.txt'
  477.  
  478.  
  479. class NoPipeExtractor(BaseExtractor):
  480.     # Some extraction tools won't accept the archive from stdin.  With
  481.     # these, the piping infrastructure we normally set up generally doesn't
  482.     # work, at least at first.  We can still use most of it; we just don't
  483.     # want to seed self.archive with the archive file, since that sucks up
  484.     # memory.  So instead we seed it with /dev/null, and specify the
  485.     # filename on the command line as necessary.  We also open the actual
  486.     # file with os.open, to make sure we can actually do it (permissions
  487.     # are good, etc.).  This class doesn't do anything by itself; it's just
  488.     # meant to be a base class for extractors that rely on these dumb
  489.     # tools.
  490.     def __init__(self, filename, encoding):
  491.         os.close(os.open(filename, os.O_RDONLY))
  492.         BaseExtractor.__init__(self, '/dev/null', None)
  493.         self.filename = os.path.realpath(filename)
  494.  
  495.     def extract_archive(self):
  496.         self.extract_pipe = self.extract_command + [self.filename]
  497.         BaseExtractor.extract_archive(self)
  498.  
  499.     def get_filenames(self):
  500.         self.list_pipe = self.list_command + [self.filename]
  501.         return BaseExtractor.get_filenames(self)
  502.  
  503.  
  504. class ZipExtractor(NoPipeExtractor):
  505.     file_type = 'Zip file'
  506.     extract_command = ['unzip', '-q']
  507.     list_command = ['zipinfo', '-1']
  508.  
  509.     def is_fatal_error(self, status):
  510.         return status > 1
  511.  
  512.  
  513. class LZHExtractor(ZipExtractor):
  514.     file_type = 'LZH file'
  515.     extract_command = ['lha', 'xq']
  516.     list_command = ['lha', 'l']
  517.  
  518.     def border_line_file_index(self, line):
  519.         last_space_index = None
  520.         for index, char in enumerate(line):
  521.             if char == ' ':
  522.                 last_space_index = index
  523.             elif char != '-':
  524.                 return None
  525.         if last_space_index is None:
  526.             return None
  527.         return last_space_index + 1
  528.  
  529.     def get_filenames(self):
  530.         filenames = NoPipeExtractor.get_filenames(self)
  531.         for line in filenames:
  532.             fn_index = self.border_line_file_index(line)
  533.             if fn_index is not None:
  534.                 break
  535.         for line in filenames:
  536.             if self.border_line_file_index(line):
  537.                 break
  538.             else:
  539.                 yield line[fn_index:]
  540.         self.archive.close()
  541.  
  542.  
  543. class SevenExtractor(NoPipeExtractor):
  544.     file_type = '7z file'
  545.     extract_command = ['7z', 'x']
  546.     list_command = ['7z', 'l']
  547.     border_re = re.compile('^[- ]+$')
  548.  
  549.     def get_filenames(self):
  550.         fn_index = None
  551.         for line in NoPipeExtractor.get_filenames(self):
  552.             if self.border_re.match(line):
  553.                 if fn_index is not None:
  554.                     break
  555.                 else:
  556.                     fn_index = string.rindex(line, ' ') + 1
  557.             elif fn_index is not None:
  558.                 yield line[fn_index:]
  559.         self.archive.close()
  560.        
  561.  
  562. class CABExtractor(NoPipeExtractor):
  563.     file_type = 'CAB archive'
  564.     extract_command = ['cabextract', '-q']
  565.     list_command = ['cabextract', '-l']
  566.     border_re = re.compile(r'^[-\+]+$')
  567.  
  568.     def get_filenames(self):
  569.         fn_index = None
  570.         filenames = NoPipeExtractor.get_filenames(self)
  571.         for line in filenames:
  572.             if self.border_re.match(line):
  573.                 break
  574.         for line in filenames:
  575.             try:
  576.                 yield line.split(' | ', 2)[2]
  577.             except IndexError:
  578.                 break
  579.         self.archive.close()
  580.  
  581.  
  582. class ShieldExtractor(NoPipeExtractor):
  583.     file_type = 'InstallShield archive'
  584.     extract_command = ['unshield', 'x']
  585.     list_command = ['unshield', 'l']
  586.     prefix_re = re.compile(r'^\s+\d+\s+')
  587.     end_re = re.compile(r'^\s+-+\s+-+\s*$')
  588.  
  589.     def get_filenames(self):
  590.         for line in NoPipeExtractor.get_filenames(self):
  591.             if self.end_re.match(line):
  592.                 break
  593.             else:
  594.                 match = self.prefix_re.match(line)
  595.                 if match:
  596.                     yield line[match.end():]
  597.         self.archive.close()
  598.  
  599.     def basename(self):
  600.         result = NoPipeExtractor.basename(self)
  601.         if result.endswith('.hdr'):
  602.             result = result[:-4]
  603.         return result
  604.  
  605.  
  606. class RarExtractor(NoPipeExtractor):
  607.     file_type = 'RAR archive'
  608.     extract_command = ['unrar', 'x']
  609.     list_command = ['unrar', 'l']
  610.     border_re = re.compile('^-+$')
  611.  
  612.     def get_filenames(self):
  613.         inside = False
  614.         for line in NoPipeExtractor.get_filenames(self):
  615.             if self.border_re.match(line):
  616.                 if inside:
  617.                     break
  618.                 else:
  619.                     inside = True
  620.             elif inside:
  621.                 yield line.split(' ')[1]
  622.         self.archive.close()
  623.  
  624.  
  625. class BaseHandler(object):
  626.     def __init__(self, extractor, options):
  627.         self.extractor = extractor
  628.         self.options = options
  629.         self.target = None
  630.  
  631.     def handle(self):
  632.         command = 'find'
  633.         status = subprocess.call(['find', self.extractor.target, '-type', 'd',
  634.                                   '-exec', 'chmod', 'u+rwx', '{}', ';'])
  635.         if status == 0:
  636.             command = 'chmod'
  637.             status = subprocess.call(['chmod', '-R', 'u+rwX',
  638.                                       self.extractor.target])
  639.         if status != 0:
  640.             return "%s returned with exit status %s" % (command, status)
  641.         return self.organize()
  642.  
  643.     def set_target(self, target, checker):
  644.         self.target = checker(target).check()
  645.         if self.target != target:
  646.             logger.warning("extracting %s to %s" %
  647.                            (self.extractor.filename, self.target))
  648.  
  649.  
  650. # The "where to extract" table, with options and archive types.
  651. # This dictates the contents of each can_handle method.
  652. #
  653. #         Flat           Overwrite            None
  654. # File    basename       basename             FilenameChecked
  655. # Match   .              .                    tempdir + checked
  656. # Bomb    .              basename             DirectoryChecked
  657.  
  658. class FlatHandler(BaseHandler):
  659.     def can_handle(contents, options):
  660.         return ((options.flat and (contents != ONE_ENTRY_KNOWN)) or
  661.                 (options.overwrite and (contents == MATCHING_DIRECTORY)))
  662.     can_handle = staticmethod(can_handle)
  663.  
  664.     def organize(self):
  665.         self.target = '.'
  666.         for curdir, dirs, filenames in os.walk(self.extractor.target,
  667.                                                topdown=False):
  668.             path_parts = curdir.split(os.sep)
  669.             if path_parts[0] == '.':
  670.                 del path_parts[1]
  671.             else:
  672.                 del path_parts[0]
  673.             newdir = os.path.join(*path_parts)
  674.             if not os.path.isdir(newdir):
  675.                 os.makedirs(newdir)
  676.             for filename in filenames:
  677.                 os.rename(os.path.join(curdir, filename),
  678.                           os.path.join(newdir, filename))
  679.             os.rmdir(curdir)
  680.  
  681.  
  682. class OverwriteHandler(BaseHandler):
  683.     def can_handle(contents, options):
  684.         return ((options.flat and (contents == ONE_ENTRY_KNOWN)) or
  685.                 (options.overwrite and (contents != MATCHING_DIRECTORY)))
  686.     can_handle = staticmethod(can_handle)
  687.  
  688.     def organize(self):
  689.         self.target = self.extractor.basename()
  690.         if os.path.isdir(self.target):
  691.             shutil.rmtree(self.target)
  692.         os.rename(self.extractor.target, self.target)
  693.        
  694.  
  695. class MatchHandler(BaseHandler):
  696.     def can_handle(contents, options):
  697.         return ((contents == MATCHING_DIRECTORY) or
  698.                 ((contents in ONE_ENTRY_UNKNOWN) and
  699.                  options.one_entry_policy.ok_for_match()))
  700.     can_handle = staticmethod(can_handle)
  701.  
  702.     def organize(self):
  703.         source = os.path.join(self.extractor.target,
  704.                               os.listdir(self.extractor.target)[0])
  705.         if os.path.isdir(source):
  706.             checker = DirectoryChecker
  707.         else:
  708.             checker = FilenameChecker
  709.         if self.options.one_entry_policy == EXTRACT_HERE:
  710.             destination = self.extractor.content_name.rstrip('/')
  711.         else:
  712.             destination = self.extractor.basename()
  713.         self.set_target(destination, checker)
  714.         if os.path.isdir(self.extractor.target):
  715.             os.rename(source, self.target)
  716.             os.rmdir(self.extractor.target)
  717.         else:
  718.             os.rename(self.extractor.target, self.target)
  719.         self.extractor.included_root = './'
  720.  
  721.  
  722. class EmptyHandler(object):
  723.     target = ''
  724.  
  725.     def can_handle(contents, options):
  726.         return contents == EMPTY
  727.     can_handle = staticmethod(can_handle)
  728.  
  729.     def __init__(self, extractor, options):
  730.         os.rmdir(extractor.target)
  731.  
  732.     def handle(self): pass
  733.  
  734.  
  735. class BombHandler(BaseHandler):
  736.     def can_handle(contents, options):
  737.         return True
  738.     can_handle = staticmethod(can_handle)
  739.  
  740.     def organize(self):
  741.         basename = self.extractor.basename()
  742.         self.set_target(basename, self.extractor.name_checker)
  743.         os.rename(self.extractor.target, self.target)
  744.  
  745.        
  746. class BasePolicy(object):
  747.     try:
  748.         size = fcntl.ioctl(sys.stdout.fileno(), termios.TIOCGWINSZ,
  749.                            struct.pack("HHHH", 0, 0, 0, 0))
  750.         width = struct.unpack("HHHH", size)[1]
  751.     except IOError:
  752.         width = 80
  753.     width = width - 1
  754.     choice_wrapper = textwrap.TextWrapper(width=width, initial_indent=' * ',
  755.                                           subsequent_indent='   ',
  756.                                           break_long_words=False)
  757.    
  758.     def __init__(self, options):
  759.         self.current_policy = None
  760.         if options.batch:
  761.             self.permanent_policy = self.answers['']
  762.         else:
  763.             self.permanent_policy = None
  764.  
  765.     def ask_question(self, question):
  766.         question = question + ["You can:"]
  767.         for choice in self.choices:
  768.             question.extend(self.choice_wrapper.wrap(choice))
  769.         while True:
  770.             print "\n".join(question)
  771.             try:
  772.                 answer = raw_input(self.prompt)
  773.             except EOFError:
  774.                 return self.answers['']
  775.             try:
  776.                 return self.answers[answer.lower()]
  777.             except KeyError:
  778.                 print
  779.  
  780.     def wrap(self, question, *args):
  781.         words = question.split()
  782.         for arg in args:
  783.             words[words.index('%s')] = arg
  784.         result = [words.pop(0)]
  785.         for word in words:
  786.             extend = '%s %s' % (result[-1], word)
  787.             if len(extend) > self.width:
  788.                 result.append(word)
  789.             else:
  790.                 result[-1] = extend
  791.         return result
  792.  
  793.     def __cmp__(self, other):
  794.         return cmp(self.current_policy, other)
  795.    
  796.  
  797. class OneEntryPolicy(BasePolicy):
  798.     answers = {'h': EXTRACT_HERE, 'i': EXTRACT_WRAP, 'r': EXTRACT_RENAME,
  799.                '': EXTRACT_WRAP}
  800.     choice_template = ["extract the %s _I_nside a new directory named %s",
  801.                        "extract the %s and _R_ename it %s",
  802.                        "extract the %s _H_ere"]
  803.     prompt = "What do you want to do?  (I/r/h) "
  804.  
  805.     def __init__(self, options):
  806.         BasePolicy.__init__(self, options)
  807.         if options.flat:
  808.             default = 'h'
  809.         elif options.one_entry_default is not None:
  810.             default = options.one_entry_default.lower()
  811.         else:
  812.             return
  813.         if 'here'.startswith(default):
  814.             self.permanent_policy = EXTRACT_HERE
  815.         elif 'rename'.startswith(default):
  816.             self.permanent_policy = EXTRACT_RENAME
  817.         elif 'inside'.startswith(default):
  818.             self.permanent_policy = EXTRACT_WRAP
  819.         elif default is not None:
  820.             raise ValueError("bad value %s for default policy" % (default,))
  821.  
  822.     def prep(self, archive_filename, extractor):
  823.         question = self.wrap(
  824.             "%s contains one %s but its name doesn't match.",
  825.             archive_filename, extractor.content_type)
  826.         question.append(" Expected: " + extractor.basename())
  827.         question.append("   Actual: " + extractor.content_name)
  828.         choice_vars = (extractor.content_type, extractor.basename())
  829.         self.choices = [text % choice_vars[:text.count('%s')]
  830.                         for text in self.choice_template]
  831.         self.current_policy = (self.permanent_policy or
  832.                                self.ask_question(question))
  833.  
  834.     def ok_for_match(self):
  835.         return self.current_policy in (EXTRACT_RENAME, EXTRACT_HERE)
  836.  
  837.  
  838. class RecursionPolicy(BasePolicy):
  839.     answers = {'o': RECURSE_ONCE, 'a': RECURSE_ALWAYS, 'n': RECURSE_NOT_NOW,
  840.                'v': RECURSE_NEVER, 'l': RECURSE_LIST, '': RECURSE_NOT_NOW}
  841.     choices = ["_A_lways extract included archives during this session",
  842.                "extract included archives this _O_nce",
  843.                "choose _N_ot to extract included archives this once",
  844.                "ne_V_er extract included archives during this session",
  845.                "_L_ist included archives"]
  846.     prompt = "What do you want to do?  (a/o/N/v/l) "
  847.  
  848.     def __init__(self, options):
  849.         BasePolicy.__init__(self, options)
  850.         if options.show_list:
  851.             self.permanent_policy = RECURSE_NEVER
  852.         elif options.recursive:
  853.             self.permanent_policy = RECURSE_ALWAYS
  854.  
  855.     def prep(self, current_filename, target, extractor):
  856.         archive_count = len(extractor.included_archives)
  857.         if ((self.permanent_policy is not None) or
  858.             ((archive_count * 10) <= extractor.file_count)):
  859.             self.current_policy = self.permanent_policy or RECURSE_NOT_NOW
  860.             return
  861.         question = self.wrap(
  862.             "%s contains %s other archive file(s), out of %s file(s) total.",
  863.             current_filename, archive_count, extractor.file_count)
  864.         if target == '.':
  865.             target = ''
  866.         included_root = extractor.included_root
  867.         if included_root == './':
  868.             included_root = ''
  869.         while True:
  870.             self.current_policy = self.ask_question(question)
  871.             if self.current_policy != RECURSE_LIST:
  872.                 break
  873.             print ("\n%s\n" %
  874.                    '\n'.join([os.path.join(target, included_root, filename)
  875.                               for filename in extractor.included_archives]))
  876.         if self.current_policy in (RECURSE_ALWAYS, RECURSE_NEVER):
  877.             self.permanent_policy = self.current_policy
  878.  
  879.     def ok_to_recurse(self):
  880.         return self.current_policy in (RECURSE_ALWAYS, RECURSE_ONCE)
  881.            
  882.  
  883. class ExtractorBuilder(object):
  884.     extractor_map = {'tar': {'extractors': (TarExtractor,),
  885.                              'mimetypes': ('x-tar',),
  886.                              'extensions': ('tar',),
  887.                              'magic': ('POSIX tar archive',)},
  888.                      'zip': {'extractors': (ZipExtractor, SevenExtractor),
  889.                              'mimetypes': ('zip',),
  890.                              'extensions': ('zip',),
  891.                              'magic': ('(Zip|ZIP self-extracting) archive',)},
  892.                      'lzh': {'extractors': (LZHExtractor,),
  893.                              'mimetypes': ('x-lzh', 'x-lzh-compressed'),
  894.                              'extensions': ('lzh', 'lha'),
  895.                              'magic': ('LHa [\d\.\?]+ archive',)},
  896.                      'rpm': {'extractors': (RPMExtractor,),
  897.                              'mimetypes': ('x-redhat-package-manager', 'x-rpm'),
  898.                              'extensions': ('rpm',),
  899.                              'magic': ('RPM',)},
  900.                      'deb': {'extractors': (DebExtractor,),
  901.                              'metadata': (DebMetadataExtractor,),
  902.                              'mimetypes': ('x-debian-package',),
  903.                              'extensions': ('deb',),
  904.                              'magic': ('Debian binary package',)},
  905.                      'cpio': {'extractors': (CpioExtractor,),
  906.                               'mimetypes': ('x-cpio',),
  907.                               'extensions': ('cpio',),
  908.                               'magic': ('cpio archive',)},
  909.                      'gem': {'extractors': (GemExtractor,),
  910.                              'metadata': (GemMetadataExtractor,),
  911.                              'mimetypes': ('x-ruby-gem',),
  912.                              'extensions': ('gem',)},
  913.                      '7z': {'extractors': (SevenExtractor,),
  914.                              'mimetypes': ('x-7z-compressed',),
  915.                              'extensions': ('7z',),
  916.                              'magic': ('7-zip archive',)},
  917.                      'cab': {'extractors': (CABExtractor,),
  918.                              'mimetypes': ('x-cab',),
  919.                              'extensions': ('cab',),
  920.                              'magic': ('Microsoft Cabinet Archive',)},
  921.                      'rar': {'extractors': (RarExtractor,),
  922.                              'mimetypes': ('rar',),
  923.                              'extensions': ('rar',),
  924.                              'magic': ('RAR archive',)},
  925.                      'shield': {'extractors': (ShieldExtractor,),
  926.                                 'mimetypes': ('x-cab',),
  927.                                 'extensions': ('cab', 'hdr'),
  928.                                 'magic': ('InstallShield CAB',)},
  929.                      'msi': {'extractors': (SevenExtractor,),
  930.                              'mimetypes': ('x-msi', 'x-ole-storage'),
  931.                              'extensions': ('msi',),
  932.                              'magic': ('Application: Windows Installer',)},
  933.                      'compress': {'extractors': (CompressionExtractor,)}
  934.                      }
  935.  
  936.     mimetype_map = {}
  937.     magic_mime_map = {}
  938.     extension_map = {}
  939.     for ext_name, ext_info in extractor_map.items():
  940.         for mimetype in ext_info.get('mimetypes', ()):
  941.             if '/' not in mimetype:
  942.                 mimetype = 'application/' + mimetype
  943.             mimetype_map[mimetype] = ext_name
  944.         for magic_re in ext_info.get('magic', ()):
  945.             magic_mime_map[re.compile(magic_re)] = ext_name
  946.         for extension in ext_info.get('extensions', ()):
  947.             extension_map.setdefault(extension, []).append((ext_name, None))
  948.  
  949.     for mapping in (('tar', 'bzip2', 'tar.bz2', 'tbz2', 'tb2', 'tbz'),
  950.                     ('tar', 'gzip', 'tar.gz', 'tgz'),
  951.                     ('tar', 'lzma', 'tar.lzma', 'tlz'),
  952.                     ('tar', 'xz', 'tar.xz'),
  953.                     ('tar', 'lz', 'tar.lz'),
  954.                     ('tar', 'compress', 'tar.Z', 'taz'),
  955.                     ('compress', 'gzip', 'Z', 'gz'),
  956.                     ('compress', 'bzip2', 'bz2'),
  957.                     ('compress', 'lzma', 'lzma'),
  958.                     ('compress', 'xz', 'xz')):
  959.         for extension in mapping[2:]:
  960.             extension_map.setdefault(extension, []).append(mapping[:2])
  961.  
  962.     magic_encoding_map = {}
  963.     for mapping in (('bzip2', 'bzip2 compressed'),
  964.                     ('gzip', 'gzip compressed'),
  965.                     ('lzma', 'LZMA compressed'),
  966.                     ('lzip', 'lzip compressed'),
  967.                     ('xz', 'xz compressed')):
  968.         for pattern in mapping[1:]:
  969.             magic_encoding_map[re.compile(pattern)] = mapping[0]
  970.  
  971.     def __init__(self, filename, options):
  972.         self.filename = filename
  973.         self.options = options
  974.  
  975.     def build_extractor(self, archive_type, encoding):
  976.         type_info = self.extractor_map[archive_type]
  977.         if self.options.metadata and type_info.has_key('metadata'):
  978.             extractors = type_info['metadata']
  979.         else:
  980.             extractors = type_info['extractors']
  981.         for extractor in extractors:
  982.             yield extractor(self.filename, encoding)
  983.  
  984.     def get_extractor(self):
  985.         tried_types = set()
  986.         # As smart as it is, the magic test can't go first, because at least
  987.         # on my system it just recognizes gem files as tar files.  I guess
  988.         # it's possible for the opposite problem to occur -- where the mimetype
  989.         # or extension suggests something less than ideal -- but it seems less
  990.         # likely so I'm sticking with this.
  991.         for func_name in ('mimetype', 'extension', 'magic'):
  992.             logger.debug("getting extractors by %s" % (func_name,))
  993.             extractor_types = \
  994.                             getattr(self, 'try_by_' + func_name)(self.filename)
  995.             logger.debug("done getting extractors")
  996.             for ext_args in extractor_types:
  997.                 if ext_args in tried_types:
  998.                     continue
  999.                 tried_types.add(ext_args)
  1000.                 logger.debug("trying %s extractor from %s" %
  1001.                              (ext_args, func_name))
  1002.                 for extractor in self.build_extractor(*ext_args):
  1003.                     yield extractor
  1004.  
  1005.     def try_by_mimetype(cls, filename):
  1006.         mimetype, encoding = mimetypes.guess_type(filename)
  1007.         try:
  1008.             return [(cls.mimetype_map[mimetype], encoding)]
  1009.         except KeyError:
  1010.             if encoding:
  1011.                 return [('compress', encoding)]
  1012.         return []
  1013.     try_by_mimetype = classmethod(try_by_mimetype)
  1014.  
  1015.     def magic_map_matches(cls, output, magic_map):
  1016.         return [result for regexp, result in magic_map.items()
  1017.                 if regexp.search(output)]
  1018.     magic_map_matches = classmethod(magic_map_matches)
  1019.        
  1020.     def try_by_magic(cls, filename):
  1021.         process = subprocess.Popen(['file', '-z', filename],
  1022.                                    stdout=subprocess.PIPE)
  1023.         status = process.wait()
  1024.         if status != 0:
  1025.             return []
  1026.         output = process.stdout.readline()
  1027.         process.stdout.close()
  1028.         if output.startswith('%s: ' % filename):
  1029.             output = output[len(filename) + 2:]
  1030.         mimes = cls.magic_map_matches(output, cls.magic_mime_map)
  1031.         encodings = cls.magic_map_matches(output, cls.magic_encoding_map)
  1032.         if mimes and not encodings:
  1033.             encodings = [None]
  1034.         elif encodings and not mimes:
  1035.             mimes = ['compress']
  1036.         return [(m, e) for m in mimes for e in encodings]
  1037.     try_by_magic = classmethod(try_by_magic)
  1038.  
  1039.     def try_by_extension(cls, filename):
  1040.         parts = filename.split('.')[-2:]
  1041.         results = []
  1042.         while parts:
  1043.             results.extend(cls.extension_map.get('.'.join(parts), []))
  1044.             del parts[0]
  1045.         return results
  1046.     try_by_extension = classmethod(try_by_extension)
  1047.  
  1048.  
  1049. class BaseAction(object):
  1050.     def __init__(self, options, filenames):
  1051.         self.options = options
  1052.         self.filenames = filenames
  1053.         self.target = None
  1054.         self.do_print = False
  1055.        
  1056.     def report(self, function, *args):
  1057.         try:
  1058.             error = function(*args)
  1059.         except EXTRACTION_ERRORS, exception:
  1060.             error = str(exception)
  1061.             logger.debug(''.join(traceback.format_exception(*sys.exc_info())))
  1062.         return error
  1063.  
  1064.     def show_filename(self, filename):
  1065.         if len(self.filenames) < 2:
  1066.             return
  1067.         elif self.do_print:
  1068.             print
  1069.         else:
  1070.             self.do_print = True
  1071.         print "%s:" % (filename,)
  1072.  
  1073.  
  1074. class ExtractionAction(BaseAction):
  1075.     handlers = [FlatHandler, OverwriteHandler, MatchHandler, EmptyHandler,
  1076.                 BombHandler]
  1077.  
  1078.     def get_handler(self, extractor):
  1079.         if extractor.content_type in ONE_ENTRY_UNKNOWN:
  1080.             self.options.one_entry_policy.prep(self.current_filename,
  1081.                                                extractor)
  1082.         for handler in self.handlers:
  1083.             if handler.can_handle(extractor.content_type, self.options):
  1084.                 logger.debug("using %s handler" % (handler.__name__,))
  1085.                 self.current_handler = handler(extractor, self.options)
  1086.                 break
  1087.  
  1088.     def show_extraction(self, extractor):
  1089.         if self.options.log_level > logging.INFO:
  1090.             return
  1091.         self.show_filename(self.current_filename)
  1092.         if extractor.contents is None:
  1093.             print self.current_handler.target
  1094.             return
  1095.         def reverser(x, y):
  1096.             return cmp(y, x)
  1097.         if self.current_handler.target == '.':
  1098.             filenames = extractor.contents
  1099.             filenames.sort(reverser)
  1100.         else:
  1101.             filenames = [self.current_handler.target]
  1102.         pathjoin = os.path.join
  1103.         isdir = os.path.isdir
  1104.         while filenames:
  1105.             filename = filenames.pop()
  1106.             if isdir(filename):
  1107.                 print "%s/" % (filename,)
  1108.                 new_filenames = os.listdir(filename)
  1109.                 new_filenames.sort(reverser)
  1110.                 filenames.extend([pathjoin(filename, new_filename)
  1111.                                   for new_filename in new_filenames])
  1112.             else:
  1113.                 print filename
  1114.  
  1115.     def run(self, filename, extractor):
  1116.         self.current_filename = filename
  1117.         error = (self.report(extractor.extract) or
  1118.                  self.report(self.get_handler, extractor) or
  1119.                  self.report(self.current_handler.handle) or
  1120.                  self.report(self.show_extraction, extractor))
  1121.         if not error:
  1122.             self.target = self.current_handler.target
  1123.         return error
  1124.  
  1125.  
  1126. class ListAction(BaseAction):
  1127.     def list_filenames(self, extractor, filename):
  1128.         # We get a line first to make sure there's not going to be some
  1129.         # basic error before we show what filename we're listing.
  1130.         filename_lister = extractor.get_filenames()
  1131.         try:
  1132.             first_line = filename_lister.next()
  1133.         except StopIteration:
  1134.             self.show_filename(filename)
  1135.         else:
  1136.             self.did_list = True
  1137.             self.show_filename(filename)
  1138.             print first_line
  1139.         for line in filename_lister:
  1140.             print line
  1141.            
  1142.     def run(self, filename, extractor):
  1143.         self.did_list = False
  1144.         error = self.report(self.list_filenames, extractor, filename)
  1145.         if error and self.did_list:
  1146.             logger.error("lister failed: ignore above listing for %s" %
  1147.                          (filename,))
  1148.         return error
  1149.  
  1150.  
  1151. class ExtractorApplication(object):
  1152.     def __init__(self, arguments):
  1153.         for signal_num in (signal.SIGINT, signal.SIGTERM):
  1154.             signal.signal(signal_num, self.abort)
  1155.         signal.signal(signal.SIGPIPE, signal.SIG_DFL)
  1156.         self.parse_options(arguments)
  1157.         self.setup_logger()
  1158.         self.successes = []
  1159.         self.failures = []
  1160.  
  1161.     def clean_destination(self, dest_name):
  1162.         try:
  1163.             os.unlink(dest_name)
  1164.         except OSError, error:
  1165.             if error.errno == errno.EISDIR:
  1166.                 shutil.rmtree(dest_name, ignore_errors=True)
  1167.  
  1168.     def abort(self, signal_num, frame):
  1169.         signal.signal(signal_num, signal.SIG_IGN)
  1170.         print
  1171.         logger.debug("traceback:\n" +
  1172.                      ''.join(traceback.format_stack(frame)).rstrip())
  1173.         logger.debug("got signal %s" % (signal_num,))
  1174.         try:
  1175.             basename = self.current_extractor.target
  1176.         except AttributeError:
  1177.             basename = None
  1178.         if basename is not None:
  1179.             logger.debug("cleaning up %s" % (basename,))
  1180.             clean_targets = set([os.path.realpath('.')])
  1181.             if hasattr(self, 'current_directory'):
  1182.                 clean_targets.add(os.path.realpath(self.current_directory))
  1183.             for directory in clean_targets:
  1184.                 self.clean_destination(os.path.join(directory, basename))
  1185.         sys.exit(1)
  1186.  
  1187.     def parse_options(self, arguments):
  1188.         parser = optparse.OptionParser(
  1189.             usage="%prog [options] archive [archive2 ...]",
  1190.             description="Intelligent archive extractor",
  1191.             version=VERSION_BANNER
  1192.             )
  1193.         parser.add_option('-l', '-t', '--list', '--table', dest='show_list',
  1194.                           action='store_true', default=False,
  1195.                           help="list contents of archives on standard output")
  1196.         parser.add_option('-m', '--metadata', dest='metadata',
  1197.                           action='store_true', default=False,
  1198.                           help="extract metadata from a .deb/.gem")
  1199.         parser.add_option('-r', '--recursive', dest='recursive',
  1200.                           action='store_true', default=False,
  1201.                           help="extract archives contained in the ones listed")
  1202.         parser.add_option('--one', '--one-entry', dest='one_entry_default',
  1203.                           default=None,
  1204.                           help=("specify extraction policy for one-entry " +
  1205.                                 "archives: inside/rename/here"))
  1206.         parser.add_option('-n', '--noninteractive', dest='batch',
  1207.                           action='store_true', default=False,
  1208.                           help="don't ask how to handle special cases")
  1209.         parser.add_option('-o', '--overwrite', dest='overwrite',
  1210.                           action='store_true', default=False,
  1211.                           help="overwrite any existing target output")
  1212.         parser.add_option('-f', '--flat', '--no-directory', dest='flat',
  1213.                           action='store_true', default=False,
  1214.                           help="extract everything to the current directory")
  1215.         parser.add_option('-v', '--verbose', dest='verbose',
  1216.                           action='count', default=0,
  1217.                           help="be verbose/print debugging information")
  1218.         parser.add_option('-q', '--quiet', dest='quiet',
  1219.                           action='count', default=3,
  1220.                           help="suppress warning/error messages")
  1221.         self.options, filenames = parser.parse_args(arguments)
  1222.         if not filenames:
  1223.             parser.error("you did not list any archives")
  1224.         # This makes WARNING is the default.
  1225.         self.options.log_level = (10 * (self.options.quiet -
  1226.                                         self.options.verbose))
  1227.         try:
  1228.             self.options.one_entry_policy = OneEntryPolicy(self.options)
  1229.         except ValueError:
  1230.             parser.error("invalid value for --one-entry option")
  1231.         self.options.recursion_policy = RecursionPolicy(self.options)
  1232.         self.archives = {os.path.realpath(os.curdir): filenames}
  1233.  
  1234.     def setup_logger(self):
  1235.         logging.getLogger().setLevel(self.options.log_level)
  1236.         handler = logging.StreamHandler()
  1237.         handler.setLevel(self.options.log_level)
  1238.         formatter = logging.Formatter("dtrx: %(levelname)s: %(message)s")
  1239.         handler.setFormatter(formatter)
  1240.         logger.addHandler(handler)
  1241.         logger.debug("logger is set up")
  1242.  
  1243.     def recurse(self, filename, extractor, action):
  1244.         self.options.recursion_policy.prep(filename, action.target, extractor)
  1245.         if self.options.recursion_policy.ok_to_recurse():
  1246.             for filename in extractor.included_archives:
  1247.                 logger.debug("recursing with %s archive" %
  1248.                              (extractor.content_type,))
  1249.                 tail_path, basename = os.path.split(filename)
  1250.                 path_args = [self.current_directory, extractor.included_root,
  1251.                              tail_path]
  1252.                 logger.debug("included root: %s" % (extractor.included_root,))
  1253.                 logger.debug("tail path: %s" % (tail_path,))
  1254.                 if os.path.isdir(action.target):
  1255.                     logger.debug("action target: %s" % (action.target,))
  1256.                     path_args.insert(1, action.target)
  1257.                 directory = os.path.join(*path_args)
  1258.                 self.archives.setdefault(directory, []).append(basename)
  1259.  
  1260.     def check_file(self, filename):
  1261.         try:
  1262.             result = os.stat(filename)
  1263.         except OSError, error:
  1264.             return error.strerror
  1265.         if stat.S_ISDIR(result.st_mode):
  1266.             return "cannot work with a directory"
  1267.  
  1268.     def show_stderr(self, logger_func, stderr):
  1269.         if stderr:
  1270.             logger_func("Error output from this process:\n" +
  1271.                         stderr.rstrip('\n'))
  1272.  
  1273.     def try_extractors(self, filename, builder):
  1274.         errors = []
  1275.         for extractor in builder:
  1276.             self.current_extractor = extractor  # For the abort() method.
  1277.             error = self.action.run(filename, extractor)
  1278.             if error:
  1279.                 errors.append((extractor.file_type, extractor.encoding, error,
  1280.                                extractor.get_stderr()))
  1281.                 if extractor.target is not None:
  1282.                     self.clean_destination(extractor.target)
  1283.             else:
  1284.                 self.show_stderr(logger.warn, extractor.get_stderr())
  1285.                 self.recurse(filename, extractor, self.action)
  1286.                 return
  1287.         logger.error("could not handle %s" % (filename,))
  1288.         if not errors:
  1289.             logger.error("not a known archive type")
  1290.             return True
  1291.         for file_type, encoding, error, stderr in errors:
  1292.             message = ["treating as", file_type, "failed:", error]
  1293.             if encoding:
  1294.                 message.insert(1, "%s-encoded" % (encoding,))
  1295.             logger.error(' '.join(message))
  1296.             self.show_stderr(logger.error, stderr)
  1297.         return True
  1298.        
  1299.     def download(self, filename):
  1300.         url = filename.lower()
  1301.         for protocol in 'http', 'https', 'ftp':
  1302.             if url.startswith(protocol + '://'):
  1303.                 break
  1304.         else:
  1305.             return filename, None
  1306.         # FIXME: This can fail if there's already a file in the directory
  1307.         # that matches the basename of the URL.
  1308.         status = subprocess.call(['wget', '-c', filename],
  1309.                                  stdin=subprocess.PIPE)
  1310.         if status != 0:
  1311.             return None, "wget returned status code %s" % (status,)
  1312.         return os.path.basename(urlparse.urlparse(filename)[2]), None
  1313.  
  1314.     def run(self):
  1315.         if self.options.show_list:
  1316.             action = ListAction
  1317.         else:
  1318.             action = ExtractionAction
  1319.         self.action = action(self.options, self.archives.values()[0])
  1320.         while self.archives:
  1321.             self.current_directory, self.filenames = self.archives.popitem()
  1322.             os.chdir(self.current_directory)
  1323.             for filename in self.filenames:
  1324.                 filename, error = self.download(filename)
  1325.                 if not error:
  1326.                     builder = ExtractorBuilder(filename, self.options)
  1327.                     error = (self.check_file(filename) or
  1328.                              self.try_extractors(filename,
  1329.                                                  builder.get_extractor()))
  1330.                 if error:
  1331.                     if error != True:
  1332.                         logger.error("%s: %s" % (filename, error))
  1333.                     self.failures.append(filename)
  1334.                 else:
  1335.                     self.successes.append(filename)
  1336.             self.options.one_entry_policy.permanent_policy = EXTRACT_WRAP
  1337.         if self.failures:
  1338.             return 1
  1339.         return 0
  1340.  
  1341.  
  1342. if __name__ == '__main__':
  1343.     app = ExtractorApplication(sys.argv[1:])
  1344.     sys.exit(app.run())
  1345.  
Add Comment
Please, Sign In to add comment