Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- __author__ = 'Andrii Shelestov <streamx3@gmail.com>'
- # http://pastebin.com/FBP6zSKC
- import sys
- import os
- import shutil
- import ntpath
- from hashlib import md5
- #############
- # Functions #
- ################################################################################
def error_and_leave(error):
    """Report *error* and terminate the program with exit status 1.

    The message is written to standard error rather than standard output so
    that it is not mixed into the regular progress output of the script.

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write('{0}\n'.format(error))
    sys.exit(1)
def assert_dir_exists(dir):
    """Abort the program unless *dir* names an existing directory.

    Returns None when the directory exists; otherwise reports the problem
    through error_and_leave.
    """
    # isdir rather than exists: a regular file at this path must be rejected
    # too, since callers go on to walk it or copy files into it.
    if not os.path.isdir(dir):
        error_and_leave('Dir "' + dir + '" does not exist!')
def get_filepaths(directory, ext=None):
    """Return the paths of all files found under *directory*, recursively.

    When *ext* is given, only paths ending with that suffix are kept. The
    result is sorted in descending order.
    """
    collected = [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(directory)
        for name in names
    ]
    if ext is not None:
        collected = [path for path in collected if path.endswith(ext)]
    return sorted(collected, reverse=True)
def md5sum(filename):
    """Return the hex MD5 digest of the content of *filename*.

    The result matches what the ``md5sum`` command-line program prints for
    the same file. The file is read in fixed-size chunks so large files are
    not loaded into memory at once, and the handle is always closed.
    """
    digest = md5()
    with open(filename, 'rb') as stream:
        # iter() with a sentinel stops at the empty bytes returned on EOF.
        for chunk in iter(lambda: stream.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()
def file_list2dict(filelist):
    """Map each file's MD5 content hash to its path.

    When several paths share the same hash, the one appearing last in
    *filelist* is the one kept.
    """
    return {md5sum(path): path for path in filelist}
def filter_and_copy_unique_files(argv):
    """Copy one file per distinct content hash from a source tree to a directory.

    Args:
        argv: command-line style list ``[program, srcDir, dstDir]`` with an
            optional fourth element, a filename suffix used to filter the
            files taken from srcDir.

    Files under srcDir are collected recursively and de-duplicated by MD5
    hash, then each remaining file is copied into dstDir under its base name.
    A destination file that already has identical content is skipped; one
    with different content is overwritten. A wrong argument count or a
    missing directory is reported through error_and_leave.
    """
    if len(argv) not in (3, 4):
        # Name the program from the list we were given; fall back to the
        # process arguments only when that list is empty.
        program = argv[0] if argv else sys.argv[0]
        error_and_leave('usage: ' + program + ' srcDir dstDir [extension]')

    src_dir = argv[1]
    dst_dir = argv[2]
    assert_dir_exists(src_dir)
    assert_dir_exists(dst_dir)

    # Value comparison, not identity: "is" on ints relies on interpreter
    # caching details.
    ext = argv[3] if len(argv) == 4 else None
    file_list = get_filepaths(src_dir, ext)
    print('Files:\t' + str(len(file_list)))

    file_dict = file_list2dict(file_list)
    print('Unique:\t' + str(len(file_dict)))
    print('')

    for key, filename in file_dict.items():
        # Paths come from the local filesystem, so use the native basename;
        # ntpath would split on backslashes inside POSIX file names.
        dst_file = os.path.join(dst_dir, os.path.basename(filename))
        if not os.path.exists(dst_file):
            print('Copying ' + key + ' "' + filename + '"')
        elif md5sum(dst_file) == key:
            # key already is the source file's hash; no need to rehash it.
            print('Skipping ' + key + ' "' + filename + '"')
            continue
        else:
            print('Replacing ' + key + ' "' + filename + '"')
        shutil.copy(filename, dst_file)
- ########
- # Main #
- ################################################################################
# Run the tool only when executed as a script, so that importing this module
# (e.g. to reuse md5sum or get_filepaths) does not start copying files.
if __name__ == '__main__':
    filter_and_copy_unique_files(sys.argv)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement