Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/python3
#
# Analyse the output of fdupes.
#   1. run: fdupes -rS * > fdupesoutput
#   2. run this script for a summary
#
# The above fdupes command gives output such as:
#   5060855 bytes each:
#   ./Photos/Photos 2011/IMG_1740.JPG
#   ./Photos/Uploaded Photos/IMG_1740.JPG
#
# This script counts the number of duplicate files and the total amount
# of space wasted by the duplicates.
#
INPUT = "fdupesoutput"


def summarize_fdupes(lines):
    """Tally redundant files and wasted bytes from ``fdupes -S`` output.

    The input is a sequence of groups separated by blank lines.  The first
    non-blank line of each group is a size header whose first
    whitespace-separated token is the per-file size in bytes; every further
    non-blank line in the group is one file path.

    The first file of each group is treated as the "original" and is not
    counted; each additional file counts as one duplicate wasting
    ``size`` bytes.

    Args:
        lines: Iterable of text lines (an open file works), with or
            without trailing newlines.

    Returns:
        A ``(duplicate_count, byte_count)`` tuple.

    Raises:
        ValueError: If a size header does not start with an integer.
    """
    duplicate_count = 0
    byte_count = 0
    group_size = None       # None => the next non-blank line is a size header
    original_seen = False   # has the first (kept) file of this group gone by?

    for line in lines:
        text = line.strip()
        if not text:
            # A blank line closes the current group.
            group_size = None
            continue

        if group_size is None:
            # Take the first token rather than slicing up to the first
            # space, so a header without a space or newline is still
            # parsed in full.
            group_size = int(text.split(None, 1)[0])
            original_seen = False
        elif not original_seen:
            # Skip the first file instead of pre-subtracting it, so a
            # header with no files after it cannot push the tallies
            # below zero.
            original_seen = True
        else:
            duplicate_count += 1
            byte_count += group_size

    return duplicate_count, byte_count


def main():
    """Read the fdupes report named by ``INPUT`` and print the summary."""
    # File names may contain bytes that are invalid in the locale encoding;
    # only the ASCII size headers are parsed, so replace rather than crash.
    with open(INPUT, errors="replace") as f:
        duplicate_count, byte_count = summarize_fdupes(f)
    print("Count duplicate files: ", duplicate_count)
    print("Wasted space (MB): ", byte_count / 1000000)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement