Advertisement
markruff

analyse fdupes

Sep 6th, 2016
206
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.09 KB | None | 0 0
  1. #!/bin/python3
  2. #
  3. # analyse the output of fdupes
  4. # 1. run fdupes -rS * > fdupesoutput
  5. # 2. run this script for a summary
  6. #
  7. # The above fdupes command give output such as:
  8. # 5060855 bytes each:
  9. # ./Photos/Photos 2011/IMG_1740.JPG
  10. # ./Photos/Uploaded Photos/IMG_1740.JPG
  11. #
  12. # This python script counts the number of duplicate files and
  13. # the total amount of space wasted by the duplicate
  14. #
  15.  
  16. INPUT="fdupesoutput"
  17. byte_count=0
  18. duplicate_count=0
  19. current_file_size=0
  20. header = True # true if we are reading the header, false if counting files
  21.  
  22. with open(INPUT) as f:
  23.   for line in f:
  24.     if line.strip() == '':
  25.       header = True
  26.       continue
  27.     if header == True:
  28.       current_file_size = int(line[:line.find(" ")])
  29.       header = False
  30.       # we don't want to count the "original", so pre-emptively remove
  31.       # one file's worth of size and quantity from the tally
  32.       byte_count -= current_file_size
  33.       duplicate_count -= 1
  34.     else:
  35.       byte_count += current_file_size
  36.       duplicate_count += 1
  37.  
  38. print("Count duplicate files: ", duplicate_count)
  39. print("Wasted space (MB):     ", byte_count / 1000000)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement