Advertisement
Nattack

Duplicate file finder

May 20th, 2012
270
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 1.70 KB | None | 0 0
  1. #!/usr/bin/perl
  2.  
  3. # Duplicate file finder,
  4. # finds duplicate files based on SHA-1 hash.
  5. #
  6. # deletes all found duplicates.
  7.  
  8. # requires Digest::SHA1 to be installed from CPAN.
  9.  
  10.         use 5.010;
  11.         use Digest::SHA1;
  12.  
  use strict;
  use warnings;
  use Cwd qw(getcwd);

  # Main script: hash every file found under the current working directory
  # and delete each file whose SHA-1 digest has already been seen.
  #
  # Usage: pass "R" as the first argument to descend into subdirectories;
  # with any other argument (or none) only the current directory is scanned.
  #
  # WARNING: the first file seen with a given content is kept, every later
  # file with the same digest is unlinked without confirmation.

  # A missing argument is treated like any other non-"R" value.
  my $subbit = ( defined $ARGV[0] && $ARGV[0] eq "R" ) ? 1 : 0;
  my $sha1   = Digest::SHA1->new;
  my %first_path_for;    # digest => path of the first file with that content

  # Ask the system for the working directory: $ENV{PWD} is only maintained
  # by shells and may be missing or stale when started some other way.
  my $start_dir = getcwd();
  defined $start_dir
      or die "Cannot determine the current directory: $!\n";

  foreach my $filename ( getFiles( $start_dir, $subbit ) )
  {
      # Never hash through a symbolic link: the link and its target have the
      # same content, and deleting the target would leave a dangling link.
      next if -l $filename;

      # Only regular files are compared; reading a FIFO or device could block.
      next unless -f $filename;

      # Get SHA-1 hash. A file that cannot be read is skipped, never deleted.
      my $fh;
      unless ( open $fh, "<", $filename )
      {
          warn "Skipping '$filename': cannot open: $!\n";
          next;
      }
      binmode $fh;

      # addfile croaks on a read error; hexdigest also resets the digest
      # object, so it is ready for the next file after a successful run.
      my $hash = eval { $sha1->addfile($fh); $sha1->hexdigest };
      my $read_error = $@;
      close $fh;

      unless ( defined $hash )
      {
          $sha1->reset;    # discard any partially accumulated data
          warn "Skipping '$filename': cannot read: $read_error";
          next;
      }

      # Test for duplicates: remember a new digest, delete a repeated one.
      if ( exists $first_path_for{$hash} )
      {
          unlink $filename
              or warn "Could not delete '$filename': $!\n";
      }
      else
      {
          $first_path_for{$hash} = $filename;
      }
  }
  40.    
  # getFiles( $directory, $recurse )
  #
  # Returns a list of paths of the form "$directory/$name" for every
  # non-directory entry of $directory matched by glob "*" (so names that
  # start with a dot are not listed).
  #
  # If the second argument is numerically 1, subdirectories are searched the
  # same way and their files are appended after this directory's own files.
  # Symbolic links to directories are never followed, so a link cycle cannot
  # cause endless recursion.
  #
  # A directory that cannot be entered produces a warning and contributes no
  # files. The working directory of the process is restored before returning.
  sub getFiles
  {
      my ( $directory, $recurse_flag ) = @_;
      my $recurse = ( defined $recurse_flag && $recurse_flag == 1 );

      return unless defined $directory && length $directory;

      # Remove a trailing slash if present, but keep the root directory "/".
      $directory =~ s{(?<=.)/\z}{}s;
      my $prefix = $directory eq "/" ? "" : $directory;

      # Remember where we started: the listing below works by changing into
      # the directory, and callers should not observe that.
      require Cwd;
      my $origin = Cwd::getcwd();

      # Without this check a failed chdir would silently list whatever
      # directory happens to be current and report paths that do not exist.
      unless ( chdir $directory )
      {
          warn "getFiles: cannot enter '$directory': $!\n";
          return;
      }

      my @files;
      my @subdirs;

      # Iterate through the directory (names are relative to it here).
      foreach my $name ( glob "*" )
      {
          my $path = $prefix . "/" . $name;

          if ( -d $name )
          {
              # Directories are never reported as files; real ones are
              # queued for recursion when it was requested.
              push @subdirs, $path if $recurse && !-l $name;
              next;
          }

          push @files, $path;
      }

      # Go back before recursing so that a relative $directory keeps
      # resolving from the same place for every subdirectory.
      if ( defined $origin )
      {
          chdir $origin
              or die "getFiles: cannot return to '$origin': $!\n";
      }

      # If subdirectories were found, find files in them as well.
      foreach my $subdir (@subdirs)
      {
          push @files, getFiles( $subdir, 1 );
      }

      return @files;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement