Advertisement
kwasinski

FileCircus.pl

Mar 22nd, 2018
289
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Perl 2.97 KB | None | 0 0
  1. #!perl -w
  2.  
  3. use utf8;
  4. use Encode qw/decode from_to decode_utf8/;
  5. use File::Stat qw/stat/;
  6. use File::Find;
  7. use File::Basename;
  8. use File::Copy qw/cp mv/;
  9. use Time::localtime;
  10.  
  11. use Data::Dumper qw/Dumper/;
  12.  
  13.  
  ## English three-letter month abbreviation => Portuguese month name
  ## (written without accents). organizeFiles() uses the value as the name
  ## of the destination folder.
  @months{qw/Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec/} = qw/
      janeiro fevereiro marco    abril   maio     junho
      julho   agosto    setembro outubro novembro dezembro
  /;

  ## Walk every path given on the command line and hand each entry found
  ## to organizeFiles().
  find({ wanted => \&organizeFiles }, @ARGV);
  24.  
  25.  
  ##
  ##
  ## Sanitize a file name and rename the file accordingly.
  ##
  ## Written as a File::Find "wanted" callback: it takes no arguments, reads
  ## the current entry from $_ and renames it relative to the current working
  ## directory. Directories are skipped. The outcome is reported on STDOUT;
  ## nothing useful is returned.
  ##
  ## The file is left untouched (and an error is printed) when the sanitized
  ## name would be empty or when another file already uses that name.
  sub sanitizeFilename {
      return  if -d;
      fileparse_set_fstype('MSWin32');

      my $filename = basename($_);
      my $sanitizedFilename = $filename;

      $sanitizedFilename =~ s{[\_\-\+]}{ }ig;            # "_", "-" and "+" become spaces
      $sanitizedFilename =~ s{\s+?}{_}ig;                # every whitespace character becomes "_"
      $sanitizedFilename =~ s{([0-9])([a-z])}{$1 $2}ig;  # put a space between a digit and the letter after it
      $sanitizedFilename =~ s{\d{8}}{}ig;                # drop 8-digit runs (presumably date stamps -- confirm)
      $sanitizedFilename =~ s{^\s*|\s*$}{}ig;            # trim leading/trailing whitespace

      # Keep only letters, digits, whitespace and dots.
      # NOTE(review): "_" is not in this whitelist, so the underscores created
      # above are removed again and the words end up glued together. Confirm
      # that this is the intended result.
      $sanitizedFilename =~ s{[^a-z0-9\s\.]+?}{}ig;

      # Already clean: there is nothing to rename.
      if ($sanitizedFilename eq $filename) {
          print 'Sanitization complete: ', $sanitizedFilename, "\n";
          return;
      }

      # Decide from explicit checks and from mv's return value. $! is only
      # meaningful right after a failed call, so it cannot be used on its own
      # to tell success from failure.
      my $failure;
      if ($sanitizedFilename eq '') {
          $failure = 'nothing is left of the name after sanitization';
      }
      elsif (-e $sanitizedFilename) {
          # Renaming would silently replace the existing file.
          $failure = 'target already exists';
      }
      elsif (!mv($filename, $sanitizedFilename)) {
          $failure = "$!";
      }

      if (defined $failure) {
          print 'Error: ', $filename, ' -> ', $sanitizedFilename, ' ', $failure, "\n";
      }
      else {
          print 'Sanitization complete: ', $sanitizedFilename, "\n";
      }
      return;
  }
  48.  
  ##
  ##
  ## Copy a file into "<base>/<year>/<month name>", where year and month are
  ## taken from the file's modification time.
  ##
  ## Written as a File::Find "wanted" callback: it takes no arguments and
  ## reads the current entry from $_ (relative to the current working
  ## directory). Directories are skipped. The month folder name comes from
  ## the file-level %months table. The outcome is reported on STDOUT; nothing
  ## useful is returned.
  sub organizeFiles {
      return  if -d;

      fileparse_set_fstype('MSWin32');
      # print 'Processing file: ', $_, "\n";

      my $filename = basename($_);
      my $basePathToFiles = '/var/www/html/oi/oi_repositorio_ofertas/organized';

      # Element 9 of the builtin stat list is the modification time. A failed
      # stat gives an empty list, so $mtime is undef and the file is skipped
      # instead of aborting the whole traversal.
      my $mtime = (CORE::stat($filename))[9];
      if (!defined $mtime) {
          print 'Error: ', $filename, ' ', $!, "\n";
          return;
      }

      # Read year and month from the broken-down time directly rather than
      # parsing them back out of a formatted date string.
      my @when  = CORE::localtime($mtime);
      my $year  = $when[5] + 1900;
      my $month = (qw/Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec/)[ $when[4] ];

      my $monthFolder = $months{$month};
      if (!defined $monthFolder) {
          print 'Error: no folder name known for month ', $month, "\n";
          return;
      }

      my $pathToCopy = "$basePathToFiles/$year/$monthFolder";

      if (!-d $pathToCopy) {
          # make_path also creates the missing parent folders (base and year),
          # which a single mkdir cannot do.
          require File::Path;
          File::Path::make_path($pathToCopy, { error => \my $problems });
          if (@$problems) {
              my ($path, $message) = %{ $problems->[0] };
              print 'Error: cannot create ', $pathToCopy, ' ', $message, "\n";
              return;
          }
      }

      # NOTE(review): files with the same name coming from different source
      # folders land in the same target folder -- presumably the later copy
      # replaces the earlier one; confirm whether that is acceptable.
      if (cp($filename, $pathToCopy)) {
          print 'Copied to ', $pathToCopy, "\n";
      }
      else {
          print 'Error: ', $filename, ' -> ', $pathToCopy, ' ', $!, "\n";
      }
      return;
  }
  74.  
  ##
  ##
  ## Convert a Word document (.doc / .docx, any letter case) to PDF by
  ## running LibreOffice on it.
  ##
  ## Written as a File::Find "wanted" callback: it takes no arguments and
  ## reads the current entry from $_. Directories and files with any other
  ## extension are skipped. The outcome is reported on STDOUT; nothing useful
  ## is returned.
  sub proccess_file {
      return  if -d;
      return  if !m/\.docx?\z/i;

      my $filename = $_;

      # A name starting with "-" would be taken for a command-line option.
      $filename = "./$filename"  if $filename =~ m/\A-/;

      print 'Starting conversion of: ', $filename, "\n";

      # List form of system(): no shell is involved, so spaces, quotes, ";"
      # or "$" in the file name need no escaping and cannot inject commands.
      my $status = system('libreoffice', '--invisible', '--convert-to', 'pdf', $filename);

      if ($status == 0) {
          print 'Converted!', "\n";
      }
      elsif ($status == -1) {
          print 'Error: could not run libreoffice: ', $!, "\n";
      }
      else {
          print 'Error: libreoffice failed for ', $filename, ' (wait status ', $status, ')', "\n";
      }
      return;
  }
  94.  
  95.  
  96. __DATA__
  97.  
  98.  
  99. Steps:
  100.  
  101. [X] cast lockfiles into the void
  102. [X] Sanitize files, remove "_" and decode filenames with broken encoding
  103. [X] Transform all files to PDF
  104. [X] Separate files by Year and Month
  105.     [X]Parse file meta data
  106. [?] Organize files by Offers
  107.     Open file and search for known names
  108.     Save files inside the year and month
  109.  
  110. -----------------------------------------------
  111. -----------------------------------------------
  112. unzip all:
  113.     find . -name '*.zip'  -exec unzip "{}" \;
  114.  
  115. remove all zips:
  116.     find . -name '*.zip'  -exec rm -f "{}" \;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement