Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!perl -w
- use utf8;
- use Encode qw/decode from_to decode_utf8/;
- use File::Stat qw/stat/;
- use File::Find;
- use File::Basename;
- use File::Copy qw/cp mv/;
- use Time::localtime;
- use Data::Dumper qw/Dumper/;
# Portuguese month names (ASCII only), keyed by the English three-letter
# month abbreviation. organizeFiles uses the values as directory names.
%months = (
    Jan => 'janeiro',
    Feb => 'fevereiro',
    Mar => 'marco',
    Apr => 'abril',
    May => 'maio',
    Jun => 'junho',
    Jul => 'julho',
    Aug => 'agosto',
    Sep => 'setembro',
    Oct => 'outubro',
    Nov => 'novembro',
    Dec => 'dezembro',
);

# Walk every path given on the command line and hand each entry found to
# organizeFiles.
find( { wanted => \&organizeFiles }, @ARGV );
- ##
- ##
## Sanitize the name of the file File::Find is currently visiting and rename
## the file in place.
##
## Written as a File::Find "wanted" callback: it takes no arguments and reads
## the entry from $_. Directories are skipped.
##
## The new name is produced by the substitutions below, applied in order.
## Note that the underscores created by the second substitution are deleted
## again by the last one, so the only blanks left in the result are the ones
## inserted between a digit and a following letter.
##
## Nothing is renamed when the name is already clean, when the sanitized name
## would be empty, or when a file with the sanitized name already exists (the
## rename would silently overwrite it). One status line is printed per file
## that is renamed or rejected. Returns nothing.
sub sanitizeFilename {
    return if -d;

    fileparse_set_fstype('MSWin32');
    my $filename          = basename($_);
    my $sanitizedFilename = $filename;

    $sanitizedFilename =~ s{[\_\-\+]}{ }ig;            # "_", "-" and "+" become blanks
    $sanitizedFilename =~ s{\s+?}{_}ig;                # each whitespace character becomes "_"
    $sanitizedFilename =~ s{([0-9])([a-z])}{$1 $2}ig;  # blank between a digit and a following letter
    $sanitizedFilename =~ s{\d{8}}{}ig;                # drop 8-digit runs (presumably yyyymmdd stamps -- confirm)
    $sanitizedFilename =~ s{^\s*|\s*$}{}ig;            # trim leading and trailing whitespace
    $sanitizedFilename =~ s{[^a-z0-9\s\.]+?}{}ig;      # keep only letters, digits, whitespace and "."

    # Already clean: renaming a file onto itself is pointless.
    return if $sanitizedFilename eq $filename;

    if ( $sanitizedFilename eq '' ) {
        print 'Error: ', $filename, ' -> ', $sanitizedFilename, ' ',
            'sanitized name is empty', "\n";
        return;
    }

    # Several source names can collapse to the same sanitized name; refuse to
    # clobber a file that is already there.
    if ( -e $sanitizedFilename ) {
        print 'Error: ', $filename, ' -> ', $sanitizedFilename, ' ',
            'target already exists', "\n";
        return;
    }

    # Decide on the return value of mv: $! is not cleared on success, so it
    # may still hold the error of some earlier, unrelated call.
    if ( mv( $filename, $sanitizedFilename ) ) {
        print 'Sanitization complete: ', $sanitizedFilename, "\n";
    }
    else {
        print 'Error: ', $filename, ' -> ', $sanitizedFilename, ' ', $!, "\n";
    }

    return;
}
- ##
- ##
## Copy the file File::Find is currently visiting into a folder named after
## the year and month of its last modification:
##
##     <base path>/<year>/<Portuguese month name>
##
## Written as a File::Find "wanted" callback: it takes no arguments and reads
## the entry from $_. Directories are skipped. The month folder name comes
## from the file-level %months table. The year and month folders are created
## when missing; the base path itself must already exist.
##
## Prints one status line per file. Returns nothing.
##
## NOTE(review): files with the same name coming from different source
## directories land in the same folder and overwrite each other -- confirm
## this is acceptable.
sub organizeFiles {
    return if -d;

    fileparse_set_fstype('MSWin32');
    my $filename        = basename($_);
    my $basePathToFiles = '/var/www/html/oi/oi_repositorio_ofertas/organized';

    # Use the core stat in list context (field 9 is the mtime) so a failed
    # stat can be detected instead of dying on a method call on undef.
    my $mtime = ( CORE::stat($filename) )[9];
    if ( !defined $mtime ) {
        print 'Error: ', $filename, ' ', $!, "\n";
        return;
    }

    # Take month and year straight from the broken-down time instead of
    # pattern-matching a formatted date string. Field 4 is the month as
    # 0..11, field 5 is the year minus 1900.
    my ( $monthIndex, $yearsSince1900 ) = ( CORE::localtime($mtime) )[ 4, 5 ];
    my $year  = $yearsSince1900 + 1900;
    my $month = (qw/Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec/)[$monthIndex];

    my $monthDir = $months{$month};
    if ( !defined $monthDir ) {
        print 'Error: ', $filename, ' no folder name known for month ', $month, "\n";
        return;
    }

    my $yearPath   = "$basePathToFiles/$year";
    my $pathToCopy = "$yearPath/$monthDir";

    # mkdir creates a single level only, so the year folder has to exist
    # before the month folder can be created inside it.
    for my $dir ( $yearPath, $pathToCopy ) {
        next if -d $dir;
        next if mkdir($dir);
        print 'Error: ', $dir, ' ', $!, "\n";
        return;
    }

    # Decide on the return value of cp: $! is not cleared on success.
    if ( cp( $filename, $pathToCopy ) ) {
        print 'Copied to ', $pathToCopy, "\n";
    }
    else {
        print 'Error: ', $filename, ' -> ', $pathToCopy, ' ', $!, "\n";
    }

    return;
}
- ##
- ##
## Convert the Word document File::Find is currently visiting to PDF by
## running libreoffice on it.
##
## Written as a File::Find "wanted" callback: it takes no arguments and reads
## the entry from $_. Directories and files whose name does not end in
## ".doc" or ".docx" (in any letter case) are skipped.
##
## Prints a line before the conversion starts and a line with its outcome.
## Returns nothing.
sub proccess_file {
    return if -d;
    return if !m/\.docx?\z/i;

    my $filename = $_;
    print 'Starting conversion of: ', $filename, "\n";

    # List-form system bypasses the shell, so the file name is passed to
    # libreoffice verbatim: blanks, quotes, "$", ";" and the like need no
    # escaping and cannot be interpreted as shell syntax.
    my $status = system( 'libreoffice', '--invisible', '--convert-to', 'pdf', $filename );

    if ( $status == 0 ) {
        print 'Converted!', "\n";
    }
    elsif ( $status == -1 ) {
        # The program could not be started at all; $! holds the reason.
        print 'Error: could not run libreoffice: ', $!, "\n";
    }
    else {
        # The exit code is in the high byte of the wait status.
        print 'Error: libreoffice exited with status ', $status >> 8,
            ' for ', $filename, "\n";
    }

    return;
}
- __DATA__
- Steps:
- [X] cast lockfiles into the void
- [X] Sanitize file names: remove "_" and decode file names with broken encoding
- [X] Transform all files to PDF
- [X] Separate files by Year and Month
- [X] Parse file metadata
- [?] Organize files by Offers
- Open file and search for known names
- Save files inside the year and month
- -----------------------------------------------
- -----------------------------------------------
- unzip all:
- find . -name '*.zip' -exec unzip "{}" \;
- remove all zips:
- find . -name '*.zip' -exec rm -f "{}" \;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement