Advertisement
p-kl

[PowerShell] Tesseract auto-convert from folder

May 6th, 2020
1,258
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  # TODO:
  # - parse .pdf
  # - convert to .pdf even if OCRing not working

  # Script-level settings: the folder being watched, the Tesseract executable,
  # and the two sub-folders (inside the watched folder) that files are moved to.
  $scan_dir      = 'C:\Users\LENOVO-PAWEL\Desktop\SKAN\'
  $tesseract_exe = "C:\Program Files\Tesseract-OCR\tesseract.exe"
  $source_dir    = "$scan_dir\source"
  $converted_dir = "$scan_dir\converted"

  # Create the folder for source files on first run; otherwise just report it.
  if ( -not (Test-Path "$source_dir" -PathType Container) )
  {
      mkdir -p "$source_dir"
      echo "Directory for source files created"
  }
  else
  { echo "Directory for source file exist" }

  # Same bootstrap for the folder that receives the generated PDFs.
  if ( -not (Test-Path "$converted_dir" -PathType Container) )
  {
      mkdir -p "$converted_dir"
      echo "Directory for converted files created"
  }
  else
  { echo "Directory for converted files exist" }
  24.  
  # Start inotifywait.exe on $scan_dir and emit whatever it prints to the pipeline.
  # Flags passed: -m, --format '%w\%f', -e modify.
  # NOTE(review): presumably this prints the full path of every modified file,
  # one per line, until the process is stopped - confirm against inotifywait docs.
  function monitor {
      $watcher_exe  = 'C:\drukarki\inotifywait.exe'
      $watcher_args = @('-m', '--format', '%w\%f', '-e', 'modify', "$scan_dir")
      & $watcher_exe @watcher_args
  }
  # OCR every TIFF path received from the pipeline into a PDF with Tesseract.
  #
  # Pipeline input : one file path per object (as produced by `monitor`).
  # Reads          : $tesseract_exe, $source_dir, $converted_dir (script level).
  # Output         : progress / error messages on the output stream.
  # On success the scanned TIFF is moved to $source_dir and the generated PDF
  # to $converted_dir; on failure both files are left where they are.
  function convert {
      process {
          $source_file = $_
          $ext = [System.IO.Path]::GetExtension("$source_file")
          # -match is case-insensitive; accept both the .tif and .tiff spellings.
          if ($ext -match '^[.]tiff?$') {
              # Output base name handed to Tesseract: the input path without its extension.
              $output_file = $source_file -replace '[.]tiff?$', ''
              echo "File to be OCRed = '$source_file' [ extension '$ext' ]"
              & $tesseract_exe "$source_file" "$output_file" pdf
              if ($LASTEXITCODE -eq 0) {
                  echo "Convertion is succesful '$output_file.pdf'"
                  # Archive the original scan. The extension-less $output_file does not
                  # name an existing file, so the scan itself ($source_file) is moved.
                  # -LiteralPath keeps '[' and ']' in file names from acting as wildcards.
                  Move-Item -LiteralPath $source_file -Destination $source_dir
                  Move-Item -LiteralPath "$output_file.pdf" -Destination $converted_dir
              }
              else {
                  echo "Conversion failed! '$source_file'"
              }
          }
          else {
              echo "File is not in TIFF format! $source_file"
          }
      }
  }

  # Entry point: feed every path reported by the watcher into the converter.
  monitor | convert
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement