Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# TODO:
# - parse .pdf
# - convert to .pdf even if OCR is not working

# Folder the scanner writes into; it is also the folder watched for changes.
# The trailing backslash is kept as-is because the value is handed verbatim to
# the watcher command.
$scan_dir = 'C:\Users\LENOVO-PAWEL\Desktop\SKAN\'

# Tesseract executable used to OCR each scan.
$tesseract_exe = "C:\Program Files\Tesseract-OCR\tesseract.exe"

# Sub-folders for processed originals and for the generated PDFs.
# Join-Path is used instead of string interpolation so the trailing backslash
# of $scan_dir does not produce a doubled separator ("SKAN\\source").
$source_dir = Join-Path -Path $scan_dir -ChildPath 'source'
$converted_dir = Join-Path -Path $scan_dir -ChildPath 'converted'
# Create the folder for processed source scans unless it is already there.
if (-not (Test-Path -Path $source_dir -PathType Container)) {
    mkdir -p "$source_dir"
    Write-Output "Directory for source files created"
}
else {
    Write-Output "Directory for source file exist"
}

# Same check for the folder that receives the generated PDFs.
if (-not (Test-Path -Path $converted_dir -PathType Container)) {
    mkdir -p "$converted_dir"
    Write-Output "Directory for converted files created"
}
else {
    Write-Output "Directory for converted files exist"
}
function monitor {
    # Runs inotifywait against $scan_dir (read from the enclosing scope) and
    # lets its output flow to the pipeline. The flags ask for continuous
    # monitoring (-m) of "modify" events, printed using the '%w\%f' format.
    $watcher_exe = 'C:\drukarki\inotifywait.exe'
    $watcher_args = @('-m', '--format', '%w\%f', '-e', 'modify', "$scan_dir")

    & $watcher_exe @watcher_args
}
function convert {
    <#
    .SYNOPSIS
    OCRs every TIFF path received from the pipeline into a PDF with Tesseract.

    .DESCRIPTION
    For each pipeline item whose extension is .tif or .tiff, runs the
    executable in $tesseract_exe with the "pdf" output config, writing
    "<path without extension>.pdf" next to the scan. When Tesseract exits with
    code 0, the original scan is moved to $source_dir and the PDF to
    $converted_dir; otherwise a failure message is written and nothing is
    moved. Items with any other extension are reported and skipped.

    Reads $tesseract_exe, $source_dir and $converted_dir from the enclosing
    scope. Status messages are written to the output stream.
    #>
    process {
        $source_file = $_
        $ext = [System.IO.Path]::GetExtension("$_")

        # -match and -replace are case-insensitive, so .TIF / .TIFF also pass.
        if ($ext -match "[.]tiff?$") {
            # Tesseract takes an output *base* name and appends ".pdf" itself.
            $output_file = $source_file -replace "[.]tiff?$", ""

            echo "File to be OCRed = '$source_file' [ extension '$ext' ]"
            &$tesseract_exe "$source_file" "$output_file" pdf

            if ($LASTEXITCODE -eq 0) {
                echo "Convertion is succesful '$output_file.pdf'"
                # Archive the original scan. The extension-less base name is
                # not a real file, so moving it (as before) always failed and
                # left the TIFF in the watched folder.
                mv "$source_file" "$source_dir"
                mv "$output_file.pdf" "$converted_dir"
            }
            else {
                echo "Conversion failed! '$source_file'"
            }
        }
        else {
            echo "File is not in TIFF format! $_"
        }
    }
}

# Entry point: feed every path reported by the watcher into the converter.
monitor | convert
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement