metalx1000

Walmart Receipt to PDF with OCR

Apr 10th, 2023
717
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 1.50 KB | None | 0 0
  1. #!/bin/bash
  2. ######################################################################
  3. #Copyright (C) 2023  Kris Occhipinti
  4. #https://filmsbykris.com
  5.  
  6. #This program is free software: you can redistribute it and/or modify
  7. #it under the terms of the GNU General Public License as published by
  8. #the Free Software Foundation version 3 of the License.
  9.  
  10. #This program is distributed in the hope that it will be useful,
  11. #but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13. #GNU General Public License for more details.
  14.  
  15. #You should have received a copy of the GNU General Public License
  16. #along with this program.  If not, see <http://www.gnu.org/licenses/>.
  17. ######################################################################
  18.  
  # Downloads a Walmart e-receipt and converts it to a PDF with OCR.
  #
  # Usage:   $0 '<receipt url>'
  # Example: $0 'https://receipts-query.edge.walmart.com/er/930d5bb3-3784-58bb-06b0-e712ba6eadc6'
  #
  # Exits with status 1 (message on stderr) when the URL is missing or does
  # not point at the Walmart receipt host.
  if [[ -z "${1:-}" ]]; then
    printf 'URL Needed\nExample: %s %s\n' \
      "$0" \
      "'https://receipts-query.edge.walmart.com/er/930d5bb3-3784-58bb-06b0-e712ba6eadc6'" >&2
    exit 1
  fi

  # Only the first argument is the URL; "$*" would glue any extra arguments
  # onto it with spaces and hand wget a malformed address.
  url="$1"

  # Anchor the match at the start of the string (including the "/" after the
  # host) so that a URL which merely *contains* the Walmart address somewhere,
  # or uses a look-alike host such as "...walmart.com.example.org", is rejected.
  if [[ "$url" != "https://receipts-query.edge.walmart.com/"* ]]; then
    echo "Invalid URL" >&2
    exit 1
  fi
  31.  
  # Fetch the receipt page in $url, extract the embedded base64 image, turn it
  # into "<id>.pdf" in the current directory, run OCR on it and print the
  # recognised text to stdout.

  # Make a failure in any pipeline stage (wget, grep, base64, ...) visible,
  # not just a failure of the last command.
  set -o pipefail

  # Base name for all generated files: epoch seconds plus a random suffix.
  id="$(date +%s)_$RANDOM"

  # Intermediate files are removed on every exit path, including failures.
  trap 'rm -f -- "$id.png" "$id.ps"' EXIT

  # The page is split into one token per line; from each token containing
  # src= the quoted attribute value is taken, and the part after the first
  # comma (the payload of a "data:...;base64,<payload>" URI) is decoded.
  wget "$url" -qO- \
    | tr " " "\n" \
    | grep "src=" \
    | cut -d\" -f2 \
    | cut -d, -f2 \
    | base64 -d > "$id.png" || {
      echo "Failed to download or decode receipt image from $url" >&2
      exit 1
    }

  # Guard against a "successful" pipeline that produced no data.
  [[ -s "$id.png" ]] || {
    echo "No receipt image found at $url" >&2
    exit 1
  }

  convert "$id.png" "$id.pdf" || {
    echo "Failed to convert $id.png to PDF" >&2
    exit 1
  }

  # Round-trip the PDF through PostScript and back
  # NOTE(review): presumably this normalises the ImageMagick-generated PDF
  # before OCR -- confirm whether it is still required.
  pdf2ps "$id.pdf" "$id.ps" || exit 1
  ps2pdf "$id.ps" "$id.pdf" || exit 1

  # Add the OCR text layer in place. A failure here is reported but not fatal:
  # the image-only PDF is still kept as a result.
  ocrmypdf "$id.pdf" "$id.pdf" ||
    echo "Warning: OCR failed, $id.pdf has no text layer" >&2

  # Alternative text extraction, kept for reference:
  #pdf2txt "$1" |sed '/^$/d'
  pdftotext -layout "$id.pdf" -
  45.  
Add Comment
Please, Sign In to add comment