Advertisement
slik1977

Text_recognition 1.0

Feb 11th, 2022
232
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.79 KB | None | 0 0
  1. import pytesseract
  2. import cv2
  3.  
  4. pytesseract.pytesseract.tesseract_cmd = r"C:\\Program Files\\Tesseract-OCR\\tesseract.exe"
  5.  
  6. import numpy as np
  7.  
  8. img = cv2.imread('test.png')
  9. img = cv2.resize(img, None, fx=1.5, fy=1.5, interpolation=cv2.INTER_CUBIC)
  10. # Convert to gray
  11. img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  12.  
  13. # Apply dilation and erosion to remove some noise
  14. kernel = np.ones((1, 1), np.uint8)
  15. img = cv2.dilate(img, kernel, iterations=1)
  16. img = cv2.erode(img, kernel, iterations=1)
  17.  
  18. # Apply blur to smooth out the edges
  19. #img = cv2.GaussianBlur(img, (5, 5), 0)
  20. #img = cv2.bilateralFilter(img,9,75,75)
  21. img = cv2.medianBlur(img, 3)
  22.  
  23. # Apply threshold to get image with only b&w (binarization)
  24. img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
  25. cv2.threshold(img,127,255,cv2.THRESH_BINARY)
  26. cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 2)
  27.  
  28. # selection and output of the found words on the picture
  29. custom_config = r'-c tessedit_char_whitelist=!$&()-./0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ\\_abcdefghijklmnopqrstuvwxyz --oem 3 --psm 12 '
  30.  
  31. text = pytesseract.image_to_string(img, config=custom_config)
  32. words = text.split()
  33.  
  34. with open('WordsFromWindowsScreen.txt', 'w') as f:
  35.     for word in words:
  36.         f.write(word + " ")
  37. data = pytesseract.image_to_data(img, config=custom_config)
  38. for i, item in enumerate(data.splitlines()):
  39.     if i == 0:
  40.         continue
  41.     item = item.split()
  42.     try:
  43.         x, y, w, h = int(item[6]), int(item[7]), int(item[8]), int(item[9])
  44.         cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 1)
  45.         cv2.putText(img, item[11], (x, y), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 0), 1)
  46.     except IndexError:
  47.         continue
  48. # output of the result
  49. cv2.imshow("Output", img)
  50. cv2.waitKey(0)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement